coderouter-cli 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
coderouter/logging.py CHANGED
@@ -595,6 +595,114 @@ def log_demote_unhealthy_provider(
595
595
  logger.info("demote-unhealthy-provider", extra=payload)
596
596
 
597
597
 
598
+ # ---------------------------------------------------------------------------
599
+ # v2.0-J: self-healing log shapes
600
+ # ---------------------------------------------------------------------------
601
+
602
+
603
class SelfHealingExcludePayload(TypedDict):
    """Keys carried in the ``extra`` mapping of a ``self-healing-exclude`` record.

    Built by ``log_self_healing_exclude`` from its keyword arguments.
    """

    # Name of the provider the event is about.
    provider: str
    # Profile name reported alongside the event.
    profile: str
    # Failure-streak count supplied by the caller.
    consecutive_failures: int
609
+
610
+
611
class SelfHealingRestorePayload(TypedDict):
    """Keys carried in the ``extra`` mapping of a ``self-healing-restore`` record.

    Built by ``log_self_healing_restore`` from its keyword arguments.
    """

    # Name of the provider the event is about.
    provider: str
    # Profile name reported alongside the event.
    profile: str
    # Exclusion duration in seconds (the emitter rounds it to one decimal).
    excluded_duration_s: float
617
+
618
+
619
class SelfHealingRestartPayload(TypedDict):
    """Keys carried in the ``extra`` mapping of a ``self-healing-restart`` record.

    Built by ``log_self_healing_restart`` from its keyword arguments.
    """

    # Name of the provider the event is about.
    provider: str
    # Restart command string as supplied by the caller.
    command: str
    # Whether the restart attempt succeeded.
    success: bool
    # Error text for the attempt, or ``None`` when none was supplied.
    error: str | None
626
+
627
+
628
class SelfHealingRecoveryProbePayload(TypedDict):
    """Keys carried in the ``extra`` mapping of a ``self-healing-recovery-probe`` record.

    Built by ``log_self_healing_recovery_probe`` from its keyword arguments.
    """

    # Name of the provider that was probed.
    provider: str
    # Whether the probe attempt succeeded.
    success: bool
    # Next probe interval in seconds (the emitter rounds it to one decimal).
    next_interval_s: float
    # Probe latency in milliseconds (the emitter rounds it to one decimal).
    latency_ms: float
635
+
636
+
637
def log_self_healing_exclude(
    logger: logging.Logger,
    *,
    provider: str,
    profile: str,
    consecutive_failures: int,
) -> None:
    """Log a WARNING ``self-healing-exclude`` record for an excluded provider.

    The keyword arguments are forwarded unchanged as the record's
    ``extra`` fields.
    """
    record_fields: SelfHealingExcludePayload = {
        "provider": provider,
        "profile": profile,
        "consecutive_failures": consecutive_failures,
    }
    logger.warning("self-healing-exclude", extra=record_fields)
651
+
652
+
653
def log_self_healing_restore(
    logger: logging.Logger,
    *,
    provider: str,
    profile: str,
    excluded_duration_s: float,
) -> None:
    """Log an INFO ``self-healing-restore`` record for a restored provider.

    ``excluded_duration_s`` is rounded to one decimal place before it is
    attached to the record.
    """
    rounded_duration = round(excluded_duration_s, 1)
    record_fields: SelfHealingRestorePayload = {
        "provider": provider,
        "profile": profile,
        "excluded_duration_s": rounded_duration,
    }
    logger.info("self-healing-restore", extra=record_fields)
667
+
668
+
669
def log_self_healing_restart(
    logger: logging.Logger,
    *,
    provider: str,
    command: str,
    success: bool,
    error: str | None = None,
) -> None:
    """Log a ``self-healing-restart`` record after a restart attempt.

    The record is emitted at INFO when ``success`` is truthy and at
    WARNING otherwise; all keyword arguments travel as ``extra`` fields.
    """
    record_fields: SelfHealingRestartPayload = {
        "provider": provider,
        "command": command,
        "success": success,
        "error": error,
    }
    if success:
        severity = logging.INFO
    else:
        severity = logging.WARNING
    logger.log(severity, "self-healing-restart", extra=record_fields)
686
+
687
+
688
def log_self_healing_recovery_probe(
    logger: logging.Logger,
    *,
    provider: str,
    success: bool,
    next_interval_s: float,
    latency_ms: float,
) -> None:
    """Log an INFO ``self-healing-recovery-probe`` record for one probe attempt.

    ``next_interval_s`` and ``latency_ms`` are each rounded to one decimal
    place before being attached to the record.
    """
    rounded_interval = round(next_interval_s, 1)
    rounded_latency = round(latency_ms, 1)
    record_fields: SelfHealingRecoveryProbePayload = {
        "provider": provider,
        "success": success,
        "next_interval_s": rounded_interval,
        "latency_ms": rounded_latency,
    }
    logger.info("self-healing-recovery-probe", extra=record_fields)
704
+
705
+
598
706
  # ---------------------------------------------------------------------------
599
707
  # v1.0-A: output-filter-applied log shape
600
708
  #
@@ -623,6 +623,81 @@ class MetricsCollector(logging.Handler):
623
623
  "recent": list(self._recent),
624
624
  }
625
625
 
626
+ # ------------------------------------------------------------------
627
+ # v2.0-K: Persistence
628
+ # ------------------------------------------------------------------
629
+
630
def save_state(self) -> dict[str, object]:
    """Snapshot the persistent counters into a plain dict.

    The snapshot is taken while holding ``self._lock``. Mapping-valued
    counters are copied into plain ``dict`` objects so the result does
    not alias the collector's live state. The ``recent`` ring and
    per-provider ``last_error`` are deliberately left out (ephemeral).
    """
    with self._lock:
        snapshot: dict[str, object] = {}
        snapshot["requests_total"] = self._requests_total
        snapshot["provider_attempts"] = dict(self._provider_attempts)

        # Per-provider outcome counters: copy each inner mapping too.
        outcomes_copy = {}
        for provider_name, outcome_counts in self._provider_outcomes.items():
            outcomes_copy[provider_name] = dict(outcome_counts)
        snapshot["provider_outcomes"] = outcomes_copy

        snapshot["cost_total_usd"] = dict(self._cost_total_usd)
        snapshot["cost_savings_usd"] = dict(self._cost_savings_usd)
        snapshot["cost_total_usd_aggregate"] = self._cost_total_usd_aggregate
        snapshot["cost_savings_usd_aggregate"] = self._cost_savings_usd_aggregate
        snapshot["chain_paid_gate_blocked_total"] = self._chain_paid_gate_blocked_total
        snapshot["chain_budget_exceeded_total"] = self._chain_budget_exceeded_total
        snapshot["chain_memory_pressure_blocked_total"] = (
            self._chain_memory_pressure_blocked_total
        )
        snapshot["chain_uniform_auth_failure_total"] = (
            self._chain_uniform_auth_failure_total
        )
        snapshot["probe_rounds_total"] = self._probe_rounds_total
        return snapshot
654
+
655
def load_state(self, state: dict[str, object]) -> None:
    """Restore counters from a previously saved dict.

    Additive: values from ``state`` are *added* to the current
    counters, so calling ``load_state`` on a fresh collector restores
    the prior session's totals.

    The restore is all-or-nothing. Every value is converted *before*
    any counter is touched, so a malformed value raises
    (``ValueError`` / ``TypeError``) without leaving the collector
    half-restored — a partial additive restore followed by a retry
    would double-count.

    Args:
        state: Mapping shaped like the result of ``save_state``.
            A non-dict ``state`` is ignored. Sub-maps that are not
            dicts are treated as empty, matching the existing guard on
            per-provider outcome maps. Missing keys count as zero.

    Raises:
        ValueError, TypeError: if a present value cannot be converted
            to ``int`` / ``float``. No counter is modified in that case.
    """
    if not isinstance(state, dict):
        return

    def _sub_map(key: str) -> dict:
        # Tolerate absent / null / wrongly-typed containers uniformly.
        value = state.get(key)
        return value if isinstance(value, dict) else {}

    # Phase 1 — parse and convert. Anything that can raise happens here,
    # before the lock is taken and before any state is mutated.
    requests_total = int(state.get("requests_total", 0))
    attempts = {k: int(v) for k, v in _sub_map("provider_attempts").items()}
    outcomes = {
        prov: {k: int(v) for k, v in per_provider.items()}
        for prov, per_provider in _sub_map("provider_outcomes").items()
        if isinstance(per_provider, dict)
    }
    cost_total = {k: float(v) for k, v in _sub_map("cost_total_usd").items()}
    cost_savings = {k: float(v) for k, v in _sub_map("cost_savings_usd").items()}
    cost_total_aggregate = float(state.get("cost_total_usd_aggregate", 0.0))
    cost_savings_aggregate = float(state.get("cost_savings_usd_aggregate", 0.0))
    paid_gate_blocked = int(state.get("chain_paid_gate_blocked_total", 0))
    budget_exceeded = int(state.get("chain_budget_exceeded_total", 0))
    memory_pressure_blocked = int(state.get("chain_memory_pressure_blocked_total", 0))
    uniform_auth_failure = int(state.get("chain_uniform_auth_failure_total", 0))
    probe_rounds = int(state.get("probe_rounds_total", 0))

    # Phase 2 — apply. Only plain additions from here on.
    with self._lock:
        self._requests_total += requests_total
        for k, v in attempts.items():
            self._provider_attempts[k] += v
        for prov, per_provider in outcomes.items():
            if prov not in self._provider_outcomes:
                self._provider_outcomes[prov] = Counter()
            for k, v in per_provider.items():
                self._provider_outcomes[prov][k] += v
        for k, v in cost_total.items():
            self._cost_total_usd[k] = self._cost_total_usd.get(k, 0.0) + v
        for k, v in cost_savings.items():
            self._cost_savings_usd[k] = self._cost_savings_usd.get(k, 0.0) + v
        self._cost_total_usd_aggregate += cost_total_aggregate
        self._cost_savings_usd_aggregate += cost_savings_aggregate
        self._chain_paid_gate_blocked_total += paid_gate_blocked
        self._chain_budget_exceeded_total += budget_exceeded
        self._chain_memory_pressure_blocked_total += memory_pressure_blocked
        self._chain_uniform_auth_failure_total += uniform_auth_failure
        self._probe_rounds_total += probe_rounds
700
+
626
701
  # ------------------------------------------------------------------
627
702
  # Test hook
628
703
  # ------------------------------------------------------------------
@@ -52,6 +52,7 @@ __all__ = [
52
52
  "OutputFilterChain",
53
53
  "StripStopMarkersFilter",
54
54
  "StripThinkingFilter",
55
+ "StripToolCallXmlFilter",
55
56
  "apply_output_filters",
56
57
  "validate_output_filters",
57
58
  ]
@@ -63,20 +64,28 @@ __all__ = [
63
64
 
64
65
 
65
66
  DEFAULT_STOP_MARKERS: tuple[str, ...] = (
67
+ # v1.0-A originals
66
68
  "<|turn|>",
67
69
  "<|end|>",
68
70
  "<|python_tag|>",
69
71
  "<|im_end|>",
70
72
  "<|eot_id|>",
71
73
  "<|channel>thought",
74
+ # v2.2: tool-call XML tags leaked by Qwen / Hermes / Llama tool-call
75
+ # formats. These appear when the model writes tool calls as XML
76
+ # instead of structured JSON, or when the tokenizer's special-token
77
+ # handling leaks through.
78
+ "<|tool▁call|>",
79
+ "<|tool▁sep|>",
72
80
  )
73
81
  """Default stop/harness markers stripped by ``strip_stop_markers``.
74
82
 
75
83
  Covers Llama 3.x (``<|python_tag|>``, ``<|eot_id|>``), ChatML / Qwen
76
- (``<|im_end|>``, ``<|end|>``), Gemma-ish (``<|turn|>``) and OpenAI-
77
- harmony (``<|channel>thought``). Extending this tuple is an ABI change
78
- users who need a bespoke set can add a dedicated filter entry in
79
- a later minor; for v1.0-A the fixed list covers observed leaks.
84
+ (``<|im_end|>``, ``<|end|>``), Gemma-ish (``<|turn|>``), OpenAI-
85
+ harmony (``<|channel>thought``), and Qwen / Hermes tool-call markers
86
+ (``<|tool▁call|>``, ``<|tool▁sep|>``). Extending this tuple is an ABI
87
+ change — users who need a bespoke set can add a dedicated filter entry
88
+ in a later minor.
80
89
  """
81
90
 
82
91
 
@@ -292,6 +301,87 @@ class StripStopMarkersFilter:
292
301
  return "".join(out_parts)
293
302
 
294
303
 
304
+ # ---------------------------------------------------------------------------
305
+ # strip_tool_call_xml (v2.2)
306
+ # ---------------------------------------------------------------------------
307
+
308
+
309
_TOOL_CALL_OPEN = "<tool_call>"
_TOOL_CALL_CLOSE = "</tool_call>"


class StripToolCallXmlFilter:
    """Drop ``<tool_call>...</tool_call>`` spans from streamed assistant text.

    Some Qwen / Hermes / Llama tool-call formats write tool calls into
    the content stream as ``<tool_call>{"name": "Bash", ...}</tool_call>``.
    Once ``tool_repair`` has pulled the structured JSON out of those
    blocks, the XML wrapper is leftover debris that confuses downstream
    clients, so this filter removes the tags together with everything
    between them.

    Ordering: the filter is meant to run AFTER ``tool_repair``. The
    filter chain is applied at the adapter boundary (post-repair), which
    gives that ordering naturally.

    The scanning approach follows ``StripThinkingFilter``: a two-state
    (outside / inside block) scanner that holds back any trailing text
    which could be the start of a tag split across chunk boundaries.
    """

    name = "strip_tool_call_xml"

    def __init__(self) -> None:
        """Start outside any block, with an empty carry-over buffer."""
        # Becomes True once at least one opening tag has been consumed.
        self.modified: bool = False
        # True while the scanner is between an open tag and its close tag.
        self._in_block: bool = False
        # Text received but not yet emitted or discarded.
        self._buffer: str = ""

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Absorb ``text`` and return whatever can safely be emitted now.

        Outside a block, text is emitted up to the next opening tag (or
        up to a possible partial opening tag at the end of the buffer).
        Inside a block, text is discarded up to and including the closing
        tag; only a possible partial closing tag is retained.

        With ``eof=True`` the held-back remainder is flushed if the
        scanner is outside a block; an unterminated block is dropped
        silently.
        """
        self._buffer += text
        emitted: list[str] = []

        while True:
            if self._in_block:
                # Suppress everything up to and including the close tag.
                _, closer, tail = self._buffer.partition(_TOOL_CALL_CLOSE)
                if closer:
                    self._buffer = tail
                    self._in_block = False
                    continue
                # Close tag not seen yet: keep only a possible partial
                # close tag, throw the rest of the block body away.
                keep = _max_suffix_overlap(self._buffer, _TOOL_CALL_CLOSE)
                self._buffer = self._buffer[-keep:] if keep else ""
                break

            head, opener, tail = self._buffer.partition(_TOOL_CALL_OPEN)
            if opener:
                emitted.append(head)
                self._buffer = tail
                self._in_block = True
                self.modified = True
                continue

            # No open tag in the buffer: emit all of it except a trailing
            # fragment that might grow into an open tag on the next chunk.
            keep = _max_suffix_overlap(self._buffer, _TOOL_CALL_OPEN)
            if keep:
                safe, held = self._buffer[:-keep], self._buffer[-keep:]
            else:
                safe, held = self._buffer, ""
            emitted.append(safe)
            self._buffer = held
            break

        if eof:
            # Outside a block the held-back fragment was ordinary text
            # after all; inside a block the partial block is discarded.
            if not self._in_block:
                emitted.append(self._buffer)
            self._buffer = ""
        return "".join(emitted)
383
+
384
+
295
385
  # ---------------------------------------------------------------------------
296
386
  # Registry + chain
297
387
  # ---------------------------------------------------------------------------
@@ -300,6 +390,7 @@ class StripStopMarkersFilter:
300
390
  KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
301
391
  StripThinkingFilter.name: StripThinkingFilter,
302
392
  StripStopMarkersFilter.name: StripStopMarkersFilter,
393
+ StripToolCallXmlFilter.name: StripToolCallXmlFilter,
303
394
  }
304
395
  """Registry of string-name → filter class.
305
396
 
@@ -187,5 +187,40 @@ class BudgetTracker:
187
187
  self._totals.clear()
188
188
  self._month = current
189
189
 
190
+ # ------------------------------------------------------------------
191
+ # v2.0-K: Persistence
192
+ # ------------------------------------------------------------------
193
+
194
def save_state(self) -> dict[str, object]:
    """Return the tracker's month key and a copy of its totals.

    Both values are read while holding ``self._lock``; ``totals`` is a
    fresh ``dict`` so the snapshot does not alias live state. The engine
    uses this to persist budget totals across restarts.
    """
    with self._lock:
        month_key = self._month
        totals_copy = dict(self._totals)
    return {"month": month_key, "totals": totals_copy}
204
+
205
def load_state(self, state: dict[str, object]) -> None:
    """Adopt saved totals, but only when they belong to the current month.

    A non-dict ``state`` is ignored, as is a snapshot whose ``month``
    differs from ``_utc_month_key()`` (last month's totals have no place
    in a new month). Entries whose value is not an ``int`` / ``float``
    are dropped; the rest replace ``self._totals`` wholesale.
    """
    if not isinstance(state, dict):
        return
    saved_month = state.get("month", "")
    with self._lock:
        current = _utc_month_key()
        if saved_month == current:
            totals = state.get("totals", {})
            if isinstance(totals, dict):
                restored = {}
                for key, amount in totals.items():
                    if isinstance(amount, (int, float)):
                        restored[key] = float(amount)
                self._totals = restored
                self._month = current
224
+
190
225
 
191
226
  __all__ = ["BudgetTracker"]
@@ -24,12 +24,16 @@ Dual entry points (v0.3.x-1):
24
24
 
25
25
  from __future__ import annotations
26
26
 
27
+ import asyncio
27
28
  import time
28
29
  from collections.abc import AsyncIterator
29
30
  from typing import TYPE_CHECKING, Any, Final
30
31
 
31
32
  if TYPE_CHECKING:
33
+ from coderouter.config.schemas import FallbackChain
32
34
  from coderouter.guards.drift_detection import DriftVerdict
35
+ from coderouter.guards.self_healing import SelfHealingOrchestrator
36
+ from coderouter.state.store import StateStore
33
37
 
34
38
  from coderouter.adapters.anthropic_native import AnthropicAdapter
35
39
  from coderouter.adapters.base import (
@@ -51,7 +55,9 @@ from coderouter.guards.memory_pressure import (
51
55
  )
52
56
  from coderouter.guards.tool_loop import (
53
57
  DEFAULT_LOOP_INJECT_HINT,
58
+ ToolCountExceededError,
54
59
  ToolLoopBreakError,
60
+ check_total_tool_count,
55
61
  detect_tool_loop,
56
62
  inject_loop_break_hint,
57
63
  )
@@ -130,7 +136,8 @@ def _apply_tool_loop_guard(
130
136
 
131
137
  Returns the (possibly mutated) request. Raises
132
138
  :class:`ToolLoopBreakError` when the configured action is ``break``
133
- and a loop was detected.
139
+ and a loop was detected. Also raises :class:`ToolCountExceededError`
140
+ when the total tool-call count exceeds ``max_tool_calls`` (v2.2).
134
141
 
135
142
  Profile resolution: uses ``request.profile`` (the X-CodeRouter-Mode
136
143
  header / explicit body field) and falls back to
@@ -149,6 +156,30 @@ def _apply_tool_loop_guard(
149
156
  # resolution path produces its own diagnostic.
150
157
  return request
151
158
 
159
+ # v2.2: total tool-call count hard cap — runs before streak
160
+ # detection because it's a cheaper O(n) scan that catches a
161
+ # broader class of runaway behavior.
162
+ if profile.max_tool_calls > 0:
163
+ exceeded = check_total_tool_count(
164
+ request,
165
+ max_calls=profile.max_tool_calls,
166
+ )
167
+ if exceeded is not None:
168
+ logger.warning(
169
+ "tool-count-exceeded",
170
+ extra={
171
+ "profile": profile.name,
172
+ "total_count": exceeded.total_count,
173
+ "max_allowed": exceeded.max_allowed,
174
+ "action": profile.tool_loop_action,
175
+ },
176
+ )
177
+ if profile.tool_loop_action == "break":
178
+ raise ToolCountExceededError(exceeded, profile.name)
179
+ # For "warn" and "inject" actions, log only and continue.
180
+ # The inject action's hint is not meaningful for count
181
+ # exceeded (not a same-tool loop), so we just warn.
182
+
152
183
  detection = detect_tool_loop(
153
184
  request,
154
185
  window=profile.tool_loop_window,
@@ -818,6 +849,12 @@ class FallbackEngine:
818
849
  # Distinct from v1.9-C ``adaptive`` which handles the
819
850
  # gradient case via a rolling window.
820
851
  self._backend_health_monitor: BackendHealthMonitor = BackendHealthMonitor()
852
+ # v2.0-J: self-healing orchestrator. Manages provider exclusion,
853
+ # restart, and recovery probing when backend_health_action is
854
+ # "exclude". Composes with the L5 backend health monitor.
855
+ from coderouter.guards.self_healing import SelfHealingOrchestrator
856
+
857
+ self._self_healing: SelfHealingOrchestrator = SelfHealingOrchestrator()
821
858
  # v2.0-G (L4): per-process drift detection window manager.
822
859
  # Stores per-provider rolling observations; the detector is
823
860
  # invoked after each provider-ok / provider-failed event and
@@ -831,6 +868,12 @@ class FallbackEngine:
831
868
  self._drift_demoted: dict[str, float] = {}
832
869
  # Last drift verdict (set by _observe_drift_signal for ingress header).
833
870
  self._last_drift_verdict: DriftVerdict | None = None
871
+ # v2.0-J: active recovery probe tasks (one per excluded provider).
872
+ self._recovery_tasks: dict[str, asyncio.Task[None]] = {}
873
+ # v2.0-J: shutdown event shared with recovery probe tasks.
874
+ self._recovery_shutdown: asyncio.Event | None = None
875
+ # v2.0-K: persistent state store (None = in-memory only).
876
+ self._state_store: StateStore | None = None
834
877
 
835
878
  @property
836
879
  def last_drift_severity(self) -> str | None:
@@ -914,6 +957,20 @@ class FallbackEngine:
914
957
  def _backend_health(self) -> BackendHealthMonitor:
915
958
  return self.backend_health
916
959
 
960
@property
def self_healing(self) -> SelfHealingOrchestrator:
    """Return the v2.0-J self-healing orchestrator.

    Lazy init for backward compat with ``__new__``-constructed test
    engines: if ``_self_healing`` is missing (or ``None``) a fresh
    orchestrator is created and cached on the instance.

    The import of ``SelfHealingOrchestrator`` is confined to the
    lazy-init branch, so the common path (attribute already set) does
    not execute an import statement on every access.
    """
    existing = getattr(self, "_self_healing", None)
    if existing is None:
        # Function-scope import kept from the original; only this
        # branch needs the class at runtime.
        from coderouter.guards.self_healing import SelfHealingOrchestrator

        existing = SelfHealingOrchestrator()
        self._self_healing = existing
    return existing
973
+
917
974
  def _observe_provider_failure(
918
975
  self,
919
976
  provider: str,
@@ -991,6 +1048,18 @@ class FallbackEngine:
991
1048
  new_state=transition.new_state,
992
1049
  consecutive_failures=transition.consecutive_failures,
993
1050
  )
1051
+ # v2.0-J: trigger self-healing on UNHEALTHY + exclude.
1052
+ if (
1053
+ transition.new_state == "UNHEALTHY"
1054
+ and bh_action == "exclude"
1055
+ ):
1056
+ newly_excluded = self.self_healing.on_unhealthy(
1057
+ provider,
1058
+ profile=chosen,
1059
+ consecutive_failures=transition.consecutive_failures,
1060
+ )
1061
+ if newly_excluded:
1062
+ self._spawn_recovery_probe(provider, chain=chain)
994
1063
 
995
1064
  def _observe_provider_success(
996
1065
  self,
@@ -1032,6 +1101,134 @@ class FallbackEngine:
1032
1101
  consecutive_failures=transition.consecutive_failures,
1033
1102
  )
1034
1103
 
1104
def _spawn_recovery_probe(
    self,
    provider: str,
    *,
    chain: FallbackChain,
) -> None:
    """Launch an async recovery probe task for an excluded provider.

    v2.0-J: called by ``_observe_provider_failure`` when a provider
    is newly excluded. The task runs ``recovery_probe_loop`` with the
    chain's probe/restart settings until it finishes or the shared
    shutdown event is set.

    Safe to call from a sync context: the task is scheduled on the
    loop returned by ``asyncio.get_running_loop``. The call is a no-op
    (nothing is created or stored) when

    * ``provider`` is not present in ``self.config.providers``,
    * a probe task for ``provider`` is already running, or
    * there is no running event loop (e.g. pure-sync tests).

    Args:
        provider: Name of the excluded provider.
        chain: Chain whose ``backend_health_threshold``,
            ``recovery_probe_initial_s``, ``recovery_probe_max_s``,
            ``restart_timeout_s`` and ``name`` parameterize the probe.
    """
    import asyncio

    # Find the ProviderConfig for this provider name.
    provider_config = next(
        (p for p in self.config.providers if p.name == provider),
        None,
    )
    if provider_config is None:
        return

    # Don't spawn duplicate tasks.
    existing = self._recovery_tasks.get(provider)
    if existing is not None and not existing.done():
        return

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        return  # no event loop — skip (sync test context)

    # Reuse or create the shared shutdown event. Created only once a
    # running loop is confirmed, so the no-loop path stays a true no-op
    # and the event is never constructed outside a loop context.
    if self._recovery_shutdown is None:
        self._recovery_shutdown = asyncio.Event()

    # Imported at function scope (as in the original) and only on the
    # path that actually spawns a task.
    from coderouter.guards.self_healing import recovery_probe_loop

    task = loop.create_task(
        recovery_probe_loop(
            provider_config,
            orchestrator=self.self_healing,
            record_fn=self.backend_health.record_attempt,
            health_threshold=chain.backend_health_threshold,
            initial_interval_s=chain.recovery_probe_initial_s,
            max_interval_s=chain.recovery_probe_max_s,
            restart_timeout_s=chain.restart_timeout_s,
            probe_timeout_s=10.0,
            shutdown_event=self._recovery_shutdown,
            profile=chain.name,
        ),
        name=f"recovery-probe-{provider}",
    )
    self._recovery_tasks[provider] = task
1163
+
1164
async def shutdown_recovery_probes(self) -> None:
    """Signal all recovery probe tasks to stop and wait for them to finish.

    Called from the app lifespan shutdown path.

    Sets the shared shutdown event (if one exists), then awaits every
    registered probe task. Task failures *and* task cancellations are
    absorbed — ``asyncio.gather(..., return_exceptions=True)`` collects
    them as results — so one cancelled or crashed probe cannot abort
    the shutdown sequence. Cancellation of this coroutine itself still
    propagates normally.

    The registry is drained in rounds from a snapshot: code running
    while we wait may register a new probe task, and iterating the live
    dict across an ``await`` would then raise ``RuntimeError``.
    """
    import asyncio

    if self._recovery_shutdown is not None:
        self._recovery_shutdown.set()

    while self._recovery_tasks:
        pending = list(self._recovery_tasks.values())
        self._recovery_tasks.clear()
        # Also retrieves exceptions of already-finished tasks, avoiding
        # "Task exception was never retrieved" noise at interpreter exit.
        await asyncio.gather(*pending, return_exceptions=True)
1178
+
1179
+ # ------------------------------------------------------------------
1180
+ # v2.0-K: State persistence
1181
+ # ------------------------------------------------------------------
1182
+
1183
def attach_state_store(self, store: StateStore) -> None:
    """Remember ``store`` as the engine's state store and restore from it.

    Intended for the app lifespan startup path when ``state_dir`` is
    configured. After recording the store, ``_load_all_state`` is
    invoked to pull persisted subsystem state back in.
    """
    # The store must be assigned first: ``_load_all_state`` reads it
    # from ``self._state_store``.
    self._state_store = store
    self._load_all_state()
1192
+
1193
def save_all_state(self) -> None:
    """Persist all subsystem state to the attached store.

    Called from the app lifespan shutdown path and optionally
    on a periodic timer. No-op if no store is attached.

    Persistence is best-effort: the budget, health and self-healing
    snapshots are written independently under the ``"state"`` key of
    their own namespace, and a failure in one does not prevent the
    others from being saved. Failures are no longer swallowed silently —
    each one is reported as a ``state-save-failed`` warning carrying the
    namespace and the error text. This method itself never raises on a
    save failure.
    """
    store = self._state_store
    if store is None:
        return
    import logging

    log = logging.getLogger(__name__)

    # Each snapshot is produced lazily so that an exception raised while
    # *building* it is contained exactly like one raised while writing it.
    snapshots = (
        ("budget", lambda: self._budget.save_state()),
        ("health", lambda: self.backend_health.save_state()),
        ("self_healing", lambda: self.self_healing.save_state()),
    )
    for namespace, take_snapshot in snapshots:
        try:
            store.put(namespace, "state", take_snapshot())
        except Exception as exc:  # best-effort boundary: report, keep going
            log.warning(
                "state-save-failed",
                extra={"namespace": namespace, "error": str(exc)},
            )
    # MetricsCollector state is saved separately via the singleton.
1211
+
1212
def _load_all_state(self) -> None:
    """Restore subsystem state from the attached store.

    No-op if no store is attached. The budget, health and self-healing
    subsystems are restored independently from the ``"state"`` key of
    their own namespace; a namespace with no stored value (``None``) is
    skipped.

    Restoration is best-effort: a failure while reading or applying one
    namespace does not stop the others. Failures are no longer swallowed
    silently — each one is reported as a ``state-load-failed`` warning
    carrying the namespace and the error text. This method itself never
    raises on a load failure.
    """
    store = self._state_store
    if store is None:
        return
    import logging

    log = logging.getLogger(__name__)

    # Targets are resolved lazily so that an exception raised while
    # reaching a subsystem is contained like one raised while loading.
    restorers = (
        ("budget", lambda saved: self._budget.load_state(saved)),
        ("health", lambda saved: self.backend_health.load_state(saved)),
        ("self_healing", lambda saved: self.self_healing.load_state(saved)),
    )
    for namespace, restore in restorers:
        try:
            saved = store.get(namespace, "state")
            if saved is not None:
                restore(saved)
        except Exception as exc:  # best-effort boundary: report, keep going
            log.warning(
                "state-load-failed",
                extra={"namespace": namespace, "error": str(exc)},
            )
1231
+
1035
1232
  def _observe_drift_signal(
1036
1233
  self,
1037
1234
  provider: str,
@@ -1340,6 +1537,19 @@ class FallbackEngine:
1340
1537
  profile=chosen,
1341
1538
  )
1342
1539
  adapters = healthy + unhealthy
1540
+
1541
+ # Pass 4b: v2.0-J self-healing exclusion. When the action is
1542
+ # "exclude", providers in the orchestrator's excluded set are
1543
+ # removed entirely from the chain. Unlike "demote" (which
1544
+ # moves to the back), excluded providers are not attempted at
1545
+ # all — recovery probes run in the background to detect when
1546
+ # they come back. If all providers are excluded, fall through
1547
+ # to the existing NoProvidersAvailableError path.
1548
+ if chain.backend_health_action == "exclude":
1549
+ excluded = self.self_healing.excluded_providers()
1550
+ if excluded:
1551
+ adapters = [a for a in adapters if a.name not in excluded]
1552
+
1343
1553
  return adapters
1344
1554
 
1345
1555
  def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
@@ -0,0 +1,15 @@
1
+ """Persistent state layer (v2.0-K).
2
+
3
+ Four modules:
4
+
5
+ * :mod:`coderouter.state.store` — sqlite3 KV store for operational
6
+ metadata (budget totals, health
7
+ state, self-healing exclusions).
8
+ * :mod:`coderouter.state.audit_log` — JSONL structured event log with
9
+ rotation and CLI reader.
10
+ * :mod:`coderouter.state.request_log` — JSONL request metadata journal
11
+ (per-request token counts, cost,
12
+ provider — no request body).
13
+ * :mod:`coderouter.state.replay` — Statistical A/B analysis engine
14
+ over request journal entries.
15
+ """