coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,9 +24,16 @@ Dual entry points (v0.3.x-1):
24
24
 
25
25
  from __future__ import annotations
26
26
 
27
+ import asyncio
27
28
  import time
28
29
  from collections.abc import AsyncIterator
29
- from typing import Final
30
+ from typing import TYPE_CHECKING, Any, Final
31
+
32
+ if TYPE_CHECKING:
33
+ from coderouter.config.schemas import FallbackChain
34
+ from coderouter.guards.drift_detection import DriftVerdict
35
+ from coderouter.guards.self_healing import SelfHealingOrchestrator
36
+ from coderouter.state.store import StateStore
30
37
 
31
38
  from coderouter.adapters.anthropic_native import AnthropicAdapter
32
39
  from coderouter.adapters.base import (
@@ -48,7 +55,9 @@ from coderouter.guards.memory_pressure import (
48
55
  )
49
56
  from coderouter.guards.tool_loop import (
50
57
  DEFAULT_LOOP_INJECT_HINT,
58
+ ToolCountExceededError,
51
59
  ToolLoopBreakError,
60
+ check_total_tool_count,
52
61
  detect_tool_loop,
53
62
  inject_loop_break_hint,
54
63
  )
@@ -127,7 +136,8 @@ def _apply_tool_loop_guard(
127
136
 
128
137
  Returns the (possibly mutated) request. Raises
129
138
  :class:`ToolLoopBreakError` when the configured action is ``break``
130
- and a loop was detected.
139
+ and a loop was detected. Also raises :class:`ToolCountExceededError`
140
+ when the total tool-call count exceeds ``max_tool_calls`` (v2.2).
131
141
 
132
142
  Profile resolution: uses ``request.profile`` (the X-CodeRouter-Mode
133
143
  header / explicit body field) and falls back to
@@ -146,6 +156,30 @@ def _apply_tool_loop_guard(
146
156
  # resolution path produces its own diagnostic.
147
157
  return request
148
158
 
159
+ # v2.2: total tool-call count hard cap — runs before streak
160
+ # detection because it's a cheaper O(n) scan that catches a
161
+ # broader class of runaway behavior.
162
+ if profile.max_tool_calls > 0:
163
+ exceeded = check_total_tool_count(
164
+ request,
165
+ max_calls=profile.max_tool_calls,
166
+ )
167
+ if exceeded is not None:
168
+ logger.warning(
169
+ "tool-count-exceeded",
170
+ extra={
171
+ "profile": profile.name,
172
+ "total_count": exceeded.total_count,
173
+ "max_allowed": exceeded.max_allowed,
174
+ "action": profile.tool_loop_action,
175
+ },
176
+ )
177
+ if profile.tool_loop_action == "break":
178
+ raise ToolCountExceededError(exceeded, profile.name)
179
+ # For "warn" and "inject" actions, log only and continue.
180
+ # The inject action's hint is not meaningful for count
181
+ # exceeded (not a same-tool loop), so we just warn.
182
+
149
183
  detection = detect_tool_loop(
150
184
  request,
151
185
  window=profile.tool_loop_window,
@@ -469,11 +503,16 @@ class _StreamUsageAccumulator:
469
503
  """
470
504
 
471
505
  __slots__ = (
506
+ "_current_block_text",
507
+ "_current_block_type",
472
508
  "_observed",
509
+ "_text_blocks",
473
510
  "cache_creation_input_tokens",
474
511
  "cache_read_input_tokens",
512
+ "has_tool_use",
475
513
  "input_tokens",
476
514
  "output_tokens",
515
+ "stop_reason",
477
516
  )
478
517
 
479
518
  def __init__(self) -> None:
@@ -482,6 +521,32 @@ class _StreamUsageAccumulator:
482
521
  self.cache_read_input_tokens = 0
483
522
  self.cache_creation_input_tokens = 0
484
523
  self._observed = False
524
+ # v2.0-G: tracked for drift detection observation at stream end.
525
+ self.has_tool_use: bool = False
526
+ self.stop_reason: str | None = None
527
+ # v2.0-H: partial content accumulation for mid-stream recovery.
528
+ # Completed text blocks are moved to _text_blocks on content_block_stop.
529
+ # In-progress text is in _current_block_text (list of str fragments).
530
+ self._text_blocks: list[str] = []
531
+ self._current_block_type: str | None = None
532
+ self._current_block_text: list[str] = []
533
+
534
@property
def partial_content(self) -> list[dict[str, Any]]:
    """Expose the text gathered so far as Anthropic-style content blocks.

    Every non-empty completed text block is emitted first, in arrival
    order. If a text block is still open (the stream stopped before its
    ``content_block_stop``), its fragments are joined and appended as a
    final block. Only text is tracked here; tool_use blocks never show
    up in the result.
    """
    finished: list[dict[str, Any]] = [
        {"type": "text", "text": chunk} for chunk in self._text_blocks if chunk
    ]
    fragments = self._current_block_text
    # Nothing in flight (or the open block is not text): completed blocks only.
    if self._current_block_type != "text" or not fragments:
        return finished
    finished.append({"type": "text", "text": "".join(fragments)})
    return finished
485
550
 
486
551
  def observe(self, event: AnthropicStreamEvent) -> None:
487
552
  """Update counters from one stream event (no-op for non-usage events)."""
@@ -494,6 +559,33 @@ class _StreamUsageAccumulator:
494
559
  usage = event.data.get("usage") if isinstance(event.data, dict) else None
495
560
  if isinstance(usage, dict):
496
561
  self._merge(usage)
562
+ # v2.0-G: capture stop_reason from the terminal message_delta.
563
+ delta = event.data.get("delta") if isinstance(event.data, dict) else None
564
+ if isinstance(delta, dict) and "stop_reason" in delta:
565
+ self.stop_reason = delta["stop_reason"]
566
+ elif event.type == "content_block_start":
567
+ # v2.0-G: detect tool_use content blocks for drift observation.
568
+ cb = event.data.get("content_block") if isinstance(event.data, dict) else None
569
+ if isinstance(cb, dict):
570
+ block_type = cb.get("type", "")
571
+ if block_type == "tool_use":
572
+ self.has_tool_use = True
573
+ # v2.0-H: start tracking a new content block.
574
+ self._current_block_type = block_type
575
+ self._current_block_text = []
576
+ elif event.type == "content_block_delta":
577
+ # v2.0-H: accumulate text_delta fragments.
578
+ delta = event.data.get("delta") if isinstance(event.data, dict) else None
579
+ if isinstance(delta, dict) and delta.get("type") == "text_delta":
580
+ text = delta.get("text", "")
581
+ if text:
582
+ self._current_block_text.append(text)
583
+ elif event.type == "content_block_stop":
584
+ # v2.0-H: finalize current block.
585
+ if self._current_block_type == "text" and self._current_block_text:
586
+ self._text_blocks.append("".join(self._current_block_text))
587
+ self._current_block_type = None
588
+ self._current_block_text = []
497
589
 
498
590
  def _merge(self, usage: dict[str, object]) -> None:
499
591
  any_nonzero = False
@@ -613,9 +705,18 @@ class MidStreamError(CodeRouterError):
613
705
  one chunk to the client. Fallback is not attempted (the client has
614
706
  received partial content, so switching providers would corrupt the
615
707
  stream). Callers should surface this as a terminal error event.
708
+
709
+ v2.0-H: carries ``partial_content`` — the accumulated text blocks
710
+ generated before the failure. The ingress uses this to synthesize
711
+ a graceful stream termination when ``partial_stitch_action: surface``.
616
712
  """
617
713
 
618
- def __init__(self, provider: str, original: AdapterError) -> None:
714
+ def __init__(
715
+ self,
716
+ provider: str,
717
+ original: AdapterError,
718
+ partial_content: list[dict[str, Any]] | None = None,
719
+ ) -> None:
619
720
  """Wrap the underlying :class:`AdapterError` with the provider name.
620
721
 
621
722
  The ingress layer catches this and converts it into an in-stream
@@ -624,6 +725,7 @@ class MidStreamError(CodeRouterError):
624
725
  """
625
726
  self.provider = provider
626
727
  self.original = original
728
+ self.partial_content: list[dict[str, Any]] = partial_content or []
627
729
  super().__init__(f"provider {provider!r} failed mid-stream: {original}")
628
730
 
629
731
 
@@ -747,6 +849,44 @@ class FallbackEngine:
747
849
  # Distinct from v1.9-C ``adaptive`` which handles the
748
850
  # gradient case via a rolling window.
749
851
  self._backend_health_monitor: BackendHealthMonitor = BackendHealthMonitor()
852
+ # v2.0-J: self-healing orchestrator. Manages provider exclusion,
853
+ # restart, and recovery probing when backend_health_action is
854
+ # "exclude". Composes with the L5 backend health monitor.
855
+ from coderouter.guards.self_healing import SelfHealingOrchestrator
856
+
857
+ self._self_healing: SelfHealingOrchestrator = SelfHealingOrchestrator()
858
+ # v2.0-G (L4): per-process drift detection window manager.
859
+ # Stores per-provider rolling observations; the detector is
860
+ # invoked after each provider-ok / provider-failed event and
861
+ # returns a verdict. Action dispatch (promote/reload) reuses
862
+ # the adaptive rank machinery.
863
+ from coderouter.guards.drift_detection import DriftWindow
864
+
865
+ self._drift_window: DriftWindow = DriftWindow()
866
+ # Track which providers are currently in drift-demoted state
867
+ # and when their cooldown expires (monotonic timestamp).
868
+ self._drift_demoted: dict[str, float] = {}
869
+ # Last drift verdict (set by _observe_drift_signal for ingress header).
870
+ self._last_drift_verdict: DriftVerdict | None = None
871
+ # v2.0-J: active recovery probe tasks (one per excluded provider).
872
+ self._recovery_tasks: dict[str, asyncio.Task[None]] = {}
873
+ # v2.0-J: shutdown event shared with recovery probe tasks.
874
+ self._recovery_shutdown: asyncio.Event | None = None
875
+ # v2.0-K: persistent state store (None = in-memory only).
876
+ self._state_store: StateStore | None = None
877
+
878
@property
def last_drift_severity(self) -> str | None:
    """Severity of the most recently stored drift verdict, if it drifted.

    Reads ``_last_drift_verdict`` and returns its ``severity`` only when
    a verdict is stored and its ``drifted`` flag is truthy; in every
    other case the result is ``None``. The ingress uses the value for
    the ``X-CodeRouter-Drift`` response header.
    """
    verdict = self._last_drift_verdict
    if verdict is not None and verdict.drifted:
        return verdict.severity
    return None
750
890
 
751
891
  @property
752
892
  def _adaptive(self) -> AdaptiveAdjuster:
@@ -794,12 +934,17 @@ class FallbackEngine:
794
934
  return existing
795
935
 
796
936
  @property
797
- def _backend_health(self) -> BackendHealthMonitor:
937
+ def backend_health(self) -> BackendHealthMonitor:
798
938
  """Return the L5 backend-health monitor, lazily building one if absent.
799
939
 
800
940
  Same legacy-test compatibility pattern as the other guard
801
941
  properties — ``__new__``-constructed engines get a fresh
802
942
  empty monitor so ``state_for`` is always answerable.
943
+
944
+ v2.0-I: promoted from ``_backend_health`` to public ``backend_health``
945
+ so the continuous probe background task can feed results into the
946
+ same state machine. Internal callers continue to work (property
947
+ access is transparent).
803
948
  """
804
949
  existing = getattr(self, "_backend_health_monitor", None)
805
950
  if existing is None:
@@ -807,6 +952,25 @@ class FallbackEngine:
807
952
  existing = self._backend_health_monitor
808
953
  return existing
809
954
 
955
@property
def _backend_health(self) -> BackendHealthMonitor:
    """Private alias of :attr:`backend_health`.

    Kept so internal callers that still use the pre-rename private
    name keep working; it simply forwards to the public property.
    """
    return self.backend_health
959
+
960
@property
def self_healing(self) -> SelfHealingOrchestrator:
    """Return the v2.0-J self-healing orchestrator, creating it on demand.

    Engines built via ``__new__`` (legacy tests) never ran ``__init__``
    and so have no ``_self_healing`` attribute; in that case a fresh
    :class:`SelfHealingOrchestrator` is created, cached on the instance,
    and returned.
    """
    from coderouter.guards.self_healing import SelfHealingOrchestrator

    orchestrator = getattr(self, "_self_healing", None)
    if orchestrator is not None:
        return orchestrator
    self._self_healing = SelfHealingOrchestrator()
    return self._self_healing
973
+
810
974
  def _observe_provider_failure(
811
975
  self,
812
976
  provider: str,
@@ -884,6 +1048,18 @@ class FallbackEngine:
884
1048
  new_state=transition.new_state,
885
1049
  consecutive_failures=transition.consecutive_failures,
886
1050
  )
1051
+ # v2.0-J: trigger self-healing on UNHEALTHY + exclude.
1052
+ if (
1053
+ transition.new_state == "UNHEALTHY"
1054
+ and bh_action == "exclude"
1055
+ ):
1056
+ newly_excluded = self.self_healing.on_unhealthy(
1057
+ provider,
1058
+ profile=chosen,
1059
+ consecutive_failures=transition.consecutive_failures,
1060
+ )
1061
+ if newly_excluded:
1062
+ self._spawn_recovery_probe(provider, chain=chain)
887
1063
 
888
1064
  def _observe_provider_success(
889
1065
  self,
@@ -925,6 +1101,260 @@ class FallbackEngine:
925
1101
  consecutive_failures=transition.consecutive_failures,
926
1102
  )
927
1103
 
1104
def _spawn_recovery_probe(
    self,
    provider: str,
    *,
    chain: FallbackChain,
) -> None:
    """Launch a background recovery-probe task for an excluded provider.

    v2.0-J: called by ``_observe_provider_failure`` when a provider is
    newly excluded. The task runs ``recovery_probe_loop`` (imported
    from ``coderouter.guards.self_healing``) with the probe intervals,
    restart timeout and health threshold taken from ``chain``; how the
    loop backs off and when it ends is decided by that function.

    Safe to call from a sync context: the task is scheduled on the loop
    returned by ``asyncio.get_running_loop``. The call is a no-op when

    * no event loop is running (e.g. pure-sync tests),
    * ``provider`` is not present in ``self.config.providers``, or
    * a probe task for ``provider`` is already registered and not done.

    Args:
        provider: Name of the provider that was just excluded.
        chain: Active fallback chain; supplies the probe tuning values
            and the profile name passed to the probe loop.
    """
    # Check for a running loop first: without one nothing can be
    # scheduled, so no shared Event should be allocated either.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        return

    # Find the ProviderConfig for this provider name.
    provider_config = next(
        (p for p in self.config.providers if p.name == provider), None
    )
    if provider_config is None:
        return

    # ``__new__``-constructed engines have no registry yet; build it lazily,
    # mirroring the other guard accessors on this class.
    tasks = getattr(self, "_recovery_tasks", None)
    if tasks is None:
        tasks = self._recovery_tasks = {}

    # Don't spawn duplicate tasks.
    existing = tasks.get(provider)
    if existing is not None and not existing.done():
        return

    # Reuse or create the shutdown event shared by all probe tasks.
    shutdown = getattr(self, "_recovery_shutdown", None)
    if shutdown is None:
        shutdown = self._recovery_shutdown = asyncio.Event()

    from coderouter.guards.self_healing import recovery_probe_loop

    tasks[provider] = loop.create_task(
        recovery_probe_loop(
            provider_config,
            orchestrator=self.self_healing,
            record_fn=self.backend_health.record_attempt,
            health_threshold=chain.backend_health_threshold,
            initial_interval_s=chain.recovery_probe_initial_s,
            max_interval_s=chain.recovery_probe_max_s,
            restart_timeout_s=chain.restart_timeout_s,
            probe_timeout_s=10.0,
            shutdown_event=shutdown,
            profile=chain.name,
        ),
        name=f"recovery-probe-{provider}",
    )
1163
+
1164
async def shutdown_recovery_probes(self) -> None:
    """Signal all recovery probe tasks to stop and wait for them to finish.

    Called from the app lifespan shutdown path. Sets the shared
    shutdown event (when one exists), waits for every registered probe
    task, then empties the task registry.

    Outcomes of individual probe tasks are deliberately ignored: a task
    that raised, or that was cancelled, must not prevent the remaining
    tasks from being awaited. ``asyncio.gather(...,
    return_exceptions=True)`` is used for that reason —
    ``asyncio.CancelledError`` derives from ``BaseException`` and would
    not be caught by an ``except Exception`` / ``suppress(Exception)``.
    Cancellation of this coroutine itself still propagates to the
    caller; the registry is cleared either way.
    """
    shutdown = getattr(self, "_recovery_shutdown", None)
    if shutdown is not None:
        shutdown.set()

    tasks = getattr(self, "_recovery_tasks", None)
    if not tasks:
        return

    # Snapshot so a probe registered while we wait cannot disturb iteration.
    pending = list(tasks.values())
    try:
        await asyncio.gather(*pending, return_exceptions=True)
    finally:
        tasks.clear()
1178
+
1179
+ # ------------------------------------------------------------------
1180
+ # v2.0-K: State persistence
1181
+ # ------------------------------------------------------------------
1182
+
1183
def attach_state_store(self, store: StateStore) -> None:
    """Bind ``store`` to this engine and restore persisted state from it.

    Intended for the app lifespan startup path when ``state_dir`` is
    configured. The store is remembered on ``_state_store`` and
    ``_load_all_state`` is invoked right away to pull saved subsystem
    state back in.
    """
    # Order matters: the loader reads the store from ``_state_store``.
    self._state_store = store
    self._load_all_state()
1192
+
1193
def save_all_state(self) -> None:
    """Persist all subsystem state to the attached store.

    Called from the app lifespan shutdown path and optionally on a
    periodic timer. No-op if no store is attached.

    Persistence is best-effort: budget, backend-health and self-healing
    snapshots are written independently under the ``"state"`` key of
    their own namespace, and a failure in one (either while taking the
    snapshot or while writing it) never prevents the others from being
    saved. Each failure is logged as ``state-save-failed`` with the
    traceback instead of being dropped silently, so a broken store is
    visible to operators.
    """
    store = getattr(self, "_state_store", None)
    if store is None:
        return

    # Lambdas defer attribute access so that a failure while *reaching*
    # a subsystem is isolated exactly like a failure while saving it.
    snapshots = (
        ("budget", lambda: self._budget.save_state()),
        ("health", lambda: self.backend_health.save_state()),
        ("self_healing", lambda: self.self_healing.save_state()),
    )
    for namespace, take_snapshot in snapshots:
        try:
            store.put(namespace, "state", take_snapshot())
        except Exception:
            # Deliberately broad: shutdown must never fail on persistence.
            logger.warning(
                "state-save-failed",
                extra={"namespace": namespace},
                exc_info=True,
            )
    # MetricsCollector state is saved separately via the singleton.
1211
+
1212
def _load_all_state(self) -> None:
    """Restore subsystem state from the attached store.

    No-op if no store is attached. For each of the ``budget``,
    ``health`` and ``self_healing`` namespaces the ``"state"`` entry is
    read and, when present (not ``None``), handed to the matching
    subsystem's ``load_state``.

    Restoration is best-effort: every namespace is handled
    independently, so an unreadable or rejected entry leaves that
    subsystem in its current state without blocking the others. Each
    failure is logged as ``state-load-failed`` with the traceback
    rather than being dropped silently.
    """
    store = getattr(self, "_state_store", None)
    if store is None:
        return

    # Lambdas defer attribute access so a failure while reaching a
    # subsystem is contained in the same way as a failing load_state.
    restorers = (
        ("budget", lambda state: self._budget.load_state(state)),
        ("health", lambda state: self.backend_health.load_state(state)),
        ("self_healing", lambda state: self.self_healing.load_state(state)),
    )
    for namespace, restore in restorers:
        try:
            state = store.get(namespace, "state")
            if state is not None:
                restore(state)
        except Exception:
            # Deliberately broad: startup must survive a bad persisted row.
            logger.warning(
                "state-load-failed",
                extra={"namespace": namespace},
                exc_info=True,
            )
1231
+
1232
def _observe_drift_signal(
    self,
    provider: str,
    *,
    profile: str | None,
    output_tokens: int = 0,
    has_tool_use: bool = False,
    request_had_tools: bool = False,
    stop_reason: str | None = None,
    is_error: bool = False,
    stream: bool = False,
) -> DriftVerdict | None:
    """v2.0-G (L4): record an observation and check for drift.

    Called after every provider-ok / provider-failed event on the
    Anthropic-shaped paths. Returns a :class:`DriftVerdict` when drift
    is detected (``drifted`` is truthy), ``None`` otherwise — including
    when the profile is unknown, drift detection is ``"off"``, or the
    provider is inside (or just leaving) its demotion cooldown.

    Side effects:
        - The observation is always recorded in the drift window once
          the profile resolved and detection is enabled.
        - When a demoted provider's cooldown has expired, a
          drift-recovered log is emitted and both the demotion entry
          and the provider's window are cleared.
        - On detection, ``_last_drift_verdict`` is updated and a
          drift-detected log is emitted; a non-drifted detection run
          resets ``_last_drift_verdict`` to ``None``.
        - If the action is ``promote`` or ``reload``, the provider is
          demoted via the adaptive rank machinery and a cooldown
          expiry is stored.
        - If the action is ``reload``, ``attempt_reload`` is scheduled
          as a background task. This is best-effort: when the provider
          has no adapter or no event loop is running, the reload is
          skipped with a warning instead of raising out of what is an
          observation hook.

    Args:
        provider: Provider name the observation belongs to.
        profile: Requested profile; falls back to the configured
            default profile when falsy.
        output_tokens: Output token count of the response.
        has_tool_use: Whether the response contained a tool_use block.
        request_had_tools: Whether the request declared tools.
        stop_reason: Stop reason reported by the provider, if any.
        is_error: True when the observation is a provider failure.
        stream: True for streaming calls.
    """
    from coderouter.guards.drift_detection import (
        SENSITIVITY_PRESETS,
        ResponseObservation,
        detect_drift,
    )
    from coderouter.logging import (
        log_drift_detected,
        log_drift_promoted,
        log_drift_recovered,
    )

    chosen = profile or self.config.default_profile
    try:
        chain_cfg = self.config.profile_by_name(chosen)
    except (KeyError, ValueError):
        return None
    action = chain_cfg.drift_detection_action
    if action == "off":
        return None

    # Keep the window size in sync with the active profile's setting.
    self._drift_window.max_size = chain_cfg.drift_detection_window_size

    # Record observation
    self._drift_window.record(
        ResponseObservation(
            provider=provider,
            output_tokens=output_tokens,
            has_tool_use=has_tool_use,
            request_had_tools=request_had_tools,
            stop_reason=stop_reason,
            is_error=is_error,
            stream=stream,
        )
    )

    demote_expires = self._drift_demoted.get(provider)
    if demote_expires is not None:
        if time.monotonic() < demote_expires:
            # Don't re-detect while in cooldown
            return None
        # Cooldown expired — clear drift state so detection starts fresh.
        log_drift_recovered(
            logger,
            provider=provider,
            profile=chosen,
            after_s=chain_cfg.drift_detection_cooldown_s,
        )
        self._drift_demoted.pop(provider, None)
        self._drift_window.clear(provider)
        return None

    # Run detection
    window = self._drift_window.get_window(provider)
    thresholds = SENSITIVITY_PRESETS.get(
        chain_cfg.drift_detection_sensitivity, SENSITIVITY_PRESETS["normal"]
    )
    verdict = detect_drift(window, thresholds)

    if not verdict.drifted:
        self._last_drift_verdict = None
        return None

    # Store for ingress response header.
    self._last_drift_verdict = verdict

    log_drift_detected(
        logger,
        provider=provider,
        profile=chosen,
        severity=verdict.severity,
        reason=verdict.reason,
        action=action,
        signals=verdict.signals,
    )

    # Action: promote / reload
    if action in ("promote", "reload"):
        # Single source for the value used by both the demotion and its log.
        demote_steps = 2
        cooldown_s = chain_cfg.drift_detection_cooldown_s
        self._adaptive.demote(provider, steps=demote_steps)
        log_drift_promoted(
            logger,
            provider=provider,
            profile=chosen,
            demoted_to_rank=demote_steps,
            cooldown_s=cooldown_s,
        )
        # Record cooldown expiry
        self._drift_demoted[provider] = time.monotonic() + cooldown_s

    # v2.0-G: reload action — attempt Ollama KV cache flush
    # (best-effort, fire-and-forget background task).
    if action == "reload":
        from coderouter.guards.drift_actions import attempt_reload

        try:
            provider_config = self._adapters[provider].config
            loop = asyncio.get_running_loop()
        except (KeyError, RuntimeError):
            # No adapter for this provider, or called outside a running
            # loop: skip the reload rather than fail the request path.
            logger.warning(
                "drift-reload-skipped",
                extra={"provider": provider, "profile": chosen},
            )
        else:
            task = loop.create_task(attempt_reload(provider_config))
            # Hold a strong reference until completion so an in-flight
            # reload is not garbage-collected when a later one starts.
            in_flight = getattr(self, "_reload_tasks", None)
            if in_flight is None:
                in_flight = self._reload_tasks = set()
            in_flight.add(task)
            task.add_done_callback(in_flight.discard)
            # Preserved for callers that inspect the most recent task.
            self._reload_task = task

    return verdict
1357
+
928
1358
  def _resolve_profile_overrides(self, profile_name: str | None) -> ProviderCallOverrides:
929
1359
  """v0.6-B: build the ProviderCallOverrides for the active profile.
930
1360
 
@@ -1107,6 +1537,19 @@ class FallbackEngine:
1107
1537
  profile=chosen,
1108
1538
  )
1109
1539
  adapters = healthy + unhealthy
1540
+
1541
+ # Pass 4b: v2.0-J self-healing exclusion. When the action is
1542
+ # "exclude", providers in the orchestrator's excluded set are
1543
+ # removed entirely from the chain. Unlike "demote" (which
1544
+ # moves to the back), excluded providers are not attempted at
1545
+ # all — recovery probes run in the background to detect when
1546
+ # they come back. If all providers are excluded, fall through
1547
+ # to the existing NoProvidersAvailableError path.
1548
+ if chain.backend_health_action == "exclude":
1549
+ excluded = self.self_healing.excluded_providers()
1550
+ if excluded:
1551
+ adapters = [a for a in adapters if a.name not in excluded]
1552
+
1110
1553
  return adapters
1111
1554
 
1112
1555
  def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
@@ -1455,6 +1898,14 @@ class FallbackEngine:
1455
1898
  self._observe_provider_failure(
1456
1899
  adapter.name, exc, profile=request.profile
1457
1900
  )
1901
+ # v2.0-G (L4): drift detection observation (failure path).
1902
+ self._observe_drift_signal(
1903
+ adapter.name,
1904
+ profile=request.profile,
1905
+ is_error=True,
1906
+ request_had_tools=bool(request.tools),
1907
+ stream=False,
1908
+ )
1458
1909
  errors.append(exc)
1459
1910
  if not exc.retryable:
1460
1911
  break
@@ -1482,6 +1933,18 @@ class FallbackEngine:
1482
1933
  self._observe_provider_success(
1483
1934
  adapter.name, profile=request.profile
1484
1935
  )
1936
+ # v2.0-G (L4): drift detection observation (success path).
1937
+ self._observe_drift_signal(
1938
+ adapter.name,
1939
+ profile=request.profile,
1940
+ output_tokens=resp.usage.output_tokens if resp.usage else 0,
1941
+ has_tool_use=any(
1942
+ getattr(b, "type", None) == "tool_use" for b in (resp.content or [])
1943
+ ),
1944
+ request_had_tools=bool(request.tools),
1945
+ stop_reason=resp.stop_reason,
1946
+ stream=False,
1947
+ )
1485
1948
  # v1.9-A: pair every successful Anthropic response with a
1486
1949
  # cache-observed log line. Native Anthropic / LM Studio
1487
1950
  # /v1/messages report cache_read_input_tokens /
@@ -1620,6 +2083,14 @@ class FallbackEngine:
1620
2083
  self._observe_provider_failure(
1621
2084
  adapter.name, exc, profile=request.profile
1622
2085
  )
2086
+ # v2.0-G (L4): drift detection observation (stream failure).
2087
+ self._observe_drift_signal(
2088
+ adapter.name,
2089
+ profile=request.profile,
2090
+ is_error=True,
2091
+ request_had_tools=bool(request.tools),
2092
+ stream=True,
2093
+ )
1623
2094
  errors.append(exc)
1624
2095
  if not exc.retryable:
1625
2096
  break
@@ -1662,7 +2133,27 @@ class FallbackEngine:
1662
2133
  self._observe_provider_failure(
1663
2134
  adapter.name, exc, profile=request.profile
1664
2135
  )
1665
- raise MidStreamError(adapter.name, exc) from exc
2136
+ # v2.0-G (L4): drift detection observation (mid-stream failure).
2137
+ self._observe_drift_signal(
2138
+ adapter.name,
2139
+ profile=request.profile,
2140
+ is_error=True,
2141
+ request_had_tools=bool(request.tools),
2142
+ stream=True,
2143
+ )
2144
+ raise MidStreamError(
2145
+ adapter.name, exc, partial_content=acc.partial_content
2146
+ ) from exc
2147
+ # v2.0-G (L4): drift detection observation (stream success).
2148
+ self._observe_drift_signal(
2149
+ adapter.name,
2150
+ profile=request.profile,
2151
+ output_tokens=acc.output_tokens,
2152
+ has_tool_use=acc.has_tool_use,
2153
+ request_had_tools=bool(request.tools),
2154
+ stop_reason=acc.stop_reason,
2155
+ stream=True,
2156
+ )
1666
2157
  # v1.9-B2: pair the successful stream with a cache-observed
1667
2158
  # log line carrying the aggregated usage counters that the
1668
2159
  # ``_StreamUsageAccumulator`` collected from the
@@ -0,0 +1,15 @@
1
+ """Persistent state layer (v2.0-K).
2
+
3
+ Four modules:
4
+
5
+ * :mod:`coderouter.state.store` — sqlite3 KV store for operational
6
+ metadata (budget totals, health
7
+ state, self-healing exclusions).
8
+ * :mod:`coderouter.state.audit_log` — JSONL structured event log with
9
+ rotation and CLI reader.
10
+ * :mod:`coderouter.state.request_log` — JSONL request metadata journal
11
+ (per-request token counts, cost,
12
+ provider — no request body).
13
+ * :mod:`coderouter.state.replay` — Statistical A/B analysis engine
14
+ over request journal entries.
15
+ """