coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +219 -0
- coderouter/config/schemas.py +235 -2
- coderouter/guards/__init__.py +6 -4
- coderouter/guards/backend_health.py +34 -0
- coderouter/guards/continuous_probe.py +349 -0
- coderouter/guards/drift_actions.py +111 -0
- coderouter/guards/drift_detection.py +308 -0
- coderouter/guards/self_healing.py +413 -0
- coderouter/guards/tool_loop.py +71 -0
- coderouter/ingress/anthropic_routes.py +106 -12
- coderouter/ingress/app.py +129 -0
- coderouter/logging.py +370 -0
- coderouter/metrics/collector.py +168 -0
- coderouter/metrics/prometheus.py +141 -0
- coderouter/output_filters.py +95 -4
- coderouter/routing/adaptive.py +23 -0
- coderouter/routing/budget.py +35 -0
- coderouter/routing/fallback.py +496 -5
- coderouter/state/__init__.py +15 -0
- coderouter/state/audit_log.py +269 -0
- coderouter/state/replay.py +316 -0
- coderouter/state/request_log.py +178 -0
- coderouter/state/store.py +212 -0
- coderouter/translation/tool_repair.py +42 -1
- coderouter_cli-2.2.0.dist-info/METADATA +243 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +29 -20
- coderouter_cli-2.0.0.dist-info/METADATA +0 -559
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0
coderouter/routing/fallback.py
CHANGED
|
@@ -24,9 +24,16 @@ Dual entry points (v0.3.x-1):
|
|
|
24
24
|
|
|
25
25
|
from __future__ import annotations
|
|
26
26
|
|
|
27
|
+
import asyncio
|
|
27
28
|
import time
|
|
28
29
|
from collections.abc import AsyncIterator
|
|
29
|
-
from typing import Final
|
|
30
|
+
from typing import TYPE_CHECKING, Any, Final
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from coderouter.config.schemas import FallbackChain
|
|
34
|
+
from coderouter.guards.drift_detection import DriftVerdict
|
|
35
|
+
from coderouter.guards.self_healing import SelfHealingOrchestrator
|
|
36
|
+
from coderouter.state.store import StateStore
|
|
30
37
|
|
|
31
38
|
from coderouter.adapters.anthropic_native import AnthropicAdapter
|
|
32
39
|
from coderouter.adapters.base import (
|
|
@@ -48,7 +55,9 @@ from coderouter.guards.memory_pressure import (
|
|
|
48
55
|
)
|
|
49
56
|
from coderouter.guards.tool_loop import (
|
|
50
57
|
DEFAULT_LOOP_INJECT_HINT,
|
|
58
|
+
ToolCountExceededError,
|
|
51
59
|
ToolLoopBreakError,
|
|
60
|
+
check_total_tool_count,
|
|
52
61
|
detect_tool_loop,
|
|
53
62
|
inject_loop_break_hint,
|
|
54
63
|
)
|
|
@@ -127,7 +136,8 @@ def _apply_tool_loop_guard(
|
|
|
127
136
|
|
|
128
137
|
Returns the (possibly mutated) request. Raises
|
|
129
138
|
:class:`ToolLoopBreakError` when the configured action is ``break``
|
|
130
|
-
and a loop was detected.
|
|
139
|
+
and a loop was detected. Also raises :class:`ToolCountExceededError`
|
|
140
|
+
when the total tool-call count exceeds ``max_tool_calls`` (v2.2).
|
|
131
141
|
|
|
132
142
|
Profile resolution: uses ``request.profile`` (the X-CodeRouter-Mode
|
|
133
143
|
header / explicit body field) and falls back to
|
|
@@ -146,6 +156,30 @@ def _apply_tool_loop_guard(
|
|
|
146
156
|
# resolution path produces its own diagnostic.
|
|
147
157
|
return request
|
|
148
158
|
|
|
159
|
+
# v2.2: total tool-call count hard cap — runs before streak
|
|
160
|
+
# detection because it's a cheaper O(n) scan that catches a
|
|
161
|
+
# broader class of runaway behavior.
|
|
162
|
+
if profile.max_tool_calls > 0:
|
|
163
|
+
exceeded = check_total_tool_count(
|
|
164
|
+
request,
|
|
165
|
+
max_calls=profile.max_tool_calls,
|
|
166
|
+
)
|
|
167
|
+
if exceeded is not None:
|
|
168
|
+
logger.warning(
|
|
169
|
+
"tool-count-exceeded",
|
|
170
|
+
extra={
|
|
171
|
+
"profile": profile.name,
|
|
172
|
+
"total_count": exceeded.total_count,
|
|
173
|
+
"max_allowed": exceeded.max_allowed,
|
|
174
|
+
"action": profile.tool_loop_action,
|
|
175
|
+
},
|
|
176
|
+
)
|
|
177
|
+
if profile.tool_loop_action == "break":
|
|
178
|
+
raise ToolCountExceededError(exceeded, profile.name)
|
|
179
|
+
# For "warn" and "inject" actions, log only and continue.
|
|
180
|
+
# The inject action's hint is not meaningful for count
|
|
181
|
+
# exceeded (not a same-tool loop), so we just warn.
|
|
182
|
+
|
|
149
183
|
detection = detect_tool_loop(
|
|
150
184
|
request,
|
|
151
185
|
window=profile.tool_loop_window,
|
|
@@ -469,11 +503,16 @@ class _StreamUsageAccumulator:
|
|
|
469
503
|
"""
|
|
470
504
|
|
|
471
505
|
__slots__ = (
|
|
506
|
+
"_current_block_text",
|
|
507
|
+
"_current_block_type",
|
|
472
508
|
"_observed",
|
|
509
|
+
"_text_blocks",
|
|
473
510
|
"cache_creation_input_tokens",
|
|
474
511
|
"cache_read_input_tokens",
|
|
512
|
+
"has_tool_use",
|
|
475
513
|
"input_tokens",
|
|
476
514
|
"output_tokens",
|
|
515
|
+
"stop_reason",
|
|
477
516
|
)
|
|
478
517
|
|
|
479
518
|
def __init__(self) -> None:
|
|
@@ -482,6 +521,32 @@ class _StreamUsageAccumulator:
|
|
|
482
521
|
self.cache_read_input_tokens = 0
|
|
483
522
|
self.cache_creation_input_tokens = 0
|
|
484
523
|
self._observed = False
|
|
524
|
+
# v2.0-G: tracked for drift detection observation at stream end.
|
|
525
|
+
self.has_tool_use: bool = False
|
|
526
|
+
self.stop_reason: str | None = None
|
|
527
|
+
# v2.0-H: partial content accumulation for mid-stream recovery.
|
|
528
|
+
# Completed text blocks are moved to _text_blocks on content_block_stop.
|
|
529
|
+
# In-progress text is in _current_block_text (list of str fragments).
|
|
530
|
+
self._text_blocks: list[str] = []
|
|
531
|
+
self._current_block_type: str | None = None
|
|
532
|
+
self._current_block_text: list[str] = []
|
|
533
|
+
|
|
534
|
+
@property
def partial_content(self) -> list[dict[str, Any]]:
    """Expose the text gathered so far as Anthropic-style content blocks.

    Completed text blocks come first, in arrival order, followed by the
    text block that is still being streamed (if any), so an interrupted
    stream still yields everything that was received. Empty completed
    blocks are skipped. ``tool_use`` blocks never appear in the result
    because a partial JSON payload is unusable.
    """
    finished = [{"type": "text", "text": chunk} for chunk in self._text_blocks if chunk]
    in_flight = self._current_block_text
    # Nothing is mid-flight unless a text block is open and has fragments.
    if self._current_block_type != "text" or not in_flight:
        return finished
    return [*finished, {"type": "text", "text": "".join(in_flight)}]
|
|
485
550
|
|
|
486
551
|
def observe(self, event: AnthropicStreamEvent) -> None:
|
|
487
552
|
"""Update counters from one stream event (no-op for non-usage events)."""
|
|
@@ -494,6 +559,33 @@ class _StreamUsageAccumulator:
|
|
|
494
559
|
usage = event.data.get("usage") if isinstance(event.data, dict) else None
|
|
495
560
|
if isinstance(usage, dict):
|
|
496
561
|
self._merge(usage)
|
|
562
|
+
# v2.0-G: capture stop_reason from the terminal message_delta.
|
|
563
|
+
delta = event.data.get("delta") if isinstance(event.data, dict) else None
|
|
564
|
+
if isinstance(delta, dict) and "stop_reason" in delta:
|
|
565
|
+
self.stop_reason = delta["stop_reason"]
|
|
566
|
+
elif event.type == "content_block_start":
|
|
567
|
+
# v2.0-G: detect tool_use content blocks for drift observation.
|
|
568
|
+
cb = event.data.get("content_block") if isinstance(event.data, dict) else None
|
|
569
|
+
if isinstance(cb, dict):
|
|
570
|
+
block_type = cb.get("type", "")
|
|
571
|
+
if block_type == "tool_use":
|
|
572
|
+
self.has_tool_use = True
|
|
573
|
+
# v2.0-H: start tracking a new content block.
|
|
574
|
+
self._current_block_type = block_type
|
|
575
|
+
self._current_block_text = []
|
|
576
|
+
elif event.type == "content_block_delta":
|
|
577
|
+
# v2.0-H: accumulate text_delta fragments.
|
|
578
|
+
delta = event.data.get("delta") if isinstance(event.data, dict) else None
|
|
579
|
+
if isinstance(delta, dict) and delta.get("type") == "text_delta":
|
|
580
|
+
text = delta.get("text", "")
|
|
581
|
+
if text:
|
|
582
|
+
self._current_block_text.append(text)
|
|
583
|
+
elif event.type == "content_block_stop":
|
|
584
|
+
# v2.0-H: finalize current block.
|
|
585
|
+
if self._current_block_type == "text" and self._current_block_text:
|
|
586
|
+
self._text_blocks.append("".join(self._current_block_text))
|
|
587
|
+
self._current_block_type = None
|
|
588
|
+
self._current_block_text = []
|
|
497
589
|
|
|
498
590
|
def _merge(self, usage: dict[str, object]) -> None:
|
|
499
591
|
any_nonzero = False
|
|
@@ -613,9 +705,18 @@ class MidStreamError(CodeRouterError):
|
|
|
613
705
|
one chunk to the client. Fallback is not attempted (the client has
|
|
614
706
|
received partial content, so switching providers would corrupt the
|
|
615
707
|
stream). Callers should surface this as a terminal error event.
|
|
708
|
+
|
|
709
|
+
v2.0-H: carries ``partial_content`` — the accumulated text blocks
|
|
710
|
+
generated before the failure. The ingress uses this to synthesize
|
|
711
|
+
a graceful stream termination when ``partial_stitch_action: surface``.
|
|
616
712
|
"""
|
|
617
713
|
|
|
618
|
-
def __init__(
|
|
714
|
+
def __init__(
|
|
715
|
+
self,
|
|
716
|
+
provider: str,
|
|
717
|
+
original: AdapterError,
|
|
718
|
+
partial_content: list[dict[str, Any]] | None = None,
|
|
719
|
+
) -> None:
|
|
619
720
|
"""Wrap the underlying :class:`AdapterError` with the provider name.
|
|
620
721
|
|
|
621
722
|
The ingress layer catches this and converts it into an in-stream
|
|
@@ -624,6 +725,7 @@ class MidStreamError(CodeRouterError):
|
|
|
624
725
|
"""
|
|
625
726
|
self.provider = provider
|
|
626
727
|
self.original = original
|
|
728
|
+
self.partial_content: list[dict[str, Any]] = partial_content or []
|
|
627
729
|
super().__init__(f"provider {provider!r} failed mid-stream: {original}")
|
|
628
730
|
|
|
629
731
|
|
|
@@ -747,6 +849,44 @@ class FallbackEngine:
|
|
|
747
849
|
# Distinct from v1.9-C ``adaptive`` which handles the
|
|
748
850
|
# gradient case via a rolling window.
|
|
749
851
|
self._backend_health_monitor: BackendHealthMonitor = BackendHealthMonitor()
|
|
852
|
+
# v2.0-J: self-healing orchestrator. Manages provider exclusion,
|
|
853
|
+
# restart, and recovery probing when backend_health_action is
|
|
854
|
+
# "exclude". Composes with the L5 backend health monitor.
|
|
855
|
+
from coderouter.guards.self_healing import SelfHealingOrchestrator
|
|
856
|
+
|
|
857
|
+
self._self_healing: SelfHealingOrchestrator = SelfHealingOrchestrator()
|
|
858
|
+
# v2.0-G (L4): per-process drift detection window manager.
|
|
859
|
+
# Stores per-provider rolling observations; the detector is
|
|
860
|
+
# invoked after each provider-ok / provider-failed event and
|
|
861
|
+
# returns a verdict. Action dispatch (promote/reload) reuses
|
|
862
|
+
# the adaptive rank machinery.
|
|
863
|
+
from coderouter.guards.drift_detection import DriftWindow
|
|
864
|
+
|
|
865
|
+
self._drift_window: DriftWindow = DriftWindow()
|
|
866
|
+
# Track which providers are currently in drift-demoted state
|
|
867
|
+
# and when their cooldown expires (monotonic timestamp).
|
|
868
|
+
self._drift_demoted: dict[str, float] = {}
|
|
869
|
+
# Last drift verdict (set by _observe_drift_signal for ingress header).
|
|
870
|
+
self._last_drift_verdict: DriftVerdict | None = None
|
|
871
|
+
# v2.0-J: active recovery probe tasks (one per excluded provider).
|
|
872
|
+
self._recovery_tasks: dict[str, asyncio.Task[None]] = {}
|
|
873
|
+
# v2.0-J: shutdown event shared with recovery probe tasks.
|
|
874
|
+
self._recovery_shutdown: asyncio.Event | None = None
|
|
875
|
+
# v2.0-K: persistent state store (None = in-memory only).
|
|
876
|
+
self._state_store: StateStore | None = None
|
|
877
|
+
|
|
878
|
+
@property
def last_drift_severity(self) -> str | None:
    """Return the severity string of the most recent drift verdict, or None.

    The ingress reads this after generate_anthropic / stream_anthropic to
    set the ``X-CodeRouter-Drift`` response header. Returns the verdict's
    ``severity`` (``"mild"`` or ``"severe"``) when the stored verdict is
    flagged as drifted, ``None`` otherwise.

    The verdict is looked up with ``getattr`` so that engines built via
    ``__new__`` (which never ran ``__init__`` and therefore have no
    ``_last_drift_verdict`` attribute) report "no drift" instead of
    raising ``AttributeError`` — the same tolerance the other guard
    properties on this class provide.
    """
    verdict = getattr(self, "_last_drift_verdict", None)
    if verdict is None or not verdict.drifted:
        return None
    return verdict.severity
|
|
750
890
|
|
|
751
891
|
@property
|
|
752
892
|
def _adaptive(self) -> AdaptiveAdjuster:
|
|
@@ -794,12 +934,17 @@ class FallbackEngine:
|
|
|
794
934
|
return existing
|
|
795
935
|
|
|
796
936
|
@property
|
|
797
|
-
def
|
|
937
|
+
def backend_health(self) -> BackendHealthMonitor:
|
|
798
938
|
"""Return the L5 backend-health monitor, lazily building one if absent.
|
|
799
939
|
|
|
800
940
|
Same legacy-test compatibility pattern as the other guard
|
|
801
941
|
properties — ``__new__``-constructed engines get a fresh
|
|
802
942
|
empty monitor so ``state_for`` is always answerable.
|
|
943
|
+
|
|
944
|
+
v2.0-I: promoted from ``_backend_health`` to public ``backend_health``
|
|
945
|
+
so the continuous probe background task can feed results into the
|
|
946
|
+
same state machine. Internal callers continue to work (property
|
|
947
|
+
access is transparent).
|
|
803
948
|
"""
|
|
804
949
|
existing = getattr(self, "_backend_health_monitor", None)
|
|
805
950
|
if existing is None:
|
|
@@ -807,6 +952,25 @@ class FallbackEngine:
|
|
|
807
952
|
existing = self._backend_health_monitor
|
|
808
953
|
return existing
|
|
809
954
|
|
|
955
|
+
# Alias for backward compat with internal callers.
@property
def _backend_health(self) -> BackendHealthMonitor:
    """Return the monitor exposed by :attr:`backend_health` under its former private name."""
    return self.backend_health
|
|
959
|
+
|
|
960
|
+
@property
def self_healing(self) -> SelfHealingOrchestrator:
    """Return the v2.0-J self-healing orchestrator, creating it on first use.

    Lazy init for backward compat with ``__new__``-constructed test
    engines, which have no ``_self_healing`` attribute until this
    property is first read.
    """
    existing = getattr(self, "_self_healing", None)
    if existing is None:
        # Only the lazy-init branch needs the class, so the import is
        # kept here rather than executed on every property access.
        # NOTE(review): the import is function-local like the other
        # guard imports in this class — presumably to avoid an import
        # cycle; confirm before hoisting it to module level.
        from coderouter.guards.self_healing import SelfHealingOrchestrator

        existing = SelfHealingOrchestrator()
        self._self_healing = existing
    return existing
|
|
973
|
+
|
|
810
974
|
def _observe_provider_failure(
|
|
811
975
|
self,
|
|
812
976
|
provider: str,
|
|
@@ -884,6 +1048,18 @@ class FallbackEngine:
|
|
|
884
1048
|
new_state=transition.new_state,
|
|
885
1049
|
consecutive_failures=transition.consecutive_failures,
|
|
886
1050
|
)
|
|
1051
|
+
# v2.0-J: trigger self-healing on UNHEALTHY + exclude.
|
|
1052
|
+
if (
|
|
1053
|
+
transition.new_state == "UNHEALTHY"
|
|
1054
|
+
and bh_action == "exclude"
|
|
1055
|
+
):
|
|
1056
|
+
newly_excluded = self.self_healing.on_unhealthy(
|
|
1057
|
+
provider,
|
|
1058
|
+
profile=chosen,
|
|
1059
|
+
consecutive_failures=transition.consecutive_failures,
|
|
1060
|
+
)
|
|
1061
|
+
if newly_excluded:
|
|
1062
|
+
self._spawn_recovery_probe(provider, chain=chain)
|
|
887
1063
|
|
|
888
1064
|
def _observe_provider_success(
|
|
889
1065
|
self,
|
|
@@ -925,6 +1101,260 @@ class FallbackEngine:
|
|
|
925
1101
|
consecutive_failures=transition.consecutive_failures,
|
|
926
1102
|
)
|
|
927
1103
|
|
|
1104
|
+
def _spawn_recovery_probe(
    self,
    provider: str,
    *,
    chain: FallbackChain,
) -> None:
    """Launch an async recovery probe task for an excluded provider.

    v2.0-J: called by ``_observe_provider_failure`` when a provider
    is newly excluded. The task runs ``recovery_probe_loop`` with the
    chain's probe intervals, restart timeout and health threshold, and
    is handed the shared shutdown event so it can be stopped by
    :meth:`shutdown_recovery_probes`.

    Safe to call from a sync context — uses
    ``asyncio.get_running_loop`` to schedule the task and is a no-op
    when no event loop is running (e.g. in pure-sync tests). Also a
    no-op when a probe for ``provider`` is still running or when
    ``provider`` is not present in ``self.config.providers``.

    :param provider: name of the excluded provider to probe.
    :param chain: active fallback chain; supplies the probe settings
        and the profile name passed to the probe loop.
    """
    # Check for a running loop first so that nothing loop-bound (the
    # shutdown Event, the task) is created in a pure-sync context.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        return

    # ``getattr`` keeps ``__new__``-constructed engines working, in line
    # with the lazily built guard properties on this class.
    tasks = getattr(self, "_recovery_tasks", None)
    if tasks is None:
        tasks = self._recovery_tasks = {}

    # Don't spawn duplicate tasks; a finished task may be replaced.
    current = tasks.get(provider)
    if current is not None and not current.done():
        return

    # Find the ProviderConfig for this provider name.
    provider_config = next(
        (p for p in self.config.providers if p.name == provider),
        None,
    )
    if provider_config is None:
        return

    from coderouter.guards.self_healing import recovery_probe_loop

    # Reuse or create the shutdown event shared by all probe tasks.
    shutdown = getattr(self, "_recovery_shutdown", None)
    if shutdown is None:
        shutdown = self._recovery_shutdown = asyncio.Event()

    tasks[provider] = loop.create_task(
        recovery_probe_loop(
            provider_config,
            orchestrator=self.self_healing,
            record_fn=self.backend_health.record_attempt,
            health_threshold=chain.backend_health_threshold,
            initial_interval_s=chain.recovery_probe_initial_s,
            max_interval_s=chain.recovery_probe_max_s,
            restart_timeout_s=chain.restart_timeout_s,
            probe_timeout_s=10.0,
            shutdown_event=shutdown,
            profile=chain.name,
        ),
        name=f"recovery-probe-{provider}",
    )
|
|
1163
|
+
|
|
1164
|
+
async def shutdown_recovery_probes(self) -> None:
    """Signal all recovery probe tasks to stop and await them.

    Called from the app lifespan shutdown path. Sets the shared
    shutdown event (when one exists), then waits for every probe task
    that has not finished yet. Failures and cancellations of individual
    probes are swallowed so that one bad probe cannot abort shutdown.
    """
    shutdown = getattr(self, "_recovery_shutdown", None)
    if shutdown is not None:
        shutdown.set()

    tasks = getattr(self, "_recovery_tasks", None)
    if not tasks:
        return

    # Snapshot and clear before awaiting: the registry may be mutated by
    # ``_spawn_recovery_probe`` while we are suspended, and iterating the
    # live dict across an ``await`` would raise RuntimeError.
    pending = [task for task in tasks.values() if not task.done()]
    tasks.clear()
    if pending:
        # ``return_exceptions=True`` also collects CancelledError from a
        # cancelled probe, which ``suppress(Exception)`` would let escape.
        await asyncio.gather(*pending, return_exceptions=True)
|
|
1178
|
+
|
|
1179
|
+
# ------------------------------------------------------------------
|
|
1180
|
+
# v2.0-K: State persistence
|
|
1181
|
+
# ------------------------------------------------------------------
|
|
1182
|
+
|
|
1183
|
+
def attach_state_store(self, store: StateStore) -> None:
    """Attach a :class:`StateStore` and load persisted state.

    Called from the app lifespan startup path when ``state_dir``
    is configured. Remembers ``store`` on the engine and immediately
    restores budget, backend-health and self-healing state from it
    via :meth:`_load_all_state`.

    :param store: the persistent store to read from now and to write
        to on later :meth:`save_all_state` calls.
    """
    # Must be assigned first: _load_all_state reads self._state_store.
    self._state_store = store
    self._load_all_state()
|
|
1192
|
+
|
|
1193
|
+
def save_all_state(self) -> None:
    """Persist all subsystem state to the attached store.

    Called from the app lifespan shutdown path and optionally
    on a periodic timer. No-op if no store is attached.

    Each subsystem (budget, backend health, self-healing) is written
    under its own namespace with the key ``"state"``. Saving is
    best-effort: a failure in one subsystem is logged as a warning and
    does not prevent the remaining subsystems from being saved, and no
    exception is propagated to the caller.
    """
    store = getattr(self, "_state_store", None)
    if store is None:
        return

    # Resolved lazily inside the ``try`` so that a missing or failing
    # subsystem attribute is contained exactly like a failing save.
    subsystems = (
        ("budget", lambda: self._budget),
        ("health", lambda: self.backend_health),
        ("self_healing", lambda: self.self_healing),
    )
    for namespace, resolve in subsystems:
        try:
            store.put(namespace, "state", resolve().save_state())
        except Exception:
            # Deliberately broad: persistence must never break shutdown,
            # but the failure is surfaced instead of silently dropped.
            logger.warning(
                "state-save-failed",
                extra={"namespace": namespace},
                exc_info=True,
            )
    # MetricsCollector state is saved separately via the singleton.
|
|
1211
|
+
|
|
1212
|
+
def _load_all_state(self) -> None:
    """Restore subsystem state from the attached store.

    Reads the ``"state"`` key from the ``budget``, ``health`` and
    ``self_healing`` namespaces and hands each non-``None`` value to
    the matching subsystem's ``load_state``. No-op if no store is
    attached. Restoring is best-effort: a failure in one subsystem is
    logged as a warning and the remaining subsystems are still loaded.
    """
    store = getattr(self, "_state_store", None)
    if store is None:
        return

    # Resolved lazily inside the ``try`` so that a missing or failing
    # subsystem attribute is contained exactly like a failing load.
    subsystems = (
        ("budget", lambda: self._budget),
        ("health", lambda: self.backend_health),
        ("self_healing", lambda: self.self_healing),
    )
    for namespace, resolve in subsystems:
        try:
            saved = store.get(namespace, "state")
            if saved is not None:
                resolve().load_state(saved)  # type: ignore[arg-type]
        except Exception:
            # Deliberately broad: unreadable persisted state must not
            # block startup; the subsystem keeps its in-memory defaults.
            logger.warning(
                "state-load-failed",
                extra={"namespace": namespace},
                exc_info=True,
            )
|
|
1231
|
+
|
|
1232
|
+
def _observe_drift_signal(
    self,
    provider: str,
    *,
    profile: str | None,
    output_tokens: int = 0,
    has_tool_use: bool = False,
    request_had_tools: bool = False,
    stop_reason: str | None = None,
    is_error: bool = False,
    stream: bool = False,
) -> DriftVerdict | None:
    """v2.0-G (L4): record an observation and check for drift.

    Called after every provider-ok / provider-failed event on the
    Anthropic-shaped paths. Returns a :class:`DriftVerdict` when
    drift is detected (drifted=True), None otherwise — including when
    the profile cannot be resolved, drift detection is ``off`` for the
    profile, or the provider is still inside its demotion cooldown.

    Side effects:
    - Always records the observation in the per-provider window
      (unless detection is off or the profile is unknown).
    - When a cooldown has expired: emits ``drift-recovered``, clears
      the provider's window and demotion entry, and returns None.
    - On detection: stores the verdict for the ingress header and
      emits ``drift-detected``.
    - If action is ``promote`` or ``reload``, demotes the provider
      via the adaptive rank machinery and starts a cooldown.
    - If action is ``reload``, additionally schedules a best-effort
      background ``attempt_reload`` task; this step is skipped rather
      than raised when no event loop is running or the provider has
      no registered adapter.
    """
    from coderouter.guards.drift_detection import (
        SENSITIVITY_PRESETS,
        ResponseObservation,
        detect_drift,
    )
    from coderouter.logging import (
        log_drift_detected,
        log_drift_promoted,
        log_drift_recovered,
    )

    chosen = profile or self.config.default_profile
    try:
        chain_cfg = self.config.profile_by_name(chosen)
    except (KeyError, ValueError):
        return None
    action = chain_cfg.drift_detection_action
    if action == "off":
        return None

    # Keep the shared window in sync with the active profile's size.
    self._drift_window.max_size = chain_cfg.drift_detection_window_size

    # Record observation
    self._drift_window.record(
        ResponseObservation(
            provider=provider,
            output_tokens=output_tokens,
            has_tool_use=has_tool_use,
            request_had_tools=request_had_tools,
            stop_reason=stop_reason,
            is_error=is_error,
            stream=stream,
        )
    )

    cooldown_s = chain_cfg.drift_detection_cooldown_s
    demote_expires = self._drift_demoted.get(provider)
    if demote_expires is not None:
        if time.monotonic() < demote_expires:
            # Don't re-detect while in cooldown
            return None
        # Cooldown expired — clear drift state for this provider.
        # NOTE(review): ``after_s`` reports the configured cooldown, not
        # the measured elapsed time, and no adaptive rank restore is
        # performed here — confirm both are intended.
        log_drift_recovered(logger, provider=provider, profile=chosen, after_s=cooldown_s)
        self._drift_demoted.pop(provider, None)
        self._drift_window.clear(provider)
        return None

    # Run detection
    window = self._drift_window.get_window(provider)
    thresholds = SENSITIVITY_PRESETS.get(
        chain_cfg.drift_detection_sensitivity, SENSITIVITY_PRESETS["normal"]
    )
    verdict = detect_drift(window, thresholds)

    if not verdict.drifted:
        self._last_drift_verdict = None
        return None

    # Store for ingress response header.
    self._last_drift_verdict = verdict

    # Emit log
    log_drift_detected(
        logger,
        provider=provider,
        profile=chosen,
        severity=verdict.severity,
        reason=verdict.reason,
        action=action,
        signals=verdict.signals,
    )

    # Action: promote / reload
    if action in ("promote", "reload"):
        # Demote via adaptive rank
        self._adaptive.demote(provider, steps=2)
        log_drift_promoted(
            logger,
            provider=provider,
            profile=chosen,
            demoted_to_rank=2,
            cooldown_s=cooldown_s,
        )
        # Record cooldown expiry
        self._drift_demoted[provider] = time.monotonic() + cooldown_s

    # v2.0-G: reload action — attempt Ollama KV cache flush
    # (best-effort, fire-and-forget background task).
    if action == "reload":
        from coderouter.guards.drift_actions import attempt_reload

        try:
            loop = asyncio.get_running_loop()
            provider_config = self._adapters[provider].config
        except (RuntimeError, KeyError):
            # Best-effort only: without a running loop or a registered
            # adapter there is nothing to schedule, and a drift side
            # effect must not fail the request being served.
            logger.debug("drift-reload-skipped", extra={"provider": provider})
        else:
            # Keep a reference so the task is not garbage-collected
            # while it is still running.
            self._reload_task = loop.create_task(attempt_reload(provider_config))

    return verdict
|
|
1357
|
+
|
|
928
1358
|
def _resolve_profile_overrides(self, profile_name: str | None) -> ProviderCallOverrides:
|
|
929
1359
|
"""v0.6-B: build the ProviderCallOverrides for the active profile.
|
|
930
1360
|
|
|
@@ -1107,6 +1537,19 @@ class FallbackEngine:
|
|
|
1107
1537
|
profile=chosen,
|
|
1108
1538
|
)
|
|
1109
1539
|
adapters = healthy + unhealthy
|
|
1540
|
+
|
|
1541
|
+
# Pass 4b: v2.0-J self-healing exclusion. When the action is
|
|
1542
|
+
# "exclude", providers in the orchestrator's excluded set are
|
|
1543
|
+
# removed entirely from the chain. Unlike "demote" (which
|
|
1544
|
+
# moves to the back), excluded providers are not attempted at
|
|
1545
|
+
# all — recovery probes run in the background to detect when
|
|
1546
|
+
# they come back. If all providers are excluded, fall through
|
|
1547
|
+
# to the existing NoProvidersAvailableError path.
|
|
1548
|
+
if chain.backend_health_action == "exclude":
|
|
1549
|
+
excluded = self.self_healing.excluded_providers()
|
|
1550
|
+
if excluded:
|
|
1551
|
+
adapters = [a for a in adapters if a.name not in excluded]
|
|
1552
|
+
|
|
1110
1553
|
return adapters
|
|
1111
1554
|
|
|
1112
1555
|
def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
|
|
@@ -1455,6 +1898,14 @@ class FallbackEngine:
|
|
|
1455
1898
|
self._observe_provider_failure(
|
|
1456
1899
|
adapter.name, exc, profile=request.profile
|
|
1457
1900
|
)
|
|
1901
|
+
# v2.0-G (L4): drift detection observation (failure path).
|
|
1902
|
+
self._observe_drift_signal(
|
|
1903
|
+
adapter.name,
|
|
1904
|
+
profile=request.profile,
|
|
1905
|
+
is_error=True,
|
|
1906
|
+
request_had_tools=bool(request.tools),
|
|
1907
|
+
stream=False,
|
|
1908
|
+
)
|
|
1458
1909
|
errors.append(exc)
|
|
1459
1910
|
if not exc.retryable:
|
|
1460
1911
|
break
|
|
@@ -1482,6 +1933,18 @@ class FallbackEngine:
|
|
|
1482
1933
|
self._observe_provider_success(
|
|
1483
1934
|
adapter.name, profile=request.profile
|
|
1484
1935
|
)
|
|
1936
|
+
# v2.0-G (L4): drift detection observation (success path).
|
|
1937
|
+
self._observe_drift_signal(
|
|
1938
|
+
adapter.name,
|
|
1939
|
+
profile=request.profile,
|
|
1940
|
+
output_tokens=resp.usage.output_tokens if resp.usage else 0,
|
|
1941
|
+
has_tool_use=any(
|
|
1942
|
+
getattr(b, "type", None) == "tool_use" for b in (resp.content or [])
|
|
1943
|
+
),
|
|
1944
|
+
request_had_tools=bool(request.tools),
|
|
1945
|
+
stop_reason=resp.stop_reason,
|
|
1946
|
+
stream=False,
|
|
1947
|
+
)
|
|
1485
1948
|
# v1.9-A: pair every successful Anthropic response with a
|
|
1486
1949
|
# cache-observed log line. Native Anthropic / LM Studio
|
|
1487
1950
|
# /v1/messages report cache_read_input_tokens /
|
|
@@ -1620,6 +2083,14 @@ class FallbackEngine:
|
|
|
1620
2083
|
self._observe_provider_failure(
|
|
1621
2084
|
adapter.name, exc, profile=request.profile
|
|
1622
2085
|
)
|
|
2086
|
+
# v2.0-G (L4): drift detection observation (stream failure).
|
|
2087
|
+
self._observe_drift_signal(
|
|
2088
|
+
adapter.name,
|
|
2089
|
+
profile=request.profile,
|
|
2090
|
+
is_error=True,
|
|
2091
|
+
request_had_tools=bool(request.tools),
|
|
2092
|
+
stream=True,
|
|
2093
|
+
)
|
|
1623
2094
|
errors.append(exc)
|
|
1624
2095
|
if not exc.retryable:
|
|
1625
2096
|
break
|
|
@@ -1662,7 +2133,27 @@ class FallbackEngine:
|
|
|
1662
2133
|
self._observe_provider_failure(
|
|
1663
2134
|
adapter.name, exc, profile=request.profile
|
|
1664
2135
|
)
|
|
1665
|
-
|
|
2136
|
+
# v2.0-G (L4): drift detection observation (mid-stream failure).
|
|
2137
|
+
self._observe_drift_signal(
|
|
2138
|
+
adapter.name,
|
|
2139
|
+
profile=request.profile,
|
|
2140
|
+
is_error=True,
|
|
2141
|
+
request_had_tools=bool(request.tools),
|
|
2142
|
+
stream=True,
|
|
2143
|
+
)
|
|
2144
|
+
raise MidStreamError(
|
|
2145
|
+
adapter.name, exc, partial_content=acc.partial_content
|
|
2146
|
+
) from exc
|
|
2147
|
+
# v2.0-G (L4): drift detection observation (stream success).
|
|
2148
|
+
self._observe_drift_signal(
|
|
2149
|
+
adapter.name,
|
|
2150
|
+
profile=request.profile,
|
|
2151
|
+
output_tokens=acc.output_tokens,
|
|
2152
|
+
has_tool_use=acc.has_tool_use,
|
|
2153
|
+
request_had_tools=bool(request.tools),
|
|
2154
|
+
stop_reason=acc.stop_reason,
|
|
2155
|
+
stream=True,
|
|
2156
|
+
)
|
|
1666
2157
|
# v1.9-B2: pair the successful stream with a cache-observed
|
|
1667
2158
|
# log line carrying the aggregated usage counters that the
|
|
1668
2159
|
# ``_StreamUsageAccumulator`` collected from the
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Persistent state layer (v2.0-K).
|
|
2
|
+
|
|
3
|
+
Four modules:
|
|
4
|
+
|
|
5
|
+
* :mod:`coderouter.state.store` — sqlite3 KV store for operational
|
|
6
|
+
metadata (budget totals, health
|
|
7
|
+
state, self-healing exclusions).
|
|
8
|
+
* :mod:`coderouter.state.audit_log` — JSONL structured event log with
|
|
9
|
+
rotation and CLI reader.
|
|
10
|
+
* :mod:`coderouter.state.request_log` — JSONL request metadata journal
|
|
11
|
+
(per-request token counts, cost,
|
|
12
|
+
provider — no request body).
|
|
13
|
+
* :mod:`coderouter.state.replay` — Statistical A/B analysis engine
|
|
14
|
+
over request journal entries.
|
|
15
|
+
"""
|