coderouter-cli 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +219 -0
- coderouter/config/schemas.py +132 -2
- coderouter/guards/__init__.py +6 -4
- coderouter/guards/backend_health.py +34 -0
- coderouter/guards/self_healing.py +413 -0
- coderouter/guards/tool_loop.py +71 -0
- coderouter/ingress/anthropic_routes.py +31 -1
- coderouter/ingress/app.py +90 -0
- coderouter/logging.py +108 -0
- coderouter/metrics/collector.py +75 -0
- coderouter/output_filters.py +95 -4
- coderouter/routing/budget.py +35 -0
- coderouter/routing/fallback.py +211 -1
- coderouter/state/__init__.py +15 -0
- coderouter/state/audit_log.py +269 -0
- coderouter/state/replay.py +316 -0
- coderouter/state/request_log.py +178 -0
- coderouter/state/store.py +212 -0
- coderouter/translation/tool_repair.py +42 -1
- coderouter_cli-2.2.0.dist-info/METADATA +243 -0
- {coderouter_cli-2.1.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +24 -18
- coderouter_cli-2.1.0.dist-info/METADATA +0 -560
- {coderouter_cli-2.1.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.1.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.1.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0
coderouter/logging.py
CHANGED
|
@@ -595,6 +595,114 @@ def log_demote_unhealthy_provider(
|
|
|
595
595
|
logger.info("demote-unhealthy-provider", extra=payload)
|
|
596
596
|
|
|
597
597
|
|
|
598
|
+
# ---------------------------------------------------------------------------
|
|
599
|
+
# v2.0-J: self-healing log shapes
|
|
600
|
+
# ---------------------------------------------------------------------------
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
class SelfHealingExcludePayload(TypedDict):
    """Typed ``extra`` mapping attached to a ``self-healing-exclude`` record.

    Built by ``log_self_healing_exclude`` and handed to the logger as the
    ``extra`` argument, so each key becomes an attribute on the log record.
    """

    # Name of the provider that was excluded from the chain.
    provider: str
    # Profile name supplied by the caller alongside the exclusion.
    profile: str
    # Failure count reported by the caller at the time of exclusion.
    consecutive_failures: int
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
class SelfHealingRestorePayload(TypedDict):
    """Typed ``extra`` mapping attached to a ``self-healing-restore`` record.

    Built by ``log_self_healing_restore`` and handed to the logger as the
    ``extra`` argument.
    """

    # Name of the provider that was restored to the chain.
    provider: str
    # Profile name supplied by the caller alongside the restore.
    profile: str
    # How long the provider stayed excluded, in seconds (the emitter rounds
    # this to one decimal place before logging).
    excluded_duration_s: float
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
class SelfHealingRestartPayload(TypedDict):
    """Typed ``extra`` mapping attached to a ``self-healing-restart`` record.

    Built by ``log_self_healing_restart`` after a restart attempt and handed
    to the logger as the ``extra`` argument.
    """

    # Name of the provider whose backend restart was attempted.
    provider: str
    # Restart command string as supplied by the caller.
    command: str
    # Whether the caller reported the restart attempt as successful.
    success: bool
    # Error text supplied by the caller, or ``None`` when none was given.
    error: str | None
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
class SelfHealingRecoveryProbePayload(TypedDict):
    """Typed ``extra`` mapping attached to a ``self-healing-recovery-probe`` record.

    Built by ``log_self_healing_recovery_probe`` after each probe attempt and
    handed to the logger as the ``extra`` argument.
    """

    # Name of the provider that was probed.
    provider: str
    # Whether the caller reported the probe as successful.
    success: bool
    # Interval before the next probe, in seconds (rounded to one decimal by
    # the emitter).
    next_interval_s: float
    # Probe latency in milliseconds (rounded to one decimal by the emitter).
    latency_ms: float
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def log_self_healing_exclude(
    logger: logging.Logger,
    *,
    provider: str,
    profile: str,
    consecutive_failures: int,
) -> None:
    """Log a WARNING-level ``self-healing-exclude`` event.

    Intended for the moment self-healing removes a provider from the chain.

    Args:
        logger: Logger that receives the record.
        provider: Name of the excluded provider.
        profile: Profile name to attach to the record.
        consecutive_failures: Failure count to attach to the record.
    """
    record_fields: SelfHealingExcludePayload = {
        "provider": provider,
        "profile": profile,
        "consecutive_failures": consecutive_failures,
    }
    # The event name is the message; all data travels as record attributes.
    logger.warning("self-healing-exclude", extra=record_fields)
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def log_self_healing_restore(
    logger: logging.Logger,
    *,
    provider: str,
    profile: str,
    excluded_duration_s: float,
) -> None:
    """Log an INFO-level ``self-healing-restore`` event.

    Intended for the moment a previously excluded provider rejoins the chain.

    Args:
        logger: Logger that receives the record.
        provider: Name of the restored provider.
        profile: Profile name to attach to the record.
        excluded_duration_s: Seconds the provider spent excluded; logged
            rounded to one decimal place.
    """
    duration_rounded = round(excluded_duration_s, 1)
    record_fields: SelfHealingRestorePayload = {
        "provider": provider,
        "profile": profile,
        "excluded_duration_s": duration_rounded,
    }
    logger.info("self-healing-restore", extra=record_fields)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def log_self_healing_restart(
    logger: logging.Logger,
    *,
    provider: str,
    command: str,
    success: bool,
    error: str | None = None,
) -> None:
    """Log a ``self-healing-restart`` event after a backend restart attempt.

    The record is emitted at INFO when ``success`` is truthy and at WARNING
    otherwise.

    Args:
        logger: Logger that receives the record.
        provider: Name of the provider whose backend was restarted.
        command: Restart command string to attach to the record.
        success: Outcome of the restart attempt.
        error: Optional error text; ``None`` when not supplied.
    """
    record_fields: SelfHealingRestartPayload = {
        "provider": provider,
        "command": command,
        "success": success,
        "error": error,
    }
    # Failed restarts are surfaced more loudly than successful ones.
    if success:
        severity = logging.INFO
    else:
        severity = logging.WARNING
    logger.log(severity, "self-healing-restart", extra=record_fields)
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
def log_self_healing_recovery_probe(
    logger: logging.Logger,
    *,
    provider: str,
    success: bool,
    next_interval_s: float,
    latency_ms: float,
) -> None:
    """Log an INFO-level ``self-healing-recovery-probe`` event.

    Intended to be called after every recovery probe attempt against an
    excluded provider.

    Args:
        logger: Logger that receives the record.
        provider: Name of the probed provider.
        success: Outcome of the probe.
        next_interval_s: Seconds until the next probe; logged rounded to one
            decimal place.
        latency_ms: Probe latency in milliseconds; logged rounded to one
            decimal place.
    """
    interval_rounded = round(next_interval_s, 1)
    latency_rounded = round(latency_ms, 1)
    record_fields: SelfHealingRecoveryProbePayload = {
        "provider": provider,
        "success": success,
        "next_interval_s": interval_rounded,
        "latency_ms": latency_rounded,
    }
    logger.info("self-healing-recovery-probe", extra=record_fields)
|
|
704
|
+
|
|
705
|
+
|
|
598
706
|
# ---------------------------------------------------------------------------
|
|
599
707
|
# v1.0-A: output-filter-applied log shape
|
|
600
708
|
#
|
coderouter/metrics/collector.py
CHANGED
|
@@ -623,6 +623,81 @@ class MetricsCollector(logging.Handler):
|
|
|
623
623
|
"recent": list(self._recent),
|
|
624
624
|
}
|
|
625
625
|
|
|
626
|
+
# ------------------------------------------------------------------
|
|
627
|
+
# v2.0-K: Persistence
|
|
628
|
+
# ------------------------------------------------------------------
|
|
629
|
+
|
|
630
|
+
def save_state(self) -> dict[str, object]:
    """Snapshot the persistable counters into a plain dict.

    The snapshot is taken while holding ``self._lock``. Per-provider maps
    are copied into plain ``dict`` objects so the result does not alias the
    collector's internal containers. The ``recent`` ring and per-provider
    ``last_error`` are intentionally not part of the snapshot.

    Returns:
        A dict of request, per-provider attempt/outcome, cost and
        chain-level counters, keyed by the names ``load_state`` reads.
    """
    with self._lock:
        # Copy each provider's outcome counter individually so nested
        # containers are detached from live state as well.
        outcomes_by_provider: dict[str, object] = {}
        for provider, outcomes in self._provider_outcomes.items():
            outcomes_by_provider[provider] = dict(outcomes)

        snapshot: dict[str, object] = {
            "requests_total": self._requests_total,
            "provider_attempts": dict(self._provider_attempts),
            "provider_outcomes": outcomes_by_provider,
            "cost_total_usd": dict(self._cost_total_usd),
            "cost_savings_usd": dict(self._cost_savings_usd),
            "cost_total_usd_aggregate": self._cost_total_usd_aggregate,
            "cost_savings_usd_aggregate": self._cost_savings_usd_aggregate,
            "chain_paid_gate_blocked_total": self._chain_paid_gate_blocked_total,
            "chain_budget_exceeded_total": self._chain_budget_exceeded_total,
            "chain_memory_pressure_blocked_total": self._chain_memory_pressure_blocked_total,
            "chain_uniform_auth_failure_total": self._chain_uniform_auth_failure_total,
            "probe_rounds_total": self._probe_rounds_total,
        }
    return snapshot
|
|
654
|
+
|
|
655
|
+
def load_state(self, state: dict[str, object]) -> None:
    """Add counters from a previously saved snapshot to the current totals.

    Additive: every value in ``state`` is *added* to the collector's
    current counters, so calling this on a fresh (zeroed) collector
    restores the prior session's totals.

    The snapshot is treated as untrusted persisted data. A missing key,
    ``None``, a non-numeric value, a non-finite float, or a sub-map that is
    not a dict contributes ``0`` (or is skipped) instead of raising, so a
    damaged entry can never leave the counters half-restored.

    Args:
        state: Snapshot in the shape produced by ``save_state``. Anything
            that is not a dict is ignored.
    """
    import math

    if not isinstance(state, dict):
        return

    def _as_int(value: object) -> int:
        # Same conversion as int(), but malformed input counts as zero.
        try:
            return int(value)  # type: ignore[call-overload]
        except (TypeError, ValueError, OverflowError):
            return 0

    def _as_float(value: object) -> float:
        # NaN / inf would poison every later addition, so treat them as zero.
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError, OverflowError):
            return 0.0
        return number if math.isfinite(number) else 0.0

    def _as_mapping(value: object) -> dict:
        # Lists, strings, None, ... are treated as "nothing to restore".
        return value if isinstance(value, dict) else {}

    with self._lock:
        self._requests_total += _as_int(state.get("requests_total", 0))

        for k, v in _as_mapping(state.get("provider_attempts")).items():
            self._provider_attempts[k] += _as_int(v)

        for prov, outcomes in _as_mapping(state.get("provider_outcomes")).items():
            if not isinstance(outcomes, dict):
                continue
            if prov not in self._provider_outcomes:
                self._provider_outcomes[prov] = Counter()
            for k, v in outcomes.items():
                self._provider_outcomes[prov][k] += _as_int(v)

        for k, v in _as_mapping(state.get("cost_total_usd")).items():
            self._cost_total_usd[k] = self._cost_total_usd.get(k, 0.0) + _as_float(v)
        for k, v in _as_mapping(state.get("cost_savings_usd")).items():
            self._cost_savings_usd[k] = self._cost_savings_usd.get(k, 0.0) + _as_float(v)

        self._cost_total_usd_aggregate += _as_float(
            state.get("cost_total_usd_aggregate", 0.0)
        )
        self._cost_savings_usd_aggregate += _as_float(
            state.get("cost_savings_usd_aggregate", 0.0)
        )
        self._chain_paid_gate_blocked_total += _as_int(
            state.get("chain_paid_gate_blocked_total", 0)
        )
        self._chain_budget_exceeded_total += _as_int(
            state.get("chain_budget_exceeded_total", 0)
        )
        self._chain_memory_pressure_blocked_total += _as_int(
            state.get("chain_memory_pressure_blocked_total", 0)
        )
        self._chain_uniform_auth_failure_total += _as_int(
            state.get("chain_uniform_auth_failure_total", 0)
        )
        self._probe_rounds_total += _as_int(state.get("probe_rounds_total", 0))
|
|
700
|
+
|
|
626
701
|
# ------------------------------------------------------------------
|
|
627
702
|
# Test hook
|
|
628
703
|
# ------------------------------------------------------------------
|
coderouter/output_filters.py
CHANGED
|
@@ -52,6 +52,7 @@ __all__ = [
|
|
|
52
52
|
"OutputFilterChain",
|
|
53
53
|
"StripStopMarkersFilter",
|
|
54
54
|
"StripThinkingFilter",
|
|
55
|
+
"StripToolCallXmlFilter",
|
|
55
56
|
"apply_output_filters",
|
|
56
57
|
"validate_output_filters",
|
|
57
58
|
]
|
|
@@ -63,20 +64,28 @@ __all__ = [
|
|
|
63
64
|
|
|
64
65
|
|
|
65
66
|
DEFAULT_STOP_MARKERS: tuple[str, ...] = (
|
|
67
|
+
# v1.0-A originals
|
|
66
68
|
"<|turn|>",
|
|
67
69
|
"<|end|>",
|
|
68
70
|
"<|python_tag|>",
|
|
69
71
|
"<|im_end|>",
|
|
70
72
|
"<|eot_id|>",
|
|
71
73
|
"<|channel>thought",
|
|
74
|
+
# v2.2: tool-call XML tags leaked by Qwen / Hermes / Llama tool-call
|
|
75
|
+
# formats. These appear when the model writes tool calls as XML
|
|
76
|
+
# instead of structured JSON, or when the tokenizer's special-token
|
|
77
|
+
# handling leaks through.
|
|
78
|
+
"<|tool▁call|>",
|
|
79
|
+
"<|tool▁sep|>",
|
|
72
80
|
)
|
|
73
81
|
"""Default stop/harness markers stripped by ``strip_stop_markers``.
|
|
74
82
|
|
|
75
83
|
Covers Llama 3.x (``<|python_tag|>``, ``<|eot_id|>``), ChatML / Qwen
|
|
76
|
-
(``<|im_end|>``, ``<|end|>``), Gemma-ish (``<|turn|>``)
|
|
77
|
-
harmony (``<|channel>thought``)
|
|
78
|
-
|
|
79
|
-
|
|
84
|
+
(``<|im_end|>``, ``<|end|>``), Gemma-ish (``<|turn|>``), OpenAI-
|
|
85
|
+
harmony (``<|channel>thought``), and Qwen / Hermes tool-call markers
|
|
86
|
+
(``<|tool▁call|>``, ``<|tool▁sep|>``). Extending this tuple is an ABI
|
|
87
|
+
change — users who need a bespoke set can add a dedicated filter entry
|
|
88
|
+
in a later minor.
|
|
80
89
|
"""
|
|
81
90
|
|
|
82
91
|
|
|
@@ -292,6 +301,87 @@ class StripStopMarkersFilter:
|
|
|
292
301
|
return "".join(out_parts)
|
|
293
302
|
|
|
294
303
|
|
|
304
|
+
# ---------------------------------------------------------------------------
|
|
305
|
+
# strip_tool_call_xml (v2.2)
|
|
306
|
+
# ---------------------------------------------------------------------------
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
_TOOL_CALL_OPEN = "<tool_call>"
_TOOL_CALL_CLOSE = "</tool_call>"


class StripToolCallXmlFilter:
    """Streaming filter that drops ``<tool_call>...</tool_call>`` blocks.

    Some Qwen / Hermes / Llama tool-call formats write tool calls into the
    content stream as ``<tool_call>{"name": "Bash", ...}</tool_call>``.
    Once ``tool_repair`` has extracted the structured JSON, the XML wrapper
    left in the text only confuses downstream clients, so this filter
    removes the tags together with everything between them.

    Ordering: the filter is meant to run after ``tool_repair``; the filter
    chain is applied at the adapter boundary (post-repair), which gives that
    ordering without extra configuration.

    The scanning approach (open/close tag state machine with partial-tag
    holdback across chunk boundaries) follows ``StripThinkingFilter``.
    """

    name = "strip_tool_call_xml"

    def __init__(self) -> None:
        """Start outside any block, with nothing buffered or modified."""
        self.modified: bool = False
        self._in_block: bool = False
        self._buffer: str = ""

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Append ``text`` to the stream and return what may be emitted now.

        Text outside a block is emitted, except for a trailing fragment that
        could be the start of an opening tag, which is held back until the
        next call. Text inside a block is discarded, keeping only a trailing
        fragment that could be the start of the closing tag.

        Args:
            text: Next chunk of assistant content.
            eof: When true, flush: held-back text outside a block is
                emitted, and an unterminated block is dropped silently.

        Returns:
            The filtered text that is safe to pass downstream.
        """
        self._buffer += text
        emitted: list[str] = []

        scanning = True
        while scanning:
            if self._in_block:
                # Inside a block: everything up to the close tag is dropped.
                close_at = self._buffer.find(_TOOL_CALL_CLOSE)
                if close_at == -1:
                    # Keep only a possible partial close tag for next time.
                    keep = _max_suffix_overlap(self._buffer, _TOOL_CALL_CLOSE)
                    self._buffer = self._buffer[-keep:] if keep else ""
                    scanning = False
                else:
                    self._buffer = self._buffer[close_at + len(_TOOL_CALL_CLOSE) :]
                    self._in_block = False
            else:
                open_at = self._buffer.find(_TOOL_CALL_OPEN)
                if open_at == -1:
                    # No open tag: emit all but a possible partial prefix.
                    keep = _max_suffix_overlap(self._buffer, _TOOL_CALL_OPEN)
                    if keep:
                        emitted.append(self._buffer[:-keep])
                        self._buffer = self._buffer[-keep:]
                    else:
                        emitted.append(self._buffer)
                        self._buffer = ""
                    scanning = False
                else:
                    emitted.append(self._buffer[:open_at])
                    self._buffer = self._buffer[open_at + len(_TOOL_CALL_OPEN) :]
                    self._in_block = True
                    self.modified = True

        if eof:
            # A held-back fragment outside a block was ordinary text after
            # all; a block still open at end of stream is discarded.
            if not self._in_block:
                emitted.append(self._buffer)
            self._buffer = ""
        return "".join(emitted)
|
|
383
|
+
|
|
384
|
+
|
|
295
385
|
# ---------------------------------------------------------------------------
|
|
296
386
|
# Registry + chain
|
|
297
387
|
# ---------------------------------------------------------------------------
|
|
@@ -300,6 +390,7 @@ class StripStopMarkersFilter:
|
|
|
300
390
|
KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
|
|
301
391
|
StripThinkingFilter.name: StripThinkingFilter,
|
|
302
392
|
StripStopMarkersFilter.name: StripStopMarkersFilter,
|
|
393
|
+
StripToolCallXmlFilter.name: StripToolCallXmlFilter,
|
|
303
394
|
}
|
|
304
395
|
"""Registry of string-name → filter class.
|
|
305
396
|
|
coderouter/routing/budget.py
CHANGED
|
@@ -187,5 +187,40 @@ class BudgetTracker:
|
|
|
187
187
|
self._totals.clear()
|
|
188
188
|
self._month = current
|
|
189
189
|
|
|
190
|
+
# ------------------------------------------------------------------
|
|
191
|
+
# v2.0-K: Persistence
|
|
192
|
+
# ------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
def save_state(self) -> dict[str, object]:
    """Return the tracker's month key and totals as a plain dict.

    The values are read while holding ``self._lock``; the totals are copied
    so the result does not alias the tracker's internal mapping.

    Returns:
        ``{"month": <current month key>, "totals": <copy of totals>}``.
    """
    with self._lock:
        month_key = self._month
        totals_copy = dict(self._totals)
    return {"month": month_key, "totals": totals_copy}
|
|
204
|
+
|
|
205
|
+
def load_state(self, state: dict[str, object]) -> None:
    """Replace the tracker's totals with those from a saved snapshot.

    The snapshot is applied only when its ``"month"`` equals the current
    UTC month key; totals from any other month are ignored. Entries in
    ``"totals"`` whose value is not an ``int`` or ``float`` are skipped.

    Args:
        state: Snapshot in the shape produced by ``save_state``. Anything
            that is not a dict is ignored.
    """
    if not isinstance(state, dict):
        return

    saved_month = state.get("month", "")
    with self._lock:
        current = _utc_month_key()
        if saved_month == current:
            saved_totals = state.get("totals", {})
            if isinstance(saved_totals, dict):
                restored: dict = {}
                for key, amount in saved_totals.items():
                    if isinstance(amount, (int, float)):
                        restored[key] = float(amount)
                self._totals = restored
            # The month is recorded even when the totals payload was unusable.
            self._month = current
|
|
224
|
+
|
|
190
225
|
|
|
191
226
|
__all__ = ["BudgetTracker"]
|
coderouter/routing/fallback.py
CHANGED
|
@@ -24,12 +24,16 @@ Dual entry points (v0.3.x-1):
|
|
|
24
24
|
|
|
25
25
|
from __future__ import annotations
|
|
26
26
|
|
|
27
|
+
import asyncio
|
|
27
28
|
import time
|
|
28
29
|
from collections.abc import AsyncIterator
|
|
29
30
|
from typing import TYPE_CHECKING, Any, Final
|
|
30
31
|
|
|
31
32
|
if TYPE_CHECKING:
|
|
33
|
+
from coderouter.config.schemas import FallbackChain
|
|
32
34
|
from coderouter.guards.drift_detection import DriftVerdict
|
|
35
|
+
from coderouter.guards.self_healing import SelfHealingOrchestrator
|
|
36
|
+
from coderouter.state.store import StateStore
|
|
33
37
|
|
|
34
38
|
from coderouter.adapters.anthropic_native import AnthropicAdapter
|
|
35
39
|
from coderouter.adapters.base import (
|
|
@@ -51,7 +55,9 @@ from coderouter.guards.memory_pressure import (
|
|
|
51
55
|
)
|
|
52
56
|
from coderouter.guards.tool_loop import (
|
|
53
57
|
DEFAULT_LOOP_INJECT_HINT,
|
|
58
|
+
ToolCountExceededError,
|
|
54
59
|
ToolLoopBreakError,
|
|
60
|
+
check_total_tool_count,
|
|
55
61
|
detect_tool_loop,
|
|
56
62
|
inject_loop_break_hint,
|
|
57
63
|
)
|
|
@@ -130,7 +136,8 @@ def _apply_tool_loop_guard(
|
|
|
130
136
|
|
|
131
137
|
Returns the (possibly mutated) request. Raises
|
|
132
138
|
:class:`ToolLoopBreakError` when the configured action is ``break``
|
|
133
|
-
and a loop was detected.
|
|
139
|
+
and a loop was detected. Also raises :class:`ToolCountExceededError`
|
|
140
|
+
when the total tool-call count exceeds ``max_tool_calls`` (v2.2).
|
|
134
141
|
|
|
135
142
|
Profile resolution: uses ``request.profile`` (the X-CodeRouter-Mode
|
|
136
143
|
header / explicit body field) and falls back to
|
|
@@ -149,6 +156,30 @@ def _apply_tool_loop_guard(
|
|
|
149
156
|
# resolution path produces its own diagnostic.
|
|
150
157
|
return request
|
|
151
158
|
|
|
159
|
+
# v2.2: total tool-call count hard cap — runs before streak
|
|
160
|
+
# detection because it's a cheaper O(n) scan that catches a
|
|
161
|
+
# broader class of runaway behavior.
|
|
162
|
+
if profile.max_tool_calls > 0:
|
|
163
|
+
exceeded = check_total_tool_count(
|
|
164
|
+
request,
|
|
165
|
+
max_calls=profile.max_tool_calls,
|
|
166
|
+
)
|
|
167
|
+
if exceeded is not None:
|
|
168
|
+
logger.warning(
|
|
169
|
+
"tool-count-exceeded",
|
|
170
|
+
extra={
|
|
171
|
+
"profile": profile.name,
|
|
172
|
+
"total_count": exceeded.total_count,
|
|
173
|
+
"max_allowed": exceeded.max_allowed,
|
|
174
|
+
"action": profile.tool_loop_action,
|
|
175
|
+
},
|
|
176
|
+
)
|
|
177
|
+
if profile.tool_loop_action == "break":
|
|
178
|
+
raise ToolCountExceededError(exceeded, profile.name)
|
|
179
|
+
# For "warn" and "inject" actions, log only and continue.
|
|
180
|
+
# The inject action's hint is not meaningful for count
|
|
181
|
+
# exceeded (not a same-tool loop), so we just warn.
|
|
182
|
+
|
|
152
183
|
detection = detect_tool_loop(
|
|
153
184
|
request,
|
|
154
185
|
window=profile.tool_loop_window,
|
|
@@ -818,6 +849,12 @@ class FallbackEngine:
|
|
|
818
849
|
# Distinct from v1.9-C ``adaptive`` which handles the
|
|
819
850
|
# gradient case via a rolling window.
|
|
820
851
|
self._backend_health_monitor: BackendHealthMonitor = BackendHealthMonitor()
|
|
852
|
+
# v2.0-J: self-healing orchestrator. Manages provider exclusion,
|
|
853
|
+
# restart, and recovery probing when backend_health_action is
|
|
854
|
+
# "exclude". Composes with the L5 backend health monitor.
|
|
855
|
+
from coderouter.guards.self_healing import SelfHealingOrchestrator
|
|
856
|
+
|
|
857
|
+
self._self_healing: SelfHealingOrchestrator = SelfHealingOrchestrator()
|
|
821
858
|
# v2.0-G (L4): per-process drift detection window manager.
|
|
822
859
|
# Stores per-provider rolling observations; the detector is
|
|
823
860
|
# invoked after each provider-ok / provider-failed event and
|
|
@@ -831,6 +868,12 @@ class FallbackEngine:
|
|
|
831
868
|
self._drift_demoted: dict[str, float] = {}
|
|
832
869
|
# Last drift verdict (set by _observe_drift_signal for ingress header).
|
|
833
870
|
self._last_drift_verdict: DriftVerdict | None = None
|
|
871
|
+
# v2.0-J: active recovery probe tasks (one per excluded provider).
|
|
872
|
+
self._recovery_tasks: dict[str, asyncio.Task[None]] = {}
|
|
873
|
+
# v2.0-J: shutdown event shared with recovery probe tasks.
|
|
874
|
+
self._recovery_shutdown: asyncio.Event | None = None
|
|
875
|
+
# v2.0-K: persistent state store (None = in-memory only).
|
|
876
|
+
self._state_store: StateStore | None = None
|
|
834
877
|
|
|
835
878
|
@property
|
|
836
879
|
def last_drift_severity(self) -> str | None:
|
|
@@ -914,6 +957,20 @@ class FallbackEngine:
|
|
|
914
957
|
def _backend_health(self) -> BackendHealthMonitor:
|
|
915
958
|
return self.backend_health
|
|
916
959
|
|
|
960
|
+
@property
def self_healing(self) -> SelfHealingOrchestrator:
    """Return the v2.0-J self-healing orchestrator, creating it on demand.

    Engines built via ``__new__`` (as some tests do) never run ``__init__``
    and so have no ``_self_healing`` attribute; in that case an orchestrator
    is created on first access and cached on the instance.
    """
    from coderouter.guards.self_healing import SelfHealingOrchestrator

    orchestrator = getattr(self, "_self_healing", None)
    if orchestrator is not None:
        return orchestrator

    self._self_healing = SelfHealingOrchestrator()
    return self._self_healing
|
|
973
|
+
|
|
917
974
|
def _observe_provider_failure(
|
|
918
975
|
self,
|
|
919
976
|
provider: str,
|
|
@@ -991,6 +1048,18 @@ class FallbackEngine:
|
|
|
991
1048
|
new_state=transition.new_state,
|
|
992
1049
|
consecutive_failures=transition.consecutive_failures,
|
|
993
1050
|
)
|
|
1051
|
+
# v2.0-J: trigger self-healing on UNHEALTHY + exclude.
|
|
1052
|
+
if (
|
|
1053
|
+
transition.new_state == "UNHEALTHY"
|
|
1054
|
+
and bh_action == "exclude"
|
|
1055
|
+
):
|
|
1056
|
+
newly_excluded = self.self_healing.on_unhealthy(
|
|
1057
|
+
provider,
|
|
1058
|
+
profile=chosen,
|
|
1059
|
+
consecutive_failures=transition.consecutive_failures,
|
|
1060
|
+
)
|
|
1061
|
+
if newly_excluded:
|
|
1062
|
+
self._spawn_recovery_probe(provider, chain=chain)
|
|
994
1063
|
|
|
995
1064
|
def _observe_provider_success(
|
|
996
1065
|
self,
|
|
@@ -1032,6 +1101,134 @@ class FallbackEngine:
|
|
|
1032
1101
|
consecutive_failures=transition.consecutive_failures,
|
|
1033
1102
|
)
|
|
1034
1103
|
|
|
1104
|
+
def _spawn_recovery_probe(
|
|
1105
|
+
self,
|
|
1106
|
+
provider: str,
|
|
1107
|
+
*,
|
|
1108
|
+
chain: FallbackChain,
|
|
1109
|
+
) -> None:
|
|
1110
|
+
"""Launch an async recovery probe task for an excluded provider.
|
|
1111
|
+
|
|
1112
|
+
v2.0-J: called by ``_observe_provider_failure`` when a provider
|
|
1113
|
+
is newly excluded. The task runs ``recovery_probe_loop`` with
|
|
1114
|
+
exponential backoff until the provider recovers or shutdown.
|
|
1115
|
+
|
|
1116
|
+
Safe to call from a sync context — uses ``asyncio.get_event_loop``
|
|
1117
|
+
to schedule the task. No-op if no running event loop (e.g. in
|
|
1118
|
+
pure-sync tests).
|
|
1119
|
+
"""
|
|
1120
|
+
import asyncio
|
|
1121
|
+
|
|
1122
|
+
from coderouter.guards.self_healing import recovery_probe_loop
|
|
1123
|
+
|
|
1124
|
+
# Find the ProviderConfig for this provider name.
|
|
1125
|
+
provider_config = None
|
|
1126
|
+
for p in self.config.providers:
|
|
1127
|
+
if p.name == provider:
|
|
1128
|
+
provider_config = p
|
|
1129
|
+
break
|
|
1130
|
+
if provider_config is None:
|
|
1131
|
+
return
|
|
1132
|
+
|
|
1133
|
+
# Reuse or create a shared shutdown event.
|
|
1134
|
+
if self._recovery_shutdown is None:
|
|
1135
|
+
self._recovery_shutdown = asyncio.Event()
|
|
1136
|
+
|
|
1137
|
+
# Don't spawn duplicate tasks.
|
|
1138
|
+
existing = self._recovery_tasks.get(provider)
|
|
1139
|
+
if existing is not None and not existing.done():
|
|
1140
|
+
return
|
|
1141
|
+
|
|
1142
|
+
try:
|
|
1143
|
+
loop = asyncio.get_running_loop()
|
|
1144
|
+
except RuntimeError:
|
|
1145
|
+
return # no event loop — skip (sync test context)
|
|
1146
|
+
|
|
1147
|
+
task = loop.create_task(
|
|
1148
|
+
recovery_probe_loop(
|
|
1149
|
+
provider_config,
|
|
1150
|
+
orchestrator=self.self_healing,
|
|
1151
|
+
record_fn=self.backend_health.record_attempt,
|
|
1152
|
+
health_threshold=chain.backend_health_threshold,
|
|
1153
|
+
initial_interval_s=chain.recovery_probe_initial_s,
|
|
1154
|
+
max_interval_s=chain.recovery_probe_max_s,
|
|
1155
|
+
restart_timeout_s=chain.restart_timeout_s,
|
|
1156
|
+
probe_timeout_s=10.0,
|
|
1157
|
+
shutdown_event=self._recovery_shutdown,
|
|
1158
|
+
profile=chain.name,
|
|
1159
|
+
),
|
|
1160
|
+
name=f"recovery-probe-{provider}",
|
|
1161
|
+
)
|
|
1162
|
+
self._recovery_tasks[provider] = task
|
|
1163
|
+
|
|
1164
|
+
async def shutdown_recovery_probes(self) -> None:
    """Signal all recovery probe tasks to stop and wait for them to finish.

    Called from the app lifespan shutdown path. Sets the shared shutdown
    event (if one was ever created), waits for every probe task that is
    still running, then empties ``self._recovery_tasks``.

    Errors raised by individual probe tasks, including cancellation of a
    probe, are collected and discarded so one misbehaving probe cannot
    abort shutdown of the others.
    """
    if self._recovery_shutdown is not None:
        self._recovery_shutdown.set()

    # Snapshot first: a probe may be spawned while we are suspended in the
    # await below, and iterating the live dict across an await would raise
    # "dictionary changed size during iteration".
    pending = [task for task in self._recovery_tasks.values() if not task.done()]
    if pending:
        # return_exceptions=True turns task failures (and CancelledError of
        # a child task) into results instead of propagating them.
        await asyncio.gather(*pending, return_exceptions=True)

    self._recovery_tasks.clear()
|
|
1178
|
+
|
|
1179
|
+
# ------------------------------------------------------------------
|
|
1180
|
+
# v2.0-K: State persistence
|
|
1181
|
+
# ------------------------------------------------------------------
|
|
1182
|
+
|
|
1183
|
+
def attach_state_store(self, store: StateStore) -> None:
    """Remember ``store`` as the engine's state store and restore from it.

    Called from the app lifespan startup path when ``state_dir`` is
    configured. The store is attached first, then ``_load_all_state`` is
    invoked to pull persisted subsystem state out of it.

    Args:
        store: The :class:`StateStore` to persist to and restore from.
    """
    # Attach before loading: the loader reads the store from the instance.
    self._state_store = store
    self._load_all_state()
|
|
1192
|
+
|
|
1193
|
+
def save_all_state(self) -> None:
    """Persist all subsystem state to the attached store.

    Called from the app lifespan shutdown path and optionally on a
    periodic timer. No-op if no store is attached.

    Saving is best-effort per subsystem: a failure while snapshotting or
    writing one subsystem is logged as a ``state-save-failed`` warning
    (with traceback) and the remaining subsystems are still saved. Nothing
    is raised to the caller.
    """
    store = self._state_store
    if store is None:
        return
    import logging

    # Each snapshot is resolved lazily inside the try below, so a failing
    # attribute lookup is handled the same way as a failing save.
    sources = (
        ("budget", lambda: self._budget.save_state()),
        ("health", lambda: self.backend_health.save_state()),
        ("self_healing", lambda: self.self_healing.save_state()),
    )
    for namespace, snapshot in sources:
        try:
            store.put(namespace, "state", snapshot())
        except Exception:
            # Persistence must never take the engine down, but a silent
            # failure would leave operators with stale state and no clue.
            logging.getLogger(__name__).warning(
                "state-save-failed",
                extra={"namespace": namespace},
                exc_info=True,
            )
    # MetricsCollector state is saved separately via the singleton.
|
|
1211
|
+
|
|
1212
|
+
def _load_all_state(self) -> None:
|
|
1213
|
+
"""Restore subsystem state from the attached store."""
|
|
1214
|
+
store = self._state_store
|
|
1215
|
+
if store is None:
|
|
1216
|
+
return
|
|
1217
|
+
import contextlib
|
|
1218
|
+
|
|
1219
|
+
with contextlib.suppress(Exception):
|
|
1220
|
+
budget_state = store.get("budget", "state")
|
|
1221
|
+
if budget_state is not None:
|
|
1222
|
+
self._budget.load_state(budget_state) # type: ignore[arg-type]
|
|
1223
|
+
with contextlib.suppress(Exception):
|
|
1224
|
+
health_state = store.get("health", "state")
|
|
1225
|
+
if health_state is not None:
|
|
1226
|
+
self.backend_health.load_state(health_state) # type: ignore[arg-type]
|
|
1227
|
+
with contextlib.suppress(Exception):
|
|
1228
|
+
sh_state = store.get("self_healing", "state")
|
|
1229
|
+
if sh_state is not None:
|
|
1230
|
+
self.self_healing.load_state(sh_state) # type: ignore[arg-type]
|
|
1231
|
+
|
|
1035
1232
|
def _observe_drift_signal(
|
|
1036
1233
|
self,
|
|
1037
1234
|
provider: str,
|
|
@@ -1340,6 +1537,19 @@ class FallbackEngine:
|
|
|
1340
1537
|
profile=chosen,
|
|
1341
1538
|
)
|
|
1342
1539
|
adapters = healthy + unhealthy
|
|
1540
|
+
|
|
1541
|
+
# Pass 4b: v2.0-J self-healing exclusion. When the action is
|
|
1542
|
+
# "exclude", providers in the orchestrator's excluded set are
|
|
1543
|
+
# removed entirely from the chain. Unlike "demote" (which
|
|
1544
|
+
# moves to the back), excluded providers are not attempted at
|
|
1545
|
+
# all — recovery probes run in the background to detect when
|
|
1546
|
+
# they come back. If all providers are excluded, fall through
|
|
1547
|
+
# to the existing NoProvidersAvailableError path.
|
|
1548
|
+
if chain.backend_health_action == "exclude":
|
|
1549
|
+
excluded = self.self_healing.excluded_providers()
|
|
1550
|
+
if excluded:
|
|
1551
|
+
adapters = [a for a in adapters if a.name not in excluded]
|
|
1552
|
+
|
|
1343
1553
|
return adapters
|
|
1344
1554
|
|
|
1345
1555
|
def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Persistent state layer (v2.0-K).
|
|
2
|
+
|
|
3
|
+
Four modules:
|
|
4
|
+
|
|
5
|
+
* :mod:`coderouter.state.store` — sqlite3 KV store for operational
|
|
6
|
+
metadata (budget totals, health
|
|
7
|
+
state, self-healing exclusions).
|
|
8
|
+
* :mod:`coderouter.state.audit_log` — JSONL structured event log with
|
|
9
|
+
rotation and CLI reader.
|
|
10
|
+
* :mod:`coderouter.state.request_log` — JSONL request metadata journal
|
|
11
|
+
(per-request token counts, cost,
|
|
12
|
+
provider — no request body).
|
|
13
|
+
* :mod:`coderouter.state.replay` — Statistical A/B analysis engine
|
|
14
|
+
over request journal entries.
|
|
15
|
+
"""
|