coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +219 -0
- coderouter/config/schemas.py +235 -2
- coderouter/guards/__init__.py +6 -4
- coderouter/guards/backend_health.py +34 -0
- coderouter/guards/continuous_probe.py +349 -0
- coderouter/guards/drift_actions.py +111 -0
- coderouter/guards/drift_detection.py +308 -0
- coderouter/guards/self_healing.py +413 -0
- coderouter/guards/tool_loop.py +71 -0
- coderouter/ingress/anthropic_routes.py +106 -12
- coderouter/ingress/app.py +129 -0
- coderouter/logging.py +370 -0
- coderouter/metrics/collector.py +168 -0
- coderouter/metrics/prometheus.py +141 -0
- coderouter/output_filters.py +95 -4
- coderouter/routing/adaptive.py +23 -0
- coderouter/routing/budget.py +35 -0
- coderouter/routing/fallback.py +496 -5
- coderouter/state/__init__.py +15 -0
- coderouter/state/audit_log.py +269 -0
- coderouter/state/replay.py +316 -0
- coderouter/state/request_log.py +178 -0
- coderouter/state/store.py +212 -0
- coderouter/translation/tool_repair.py +42 -1
- coderouter_cli-2.2.0.dist-info/METADATA +243 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +29 -20
- coderouter_cli-2.0.0.dist-info/METADATA +0 -559
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0
coderouter/cli.py
CHANGED
|
@@ -196,6 +196,104 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
196
196
|
help="Print one snapshot as plain text and exit (scripts / non-tty).",
|
|
197
197
|
)
|
|
198
198
|
|
|
199
|
+
# v2.0-K: `coderouter audit` — read structured JSONL audit log.
|
|
200
|
+
audit = sub.add_parser(
|
|
201
|
+
"audit",
|
|
202
|
+
help="Read the structured audit log (v2.0-K).",
|
|
203
|
+
description=(
|
|
204
|
+
"Read and filter the JSONL audit log written by `coderouter serve` "
|
|
205
|
+
"when state_dir and audit_log are configured. Shows guard activations, "
|
|
206
|
+
"chain fallbacks, budget warnings, self-healing events, and drift "
|
|
207
|
+
"transitions in chronological order."
|
|
208
|
+
),
|
|
209
|
+
)
|
|
210
|
+
audit.add_argument(
|
|
211
|
+
"--state-dir",
|
|
212
|
+
default=None,
|
|
213
|
+
help=(
|
|
214
|
+
"Path to the state directory containing audit.jsonl. "
|
|
215
|
+
"Defaults to ~/.coderouter/state/."
|
|
216
|
+
),
|
|
217
|
+
)
|
|
218
|
+
audit.add_argument(
|
|
219
|
+
"--tail",
|
|
220
|
+
type=int,
|
|
221
|
+
default=None,
|
|
222
|
+
metavar="N",
|
|
223
|
+
help="Show only the last N entries.",
|
|
224
|
+
)
|
|
225
|
+
audit.add_argument(
|
|
226
|
+
"--filter",
|
|
227
|
+
default=None,
|
|
228
|
+
metavar="EVENT",
|
|
229
|
+
help="Only entries whose event name contains this substring (case-insensitive).",
|
|
230
|
+
)
|
|
231
|
+
audit.add_argument(
|
|
232
|
+
"--since",
|
|
233
|
+
default=None,
|
|
234
|
+
metavar="DATETIME",
|
|
235
|
+
help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
|
|
236
|
+
)
|
|
237
|
+
audit.add_argument(
|
|
238
|
+
"--summary",
|
|
239
|
+
action="store_true",
|
|
240
|
+
help="Print event type → count summary instead of individual entries.",
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
# v2.0-K (Replay): `coderouter replay` — statistical A/B analysis
|
|
244
|
+
# of request journal metadata across providers.
|
|
245
|
+
replay = sub.add_parser(
|
|
246
|
+
"replay",
|
|
247
|
+
help="Statistical replay analysis of request journal (v2.0-K).",
|
|
248
|
+
description=(
|
|
249
|
+
"Read the request metadata journal and display per-provider "
|
|
250
|
+
"statistics (token counts, cost, cache hit ratios). Optionally "
|
|
251
|
+
"compare two providers side-by-side. Request/response bodies "
|
|
252
|
+
"are not recorded, so this is statistical analysis — not "
|
|
253
|
+
"literal re-execution."
|
|
254
|
+
),
|
|
255
|
+
)
|
|
256
|
+
replay.add_argument(
|
|
257
|
+
"--state-dir",
|
|
258
|
+
default=None,
|
|
259
|
+
help=(
|
|
260
|
+
"Path to the state directory containing requests.jsonl. "
|
|
261
|
+
"Defaults to ~/.coderouter/state/."
|
|
262
|
+
),
|
|
263
|
+
)
|
|
264
|
+
replay.add_argument(
|
|
265
|
+
"--log",
|
|
266
|
+
default=None,
|
|
267
|
+
metavar="PATH",
|
|
268
|
+
help="Direct path to the request journal JSONL file (overrides --state-dir).",
|
|
269
|
+
)
|
|
270
|
+
replay.add_argument(
|
|
271
|
+
"--provider",
|
|
272
|
+
default=None,
|
|
273
|
+
metavar="NAME",
|
|
274
|
+
help="Filter entries to this provider only.",
|
|
275
|
+
)
|
|
276
|
+
replay.add_argument(
|
|
277
|
+
"--compare",
|
|
278
|
+
nargs=2,
|
|
279
|
+
metavar=("A", "B"),
|
|
280
|
+
default=None,
|
|
281
|
+
help="Compare two providers side-by-side (e.g. --compare anthropic-api openrouter-free).",
|
|
282
|
+
)
|
|
283
|
+
replay.add_argument(
|
|
284
|
+
"--since",
|
|
285
|
+
default=None,
|
|
286
|
+
metavar="DATETIME",
|
|
287
|
+
help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
|
|
288
|
+
)
|
|
289
|
+
replay.add_argument(
|
|
290
|
+
"--limit",
|
|
291
|
+
type=int,
|
|
292
|
+
default=None,
|
|
293
|
+
metavar="N",
|
|
294
|
+
help="Use only the last N entries (applied after --since and --provider filters).",
|
|
295
|
+
)
|
|
296
|
+
|
|
199
297
|
return parser
|
|
200
298
|
|
|
201
299
|
|
|
@@ -277,6 +375,12 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
277
375
|
|
|
278
376
|
return stats_main(args.url, interval=args.interval, once=args.once)
|
|
279
377
|
|
|
378
|
+
if args.command == "audit":
|
|
379
|
+
return _run_audit(args)
|
|
380
|
+
|
|
381
|
+
if args.command == "replay":
|
|
382
|
+
return _run_replay(args)
|
|
383
|
+
|
|
280
384
|
print(f"unknown command: {args.command}", file=sys.stderr)
|
|
281
385
|
return 2
|
|
282
386
|
|
|
@@ -476,6 +580,121 @@ def _run_apply_or_dry_run(
|
|
|
476
580
|
|
|
477
581
|
|
|
478
582
|
|
|
583
|
+
def _run_audit(args: argparse.Namespace) -> int:
    """v2.0-K: read and display the structured audit log.

    Resolves ``audit.jsonl`` inside ``--state-dir`` (falling back to
    ``~/.coderouter/state/``), forwards the ``--tail`` / ``--filter`` /
    ``--since`` options to ``read_audit_log`` and prints either an
    event → count summary (``--summary``) or one compact line per entry.

    Args:
        args: Parsed namespace of the ``audit`` sub-command. The attributes
            read here are ``state_dir``, ``tail``, ``filter``, ``since``
            and ``summary``.

    Returns:
        ``1`` when the audit log file does not exist, ``0`` otherwise
        (including the case where no entry matches the filters).
    """
    import json

    if args.state_dir:
        state_dir = Path(args.state_dir).expanduser()
    else:
        state_dir = Path.home() / ".coderouter" / "state"
    log_path = state_dir / "audit.jsonl"

    if not log_path.exists():
        print(f"audit: no audit log found at {log_path}", file=sys.stderr)
        print(
            " Ensure state_dir and audit_log are configured in providers.yaml.",
            file=sys.stderr,
        )
        return 1

    # Imported only once the log is known to exist, so the "missing log"
    # path above does not depend on the state package being importable.
    from coderouter.state.audit_log import read_audit_log, summarize_audit_log

    entries = read_audit_log(
        log_path,
        tail=args.tail,
        event_filter=args.filter,
        since=args.since,
    )

    if not entries:
        print("audit: no matching entries found.")
        return 0

    if args.summary:
        summary = summarize_audit_log(entries)
        print(f"Audit log summary ({len(entries)} entries):\n")
        for event, count in summary.items():
            # str(): the "s" format spec rejects non-string keys (e.g. None).
            print(f" {str(event):<40s} {count:>6d}")
        return 0

    for entry in entries:
        ts = entry.get("ts", "")
        event = entry.get("event", "")
        # A JSONL record may carry a null or numeric level; coerce it so the
        # width/alignment spec below cannot raise on a single odd record.
        level = str(entry.get("level", ""))
        # Everything that is not part of the fixed prefix is shown as JSON.
        extras = {
            k: v
            for k, v in entry.items()
            if k not in ("ts", "event", "level")
        }
        extra_str = ""
        if extras:
            extra_str = " " + json.dumps(extras, default=str, ensure_ascii=False)
        print(f"[{ts}] {level:<7s} {event}{extra_str}")

    return 0
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _run_replay(args: argparse.Namespace) -> int:
    """v2.0-K (Replay): statistical A/B analysis of the request journal.

    Loads the request journal and prints either a per-provider summary
    table or, when ``--compare A B`` is given, a side-by-side comparison
    of the two named providers.

    Args:
        args: Parsed namespace of the ``replay`` sub-command. The attributes
            read here are ``log``, ``state_dir``, ``provider``, ``since``,
            ``limit`` and ``compare``.

    Returns:
        ``1`` when the journal file does not exist, ``0`` otherwise
        (including the case where no entry matches the filters).
    """
    from coderouter.state.replay import (
        compare_providers,
        format_comparison_table,
        format_summary_table,
        summarize_window,
    )
    from coderouter.state.request_log import read_request_log

    # Journal location: an explicit --log wins, then --state-dir, then the
    # default state directory under the user's home.
    if args.log:
        journal = Path(args.log).expanduser()
    elif args.state_dir:
        journal = Path(args.state_dir).expanduser() / "requests.jsonl"
    else:
        journal = Path.home() / ".coderouter" / "state" / "requests.jsonl"

    if not journal.exists():
        print(f"replay: no request journal found at {journal}", file=sys.stderr)
        print(
            " Ensure state_dir and request_log are configured in providers.yaml.",
            file=sys.stderr,
        )
        return 1

    records = read_request_log(
        journal,
        provider_filter=args.provider,
        since=args.since,
    )

    # --limit is applied after the provider/since filters; a missing or
    # non-positive value leaves the list untouched.
    keep_last = args.limit
    if keep_last is not None and keep_last > 0:
        records = records[-keep_last:]

    if not records:
        print("replay: no matching entries found.")
        return 0

    if args.compare:
        first, second = args.compare
        report = format_comparison_table(compare_providers(records, first, second))
    else:
        report = format_summary_table(summarize_window(records))
    print(report)

    return 0
|
|
696
|
+
|
|
697
|
+
|
|
479
698
|
def _run_check_env(arg_value: str) -> int:
|
|
480
699
|
"""v1.6.3: filesystem / git security checks for `.env`.
|
|
481
700
|
|
coderouter/config/schemas.py
CHANGED
|
@@ -242,6 +242,21 @@ class ProviderConfig(BaseModel):
|
|
|
242
242
|
"LM Studio Qwen3.5 128K → 131072, Anthropic Claude → 200000."
|
|
243
243
|
),
|
|
244
244
|
)
|
|
245
|
+
# v2.0-J: optional shell command to restart this provider's backend
|
|
246
|
+
# process when it becomes UNHEALTHY. Executed via subprocess when
|
|
247
|
+
# self-healing is enabled and the provider crosses the UNHEALTHY
|
|
248
|
+
# threshold. Security: opt-in only — unset means no restart attempt.
|
|
249
|
+
restart_command: str | None = Field(
|
|
250
|
+
default=None,
|
|
251
|
+
description=(
|
|
252
|
+
"v2.0-J (Self-healing): shell command to restart this "
|
|
253
|
+
"provider's backend process. Examples: 'ollama serve', "
|
|
254
|
+
"'open -a LM\\ Studio'. Only executed when the profile's "
|
|
255
|
+
"backend_health_action is 'exclude' and the provider "
|
|
256
|
+
"transitions to UNHEALTHY. Unset = no automatic restart "
|
|
257
|
+
"(recovery probe still runs, waiting for manual restart)."
|
|
258
|
+
),
|
|
259
|
+
)
|
|
245
260
|
|
|
246
261
|
@model_validator(mode="after")
|
|
247
262
|
def _check_output_filters_known(self) -> ProviderConfig:
|
|
@@ -350,6 +365,24 @@ class FallbackChain(BaseModel):
|
|
|
350
365
|
"error response. See FallbackChain comment for trade-offs."
|
|
351
366
|
),
|
|
352
367
|
)
|
|
368
|
+
# v2.2: total tool-call count hard cap. A safety valve against
|
|
369
|
+
# runaway agents that call many *different* tools without looping
|
|
370
|
+
# (which the streak-based L3 detector misses). Set to 0 to
|
|
371
|
+
# disable the cap entirely.
|
|
372
|
+
max_tool_calls: int = Field(
|
|
373
|
+
default=50,
|
|
374
|
+
ge=0,
|
|
375
|
+
le=1000,
|
|
376
|
+
description=(
|
|
377
|
+
"v2.2: maximum total tool_use blocks allowed in the "
|
|
378
|
+
"conversation. When exceeded, the request is rejected with "
|
|
379
|
+
"a ``tool_count_exceeded`` error (if tool_loop_action is "
|
|
380
|
+
"``break``) or logged (if ``warn``). Set to 0 to disable. "
|
|
381
|
+
"Default 50 is deliberately more permissive than Unsloth "
|
|
382
|
+
"Studio's 25 — Claude Code agent sessions routinely reach "
|
|
383
|
+
"25+ calls in normal operation."
|
|
384
|
+
),
|
|
385
|
+
)
|
|
353
386
|
# v1.9-E phase 2 (L2): memory-pressure detection + cooldown.
|
|
354
387
|
#
|
|
355
388
|
# Local backends (Ollama / LM Studio / llama.cpp) report VRAM
|
|
@@ -410,7 +443,7 @@ class FallbackChain(BaseModel):
|
|
|
410
443
|
# Distinct from the v1.9-C ``adaptive`` gradient (continuous
|
|
411
444
|
# latency / error-rate buffer with debounce) which handles the
|
|
412
445
|
# "slow but alive" case; L5 handles the "hard crash" case.
|
|
413
|
-
backend_health_action: Literal["off", "warn", "demote"] = Field(
|
|
446
|
+
backend_health_action: Literal["off", "warn", "demote", "exclude"] = Field(
|
|
414
447
|
default="warn",
|
|
415
448
|
description=(
|
|
416
449
|
"v1.9-E (L5 phase 2): action when a provider transitions "
|
|
@@ -420,7 +453,12 @@ class FallbackChain(BaseModel):
|
|
|
420
453
|
"moves the UNHEALTHY provider to the back of the chain "
|
|
421
454
|
"for the next ``_resolve_chain`` (similar to v1.9-C "
|
|
422
455
|
"adaptive demotion but state-machine-based, not "
|
|
423
|
-
"rolling-window-based). ``off`` disables the monitor "
|
|
456
|
+
"rolling-window-based). ``exclude`` (v2.0-J) removes the "
|
|
457
|
+
"UNHEALTHY provider from the chain entirely + triggers "
|
|
458
|
+
"self-healing (restart helper if configured, recovery "
|
|
459
|
+
"probe with exponential backoff). On recovery, the "
|
|
460
|
+
"provider is automatically restored to its original "
|
|
461
|
+
"chain position. ``off`` disables the monitor "
|
|
424
462
|
"entirely (zero observation overhead, identical to "
|
|
425
463
|
"v1.9.x behavior)."
|
|
426
464
|
),
|
|
@@ -439,6 +477,41 @@ class FallbackChain(BaseModel):
|
|
|
439
477
|
"blips that the v1.9-C adaptive adjuster already handles."
|
|
440
478
|
),
|
|
441
479
|
)
|
|
480
|
+
# v2.0-J: self-healing recovery probe configuration.
|
|
481
|
+
recovery_probe_initial_s: float = Field(
|
|
482
|
+
default=30.0,
|
|
483
|
+
ge=5.0,
|
|
484
|
+
le=600.0,
|
|
485
|
+
description=(
|
|
486
|
+
"v2.0-J: initial interval (seconds) for recovery probes "
|
|
487
|
+
"sent to an UNHEALTHY-excluded provider. Each failed probe "
|
|
488
|
+
"doubles the interval up to ``recovery_probe_max_s``. "
|
|
489
|
+
"A successful probe restores the provider to its original "
|
|
490
|
+
"chain position immediately."
|
|
491
|
+
),
|
|
492
|
+
)
|
|
493
|
+
recovery_probe_max_s: float = Field(
|
|
494
|
+
default=300.0,
|
|
495
|
+
ge=30.0,
|
|
496
|
+
le=3600.0,
|
|
497
|
+
description=(
|
|
498
|
+
"v2.0-J: maximum interval (seconds) for recovery probe "
|
|
499
|
+
"exponential backoff. Default 300 s (5 min) means a dead "
|
|
500
|
+
"backend is probed at most every 5 minutes indefinitely "
|
|
501
|
+
"until it recovers or the server shuts down."
|
|
502
|
+
),
|
|
503
|
+
)
|
|
504
|
+
restart_timeout_s: float = Field(
|
|
505
|
+
default=30.0,
|
|
506
|
+
ge=5.0,
|
|
507
|
+
le=120.0,
|
|
508
|
+
description=(
|
|
509
|
+
"v2.0-J: timeout (seconds) for the restart_command "
|
|
510
|
+
"subprocess. If the command doesn't complete within this "
|
|
511
|
+
"window, it is killed. Prevents hung restart commands from "
|
|
512
|
+
"blocking recovery."
|
|
513
|
+
),
|
|
514
|
+
)
|
|
442
515
|
adaptive: bool = Field(
|
|
443
516
|
default=False,
|
|
444
517
|
description=(
|
|
@@ -531,6 +604,73 @@ class FallbackChain(BaseModel):
|
|
|
531
604
|
),
|
|
532
605
|
)
|
|
533
606
|
|
|
607
|
+
# ------------------------------------------------------------------
|
|
608
|
+
# v2.0-G (L4): Drift detection — response quality degradation guard
|
|
609
|
+
# ------------------------------------------------------------------
|
|
610
|
+
#
|
|
611
|
+
# Long-running sessions on local LLMs can suffer gradual quality
|
|
612
|
+
# decay (KV cache pressure, thermal throttling, VRAM fragmentation)
|
|
613
|
+
# where the model "succeeds" but produces empty/short/toolless
|
|
614
|
+
# responses. This guard observes response quality signals in a
|
|
615
|
+
# rolling window and detects statistical drift.
|
|
616
|
+
#
|
|
617
|
+
# Four actions:
|
|
618
|
+
# * ``off`` — no detection (default).
|
|
619
|
+
# * ``warn`` — emit structured log + response header.
|
|
620
|
+
# * ``promote`` — ``warn`` + demote drifted provider in chain.
|
|
621
|
+
# * ``reload`` — ``promote`` + attempt KV cache flush (Ollama).
|
|
622
|
+
drift_detection_action: Literal["off", "warn", "promote", "reload"] = Field(
|
|
623
|
+
default="off",
|
|
624
|
+
description=(
|
|
625
|
+
"v2.0-G (L4): action on response quality drift detection. "
|
|
626
|
+
"``off`` (default) disables drift detection. ``warn`` emits "
|
|
627
|
+
"a log and response header. ``promote`` additionally demotes "
|
|
628
|
+
"the drifted provider in the chain. ``reload`` attempts to "
|
|
629
|
+
"flush the provider's KV cache (Ollama only) before promoting."
|
|
630
|
+
),
|
|
631
|
+
)
|
|
632
|
+
drift_detection_window_size: int = Field(
|
|
633
|
+
default=20,
|
|
634
|
+
ge=4,
|
|
635
|
+
le=200,
|
|
636
|
+
description=(
|
|
637
|
+
"v2.0-G (L4): number of recent responses to keep in the "
|
|
638
|
+
"rolling observation window per provider. Larger windows "
|
|
639
|
+
"are more robust to noise but slower to detect drift."
|
|
640
|
+
),
|
|
641
|
+
)
|
|
642
|
+
drift_detection_cooldown_s: int = Field(
|
|
643
|
+
default=300,
|
|
644
|
+
ge=10,
|
|
645
|
+
le=3600,
|
|
646
|
+
description=(
|
|
647
|
+
"v2.0-G (L4): seconds after a promote/reload action before "
|
|
648
|
+
"the drifted provider's rank is reset for recovery check. "
|
|
649
|
+
"Default 300s (5 min) gives the model time to stabilize."
|
|
650
|
+
),
|
|
651
|
+
)
|
|
652
|
+
drift_detection_sensitivity: Literal["low", "normal", "high"] = Field(
|
|
653
|
+
default="normal",
|
|
654
|
+
description=(
|
|
655
|
+
"v2.0-G (L4): threshold preset for drift signals. "
|
|
656
|
+
"``low`` tolerates more degradation before triggering, "
|
|
657
|
+
"``high`` is stricter (fewer bad responses needed)."
|
|
658
|
+
),
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
# --- v2.0-H (L6): Mid-stream partial stitching --------------------------
|
|
662
|
+
# * ``off`` — discard partial content on mid-stream failure (legacy).
|
|
663
|
+
# * ``surface`` — return partial content as a truncated-but-valid response.
|
|
664
|
+
partial_stitch_action: Literal["off", "surface"] = Field(
|
|
665
|
+
default="off",
|
|
666
|
+
description=(
|
|
667
|
+
"v2.0-H (L6): action when a streaming response fails mid-stream. "
|
|
668
|
+
"``off`` discards partial content (legacy error event). "
|
|
669
|
+
"``surface`` returns accumulated text as a graceful stream "
|
|
670
|
+
"termination with a ``coderouter_partial`` metadata event."
|
|
671
|
+
),
|
|
672
|
+
)
|
|
673
|
+
|
|
534
674
|
|
|
535
675
|
# ---------------------------------------------------------------------------
|
|
536
676
|
# v1.6-A: auto_router — declarative request-body classifier
|
|
@@ -768,6 +908,99 @@ class CodeRouterConfig(BaseModel):
|
|
|
768
908
|
),
|
|
769
909
|
)
|
|
770
910
|
|
|
911
|
+
# v2.0-I: Continuous probing — background health checks for idle periods.
|
|
912
|
+
continuous_probe: Literal["off", "active"] = Field(
|
|
913
|
+
default="off",
|
|
914
|
+
description=(
|
|
915
|
+
"v2.0-I: enable background health probes. 'active' starts a "
|
|
916
|
+
"background task that periodically sends 1-token requests to "
|
|
917
|
+
"each provider, feeding results into the L5 backend health "
|
|
918
|
+
"state machine. 'off' = no probing (backward-compatible default)."
|
|
919
|
+
),
|
|
920
|
+
)
|
|
921
|
+
probe_interval_s: float = Field(
|
|
922
|
+
default=60.0,
|
|
923
|
+
ge=5.0,
|
|
924
|
+
le=3600.0,
|
|
925
|
+
description=(
|
|
926
|
+
"v2.0-I: seconds between probe rounds. Lower = faster detection "
|
|
927
|
+
"but more probe traffic. 60s is a good balance for local models."
|
|
928
|
+
),
|
|
929
|
+
)
|
|
930
|
+
probe_paid: bool = Field(
|
|
931
|
+
default=False,
|
|
932
|
+
description=(
|
|
933
|
+
"v2.0-I: whether to probe providers marked ``paid: true``. "
|
|
934
|
+
"Default false protects operators from accidental API charges."
|
|
935
|
+
),
|
|
936
|
+
)
|
|
937
|
+
probe_timeout_s: float = Field(
|
|
938
|
+
default=10.0,
|
|
939
|
+
ge=1.0,
|
|
940
|
+
le=60.0,
|
|
941
|
+
description=(
|
|
942
|
+
"v2.0-I: per-provider timeout for probe requests. A provider "
|
|
943
|
+
"that doesn't respond within this window is recorded as failed."
|
|
944
|
+
),
|
|
945
|
+
)
|
|
946
|
+
|
|
947
|
+
# v2.0-K: Persistent state — survive restarts.
|
|
948
|
+
state_dir: str | None = Field(
|
|
949
|
+
default=None,
|
|
950
|
+
description=(
|
|
951
|
+
"v2.0-K: directory for persistent state (sqlite3 KV store + "
|
|
952
|
+
"audit log). None = in-memory only (no persistence, backward-"
|
|
953
|
+
"compatible). Set to a path like '~/.coderouter/state/' to "
|
|
954
|
+
"enable cross-restart durability for budget totals, health "
|
|
955
|
+
"state, and self-healing exclusions. The directory is created "
|
|
956
|
+
"automatically if it doesn't exist."
|
|
957
|
+
),
|
|
958
|
+
)
|
|
959
|
+
audit_log: Literal["off", "active"] = Field(
|
|
960
|
+
default="off",
|
|
961
|
+
description=(
|
|
962
|
+
"v2.0-K: structured audit log. 'active' writes guard "
|
|
963
|
+
"activations, chain fallbacks, budget warnings, self-healing "
|
|
964
|
+
"events, and drift transitions to a JSONL file in state_dir. "
|
|
965
|
+
"'off' = no audit log (backward-compatible default). Requires "
|
|
966
|
+
"state_dir to be set."
|
|
967
|
+
),
|
|
968
|
+
)
|
|
969
|
+
audit_log_max_bytes: int = Field(
|
|
970
|
+
default=10_485_760,
|
|
971
|
+
ge=1_048_576,
|
|
972
|
+
le=1_073_741_824,
|
|
973
|
+
description=(
|
|
974
|
+
"v2.0-K: maximum audit log file size before rotation (bytes). "
|
|
975
|
+
"Default 10 MiB. When exceeded, the current file is renamed "
|
|
976
|
+
"to audit.jsonl.1 and a fresh file is started. Only one "
|
|
977
|
+
"backup is kept."
|
|
978
|
+
),
|
|
979
|
+
)
|
|
980
|
+
request_log: Literal["off", "active"] = Field(
|
|
981
|
+
default="off",
|
|
982
|
+
description=(
|
|
983
|
+
"v2.0-K (Replay): request metadata journal. 'active' records "
|
|
984
|
+
"per-request metadata (provider, token counts, cost, streaming "
|
|
985
|
+
"flag) to a JSONL file in state_dir on every successful "
|
|
986
|
+
"response. Request/response bodies are NOT recorded (privacy "
|
|
987
|
+
"+ size). Used by ``coderouter replay`` for statistical A/B "
|
|
988
|
+
"analysis. 'off' = no journal (backward-compatible default). "
|
|
989
|
+
"Requires state_dir to be set."
|
|
990
|
+
),
|
|
991
|
+
)
|
|
992
|
+
request_log_max_bytes: int = Field(
|
|
993
|
+
default=52_428_800,
|
|
994
|
+
ge=1_048_576,
|
|
995
|
+
le=1_073_741_824,
|
|
996
|
+
description=(
|
|
997
|
+
"v2.0-K (Replay): maximum request journal file size before "
|
|
998
|
+
"rotation (bytes). Default 50 MiB. Same single-backup "
|
|
999
|
+
"rotation as audit_log — when exceeded, the current file is "
|
|
1000
|
+
"renamed to requests.jsonl.1 and a fresh file is started."
|
|
1001
|
+
),
|
|
1002
|
+
)
|
|
1003
|
+
|
|
771
1004
|
@model_validator(mode="after")
|
|
772
1005
|
def _check_default_profile_exists(self) -> CodeRouterConfig:
|
|
773
1006
|
"""v0.6-A: surface a typo'd ``default_profile`` at load time.
|
coderouter/guards/__init__.py
CHANGED
|
@@ -6,10 +6,12 @@ failure modes that a continuously-running local-LLM agent loop tends
|
|
|
6
6
|
to hit:
|
|
7
7
|
|
|
8
8
|
* :mod:`coderouter.guards.tool_loop` — L3 stuck-tool detection
|
|
9
|
-
* :mod:`coderouter.guards.memory_pressure` — L2 backend OOM
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
* :mod:`coderouter.guards.memory_pressure` — L2 backend OOM awareness
|
|
10
|
+
* :mod:`coderouter.guards.backend_health` — L5 health state machine +
|
|
11
|
+
chain reorder
|
|
12
|
+
* :mod:`coderouter.guards.self_healing` — v2.0-J auto-exclude +
|
|
13
|
+
restart + recovery probe
|
|
14
|
+
* :mod:`coderouter.guards.continuous_probe` — v2.0-I background probing
|
|
13
15
|
|
|
14
16
|
Each guard is a pure-functional / single-class module that the engine
|
|
15
17
|
consults at the appropriate dispatch point. Guards never block the
|
|
@@ -200,6 +200,40 @@ class BackendHealthMonitor:
|
|
|
200
200
|
"""True iff ``provider``'s current state is ``UNHEALTHY``."""
|
|
201
201
|
return self.state_for(provider) == "UNHEALTHY"
|
|
202
202
|
|
|
203
|
+
# ------------------------------------------------------------------
|
|
204
|
+
# v2.0-K: Persistence
|
|
205
|
+
# ------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
def save_state(self) -> dict[str, object]:
    """Export the current per-provider health state for persistence.

    Returns:
        A new dict mapping each provider name in ``self._state`` to a
        ``{"state": ..., "consecutive_failures": ...}`` dict copied from
        that provider's entry.
    """
    snapshot: dict[str, object] = {}
    # Hold the lock for the whole walk so the snapshot is taken in one pass.
    with self._lock:
        for provider, health in self._state.items():
            snapshot[provider] = {
                "state": health.state,
                "consecutive_failures": health.consecutive_failures,
            }
    return snapshot
|
|
217
|
+
|
|
218
|
+
def load_state(self, state: dict[str, object]) -> None:
    """Restore health state from a previously saved dict.

    Malformed input is tolerated instead of raising: a non-dict ``state``
    is ignored, per-provider values that are not dicts are skipped, an
    unknown ``"state"`` string falls back to ``"HEALTHY"`` and an invalid
    ``"consecutive_failures"`` falls back to ``0``.

    Args:
        state: Mapping of provider name to a dict with the keys
            ``"state"`` and ``"consecutive_failures"``, i.e. the shape
            produced by ``save_state``.
    """
    if not isinstance(state, dict):
        return
    with self._lock:
        for name, data in state.items():
            if not isinstance(data, dict):
                continue
            saved_state = data.get("state", "HEALTHY")
            if saved_state not in ("HEALTHY", "DEGRADED", "UNHEALTHY"):
                saved_state = "HEALTHY"
            failures = data.get("consecutive_failures", 0)
            # bool is a subclass of int, so a persisted true/false would
            # otherwise slip through and be stored as a failure count.
            if (
                isinstance(failures, bool)
                or not isinstance(failures, int)
                or failures < 0
            ):
                failures = 0
            self._state[name] = _ProviderHealth(
                state=saved_state,  # type: ignore[arg-type]
                consecutive_failures=failures,
            )
|
|
236
|
+
|
|
203
237
|
|
|
204
238
|
__all__ = [
|
|
205
239
|
"BackendHealthMonitor",
|