coderouter-cli 2.1.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
coderouter/cli.py CHANGED
@@ -196,6 +196,104 @@ def _build_parser() -> argparse.ArgumentParser:
196
196
  help="Print one snapshot as plain text and exit (scripts / non-tty).",
197
197
  )
198
198
 
199
+ # v2.0-K: `coderouter audit` — read structured JSONL audit log.
200
+ audit = sub.add_parser(
201
+ "audit",
202
+ help="Read the structured audit log (v2.0-K).",
203
+ description=(
204
+ "Read and filter the JSONL audit log written by `coderouter serve` "
205
+ "when state_dir and audit_log are configured. Shows guard activations, "
206
+ "chain fallbacks, budget warnings, self-healing events, and drift "
207
+ "transitions in chronological order."
208
+ ),
209
+ )
210
+ audit.add_argument(
211
+ "--state-dir",
212
+ default=None,
213
+ help=(
214
+ "Path to the state directory containing audit.jsonl. "
215
+ "Defaults to ~/.coderouter/state/."
216
+ ),
217
+ )
218
+ audit.add_argument(
219
+ "--tail",
220
+ type=int,
221
+ default=None,
222
+ metavar="N",
223
+ help="Show only the last N entries.",
224
+ )
225
+ audit.add_argument(
226
+ "--filter",
227
+ default=None,
228
+ metavar="EVENT",
229
+ help="Only entries whose event name contains this substring (case-insensitive).",
230
+ )
231
+ audit.add_argument(
232
+ "--since",
233
+ default=None,
234
+ metavar="DATETIME",
235
+ help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
236
+ )
237
+ audit.add_argument(
238
+ "--summary",
239
+ action="store_true",
240
+ help="Print event type → count summary instead of individual entries.",
241
+ )
242
+
243
+ # v2.0-K (Replay): `coderouter replay` — statistical A/B analysis
244
+ # of request journal metadata across providers.
245
+ replay = sub.add_parser(
246
+ "replay",
247
+ help="Statistical replay analysis of request journal (v2.0-K).",
248
+ description=(
249
+ "Read the request metadata journal and display per-provider "
250
+ "statistics (token counts, cost, cache hit ratios). Optionally "
251
+ "compare two providers side-by-side. Request/response bodies "
252
+ "are not recorded, so this is statistical analysis — not "
253
+ "literal re-execution."
254
+ ),
255
+ )
256
+ replay.add_argument(
257
+ "--state-dir",
258
+ default=None,
259
+ help=(
260
+ "Path to the state directory containing requests.jsonl. "
261
+ "Defaults to ~/.coderouter/state/."
262
+ ),
263
+ )
264
+ replay.add_argument(
265
+ "--log",
266
+ default=None,
267
+ metavar="PATH",
268
+ help="Direct path to the request journal JSONL file (overrides --state-dir).",
269
+ )
270
+ replay.add_argument(
271
+ "--provider",
272
+ default=None,
273
+ metavar="NAME",
274
+ help="Filter entries to this provider only.",
275
+ )
276
+ replay.add_argument(
277
+ "--compare",
278
+ nargs=2,
279
+ metavar=("A", "B"),
280
+ default=None,
281
+ help="Compare two providers side-by-side (e.g. --compare anthropic-api openrouter-free).",
282
+ )
283
+ replay.add_argument(
284
+ "--since",
285
+ default=None,
286
+ metavar="DATETIME",
287
+ help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
288
+ )
289
+ replay.add_argument(
290
+ "--limit",
291
+ type=int,
292
+ default=None,
293
+ metavar="N",
294
+ help="Use only the last N entries (applied after --since and --provider filters).",
295
+ )
296
+
199
297
  return parser
200
298
 
201
299
 
@@ -277,6 +375,12 @@ def main(argv: list[str] | None = None) -> int:
277
375
 
278
376
  return stats_main(args.url, interval=args.interval, once=args.once)
279
377
 
378
+ if args.command == "audit":
379
+ return _run_audit(args)
380
+
381
+ if args.command == "replay":
382
+ return _run_replay(args)
383
+
280
384
  print(f"unknown command: {args.command}", file=sys.stderr)
281
385
  return 2
282
386
 
@@ -476,6 +580,121 @@ def _run_apply_or_dry_run(
476
580
 
477
581
 
478
582
 
583
def _run_audit(args: argparse.Namespace) -> int:
    """v2.0-K: read and display the structured audit log.

    Resolves ``audit.jsonl`` under ``--state-dir`` (default
    ``~/.coderouter/state/``), hands the ``--tail`` / ``--filter`` /
    ``--since`` options to ``read_audit_log`` and prints either one line
    per entry or, with ``--summary``, an event -> count table.

    Args:
        args: Parsed CLI namespace. Reads ``state_dir``, ``tail``,
            ``filter``, ``since`` and ``summary``.

    Returns:
        ``1`` when the audit log file does not exist (a hint is written
        to stderr), ``0`` otherwise — including when no entry matches.
    """
    import json

    state_dir = Path(args.state_dir).expanduser() if args.state_dir else (
        Path.home() / ".coderouter" / "state"
    )
    log_path = state_dir / "audit.jsonl"

    if not log_path.exists():
        print(f"audit: no audit log found at {log_path}", file=sys.stderr)
        print(
            " Ensure state_dir and audit_log are configured in providers.yaml.",
            file=sys.stderr,
        )
        return 1

    # Imported only once there is a log to read, so the "not configured"
    # hint above does not depend on the state package being importable.
    from coderouter.state.audit_log import read_audit_log, summarize_audit_log

    entries = read_audit_log(
        log_path,
        tail=args.tail,
        event_filter=args.filter,
        since=args.since,
    )

    if not entries:
        print("audit: no matching entries found.")
        return 0

    if args.summary:
        summary = summarize_audit_log(entries)
        print(f"Audit log summary ({len(entries)} entries):\n")
        for event, count in summary.items():
            # ``!s`` keeps the ``s`` format spec valid even if an entry
            # carried a non-string event name.
            print(f" {event!s:<40s} {count:>6d}")
        return 0

    for entry in entries:
        ts = entry.get("ts", "")
        event = entry.get("event", "")
        # JSON may carry ``null`` or a number here; the ``<7s`` spec below
        # only accepts str, so normalise instead of aborting the listing.
        raw_level = entry.get("level", "")
        level = "" if raw_level is None else str(raw_level)
        # Everything that is not one of the three header fields is shown
        # as a compact JSON object after the event name.
        extras = {
            k: v
            for k, v in entry.items()
            if k not in ("ts", "event", "level")
        }
        extra_str = ""
        if extras:
            extra_str = " " + json.dumps(extras, default=str, ensure_ascii=False)
        print(f"[{ts}] {level:<7s} {event}{extra_str}")

    return 0
640
+
641
+
642
def _run_replay(args: argparse.Namespace) -> int:
    """v2.0-K (Replay): statistical analysis of the request journal.

    Locates the journal (``--log`` wins over ``--state-dir``, which wins
    over the default ``~/.coderouter/state/requests.jsonl``), loads it
    through ``read_request_log`` with the ``--provider`` / ``--since``
    options, optionally keeps only the last ``--limit`` entries, and
    prints either a two-provider comparison (``--compare A B``) or a
    summary table.

    Args:
        args: Parsed CLI namespace. Reads ``log``, ``state_dir``,
            ``provider``, ``since``, ``limit`` and ``compare``.

    Returns:
        ``1`` when the journal file does not exist, ``0`` otherwise
        (including when no entry matches).
    """
    from coderouter.state.replay import (
        compare_providers,
        format_comparison_table,
        format_summary_table,
        summarize_window,
    )
    from coderouter.state.request_log import read_request_log

    # Pick the journal location: explicit file, explicit dir, or default.
    if args.log:
        journal = Path(args.log).expanduser()
    elif args.state_dir:
        journal = Path(args.state_dir).expanduser() / "requests.jsonl"
    else:
        journal = Path.home() / ".coderouter" / "state" / "requests.jsonl"

    if not journal.exists():
        print(f"replay: no request journal found at {journal}", file=sys.stderr)
        print(
            " Ensure state_dir and request_log are configured in providers.yaml.",
            file=sys.stderr,
        )
        return 1

    records = read_request_log(
        journal,
        provider_filter=args.provider,
        since=args.since,
    )

    # A missing, zero or negative --limit leaves the window untouched.
    limit = args.limit
    if limit is not None and limit > 0:
        records = records[-limit:]

    if not records:
        print("replay: no matching entries found.")
        return 0

    if args.compare:
        first, second = args.compare
        report = format_comparison_table(
            compare_providers(records, first, second)
        )
    else:
        report = format_summary_table(summarize_window(records))
    print(report)

    return 0
696
+
697
+
479
698
  def _run_check_env(arg_value: str) -> int:
480
699
  """v1.6.3: filesystem / git security checks for `.env`.
481
700
 
@@ -242,6 +242,21 @@ class ProviderConfig(BaseModel):
242
242
  "LM Studio Qwen3.5 128K → 131072, Anthropic Claude → 200000."
243
243
  ),
244
244
  )
245
+ # v2.0-J: optional shell command to restart this provider's backend
246
+ # process when it becomes UNHEALTHY. Executed via subprocess when
247
+ # self-healing is enabled and the provider crosses the UNHEALTHY
248
+ # threshold. Security: opt-in only — unset means no restart attempt.
249
+ restart_command: str | None = Field(
250
+ default=None,
251
+ description=(
252
+ "v2.0-J (Self-healing): shell command to restart this "
253
+ "provider's backend process. Examples: 'ollama serve', "
254
+ "'open -a LM\\ Studio'. Only executed when the profile's "
255
+ "backend_health_action is 'exclude' and the provider "
256
+ "transitions to UNHEALTHY. Unset = no automatic restart "
257
+ "(recovery probe still runs, waiting for manual restart)."
258
+ ),
259
+ )
245
260
 
246
261
  @model_validator(mode="after")
247
262
  def _check_output_filters_known(self) -> ProviderConfig:
@@ -350,6 +365,24 @@ class FallbackChain(BaseModel):
350
365
  "error response. See FallbackChain comment for trade-offs."
351
366
  ),
352
367
  )
368
+ # v2.2: total tool-call count hard cap. A safety valve against
369
+ # runaway agents that call many *different* tools without looping
370
+ # (which the streak-based L3 detector misses). Set to 0 to
371
+ # disable the cap entirely.
372
+ max_tool_calls: int = Field(
373
+ default=50,
374
+ ge=0,
375
+ le=1000,
376
+ description=(
377
+ "v2.2: maximum total tool_use blocks allowed in the "
378
+ "conversation. When exceeded, the request is rejected with "
379
+ "a ``tool_count_exceeded`` error (if tool_loop_action is "
380
+ "``break``) or logged (if ``warn``). Set to 0 to disable. "
381
+ "Default 50 is deliberately more permissive than Unsloth "
382
+ "Studio's 25 — Claude Code agent sessions routinely reach "
383
+ "25+ calls in normal operation."
384
+ ),
385
+ )
353
386
  # v1.9-E phase 2 (L2): memory-pressure detection + cooldown.
354
387
  #
355
388
  # Local backends (Ollama / LM Studio / llama.cpp) report VRAM
@@ -410,7 +443,7 @@ class FallbackChain(BaseModel):
410
443
  # Distinct from the v1.9-C ``adaptive`` gradient (continuous
411
444
  # latency / error-rate buffer with debounce) which handles the
412
445
  # "slow but alive" case; L5 handles the "hard crash" case.
413
- backend_health_action: Literal["off", "warn", "demote"] = Field(
446
+ backend_health_action: Literal["off", "warn", "demote", "exclude"] = Field(
414
447
  default="warn",
415
448
  description=(
416
449
  "v1.9-E (L5 phase 2): action when a provider transitions "
@@ -420,7 +453,12 @@ class FallbackChain(BaseModel):
420
453
  "moves the UNHEALTHY provider to the back of the chain "
421
454
  "for the next ``_resolve_chain`` (similar to v1.9-C "
422
455
  "adaptive demotion but state-machine-based, not "
423
- "rolling-window-based). ``off`` disables the monitor "
456
+ "rolling-window-based). ``exclude`` (v2.0-J) removes the "
457
+ "UNHEALTHY provider from the chain entirely + triggers "
458
+ "self-healing (restart helper if configured, recovery "
459
+ "probe with exponential backoff). On recovery, the "
460
+ "provider is automatically restored to its original "
461
+ "chain position. ``off`` disables the monitor "
424
462
  "entirely (zero observation overhead, identical to "
425
463
  "v1.9.x behavior)."
426
464
  ),
@@ -439,6 +477,41 @@ class FallbackChain(BaseModel):
439
477
  "blips that the v1.9-C adaptive adjuster already handles."
440
478
  ),
441
479
  )
480
+ # v2.0-J: self-healing recovery probe configuration.
481
+ recovery_probe_initial_s: float = Field(
482
+ default=30.0,
483
+ ge=5.0,
484
+ le=600.0,
485
+ description=(
486
+ "v2.0-J: initial interval (seconds) for recovery probes "
487
+ "sent to an UNHEALTHY-excluded provider. Each failed probe "
488
+ "doubles the interval up to ``recovery_probe_max_s``. "
489
+ "A successful probe restores the provider to its original "
490
+ "chain position immediately."
491
+ ),
492
+ )
493
+ recovery_probe_max_s: float = Field(
494
+ default=300.0,
495
+ ge=30.0,
496
+ le=3600.0,
497
+ description=(
498
+ "v2.0-J: maximum interval (seconds) for recovery probe "
499
+ "exponential backoff. Default 300 s (5 min) means a dead "
500
+ "backend is probed at most every 5 minutes indefinitely "
501
+ "until it recovers or the server shuts down."
502
+ ),
503
+ )
504
+ restart_timeout_s: float = Field(
505
+ default=30.0,
506
+ ge=5.0,
507
+ le=120.0,
508
+ description=(
509
+ "v2.0-J: timeout (seconds) for the restart_command "
510
+ "subprocess. If the command doesn't complete within this "
511
+ "window, it is killed. Prevents hung restart commands from "
512
+ "blocking recovery."
513
+ ),
514
+ )
442
515
  adaptive: bool = Field(
443
516
  default=False,
444
517
  description=(
@@ -871,6 +944,63 @@ class CodeRouterConfig(BaseModel):
871
944
  ),
872
945
  )
873
946
 
947
+ # v2.0-K: Persistent state — survive restarts.
948
+ state_dir: str | None = Field(
949
+ default=None,
950
+ description=(
951
+ "v2.0-K: directory for persistent state (sqlite3 KV store + "
952
+ "audit log). None = in-memory only (no persistence, backward-"
953
+ "compatible). Set to a path like '~/.coderouter/state/' to "
954
+ "enable cross-restart durability for budget totals, health "
955
+ "state, and self-healing exclusions. The directory is created "
956
+ "automatically if it doesn't exist."
957
+ ),
958
+ )
959
+ audit_log: Literal["off", "active"] = Field(
960
+ default="off",
961
+ description=(
962
+ "v2.0-K: structured audit log. 'active' writes guard "
963
+ "activations, chain fallbacks, budget warnings, self-healing "
964
+ "events, and drift transitions to a JSONL file in state_dir. "
965
+ "'off' = no audit log (backward-compatible default). Requires "
966
+ "state_dir to be set."
967
+ ),
968
+ )
969
+ audit_log_max_bytes: int = Field(
970
+ default=10_485_760,
971
+ ge=1_048_576,
972
+ le=1_073_741_824,
973
+ description=(
974
+ "v2.0-K: maximum audit log file size before rotation (bytes). "
975
+ "Default 10 MiB. When exceeded, the current file is renamed "
976
+ "to audit.jsonl.1 and a fresh file is started. Only one "
977
+ "backup is kept."
978
+ ),
979
+ )
980
+ request_log: Literal["off", "active"] = Field(
981
+ default="off",
982
+ description=(
983
+ "v2.0-K (Replay): request metadata journal. 'active' records "
984
+ "per-request metadata (provider, token counts, cost, streaming "
985
+ "flag) to a JSONL file in state_dir on every successful "
986
+ "response. Request/response bodies are NOT recorded (privacy "
987
+ "+ size). Used by ``coderouter replay`` for statistical A/B "
988
+ "analysis. 'off' = no journal (backward-compatible default). "
989
+ "Requires state_dir to be set."
990
+ ),
991
+ )
992
+ request_log_max_bytes: int = Field(
993
+ default=52_428_800,
994
+ ge=1_048_576,
995
+ le=1_073_741_824,
996
+ description=(
997
+ "v2.0-K (Replay): maximum request journal file size before "
998
+ "rotation (bytes). Default 50 MiB. Same single-backup "
999
+ "rotation as audit_log — when exceeded, the current file is "
1000
+ "renamed to requests.jsonl.1 and a fresh file is started."
1001
+ ),
1002
+ )
1003
+
874
1004
  @model_validator(mode="after")
875
1005
  def _check_default_profile_exists(self) -> CodeRouterConfig:
876
1006
  """v0.6-A: surface a typo'd ``default_profile`` at load time.
@@ -6,10 +6,12 @@ failure modes that a continuously-running local-LLM agent loop tends
6
6
  to hit:
7
7
 
8
8
  * :mod:`coderouter.guards.tool_loop` — L3 stuck-tool detection
9
- * :mod:`coderouter.guards.memory_pressure` — L2 backend OOM
10
- awareness (planned)
11
- * :mod:`coderouter.guards.backend_health` — L5 continuous probe +
12
- chain reorder (planned)
9
+ * :mod:`coderouter.guards.memory_pressure` — L2 backend OOM awareness
10
+ * :mod:`coderouter.guards.backend_health` — L5 health state machine +
11
+ chain reorder
12
+ * :mod:`coderouter.guards.self_healing` — v2.0-J auto-exclude +
13
+ restart + recovery probe
14
+ * :mod:`coderouter.guards.continuous_probe` — v2.0-I background probing
13
15
 
14
16
  Each guard is a pure-functional / single-class module that the engine
15
17
  consults at the appropriate dispatch point. Guards never block the
@@ -200,6 +200,40 @@ class BackendHealthMonitor:
200
200
  """True iff ``provider``'s current state is ``UNHEALTHY``."""
201
201
  return self.state_for(provider) == "UNHEALTHY"
202
202
 
203
+ # ------------------------------------------------------------------
204
+ # v2.0-K: Persistence
205
+ # ------------------------------------------------------------------
206
+
207
def save_state(self) -> dict[str, object]:
    """Snapshot every tracked provider's health for persistence.

    Returns:
        A new dict mapping each provider name in ``self._state`` to a
        plain ``{"state": ..., "consecutive_failures": ...}`` dict. The
        snapshot is built while holding ``self._lock``.
    """
    snapshot: dict[str, object] = {}
    with self._lock:
        for provider, health in self._state.items():
            snapshot[provider] = {
                "state": health.state,
                "consecutive_failures": health.consecutive_failures,
            }
    return snapshot
217
+
218
def load_state(self, state: dict[str, object]) -> None:
    """Restore health state from a previously saved dict.

    Invalid input is tolerated rather than raised on: a non-dict
    ``state`` is ignored entirely, non-dict per-provider values are
    skipped, an unknown ``state`` string falls back to ``"HEALTHY"`` and
    an invalid ``consecutive_failures`` falls back to ``0``.

    Args:
        state: Mapping of provider name to a dict with ``state`` and
            ``consecutive_failures`` keys, in the shape ``save_state``
            produces.
    """
    if not isinstance(state, dict):
        return
    with self._lock:
        for name, data in state.items():
            if not isinstance(data, dict):
                continue
            saved_state = data.get("state", "HEALTHY")
            if saved_state not in ("HEALTHY", "DEGRADED", "UNHEALTHY"):
                saved_state = "HEALTHY"
            failures = data.get("consecutive_failures", 0)
            # ``bool`` is a subclass of ``int``; a persisted ``true`` /
            # ``false`` is not a failure count, so treat it as invalid too.
            if (
                isinstance(failures, bool)
                or not isinstance(failures, int)
                or failures < 0
            ):
                failures = 0
            self._state[name] = _ProviderHealth(
                state=saved_state,  # type: ignore[arg-type]
                consecutive_failures=failures,
            )
236
+
203
237
 
204
238
  __all__ = [
205
239
  "BackendHealthMonitor",