PyPI - coderouter-cli - Versions diffs - 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl - Mend

coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

coderouter/cli.py +219 -0
coderouter/config/schemas.py +235 -2
coderouter/guards/__init__.py +6 -4
coderouter/guards/backend_health.py +34 -0
coderouter/guards/continuous_probe.py +349 -0
coderouter/guards/drift_actions.py +111 -0
coderouter/guards/drift_detection.py +308 -0
coderouter/guards/self_healing.py +413 -0
coderouter/guards/tool_loop.py +71 -0
coderouter/ingress/anthropic_routes.py +106 -12
coderouter/ingress/app.py +129 -0
coderouter/logging.py +370 -0
coderouter/metrics/collector.py +168 -0
coderouter/metrics/prometheus.py +141 -0
coderouter/output_filters.py +95 -4
coderouter/routing/adaptive.py +23 -0
coderouter/routing/budget.py +35 -0
coderouter/routing/fallback.py +496 -5
coderouter/state/__init__.py +15 -0
coderouter/state/audit_log.py +269 -0
coderouter/state/replay.py +316 -0
coderouter/state/request_log.py +178 -0
coderouter/state/store.py +212 -0
coderouter/translation/tool_repair.py +42 -1
coderouter_cli-2.2.0.dist-info/METADATA +243 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +29 -20
coderouter_cli-2.0.0.dist-info/METADATA +0 -559
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0

coderouter/cli.py CHANGED Viewed

@@ -196,6 +196,104 @@ def _build_parser() -> argparse.ArgumentParser:
         help="Print one snapshot as plain text and exit (scripts / non-tty).",
     )
+    # v2.0-K: `coderouter audit` — read structured JSONL audit log.
+    audit = sub.add_parser(
+        "audit",
+        help="Read the structured audit log (v2.0-K).",
+        description=(
+            "Read and filter the JSONL audit log written by `coderouter serve` "
+            "when state_dir and audit_log are configured. Shows guard activations, "
+            "chain fallbacks, budget warnings, self-healing events, and drift "
+            "transitions in chronological order."
+        ),
+    )
+    audit.add_argument(
+        "--state-dir",
+        default=None,
+        help=(
+            "Path to the state directory containing audit.jsonl. "
+            "Defaults to ~/.coderouter/state/."
+        ),
+    )
+    audit.add_argument(
+        "--tail",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Show only the last N entries.",
+    )
+    audit.add_argument(
+        "--filter",
+        default=None,
+        metavar="EVENT",
+        help="Only entries whose event name contains this substring (case-insensitive).",
+    )
+    audit.add_argument(
+        "--since",
+        default=None,
+        metavar="DATETIME",
+        help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
+    )
+    audit.add_argument(
+        "--summary",
+        action="store_true",
+        help="Print event type → count summary instead of individual entries.",
+    )
+    # v2.0-K (Replay): `coderouter replay` — statistical A/B analysis
+    # of request journal metadata across providers.
+    replay = sub.add_parser(
+        "replay",
+        help="Statistical replay analysis of request journal (v2.0-K).",
+        description=(
+            "Read the request metadata journal and display per-provider "
+            "statistics (token counts, cost, cache hit ratios). Optionally "
+            "compare two providers side-by-side. Request/response bodies "
+            "are not recorded, so this is statistical analysis — not "
+            "literal re-execution."
+        ),
+    )
+    replay.add_argument(
+        "--state-dir",
+        default=None,
+        help=(
+            "Path to the state directory containing requests.jsonl. "
+            "Defaults to ~/.coderouter/state/."
+        ),
+    )
+    replay.add_argument(
+        "--log",
+        default=None,
+        metavar="PATH",
+        help="Direct path to the request journal JSONL file (overrides --state-dir).",
+    )
+    replay.add_argument(
+        "--provider",
+        default=None,
+        metavar="NAME",
+        help="Filter entries to this provider only.",
+    )
+    replay.add_argument(
+        "--compare",
+        nargs=2,
+        metavar=("A", "B"),
+        default=None,
+        help="Compare two providers side-by-side (e.g. --compare anthropic-api openrouter-free).",
+    )
+    replay.add_argument(
+        "--since",
+        default=None,
+        metavar="DATETIME",
+        help="Only entries with ts >= this ISO 8601 prefix (e.g. '2026-05-06').",
+    )
+    replay.add_argument(
+        "--limit",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Use only the last N entries (applied after --since and --provider filters).",
+    )
     return parser
@@ -277,6 +375,12 @@ def main(argv: list[str] | None = None) -> int:
         return stats_main(args.url, interval=args.interval, once=args.once)
+    if args.command == "audit":
+        return _run_audit(args)
+    if args.command == "replay":
+        return _run_replay(args)
     print(f"unknown command: {args.command}", file=sys.stderr)
     return 2
@@ -476,6 +580,121 @@ def _run_apply_or_dry_run(
+def _run_audit(args: argparse.Namespace) -> int:
+    """v2.0-K: read and display the structured audit log.
+    Resolves the audit log path from --state-dir (or default
+    ~/.coderouter/state/) and renders entries with optional filtering.
+    """
+    import json
+    from coderouter.state.audit_log import read_audit_log, summarize_audit_log
+    state_dir = Path(args.state_dir).expanduser() if args.state_dir else (
+        Path.home() / ".coderouter" / "state"
+    )
+    log_path = state_dir / "audit.jsonl"
+    if not log_path.exists():
+        print(f"audit: no audit log found at {log_path}", file=sys.stderr)
+        print(
+            "  Ensure state_dir and audit_log are configured in providers.yaml.",
+            file=sys.stderr,
+        )
+        return 1
+    entries = read_audit_log(
+        log_path,
+        tail=args.tail,
+        event_filter=args.filter,
+        since=args.since,
+    )
+    if not entries:
+        print("audit: no matching entries found.")
+        return 0
+    if args.summary:
+        summary = summarize_audit_log(entries)
+        print(f"Audit log summary ({len(entries)} entries):\n")
+        for event, count in summary.items():
+            print(f"  {event:<40s} {count:>6d}")
+        return 0
+    for entry in entries:
+        ts = entry.get("ts", "")
+        event = entry.get("event", "")
+        level = entry.get("level", "")
+        # Build a compact one-line display.
+        extras = {
+            k: v
+            for k, v in entry.items()
+            if k not in ("ts", "event", "level")
+        }
+        extra_str = ""
+        if extras:
+            extra_str = " " + json.dumps(extras, default=str, ensure_ascii=False)
+        print(f"[{ts}] {level:<7s} {event}{extra_str}")
+    return 0
+def _run_replay(args: argparse.Namespace) -> int:
+    """v2.0-K (Replay): statistical A/B analysis of request journal.
+    Reads the request journal (requests.jsonl) and either displays a
+    per-provider summary table or a side-by-side comparison of two
+    providers.
+    """
+    from coderouter.state.replay import (
+        compare_providers,
+        format_comparison_table,
+        format_summary_table,
+        summarize_window,
+    )
+    from coderouter.state.request_log import read_request_log
+    # Resolve the journal file path.
+    if args.log:
+        log_path = Path(args.log).expanduser()
+    else:
+        state_dir = Path(args.state_dir).expanduser() if args.state_dir else (
+            Path.home() / ".coderouter" / "state"
+        )
+        log_path = state_dir / "requests.jsonl"
+    if not log_path.exists():
+        print(f"replay: no request journal found at {log_path}", file=sys.stderr)
+        print(
+            "  Ensure state_dir and request_log are configured in providers.yaml.",
+            file=sys.stderr,
+        )
+        return 1
+    entries = read_request_log(
+        log_path,
+        provider_filter=args.provider,
+        since=args.since,
+    )
+    if args.limit is not None and args.limit > 0:
+        entries = entries[-args.limit:]
+    if not entries:
+        print("replay: no matching entries found.")
+        return 0
+    if args.compare:
+        provider_a, provider_b = args.compare
+        comparison = compare_providers(entries, provider_a, provider_b)
+        print(format_comparison_table(comparison))
+    else:
+        summary = summarize_window(entries)
+        print(format_summary_table(summary))
+    return 0
 def _run_check_env(arg_value: str) -> int:
     """v1.6.3: filesystem / git security checks for `.env`.

coderouter/config/schemas.py CHANGED Viewed

@@ -242,6 +242,21 @@ class ProviderConfig(BaseModel):
             "LM Studio Qwen3.5 128K → 131072, Anthropic Claude → 200000."
         ),
     )
+    # v2.0-J: optional shell command to restart this provider's backend
+    # process when it becomes UNHEALTHY. Executed via subprocess when
+    # self-healing is enabled and the provider crosses the UNHEALTHY
+    # threshold. Security: opt-in only — unset means no restart attempt.
+    restart_command: str | None = Field(
+        default=None,
+        description=(
+            "v2.0-J (Self-healing): shell command to restart this "
+            "provider's backend process. Examples: 'ollama serve', "
+            "'open -a LM\\ Studio'. Only executed when the profile's "
+            "backend_health_action is 'exclude' and the provider "
+            "transitions to UNHEALTHY. Unset = no automatic restart "
+            "(recovery probe still runs, waiting for manual restart)."
+        ),
+    )
     @model_validator(mode="after")
     def _check_output_filters_known(self) -> ProviderConfig:
@@ -350,6 +365,24 @@ class FallbackChain(BaseModel):
             "error response. See FallbackChain comment for trade-offs."
         ),
     )
+    # v2.2: total tool-call count hard cap. A safety valve against
+    # runaway agents that call many *different* tools without looping
+    # (which the streak-based L3 detector misses). Set to 0 to
+    # disable the cap entirely.
+    max_tool_calls: int = Field(
+        default=50,
+        ge=0,
+        le=1000,
+        description=(
+            "v2.2: maximum total tool_use blocks allowed in the "
+            "conversation. When exceeded, the request is rejected with "
+            "a ``tool_count_exceeded`` error (if tool_loop_action is "
+            "``break``) or logged (if ``warn``). Set to 0 to disable. "
+            "Default 50 is deliberately more permissive than Unsloth "
+            "Studio's 25 — Claude Code agent sessions routinely reach "
+            "25+ calls in normal operation."
+        ),
+    )
     # v1.9-E phase 2 (L2): memory-pressure detection + cooldown.
     #
     # Local backends (Ollama / LM Studio / llama.cpp) report VRAM
@@ -410,7 +443,7 @@ class FallbackChain(BaseModel):
     # Distinct from the v1.9-C ``adaptive`` gradient (continuous
     # latency / error-rate buffer with debounce) which handles the
     # "slow but alive" case; L5 handles the "hard crash" case.
-    backend_health_action: Literal["off", "warn", "demote"] = Field(
+    backend_health_action: Literal["off", "warn", "demote", "exclude"] = Field(
         default="warn",
         description=(
             "v1.9-E (L5 phase 2): action when a provider transitions "
@@ -420,7 +453,12 @@ class FallbackChain(BaseModel):
             "moves the UNHEALTHY provider to the back of the chain "
             "for the next ``_resolve_chain`` (similar to v1.9-C "
             "adaptive demotion but state-machine-based, not "
-            "rolling-window-based). ``off`` disables the monitor "
+            "rolling-window-based). ``exclude`` (v2.0-J) removes the "
+            "UNHEALTHY provider from the chain entirely + triggers "
+            "self-healing (restart helper if configured, recovery "
+            "probe with exponential backoff). On recovery, the "
+            "provider is automatically restored to its original "
+            "chain position. ``off`` disables the monitor "
             "entirely (zero observation overhead, identical to "
             "v1.9.x behavior)."
         ),
@@ -439,6 +477,41 @@ class FallbackChain(BaseModel):
             "blips that the v1.9-C adaptive adjuster already handles."
         ),
     )
+    # v2.0-J: self-healing recovery probe configuration.
+    recovery_probe_initial_s: float = Field(
+        default=30.0,
+        ge=5.0,
+        le=600.0,
+        description=(
+            "v2.0-J: initial interval (seconds) for recovery probes "
+            "sent to an UNHEALTHY-excluded provider. Each failed probe "
+            "doubles the interval up to ``recovery_probe_max_s``. "
+            "A successful probe restores the provider to its original "
+            "chain position immediately."
+        ),
+    )
+    recovery_probe_max_s: float = Field(
+        default=300.0,
+        ge=30.0,
+        le=3600.0,
+        description=(
+            "v2.0-J: maximum interval (seconds) for recovery probe "
+            "exponential backoff. Default 300 s (5 min) means a dead "
+            "backend is probed at most every 5 minutes indefinitely "
+            "until it recovers or the server shuts down."
+        ),
+    )
+    restart_timeout_s: float = Field(
+        default=30.0,
+        ge=5.0,
+        le=120.0,
+        description=(
+            "v2.0-J: timeout (seconds) for the restart_command "
+            "subprocess. If the command doesn't complete within this "
+            "window, it is killed. Prevents hung restart commands from "
+            "blocking recovery."
+        ),
+    )
     adaptive: bool = Field(
         default=False,
         description=(
@@ -531,6 +604,73 @@ class FallbackChain(BaseModel):
         ),
     )
+    # ------------------------------------------------------------------
+    # v2.0-G (L4): Drift detection — response quality degradation guard
+    # ------------------------------------------------------------------
+    #
+    # Long-running sessions on local LLMs can suffer gradual quality
+    # decay (KV cache pressure, thermal throttling, VRAM fragmentation)
+    # where the model "succeeds" but produces empty/short/toolless
+    # responses. This guard observes response quality signals in a
+    # rolling window and detects statistical drift.
+    #
+    # Four actions:
+    #   * ``off``     — no detection (default).
+    #   * ``warn``    — emit structured log + response header.
+    #   * ``promote`` — ``warn`` + demote drifted provider in chain.
+    #   * ``reload``  — ``promote`` + attempt KV cache flush (Ollama).
+    drift_detection_action: Literal["off", "warn", "promote", "reload"] = Field(
+        default="off",
+        description=(
+            "v2.0-G (L4): action on response quality drift detection. "
+            "``off`` (default) disables drift detection. ``warn`` emits "
+            "a log and response header. ``promote`` additionally demotes "
+            "the drifted provider in the chain. ``reload`` attempts to "
+            "flush the provider's KV cache (Ollama only) before promoting."
+        ),
+    )
+    drift_detection_window_size: int = Field(
+        default=20,
+        ge=4,
+        le=200,
+        description=(
+            "v2.0-G (L4): number of recent responses to keep in the "
+            "rolling observation window per provider. Larger windows "
+            "are more robust to noise but slower to detect drift."
+        ),
+    )
+    drift_detection_cooldown_s: int = Field(
+        default=300,
+        ge=10,
+        le=3600,
+        description=(
+            "v2.0-G (L4): seconds after a promote/reload action before "
+            "the drifted provider's rank is reset for recovery check. "
+            "Default 300s (5 min) gives the model time to stabilize."
+        ),
+    )
+    drift_detection_sensitivity: Literal["low", "normal", "high"] = Field(
+        default="normal",
+        description=(
+            "v2.0-G (L4): threshold preset for drift signals. "
+            "``low`` tolerates more degradation before triggering, "
+            "``high`` is stricter (fewer bad responses needed)."
+        ),
+    )
+    # --- v2.0-H (L6): Mid-stream partial stitching --------------------------
+    #   * ``off``      — discard partial content on mid-stream failure (legacy).
+    #   * ``surface``  — return partial content as a truncated-but-valid response.
+    partial_stitch_action: Literal["off", "surface"] = Field(
+        default="off",
+        description=(
+            "v2.0-H (L6): action when a streaming response fails mid-stream. "
+            "``off`` discards partial content (legacy error event). "
+            "``surface`` returns accumulated text as a graceful stream "
+            "termination with a ``coderouter_partial`` metadata event."
+        ),
+    )
 # ---------------------------------------------------------------------------
 # v1.6-A: auto_router — declarative request-body classifier
@@ -768,6 +908,99 @@ class CodeRouterConfig(BaseModel):
         ),
     )
+    # v2.0-I: Continuous probing — background health checks for idle periods.
+    continuous_probe: Literal["off", "active"] = Field(
+        default="off",
+        description=(
+            "v2.0-I: enable background health probes. 'active' starts a "
+            "background task that periodically sends 1-token requests to "
+            "each provider, feeding results into the L5 backend health "
+            "state machine. 'off' = no probing (backward-compatible default)."
+        ),
+    )
+    probe_interval_s: float = Field(
+        default=60.0,
+        ge=5.0,
+        le=3600.0,
+        description=(
+            "v2.0-I: seconds between probe rounds. Lower = faster detection "
+            "but more probe traffic. 60s is a good balance for local models."
+        ),
+    )
+    probe_paid: bool = Field(
+        default=False,
+        description=(
+            "v2.0-I: whether to probe providers marked ``paid: true``. "
+            "Default false protects operators from accidental API charges."
+        ),
+    )
+    probe_timeout_s: float = Field(
+        default=10.0,
+        ge=1.0,
+        le=60.0,
+        description=(
+            "v2.0-I: per-provider timeout for probe requests. A provider "
+            "that doesn't respond within this window is recorded as failed."
+        ),
+    )
+    # v2.0-K: Persistent state — survive restarts.
+    state_dir: str | None = Field(
+        default=None,
+        description=(
+            "v2.0-K: directory for persistent state (sqlite3 KV store + "
+            "audit log). None = in-memory only (no persistence, backward-"
+            "compatible). Set to a path like '~/.coderouter/state/' to "
+            "enable cross-restart durability for budget totals, health "
+            "state, and self-healing exclusions. The directory is created "
+            "automatically if it doesn't exist."
+        ),
+    )
+    audit_log: Literal["off", "active"] = Field(
+        default="off",
+        description=(
+            "v2.0-K: structured audit log. 'active' writes guard "
+            "activations, chain fallbacks, budget warnings, self-healing "
+            "events, and drift transitions to a JSONL file in state_dir. "
+            "'off' = no audit log (backward-compatible default). Requires "
+            "state_dir to be set."
+        ),
+    )
+    audit_log_max_bytes: int = Field(
+        default=10_485_760,
+        ge=1_048_576,
+        le=1_073_741_824,
+        description=(
+            "v2.0-K: maximum audit log file size before rotation (bytes). "
+            "Default 10 MiB. When exceeded, the current file is renamed "
+            "to audit.jsonl.1 and a fresh file is started. Only one "
+            "backup is kept."
+        ),
+    )
+    request_log: Literal["off", "active"] = Field(
+        default="off",
+        description=(
+            "v2.0-K (Replay): request metadata journal. 'active' records "
+            "per-request metadata (provider, token counts, cost, streaming "
+            "flag) to a JSONL file in state_dir on every successful "
+            "response. Request/response bodies are NOT recorded (privacy "
+            "+ size). Used by ``coderouter replay`` for statistical A/B "
+            "analysis. 'off' = no journal (backward-compatible default). "
+            "Requires state_dir to be set."
+        ),
+    )
+    request_log_max_bytes: int = Field(
+        default=52_428_800,
+        ge=1_048_576,
+        le=1_073_741_824,
+        description=(
+            "v2.0-K (Replay): maximum request journal file size before "
+            "rotation (bytes). Default 50 MiB. Same single-backup "
+            "rotation as audit_log — when exceeded, the current file is "
+            "renamed to requests.jsonl.1 and a fresh file is started."
+        ),
+    )
     @model_validator(mode="after")
     def _check_default_profile_exists(self) -> CodeRouterConfig:
         """v0.6-A: surface a typo'd ``default_profile`` at load time.

coderouter/guards/__init__.py CHANGED Viewed

@@ -6,10 +6,12 @@ failure modes that a continuously-running local-LLM agent loop tends
 to hit:
   * :mod:`coderouter.guards.tool_loop`      — L3 stuck-tool detection
-  * :mod:`coderouter.guards.memory_pressure` — L2 backend OOM
-                                                 awareness (planned)
-  * :mod:`coderouter.guards.backend_health`  — L5 continuous probe +
-                                                 chain reorder (planned)
+  * :mod:`coderouter.guards.memory_pressure` — L2 backend OOM awareness
+  * :mod:`coderouter.guards.backend_health`  — L5 health state machine +
+                                                 chain reorder
+  * :mod:`coderouter.guards.self_healing`    — v2.0-J auto-exclude +
+                                                 restart + recovery probe
+  * :mod:`coderouter.guards.continuous_probe` — v2.0-I background probing
 Each guard is a pure-functional / single-class module that the engine
 consults at the appropriate dispatch point. Guards never block the

coderouter/guards/backend_health.py CHANGED Viewed

@@ -200,6 +200,40 @@ class BackendHealthMonitor:
         """True iff ``provider``'s current state is ``UNHEALTHY``."""
         return self.state_for(provider) == "UNHEALTHY"
+    # ------------------------------------------------------------------
+    # v2.0-K: Persistence
+    # ------------------------------------------------------------------
+    def save_state(self) -> dict[str, object]:
+        """Export the current per-provider health state for persistence."""
+        with self._lock:
+            return {
+                name: {
+                    "state": entry.state,
+                    "consecutive_failures": entry.consecutive_failures,
+                }
+                for name, entry in self._state.items()
+            }
+    def load_state(self, state: dict[str, object]) -> None:
+        """Restore health state from a previously saved dict."""
+        if not isinstance(state, dict):
+            return
+        with self._lock:
+            for name, data in state.items():
+                if not isinstance(data, dict):
+                    continue
+                saved_state = data.get("state", "HEALTHY")
+                if saved_state not in ("HEALTHY", "DEGRADED", "UNHEALTHY"):
+                    saved_state = "HEALTHY"
+                failures = data.get("consecutive_failures", 0)
+                if not isinstance(failures, int) or failures < 0:
+                    failures = 0
+                self._state[name] = _ProviderHealth(
+                    state=saved_state,  # type: ignore[arg-type]
+                    consecutive_failures=failures,
+                )
 __all__ = [
     "BackendHealthMonitor",

coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl