PyPI - coderouter-cli - Versions diffs - 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl - Mend

coderouter-cli 2.0.0py3-none-any.whl → 2.2.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

coderouter/cli.py +219 -0
coderouter/config/schemas.py +235 -2
coderouter/guards/__init__.py +6 -4
coderouter/guards/backend_health.py +34 -0
coderouter/guards/continuous_probe.py +349 -0
coderouter/guards/drift_actions.py +111 -0
coderouter/guards/drift_detection.py +308 -0
coderouter/guards/self_healing.py +413 -0
coderouter/guards/tool_loop.py +71 -0
coderouter/ingress/anthropic_routes.py +106 -12
coderouter/ingress/app.py +129 -0
coderouter/logging.py +370 -0
coderouter/metrics/collector.py +168 -0
coderouter/metrics/prometheus.py +141 -0
coderouter/output_filters.py +95 -4
coderouter/routing/adaptive.py +23 -0
coderouter/routing/budget.py +35 -0
coderouter/routing/fallback.py +496 -5
coderouter/state/__init__.py +15 -0
coderouter/state/audit_log.py +269 -0
coderouter/state/replay.py +316 -0
coderouter/state/request_log.py +178 -0
coderouter/state/store.py +212 -0
coderouter/translation/tool_repair.py +42 -1
coderouter_cli-2.2.0.dist-info/METADATA +243 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/RECORD +29 -20
coderouter_cli-2.0.0.dist-info/METADATA +0 -559
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/WHEEL +0 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/entry_points.txt +0 -0
{coderouter_cli-2.0.0.dist-info → coderouter_cli-2.2.0.dist-info}/licenses/LICENSE +0 -0

coderouter/routing/fallback.py CHANGED Viewed

@@ -24,9 +24,16 @@ Dual entry points (v0.3.x-1):
 from __future__ import annotations
+import asyncio
 import time
 from collections.abc import AsyncIterator
-from typing import Final
+from typing import TYPE_CHECKING, Any, Final
+if TYPE_CHECKING:
+    from coderouter.config.schemas import FallbackChain
+    from coderouter.guards.drift_detection import DriftVerdict
+    from coderouter.guards.self_healing import SelfHealingOrchestrator
+    from coderouter.state.store import StateStore
 from coderouter.adapters.anthropic_native import AnthropicAdapter
 from coderouter.adapters.base import (
@@ -48,7 +55,9 @@ from coderouter.guards.memory_pressure import (
 )
 from coderouter.guards.tool_loop import (
     DEFAULT_LOOP_INJECT_HINT,
+    ToolCountExceededError,
     ToolLoopBreakError,
+    check_total_tool_count,
     detect_tool_loop,
     inject_loop_break_hint,
 )
@@ -127,7 +136,8 @@ def _apply_tool_loop_guard(
     Returns the (possibly mutated) request. Raises
     :class:`ToolLoopBreakError` when the configured action is ``break``
-    and a loop was detected.
+    and a loop was detected. Also raises :class:`ToolCountExceededError`
+    when the total tool-call count exceeds ``max_tool_calls`` (v2.2).
     Profile resolution: uses ``request.profile`` (the X-CodeRouter-Mode
     header / explicit body field) and falls back to
@@ -146,6 +156,30 @@ def _apply_tool_loop_guard(
         # resolution path produces its own diagnostic.
         return request
+    # v2.2: total tool-call count hard cap — runs before streak
+    # detection because it's a cheaper O(n) scan that catches a
+    # broader class of runaway behavior.
+    if profile.max_tool_calls > 0:
+        exceeded = check_total_tool_count(
+            request,
+            max_calls=profile.max_tool_calls,
+        )
+        if exceeded is not None:
+            logger.warning(
+                "tool-count-exceeded",
+                extra={
+                    "profile": profile.name,
+                    "total_count": exceeded.total_count,
+                    "max_allowed": exceeded.max_allowed,
+                    "action": profile.tool_loop_action,
+                },
+            )
+            if profile.tool_loop_action == "break":
+                raise ToolCountExceededError(exceeded, profile.name)
+            # For "warn" and "inject" actions, log only and continue.
+            # The inject action's hint is not meaningful for count
+            # exceeded (not a same-tool loop), so we just warn.
     detection = detect_tool_loop(
         request,
         window=profile.tool_loop_window,
@@ -469,11 +503,16 @@ class _StreamUsageAccumulator:
     """
     __slots__ = (
+        "_current_block_text",
+        "_current_block_type",
         "_observed",
+        "_text_blocks",
         "cache_creation_input_tokens",
         "cache_read_input_tokens",
+        "has_tool_use",
         "input_tokens",
         "output_tokens",
+        "stop_reason",
     )
     def __init__(self) -> None:
@@ -482,6 +521,32 @@ class _StreamUsageAccumulator:
         self.cache_read_input_tokens = 0
         self.cache_creation_input_tokens = 0
         self._observed = False
+        # v2.0-G: tracked for drift detection observation at stream end.
+        self.has_tool_use: bool = False
+        self.stop_reason: str | None = None
+        # v2.0-H: partial content accumulation for mid-stream recovery.
+        # Completed text blocks are moved to _text_blocks on content_block_stop.
+        # In-progress text is in _current_block_text (list of str fragments).
+        self._text_blocks: list[str] = []
+        self._current_block_type: str | None = None
+        self._current_block_text: list[str] = []
+    @property
+    def partial_content(self) -> list[dict[str, Any]]:
+        """Return accumulated text content as Anthropic content blocks.
+        Includes both completed blocks and any in-progress text block
+        (useful when the stream is interrupted mid-block). Tool_use blocks
+        are excluded because partial JSON is unusable.
+        """
+        blocks: list[dict[str, Any]] = []
+        for text in self._text_blocks:
+            if text:
+                blocks.append({"type": "text", "text": text})
+        # Include in-progress text block if any
+        if self._current_block_type == "text" and self._current_block_text:
+            blocks.append({"type": "text", "text": "".join(self._current_block_text)})
+        return blocks
     def observe(self, event: AnthropicStreamEvent) -> None:
         """Update counters from one stream event (no-op for non-usage events)."""
@@ -494,6 +559,33 @@ class _StreamUsageAccumulator:
             usage = event.data.get("usage") if isinstance(event.data, dict) else None
             if isinstance(usage, dict):
                 self._merge(usage)
+            # v2.0-G: capture stop_reason from the terminal message_delta.
+            delta = event.data.get("delta") if isinstance(event.data, dict) else None
+            if isinstance(delta, dict) and "stop_reason" in delta:
+                self.stop_reason = delta["stop_reason"]
+        elif event.type == "content_block_start":
+            # v2.0-G: detect tool_use content blocks for drift observation.
+            cb = event.data.get("content_block") if isinstance(event.data, dict) else None
+            if isinstance(cb, dict):
+                block_type = cb.get("type", "")
+                if block_type == "tool_use":
+                    self.has_tool_use = True
+                # v2.0-H: start tracking a new content block.
+                self._current_block_type = block_type
+                self._current_block_text = []
+        elif event.type == "content_block_delta":
+            # v2.0-H: accumulate text_delta fragments.
+            delta = event.data.get("delta") if isinstance(event.data, dict) else None
+            if isinstance(delta, dict) and delta.get("type") == "text_delta":
+                text = delta.get("text", "")
+                if text:
+                    self._current_block_text.append(text)
+        elif event.type == "content_block_stop":
+            # v2.0-H: finalize current block.
+            if self._current_block_type == "text" and self._current_block_text:
+                self._text_blocks.append("".join(self._current_block_text))
+            self._current_block_type = None
+            self._current_block_text = []
     def _merge(self, usage: dict[str, object]) -> None:
         any_nonzero = False
@@ -613,9 +705,18 @@ class MidStreamError(CodeRouterError):
     one chunk to the client. Fallback is not attempted (the client has
     received partial content, so switching providers would corrupt the
     stream). Callers should surface this as a terminal error event.
+    v2.0-H: carries ``partial_content`` — the accumulated text blocks
+    generated before the failure. The ingress uses this to synthesize
+    a graceful stream termination when ``partial_stitch_action: surface``.
     """
-    def __init__(self, provider: str, original: AdapterError) -> None:
+    def __init__(
+        self,
+        provider: str,
+        original: AdapterError,
+        partial_content: list[dict[str, Any]] | None = None,
+    ) -> None:
         """Wrap the underlying :class:`AdapterError` with the provider name.
         The ingress layer catches this and converts it into an in-stream
@@ -624,6 +725,7 @@ class MidStreamError(CodeRouterError):
         """
         self.provider = provider
         self.original = original
+        self.partial_content: list[dict[str, Any]] = partial_content or []
         super().__init__(f"provider {provider!r} failed mid-stream: {original}")
@@ -747,6 +849,44 @@ class FallbackEngine:
         # Distinct from v1.9-C ``adaptive`` which handles the
         # gradient case via a rolling window.
         self._backend_health_monitor: BackendHealthMonitor = BackendHealthMonitor()
+        # v2.0-J: self-healing orchestrator. Manages provider exclusion,
+        # restart, and recovery probing when backend_health_action is
+        # "exclude". Composes with the L5 backend health monitor.
+        from coderouter.guards.self_healing import SelfHealingOrchestrator
+        self._self_healing: SelfHealingOrchestrator = SelfHealingOrchestrator()
+        # v2.0-G (L4): per-process drift detection window manager.
+        # Stores per-provider rolling observations; the detector is
+        # invoked after each provider-ok / provider-failed event and
+        # returns a verdict. Action dispatch (promote/reload) reuses
+        # the adaptive rank machinery.
+        from coderouter.guards.drift_detection import DriftWindow
+        self._drift_window: DriftWindow = DriftWindow()
+        # Track which providers are currently in drift-demoted state
+        # and when their cooldown expires (monotonic timestamp).
+        self._drift_demoted: dict[str, float] = {}
+        # Last drift verdict (set by _observe_drift_signal for ingress header).
+        self._last_drift_verdict: DriftVerdict | None = None
+        # v2.0-J: active recovery probe tasks (one per excluded provider).
+        self._recovery_tasks: dict[str, asyncio.Task[None]] = {}
+        # v2.0-J: shutdown event shared with recovery probe tasks.
+        self._recovery_shutdown: asyncio.Event | None = None
+        # v2.0-K: persistent state store (None = in-memory only).
+        self._state_store: StateStore | None = None
+    @property
+    def last_drift_severity(self) -> str | None:
+        """Return the severity string of the most recent drift verdict, or None.
+        The ingress reads this after generate_anthropic / stream_anthropic to
+        set the ``X-CodeRouter-Drift`` response header.  Returns ``"mild"`` or
+        ``"severe"`` when drift was detected, ``None`` otherwise.
+        """
+        v = self._last_drift_verdict
+        if v is None or not v.drifted:
+            return None
+        return v.severity
     @property
     def _adaptive(self) -> AdaptiveAdjuster:
@@ -794,12 +934,17 @@ class FallbackEngine:
         return existing
     @property
-    def _backend_health(self) -> BackendHealthMonitor:
+    def backend_health(self) -> BackendHealthMonitor:
         """Return the L5 backend-health monitor, lazily building one if absent.
         Same legacy-test compatibility pattern as the other guard
         properties — ``__new__``-constructed engines get a fresh
         empty monitor so ``state_for`` is always answerable.
+        v2.0-I: promoted from ``_backend_health`` to public ``backend_health``
+        so the continuous probe background task can feed results into the
+        same state machine. Internal callers continue to work (property
+        access is transparent).
         """
         existing = getattr(self, "_backend_health_monitor", None)
         if existing is None:
@@ -807,6 +952,25 @@ class FallbackEngine:
             existing = self._backend_health_monitor
         return existing
+    # Alias for backward compat with internal callers.
+    @property
+    def _backend_health(self) -> BackendHealthMonitor:
+        return self.backend_health
+    @property
+    def self_healing(self) -> SelfHealingOrchestrator:
+        """Return the v2.0-J self-healing orchestrator.
+        Lazy init for backward compat with __new__-constructed test engines.
+        """
+        from coderouter.guards.self_healing import SelfHealingOrchestrator
+        existing = getattr(self, "_self_healing", None)
+        if existing is None:
+            self._self_healing = SelfHealingOrchestrator()
+            existing = self._self_healing
+        return existing
     def _observe_provider_failure(
         self,
         provider: str,
@@ -884,6 +1048,18 @@ class FallbackEngine:
                     new_state=transition.new_state,
                     consecutive_failures=transition.consecutive_failures,
                 )
+                # v2.0-J: trigger self-healing on UNHEALTHY + exclude.
+                if (
+                    transition.new_state == "UNHEALTHY"
+                    and bh_action == "exclude"
+                ):
+                    newly_excluded = self.self_healing.on_unhealthy(
+                        provider,
+                        profile=chosen,
+                        consecutive_failures=transition.consecutive_failures,
+                    )
+                    if newly_excluded:
+                        self._spawn_recovery_probe(provider, chain=chain)
     def _observe_provider_success(
         self,
@@ -925,6 +1101,260 @@ class FallbackEngine:
                 consecutive_failures=transition.consecutive_failures,
             )
+    def _spawn_recovery_probe(
+        self,
+        provider: str,
+        *,
+        chain: FallbackChain,
+    ) -> None:
+        """Launch an async recovery probe task for an excluded provider.
+        v2.0-J: called by ``_observe_provider_failure`` when a provider
+        is newly excluded. The task runs ``recovery_probe_loop`` with
+        exponential backoff until the provider recovers or shutdown.
+        Safe to call from a sync context — uses ``asyncio.get_running_loop``
+        to schedule the task. No-op if no running event loop (e.g. in
+        pure-sync tests).
+        """
+        import asyncio
+        from coderouter.guards.self_healing import recovery_probe_loop
+        # Find the ProviderConfig for this provider name.
+        provider_config = None
+        for p in self.config.providers:
+            if p.name == provider:
+                provider_config = p
+                break
+        if provider_config is None:
+            return
+        # Reuse or create a shared shutdown event.
+        if self._recovery_shutdown is None:
+            self._recovery_shutdown = asyncio.Event()
+        # Don't spawn duplicate tasks.
+        existing = self._recovery_tasks.get(provider)
+        if existing is not None and not existing.done():
+            return
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            return  # no event loop — skip (sync test context)
+        task = loop.create_task(
+            recovery_probe_loop(
+                provider_config,
+                orchestrator=self.self_healing,
+                record_fn=self.backend_health.record_attempt,
+                health_threshold=chain.backend_health_threshold,
+                initial_interval_s=chain.recovery_probe_initial_s,
+                max_interval_s=chain.recovery_probe_max_s,
+                restart_timeout_s=chain.restart_timeout_s,
+                probe_timeout_s=10.0,
+                shutdown_event=self._recovery_shutdown,
+                profile=chain.name,
+            ),
+            name=f"recovery-probe-{provider}",
+        )
+        self._recovery_tasks[provider] = task
+    async def shutdown_recovery_probes(self) -> None:
+        """Signal all recovery probe tasks to stop and await them.
+        Called from the app lifespan shutdown path.
+        """
+        import contextlib
+        if self._recovery_shutdown is not None:
+            self._recovery_shutdown.set()
+        for task in self._recovery_tasks.values():
+            if not task.done():
+                with contextlib.suppress(Exception):
+                    await task
+        self._recovery_tasks.clear()
+    # ------------------------------------------------------------------
+    # v2.0-K: State persistence
+    # ------------------------------------------------------------------
+    def attach_state_store(self, store: StateStore) -> None:
+        """Attach a :class:`StateStore` and load persisted state.
+        Called from the app lifespan startup path when ``state_dir``
+        is configured.  Loads budget, health, and self-healing state
+        from the store (metrics state is not loaded here).
+        """
+        self._state_store = store
+        self._load_all_state()
+    def save_all_state(self) -> None:
+        """Persist all subsystem state to the attached store.
+        Called from the app lifespan shutdown path and optionally
+        on a periodic timer.  No-op if no store is attached.
+        """
+        store = self._state_store
+        if store is None:
+            return
+        import contextlib
+        with contextlib.suppress(Exception):
+            store.put("budget", "state", self._budget.save_state())
+        with contextlib.suppress(Exception):
+            store.put("health", "state", self.backend_health.save_state())
+        with contextlib.suppress(Exception):
+            store.put("self_healing", "state", self.self_healing.save_state())
+        # MetricsCollector state is saved separately via the singleton.
+    def _load_all_state(self) -> None:
+        """Restore subsystem state from the attached store."""
+        store = self._state_store
+        if store is None:
+            return
+        import contextlib
+        with contextlib.suppress(Exception):
+            budget_state = store.get("budget", "state")
+            if budget_state is not None:
+                self._budget.load_state(budget_state)  # type: ignore[arg-type]
+        with contextlib.suppress(Exception):
+            health_state = store.get("health", "state")
+            if health_state is not None:
+                self.backend_health.load_state(health_state)  # type: ignore[arg-type]
+        with contextlib.suppress(Exception):
+            sh_state = store.get("self_healing", "state")
+            if sh_state is not None:
+                self.self_healing.load_state(sh_state)  # type: ignore[arg-type]
+    def _observe_drift_signal(
+        self,
+        provider: str,
+        *,
+        profile: str | None,
+        output_tokens: int = 0,
+        has_tool_use: bool = False,
+        request_had_tools: bool = False,
+        stop_reason: str | None = None,
+        is_error: bool = False,
+        stream: bool = False,
+    ) -> DriftVerdict | None:
+        """v2.0-G (L4): record an observation and check for drift.
+        Called after every provider-ok / provider-failed event on the
+        Anthropic-shaped paths. Returns a :class:`DriftVerdict` when
+        drift is detected (drifted=True), None otherwise.
+        Side effects on detection:
+        - Emits ``drift-detected`` log.
+        - If action is ``promote`` or ``reload``, demotes the provider
+          via the adaptive rank machinery.
+        """
+        from coderouter.guards.drift_detection import (
+            SENSITIVITY_PRESETS,
+            ResponseObservation,
+            detect_drift,
+        )
+        from coderouter.logging import log_drift_detected, log_drift_promoted
+        chosen = profile or self.config.default_profile
+        try:
+            chain_cfg = self.config.profile_by_name(chosen)
+        except (KeyError, ValueError):
+            return None
+        if chain_cfg.drift_detection_action == "off":
+            return None
+        # Sync the window size to this profile's configured value (set unconditionally).
+        self._drift_window.max_size = chain_cfg.drift_detection_window_size
+        # Record observation
+        obs = ResponseObservation(
+            provider=provider,
+            output_tokens=output_tokens,
+            has_tool_use=has_tool_use,
+            request_had_tools=request_had_tools,
+            stop_reason=stop_reason,
+            is_error=is_error,
+            stream=stream,
+        )
+        self._drift_window.record(obs)
+        # Check for cooldown recovery
+        import time as _time
+        demote_expires = self._drift_demoted.get(provider)
+        if demote_expires is not None and _time.monotonic() >= demote_expires:
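+            # Cooldown expired — log the recovery and clear drift state.
+            # NOTE(review): no explicit rank restore is performed in this
+            # branch — confirm the adaptive adjuster recovers the rank itself.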
+            from coderouter.logging import log_drift_recovered
+            elapsed = chain_cfg.drift_detection_cooldown_s
+            log_drift_recovered(logger, provider=provider, profile=chosen, after_s=elapsed)
+            self._drift_demoted.pop(provider, None)
+            self._drift_window.clear(provider)
+            return None
+        # Don't re-detect while in cooldown
+        if provider in self._drift_demoted:
+            return None
+        # Run detection
+        window = self._drift_window.get_window(provider)
+        thresholds = SENSITIVITY_PRESETS.get(
+            chain_cfg.drift_detection_sensitivity, SENSITIVITY_PRESETS["normal"]
+        )
+        verdict = detect_drift(window, thresholds)
+        if not verdict.drifted:
+            self._last_drift_verdict = None
+            return None
+        # Store for ingress response header.
+        self._last_drift_verdict = verdict
+        # Emit log
+        log_drift_detected(
+            logger,
+            provider=provider,
+            profile=chosen,
+            severity=verdict.severity,
+            reason=verdict.reason,
+            action=chain_cfg.drift_detection_action,
+            signals=verdict.signals,
+        )
+        # Action: promote / reload
+        if chain_cfg.drift_detection_action in ("promote", "reload"):
+            import time as _time_mod
+            # Demote via adaptive rank
+            self._adaptive.demote(provider, steps=2)
+            log_drift_promoted(
+                logger,
+                provider=provider,
+                profile=chosen,
+                demoted_to_rank=2,
+                cooldown_s=chain_cfg.drift_detection_cooldown_s,
+            )
+            # Record cooldown expiry
+            self._drift_demoted[provider] = (
+                _time_mod.monotonic() + chain_cfg.drift_detection_cooldown_s
+            )
+            # v2.0-G: reload action — attempt Ollama KV cache flush
+            # (best-effort, fire-and-forget background task).
+            if chain_cfg.drift_detection_action == "reload":
+                import asyncio
+                from coderouter.guards.drift_actions import attempt_reload
+                provider_config = self._adapters[provider].config
+                self._reload_task = asyncio.create_task(attempt_reload(provider_config))
+        return verdict
     def _resolve_profile_overrides(self, profile_name: str | None) -> ProviderCallOverrides:
         """v0.6-B: build the ProviderCallOverrides for the active profile.
@@ -1107,6 +1537,19 @@ class FallbackEngine:
                         profile=chosen,
                     )
                 adapters = healthy + unhealthy
+        # Pass 4b: v2.0-J self-healing exclusion. When the action is
+        # "exclude", providers in the orchestrator's excluded set are
+        # removed entirely from the chain. Unlike "demote" (which
+        # moves to the back), excluded providers are not attempted at
+        # all — recovery probes run in the background to detect when
+        # they come back. If all providers are excluded, fall through
+        # to the existing NoProvidersAvailableError path.
+        if chain.backend_health_action == "exclude":
+            excluded = self.self_healing.excluded_providers()
+            if excluded:
+                adapters = [a for a in adapters if a.name not in excluded]
         return adapters
     def _resolve_anthropic_chain(self, request: AnthropicRequest) -> list[tuple[BaseAdapter, bool]]:
@@ -1455,6 +1898,14 @@ class FallbackEngine:
                 self._observe_provider_failure(
                     adapter.name, exc, profile=request.profile
                 )
+                # v2.0-G (L4): drift detection observation (failure path).
+                self._observe_drift_signal(
+                    adapter.name,
+                    profile=request.profile,
+                    is_error=True,
+                    request_had_tools=bool(request.tools),
+                    stream=False,
+                )
                 errors.append(exc)
                 if not exc.retryable:
                     break
@@ -1482,6 +1933,18 @@ class FallbackEngine:
             self._observe_provider_success(
                 adapter.name, profile=request.profile
             )
+            # v2.0-G (L4): drift detection observation (success path).
+            self._observe_drift_signal(
+                adapter.name,
+                profile=request.profile,
+                output_tokens=resp.usage.output_tokens if resp.usage else 0,
+                has_tool_use=any(
+                    getattr(b, "type", None) == "tool_use" for b in (resp.content or [])
+                ),
+                request_had_tools=bool(request.tools),
+                stop_reason=resp.stop_reason,
+                stream=False,
+            )
             # v1.9-A: pair every successful Anthropic response with a
             # cache-observed log line. Native Anthropic / LM Studio
             # /v1/messages report cache_read_input_tokens /
@@ -1620,6 +2083,14 @@ class FallbackEngine:
                 self._observe_provider_failure(
                     adapter.name, exc, profile=request.profile
                 )
+                # v2.0-G (L4): drift detection observation (stream failure).
+                self._observe_drift_signal(
+                    adapter.name,
+                    profile=request.profile,
+                    is_error=True,
+                    request_had_tools=bool(request.tools),
+                    stream=True,
+                )
                 errors.append(exc)
                 if not exc.retryable:
                     break
@@ -1662,7 +2133,27 @@ class FallbackEngine:
                 self._observe_provider_failure(
                     adapter.name, exc, profile=request.profile
                 )
-                raise MidStreamError(adapter.name, exc) from exc
+                # v2.0-G (L4): drift detection observation (mid-stream failure).
+                self._observe_drift_signal(
+                    adapter.name,
+                    profile=request.profile,
+                    is_error=True,
+                    request_had_tools=bool(request.tools),
+                    stream=True,
+                )
+                raise MidStreamError(
+                    adapter.name, exc, partial_content=acc.partial_content
+                ) from exc
+            # v2.0-G (L4): drift detection observation (stream success).
+            self._observe_drift_signal(
+                adapter.name,
+                profile=request.profile,
+                output_tokens=acc.output_tokens,
+                has_tool_use=acc.has_tool_use,
+                request_had_tools=bool(request.tools),
+                stop_reason=acc.stop_reason,
+                stream=True,
+            )
             # v1.9-B2: pair the successful stream with a cache-observed
             # log line carrying the aggregated usage counters that the
             # ``_StreamUsageAccumulator`` collected from the

coderouter/state/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+"""Persistent state layer (v2.0-K).
+Four modules:
+* :mod:`coderouter.state.store`       — sqlite3 KV store for operational
+                                         metadata (budget totals, health
+                                         state, self-healing exclusions).
+* :mod:`coderouter.state.audit_log`   — JSONL structured event log with
+                                         rotation and CLI reader.
+* :mod:`coderouter.state.request_log` — JSONL request metadata journal
+                                         (per-request token counts, cost,
+                                         provider — no request body).
+* :mod:`coderouter.state.replay`      — Statistical A/B analysis engine
+                                         over request journal entries.
+"""

coderouter-cli 2.0.0__py3-none-any.whl → 2.2.0__py3-none-any.whl

coderouter-cli 2.0.0py3-none-any.whl → 2.2.0py3-none-any.whl