PyPI - coderouter-cli - Versions diffs - 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl - Mend

coderouter-cli 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

coderouter/cli.py +31 -0
coderouter/config/schemas.py +157 -0
coderouter/guards/__init__.py +2 -0
coderouter/guards/_fingerprint.py +125 -0
coderouter/guards/drift_detection.py +55 -0
coderouter/ingress/app.py +11 -0
coderouter/ingress/dashboard_routes.py +1 -0
coderouter/ingress/launcher_routes.py +1176 -0
coderouter/routing/fallback.py +33 -3
coderouter/state/__init__.py +15 -11
coderouter/state/suggest_rules.py +413 -0
{coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/METADATA +36 -4
{coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/RECORD +16 -13
{coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/WHEEL +0 -0
{coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/entry_points.txt +0 -0
{coderouter_cli-2.3.0a4.dist-info → coderouter_cli-2.5.0.dist-info}/licenses/LICENSE +0 -0

coderouter/cli.py CHANGED Viewed

@@ -293,6 +293,18 @@ def _build_parser() -> argparse.ArgumentParser:
         metavar="N",
         help="Use only the last N entries (applied after --since and --provider filters).",
     )
+    # P1-6: --suggest-rules — statistical analysis → routing rule proposals.
+    replay.add_argument(
+        "--suggest-rules",
+        action="store_true",
+        help=(
+            "P1-6: analyse the request journal and print actionable routing "
+            "rule suggestions as copy-paste YAML snippets. Suggestions cover "
+            "provider reordering by cost, prompt_cache enablement, drift "
+            "detection configuration, and goal profile creation. "
+            "Can be combined with --since / --limit to scope the analysis window."
+        ),
+    )
     return parser
@@ -684,6 +696,25 @@ def _run_replay(args: argparse.Namespace) -> int:
         print("replay: no matching entries found.")
         return 0
+    if getattr(args, "suggest_rules", False):
+        # P1-6: statistical rule suggestion mode.
+        # Always compute a full window summary (ignores --compare / --provider).
+        from coderouter.state.replay import summarize_window as _sw
+        from coderouter.state.suggest_rules import format_suggestions, suggest_rules
+        # Re-read without provider filter so we see all providers.
+        all_entries = read_request_log(log_path, since=args.since)
+        if args.limit is not None and args.limit > 0:
+            all_entries = all_entries[-args.limit:]
+        full_summary = _sw(all_entries)
+        suggestions = suggest_rules(full_summary)
+        print(f"Request journal: {len(all_entries)} entries analysed")
+        print(f"  Window: {full_summary.first_ts} → {full_summary.last_ts}")
+        print(f"  Providers: {', '.join(sorted(full_summary.providers))}")
+        print()
+        print(format_suggestions(suggestions))
+        return 0
     if args.compare:
         provider_a, provider_b = args.compare
         comparison = compare_providers(entries, provider_a, provider_b)

coderouter/config/schemas.py CHANGED Viewed

@@ -658,6 +658,28 @@ class FallbackChain(BaseModel):
         ),
     )
+    # --- P1-5: goal_mode — tighter drift thresholds for /goal sessions -------
+    #
+    # When True, the drift detector automatically switches to the
+    # ``THRESHOLDS_GOAL`` preset regardless of ``drift_detection_sensitivity``,
+    # and lowers ``min_window_fill`` to 4 so stall detection fires faster.
+    #
+    # Intended for profiles routed by the ``/goal`` meta-command where
+    # the agent is expected to make steady forward progress. Repetition and
+    # length collapse are much more meaningful signals in that context than
+    # in a general-purpose chat session.
+    goal_mode: bool = Field(
+        default=False,
+        description=(
+            "P1-5: when True, automatically applies the ``goal`` drift "
+            "threshold preset (stricter thresholds, lower ``min_window_fill`` "
+            "of 4) for this profile. Overrides ``drift_detection_sensitivity`` "
+            "when drift_detection_action is not ``off``. Designed for "
+            "agent/goal sessions where forward-progress stalls are more "
+            "actionable than in ad-hoc chat."
+        ),
+    )
     # --- v2.0-H (L6): Mid-stream partial stitching --------------------------
     #   * ``off``      — discard partial content on mid-stream failure (legacy).
     #   * ``surface``  — return partial content as a truncated-but-valid response.
@@ -852,6 +874,130 @@ class AutoRouterConfig(BaseModel):
     )
+class LauncherBackendConfig(BaseModel):
+    """Per-backend binary path configuration for the Launcher.
+    When ``binary`` is unset, the Launcher falls back to the default
+    executable name (``llama-server`` for llama.cpp, ``python`` for vllm)
+    and relies on ``$PATH`` resolution — which works when the tool is
+    globally installed.  Set ``binary`` when:
+    - llama.cpp was built from source (e.g. ``~/llama.cpp/build/bin/llama-server``)
+    - vllm lives in a virtualenv (e.g. ``~/.venv/bin/python``)
+    - Multiple builds coexist and you want to pin a specific one
+    Tilde (``~``) and environment variables are expanded at launch time.
+    Example::
+        backends:
+          llama.cpp:
+            binary: ~/llama.cpp/build/bin/llama-server
+          vllm:
+            binary: ~/.venv/bin/python
+    """
+    model_config = ConfigDict(extra="forbid")
+    binary: str | None = Field(
+        default=None,
+        description=(
+            "Absolute or ``~``-relative path to the backend executable. "
+            "llama.cpp default: ``llama-server`` (PATH). "
+            "vllm default: ``python`` (PATH). "
+            "Expanded at launch time."
+        ),
+    )
+class LauncherOptionProfile(BaseModel):
+    """One named option preset for a launcher backend (e.g. llama.cpp / vllm).
+    ``args`` maps CLI flag strings to their values.  A bool value of
+    ``True`` means "include the flag without a value" (e.g. ``--no-mmap``);
+    ``False`` means "omit the flag entirely".  All other value types are
+    converted to strings and appended as ``--flag value`` pairs.
+    Example::
+        name: "GPU速度重視"
+        args:
+          "-ngl": 99
+          "--ctx-size": 4096
+          "--no-mmap": false
+    """
+    model_config = ConfigDict(extra="forbid")
+    name: str = Field(..., description="Display name shown in the Launcher UI dropdown.")
+    args: dict[str, str | int | float | bool] = Field(
+        default_factory=dict,
+        description=(
+            "CLI flag → value mapping. "
+            "bool True = flag only (no value). "
+            "bool False = omit flag. "
+            "All other types are stringified and passed as '--flag value'."
+        ),
+    )
+class LauncherConfig(BaseModel):
+    """The ``launcher:`` block in providers.yaml.
+    Controls the Launcher UI available at ``/launcher``.
+    Example::
+        launcher:
+          model_dirs:
+            - ~/models
+            - /data/gguf
+          option_profiles:
+            llama.cpp:
+              - name: "GPU速度重視"
+                args:
+                  "-ngl": 99
+                  "--ctx-size": 4096
+            vllm:
+              - name: "標準"
+                args:
+                  "--dtype": "auto"
+                  "--max-model-len": 4096
+    """
+    model_config = ConfigDict(extra="forbid")
+    model_dirs: list[str] = Field(
+        default_factory=list,
+        description=(
+            "Directories to scan for model files "
+            "(.gguf, .safetensors, .bin, .pt, .ggml). "
+            "Paths are expanded (~ and env vars) at scan time, not at load. "
+            "Non-existent paths are silently skipped."
+        ),
+    )
+    backends: dict[str, LauncherBackendConfig] = Field(
+        default_factory=dict,
+        description=(
+            "Per-backend binary path overrides. "
+            "Keys are backend names ('llama.cpp', 'vllm'). "
+            "When a key is absent, the default executable is used "
+            "('llama-server' / 'python') and resolved via PATH. "
+            "Useful when running a from-source build or a venv-specific binary."
+        ),
+    )
+    option_profiles: dict[str, list[LauncherOptionProfile]] = Field(
+        default_factory=dict,
+        description=(
+            "Named option presets per backend. "
+            "Keys should be backend names: 'llama.cpp', 'vllm'. "
+            "Each key maps to an ordered list of named presets. "
+            "A free-form 'extra args' field is always available in the UI "
+            "for one-off overrides without touching this config."
+        ),
+    )
 class PluginsConfig(BaseModel):
     """The ``plugins:`` block in providers.yaml (v2.3.0).
@@ -1060,6 +1206,17 @@ class CodeRouterConfig(BaseModel):
             "plugins (zero-cost, backward-compatible default)."
         ),
     )
+    launcher: LauncherConfig | None = Field(
+        default=None,
+        description=(
+            "Launcher configuration for the /launcher UI. "
+            "Defines model_dirs to scan and option_profiles per backend "
+            "('llama.cpp', 'vllm'). "
+            "Unset (None) = Launcher UI shows empty model list and no profiles. "
+            "The Launcher UI itself is always available at /launcher "
+            "regardless of this setting."
+        ),
+    )
     @model_validator(mode="after")
     def _check_default_profile_exists(self) -> CodeRouterConfig:

coderouter/guards/__init__.py CHANGED Viewed

@@ -12,6 +12,8 @@ to hit:
   * :mod:`coderouter.guards.self_healing`    — v2.0-J auto-exclude +
                                                  restart + recovery probe
   * :mod:`coderouter.guards.continuous_probe` — v2.0-I background probing
+  * :mod:`coderouter.guards._fingerprint`     — P1-4 response fingerprinting
+                                                 for goal_progress_stall signal
 Each guard is a pure-functional / single-class module that the engine
 consults at the appropriate dispatch point. Guards never block the

coderouter/guards/_fingerprint.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Response fingerprinting for goal_progress_stall detection (P1-4).
+A "fingerprint" is a compact, order-independent signature of the *content*
+of an assistant response — independent of surface variation (filler phrases,
+minor rewordings).  Two responses with the same fingerprint are considered
+semantically repetitive for stall-detection purposes.
+Algorithm
+---------
+1. Normalise: NFKC, lowercase, strip punctuation and digits, collapse whitespace.
+2. Extract the N most-frequent content words (excluding a small stop-list and tokens under 3 characters).
+3. Sort alphabetically, join with '|', SHA-256 → 12-hex prefix.
+The 12-hex prefix gives 281 trillion distinct values — collision probability
+across any 20-response window is negligible (< 1 in 10^12).
+Why top-N content words instead of full hash?
+----------------------------------------------
+A verbatim hash would fail to catch "I cannot do X. Let me try Y" vs
+"Let me try Y as I cannot do X" — same stall, different hash.  By
+extracting the dominant vocabulary we get useful fuzzy equality without
+the overhead of embedding models.
+Usage
+-----
+    from coderouter.guards._fingerprint import fingerprint_response
+    fp = fingerprint_response(response_text)
+    obs = ResponseObservation(..., response_fingerprint=fp)
+"""
+from __future__ import annotations
+import hashlib
+import re
+import unicodedata
+# ---------------------------------------------------------------------------
+# Stop-word list (English + common LLM filler)
+# ---------------------------------------------------------------------------
+_STOP_WORDS: frozenset[str] = frozenset(
+    {
+        # English function words
+        "a", "an", "the", "and", "or", "but", "if", "in", "on", "at", "to",
+        "for", "of", "with", "by", "from", "as", "is", "it", "its", "be",
+        "was", "are", "were", "been", "has", "have", "had", "do", "does",
+        "did", "will", "would", "could", "should", "may", "might", "shall",
+        "this", "that", "these", "those", "i", "you", "he", "she", "we",
+        "they", "me", "him", "her", "us", "them", "my", "your", "his",
+        "their", "our", "what", "which", "who", "how", "when", "where",
+        "why", "not", "no", "so", "up", "out", "into", "about", "than",
+        "then", "there", "here", "also", "just", "can", "get", "all",
+        # Common LLM assistant filler
+        "certainly", "sure", "absolutely", "great", "happy", "help",
+        "please", "let", "know", "feel", "free", "answer", "question",
+        "response", "following", "based", "provide", "using",
+    }
+)
+# ---------------------------------------------------------------------------
+# Number of top content words to include in the fingerprint
+# ---------------------------------------------------------------------------
+_TOP_N: int = 12
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def fingerprint_response(text: str, *, top_n: int = _TOP_N) -> str:
+    """Return a 12-hex fingerprint string for *text*.
+    Parameters
+    ----------
+    text:
+        Raw assistant response text (plain text, not JSON).
+    top_n:
+        Number of most-frequent content words to include in the signature.
+        Defaults to ``_TOP_N`` (12).  Lower values are more fuzzy; higher
+        values are more precise.
+    Returns
+    -------
+    A 12-character lowercase hexadecimal string, e.g. ``"a3f7b2c091de"``.
+    Returns ``""`` for empty / whitespace-only input, or when fewer than three distinct content words remain after filtering.
+    """
+    if not text or not text.strip():
+        return ""
+    # 1. Unicode normalisation + lowercase
+    normalised = unicodedata.normalize("NFKC", text).lower()
+    # 2. Strip punctuation / digits, collapse whitespace
+    normalised = re.sub(r"[^\w\s]", " ", normalised)
+    normalised = re.sub(r"\d+", " ", normalised)
+    normalised = re.sub(r"\s+", " ", normalised).strip()
+    # 3. Tokenise and filter stop words (also skip very short tokens)
+    tokens = [w for w in normalised.split() if len(w) > 2 and w not in _STOP_WORDS]
+    if not tokens:
+        return ""
+    # 4. Count frequencies, take top-N
+    freq: dict[str, int] = {}
+    for tok in tokens:
+        freq[tok] = freq.get(tok, 0) + 1
+    # Require at least 3 distinct content words; single-word or near-empty
+    # responses (e.g. "xxxxx..." test stubs, error codes, bare ACKs) produce
+    # the same fingerprint every time and would falsely inflate the repetition
+    # rate.  Returning "" marks these as "not fingerprinted" so detect_drift
+    # skips them entirely.
+    if len(freq) < 3:
+        return ""
+    top_words = sorted(freq, key=lambda w: (-freq[w], w))[:top_n]
+    # 5. Sort alphabetically → stable join → hash
+    signature = "|".join(sorted(top_words))
+    digest = hashlib.sha256(signature.encode()).hexdigest()
+    return digest[:12]

coderouter/guards/drift_detection.py CHANGED Viewed

@@ -34,6 +34,10 @@ Signals
   * ``stop_anomaly_rate`` — fraction of responses with unexpected stop_reason
     (not "end_turn" / "tool_use" / "max_tokens")
   * ``error_rate`` — fraction of attempts that ended in failure
+  * ``goal_progress_stall`` (P1-4) — fraction of fingerprinted responses
+    whose fingerprint matches a previously-seen fingerprint in the window,
+    indicating the model is repeating itself without making progress.
+    Only evaluated when at least three observations in the window carry a non-empty ``response_fingerprint``.
 Thresholds are bundled as :class:`DriftThresholds` with three presets
 (``low`` / ``normal`` / ``high`` sensitivity).
@@ -71,6 +75,15 @@ class ResponseObservation:
     is_error: bool = False
     """True if the attempt ended in provider-failed / provider-failed-midstream."""
     stream: bool = False
+    response_fingerprint: str | None = None
+    """P1-4: compact content fingerprint of the response text.
+    When set, used by the ``goal_progress_stall`` signal to detect
+    repetition: the same fingerprint appearing multiple times in the
+    window indicates the model is not making progress. Computed by
+    :func:`coderouter.guards._fingerprint.fingerprint_response`.
+    Pass ``None`` (default) to opt-out — the signal is silently skipped.
+    """
 # ---------------------------------------------------------------------------
@@ -100,6 +113,12 @@ class DriftThresholds:
     length_collapse_ratio: float = 0.5
     """If recent half median is < 50% of earlier half median → collapse."""
+    # P1-4: repetition/stall threshold
+    repetition_rate_threshold: float = 0.4
+    """P1-4: fraction of fingerprinted responses whose fingerprint has
+    appeared before in the window. Above this rate → goal_progress_stall
+    signal fires (mild). Default 0.4: fires when more than 2 in 5 responses are repeats."""
     # Minimum observations before detection fires
     min_window_fill: int = 6
     """Don't trigger until at least this many observations in the window."""
@@ -112,6 +131,7 @@ THRESHOLDS_LOW = DriftThresholds(
     tool_silence_rate=0.8,
     stop_anomaly_rate=0.6,
     error_rate=0.4,
+    repetition_rate_threshold=0.6,
     min_window_fill=10,
 )
@@ -123,6 +143,19 @@ THRESHOLDS_HIGH = DriftThresholds(
     tool_silence_rate=0.5,
     stop_anomaly_rate=0.3,
     error_rate=0.15,
+    repetition_rate_threshold=0.25,
+    min_window_fill=4,
+)
+# P1-5: goal-mode preset — tighter thresholds + lower min_window_fill.
+# Applied automatically when the profile has goal_mode=True.
+THRESHOLDS_GOAL = DriftThresholds(
+    empty_response_rate=0.2,
+    length_collapse_ratio=0.6,
+    tool_silence_rate=0.5,
+    stop_anomaly_rate=0.3,
+    error_rate=0.15,
+    repetition_rate_threshold=0.2,
     min_window_fill=4,
 )
@@ -130,6 +163,7 @@ SENSITIVITY_PRESETS: dict[str, DriftThresholds] = {
     "low": THRESHOLDS_LOW,
     "normal": THRESHOLDS_NORMAL,
     "high": THRESHOLDS_HIGH,
+    "goal": THRESHOLDS_GOAL,
 }
@@ -244,6 +278,27 @@ def detect_drift(
     if error_rate > thresholds.error_rate:
         mild_flags.append(f"error_rate={error_rate:.2f}")
+    # --- Signal 6: Goal progress stall (P1-4) ---
+    # Only active when at least three observations have a non-empty fingerprint.
+    # Computes: how many fingerprinted responses repeat a fingerprint
+    # already seen earlier in the window.  High repetition → stall.
+    fingerprinted = [
+        obs for obs in window if obs.response_fingerprint  # excludes None and ""
+    ]
+    if len(fingerprinted) >= 3:
+        seen: set[str] = set()
+        repeat_count = 0
+        for obs in fingerprinted:
+            fp = obs.response_fingerprint  # guaranteed non-empty by filter above
+            if fp in seen:
+                repeat_count += 1
+            else:
+                seen.add(fp)
+        repetition_rate = repeat_count / len(fingerprinted)
+        signals["goal_progress_stall"] = round(repetition_rate, 3)
+        if repetition_rate > thresholds.repetition_rate_threshold:
+            mild_flags.append(f"goal_progress_stall={repetition_rate:.2f}")
     # --- Severity synthesis ---
     if severe_flags:
         severity: Literal["none", "mild", "severe"] = "severe"

coderouter/ingress/app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from coderouter import __version__
 from coderouter.config import load_config
 from coderouter.ingress.anthropic_routes import router as anthropic_router
 from coderouter.ingress.dashboard_routes import router as dashboard_router
+from coderouter.ingress.launcher_routes import router as launcher_router
 from coderouter.ingress.metrics_routes import router as metrics_router
 from coderouter.ingress.openai_routes import router as openai_router
 from coderouter.logging import configure_logging, get_logger
@@ -178,6 +179,12 @@ def create_app(config_path: str | None = None) -> FastAPI:
             with contextlib.suppress(Exception):
                 await probe_task
+        # Launcher: stop child llama.cpp / vllm processes so they don't orphan.
+        from coderouter.ingress.launcher_routes import shutdown_launcher
+        with contextlib.suppress(Exception):
+            await shutdown_launcher(app)
         # v2.0-J: graceful shutdown of recovery probe tasks.
         with contextlib.suppress(Exception):
             await engine.shutdown_recovery_probes()
@@ -259,6 +266,10 @@ def create_app(config_path: str | None = None) -> FastAPI:
     # Same root-level mount as /metrics.json — the dashboard is a UI
     # concern and doesn't belong under the /v1 API surface.
     app.include_router(dashboard_router, tags=["dashboard"])
+    # Launcher UI + process management API.
+    # /launcher       → single-page HTML UI
+    # /api/launcher/* → model scan, process start/stop/logs
+    app.include_router(launcher_router, tags=["launcher"])
     return app

coderouter/ingress/dashboard_routes.py CHANGED Viewed

@@ -76,6 +76,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
   <header class="border-b border-slate-800 px-6 py-3">
     <div class="max-w-7xl mx-auto flex flex-wrap items-center gap-x-6 gap-y-2 text-sm">
       <span class="text-lg font-semibold tracking-tight">CodeRouter</span>
+      <a href="/launcher" class="text-slate-400 hover:text-slate-200 transition-colors text-sm">Launcher</a>
       <span class="text-slate-400">profile: <span data-bind="profile" class="text-slate-100 font-mono">—</span></span>
       <span class="text-slate-400">uptime: <span data-bind="uptime" class="text-slate-100 font-mono tabnum">—</span></span>
       <span class="text-slate-400">requests: <span data-bind="requests_total" class="text-slate-100 font-mono tabnum">0</span></span>

coderouter-cli 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl

coderouter-cli 2.3.0a4__py3-none-any.whl → 2.5.0__py3-none-any.whl