PyPI - coderouter-cli - Versions diffs - coderouter_cli-2.4.0-py3-none-any.whl → coderouter_cli-2.5.1-py3-none-any.whl - Mend

coderouter-cli: coderouter_cli-2.4.0-py3-none-any.whl → coderouter_cli-2.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

coderouter/cli.py +3 -3
coderouter/config/capability_registry.py +1 -1
coderouter/config/schemas.py +135 -0
coderouter/data/model-capabilities.yaml +2 -2
coderouter/ingress/app.py +11 -0
coderouter/ingress/dashboard_routes.py +1 -0
coderouter/ingress/launcher_routes.py +1187 -0
coderouter/logging.py +2 -2
coderouter/routing/capability.py +1 -1
coderouter/state/suggest_rules.py +4 -4
{coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/METADATA +49 -17
{coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/RECORD +15 -14
{coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/WHEEL +0 -0
{coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/entry_points.txt +0 -0
{coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/licenses/LICENSE +0 -0

coderouter/cli.py CHANGED Viewed

@@ -61,7 +61,7 @@ def _build_parser() -> argparse.ArgumentParser:
             "binding the server. Repeat to layer multiple files. By "
             "default, file values do NOT override variables already in "
             "the environment (the shell `export` wins). See "
-            "docs/troubleshooting.md §5 for 1Password / direnv / sops "
+            "docs/guides/troubleshooting.md §5 for 1Password / direnv / sops "
             "integration recipes."
         ),
     )
@@ -116,7 +116,7 @@ def _build_parser() -> argparse.ArgumentParser:
             "POSIX file mode (0600 expected), .gitignore coverage, "
             "and git-tracking state. Bare `--check-env` (no PATH) "
             "looks for `./.env` then `~/.coderouter/.env`. "
-            "See docs/troubleshooting.md §5 for the threat model."
+            "See docs/guides/troubleshooting.md §5 for the threat model."
         ),
     )
     doctor.add_argument(
@@ -699,8 +699,8 @@ def _run_replay(args: argparse.Namespace) -> int:
     if getattr(args, "suggest_rules", False):
         # P1-6: statistical rule suggestion mode.
         # Always compute a full window summary (ignores --compare / --provider).
-        from coderouter.state.suggest_rules import format_suggestions, suggest_rules
         from coderouter.state.replay import summarize_window as _sw
+        from coderouter.state.suggest_rules import format_suggestions, suggest_rules
         # Re-read without provider filter so we see all providers.
         all_entries = read_request_log(log_path, since=args.since)

coderouter/config/capability_registry.py CHANGED Viewed

@@ -109,7 +109,7 @@ class RegistryCapabilities(BaseModel):
             "harness. ``degraded`` = the model over-eagerly invokes "
             "tools/skills when given Claude Code's system prompt — e.g. "
             "Llama-3.3-70B treating small talk like ``こんにちは`` as "
-            "``Skill(hello)`` invocations (see docs/troubleshooting.md "
+            "``Skill(hello)`` invocations (see docs/guides/troubleshooting.md "
             "§4-1 for the symptom log). ``ok`` = explicitly verified "
             "clean. ``None`` = no opinion (treated as ``ok`` at the "
             "startup check)."

coderouter/config/schemas.py CHANGED Viewed

@@ -874,6 +874,130 @@ class AutoRouterConfig(BaseModel):
     )
+class LauncherBackendConfig(BaseModel):
+    """Per-backend binary path configuration for the Launcher.
+    When ``binary`` is unset, the Launcher falls back to the default
+    executable name (``llama-server`` for llama.cpp, ``python`` for vllm)
+    and relies on ``$PATH`` resolution — which works when the tool is
+    globally installed.  Set ``binary`` when:
+    - llama.cpp was built from source (e.g. ``~/llama.cpp/build/bin/llama-server``)
+    - vllm lives in a virtualenv (e.g. ``~/.venv/bin/python``)
+    - Multiple builds coexist and you want to pin a specific one
+    Tilde (``~``) and environment variables are expanded at launch time.
+    Example::
+        backends:
+          llama.cpp:
+            binary: ~/llama.cpp/build/bin/llama-server
+          vllm:
+            binary: ~/.venv/bin/python
+    """
+    model_config = ConfigDict(extra="forbid")
+    binary: str | None = Field(
+        default=None,
+        description=(
+            "Absolute or ``~``-relative path to the backend executable. "
+            "llama.cpp default: ``llama-server`` (PATH). "
+            "vllm default: ``python`` (PATH). "
+            "Expanded at launch time."
+        ),
+    )
+class LauncherOptionProfile(BaseModel):
+    """One named option preset for a launcher backend (e.g. llama.cpp / vllm).
+    ``args`` maps CLI flag strings to their values.  A bool value of
+    ``True`` means "include the flag without a value" (e.g. ``--no-mmap``);
+    ``False`` means "omit the flag entirely".  All other value types are
+    converted to strings and appended as ``--flag value`` pairs.
+    Example::
+        name: "GPU速度重視"
+        args:
+          "-ngl": 99
+          "--ctx-size": 4096
+          "--no-mmap": false
+    """
+    model_config = ConfigDict(extra="forbid")
+    name: str = Field(..., description="Display name shown in the Launcher UI dropdown.")
+    args: dict[str, str | int | float | bool] = Field(
+        default_factory=dict,
+        description=(
+            "CLI flag → value mapping. "
+            "bool True = flag only (no value). "
+            "bool False = omit flag. "
+            "All other types are stringified and passed as '--flag value'."
+        ),
+    )
+class LauncherConfig(BaseModel):
+    """The ``launcher:`` block in providers.yaml.
+    Controls the Launcher UI available at ``/launcher``.
+    Example::
+        launcher:
+          model_dirs:
+            - ~/models
+            - /data/gguf
+          option_profiles:
+            llama.cpp:
+              - name: "GPU速度重視"
+                args:
+                  "-ngl": 99
+                  "--ctx-size": 4096
+            vllm:
+              - name: "標準"
+                args:
+                  "--dtype": "auto"
+                  "--max-model-len": 4096
+    """
+    model_config = ConfigDict(extra="forbid")
+    model_dirs: list[str] = Field(
+        default_factory=list,
+        description=(
+            "Directories to scan for model files "
+            "(.gguf, .safetensors, .bin, .pt, .ggml). "
+            "Paths are expanded (~ and env vars) at scan time, not at load. "
+            "Non-existent paths are silently skipped."
+        ),
+    )
+    backends: dict[str, LauncherBackendConfig] = Field(
+        default_factory=dict,
+        description=(
+            "Per-backend binary path overrides. "
+            "Keys are backend names ('llama.cpp', 'vllm'). "
+            "When a key is absent, the default executable is used "
+            "('llama-server' / 'python') and resolved via PATH. "
+            "Useful when running a from-source build or a venv-specific binary."
+        ),
+    )
+    option_profiles: dict[str, list[LauncherOptionProfile]] = Field(
+        default_factory=dict,
+        description=(
+            "Named option presets per backend. "
+            "Keys should be backend names: 'llama.cpp', 'vllm'. "
+            "Each key maps to an ordered list of named presets. "
+            "A free-form 'extra args' field is always available in the UI "
+            "for one-off overrides without touching this config."
+        ),
+    )
 class PluginsConfig(BaseModel):
     """The ``plugins:`` block in providers.yaml (v2.3.0).
@@ -1082,6 +1206,17 @@ class CodeRouterConfig(BaseModel):
             "plugins (zero-cost, backward-compatible default)."
         ),
     )
+    launcher: LauncherConfig | None = Field(
+        default=None,
+        description=(
+            "Launcher configuration for the /launcher UI. "
+            "Defines model_dirs to scan and option_profiles per backend "
+            "('llama.cpp', 'vllm'). "
+            "Unset (None) = Launcher UI shows empty model list and no profiles. "
+            "The Launcher UI itself is always available at /launcher "
+            "regardless of this setting."
+        ),
+    )
     @model_validator(mode="after")
     def _check_default_profile_exists(self) -> CodeRouterConfig:

coderouter/data/model-capabilities.yaml CHANGED Viewed

@@ -35,7 +35,7 @@
 #                                       Claude Code's agentic-coding harness;
 #                                       "degraded" triggers a startup WARN when
 #                                       the provider is on a `claude-code-*`
-#                                       chain. See docs/troubleshooting.md §4-1.
+#                                       chain. See docs/guides/troubleshooting.md §4-1.
 #
 # First-match semantics: rules within a file are evaluated top-to-bottom
 # per flag; the first rule whose glob matches AND declares that flag
@@ -153,7 +153,7 @@ rules:
   # Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
   # ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
   # ``AskUserQuestion("What is your name?")`` elicitations — see
-  # docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
+  # docs/articles/note/note-nvidia-nim.md §6-2 + docs/guides/troubleshooting.md §4-1.
   #
   # Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
   # uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use

coderouter/ingress/app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from coderouter import __version__
 from coderouter.config import load_config
 from coderouter.ingress.anthropic_routes import router as anthropic_router
 from coderouter.ingress.dashboard_routes import router as dashboard_router
+from coderouter.ingress.launcher_routes import router as launcher_router
 from coderouter.ingress.metrics_routes import router as metrics_router
 from coderouter.ingress.openai_routes import router as openai_router
 from coderouter.logging import configure_logging, get_logger
@@ -178,6 +179,12 @@ def create_app(config_path: str | None = None) -> FastAPI:
             with contextlib.suppress(Exception):
                 await probe_task
+        # Launcher: stop child llama.cpp / vllm processes so they don't orphan.
+        from coderouter.ingress.launcher_routes import shutdown_launcher
+        with contextlib.suppress(Exception):
+            await shutdown_launcher(app)
         # v2.0-J: graceful shutdown of recovery probe tasks.
         with contextlib.suppress(Exception):
             await engine.shutdown_recovery_probes()
@@ -259,6 +266,10 @@ def create_app(config_path: str | None = None) -> FastAPI:
     # Same root-level mount as /metrics.json — the dashboard is a UI
     # concern and doesn't belong under the /v1 API surface.
     app.include_router(dashboard_router, tags=["dashboard"])
+    # Launcher UI + process management API.
+    # /launcher       → single-page HTML UI
+    # /api/launcher/* → model scan, process start/stop/logs
+    app.include_router(launcher_router, tags=["launcher"])
     return app

coderouter/ingress/dashboard_routes.py CHANGED Viewed

@@ -76,6 +76,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
   <header class="border-b border-slate-800 px-6 py-3">
     <div class="max-w-7xl mx-auto flex flex-wrap items-center gap-x-6 gap-y-2 text-sm">
       <span class="text-lg font-semibold tracking-tight">CodeRouter</span>
+      <a href="/launcher" class="text-slate-400 hover:text-slate-200 transition-colors text-sm">Launcher</a>
       <span class="text-slate-400">profile: <span data-bind="profile" class="text-slate-100 font-mono">—</span></span>
       <span class="text-slate-400">uptime: <span data-bind="uptime" class="text-slate-100 font-mono tabnum">—</span></span>
       <span class="text-slate-400">requests: <span data-bind="requests_total" class="text-slate-100 font-mono tabnum">0</span></span>

coderouter-cli: coderouter_cli-2.4.0-py3-none-any.whl → coderouter_cli-2.5.1-py3-none-any.whl

coderouter-cli: coderouter_cli-2.4.0-py3-none-any.whl → coderouter_cli-2.5.1-py3-none-any.whl