coderouter-cli 2.4.0__py3-none-any.whl → 2.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
coderouter/cli.py CHANGED
@@ -61,7 +61,7 @@ def _build_parser() -> argparse.ArgumentParser:
61
61
  "binding the server. Repeat to layer multiple files. By "
62
62
  "default, file values do NOT override variables already in "
63
63
  "the environment (the shell `export` wins). See "
64
- "docs/troubleshooting.md §5 for 1Password / direnv / sops "
64
+ "docs/guides/troubleshooting.md §5 for 1Password / direnv / sops "
65
65
  "integration recipes."
66
66
  ),
67
67
  )
@@ -116,7 +116,7 @@ def _build_parser() -> argparse.ArgumentParser:
116
116
  "POSIX file mode (0600 expected), .gitignore coverage, "
117
117
  "and git-tracking state. Bare `--check-env` (no PATH) "
118
118
  "looks for `./.env` then `~/.coderouter/.env`. "
119
- "See docs/troubleshooting.md §5 for the threat model."
119
+ "See docs/guides/troubleshooting.md §5 for the threat model."
120
120
  ),
121
121
  )
122
122
  doctor.add_argument(
@@ -699,8 +699,8 @@ def _run_replay(args: argparse.Namespace) -> int:
699
699
  if getattr(args, "suggest_rules", False):
700
700
  # P1-6: statistical rule suggestion mode.
701
701
  # Always compute a full window summary (ignores --compare / --provider).
702
- from coderouter.state.suggest_rules import format_suggestions, suggest_rules
703
702
  from coderouter.state.replay import summarize_window as _sw
703
+ from coderouter.state.suggest_rules import format_suggestions, suggest_rules
704
704
 
705
705
  # Re-read without provider filter so we see all providers.
706
706
  all_entries = read_request_log(log_path, since=args.since)
@@ -109,7 +109,7 @@ class RegistryCapabilities(BaseModel):
109
109
  "harness. ``degraded`` = the model over-eagerly invokes "
110
110
  "tools/skills when given Claude Code's system prompt — e.g. "
111
111
  "Llama-3.3-70B treating small talk like ``こんにちは`` as "
112
- "``Skill(hello)`` invocations (see docs/troubleshooting.md "
112
+ "``Skill(hello)`` invocations (see docs/guides/troubleshooting.md "
113
113
  "§4-1 for the symptom log). ``ok`` = explicitly verified "
114
114
  "clean. ``None`` = no opinion (treated as ``ok`` at the "
115
115
  "startup check)."
@@ -874,6 +874,130 @@ class AutoRouterConfig(BaseModel):
874
874
  )
875
875
 
876
876
 
877
+ class LauncherBackendConfig(BaseModel):
878
+ """Per-backend binary path configuration for the Launcher.
879
+
880
+ When ``binary`` is unset, the Launcher falls back to the default
881
+ executable name (``llama-server`` for llama.cpp, ``python`` for vllm)
882
+ and relies on ``$PATH`` resolution — which works when the tool is
883
+ globally installed. Set ``binary`` when:
884
+
885
+ - llama.cpp was built from source (e.g. ``~/llama.cpp/build/bin/llama-server``)
886
+ - vllm lives in a virtualenv (e.g. ``~/.venv/bin/python``)
887
+ - Multiple builds coexist and you want to pin a specific one
888
+
889
+ Tilde (``~``) and environment variables are expanded at launch time.
890
+
891
+ Example::
892
+
893
+ backends:
894
+ llama.cpp:
895
+ binary: ~/llama.cpp/build/bin/llama-server
896
+ vllm:
897
+ binary: ~/.venv/bin/python
898
+ """
899
+
900
+ model_config = ConfigDict(extra="forbid")
901
+
902
+ binary: str | None = Field(
903
+ default=None,
904
+ description=(
905
+ "Absolute or ``~``-relative path to the backend executable. "
906
+ "llama.cpp default: ``llama-server`` (PATH). "
907
+ "vllm default: ``python`` (PATH). "
908
+ "Expanded at launch time."
909
+ ),
910
+ )
911
+
912
+
913
+ class LauncherOptionProfile(BaseModel):
914
+ """One named option preset for a launcher backend (e.g. llama.cpp / vllm).
915
+
916
+ ``args`` maps CLI flag strings to their values. A bool value of
917
+ ``True`` means "include the flag without a value" (e.g. ``--no-mmap``);
918
+ ``False`` means "omit the flag entirely". All other value types are
919
+ converted to strings and appended as ``--flag value`` pairs.
920
+
921
+ Example::
922
+
923
+ name: "GPU速度重視"
924
+ args:
925
+ "-ngl": 99
926
+ "--ctx-size": 4096
927
+ "--no-mmap": false
928
+ """
929
+
930
+ model_config = ConfigDict(extra="forbid")
931
+
932
+ name: str = Field(..., description="Display name shown in the Launcher UI dropdown.")
933
+ args: dict[str, str | int | float | bool] = Field(
934
+ default_factory=dict,
935
+ description=(
936
+ "CLI flag → value mapping. "
937
+ "bool True = flag only (no value). "
938
+ "bool False = omit flag. "
939
+ "All other types are stringified and passed as '--flag value'."
940
+ ),
941
+ )
942
+
943
+
944
+ class LauncherConfig(BaseModel):
945
+ """The ``launcher:`` block in providers.yaml.
946
+
947
+ Controls the Launcher UI available at ``/launcher``.
948
+
949
+ Example::
950
+
951
+ launcher:
952
+ model_dirs:
953
+ - ~/models
954
+ - /data/gguf
955
+ option_profiles:
956
+ llama.cpp:
957
+ - name: "GPU速度重視"
958
+ args:
959
+ "-ngl": 99
960
+ "--ctx-size": 4096
961
+ vllm:
962
+ - name: "標準"
963
+ args:
964
+ "--dtype": "auto"
965
+ "--max-model-len": 4096
966
+ """
967
+
968
+ model_config = ConfigDict(extra="forbid")
969
+
970
+ model_dirs: list[str] = Field(
971
+ default_factory=list,
972
+ description=(
973
+ "Directories to scan for model files "
974
+ "(.gguf, .safetensors, .bin, .pt, .ggml). "
975
+ "Paths are expanded (~ and env vars) at scan time, not at load. "
976
+ "Non-existent paths are silently skipped."
977
+ ),
978
+ )
979
+ backends: dict[str, LauncherBackendConfig] = Field(
980
+ default_factory=dict,
981
+ description=(
982
+ "Per-backend binary path overrides. "
983
+ "Keys are backend names ('llama.cpp', 'vllm'). "
984
+ "When a key is absent, the default executable is used "
985
+ "('llama-server' / 'python') and resolved via PATH. "
986
+ "Useful when running a from-source build or a venv-specific binary."
987
+ ),
988
+ )
989
+ option_profiles: dict[str, list[LauncherOptionProfile]] = Field(
990
+ default_factory=dict,
991
+ description=(
992
+ "Named option presets per backend. "
993
+ "Keys should be backend names: 'llama.cpp', 'vllm'. "
994
+ "Each key maps to an ordered list of named presets. "
995
+ "A free-form 'extra args' field is always available in the UI "
996
+ "for one-off overrides without touching this config."
997
+ ),
998
+ )
999
+
1000
+
877
1001
  class PluginsConfig(BaseModel):
878
1002
  """The ``plugins:`` block in providers.yaml (v2.3.0).
879
1003
 
@@ -1082,6 +1206,17 @@ class CodeRouterConfig(BaseModel):
1082
1206
  "plugins (zero-cost, backward-compatible default)."
1083
1207
  ),
1084
1208
  )
1209
+ launcher: LauncherConfig | None = Field(
1210
+ default=None,
1211
+ description=(
1212
+ "Launcher configuration for the /launcher UI. "
1213
+ "Defines model_dirs to scan and option_profiles per backend "
1214
+ "('llama.cpp', 'vllm'). "
1215
+ "Unset (None) = Launcher UI shows empty model list and no profiles. "
1216
+ "The Launcher UI itself is always available at /launcher "
1217
+ "regardless of this setting."
1218
+ ),
1219
+ )
1085
1220
 
1086
1221
  @model_validator(mode="after")
1087
1222
  def _check_default_profile_exists(self) -> CodeRouterConfig:
@@ -35,7 +35,7 @@
35
35
  # Claude Code's agentic-coding harness;
36
36
  # "degraded" triggers a startup WARN when
37
37
  # the provider is on a `claude-code-*`
38
- # chain. See docs/troubleshooting.md §4-1.
38
+ # chain. See docs/guides/troubleshooting.md §4-1.
39
39
  #
40
40
  # First-match semantics: rules within a file are evaluated top-to-bottom
41
41
  # per flag; the first rule whose glob matches AND declares that flag
@@ -153,7 +153,7 @@ rules:
153
153
  # Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
154
154
  # ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
155
155
  # ``AskUserQuestion("What is your name?")`` elicitations — see
156
- # docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
156
+ # docs/articles/note/note-nvidia-nim.md §6-2 + docs/guides/troubleshooting.md §4-1.
157
157
  #
158
158
  # Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
159
159
  # uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use
coderouter/ingress/app.py CHANGED
@@ -13,6 +13,7 @@ from coderouter import __version__
13
13
  from coderouter.config import load_config
14
14
  from coderouter.ingress.anthropic_routes import router as anthropic_router
15
15
  from coderouter.ingress.dashboard_routes import router as dashboard_router
16
+ from coderouter.ingress.launcher_routes import router as launcher_router
16
17
  from coderouter.ingress.metrics_routes import router as metrics_router
17
18
  from coderouter.ingress.openai_routes import router as openai_router
18
19
  from coderouter.logging import configure_logging, get_logger
@@ -178,6 +179,12 @@ def create_app(config_path: str | None = None) -> FastAPI:
178
179
  with contextlib.suppress(Exception):
179
180
  await probe_task
180
181
 
182
+ # Launcher: stop child llama.cpp / vllm processes so they don't orphan.
183
+ from coderouter.ingress.launcher_routes import shutdown_launcher
184
+
185
+ with contextlib.suppress(Exception):
186
+ await shutdown_launcher(app)
187
+
181
188
  # v2.0-J: graceful shutdown of recovery probe tasks.
182
189
  with contextlib.suppress(Exception):
183
190
  await engine.shutdown_recovery_probes()
@@ -259,6 +266,10 @@ def create_app(config_path: str | None = None) -> FastAPI:
259
266
  # Same root-level mount as /metrics.json — the dashboard is a UI
260
267
  # concern and doesn't belong under the /v1 API surface.
261
268
  app.include_router(dashboard_router, tags=["dashboard"])
269
+ # Launcher UI + process management API.
270
+ # /launcher → single-page HTML UI
271
+ # /api/launcher/* → model scan, process start/stop/logs
272
+ app.include_router(launcher_router, tags=["launcher"])
262
273
 
263
274
  return app
264
275
 
@@ -76,6 +76,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
76
76
  <header class="border-b border-slate-800 px-6 py-3">
77
77
  <div class="max-w-7xl mx-auto flex flex-wrap items-center gap-x-6 gap-y-2 text-sm">
78
78
  <span class="text-lg font-semibold tracking-tight">CodeRouter</span>
79
+ <a href="/launcher" class="text-slate-400 hover:text-slate-200 transition-colors text-sm">Launcher</a>
79
80
  <span class="text-slate-400">profile: <span data-bind="profile" class="text-slate-100 font-mono">—</span></span>
80
81
  <span class="text-slate-400">uptime: <span data-bind="uptime" class="text-slate-100 font-mono tabnum">—</span></span>
81
82
  <span class="text-slate-400">requests: <span data-bind="requests_total" class="text-slate-100 font-mono tabnum">0</span></span>