coderouter-cli 2.4.0__py3-none-any.whl → 2.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +3 -3
- coderouter/config/capability_registry.py +1 -1
- coderouter/config/schemas.py +135 -0
- coderouter/data/model-capabilities.yaml +2 -2
- coderouter/ingress/app.py +11 -0
- coderouter/ingress/dashboard_routes.py +1 -0
- coderouter/ingress/launcher_routes.py +1187 -0
- coderouter/logging.py +2 -2
- coderouter/routing/capability.py +1 -1
- coderouter/state/suggest_rules.py +4 -4
- {coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/METADATA +49 -17
- {coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/RECORD +15 -14
- {coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.4.0.dist-info → coderouter_cli-2.5.1.dist-info}/licenses/LICENSE +0 -0
coderouter/cli.py
CHANGED
|
@@ -61,7 +61,7 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
61
61
|
"binding the server. Repeat to layer multiple files. By "
|
|
62
62
|
"default, file values do NOT override variables already in "
|
|
63
63
|
"the environment (the shell `export` wins). See "
|
|
64
|
-
"docs/troubleshooting.md §5 for 1Password / direnv / sops "
|
|
64
|
+
"docs/guides/troubleshooting.md §5 for 1Password / direnv / sops "
|
|
65
65
|
"integration recipes."
|
|
66
66
|
),
|
|
67
67
|
)
|
|
@@ -116,7 +116,7 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
116
116
|
"POSIX file mode (0600 expected), .gitignore coverage, "
|
|
117
117
|
"and git-tracking state. Bare `--check-env` (no PATH) "
|
|
118
118
|
"looks for `./.env` then `~/.coderouter/.env`. "
|
|
119
|
-
"See docs/troubleshooting.md §5 for the threat model."
|
|
119
|
+
"See docs/guides/troubleshooting.md §5 for the threat model."
|
|
120
120
|
),
|
|
121
121
|
)
|
|
122
122
|
doctor.add_argument(
|
|
@@ -699,8 +699,8 @@ def _run_replay(args: argparse.Namespace) -> int:
|
|
|
699
699
|
if getattr(args, "suggest_rules", False):
|
|
700
700
|
# P1-6: statistical rule suggestion mode.
|
|
701
701
|
# Always compute a full window summary (ignores --compare / --provider).
|
|
702
|
-
from coderouter.state.suggest_rules import format_suggestions, suggest_rules
|
|
703
702
|
from coderouter.state.replay import summarize_window as _sw
|
|
703
|
+
from coderouter.state.suggest_rules import format_suggestions, suggest_rules
|
|
704
704
|
|
|
705
705
|
# Re-read without provider filter so we see all providers.
|
|
706
706
|
all_entries = read_request_log(log_path, since=args.since)
|
|
@@ -109,7 +109,7 @@ class RegistryCapabilities(BaseModel):
|
|
|
109
109
|
"harness. ``degraded`` = the model over-eagerly invokes "
|
|
110
110
|
"tools/skills when given Claude Code's system prompt — e.g. "
|
|
111
111
|
"Llama-3.3-70B treating small talk like ``こんにちは`` as "
|
|
112
|
-
"``Skill(hello)`` invocations (see docs/troubleshooting.md "
|
|
112
|
+
"``Skill(hello)`` invocations (see docs/guides/troubleshooting.md "
|
|
113
113
|
"§4-1 for the symptom log). ``ok`` = explicitly verified "
|
|
114
114
|
"clean. ``None`` = no opinion (treated as ``ok`` at the "
|
|
115
115
|
"startup check)."
|
coderouter/config/schemas.py
CHANGED
|
@@ -874,6 +874,130 @@ class AutoRouterConfig(BaseModel):
|
|
|
874
874
|
)
|
|
875
875
|
|
|
876
876
|
|
|
877
|
+
class LauncherBackendConfig(BaseModel):
    """Executable location for a single Launcher backend.

    Leave ``binary`` unset to have the Launcher use the backend's stock
    executable name (``llama-server`` for llama.cpp, ``python`` for vllm),
    looked up through ``$PATH`` — sufficient when the tool is installed
    globally. Provide an explicit ``binary`` when:

    - llama.cpp is a from-source build
      (e.g. ``~/llama.cpp/build/bin/llama-server``)
    - vllm is installed inside a virtualenv (e.g. ``~/.venv/bin/python``)
    - several builds sit side by side and one of them must be pinned

    ``~`` and environment variables in the path are expanded at launch time.

    Example::

        backends:
          llama.cpp:
            binary: ~/llama.cpp/build/bin/llama-server
          vllm:
            binary: ~/.venv/bin/python
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    binary: str | None = Field(
        default=None,
        description=(
            "Absolute or ``~``-relative path to the backend executable. "
            "llama.cpp default: ``llama-server`` (PATH). vllm default: ``python`` (PATH). "
            "Expanded at launch time."
        ),
    )
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
class LauncherOptionProfile(BaseModel):
    """A named bundle of command-line options for one launcher backend.

    Every entry in ``args`` is keyed by the CLI flag string; how the entry
    is rendered depends on the type of its value:

    - ``True``  — the flag is emitted on its own (e.g. ``--no-mmap``)
    - ``False`` — the flag is left out entirely
    - any other type — stringified and appended as a ``--flag value`` pair

    Example::

        name: "GPU speed priority"
        args:
          "-ngl": 99
          "--ctx-size": 4096
          "--no-mmap": false
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        ...,
        description="Display name shown in the Launcher UI dropdown.",
    )
    args: dict[str, str | int | float | bool] = Field(
        default_factory=dict,
        description=(
            "CLI flag → value mapping. bool True = flag only (no value). "
            "bool False = omit flag. "
            "All other types are stringified and passed as '--flag value'."
        ),
    )
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
class LauncherConfig(BaseModel):
    """Schema for the ``launcher:`` section of providers.yaml.

    These settings drive the Launcher UI served at ``/launcher``: which
    directories are scanned for models, which executable each backend
    uses, and which option presets are offered per backend.

    Example::

        launcher:
          model_dirs:
            - ~/models
            - /data/gguf
          option_profiles:
            llama.cpp:
              - name: "GPU speed priority"
                args:
                  "-ngl": 99
                  "--ctx-size": 4096
            vllm:
              - name: "Standard"
                args:
                  "--dtype": "auto"
                  "--max-model-len": 4096
    """

    # Unknown keys are rejected rather than silently ignored.
    model_config = ConfigDict(extra="forbid")

    model_dirs: list[str] = Field(
        default_factory=list,
        description=(
            "Directories to scan for model files (.gguf, .safetensors, .bin, .pt, .ggml). "
            "Paths are expanded (~ and env vars) at scan time, not at load. "
            "Non-existent paths are silently skipped."
        ),
    )
    backends: dict[str, LauncherBackendConfig] = Field(
        default_factory=dict,
        description=(
            "Per-backend binary path overrides. Keys are backend names ('llama.cpp', 'vllm'). "
            "When a key is absent, the default executable is used ('llama-server' / 'python') "
            "and resolved via PATH. "
            "Useful when running a from-source build or a venv-specific binary."
        ),
    )
    option_profiles: dict[str, list[LauncherOptionProfile]] = Field(
        default_factory=dict,
        description=(
            "Named option presets per backend. Keys should be backend names: 'llama.cpp', 'vllm'. "
            "Each key maps to an ordered list of named presets. "
            "A free-form 'extra args' field is always available in the UI for one-off overrides "
            "without touching this config."
        ),
    )
|
|
999
|
+
|
|
1000
|
+
|
|
877
1001
|
class PluginsConfig(BaseModel):
|
|
878
1002
|
"""The ``plugins:`` block in providers.yaml (v2.3.0).
|
|
879
1003
|
|
|
@@ -1082,6 +1206,17 @@ class CodeRouterConfig(BaseModel):
|
|
|
1082
1206
|
"plugins (zero-cost, backward-compatible default)."
|
|
1083
1207
|
),
|
|
1084
1208
|
)
|
|
1209
|
+
launcher: LauncherConfig | None = Field(
|
|
1210
|
+
default=None,
|
|
1211
|
+
description=(
|
|
1212
|
+
"Launcher configuration for the /launcher UI. "
|
|
1213
|
+
"Defines model_dirs to scan and option_profiles per backend "
|
|
1214
|
+
"('llama.cpp', 'vllm'). "
|
|
1215
|
+
"Unset (None) = Launcher UI shows empty model list and no profiles. "
|
|
1216
|
+
"The Launcher UI itself is always available at /launcher "
|
|
1217
|
+
"regardless of this setting."
|
|
1218
|
+
),
|
|
1219
|
+
)
|
|
1085
1220
|
|
|
1086
1221
|
@model_validator(mode="after")
|
|
1087
1222
|
def _check_default_profile_exists(self) -> CodeRouterConfig:
|
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
# Claude Code's agentic-coding harness;
|
|
36
36
|
# "degraded" triggers a startup WARN when
|
|
37
37
|
# the provider is on a `claude-code-*`
|
|
38
|
-
# chain. See docs/troubleshooting.md §4-1.
|
|
38
|
+
# chain. See docs/guides/troubleshooting.md §4-1.
|
|
39
39
|
#
|
|
40
40
|
# First-match semantics: rules within a file are evaluated top-to-bottom
|
|
41
41
|
# per flag; the first rule whose glob matches AND declares that flag
|
|
@@ -153,7 +153,7 @@ rules:
|
|
|
153
153
|
# Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
|
|
154
154
|
# ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
|
|
155
155
|
# ``AskUserQuestion("What is your name?")`` elicitations — see
|
|
156
|
-
# docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
|
|
156
|
+
# docs/articles/note/note-nvidia-nim.md §6-2 + docs/guides/troubleshooting.md §4-1.
|
|
157
157
|
#
|
|
158
158
|
# Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
|
|
159
159
|
# uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use
|
coderouter/ingress/app.py
CHANGED
|
@@ -13,6 +13,7 @@ from coderouter import __version__
|
|
|
13
13
|
from coderouter.config import load_config
|
|
14
14
|
from coderouter.ingress.anthropic_routes import router as anthropic_router
|
|
15
15
|
from coderouter.ingress.dashboard_routes import router as dashboard_router
|
|
16
|
+
from coderouter.ingress.launcher_routes import router as launcher_router
|
|
16
17
|
from coderouter.ingress.metrics_routes import router as metrics_router
|
|
17
18
|
from coderouter.ingress.openai_routes import router as openai_router
|
|
18
19
|
from coderouter.logging import configure_logging, get_logger
|
|
@@ -178,6 +179,12 @@ def create_app(config_path: str | None = None) -> FastAPI:
|
|
|
178
179
|
with contextlib.suppress(Exception):
|
|
179
180
|
await probe_task
|
|
180
181
|
|
|
182
|
+
# Launcher: stop child llama.cpp / vllm processes so they don't orphan.
|
|
183
|
+
from coderouter.ingress.launcher_routes import shutdown_launcher
|
|
184
|
+
|
|
185
|
+
with contextlib.suppress(Exception):
|
|
186
|
+
await shutdown_launcher(app)
|
|
187
|
+
|
|
181
188
|
# v2.0-J: graceful shutdown of recovery probe tasks.
|
|
182
189
|
with contextlib.suppress(Exception):
|
|
183
190
|
await engine.shutdown_recovery_probes()
|
|
@@ -259,6 +266,10 @@ def create_app(config_path: str | None = None) -> FastAPI:
|
|
|
259
266
|
# Same root-level mount as /metrics.json — the dashboard is a UI
|
|
260
267
|
# concern and doesn't belong under the /v1 API surface.
|
|
261
268
|
app.include_router(dashboard_router, tags=["dashboard"])
|
|
269
|
+
# Launcher UI + process management API.
|
|
270
|
+
# /launcher → single-page HTML UI
|
|
271
|
+
# /api/launcher/* → model scan, process start/stop/logs
|
|
272
|
+
app.include_router(launcher_router, tags=["launcher"])
|
|
262
273
|
|
|
263
274
|
return app
|
|
264
275
|
|
|
@@ -76,6 +76,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
76
76
|
<header class="border-b border-slate-800 px-6 py-3">
|
|
77
77
|
<div class="max-w-7xl mx-auto flex flex-wrap items-center gap-x-6 gap-y-2 text-sm">
|
|
78
78
|
<span class="text-lg font-semibold tracking-tight">CodeRouter</span>
|
|
79
|
+
<a href="/launcher" class="text-slate-400 hover:text-slate-200 transition-colors text-sm">Launcher</a>
|
|
79
80
|
<span class="text-slate-400">profile: <span data-bind="profile" class="text-slate-100 font-mono">—</span></span>
|
|
80
81
|
<span class="text-slate-400">uptime: <span data-bind="uptime" class="text-slate-100 font-mono tabnum">—</span></span>
|
|
81
82
|
<span class="text-slate-400">requests: <span data-bind="requests_total" class="text-slate-100 font-mono tabnum">0</span></span>
|