coderouter-cli 1.7.0__py3-none-any.whl → 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/cli.py +168 -2
- coderouter/config/capability_registry.py +27 -1
- coderouter/config/loader.py +27 -0
- coderouter/data/model-capabilities.yaml +255 -0
- coderouter/doctor_apply.py +612 -0
- coderouter/ingress/app.py +8 -0
- coderouter/logging.py +86 -0
- coderouter/routing/capability.py +113 -1
- {coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.1.dist-info}/METADATA +44 -19
- {coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.1.dist-info}/RECORD +13 -12
- {coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.1.dist-info}/WHEEL +0 -0
- {coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.1.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-1.7.0.dist-info → coderouter_cli-1.8.1.dist-info}/licenses/LICENSE +0 -0
coderouter/cli.py
CHANGED
|
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import argparse
|
|
6
6
|
import sys
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
|
|
8
9
|
import uvicorn
|
|
9
10
|
|
|
@@ -126,6 +127,40 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
126
127
|
"./providers.yaml, or ~/.coderouter/providers.yaml."
|
|
127
128
|
),
|
|
128
129
|
)
|
|
130
|
+
# v1.7-B (#3): --apply writes the doctor-emitted YAML patches back
|
|
131
|
+
# into providers.yaml / model-capabilities.yaml while preserving
|
|
132
|
+
# comments and key order. --dry-run is the same path minus the file
|
|
133
|
+
# write — prints a unified diff (``git apply``-compatible) for review.
|
|
134
|
+
# Bare ``--dry-run`` (without ``--apply``) is the canonical "preview"
|
|
135
|
+
# form; ``--apply --dry-run`` is also accepted as an explicit synonym
|
|
136
|
+
# so muscle-memory from ``git apply --dry-run`` works either way.
|
|
137
|
+
# Both flags are no-ops when --check-model is absent (--check-env
|
|
138
|
+
# has its own remediation surface and is not in scope for --apply).
|
|
139
|
+
# Implementation lives in coderouter/doctor_apply.py — round-trip
|
|
140
|
+
# via the optional ``ruamel.yaml`` dependency, see that module's
|
|
141
|
+
# docstring for the contract and shape invariants.
|
|
142
|
+
doctor.add_argument(
|
|
143
|
+
"--apply",
|
|
144
|
+
action="store_true",
|
|
145
|
+
help=(
|
|
146
|
+
"After --check-model, write the suggested patches back into "
|
|
147
|
+
"providers.yaml / model-capabilities.yaml. A `.bak` backup is "
|
|
148
|
+
"created next to each modified file. Idempotent: a re-run "
|
|
149
|
+
"after a successful apply is a no-op (no write, exit 0). "
|
|
150
|
+
"Requires the optional `ruamel.yaml` dependency — install "
|
|
151
|
+
"via `pip install coderouter-cli[doctor]`."
|
|
152
|
+
),
|
|
153
|
+
)
|
|
154
|
+
doctor.add_argument(
|
|
155
|
+
"--dry-run",
|
|
156
|
+
action="store_true",
|
|
157
|
+
help=(
|
|
158
|
+
"Preview --apply changes as a unified diff without writing "
|
|
159
|
+
"to disk. Implies --apply mode for diff generation. The "
|
|
160
|
+
"output is `git apply`-compatible so it can be saved and "
|
|
161
|
+
"applied later (or piped to `patch -p0`)."
|
|
162
|
+
),
|
|
163
|
+
)
|
|
129
164
|
|
|
130
165
|
# v1.5-C: `coderouter stats` — live TUI over GET /metrics.json.
|
|
131
166
|
# Lazy-imports ``curses`` inside the runner so the CLI boot stays
|
|
@@ -283,7 +318,14 @@ def _run_doctor(args: argparse.Namespace) -> int:
|
|
|
283
318
|
|
|
284
319
|
|
|
285
320
|
def _run_check_model(args: argparse.Namespace) -> int:
|
|
286
|
-
"""v0.7-B: per-provider HTTP capability probe.
|
|
321
|
+
"""v0.7-B: per-provider HTTP capability probe.
|
|
322
|
+
|
|
323
|
+
v1.7-B (#3): when ``--apply`` or ``--dry-run`` is also set, we run
|
|
324
|
+
the same probes and then route the emitted patches through
|
|
325
|
+
:func:`coderouter.doctor_apply.apply_doctor_patches`. Bare probe
|
|
326
|
+
(no apply / dry-run flags) keeps the original behavior verbatim
|
|
327
|
+
so existing CI integrations don't change shape.
|
|
328
|
+
"""
|
|
287
329
|
from coderouter.config.loader import load_config
|
|
288
330
|
from coderouter.doctor import (
|
|
289
331
|
exit_code_for,
|
|
@@ -307,7 +349,131 @@ def _run_check_model(args: argparse.Namespace) -> int:
|
|
|
307
349
|
return 1
|
|
308
350
|
|
|
309
351
|
print(format_report(report))
|
|
310
|
-
|
|
352
|
+
base_exit = exit_code_for(report)
|
|
353
|
+
|
|
354
|
+
apply_mode = bool(getattr(args, "apply", False))
|
|
355
|
+
dry_run_mode = bool(getattr(args, "dry_run", False))
|
|
356
|
+
if apply_mode or dry_run_mode:
|
|
357
|
+
# Resolve the same providers.yaml the loader picked up so the
|
|
358
|
+
# apply step writes back to the exact file that was probed
|
|
359
|
+
# (avoids a mismatch when CODEROUTER_CONFIG points elsewhere
|
|
360
|
+
# than the default path).
|
|
361
|
+
config_path = _resolve_config_path(args.config)
|
|
362
|
+
return _run_apply_or_dry_run(
|
|
363
|
+
report=report,
|
|
364
|
+
config_path=config_path,
|
|
365
|
+
write=apply_mode and not dry_run_mode,
|
|
366
|
+
base_exit=base_exit,
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
return base_exit
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _resolve_config_path(explicit: str | None) -> Path:
|
|
373
|
+
"""Mirror loader._candidate_paths and return the file actually used.
|
|
374
|
+
|
|
375
|
+
Used by ``--apply`` to write back to the same path the loader
|
|
376
|
+
picked up when it parsed providers.yaml. Falls through the same
|
|
377
|
+
search order so a ``CODEROUTER_CONFIG`` env or default-path lookup
|
|
378
|
+
matches the live config.
|
|
379
|
+
"""
|
|
380
|
+
import os
|
|
381
|
+
|
|
382
|
+
candidates: list[Path] = []
|
|
383
|
+
if explicit:
|
|
384
|
+
candidates.append(Path(explicit))
|
|
385
|
+
if env_path := os.environ.get("CODEROUTER_CONFIG"):
|
|
386
|
+
candidates.append(Path(env_path))
|
|
387
|
+
candidates.append(Path.cwd() / "providers.yaml")
|
|
388
|
+
candidates.append(Path.home() / ".coderouter" / "providers.yaml")
|
|
389
|
+
for p in candidates:
|
|
390
|
+
if p.is_file():
|
|
391
|
+
return p
|
|
392
|
+
# Fall back to the last candidate even if absent — the apply step
|
|
393
|
+
# will surface a clearer error than this resolver would.
|
|
394
|
+
return candidates[-1]
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def _run_apply_or_dry_run(
    *,
    report: object,
    config_path: Path,
    write: bool,
    base_exit: int,
) -> int:
    """v1.7-B (#3): run ``apply_doctor_patches`` and print a summary.

    Args:
        report: Probe report handed straight to ``apply_doctor_patches``.
        config_path: providers.yaml path forwarded to the apply step.
        write: ``True`` writes files (``--apply``); ``False`` only previews
            the diff (``--dry-run``).
        base_exit: Exit code of the preceding probe run. Only used to
            choose the wording of the "nothing to do" message.

    Returns:
        ``1`` when the apply step raises ``MissingDependencyError`` or
        ``DoctorApplyError`` (the message goes to stderr), otherwise ``0``.
        A clean apply deliberately does not propagate the probe's
        "needs tuning" exit code; the next ``doctor`` run re-evaluates
        the chain.
    """
    from coderouter.doctor_apply import (
        DoctorApplyError,
        MissingDependencyError,
        apply_doctor_patches,
    )

    print()  # visual gap between the probe report and the apply section
    try:
        outcome = apply_doctor_patches(
            report=report,
            config_path=config_path,
            write=write,
        )
    except (MissingDependencyError, DoctorApplyError) as exc:
        # Both failure kinds are reported the same way.
        print(f"doctor --apply: {exc}", file=sys.stderr)
        return 1

    mode_label = "Apply" if write else "Dry-run"
    print(f"{mode_label}: {len(outcome.target_paths)} target file(s).")

    if outcome.skipped_unknown_target:
        print(
            f" warning: {len(outcome.skipped_unknown_target)} probe(s) "
            f"emitted an unknown target_file value: "
            f"{sorted(set(outcome.skipped_unknown_target))}",
            file=sys.stderr,
        )

    if outcome.is_no_op:
        # A healthy chain and an already-patched config both end up here;
        # base_exit tells the two apart.
        if base_exit == 0:
            print(" No NEEDS_TUNING patches to apply — chain is healthy.")
        else:
            print(
                f" All {outcome.no_op_patches} patch(es) already applied "
                f"— providers.yaml is up to date."
            )
        return 0

    summary = f" {outcome.changes_applied} patch(es) applied"
    if outcome.no_op_patches:
        summary += f", {outcome.no_op_patches} already up to date"
    print(summary + ".")

    for target in outcome.target_paths:
        patch_text = outcome.diffs.get(str(target), "")
        if patch_text:
            print()
            # Guarantee exactly one trailing newline after each diff.
            terminator = "" if patch_text.endswith("\n") else "\n"
            print(patch_text, end=terminator)

    if not write:
        print()
        print(" (dry-run — no files were modified. Re-run with --apply to write.)")
        return 0

    for original_file, backup_file in outcome.backups.items():
        print(f" Backup: {original_file} → {backup_file}")
    return 0
|
|
475
|
+
|
|
476
|
+
|
|
311
477
|
|
|
312
478
|
|
|
313
479
|
def _run_check_env(arg_value: str) -> int:
|
|
@@ -102,6 +102,19 @@ class RegistryCapabilities(BaseModel):
|
|
|
102
102
|
"doctor --check-model num_ctx probe (not consumed in v0.7-A)."
|
|
103
103
|
),
|
|
104
104
|
)
|
|
105
|
+
claude_code_suitability: Literal["ok", "degraded"] | None = Field(
|
|
106
|
+
default=None,
|
|
107
|
+
description=(
|
|
108
|
+
"v1.7-B: hint for use behind Claude Code's agentic-coding "
|
|
109
|
+
"harness. ``degraded`` = the model over-eagerly invokes "
|
|
110
|
+
"tools/skills when given Claude Code's system prompt — e.g. "
|
|
111
|
+
"Llama-3.3-70B treating small talk like ``こんにちは`` as "
|
|
112
|
+
"``Skill(hello)`` invocations (see docs/troubleshooting.md "
|
|
113
|
+
"§4-1 for the symptom log). ``ok`` = explicitly verified "
|
|
114
|
+
"clean. ``None`` = no opinion (treated as ``ok`` at the "
|
|
115
|
+
"startup check)."
|
|
116
|
+
),
|
|
117
|
+
)
|
|
105
118
|
|
|
106
119
|
|
|
107
120
|
class CapabilityRule(BaseModel):
|
|
@@ -168,6 +181,7 @@ class ResolvedCapabilities:
|
|
|
168
181
|
reasoning_passthrough: bool | None = None
|
|
169
182
|
tools: bool | None = None
|
|
170
183
|
max_context_tokens: int | None = None
|
|
184
|
+
claude_code_suitability: Literal["ok", "degraded"] | None = None
|
|
171
185
|
|
|
172
186
|
|
|
173
187
|
# ---------------------------------------------------------------------------
|
|
@@ -218,11 +232,13 @@ class CapabilityRegistry:
|
|
|
218
232
|
resolved_reasoning: bool | None = None
|
|
219
233
|
resolved_tools: bool | None = None
|
|
220
234
|
resolved_max_ctx: int | None = None
|
|
235
|
+
resolved_suitability: Literal["ok", "degraded"] | None = None
|
|
221
236
|
|
|
222
237
|
thinking_locked = False
|
|
223
238
|
reasoning_locked = False
|
|
224
239
|
tools_locked = False
|
|
225
240
|
max_ctx_locked = False
|
|
241
|
+
suitability_locked = False
|
|
226
242
|
|
|
227
243
|
for rule in self._rules:
|
|
228
244
|
if not rule.kind_matches(kind):
|
|
@@ -242,7 +258,16 @@ class CapabilityRegistry:
|
|
|
242
258
|
if not max_ctx_locked and caps.max_context_tokens is not None:
|
|
243
259
|
resolved_max_ctx = caps.max_context_tokens
|
|
244
260
|
max_ctx_locked = True
|
|
245
|
-
if
|
|
261
|
+
if not suitability_locked and caps.claude_code_suitability is not None:
|
|
262
|
+
resolved_suitability = caps.claude_code_suitability
|
|
263
|
+
suitability_locked = True
|
|
264
|
+
if (
|
|
265
|
+
thinking_locked
|
|
266
|
+
and reasoning_locked
|
|
267
|
+
and tools_locked
|
|
268
|
+
and max_ctx_locked
|
|
269
|
+
and suitability_locked
|
|
270
|
+
):
|
|
246
271
|
break
|
|
247
272
|
|
|
248
273
|
return ResolvedCapabilities(
|
|
@@ -250,6 +275,7 @@ class CapabilityRegistry:
|
|
|
250
275
|
reasoning_passthrough=resolved_reasoning,
|
|
251
276
|
tools=resolved_tools,
|
|
252
277
|
max_context_tokens=resolved_max_ctx,
|
|
278
|
+
claude_code_suitability=resolved_suitability,
|
|
253
279
|
)
|
|
254
280
|
|
|
255
281
|
# ------------------------------------------------------------------
|
coderouter/config/loader.py
CHANGED
|
@@ -49,8 +49,35 @@ def load_config(path: str | os.PathLike[str] | None = None) -> CodeRouterConfig:
|
|
|
49
49
|
# fail can be rescued by an explicit env-set mode, and (b) the model-
|
|
50
50
|
# validator's "default_profile must exist in profiles" check applies to the
|
|
51
51
|
# *effective* mode the engine will see, not the pre-override YAML value.
|
|
52
|
+
#
|
|
53
|
+
# v1.8.0+: also resolve env_mode through ``mode_aliases`` before assigning,
|
|
54
|
+
# so that startup-time ``--mode coding`` (env CODEROUTER_MODE=coding)
|
|
55
|
+
# behaves symmetrically with the runtime ``X-CodeRouter-Mode: coding``
|
|
56
|
+
# header — both should accept short intent names like ``coding`` /
|
|
57
|
+
# ``general`` / ``reasoning`` and resolve them to the underlying profile
|
|
58
|
+
# (e.g. ``claude-code-nim`` in providers.nvidia-nim.yaml). Without this,
|
|
59
|
+
# users on the NIM example yaml hit
|
|
60
|
+
# "default_profile 'coding' is not declared in profiles:
|
|
61
|
+
# known=['claude-code-nim', ...]"
|
|
62
|
+
# because mode_aliases only fired at request time, not at startup.
|
|
52
63
|
env_mode = os.environ.get("CODEROUTER_MODE", "").strip()
|
|
53
64
|
if env_mode:
|
|
65
|
+
# Pre-validation alias resolution: if env_mode isn't directly a
|
|
66
|
+
# profile name but matches an entry in raw["mode_aliases"], swap it
|
|
67
|
+
# for the underlying profile name. This avoids forcing every example
|
|
68
|
+
# yaml to mirror the v1.8.0 four-profile names (multi/coding/general
|
|
69
|
+
# /reasoning) just to accept the canonical short --mode flags.
|
|
70
|
+
raw_profiles = raw.get("profiles", []) or []
|
|
71
|
+
profile_names = {
|
|
72
|
+
p.get("name") for p in raw_profiles if isinstance(p, dict)
|
|
73
|
+
}
|
|
74
|
+
raw_aliases = raw.get("mode_aliases", {}) or {}
|
|
75
|
+
if (
|
|
76
|
+
env_mode not in profile_names
|
|
77
|
+
and isinstance(raw_aliases, dict)
|
|
78
|
+
and env_mode in raw_aliases
|
|
79
|
+
):
|
|
80
|
+
env_mode = raw_aliases[env_mode]
|
|
54
81
|
raw["default_profile"] = env_mode
|
|
55
82
|
|
|
56
83
|
config = CodeRouterConfig.model_validate(raw)
|
|
@@ -31,6 +31,11 @@
|
|
|
31
31
|
# reasoning_passthrough: bool — opt OUT of the adapter's passive `reasoning` strip
|
|
32
32
|
# tools: bool — upstream reliably emits tool_calls
|
|
33
33
|
# max_context_tokens: int — declared model context window
|
|
34
|
+
# claude_code_suitability: str — "ok" | "degraded". Hint for use behind
|
|
35
|
+
# Claude Code's agentic-coding harness;
|
|
36
|
+
# "degraded" triggers a startup WARN when
|
|
37
|
+
# the provider is on a `claude-code-*`
|
|
38
|
+
# chain. See docs/troubleshooting.md §4-1.
|
|
34
39
|
#
|
|
35
40
|
# First-match semantics: rules within a file are evaluated top-to-bottom
|
|
36
41
|
# per flag; the first rule whose glob matches AND declares that flag
|
|
@@ -84,3 +89,253 @@ rules:
|
|
|
84
89
|
kind: anthropic
|
|
85
90
|
capabilities:
|
|
86
91
|
thinking: true
|
|
92
|
+
|
|
93
|
+
# ------------------------------------------------------------------
|
|
94
|
+
# Claude Code suitability — agentic harness compatibility hint (v1.7-B).
|
|
95
|
+
#
|
|
96
|
+
# "degraded" = the model over-eagerly invokes tools / skills when given
|
|
97
|
+
# Claude Code's system prompt, even for trivial small talk. Concretely,
|
|
98
|
+
# Llama-3.3-70B (verified 2026-04-24 against NVIDIA NIM) rewrites
|
|
99
|
+
# ``こんにちは`` into ``Skill(hello)`` invocations and fabricates
|
|
100
|
+
# ``AskUserQuestion("What is your name?")`` elicitations — see
|
|
101
|
+
# docs/articles/note-nvidia-nim.md §6-2 + docs/troubleshooting.md §4-1.
|
|
102
|
+
#
|
|
103
|
+
# Glob coverage: NIM uses ``meta/llama-3.3-70b-instruct``, OpenRouter
|
|
104
|
+
# uses ``meta-llama/llama-3.3-70b-instruct``, some local servers use
|
|
105
|
+
# ``Llama-3.3-70B-Instruct``. fnmatch is case-sensitive so we declare
|
|
106
|
+
# both common case-variants explicitly. The leading ``*`` wildcard
|
|
107
|
+
# absorbs any vendor-prefix slug (``meta/`` / ``meta-llama/`` / etc.).
|
|
108
|
+
#
|
|
109
|
+
# An operator who has tuned their Llama-3.3 deployment (custom system
|
|
110
|
+
# prompt, tool whitelist, etc.) can opt out via
|
|
111
|
+
# ``~/.coderouter/model-capabilities.yaml`` with the matching glob and
|
|
112
|
+
# ``claude_code_suitability: ok``.
|
|
113
|
+
# ------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
- match: "*llama-3.3-70b*"
|
|
116
|
+
kind: openai_compat
|
|
117
|
+
capabilities:
|
|
118
|
+
claude_code_suitability: degraded
|
|
119
|
+
|
|
120
|
+
- match: "*Llama-3.3-70B*"
|
|
121
|
+
kind: openai_compat
|
|
122
|
+
capabilities:
|
|
123
|
+
claude_code_suitability: degraded
|
|
124
|
+
|
|
125
|
+
# ------------------------------------------------------------------
|
|
126
|
+
# Qwen3-Coder family — agentic coding 専用設計 (v1.7-B 追加)
|
|
127
|
+
#
|
|
128
|
+
# Alibaba の Qwen3-Coder series は agentic coding と tool use を
|
|
129
|
+
# 主目的に学習されており、Claude Sonnet の tool-call 行動に最も近い
|
|
130
|
+
# ローカル/オープン代替として知られています (note 記事 + r/LocalLLaMA
|
|
131
|
+
# 2026-04 Megathread コミュニティ評)。
|
|
132
|
+
#
|
|
133
|
+
# ここで `tools: true` を先回り宣言することで、providers.yaml 側で
|
|
134
|
+
# 個別に capabilities.tools: true を書かなくても tool-call 経路が
|
|
135
|
+
# 有効になります。`claude_code_suitability: ok` も併せて宣言、
|
|
136
|
+
# claude-code-* プロファイル startup check (v1.7-B) で degraded 警告が
|
|
137
|
+
# 出ないことを保証。
|
|
138
|
+
#
|
|
139
|
+
# glob 範囲 (case-sensitive — 大文字版も併記):
|
|
140
|
+
# Ollama tag : qwen3-coder:* (例: qwen3-coder:30b-a3b)
|
|
141
|
+
# NIM slug : qwen/qwen3-coder-* (例: qwen/qwen3-coder-480b-a35b-instruct)
|
|
142
|
+
# OpenRouter slug : qwen/qwen3-coder* (例: qwen/qwen3-coder:free)
|
|
143
|
+
# HF GGUF (Ollama) : hf.co/*/Qwen3-Coder-*-GGUF* (大文字)
|
|
144
|
+
# ------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
- match: "qwen3-coder:*"
|
|
147
|
+
kind: openai_compat
|
|
148
|
+
capabilities:
|
|
149
|
+
tools: true
|
|
150
|
+
claude_code_suitability: ok
|
|
151
|
+
|
|
152
|
+
- match: "qwen/qwen3-coder-*"
|
|
153
|
+
kind: openai_compat
|
|
154
|
+
capabilities:
|
|
155
|
+
tools: true
|
|
156
|
+
claude_code_suitability: ok
|
|
157
|
+
|
|
158
|
+
- match: "qwen/qwen3-coder*"
|
|
159
|
+
kind: openai_compat
|
|
160
|
+
capabilities:
|
|
161
|
+
tools: true
|
|
162
|
+
claude_code_suitability: ok
|
|
163
|
+
|
|
164
|
+
- match: "*Qwen3-Coder-*"
|
|
165
|
+
kind: openai_compat
|
|
166
|
+
capabilities:
|
|
167
|
+
tools: true
|
|
168
|
+
claude_code_suitability: ok
|
|
169
|
+
|
|
170
|
+
# ------------------------------------------------------------------
|
|
171
|
+
# Qwen3.6 family (v1.7-B 追加、v1.8.1 で suitability 撤回)
|
|
172
|
+
#
|
|
173
|
+
# 2026-04 リリースの Qwen3.6 シリーズ。Ollama 公式 tag は
|
|
174
|
+
# qwen3.6:27b / qwen3.6:35b、metadata 上は tools+vision+thinking 対応、
|
|
175
|
+
# 256K context を宣言。note 記事 (r/LocalLLaMA 2026-04 Megathread) で
|
|
176
|
+
# 「Claude Code 代替として最高」「local champ」と評価されている。
|
|
177
|
+
#
|
|
178
|
+
# ただし v1.8.0 までで `claude_code_suitability: ok` を declare していた
|
|
179
|
+
# のは note 記事の伝聞ベースの先回り宣言で、v1.8.1 (2026-04-26) の
|
|
180
|
+
# 実機検証 (M3 Max 32GB / Ollama 0.21.2) で次の課題が判明:
|
|
181
|
+
# - num_ctx を declare 32768 しても Ollama 側で silent に縮められる
|
|
182
|
+
# (canary echo-back probe 失敗)
|
|
183
|
+
# - tool_calls probe が native tool_calls / 修復可能 JSON のいずれも
|
|
184
|
+
# 返さず NEEDS_TUNING
|
|
185
|
+
# - streaming probe が finish_reason='length' で 0 chars 打ち切り
|
|
186
|
+
# これらは Ollama 経由特有の問題で、HF / vLLM 直接ロードなら違う可能性。
|
|
187
|
+
# 確証ない以上、`claude_code_suitability` は撤回し `tools` 宣言だけ残す。
|
|
188
|
+
# 実機で動いたユーザーは `~/.coderouter/model-capabilities.yaml` で
|
|
189
|
+
# `claude_code_suitability: ok` を上書きできる。
|
|
190
|
+
# ------------------------------------------------------------------
|
|
191
|
+
|
|
192
|
+
- match: "qwen3.6:*"
|
|
193
|
+
kind: openai_compat
|
|
194
|
+
capabilities:
|
|
195
|
+
tools: true
|
|
196
|
+
|
|
197
|
+
- match: "qwen/qwen3.6-*"
|
|
198
|
+
kind: openai_compat
|
|
199
|
+
capabilities:
|
|
200
|
+
tools: true
|
|
201
|
+
|
|
202
|
+
# ------------------------------------------------------------------
|
|
203
|
+
# Gemma 4 family (v1.7-B 追加)
|
|
204
|
+
#
|
|
205
|
+
# Google 公式 Gemma 4。Ollama 公式 tag は gemma4:e2b / e4b / 26b / 31b、
|
|
206
|
+
# 全 variant が tools+vision+thinking 対応、E2B/E4B は audio もサポート。
|
|
207
|
+
# MoE (26b は active 3.8B / total 25.2B)。note 記事で「日常・バランスの
|
|
208
|
+
# 王者」と評価。Claude Haiku 互換性に近い簡潔な応答スタイル。
|
|
209
|
+
# ------------------------------------------------------------------
|
|
210
|
+
|
|
211
|
+
- match: "gemma4:*"
|
|
212
|
+
kind: openai_compat
|
|
213
|
+
capabilities:
|
|
214
|
+
tools: true
|
|
215
|
+
|
|
216
|
+
- match: "google/gemma-4*"
|
|
217
|
+
kind: openai_compat
|
|
218
|
+
capabilities:
|
|
219
|
+
tools: true
|
|
220
|
+
|
|
221
|
+
# ------------------------------------------------------------------
|
|
222
|
+
# GLM family (Z.AI / Zhipu AI、v1.7-B 追加)
|
|
223
|
+
#
|
|
224
|
+
# Z.AI の OpenAI-compat エンドポイントから利用する GLM-4.x / 5.x 系列。
|
|
225
|
+
# モデル名 slug は **大文字必須** (Cursor 等のドキュメント明記)。
|
|
226
|
+
# tools / vision 対応、Coding Plan の API 経由でも General API 経由でも
|
|
227
|
+
# 同じモデルが利用可能。
|
|
228
|
+
#
|
|
229
|
+
# GLM-5.1 / GLM-5-Turbo: Opus 級フラッグシップ
|
|
230
|
+
# GLM-4.7: Sonnet/Opus 級、Coding Plan のデフォルト
|
|
231
|
+
# GLM-4.5-Air: Haiku 級、軽量・高速
|
|
232
|
+
#
|
|
233
|
+
# note 記事は「intent 理解が Claude Opus 級」と評価。reasoning 用途に
|
|
234
|
+
# 特に向く。
|
|
235
|
+
# ------------------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
- match: "GLM-5*"
|
|
238
|
+
kind: openai_compat
|
|
239
|
+
capabilities:
|
|
240
|
+
tools: true
|
|
241
|
+
|
|
242
|
+
- match: "GLM-4.[5-9]*"
|
|
243
|
+
kind: openai_compat
|
|
244
|
+
capabilities:
|
|
245
|
+
tools: true
|
|
246
|
+
|
|
247
|
+
# ------------------------------------------------------------------
|
|
248
|
+
# Kimi K2 family (Moonshot AI、v1.8.0 追加)
|
|
249
|
+
#
|
|
250
|
+
# NVIDIA NIM 経由で実機検証済み (2026-04-23) の tool-capable モデル。
|
|
251
|
+
# examples/providers.nvidia-nim.yaml の `nim-kimi-k2` / `nim-kimi-k2-thinking`
|
|
252
|
+
# で運用実績あり。Unsloth tool-calling guide にも tool calling 対応モデル
|
|
253
|
+
# として掲載 (Kimi K2.5 / K2 Thinking)。providers.yaml 側で個別の
|
|
254
|
+
# `capabilities.tools: true` 宣言を省略可能にするのが目的。
|
|
255
|
+
# ------------------------------------------------------------------
|
|
256
|
+
|
|
257
|
+
- match: "moonshotai/kimi-k2*"
|
|
258
|
+
kind: openai_compat
|
|
259
|
+
capabilities:
|
|
260
|
+
tools: true
|
|
261
|
+
|
|
262
|
+
- match: "moonshotai/Kimi-K2*"
|
|
263
|
+
kind: openai_compat
|
|
264
|
+
capabilities:
|
|
265
|
+
tools: true
|
|
266
|
+
|
|
267
|
+
# ------------------------------------------------------------------
|
|
268
|
+
# gpt-oss family (OpenAI 117B MoE オープンウェイト、v1.8.0 追加)
|
|
269
|
+
#
|
|
270
|
+
# OpenRouter free 経由で実機検証済み (`openai/gpt-oss-120b:free` を
|
|
271
|
+
# examples/providers.yaml の `openrouter-gpt-oss-free` で運用)。
|
|
272
|
+
# native tool calling 設計、131K context、Unsloth tool-calling guide
|
|
273
|
+
# にも tool calling 対応モデルとして掲載 (gpt-oss)。
|
|
274
|
+
# ------------------------------------------------------------------
|
|
275
|
+
|
|
276
|
+
- match: "openai/gpt-oss-*"
|
|
277
|
+
kind: openai_compat
|
|
278
|
+
capabilities:
|
|
279
|
+
tools: true
|
|
280
|
+
|
|
281
|
+
- match: "gpt-oss-*"
|
|
282
|
+
kind: openai_compat
|
|
283
|
+
capabilities:
|
|
284
|
+
tools: true
|
|
285
|
+
|
|
286
|
+
# ------------------------------------------------------------------
|
|
287
|
+
# 先回り宣言 family (Unsloth tool-calling guide 掲載、v1.8.0 追加)
|
|
288
|
+
#
|
|
289
|
+
# Unsloth のローカル LLM tool-calling ガイド
|
|
290
|
+
# (https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms)
|
|
291
|
+
# で tool-calling 対応モデルとして掲載されているが、CodeRouter 側で
|
|
292
|
+
# 実機検証は未実施。tools=true の事前宣言だけ入れて、providers.yaml で
|
|
293
|
+
# これらを使う際の `capabilities.tools: true` 明示宣言を不要にする。
|
|
294
|
+
# claude_code_suitability は実機検証後に追加判断 — それまでは "意見なし"。
|
|
295
|
+
# 不具合があれば user-side の `~/.coderouter/model-capabilities.yaml` で
|
|
296
|
+
# `tools: false` を declare して上書き可能 (first-match-per-flag)。
|
|
297
|
+
# ------------------------------------------------------------------
|
|
298
|
+
|
|
299
|
+
# DeepSeek-V3.x — DeepSeek-AI の主力 (V3.1 / V3.2 等)
|
|
300
|
+
- match: "deepseek-ai/DeepSeek-V3*"
|
|
301
|
+
kind: openai_compat
|
|
302
|
+
capabilities:
|
|
303
|
+
tools: true
|
|
304
|
+
|
|
305
|
+
- match: "deepseek/deepseek-v3*"
|
|
306
|
+
kind: openai_compat
|
|
307
|
+
capabilities:
|
|
308
|
+
tools: true
|
|
309
|
+
|
|
310
|
+
# MiniMax — MiniMaxAI の MoE 系
|
|
311
|
+
- match: "MiniMaxAI/MiniMax-*"
|
|
312
|
+
kind: openai_compat
|
|
313
|
+
capabilities:
|
|
314
|
+
tools: true
|
|
315
|
+
|
|
316
|
+
- match: "minimax/minimax-*"
|
|
317
|
+
kind: openai_compat
|
|
318
|
+
capabilities:
|
|
319
|
+
tools: true
|
|
320
|
+
|
|
321
|
+
# NVIDIA Nemotron 3 — Nano 系の小型モデル
|
|
322
|
+
- match: "nvidia/nemotron-3-*"
|
|
323
|
+
kind: openai_compat
|
|
324
|
+
capabilities:
|
|
325
|
+
tools: true
|
|
326
|
+
|
|
327
|
+
- match: "nvidia/Nemotron-3-*"
|
|
328
|
+
kind: openai_compat
|
|
329
|
+
capabilities:
|
|
330
|
+
tools: true
|
|
331
|
+
|
|
332
|
+
# Devstral 2 — Mistral AI の coding 特化 fine-tune
|
|
333
|
+
- match: "mistralai/Devstral-*"
|
|
334
|
+
kind: openai_compat
|
|
335
|
+
capabilities:
|
|
336
|
+
tools: true
|
|
337
|
+
|
|
338
|
+
- match: "mistral/devstral*"
|
|
339
|
+
kind: openai_compat
|
|
340
|
+
capabilities:
|
|
341
|
+
tools: true
|