coderouter-cli 1.8.0__tar.gz → 1.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/CHANGELOG.md +73 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/PKG-INFO +1 -1
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/loader.py +27 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/data/model-capabilities.yaml +113 -7
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/hf-ollama-models.md +2 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/troubleshooting.en.md +4 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/troubleshooting.md +60 -1
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.nvidia-nim.yaml +27 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.yaml +26 -7
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/pyproject.toml +1 -1
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_capability_registry.py +46 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_config.py +63 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/.gitignore +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/LICENSE +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/README.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/README.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/__main__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/adapters/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/adapters/anthropic_native.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/adapters/base.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/adapters/openai_compat.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/adapters/registry.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/cli.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/cli_stats.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/capability_registry.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/env_file.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/schemas.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/data/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/doctor.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/doctor_apply.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/env_security.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/errors.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/anthropic_routes.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/app.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/dashboard_routes.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/metrics_routes.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/ingress/openai_routes.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/logging.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/metrics/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/metrics/collector.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/metrics/prometheus.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/output_filters.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/routing/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/routing/auto_router.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/routing/capability.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/routing/fallback.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/translation/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/translation/anthropic.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/translation/convert.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/translation/tool_repair.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/assets/dashboard-demo.png +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/designs/v1.5-dashboard-mockup.html +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/designs/v1.6-auto-router-verification.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/designs/v1.6-auto-router.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/free-tier-guide.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/free-tier-guide.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/openrouter-roster/README.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/openrouter-roster/latest.json +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/quickstart.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/quickstart.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v0.4.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v0.5-verify.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v0.5.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v0.6.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v0.7.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v1.0-verify.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/retrospectives/v1.0.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/security.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/security.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/usage-guide.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/usage-guide.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/when-do-i-need-coderouter.en.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/when-do-i-need-coderouter.md +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/.env.example +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.auto-custom.yaml +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.auto.yaml +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.note-2026.yaml +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/scripts/demo_traffic.sh +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/scripts/openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/scripts/verify_v0_5.sh +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/scripts/verify_v1_0.sh +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/__init__.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/conftest.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_adapter_anthropic.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_auto_router.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_capability.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_capability_degraded_payload.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_claude_code_suitability.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_cli.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_cli_stats.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_dashboard_endpoint.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_doctor.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_doctor_apply.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_env_file.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_env_security.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_errors.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_examples_yaml.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback_anthropic.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback_cache_control.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback_misconfig_warn.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback_paid_gate.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_fallback_thinking.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_ingress_anthropic.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_ingress_profile.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_metrics_collector.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_metrics_endpoint.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_metrics_jsonl.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_metrics_prometheus.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_openai_compat.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_output_filters.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_output_filters_adapters.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_reasoning_strip.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_setup_sh.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_tool_repair.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_translation_anthropic.py +0 -0
- {coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_translation_reverse.py +0 -0
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/CHANGELOG.md
@@ -6,6 +6,79 @@ versioning follows [SemVer](https://semver.org/).
 
 ---
 
+## [v1.8.1] — 2026-04-26 (real-machine-verification patch — mode_aliases resolution + Gemma 4 promoted to first choice + Ollama known issues documented)
+
+**Theme: fix, as a patch, the three problems hit during real-machine verification (M3 Max 32GB / Ollama 0.21.2) right after shipping v1.8.0.**
+
+A loader bug was found where the v1.8.0 purpose-specific four profiles make `coderouter serve --mode coding` fail at startup with a **`default_profile 'coding' is not declared in profiles`** error for users running on the NIM example yaml. Real-machine verification also confirmed that Qwen3.6:27b/35b, which sat at the top of the `coding` profile, is hard to use in practice through Ollama (num_ctx silently capped / tool_calls 0 / streaming 0 chars). The reality that **"high praise in note articles or on HF does not mean a model works right away through Ollama"** is now spelled out as troubleshooting.md §4-2.
+
+- Tests: 729 → **730** (+1: mode_aliases resolution test for the loader)
+- Runtime deps: 5 → 5 (unchanged for 19 consecutive sub-releases)
+- Backward compat: fully compatible; no `providers.yaml` edits needed (the loader resolves via the alias)
+
+### Changes
+
+#### Bug fixes (hit during real-machine verification)
+
+- **`coderouter/config/loader.py`**: fixed the naive v0.6-A implementation in which the `CODEROUTER_MODE` env (= the `--mode` CLI flag) was **assigned directly to `default_profile` without resolving `mode_aliases`**. The runtime `X-CodeRouter-Mode` header (v0.6-D) did resolve aliases, so startup and runtime had asymmetric semantics. v1.8.1 resolves env_mode through `mode_aliases` before assigning it to `default_profile`, making the two symmetric. `cr serve --mode coding` now starts without a validation error even on the NIM example yaml (profiles=`[claude-code-nim, ...]`, mode_aliases=`{coding: claude-code-nim}`)
+- **`examples/providers.nvidia-nim.yaml`**: added the `mode_aliases` (default/coding/general/multi/reasoning/fast/cheap/think/vision) that v1.8.0 added to the main `providers.yaml`, so NIM users can also use `--mode coding|general|reasoning|multi` as the canonical short aliases
+
+#### `coding` profile primary adjusted to reflect real-machine verification
+
+- **`examples/providers.yaml`**: reordered the head of the `coding` profile's providers list from Qwen3.6:35b/27b to **`ollama-qwen-coder-14b` / `ollama-gemma4-26b` / `ollama-qwen-coder-7b` / `ollama-qwen3-coder-30b`**. The Qwen3.6 entries are demoted to a commented-out fallback at the tail (kept as candidates to promote back to primary once LM/llama.cpp support improves later). This reflects the ordering principle: proven, reliably working models go first; newer note-recommended models are promoted only after stability is confirmed
+- **`coderouter/data/model-capabilities.yaml`**: **retracted** `claude_code_suitability: ok` for `qwen3.6:*` / `qwen/qwen3.6-*`. When added in v1.7-B this was a preemptive declaration based on hearsay from note articles; the v1.8.1 real-machine verification found num_ctx / tool_calls / streaming all NEEDS_TUNING, so without evidence only the `tools` declaration is kept and no suitability claim is made. Users for whom it works on real hardware can override with `claude_code_suitability: ok` in `~/.coderouter/model-capabilities.yaml` (the registry's first-match-per-flag walk goes user → bundled)
+
+#### Documentation: added Known Issues for real-machine Ollama operation
+
+- **`docs/troubleshooting.md`: new §4-2 "Known issues you are likely to hit through local Ollama"**:
+  - **§4-2-A**: Qwen3.6:27b/35b is hard to use in practice through Ollama 0.21.2 (num_ctx silently capped / tool_calls 0 / streaming 0); `/no_think` does not help. Workaround: put Gemma 4 / Qwen2.5-Coder higher in the chain
+  - **§4-2-B**: Qwen3.5-based HF distilled models (Qwopus3.5 and similar) hit a 500 `unable to load model` error because llama.cpp does not yet support the `qwen35` architecture (hybrid Transformer-SSM). Waiting on the framework itself
+  - **§4-2-C**: Gemma 4 26B confirmed tool_calls OK with no tweaks, backing up the note-article "everyday champion" verdict
+  - **§4-2-D**: best practice "proven models + observation tooling (doctor)"; for a new model found on HF, run `ollama run` and check the server log for `unknown model architecture`; if it appears, give up for now
+
+### Why
+
+v1.8.0 shipped with the claim that "`--mode coding` works with the four purpose-specific profiles", but the loader bug hit by users on the NIM example yaml **dies in validation before the first real prompt ever arrives**, so it is the most important fix. In addition, the facts that the Qwen3.6 series placed as primary in the v1.8.0 example fails three probes with NEEDS_TUNING on real hardware, and that the Qwen3.5-based HF distills are not supported by llama.cpp, reconfirm the **"real-machine evidence over preemptive implementation" principle** (plan.md §5.4).
+
+### Migration
+
+`pyproject.toml version 1.8.0 → 1.8.1`; `coderouter --version` returns 1.8.1. **Your local `~/.coderouter/providers.yaml` stays completely unchanged unless you touch it.**
+
+If `cr serve --mode coding` did not work for you on the NIM example:
+
+```bash
+# Copy the latest example (mode_aliases added in v1.8.1)
+cp examples/providers.nvidia-nim.yaml ~/.coderouter/providers.yaml
+# Or add the mode_aliases section to your existing file by hand
+```
+
+Alternatively, reinstall `cr` from the local development tree:
+
+```bash
+uv tool install --reinstall --force --from /path/to/CodeRouter coderouter-cli --with ruamel.yaml
+```
+
+### Real-machine verification
+
+```
+$ pytest -q
+730 passed, 1 skipped in 1.86s
+
+$ ruff check coderouter/ tests/
+All checks passed!
+
+$ cr serve --port 8088 --mode coding                  # starts even on the NIM example yaml
+$ cr doctor --check-model ollama-gemma4-26b --apply   # tool_calls confirmed OK
+```
+
+### Out of scope / deferred to next release
+
+- Qwen3.5-based HF distills (Qwopus / similar): re-evaluate once llama.cpp implements the `qwen35` architecture
+- Qwen3.6:27b/35b through Ollama: re-evaluate once Ollama / llama.cpp improve, then consider re-granting `claude_code_suitability`
+- The v1.7-C candidates (network audit / launcher / startup update check) remain waiting on demand
+
+---
+
 ## [v1.8.0] — 2026-04-26 (four purpose-specific profiles + GLM/Gemma 4/Qwen3.6 made official + apply automation)
 
 **Theme: a minor release that hands the operator "alternative models whose meaning does not drift in Claude Code".** Clears plan.md §11.B (the v1.7-B umbrella) in one go across 6 tasks:

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coderouter-cli
-Version: 1.8.0
+Version: 1.8.1
 Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
 Project-URL: Homepage, https://github.com/zephel01/CodeRouter
 Project-URL: Repository, https://github.com/zephel01/CodeRouter

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/config/loader.py
@@ -49,8 +49,35 @@ def load_config(path: str | os.PathLike[str] | None = None) -> CodeRouterConfig:
     # fail can be rescued by an explicit env-set mode, and (b) the model-
     # validator's "default_profile must exist in profiles" check applies to the
     # *effective* mode the engine will see, not the pre-override YAML value.
+    #
+    # v1.8.0+: also resolve env_mode through ``mode_aliases`` before assigning,
+    # so that startup-time ``--mode coding`` (env CODEROUTER_MODE=coding)
+    # behaves symmetrically with the runtime ``X-CodeRouter-Mode: coding``
+    # header — both should accept short intent names like ``coding`` /
+    # ``general`` / ``reasoning`` and resolve them to the underlying profile
+    # (e.g. ``claude-code-nim`` in providers.nvidia-nim.yaml). Without this,
+    # users on the NIM example yaml hit
+    #   "default_profile 'coding' is not declared in profiles:
+    #    known=['claude-code-nim', ...]"
+    # because mode_aliases only fired at request time, not at startup.
     env_mode = os.environ.get("CODEROUTER_MODE", "").strip()
     if env_mode:
+        # Pre-validation alias resolution: if env_mode isn't directly a
+        # profile name but matches an entry in raw["mode_aliases"], swap it
+        # for the underlying profile name. This avoids forcing every example
+        # yaml to mirror the v1.8.0 four-profile names (multi/coding/general
+        # /reasoning) just to accept the canonical short --mode flags.
+        raw_profiles = raw.get("profiles", []) or []
+        profile_names = {
+            p.get("name") for p in raw_profiles if isinstance(p, dict)
+        }
+        raw_aliases = raw.get("mode_aliases", {}) or {}
+        if (
+            env_mode not in profile_names
+            and isinstance(raw_aliases, dict)
+            and env_mode in raw_aliases
+        ):
+            env_mode = raw_aliases[env_mode]
         raw["default_profile"] = env_mode
 
     config = CodeRouterConfig.model_validate(raw)

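To see the fix end to end, here is a minimal sketch of the startup path (it mirrors the new test added to `tests/test_config.py` later in this diff; the providers.yaml path is illustrative):

```python
# Sketch: startup-time --mode resolution after the v1.8.1 loader fix.
# CODEROUTER_MODE is what `cr serve --mode coding` sets behind the scenes.
import os

from coderouter.config.loader import load_config

os.environ["CODEROUTER_MODE"] = "coding"   # short intent name, not a profile name
cfg = load_config("providers.yaml")        # any path load_config accepts; illustrative

# With `mode_aliases: {coding: claude-code-nim}` in the YAML, the loader now
# swaps the alias for the underlying profile before validation, instead of
# failing with "default_profile 'coding' is not declared in profiles".
print(cfg.default_profile)                 # -> "claude-code-nim"
```
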
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/data/model-capabilities.yaml
@@ -168,26 +168,36 @@ rules:
       claude_code_suitability: ok
 
   # ------------------------------------------------------------------
-  # Qwen3.6 family (v1.7-B
+  # Qwen3.6 family (added in v1.7-B, suitability retracted in v1.8.1)
   #
   # The Qwen3.6 series released 2026-04. The official Ollama tags are
-  # qwen3.6:27b / qwen3.6:35b
-  # 256K context
-  # "best Claude Code alternative" / "local champ
-  #
+  # qwen3.6:27b / qwen3.6:35b; the metadata declares tools+vision+thinking
+  # and a 256K context. note articles (r/LocalLLaMA 2026-04 Megathread)
+  # rate it "the best Claude Code alternative" and a "local champ".
+  #
+  # However, the `claude_code_suitability: ok` declared up to v1.8.0 was a
+  # preemptive declaration based on hearsay from note articles; the v1.8.1
+  # (2026-04-26) real-machine verification (M3 Max 32GB / Ollama 0.21.2)
+  # found the following issues:
+  # - a declared num_ctx of 32768 is silently shrunk on the Ollama side
+  #   (canary echo-back probe fails)
+  # - the tool_calls probe returns neither native tool_calls nor
+  #   repairable JSON → NEEDS_TUNING
+  # - the streaming probe is cut off at 0 chars with finish_reason='length'
+  # These may be specific to the Ollama path; direct HF / vLLM loading could
+  # behave differently. Without evidence, `claude_code_suitability` is
+  # retracted and only the `tools` declaration is kept.
+  # Users for whom it works on real hardware can override with
+  # `claude_code_suitability: ok` in `~/.coderouter/model-capabilities.yaml`.
   # ------------------------------------------------------------------
 
   - match: "qwen3.6:*"
     kind: openai_compat
     capabilities:
       tools: true
-      claude_code_suitability: ok
 
   - match: "qwen/qwen3.6-*"
    kind: openai_compat
     capabilities:
       tools: true
-      claude_code_suitability: ok
 
   # ------------------------------------------------------------------
   # Gemma 4 family (added in v1.7-B)

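The practical effect of the retraction can be checked against the registry directly. A sketch: the import path is assumed from the `coderouter/config/capability_registry.py` module in the file list above, and the lookup call mirrors the usage in `tests/test_capability_registry.py` further down in this diff.

```python
# Sketch: what the bundled registry resolves for a Qwen3.6 tag after v1.8.1.
from coderouter.config.capability_registry import CapabilityRegistry  # import path assumed

reg = CapabilityRegistry.load_default()
result = reg.lookup(kind="openai_compat", model="qwen3.6:27b")

# tools stays true, but claude_code_suitability is no longer asserted.
# A user-side rule in ~/.coderouter/model-capabilities.yaml can re-add
# `claude_code_suitability: ok`, because the first-match-per-flag walk
# consults user rules before the bundled ones.
print(result.tools)
```
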
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/coderouter/data/model-capabilities.yaml
@@ -233,3 +243,99 @@ rules:
     kind: openai_compat
     capabilities:
       tools: true
+
+  # ------------------------------------------------------------------
+  # Kimi K2 family (Moonshot AI, added in v1.8.0)
+  #
+  # Tool-capable models verified on real hardware via NVIDIA NIM (2026-04-23).
+  # Operated as `nim-kimi-k2` / `nim-kimi-k2-thinking` in
+  # examples/providers.nvidia-nim.yaml. Also listed in the Unsloth
+  # tool-calling guide as tool-calling capable (Kimi K2.5 / K2 Thinking).
+  # The goal is to let providers.yaml omit the per-provider
+  # `capabilities.tools: true` declaration.
+  # ------------------------------------------------------------------
+
+  - match: "moonshotai/kimi-k2*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "moonshotai/Kimi-K2*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  # ------------------------------------------------------------------
+  # gpt-oss family (OpenAI 117B MoE open weights, added in v1.8.0)
+  #
+  # Verified on real hardware via OpenRouter free (`openai/gpt-oss-120b:free`
+  # operated as `openrouter-gpt-oss-free` in examples/providers.yaml).
+  # Designed for native tool calling, 131K context, and also listed in the
+  # Unsloth tool-calling guide as tool-calling capable (gpt-oss).
+  # ------------------------------------------------------------------
+
+  - match: "openai/gpt-oss-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "gpt-oss-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  # ------------------------------------------------------------------
+  # Preemptively declared families (listed in the Unsloth tool-calling
+  # guide, added in v1.8.0)
+  #
+  # Listed as tool-calling capable in Unsloth's tool-calling guide for
+  # local LLMs (https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms),
+  # but not yet verified on real hardware by CodeRouter. Only the tools=true
+  # pre-declaration is added so that providers.yaml does not need an explicit
+  # `capabilities.tools: true` when using these.
+  # claude_code_suitability will be decided after real-machine verification —
+  # until then, "no opinion".
+  # If something misbehaves, the user-side `~/.coderouter/model-capabilities.yaml`
+  # can declare `tools: false` to override (first-match-per-flag).
+  # ------------------------------------------------------------------
+
+  # DeepSeek-V3.x — DeepSeek-AI's flagship line (V3.1 / V3.2 etc.)
+  - match: "deepseek-ai/DeepSeek-V3*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "deepseek/deepseek-v3*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  # MiniMax — MiniMaxAI's MoE line
+  - match: "MiniMaxAI/MiniMax-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "minimax/minimax-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  # NVIDIA Nemotron 3 — the small Nano line
+  - match: "nvidia/nemotron-3-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "nvidia/Nemotron-3-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  # Devstral 2 — Mistral AI's coding-focused fine-tune
+  - match: "mistralai/Devstral-*"
+    kind: openai_compat
+    capabilities:
+      tools: true
+
+  - match: "mistral/devstral*"
+    kind: openai_compat
+    capabilities:
+      tools: true

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/hf-ollama-models.md
@@ -237,6 +237,8 @@ ollama cp hf.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_M qwen3-coder:30b
 
 - Ollama HF integration: <https://huggingface.co/docs/hub/en/ollama>
 - Unsloth (the best-known uploader of fast quantized builds): <https://huggingface.co/unsloth>
+- **Unsloth: Tool calling guide for local LLMs (Japanese)**: <https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms>
+  — a per-model breakdown of why tool calls fail or break on local LLMs (Qwen / Llama / Gemma) and how to fix them. Good background reading when CodeRouter reports `tool_calls: NEEDS_TUNING`.
 - bartowski (quality-focused quantized builds): <https://huggingface.co/bartowski>
 - Qwen3-Coder (official Alibaba): <https://huggingface.co/collections/Qwen/qwen3-coder>
 - Gemma 4 (official Google): <https://huggingface.co/collections/google/gemma-4>

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/troubleshooting.en.md
@@ -187,6 +187,8 @@ coderouter doctor --check-model <provider>
 
 With `tools: false` the chain moves on to the next provider when a tool-heavy request arrives. Pair this with a stronger model later in the chain (e.g. qwen2.5-coder:14b or a cloud fallback).
 
+> **Further reading**: Unsloth's [Tool calling guide for local LLMs](https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms) (Japanese) walks through per-model tool-call quirks (Qwen / Llama / Gemma) — a useful background read when CodeRouter's doctor flags `tool_calls: NEEDS_TUNING` and you want to understand why.
+
 **3. `<think>...</think>` tags leak into the UI.** Qwen3-distilled models, DeepSeek-R1 distills, and some HF GGUF variants emit chain-of-thought inside the regular content channel (not an Anthropic `thinking` block). The tags land in Claude Code's terminal verbatim.
 
 ```bash
@@ -278,6 +280,8 @@ profiles:
 
 `examples/providers.nvidia-nim.yaml` (v1.6.2 onwards) ships with the Qwen-first ordering. Llama-3.3-70B works fine for many things, but for Claude Code chat traffic specifically, Qwen3-Coder-480B / Kimi-K2 are operationally more stable.
 
+> **Background on per-model tool-call behavior**: Llama-3.3-70B's tendency to rewrite plain text into tool calls comes from its aggressive agentic-tuning RLHF signal interacting with Claude Code's system prompt. Unsloth's [Tool calling guide for local LLMs](https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms) (Japanese) covers this and other model-specific quirks well — useful background for the v1.8.0 `claude_code_suitability` heuristic.
+
 ### 4-2. `UserPromptSubmit hook error` (third-party Claude Code plugins)
 
 ```
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/docs/troubleshooting.md
@@ -187,6 +187,8 @@ coderouter doctor --check-model <provider>
 
 With `tools: false`, the chain moves on to the next provider when a tool-heavy request arrives. Combine it with a stronger model (qwen2.5-coder:14b or a cloud fallback).
 
+> **Further reading**: per-model tool-call support and the quantization / system-prompt / chat-template pitfalls are nicely summarized in Unsloth's [Tool calling guide for local LLMs (Japanese)](https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms). Recommended for anyone who wants to dig into why tool calls fail on the Qwen / Llama / Gemma lines.
+
 **3. `<think>...</think>` tags leak into the UI.** Qwen3 distilled models, DeepSeek-R1 distills, and some HF GGUF variants emit chain-of-thought into the regular content channel rather than an Anthropic `thinking` block. The tags show up verbatim in Claude Code's terminal.
 
 ```bash
@@ -278,7 +280,64 @@ profiles:
 
 `examples/providers.nvidia-nim.yaml` (v1.6.2 onwards) ships with the Qwen-first ordering. Llama-3.3-70B itself is verified to work, but for Claude Code chat traffic specifically, Qwen3-Coder-480B / Kimi-K2 are operationally more stable.
 
+> **Digging into per-model tool-call behavior**: the Llama-3.3-70B family's tendency to rewrite plain text into tool calls stems from how its agentic-tuning RLHF signal interacts with the system prompt. Each model's tendencies and workarounds are covered in Unsloth's [Tool calling guide for local LLMs (Japanese)](https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms), which is an approachable read and useful background for the `claude_code_suitability` heuristic introduced in CodeRouter v1.8.0.
+
+### 4-2. Known issues you are likely to hit through local Ollama (added in v1.8.1)
+
+Real-machine verification on 2026-04-26 (M3 Max 32GB / Ollama 0.21.2 / CodeRouter v1.8.0) showed that **even models highly rated in note articles or on HF may not work through Ollama**; the findings are collected here.
+
+#### 4-2-A. **Qwen3.6:27b / 35b** is hard to use in practice with Claude Code
+
+Results of `coderouter doctor --check-model ollama-qwen3-6-27b`:
+
+| Probe | Result | Symptom |
+|---|---|---|
+| auth+basic-chat | OK | short chats work |
+| **num_ctx** | **NEEDS_TUNING** | declaring `extra_body.options.num_ctx: 32768` is not echoed back by the canary (Ollama is suspected of silently shrinking it) |
+| **tool_calls** | **NEEDS_TUNING** | returns neither native tool_calls nor repairable JSON |
+| **streaming** | **NEEDS_TUNING** | cut off at 0 chars with `finish_reason='length'` |
+
+Putting `/no_think` into `append_system_prompt` does not help. Most likely, Ollama 0.21.2 / llama.cpp support for the Qwen3.6 family is not yet complete.
+
+**Workaround**: do not put Qwen3.6 at the top of the `claude-code-nim` profile; put **Gemma 4 26B or Qwen2.5-Coder 14b higher** instead. The bundled `model-capabilities.yaml` also retracts `claude_code_suitability: ok` for `qwen3.6:*` in v1.8.1 (an example of over-trusting declarations).
+
+#### 4-2-B. **Qwen3.5-based HF distilled models** (Qwopus3.5 etc.) are not supported by llama.cpp
+
+For example, pulling `Jackrong/Qwopus3.5-9B-v3-GGUF` (Qwen3.5-VL base + Claude Opus distillation, Apache-2.0, Vision) with `ollama pull` **downloads the blob completely**, but `ollama run` fails with:
+
+```
+Error: 500 Internal Server Error: unable to load model:
+...sha256-19d52ddc.../...
+```
+
+Looking at the Ollama server log:
+
+```
+llama_model_load: error loading model: error loading model architecture:
+unknown model architecture: 'qwen35'
+```
+
+**Cause**: Qwen3.5 is a new architecture (hybrid Transformer-SSM; keys like `qwen35.ssm.*` appear in the GGUF metadata), and the **`qwen35` architecture implementation is not yet merged** into llama.cpp / Ollama. This is not an Ollama version issue; it waits on the framework itself.
+
+**Workaround**: for now the only option is to load it directly via HF / Transformers / vLLM (CodeRouter goes through Ollama's OpenAI-compat endpoint, so it cannot use it). Re-evaluate once llama.cpp implements `qwen35`.
+
+> **Lesson**: a new combination found on HF such as "Qwen3.5 + Opus distillation" may have a good reputation on note / r/LocalLLaMA, yet **not be immediately usable through Ollama**. If `ollama pull` → `ollama run` returns a 500, first check the Ollama server log for `unknown model architecture`. If it shows up, moving on to another model is the time-efficient call for now.
+
+#### 4-2-C. **Gemma 4 26B** passes tool_calls with no tweaks
+
+In the same verification run, the `tool_calls` probe of `coderouter doctor --check-model ollama-gemma4-26b` is **`[OK]` with no tweaks**. `num_ctx` / `streaming` can be fixed by the patch (`--apply`). For Claude Code over Ollama this **backs up the note-article verdict that "Gemma 4 is the everyday champion"**. v1.8.1 adjusts the `coding` profile primary toward Gemma 4 / Qwen-Coder 14b.
+
+#### 4-2-D. Best practice — "proven models + observation tooling"
+
+Suggestions for real-machine operation:
+
+1. **First choice is a proven model**: `qwen2.5-coder:14b` / `qwen2.5-coder:7b` / `gemma4:26b` / `gemma4:e4b`
+2. **Check with doctor**: run the 6 probes with `cr doctor --check-model <provider>`
+3. **Apply the patch with `--apply`**: non-destructive write-back of the NEEDS_TUNING YAML patch (requires `pip install 'coderouter-cli[doctor]'`)
+4. **Be careful with brand-new models**: a new model found on HF may be unsupported even on Ollama 0.20+; check via `ollama run` → the server log
+5. **Protect yourself with the fallback chain**: keep the chain deep so that traffic flows to NIM / OpenRouter free even when the local primary fails
+
-### 4-2. `UserPromptSubmit hook error` appears (third-party Claude Code plugins)
+### 4-3. `UserPromptSubmit hook error` appears (third-party Claude Code plugins)
 
 ```
 ❯ こんにちは
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.nvidia-nim.yaml
@@ -112,6 +112,33 @@ allow_paid: false
 default_profile: claude-code-nim
 display_timezone: Asia/Tokyo
 
+# v1.8.0: accept `--mode coding|general|reasoning|multi` on the NIM example
+# too, by providing the same short aliases as the main examples/providers.yaml.
+# Avoids users getting stuck on the "default_profile 'coding' is not declared"
+# error after trying the modes described in examples/providers.yaml.
+#
+# Alias resolution:
+#   coding    → claude-code-nim (the NIM-centric coding chain)
+#   general   → claude-code-nim (NIM has no dedicated general profile, shared)
+#   reasoning → nim-reasoning   (long-form reasoning chain starting from Kimi-K2-Thinking)
+#   multi     → claude-code-nim (no NIM-native vision profile yet; if you need
+#                                images, use the multi profile in
+#                                examples/providers.yaml (Gemma 4 26B vision) alongside)
+#   default   → claude-code-nim
+#   fast      → free-only-nim   (safety first, never calls anything paid)
+#   cheap     → free-only-nim
+#   think     → nim-reasoning
+mode_aliases:
+  default: claude-code-nim
+  coding: claude-code-nim
+  general: claude-code-nim
+  reasoning: nim-reasoning
+  multi: claude-code-nim
+  fast: free-only-nim
+  cheap: free-only-nim
+  think: nim-reasoning
+  vision: claude-code-nim
+
 providers:
   # ------------------------------------------------------------
   # Tier 1: local (same as the main examples/providers.yaml)

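The same aliases are also honoured per request through the `X-CodeRouter-Mode` header, the runtime counterpart of `--mode` mentioned in the loader comments above. A rough sketch against a locally running `cr serve --port 8088`; the endpoint path, payload shape, and `model` value are assumptions based on the OpenAI-compatible ingress, not copied from this package's routes:

```python
# Sketch: selecting a profile per request via the ingress header.
import httpx

resp = httpx.post(
    "http://127.0.0.1:8088/v1/chat/completions",   # path assumed (OpenAI-compat convention)
    headers={"X-CodeRouter-Mode": "reasoning"},    # alias resolved at request time
    json={
        "model": "router",                          # placeholder; routing is profile-driven
        "messages": [{"role": "user", "content": "hello"}],
    },
    timeout=60.0,
)
print(resp.status_code)
```
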
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.yaml
@@ -533,6 +533,13 @@ providers:
     api_key_env: Z_AI_API_KEY
     paid: true
     timeout_s: 60
+    # Recommended values for GLM 4.7 from the Unsloth tool-calling guide
+    # (https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms).
+    # If Claude Code sends temperature/top_p explicitly, those take
+    # precedence (extra_body only supplies defaults).
+    extra_body:
+      temperature: 0.7
+      top_p: 1.0
     capabilities:
       chat: true
       streaming: true

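A tiny illustration of the precedence that comment describes (this is not the adapter code in `coderouter/adapters/openai_compat.py`, only the merge order it implies):

```python
# Sketch: extra_body supplies defaults; explicit request parameters win.
def merge_params(extra_body: dict, request: dict) -> dict:
    merged = dict(extra_body)   # provider-level defaults (temperature: 0.7, top_p: 1.0)
    merged.update(request)      # values the client sent explicitly override them
    return merged

# Claude Code sending temperature=0.2 overrides the YAML default of 0.7:
assert merge_params({"temperature": 0.7, "top_p": 1.0}, {"temperature": 0.2}) == {
    "temperature": 0.2,
    "top_p": 1.0,
}
```
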
{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/examples/providers.yaml
@@ -857,21 +864,33 @@ profiles:
       - anthropic-direct        # paid: Sonnet 4-6 with vision (★★★★★)
 
   # --------------------------------------------------------------------------
-  # coding — for Claude Code / agentic coding.
+  # coding — for Claude Code / agentic coding.
   # --------------------------------------------------------------------------
-  #
-  #
-  #
+  # Reflects the 2026-04-26 real-machine verification (M3 Max 32GB / Ollama 0.21.2):
+  # - Gemma 4 26B passes tool_calls with no tweaks (official Ollama tag, note's "everyday champion")
+  # - Qwen-Coder 14b/7b have been verified on real hardware since v0.x (the proven choice)
+  # - Qwen3.6 27b/35b has the triple problem of Ollama silently shrinking
+  #   num_ctx / no tool_calls / streaming 0 chars, so it is not practical
+  #   through Ollama (may be fine via direct HF / vLLM, needs re-verification)
+  # - Qwen3-Coder 30B is a llama.cpp-supported model and more stable than the Qwen3.6 line
+  #
+  # Ordering principle: "proven and reliably working" goes first; "note-recommended
+  # but new" comes after. The closest match to Sonnet's tool-call behaviour is
+  # still the Qwen3-Coder family (purpose-built for agentic coding).
   - name: coding
     append_system_prompt: |
       Match Claude Sonnet's coding style: terse, structured, tools used precisely.
       Prefer code blocks over prose. Avoid filler explanations unless asked.
       When using tools, batch independent operations in parallel.
     providers:
-      - ollama-qwen3-6-35b
-      - ollama-qwen3-6-27b
-      - ollama-qwen-coder-14b   # local quality fallback
+      - ollama-qwen-coder-14b   # local primary (verified on real hardware, the proven choice)
+      - ollama-gemma4-26b       # local: tool_calls confirmed OK on real hardware (note-recommended)
       - ollama-qwen-coder-7b    # local fast fallback
+      - ollama-qwen3-coder-30b  # local: purpose-built for agentic coding (llama.cpp supported)
+      # The Qwen3.6 line is kept as a commented-out tail fallback because it is
+      # hard to use through Ollama. Candidates to promote back to primary once
+      # the machine has headroom and llama.cpp support improves later:
+      #   - ollama-qwen3-6-27b
+      #   - ollama-qwen3-6-35b
       - openrouter-free         # cloud: qwen3-coder:free (262K ctx)
       - openrouter-gpt-oss-free # cloud: vendor diversity (escape hatch for rate limits)
       - zai-coding-glm-4-7      # paid: GLM-4.7 (Sonnet-class, Z.AI Coding Plan)

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/pyproject.toml
@@ -11,7 +11,7 @@
 # in plan.md §11.B; once granted, this name will become an alias and
 # `coderouter` will become the canonical distribution name.
 name = "coderouter-cli"
-version = "1.8.0"
+version = "1.8.1"
 description = "Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible."
 readme = "README.md"
 requires-python = ">=3.12"

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_capability_registry.py
@@ -403,6 +403,52 @@ def test_bundled_yaml_does_not_flag_other_llama_families_as_degraded() -> None:
     )
 
 
+# ======================================================================
+# v1.8.0: registry coverage for the model families listed in the Unsloth
+# tool-calling guide
+# ======================================================================
+
+
+@pytest.mark.parametrize(
+    "model_slug",
+    [
+        # Kimi K2 family — verified on real hardware via NIM
+        "moonshotai/kimi-k2-instruct",
+        "moonshotai/kimi-k2-thinking",
+        "moonshotai/Kimi-K2-Instruct",
+        # gpt-oss — verified on real hardware via OpenRouter free
+        "openai/gpt-oss-120b:free",
+        "openai/gpt-oss-20b",
+        "gpt-oss-120b",
+        # DeepSeek-V3.x — listed in the Unsloth guide (preemptive declaration)
+        "deepseek-ai/DeepSeek-V3.1",
+        "deepseek-ai/DeepSeek-V3.2",
+        "deepseek/deepseek-v3.1",
+        # MiniMax — listed in the Unsloth guide
+        "MiniMaxAI/MiniMax-M2",
+        "minimax/minimax-m2",
+        # NVIDIA Nemotron 3 — listed in the Unsloth guide
+        "nvidia/nemotron-3-nano-9b",
+        "nvidia/Nemotron-3-Nano-12B",
+        # Devstral 2 — listed in the Unsloth guide
+        "mistralai/Devstral-2-24B",
+        "mistral/devstral-2",
+    ],
+)
+def test_bundled_yaml_unsloth_listed_models_resolve_tools_true(model_slug: str) -> None:
+    """v1.8.0: each family listed in the Unsloth tool-calling guide (Kimi K2 /
+    gpt-oss / DeepSeek-V3 / MiniMax / Nemotron / Devstral) resolves to tools=true.
+
+    Even without an explicit capabilities.tools: true declaration in
+    providers.yaml, the bundled registry treats them as tool-call capable
+    by default.
+    """
+    reg = CapabilityRegistry.load_default()
+    result = reg.lookup(kind="openai_compat", model=model_slug)
+    assert result.tools is True, (
+        f"{model_slug}: families listed in the Unsloth guide should resolve "
+        f"to tools=true in the registry (got tools={result.tools!r})"
+    )
+
+
 def test_lookup_any_kind_rule_matches_both_adapter_kinds() -> None:
     """A rule with kind='any' applies regardless of the adapter kind."""
     reg = CapabilityRegistry(

{coderouter_cli-1.8.0 → coderouter_cli-1.8.1}/tests/test_config.py
@@ -5,6 +5,7 @@ from __future__ import annotations
 from pathlib import Path
 
 import pytest
+import yaml
 
 from coderouter.config.loader import load_config, resolve_api_key
 from coderouter.config.schemas import CodeRouterConfig

@@ -73,6 +74,68 @@ def test_env_overrides_default_profile(
         load_config(yaml_config_path)
 
 
+def test_env_mode_resolves_through_mode_aliases_at_startup(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """v1.8.0: CODEROUTER_MODE goes through mode_aliases before validation.
+
+    Matches the runtime ``X-CodeRouter-Mode`` header semantics — startup
+    should accept canonical short intent names (``coding`` / ``general`` /
+    ``reasoning`` / ``multi`` / ``fast`` / ``cheap``) and resolve them to
+    the underlying profile via ``mode_aliases`` instead of failing
+    validation with "default_profile 'coding' is not declared".
+
+    Repro for the v1.8.0 NIM-yaml bug: the user's providers.yaml has
+    profiles=[claude-code-nim, ...] and mode_aliases={coding: claude-code-nim};
+    ``coderouter serve --mode coding`` (= env CODEROUTER_MODE=coding) used to
+    blow up because env_mode was assigned directly to default_profile,
+    bypassing the alias map.
+    """
+    cfg_file = tmp_path / "providers.yaml"
+    cfg_file.write_text(
+        yaml.safe_dump(
+            {
+                "default_profile": "claude-code-nim",
+                "mode_aliases": {
+                    "coding": "claude-code-nim",
+                    "reasoning": "nim-reasoning",
+                },
+                "providers": [
+                    {
+                        "name": "p",
+                        "kind": "openai_compat",
+                        "base_url": "http://localhost:11434/v1",
+                        "model": "x",
+                    }
+                ],
+                "profiles": [
+                    {"name": "claude-code-nim", "providers": ["p"]},
+                    {"name": "nim-reasoning", "providers": ["p"]},
+                ],
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    # Alias should resolve: 'coding' → 'claude-code-nim'.
+    monkeypatch.setenv("CODEROUTER_MODE", "coding")
+    cfg = load_config(cfg_file)
+    assert cfg.default_profile == "claude-code-nim", (
+        f"CODEROUTER_MODE=coding should resolve through mode_aliases to "
+        f"'claude-code-nim', got {cfg.default_profile!r}"
+    )
+
+    # Direct profile names still work.
+    monkeypatch.setenv("CODEROUTER_MODE", "claude-code-nim")
+    cfg = load_config(cfg_file)
+    assert cfg.default_profile == "claude-code-nim"
+
+    # Unknown intent (not in profiles, not in aliases) → fast-fail.
+    monkeypatch.setenv("CODEROUTER_MODE", "totally-unknown")
+    with pytest.raises(ValueError, match="totally-unknown"):
+        load_config(cfg_file)
+
+
 def test_env_override_default_profile_ignores_empty_string(
     yaml_config_path: Path, monkeypatch: pytest.MonkeyPatch
 ) -> None: