coderouter-cli 1.8.1__tar.gz → 1.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/CHANGELOG.md +126 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/PKG-INFO +5 -5
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/README.en.md +4 -4
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/README.md +4 -4
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/adapters/openai_compat.py +30 -14
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/data/model-capabilities.yaml +21 -9
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/doctor.py +116 -9
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/troubleshooting.en.md +24 -3
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/troubleshooting.md +70 -10
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/pyproject.toml +1 -1
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_doctor.py +246 -1
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_reasoning_strip.py +72 -2
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/.gitignore +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/LICENSE +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/__main__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/adapters/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/adapters/anthropic_native.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/adapters/base.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/adapters/registry.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/cli.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/cli_stats.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/config/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/config/capability_registry.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/config/env_file.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/config/loader.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/config/schemas.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/data/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/doctor_apply.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/env_security.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/errors.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/anthropic_routes.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/app.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/dashboard_routes.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/metrics_routes.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/ingress/openai_routes.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/logging.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/metrics/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/metrics/collector.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/metrics/prometheus.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/output_filters.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/routing/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/routing/auto_router.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/routing/capability.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/routing/fallback.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/translation/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/translation/anthropic.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/translation/convert.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/coderouter/translation/tool_repair.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/assets/dashboard-demo.png +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/designs/v1.5-dashboard-mockup.html +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/designs/v1.6-auto-router-verification.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/designs/v1.6-auto-router.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/free-tier-guide.en.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/free-tier-guide.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/hf-ollama-models.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/openrouter-roster/README.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/openrouter-roster/latest.json +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/quickstart.en.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/quickstart.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v0.4.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v0.5-verify.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v0.5.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v0.6.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v0.7.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v1.0-verify.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/retrospectives/v1.0.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/security.en.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/security.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/usage-guide.en.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/usage-guide.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/when-do-i-need-coderouter.en.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/docs/when-do-i-need-coderouter.md +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/.env.example +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/providers.auto-custom.yaml +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/providers.auto.yaml +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/providers.note-2026.yaml +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/providers.nvidia-nim.yaml +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/examples/providers.yaml +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/scripts/demo_traffic.sh +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/scripts/openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/scripts/verify_v0_5.sh +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/scripts/verify_v1_0.sh +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/__init__.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/conftest.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_adapter_anthropic.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_auto_router.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_capability.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_capability_degraded_payload.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_capability_registry.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_claude_code_suitability.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_cli.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_cli_stats.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_config.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_dashboard_endpoint.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_doctor_apply.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_env_file.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_env_security.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_errors.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_examples_yaml.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback_anthropic.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback_cache_control.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback_misconfig_warn.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback_paid_gate.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_fallback_thinking.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_ingress_anthropic.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_ingress_profile.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_metrics_collector.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_metrics_endpoint.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_metrics_jsonl.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_metrics_prometheus.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_openai_compat.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_output_filters.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_output_filters_adapters.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_setup_sh.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_tool_repair.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_translation_anthropic.py +0 -0
- {coderouter_cli-1.8.1 → coderouter_cli-1.8.3}/tests/test_translation_reverse.py +0 -0
|
@@ -6,6 +6,132 @@ versioning follows [SemVer](https://semver.org/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [v1.8.3] — 2026-04-26 (tool_calls probe も thinking モデル対応 + adapter で `reasoning_content` strip — llama.cpp 直叩き対応)
|
|
10
|
+
|
|
11
|
+
**Theme: v1.8.2 と同日リリースの第 2 弾 patch。Qwen3.6:35b-a3b on llama.cpp の実機検証で発見した 2 つの追加課題 — `tool_calls` probe の thinking モデル偽陽性 + llama.cpp が emit する `reasoning_content` フィールドの adapter strip 不足 — を解消。**
|
|
12
|
+
|
|
13
|
+
v1.8.2 リリース直後、note 記事 v1.8.2「自分が作った診断ツールに自分が騙された話」の続編として **「Ollama 経由で詰んだ Qwen3.6 を Unsloth GGUF + llama.cpp 直叩きで動かしたら native tool_calls が完璧に出た」** を実機検証中、CodeRouter doctor で `tool_calls [NEEDS TUNING]` が依然として出る矛盾に直面。深掘りで `tool_calls` probe の `max_tokens=64` が thinking モデルで `reasoning_content` トークン消費に食い切られる **v1.8.2 で num_ctx / streaming に対して直したのと完全に同じバグ pattern が tool_calls probe にも残っていた** ことが判明。あわせて llama.cpp の `reasoning_content` フィールド (Ollama / OpenRouter は `reasoning`) が openai_compat adapter の strip 対象に入っていなかった事実も発見。両者を v1.8.3 として 1 patch に統合。
|
|
14
|
+
|
|
15
|
+
**Ollama 経由詰みの真因が完全確定**: Ollama の chat template / tool 仕様未成熟、モデル本体は健全。llama.cpp 直叩きでは Qwen3.6 系の `tool_calls` が native で動作。
|
|
16
|
+
|
|
17
|
+
- Tests: 733 → **737** (+4: tool_calls probe budget thinking variant / reasoning_content strip 3 件)
|
|
18
|
+
- Runtime deps: 5 → 5 (21 sub-release 連続据え置き)
|
|
19
|
+
- Backward compat: 完全互換、`providers.yaml` / `~/.coderouter/model-capabilities.yaml` 編集不要
|
|
20
|
+
|
|
21
|
+
### Changes
|
|
22
|
+
|
|
23
|
+
#### Doctor `tool_calls` probe: thinking モデル対応バジェット
|
|
24
|
+
|
|
25
|
+
- **`coderouter/doctor.py`**: `_probe_tool_calls` の `max_tokens` を `64` 固定から **thinking 検出付きの動的選択** (256 default / 1024 thinking) に変更。`_TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT/_THINKING` 定数を新設、既存の `_is_reasoning_model(provider, resolved)` ヘルパで分岐。
|
|
26
|
+
- 旧 64 では Qwen3.6:35b-a3b on llama.cpp が `reasoning_content` で 64 token 食い切り → `tool_calls` 出力前に length cap → **NEEDS_TUNING + suggested patch 「`tools: false` にしろ」という真逆の推奨** を出していた
|
|
27
|
+
- 新 1024 で thinking + tool_call が両方収まる headroom
|
|
28
|
+
|
|
29
|
+
#### Adapter: `reasoning_content` フィールド strip 追加
|
|
30
|
+
|
|
31
|
+
- **`coderouter/adapters/openai_compat.py`**: `_strip_reasoning_field` を `_NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")` の両方を strip するように拡張。
|
|
32
|
+
- `reasoning` (Ollama / OpenRouter 命名) と `reasoning_content` (llama.cpp `llama-server` 命名) は同じ概念で、ベンダー命名が違うだけ
|
|
33
|
+
- 厳格な OpenAI client はどちらも unknown key として reject するので、両方 strip するのが正しい
|
|
34
|
+
- `capability-degraded` log の `dropped` フィールドも `["reasoning", "reasoning_content"]` に更新 (両方 strip し得ることを表現)
|
|
35
|
+
|
|
36
|
+
#### Doctor `reasoning-leak` probe: `reasoning_content` 検出
|
|
37
|
+
|
|
38
|
+
- **`coderouter/doctor.py`**: `_probe_reasoning_leak` の `has_reasoning` 判定を `"reasoning" in msg or "reasoning_content" in msg` に拡張。llama.cpp 経由 provider でも reasoning leak を informational に検出可能に。
|
|
39
|
+
|
|
40
|
+
#### Tests
|
|
41
|
+
|
|
42
|
+
- **`tests/test_doctor.py`** + 1: `test_tool_calls_max_tokens_bumped_for_thinking_provider` (thinking provider で tool_calls probe が 1024 を要求、native tool_calls 応答で OK 判定)
|
|
43
|
+
- **`tests/test_reasoning_strip.py`** + 3: `test_strip_helper_removes_reasoning_content_field` / `test_strip_helper_removes_both_reasoning_and_reasoning_content` / `test_strip_helper_removes_reasoning_content_from_delta` (各 layer で `reasoning_content` 除去確認)
|
|
44
|
+
- 既存 `tests/test_reasoning_strip.py` の `recs[0].dropped == ["reasoning"]` を `["reasoning", "reasoning_content"]` に更新 (log の表現変更に追従)
|
|
45
|
+
|
|
46
|
+
### Why
|
|
47
|
+
|
|
48
|
+
v1.8.2 で「diagnostic ツール自身も diagnostic され続ける必要がある」というメタ教訓を書いた直後、まさにそのことを実証する形で残バグが発見された。`tool_calls` probe は num_ctx / streaming probe と同じ「thinking モデルの reasoning トークン消費を考慮していない `max_tokens=64`」問題を抱えていて、しかも doctor の出した suggested patch (`tools: false` に倒せ) は **完全に逆の対処を勧めていた** — false-positive どころか、誠実なユーザーが従うと healthy なモデルを抑制してしまう **active-harmful な誤診断**。
|
|
49
|
+
|
|
50
|
+
これは v1.8.2 の patch を当てる時点で見つけるべきだった見落としで、note 記事 v1.8.2 のメタ教訓「diagnostic ツール自身も diagnostic され続ける」が現実に試された格好。素早く v1.8.3 で潰す。
|
|
51
|
+
|
|
52
|
+
`reasoning_content` strip 追加は llama.cpp 直叩き経路を CodeRouter から綺麗に使えるようにする ergonomic 改善で、`v1.8.x` patch 候補で plan.md に記録済みだった項目を実機発見と同時に消化。
|
|
53
|
+
|
|
54
|
+
### Migration
|
|
55
|
+
|
|
56
|
+
`pyproject.toml version 1.8.2 → 1.8.3`、`coderouter --version` は 1.8.3 を返す。**手元の `~/.coderouter/providers.yaml` は触らない限り完全に変化なし**。
|
|
57
|
+
|
|
58
|
+
v1.8.2 で Qwen3.6 / Gemma 4 系 thinking provider に対して `tool_calls [NEEDS TUNING]` が出ていたユーザーは v1.8.3 で再実行すると **OK** 判定 (実機で動いていた provider が doctor 上でも妥当に評価される)。llama.cpp 直叩き provider を使っているユーザーは `reasoning_content` が client に流れることなく綺麗に strip される。
|
|
59
|
+
|
|
60
|
+
### Files touched
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
M CHANGELOG.md
|
|
64
|
+
M coderouter/adapters/openai_compat.py
|
|
65
|
+
M coderouter/doctor.py
|
|
66
|
+
M pyproject.toml
|
|
67
|
+
M plan.md
|
|
68
|
+
M docs/troubleshooting.md
|
|
69
|
+
M tests/test_doctor.py
|
|
70
|
+
M tests/test_reasoning_strip.py
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## [v1.8.2] — 2026-04-26 (doctor probe を thinking モデル対応に — Gemma 4 偽陽性の解消)
|
|
76
|
+
|
|
77
|
+
**Theme: v1.8.1 リリース直後の深掘りで `doctor` の `num_ctx` / `streaming` probe が thinking モデルに対して偽陽性 NEEDS_TUNING を出していた事実を発見、probe の `max_tokens` バジェットを reasoning トークン消費分込みで設計し直した patch。**
|
|
78
|
+
|
|
79
|
+
v1.8.1 で `coding` profile primary に置いた Gemma 4 26B の doctor 結果が `tool_calls [OK]` + `num_ctx [NEEDS TUNING]` + `streaming [NEEDS TUNING]` で「中途半端に動く」と判定されていたが、実機で curl 直叩きすると **Ollama OpenAI-compat 経由でも 5K トークンの canary echo-back に成功** することが判明。原因切り分けの結果、Gemma 4 が emit する非標準 `reasoning` フィールドが doctor probe の `max_tokens=32` (num_ctx) / `max_tokens=128` (streaming) を**思考トークンで食い切って `content=""` で `finish_reason='length'`** を返していた偽陽性と確定。実機検証 (M3 Max 64GB / Ollama 0.21.2) で Anthropic 互換 `/v1/messages` 経由 Gemma 4 26B が "Hello." を 2 秒で返すことも確認、**Gemma 4 26B は実用 OK** と最終判定。
|
|
80
|
+
|
|
81
|
+
- Tests: 730 → **733** (+3: thinking provider declaration / registry-based / streaming の 3 件)
|
|
82
|
+
- Runtime deps: 5 → 5 (20 sub-release 連続据え置き)
|
|
83
|
+
- Backward compat: 完全互換、`providers.yaml` / `~/.coderouter/model-capabilities.yaml` 編集不要
|
|
84
|
+
|
|
85
|
+
### Changes
|
|
86
|
+
|
|
87
|
+
#### Doctor probe: thinking モデル対応バジェット選択
|
|
88
|
+
|
|
89
|
+
- **`coderouter/doctor.py`**: `_probe_num_ctx` / `_probe_streaming` の `max_tokens` を thinking 検出付きの動的選択に変更。新 helper `_is_reasoning_model(provider, resolved)` が provider declaration / registry resolved の両方から `thinking` / `reasoning_passthrough` の真偽を見て、reasoning モデルのときだけ大きいバジェットを選ぶ。
|
|
90
|
+
- `_NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256` (旧 32)、`_NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024`
|
|
91
|
+
- `_STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512` (旧 128)、`_STREAMING_PROBE_MAX_TOKENS_THINKING = 1024`
|
|
92
|
+
- 非 thinking モデルは natural stop で早期終了するので無駄消費なし、thinking モデルは reasoning trace + 答えが収まる headroom
|
|
93
|
+
|
|
94
|
+
#### Registry: 既知 thinking モデルに `thinking: true` 宣言
|
|
95
|
+
|
|
96
|
+
- **`coderouter/data/model-capabilities.yaml`**: `gemma4:*` / `google/gemma-4*` / `qwen3.6:*` / `qwen/qwen3.6-*` に `thinking: true` を追加。これらは Ollama 経由で `reasoning` フィールドにかなりのトークンを吐く設計と確認済み。registry 経由で渡るので user は `providers.yaml` を触らなくても doctor の thinking バジェットが効く
|
|
97
|
+
- **Qwen3.6 セクションのコメント更新**: v1.8.1 時点で「Ollama silent cap」と書いていた part を「**v1.8.2 で num_ctx / streaming は doctor 偽陽性と判明、tool_calls [NEEDS TUNING] が真の課題として残る**」に整理。`claude_code_suitability` 撤回判断は維持 (Qwen3.6 の tool_calls 不全は thinking 起因ではない別の真の課題)
|
|
98
|
+
|
|
99
|
+
#### Tests
|
|
100
|
+
|
|
101
|
+
- **`tests/test_doctor.py`**: 3 件追加
|
|
102
|
+
- `test_num_ctx_max_tokens_bumped_for_thinking_provider_declaration`: `provider.capabilities.thinking=True` → 1024
|
|
103
|
+
- `test_num_ctx_max_tokens_bumped_when_registry_says_thinking`: provider 宣言なし + registry 宣言あり → 1024
|
|
104
|
+
- `test_streaming_max_tokens_bumped_for_thinking_provider`: streaming probe も同経路で 1024 になる
|
|
105
|
+
- 既存 `test_num_ctx_request_body_merges_extra_body_options` の `max_tokens == 32` assertion を `== 256` に更新 (新 baseline)
|
|
106
|
+
- 既存 `test_streaming_request_body_carries_stream_true_and_merges_extra_body` に `max_tokens == 512` assertion を追加 (streaming baseline)
|
|
107
|
+
|
|
108
|
+
### Why
|
|
109
|
+
|
|
110
|
+
v1.8.1 article 執筆中に「note の流行モデル → ollama pull → 動かない」のうち Gemma 4 だけ `tool_calls [OK]` の **逆転勝利** だったはずが、`num_ctx [NEEDS TUNING]` も出ていて記事として煮え切らない状態だった。深掘りの結果、`/v1/chat/completions` 経由でも options は効く / `ollama ps` で context length 262144 が出る / **でも doctor は失敗** という矛盾を観測。`.choices[0].message.reasoning` フィールドに思考トークンが流れて `max_tokens=32` を消費していた事実を確認、**doctor 側の probe 設計が thinking モデル時代に追いついていない**ことが判明。
|
|
111
|
+
|
|
112
|
+
これは「実機 evidence first」原則 (plan.md §5.4) の更に一段下のメタ教訓:**diagnostic ツール自身も diagnostic され続ける必要がある**。
|
|
113
|
+
|
|
114
|
+
### Migration
|
|
115
|
+
|
|
116
|
+
`pyproject.toml version 1.8.1 → 1.8.2`、`coderouter --version` は 1.8.2 を返す。**手元の `~/.coderouter/providers.yaml` は触らない限り完全に変化なし**。
|
|
117
|
+
|
|
118
|
+
v1.8.1 で Gemma 4 26B を `claude_code_suitability` 抑え目に運用していたユーザーは v1.8.2 で doctor 再実行すると `num_ctx [OK]` + `streaming [OK]` まで通るはず。Qwen3.6 系の `tool_calls [NEEDS TUNING]` は本物 (thinking 起因ではない) なので引き続き coding chain primary には推奨しない。
|
|
119
|
+
|
|
120
|
+
### Files touched
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
M CHANGELOG.md
|
|
124
|
+
M coderouter/data/model-capabilities.yaml
|
|
125
|
+
M coderouter/doctor.py
|
|
126
|
+
M pyproject.toml
|
|
127
|
+
M plan.md
|
|
128
|
+
M docs/troubleshooting.md
|
|
129
|
+
M docs/articles/note-v1-8-1-reality-check.md (or new file v1-8-2)
|
|
130
|
+
M tests/test_doctor.py
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
9
135
|
## [v1.8.1] — 2026-04-26 (実機検証反映 patch — mode_aliases 解決 + Gemma 4 第一候補化 + Ollama 既知問題ドキュメント化)
|
|
10
136
|
|
|
11
137
|
**Theme: v1.8.0 出荷直後の実機検証 (M3 Max 32GB / Ollama 0.21.2) で踏んだ問題 3 件を patch で解消。**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 1.8.1
|
|
3
|
+
Version: 1.8.3
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
|
|
|
60
60
|
<p align="center">
|
|
61
61
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
62
62
|
<a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
|
|
63
|
-
<a href=""><img src="https://img.shields.io/badge/version-1.8.1-blue" alt="version"></a>
|
|
63
|
+
<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
|
|
64
64
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
65
65
|
<a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
|
|
66
66
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -100,7 +100,7 @@ Description-Content-Type: text/markdown
|
|
|
100
100
|
| **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
|
|
101
101
|
| **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
|
|
102
102
|
| **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
|
|
103
|
-
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.
|
|
103
|
+
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.3 — tool_calls probe も thinking 対応 + adapter で `reasoning_content` strip / llama.cpp 直叩き対応) |
|
|
104
104
|
| **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
|
|
105
105
|
|
|
106
106
|
English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
|
|
@@ -175,7 +175,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ
|
|
|
175
175
|
|
|
176
176
|
## クイックスタート(3 コマンド)
|
|
177
177
|
|
|
178
|
-
**v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM
|
|
178
|
+
**v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加、**v1.8.2 で doctor probe を thinking モデル対応**にしました。`uvx` 一発で動きます (Python 3.12 以上必須):
|
|
179
179
|
|
|
180
180
|
```bash
|
|
181
181
|
# 1. サンプル設定を置く
|
|
@@ -205,7 +205,7 @@ uv run coderouter serve --port 8088
|
|
|
205
205
|
|
|
206
206
|
> **注**: PyPI 上のパッケージ名は `coderouter-cli` ですが、コマンド名と Python import 名は `coderouter` のままです。詳しくは [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) 参照。
|
|
207
207
|
>
|
|
208
|
-
>
|
|
208
|
+
> **`--apply` 自動化を使う場合** (v1.8.0+): `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
|
|
209
209
|
|
|
210
210
|
あとは任意の OpenAI クライアントを `http://127.0.0.1:8088` に向けるだけです:
|
|
211
211
|
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
<p align="center">
|
|
21
21
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
22
22
|
<a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
|
|
23
|
-
<a href=""><img src="https://img.shields.io/badge/version-1.8.1-blue" alt="version"></a>
|
|
23
|
+
<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
|
|
24
24
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
25
25
|
<a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
|
|
26
26
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
| **Decide if you need it** | [Decision guide](./docs/when-do-i-need-coderouter.en.md) | Agent × model matrix to figure out whether CodeRouter fits your setup at all |
|
|
60
60
|
| **When stuck** | [Troubleshooting](./docs/troubleshooting.en.md) | How to use `doctor`, why `.env` needs `export`, the 5 Ollama silent-fail symptoms, Claude Code integration gotchas |
|
|
61
61
|
| **Operate safely** | [Security](./docs/security.en.md) | Threat model, secret handling, vulnerability reporting |
|
|
62
|
-
| **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.
|
|
62
|
+
| **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.3 — the `tool_calls` probe is now thinking-aware too, and the adapter strips `reasoning_content` so llama.cpp can be used as a direct backend) |
|
|
63
63
|
| **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
|
|
64
64
|
|
|
65
65
|
日本語版: [Quickstart](./docs/quickstart.md) · [利用ガイド](./docs/usage-guide.md) · [無料枠ガイド](./docs/free-tier-guide.md) · [要否判定](./docs/when-do-i-need-coderouter.md) · [トラブルシューティング](./docs/troubleshooting.md) · [Security](./docs/security.md)
|
|
@@ -134,7 +134,7 @@ Design invariants and the roadmap are in [`plan.md`](./plan.md). Beginner-friend
|
|
|
134
134
|
|
|
135
135
|
## Quickstart (2 commands)
|
|
136
136
|
|
|
137
|
-
**v1.7.0 published to PyPI**, **v1.8.0 added use-case-aware 4 profiles + Z.AI/GLM integration**. `uvx` installs and runs in one shot (Python 3.12+ required):
|
|
137
|
+
**v1.7.0 published to PyPI**, **v1.8.0 added use-case-aware 4 profiles + Z.AI/GLM integration**, **v1.8.2 made the `doctor` probe thinking-model-aware**. `uvx` installs and runs in one shot (Python 3.12+ required):
|
|
138
138
|
|
|
139
139
|
```bash
|
|
140
140
|
# 1. Drop a sample config
|
|
@@ -164,7 +164,7 @@ uv run coderouter serve --port 8088
|
|
|
164
164
|
|
|
165
165
|
> **Note**: the PyPI distribution name is `coderouter-cli`, but the command and Python import name are both `coderouter`. See [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) for details.
|
|
166
166
|
>
|
|
167
|
-
> **For the v1.8.0
|
|
167
|
+
> **For the `--apply` automation** (v1.8.0+): install `ruamel.yaml` as an optional dependency (`pip install 'coderouter-cli[doctor]'` or `uv pip install ruamel.yaml`). It is not required for the base feature set.
|
|
168
168
|
|
|
169
169
|
Then point any OpenAI client at `http://127.0.0.1:8088`:
|
|
170
170
|
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
<p align="center">
|
|
20
20
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
21
21
|
<a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
|
|
22
|
-
<a href=""><img src="https://img.shields.io/badge/version-1.8.1-blue" alt="version"></a>
|
|
22
|
+
<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
|
|
23
23
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
24
24
|
<a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
|
|
25
25
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
| **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
|
|
60
60
|
| **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
|
|
61
61
|
| **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
|
|
62
|
-
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.
|
|
62
|
+
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.3 — tool_calls probe も thinking 対応 + adapter で `reasoning_content` strip / llama.cpp 直叩き対応) |
|
|
63
63
|
| **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
|
|
64
64
|
|
|
65
65
|
English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
|
|
@@ -134,7 +134,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ
|
|
|
134
134
|
|
|
135
135
|
## クイックスタート(3 コマンド)
|
|
136
136
|
|
|
137
|
-
**v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM
|
|
137
|
+
**v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加、**v1.8.2 で doctor probe を thinking モデル対応**にしました。`uvx` 一発で動きます (Python 3.12 以上必須):
|
|
138
138
|
|
|
139
139
|
```bash
|
|
140
140
|
# 1. サンプル設定を置く
|
|
@@ -164,7 +164,7 @@ uv run coderouter serve --port 8088
|
|
|
164
164
|
|
|
165
165
|
> **注**: PyPI 上のパッケージ名は `coderouter-cli` ですが、コマンド名と Python import 名は `coderouter` のままです。詳しくは [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) 参照。
|
|
166
166
|
>
|
|
167
|
-
>
|
|
167
|
+
> **`--apply` 自動化を使う場合** (v1.8.0+): `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
|
|
168
168
|
|
|
169
169
|
あとは任意の OpenAI クライアントを `http://127.0.0.1:8088` に向けるだけです:
|
|
170
170
|
|
|
@@ -48,14 +48,25 @@ logger = get_logger(__name__)
|
|
|
48
48
|
_RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
# v1.8.3: non-standard reasoning fields emitted by various upstreams.
|
|
52
|
+
# Different runtimes use different field names for the same concept:
|
|
53
|
+
# * ``reasoning`` — OpenRouter free models (gpt-oss-120b:free
|
|
54
|
+
# confirmed 2026-04-20), Ollama
|
|
55
|
+
# * ``reasoning_content`` — llama.cpp ``llama-server`` (Qwen3.6 etc.,
|
|
56
|
+
# confirmed 2026-04-26 with Unsloth GGUF)
|
|
57
|
+
# Strict OpenAI clients reject either as an unknown key. The strip
|
|
58
|
+
# function below removes both at the adapter boundary so downstream
|
|
59
|
+
# layers never see them, regardless of which runtime fronts the model.
|
|
60
|
+
_NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")
|
|
61
|
+
|
|
62
|
+
|
|
51
63
|
def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
|
|
52
|
-
"""Remove non-standard
|
|
64
|
+
"""Remove non-standard reasoning keys from a choices list, in place.
|
|
53
65
|
|
|
54
|
-
v0.5-C
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
We strip it at the adapter boundary so downstream layers never see it.
|
|
66
|
+
v0.5-C originally targeted OpenRouter's ``reasoning`` field. v1.8.3
|
|
67
|
+
extends the strip to ``reasoning_content`` (llama.cpp ``llama-server``
|
|
68
|
+
naming) since both denote the same hidden chain-of-thought trace and
|
|
69
|
+
neither is part of the OpenAI Chat Completions spec.
|
|
59
70
|
|
|
60
71
|
Args:
|
|
61
72
|
choices: The ``choices`` list from the response body or stream chunk.
|
|
@@ -64,7 +75,7 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
|
|
|
64
75
|
``False`` for non-streaming responses (look in ``choice["message"]``).
|
|
65
76
|
|
|
66
77
|
Returns:
|
|
67
|
-
True iff at least one
|
|
78
|
+
True iff at least one reasoning key was removed. Callers use
|
|
68
79
|
this to decide whether to emit a one-shot log line.
|
|
69
80
|
"""
|
|
70
81
|
if not choices:
|
|
@@ -75,9 +86,12 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
|
|
|
75
86
|
if not isinstance(choice, dict):
|
|
76
87
|
continue
|
|
77
88
|
inner = choice.get(inner_key)
|
|
78
|
-
if isinstance(inner, dict)
|
|
79
|
-
|
|
80
|
-
|
|
89
|
+
if not isinstance(inner, dict):
|
|
90
|
+
continue
|
|
91
|
+
for key in _NON_STANDARD_REASONING_KEYS:
|
|
92
|
+
if key in inner:
|
|
93
|
+
inner.pop(key, None)
|
|
94
|
+
stripped = True
|
|
81
95
|
return stripped
|
|
82
96
|
|
|
83
97
|
|
|
@@ -235,15 +249,17 @@ class OpenAICompatAdapter(BaseAdapter):
|
|
|
235
249
|
retryable=False,
|
|
236
250
|
) from exc
|
|
237
251
|
|
|
238
|
-
# v0.5-C: passive strip of non-standard
|
|
239
|
-
#
|
|
252
|
+
# v0.5-C / v1.8.3: passive strip of non-standard reasoning fields
|
|
253
|
+
# on choices (covers both Ollama/OpenRouter ``reasoning`` and
|
|
254
|
+
# llama.cpp ``reasoning_content``). No-op when the provider opted
|
|
255
|
+
# into passthrough.
|
|
240
256
|
if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
|
|
241
257
|
data.get("choices"), delta_key=False
|
|
242
258
|
):
|
|
243
259
|
log_capability_degraded(
|
|
244
260
|
logger,
|
|
245
261
|
provider=self.name,
|
|
246
|
-
dropped=
|
|
262
|
+
dropped=list(_NON_STANDARD_REASONING_KEYS),
|
|
247
263
|
reason="non-standard-field",
|
|
248
264
|
)
|
|
249
265
|
|
|
@@ -344,7 +360,7 @@ class OpenAICompatAdapter(BaseAdapter):
|
|
|
344
360
|
log_capability_degraded(
|
|
345
361
|
logger,
|
|
346
362
|
provider=self.name,
|
|
347
|
-
dropped=
|
|
363
|
+
dropped=list(_NON_STANDARD_REASONING_KEYS),
|
|
348
364
|
reason="non-standard-field",
|
|
349
365
|
)
|
|
350
366
|
reasoning_logged = True
|
|
@@ -176,47 +176,59 @@ rules:
|
|
|
176
176
|
# 「Claude Code 代替として最高」「local champ」と評価されている。
|
|
177
177
|
#
|
|
178
178
|
# ただし v1.8.0 までで `claude_code_suitability: ok` を declare していた
|
|
179
|
-
# のは note 記事の伝聞ベースの先回り宣言で、v1.8.1
|
|
180
|
-
#
|
|
181
|
-
# - num_ctx
|
|
182
|
-
#
|
|
179
|
+
# のは note 記事の伝聞ベースの先回り宣言で、v1.8.1 〜 v1.8.2
|
|
180
|
+
# (2026-04-26) の実機検証 (M3 Max 64GB / Ollama 0.21.2) で:
|
|
181
|
+
# - num_ctx と streaming の NEEDS_TUNING は v1.8.2 で thinking モデル
|
|
182
|
+
# 用 probe バジェット拡大により偽陽性と判明 (doctor 側の課題)
|
|
183
183
|
# - tool_calls probe が native tool_calls / 修復可能 JSON のいずれも
|
|
184
|
-
#
|
|
185
|
-
#
|
|
186
|
-
# これらは Ollama 経由特有の問題で、HF / vLLM 直接ロードなら違う可能性。
|
|
187
|
-
# 確証ない以上、`claude_code_suitability` は撤回し `tools` 宣言だけ残す。
|
|
184
|
+
# 返さない真の課題が残る (Qwen3.6 系の Ollama 経由 tool 仕様未成熟)
|
|
185
|
+
# tool_calls 不全が解消されるまで `claude_code_suitability` は撤回。
|
|
188
186
|
# 実機で動いたユーザーは `~/.coderouter/model-capabilities.yaml` で
|
|
189
187
|
# `claude_code_suitability: ok` を上書きできる。
|
|
190
188
|
# ------------------------------------------------------------------
|
|
191
189
|
|
|
190
|
+
# v1.8.2: thinking: true は doctor probe (num_ctx / streaming) が reasoning
|
|
191
|
+
# トークン消費分の max_tokens 余裕を確保するためのヒント。Qwen3 系は
|
|
192
|
+
# /think モードで thinking トークンを吐く設計なので true 宣言。
|
|
192
193
|
- match: "qwen3.6:*"
|
|
193
194
|
kind: openai_compat
|
|
194
195
|
capabilities:
|
|
195
196
|
tools: true
|
|
197
|
+
thinking: true
|
|
196
198
|
|
|
197
199
|
- match: "qwen/qwen3.6-*"
|
|
198
200
|
kind: openai_compat
|
|
199
201
|
capabilities:
|
|
200
202
|
tools: true
|
|
203
|
+
thinking: true
|
|
201
204
|
|
|
202
205
|
# ------------------------------------------------------------------
|
|
203
|
-
# Gemma 4 family (v1.7-B
|
|
206
|
+
# Gemma 4 family (v1.7-B 追加、v1.8.2 で thinking: true 宣言)
|
|
204
207
|
#
|
|
205
208
|
# Google 公式 Gemma 4。Ollama 公式 tag は gemma4:e2b / e4b / 26b / 31b、
|
|
206
209
|
# 全 variant が tools+vision+thinking 対応、E2B/E4B は audio もサポート。
|
|
207
210
|
# MoE (26b は active 3.8B / total 25.2B)。note 記事で「日常・バランスの
|
|
208
211
|
# 王者」と評価。Claude Haiku 互換性に近い簡潔な応答スタイル。
|
|
212
|
+
#
|
|
213
|
+
# v1.8.2 (2026-04-26): 実機検証 (M3 Max 64GB / Ollama 0.21.2 / gemma4:26b)
|
|
214
|
+
# で `reasoning` フィールドにかなりの量のトークンを吐く thinking モデル
|
|
215
|
+
# と確認。doctor probe の max_tokens=32 / 128 が thinking トークンに
|
|
216
|
+
# 食い切られて偽陽性 NEEDS_TUNING を出していた。registry で
|
|
217
|
+
# `thinking: true` を宣言すると doctor が probe バジェットを 1024 まで
|
|
218
|
+
# 引き上げて偽陽性を回避する。
|
|
209
219
|
# ------------------------------------------------------------------
|
|
210
220
|
|
|
211
221
|
- match: "gemma4:*"
|
|
212
222
|
kind: openai_compat
|
|
213
223
|
capabilities:
|
|
214
224
|
tools: true
|
|
225
|
+
thinking: true
|
|
215
226
|
|
|
216
227
|
- match: "google/gemma-4*"
|
|
217
228
|
kind: openai_compat
|
|
218
229
|
capabilities:
|
|
219
230
|
tools: true
|
|
231
|
+
thinking: true
|
|
220
232
|
|
|
221
233
|
# ------------------------------------------------------------------
|
|
222
234
|
# GLM family (Z.AI / Zhipu AI、v1.7-B 追加)
|
|
@@ -433,6 +433,43 @@ _STREAMING_PROBE_USER_PROMPT = (
|
|
|
433
433
|
# truncated". "1\n2\n...\n30" is ~80 chars; 40 chars covers the halfway
|
|
434
434
|
# mark (1..20) which is already obviously-truncated territory.
|
|
435
435
|
_STREAMING_PROBE_MIN_EXPECTED_CHARS = 40
|
|
436
|
+
|
|
437
|
+
# v1.8.2: probe response budgets.
|
|
438
|
+
#
|
|
439
|
+
# Both num_ctx and streaming probes ask the model for a *short* answer
|
|
440
|
+
# (the canary token / "1..30"). The original budgets (32 / 128 tokens)
|
|
441
|
+
# assumed a non-thinking model that emits the answer immediately. On a
|
|
442
|
+
# thinking model — Gemma 4 26B, Qwen3.6, gpt-oss, deepseek-r1 — the
|
|
443
|
+
# upstream burns the entire budget on a hidden ``reasoning`` field
|
|
444
|
+
# *before* emitting any visible ``content``, producing a false-positive
|
|
445
|
+
# NEEDS_TUNING (canary missing / 0 chars streamed). Bumping the budget
|
|
446
|
+
# is the cleanest fix: non-thinking models stop early at their natural
|
|
447
|
+
# stop token (no waste), thinking models get headroom for the reasoning
|
|
448
|
+
# trace plus the actual answer.
|
|
449
|
+
#
|
|
450
|
+
# Numbers picked from the v1.8.1 reality-check session
|
|
451
|
+
# (docs/articles/note-v1-8-1-reality-check.md):
|
|
452
|
+
# * Gemma 4 26B reasoning prefix observed at ~150-300 tokens before
|
|
453
|
+
# content starts → 1024 covers reasoning + 30-line count comfortably.
|
|
454
|
+
# * Non-thinking baseline kept conservative-but-non-tight (256/512) to
|
|
455
|
+
# absorb stylistic preambles ("Sure, the answer is...") without
|
|
456
|
+
# burning extra cloud quota when the operator probes a paid endpoint.
|
|
457
|
+
_NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256
|
|
458
|
+
_NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024
|
|
459
|
+
_STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512
|
|
460
|
+
_STREAMING_PROBE_MAX_TOKENS_THINKING = 1024
|
|
461
|
+
# v1.8.3: tool_calls probe also needs thinking-aware budget. The
|
|
462
|
+
# pre-v1.8.3 default of 64 was tight even for non-thinking models
|
|
463
|
+
# (the assistant often emits a brief preamble before the JSON tool
|
|
464
|
+
# call), and on thinking models (Qwen3.6, Gemma 4, gpt-oss, deepseek-r1)
|
|
465
|
+
# the entire 64-token budget gets consumed by ``reasoning_content``
|
|
466
|
+
# before any ``tool_calls`` can surface — producing a false-positive
|
|
467
|
+
# NEEDS_TUNING with the WRONG remediation (suggested patch flips
|
|
468
|
+
# ``tools`` to false even though the model supports them perfectly).
|
|
469
|
+
# 256/1024 brings the budget into line with the num_ctx / streaming
|
|
470
|
+
# probes (same _is_reasoning_model gate).
|
|
471
|
+
_TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT = 256
|
|
472
|
+
_TOOL_CALLS_PROBE_MAX_TOKENS_THINKING = 1024
|
|
436
473
|
# Default ``num_predict`` suggested in the emitted patch. -1 would be
|
|
437
474
|
# optimal (uncapped) but "4096" communicates intent more clearly to
|
|
438
475
|
# operators unfamiliar with Ollama's sentinel value, and covers Claude
|
|
@@ -475,6 +512,40 @@ def _declared_num_ctx(provider: ProviderConfig) -> int | None:
|
|
|
475
512
|
return val if isinstance(val, int) else None
|
|
476
513
|
|
|
477
514
|
|
|
515
|
+
def _is_reasoning_model(
|
|
516
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
517
|
+
) -> bool:
|
|
518
|
+
"""v1.8.2: True iff the model is known to emit a hidden reasoning trace.
|
|
519
|
+
|
|
520
|
+
Thinking models (Gemma 4, Qwen3-with-/think, gpt-oss, deepseek-r1,
|
|
521
|
+
Claude Sonnet 4.5+ in extended-thinking mode) burn output tokens on a
|
|
522
|
+
``reasoning`` field before any visible ``content`` is produced. The
|
|
523
|
+
num_ctx / streaming / tool_calls probes use small response budgets that get fully
|
|
524
|
+
consumed by the reasoning prefix, producing a false-positive
|
|
525
|
+
NEEDS_TUNING. Callers use this to choose a generous probe budget.
|
|
526
|
+
|
|
527
|
+
Any one of the following signals is sufficient:
|
|
528
|
+
* provider declared ``capabilities.thinking: true`` in providers.yaml
|
|
529
|
+
* provider declared ``capabilities.reasoning_passthrough: true``
|
|
530
|
+
(the operator opted in to passing the raw reasoning to the client,
|
|
531
|
+
which is only meaningful for models that emit it)
|
|
532
|
+
* registry resolved ``thinking`` or ``reasoning_passthrough`` as true for this (kind, model) pair
|
|
533
|
+
|
|
534
|
+
Conservative bias — when both provider declaration and registry are
|
|
535
|
+
silent, treat as non-reasoning. The probe still completes for thinking
|
|
536
|
+
models in that case (they just hit ``finish_reason='length'`` like
|
|
537
|
+
they did pre-v1.8.2), but at least the new generous default budget
|
|
538
|
+
(256 / 512) gives more headroom than the old 32 / 128.
|
|
539
|
+
"""
|
|
540
|
+
if provider.capabilities.thinking is True:
|
|
541
|
+
return True
|
|
542
|
+
if provider.capabilities.reasoning_passthrough is True:
|
|
543
|
+
return True
|
|
544
|
+
if resolved.thinking is True:
|
|
545
|
+
return True
|
|
546
|
+
return resolved.reasoning_passthrough is True
|
|
547
|
+
|
|
548
|
+
|
|
478
549
|
_PROBE_BASIC_USER_PROMPT = "Reply with exactly the single word: PONG"
|
|
479
550
|
_PROBE_TOOLS_USER_PROMPT = (
|
|
480
551
|
"You have one tool named `echo`. Call it with the argument "
|
|
@@ -617,7 +688,9 @@ def _extract_openai_assistant_choice(
|
|
|
617
688
|
return msg if isinstance(msg, dict) else None
|
|
618
689
|
|
|
619
690
|
|
|
620
|
-
async def _probe_num_ctx(
|
|
691
|
+
async def _probe_num_ctx(
|
|
692
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
693
|
+
) -> ProbeResult:
|
|
621
694
|
"""v1.0-B Probe — direct detection of Ollama ``num_ctx`` truncation.
|
|
622
695
|
|
|
623
696
|
Addresses plan.md §9.4 symptom #1 (空応答 / 意味不明応答). Prior to
|
|
@@ -683,11 +756,21 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
|
|
|
683
756
|
# whatever ``options.num_ctx`` the operator has declared. Request
|
|
684
757
|
# fields win over extra_body, matching the adapter's merge order.
|
|
685
758
|
body: dict[str, Any] = dict(provider.extra_body)
|
|
759
|
+
# v1.8.2: thinking models burn output tokens on a hidden ``reasoning``
|
|
760
|
+
# trace before emitting any ``content``. The pre-v1.8.2 default of 32
|
|
761
|
+
# was tight for any preamble at all; on Gemma 4 26B it caused
|
|
762
|
+
# ``finish_reason='length'`` with content="" before the canary could
|
|
763
|
+
# surface, producing a false-positive NEEDS_TUNING.
|
|
764
|
+
max_tokens = (
|
|
765
|
+
_NUM_CTX_PROBE_MAX_TOKENS_THINKING
|
|
766
|
+
if _is_reasoning_model(provider, resolved)
|
|
767
|
+
else _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT
|
|
768
|
+
)
|
|
686
769
|
body.update(
|
|
687
770
|
{
|
|
688
771
|
"model": provider.model,
|
|
689
772
|
"messages": [{"role": "user", "content": user_prompt}],
|
|
690
|
-
"max_tokens":
|
|
773
|
+
"max_tokens": max_tokens,
|
|
691
774
|
"temperature": 0,
|
|
692
775
|
}
|
|
693
776
|
)
|
|
@@ -799,7 +882,9 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
|
|
|
799
882
|
)
|
|
800
883
|
|
|
801
884
|
|
|
802
|
-
async def _probe_streaming(
|
|
885
|
+
async def _probe_streaming(
|
|
886
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
887
|
+
) -> ProbeResult:
|
|
803
888
|
"""v1.0-C Probe — streaming completion path integrity.
|
|
804
889
|
|
|
805
890
|
Addresses plan.md §9.4 symptom #1 from the **output** side. The v1.0-B
|
|
@@ -868,11 +953,18 @@ async def _probe_streaming(provider: ProviderConfig) -> ProbeResult:
|
|
|
868
953
|
# probing. Top-level probe fields win on collision, matching adapter
|
|
869
954
|
# merge order.
|
|
870
955
|
body: dict[str, Any] = dict(provider.extra_body)
|
|
956
|
+
# v1.8.2: same thinking-model rationale as num_ctx probe — give
|
|
957
|
+
# reasoning a budget so the visible content has a chance to surface.
|
|
958
|
+
max_tokens = (
|
|
959
|
+
_STREAMING_PROBE_MAX_TOKENS_THINKING
|
|
960
|
+
if _is_reasoning_model(provider, resolved)
|
|
961
|
+
else _STREAMING_PROBE_MAX_TOKENS_DEFAULT
|
|
962
|
+
)
|
|
871
963
|
body.update(
|
|
872
964
|
{
|
|
873
965
|
"model": provider.model,
|
|
874
966
|
"messages": [{"role": "user", "content": _STREAMING_PROBE_USER_PROMPT}],
|
|
875
|
-
"max_tokens":
|
|
967
|
+
"max_tokens": max_tokens,
|
|
876
968
|
"temperature": 0,
|
|
877
969
|
"stream": True,
|
|
878
970
|
}
|
|
@@ -1011,6 +1103,16 @@ async def _probe_tool_calls(
|
|
|
1011
1103
|
If declaration says True → NEEDS_TUNING (flip to False). If
|
|
1012
1104
|
False → OK.
|
|
1013
1105
|
"""
|
|
1106
|
+
# v1.8.3: thinking-aware budget — the pre-v1.8.3 default of 64 was
|
|
1107
|
+
# consumed by ``reasoning_content`` on thinking models (Qwen3.6,
|
|
1108
|
+
# Gemma 4, gpt-oss, deepseek-r1) before any ``tool_calls`` could
|
|
1109
|
+
# surface, producing a false-positive NEEDS_TUNING that recommended
|
|
1110
|
+
# flipping ``tools`` to false — the exact opposite of what's needed.
|
|
1111
|
+
max_tokens = (
|
|
1112
|
+
_TOOL_CALLS_PROBE_MAX_TOKENS_THINKING
|
|
1113
|
+
if _is_reasoning_model(provider, resolved)
|
|
1114
|
+
else _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT
|
|
1115
|
+
)
|
|
1014
1116
|
if provider.kind == "anthropic":
|
|
1015
1117
|
# Anthropic native tools use a different wire shape; we probe
|
|
1016
1118
|
# via the messages API. A capable model returns content blocks
|
|
@@ -1022,7 +1124,7 @@ async def _probe_tool_calls(
|
|
|
1022
1124
|
"messages": [
|
|
1023
1125
|
{"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
|
|
1024
1126
|
],
|
|
1025
|
-
"max_tokens":
|
|
1127
|
+
"max_tokens": max_tokens,
|
|
1026
1128
|
"tools": [_PROBE_TOOL_SPEC_ANTHROPIC],
|
|
1027
1129
|
}
|
|
1028
1130
|
else:
|
|
@@ -1033,7 +1135,7 @@ async def _probe_tool_calls(
|
|
|
1033
1135
|
"messages": [
|
|
1034
1136
|
{"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
|
|
1035
1137
|
],
|
|
1036
|
-
"max_tokens":
|
|
1138
|
+
"max_tokens": max_tokens,
|
|
1037
1139
|
"temperature": 0,
|
|
1038
1140
|
"tools": [_PROBE_TOOL_SPEC_OPENAI],
|
|
1039
1141
|
}
|
|
@@ -1357,7 +1459,12 @@ async def _probe_reasoning_leak(
|
|
|
1357
1459
|
)
|
|
1358
1460
|
|
|
1359
1461
|
msg = _extract_openai_assistant_choice(parsed)
|
|
1360
|
-
|
|
1462
|
+
# v1.8.3: detect llama.cpp's ``reasoning_content`` alongside Ollama /
|
|
1463
|
+
# OpenRouter's ``reasoning`` — they're the same concept under different
|
|
1464
|
+
# field names, and the openai_compat adapter strips both since v1.8.3.
|
|
1465
|
+
has_reasoning = bool(
|
|
1466
|
+
msg and ("reasoning" in msg or "reasoning_content" in msg)
|
|
1467
|
+
)
|
|
1361
1468
|
|
|
1362
1469
|
# v1.0-A: content-embedded marker detection.
|
|
1363
1470
|
content = (msg.get("content") if isinstance(msg, dict) else None) or ""
|
|
@@ -1506,11 +1613,11 @@ async def check_model(
|
|
|
1506
1613
|
# declaration probes (tool_calls / thinking / reasoning-leak) should
|
|
1507
1614
|
# dominate the report — streaming is the output-side sibling of
|
|
1508
1615
|
# num_ctx and its NEEDS_TUNING verdict is orthogonal to the others.
|
|
1509
|
-
report.results.append(await _probe_num_ctx(provider))
|
|
1616
|
+
report.results.append(await _probe_num_ctx(provider, resolved))
|
|
1510
1617
|
report.results.append(await _probe_tool_calls(provider, resolved))
|
|
1511
1618
|
report.results.append(await _probe_thinking(provider, resolved))
|
|
1512
1619
|
report.results.append(await _probe_reasoning_leak(provider, resolved))
|
|
1513
|
-
report.results.append(await _probe_streaming(provider))
|
|
1620
|
+
report.results.append(await _probe_streaming(provider, resolved))
|
|
1514
1621
|
return report
|
|
1515
1622
|
|
|
1516
1623
|
|