coderouter-cli 1.8.2__tar.gz → 1.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/CHANGELOG.md +66 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/PKG-INFO +5 -5
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/README.en.md +4 -4
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/README.md +4 -4
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/adapters/openai_compat.py +30 -14
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/doctor.py +31 -6
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/troubleshooting.en.md +24 -3
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/troubleshooting.md +40 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/pyproject.toml +1 -1
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_doctor.py +78 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_reasoning_strip.py +72 -2
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/.gitignore +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/LICENSE +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/__main__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/adapters/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/adapters/anthropic_native.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/adapters/base.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/adapters/registry.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/cli.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/cli_stats.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/config/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/config/capability_registry.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/config/env_file.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/config/loader.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/config/schemas.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/data/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/data/model-capabilities.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/doctor_apply.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/env_security.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/errors.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/anthropic_routes.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/app.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/dashboard_routes.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/metrics_routes.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/ingress/openai_routes.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/logging.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/metrics/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/metrics/collector.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/metrics/prometheus.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/output_filters.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/routing/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/routing/auto_router.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/routing/capability.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/routing/fallback.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/translation/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/translation/anthropic.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/translation/convert.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/coderouter/translation/tool_repair.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/assets/dashboard-demo.png +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/designs/v1.5-dashboard-mockup.html +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/designs/v1.6-auto-router-verification.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/designs/v1.6-auto-router.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/free-tier-guide.en.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/free-tier-guide.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/hf-ollama-models.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/openrouter-roster/README.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/openrouter-roster/latest.json +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/quickstart.en.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/quickstart.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v0.4.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v0.5-verify.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v0.5.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v0.6.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v0.7.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v1.0-verify.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/retrospectives/v1.0.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/security.en.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/security.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/usage-guide.en.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/usage-guide.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/when-do-i-need-coderouter.en.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/docs/when-do-i-need-coderouter.md +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/.env.example +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/providers.auto-custom.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/providers.auto.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/providers.note-2026.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/providers.nvidia-nim.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/examples/providers.yaml +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/scripts/demo_traffic.sh +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/scripts/openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/scripts/verify_v0_5.sh +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/scripts/verify_v1_0.sh +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/__init__.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/conftest.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_adapter_anthropic.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_auto_router.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_capability.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_capability_degraded_payload.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_capability_registry.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_claude_code_suitability.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_cli.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_cli_stats.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_config.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_dashboard_endpoint.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_doctor_apply.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_env_file.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_env_security.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_errors.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_examples_yaml.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback_anthropic.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback_cache_control.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback_misconfig_warn.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback_paid_gate.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_fallback_thinking.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_ingress_anthropic.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_ingress_profile.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_metrics_collector.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_metrics_endpoint.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_metrics_jsonl.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_metrics_prometheus.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_openai_compat.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_openrouter_roster_diff.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_output_filters.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_output_filters_adapters.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_setup_sh.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_tool_repair.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_translation_anthropic.py +0 -0
- {coderouter_cli-1.8.2 → coderouter_cli-1.8.3}/tests/test_translation_reverse.py +0 -0
--- coderouter_cli-1.8.2/CHANGELOG.md
+++ coderouter_cli-1.8.3/CHANGELOG.md
@@ -6,6 +6,72 @@ versioning follows [SemVer](https://semver.org/).
 
 ---
 
+## [v1.8.3] — 2026-04-26 (tool_calls probe made thinking-model-aware + adapter strips `reasoning_content` — llama.cpp direct backend supported)
+
+**Theme: second patch released the same day as v1.8.2. Resolves two additional issues found during real-machine verification of Qwen3.6:35b-a3b on llama.cpp — a thinking-model false positive in the `tool_calls` probe, and the adapter failing to strip the `reasoning_content` field that llama.cpp emits.**
+
+Right after the v1.8.2 release, while doing real-machine verification for the sequel to the v1.8.2 note article "The story of being fooled by the diagnostic tool I built myself" — **"the Qwen3.6 that was stuck via Ollama produced perfect native tool_calls once driven directly with Unsloth GGUF + llama.cpp"** — we hit a contradiction: CodeRouter doctor still reported `tool_calls [NEEDS TUNING]`. Digging in revealed that the `tool_calls` probe's `max_tokens=64` gets eaten entirely by `reasoning_content` token consumption on thinking models: **the exact same bug pattern fixed for num_ctx / streaming in v1.8.2 was still present in the tool_calls probe**. Along the way we also found that llama.cpp's `reasoning_content` field (Ollama / OpenRouter call it `reasoning`) was missing from the openai_compat adapter's strip targets. Both fixes ship together as the single v1.8.3 patch.
+
+**The root cause of the Ollama dead end is now fully confirmed**: Ollama's chat template / tool spec is immature; the model itself is healthy. Driven directly through llama.cpp, Qwen3.6's `tool_calls` work natively.
+
+- Tests: 733 → **737** (+4: tool_calls probe budget thinking variant / 3 reasoning_content strip tests)
+- Runtime deps: 5 → 5 (held flat for 21 consecutive sub-releases)
+- Backward compat: fully compatible; no edits to `providers.yaml` / `~/.coderouter/model-capabilities.yaml` needed
+
+### Changes
+
+#### Doctor `tool_calls` probe: thinking-model-aware budget
+
+- **`coderouter/doctor.py`**: changed `_probe_tool_calls`'s `max_tokens` from a fixed `64` to a **dynamic, thinking-aware selection** (256 default / 1024 thinking). Adds the `_TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT/_THINKING` constants and branches on the existing `_is_reasoning_model(provider, resolved)` helper.
+  - With the old 64, Qwen3.6:35b-a3b on llama.cpp burned all 64 tokens on `reasoning_content` → hit the length cap before any `tool_calls` output → **NEEDS_TUNING plus a suggested patch recommending the exact opposite, "set `tools: false`"**
+  - The new 1024 leaves headroom for both the thinking trace and the tool call
+
+#### Adapter: strip the `reasoning_content` field too
+
+- **`coderouter/adapters/openai_compat.py`**: `_strip_reasoning_field` extended to strip both members of `_NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")`.
+  - `reasoning` (Ollama / OpenRouter naming) and `reasoning_content` (llama.cpp `llama-server` naming) are the same concept under different vendor names
+  - Strict OpenAI clients reject either as an unknown key, so stripping both is the correct behavior
+  - The `dropped` field of the `capability-degraded` log is updated to `["reasoning", "reasoning_content"]` as well (expressing that either may be stripped)
+
+#### Doctor `reasoning-leak` probe: detect `reasoning_content`
+
+- **`coderouter/doctor.py`**: the `has_reasoning` check in `_probe_reasoning_leak` extended to `"reasoning" in msg or "reasoning_content" in msg`, so reasoning leaks are informationally detectable for llama.cpp-backed providers too.
+
+#### Tests
+
+- **`tests/test_doctor.py`** +1: `test_tool_calls_max_tokens_bumped_for_thinking_provider` (the tool_calls probe requests 1024 for a thinking provider; OK verdict on a native tool_calls response)
+- **`tests/test_reasoning_strip.py`** +3: `test_strip_helper_removes_reasoning_content_field` / `test_strip_helper_removes_both_reasoning_and_reasoning_content` / `test_strip_helper_removes_reasoning_content_from_delta` (confirm `reasoning_content` removal at each layer)
+- Existing `tests/test_reasoning_strip.py` assertions `recs[0].dropped == ["reasoning"]` updated to `["reasoning", "reasoning_content"]` (following the log wording change)
+
+### Why
+
+Right after v1.8.2 wrote down the meta-lesson that "the diagnostic tool itself must keep being diagnosed", a remaining bug proved the point. The `tool_calls` probe carried the same "max_tokens=64 ignores thinking-model reasoning token consumption" flaw as the num_ctx / streaming probes — and worse, doctor's suggested patch (flip to `tools: false`) **recommended the exact opposite of the correct fix**. Not merely a false positive but an **actively harmful misdiagnosis**: a conscientious user following it would suppress a healthy model.
+
+This oversight should have been caught while applying the v1.8.2 patch; the v1.8.2 note article's meta-lesson ("keep diagnosing the diagnostic tool") got tested in practice. v1.8.3 squashes it quickly.
+
+Adding the `reasoning_content` strip is an ergonomic improvement that lets the llama.cpp direct path be used cleanly from CodeRouter — an item already recorded in plan.md as a `v1.8.x` patch candidate, closed out at the same moment it was rediscovered on real hardware.
+
+### Migration
+
+`pyproject.toml version 1.8.2 → 1.8.3`; `coderouter --version` returns 1.8.3. **Your local `~/.coderouter/providers.yaml` is completely unaffected unless you touch it**.
+
+Users who saw `tool_calls [NEEDS TUNING]` against Qwen3.6 / Gemma 4 thinking providers on v1.8.2 get an **OK** verdict when rerunning on v1.8.3 (providers that worked on real hardware are now judged sensibly by doctor too). Users running direct llama.cpp providers get `reasoning_content` cleanly stripped instead of leaking to clients.
+
+### Files touched
+
+```
+M CHANGELOG.md
+M coderouter/adapters/openai_compat.py
+M coderouter/doctor.py
+M pyproject.toml
+M plan.md
+M docs/troubleshooting.md
+M tests/test_doctor.py
+M tests/test_reasoning_strip.py
+```
+
+---
+
 ## [v1.8.2] — 2026-04-26 (doctor probes made thinking-model-aware — Gemma 4 false positives resolved)
 
 **Theme: a deep dive right after the v1.8.1 release found that the `doctor` `num_ctx` / `streaming` probes were issuing false-positive NEEDS_TUNING against thinking models; this patch redesigns the probes' `max_tokens` budgets to account for reasoning token consumption.**
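The adapter-side half of this patch is small enough to capture in a standalone sketch. The snippet below is illustrative only: the two key names come from the changelog entry above, while the helper name and the copy-based approach are mine (the real `_strip_reasoning_field` mutates the choices list in place and also handles stream deltas, as the adapter diff below shows).

```python
# Illustrative sketch of the v1.8.3 strip idea; only the two key names are
# taken from the changelog, the rest is a minimal made-up reconstruction.
NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")

def strip_reasoning(message: dict) -> dict:
    """Return a copy of an OpenAI-style message without vendor reasoning keys."""
    return {k: v for k, v in message.items() if k not in NON_STANDARD_REASONING_KEYS}

# Ollama / OpenRouter name the trace "reasoning"; llama.cpp's llama-server
# names it "reasoning_content". A strict client would reject either key.
msg = {"role": "assistant", "content": "Hello", "reasoning_content": "trace"}
assert strip_reasoning(msg) == {"role": "assistant", "content": "Hello"}
```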
--- coderouter_cli-1.8.2/PKG-INFO
+++ coderouter_cli-1.8.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coderouter-cli
-Version: 1.8.2
+Version: 1.8.3
 Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
 Project-URL: Homepage, https://github.com/zephel01/CodeRouter
 Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
 <p align="center">
 <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
 <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
-<a href=""><img src="https://img.shields.io/badge/version-1.8.2-blue" alt="version"></a>
+<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
 <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
 <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
 <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -100,7 +100,7 @@ Description-Content-Type: text/markdown
 | **Decide if you need it** | [Decision guide](./docs/when-do-i-need-coderouter.md) | Agent × model matrix to decide whether you need CodeRouter at all |
 | **When stuck** | [Troubleshooting](./docs/troubleshooting.md) | How to use `doctor`, why `.env` needs `export`, the 5 Ollama silent-fail symptoms, Claude Code integration gotchas |
 | **Operate safely** | [Security policy](./docs/security.md) | Threat model, secret handling, vulnerability reporting |
-| **History** | [CHANGELOG](./CHANGELOG.md) | All release history (latest: v1.8.…
+| **History** | [CHANGELOG](./CHANGELOG.md) | All release history (latest: v1.8.3 — tool_calls probe also thinking-aware + adapter strips `reasoning_content` / llama.cpp direct backend supported) |
 | **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
 
 English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
@@ -175,7 +175,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ…
 
 ## Quickstart (3 commands)
 
-**Published to PyPI in v1.7.0**, **v1.8.0 added 4 use-case profiles + Z.AI/GLM…
+**Published to PyPI in v1.7.0**, **v1.8.0 added 4 use-case profiles + Z.AI/GLM integration**, and **v1.8.2 made the doctor probes thinking-model-aware**. Runs with a single `uvx` (Python 3.12+ required):
 
 ```bash
 # 1. Drop a sample config
@@ -205,7 +205,7 @@ uv run coderouter serve --port 8088
 
 > **Note**: the PyPI package name is `coderouter-cli`, but the command and Python import name remain `coderouter`. See [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) for details.
 >
->…
+> **If you use the `--apply` automation** (v1.8.0+): install `ruamel.yaml` alongside as an optional dep (`pip install 'coderouter-cli[doctor]'` or `uv pip install ruamel.yaml`). Not needed for the base features.
 
 Then just point any OpenAI client at `http://127.0.0.1:8088`:
--- coderouter_cli-1.8.2/README.en.md
+++ coderouter_cli-1.8.3/README.en.md
@@ -20,7 +20,7 @@
 <p align="center">
 <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
 <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
-<a href=""><img src="https://img.shields.io/badge/version-1.8.2-blue" alt="version"></a>
+<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
 <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
 <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
 <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -59,7 +59,7 @@
 | **Decide if you need it** | [Decision guide](./docs/when-do-i-need-coderouter.en.md) | Agent × model matrix to figure out whether CodeRouter fits your setup at all |
 | **When stuck** | [Troubleshooting](./docs/troubleshooting.en.md) | How to use `doctor`, why `.env` needs `export`, the 5 Ollama silent-fail symptoms, Claude Code integration gotchas |
 | **Operate safely** | [Security](./docs/security.en.md) | Threat model, secret handling, vulnerability reporting |
-| **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.…
+| **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.3 — tool_calls probe also thinking-aware + adapter strips `reasoning_content` / llama.cpp direct backend supported) |
 | **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
 
 Japanese versions: [Quickstart](./docs/quickstart.md) · [Usage guide](./docs/usage-guide.md) · [Free-tier guide](./docs/free-tier-guide.md) · [Decision guide](./docs/when-do-i-need-coderouter.md) · [Troubleshooting](./docs/troubleshooting.md) · [Security](./docs/security.md)
@@ -134,7 +134,7 @@ Design invariants and the roadmap are in [`plan.md`](./plan.md). Beginner-friend…
 
 ## Quickstart (2 commands)
 
-**v1.7.0 published to PyPI**, **v1.8.0 added use-case-aware 4 profiles + Z.AI/GLM integration**. `uvx` installs and runs in one shot (Python 3.12+ required):
+**v1.7.0 published to PyPI**, **v1.8.0 added use-case-aware 4 profiles + Z.AI/GLM integration**, **v1.8.2 made the `doctor` probe thinking-model-aware**. `uvx` installs and runs in one shot (Python 3.12+ required):
 
 ```bash
 # 1. Drop a sample config
@@ -164,7 +164,7 @@ uv run coderouter serve --port 8088
 
 > **Note**: the PyPI distribution name is `coderouter-cli`, but the command and Python import name are both `coderouter`. See [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) for details.
 >
-> **For the v1.8.0…
+> **For the `--apply` automation** (v1.8.0+): install `ruamel.yaml` as the optional dependency (`pip install 'coderouter-cli[doctor]'` or `uv pip install ruamel.yaml`). Not required for the base feature set.
 
 Then point any OpenAI client at `http://127.0.0.1:8088`:
--- coderouter_cli-1.8.2/README.md
+++ coderouter_cli-1.8.3/README.md
@@ -19,7 +19,7 @@
 <p align="center">
 <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
 <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
-<a href=""><img src="https://img.shields.io/badge/version-1.8.2-blue" alt="version"></a>
+<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
 <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
 <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
 <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -59,7 +59,7 @@
 | **Decide if you need it** | [Decision guide](./docs/when-do-i-need-coderouter.md) | Agent × model matrix to decide whether you need CodeRouter at all |
 | **When stuck** | [Troubleshooting](./docs/troubleshooting.md) | How to use `doctor`, why `.env` needs `export`, the 5 Ollama silent-fail symptoms, Claude Code integration gotchas |
 | **Operate safely** | [Security policy](./docs/security.md) | Threat model, secret handling, vulnerability reporting |
-| **History** | [CHANGELOG](./CHANGELOG.md) | All release history (latest: v1.8.…
+| **History** | [CHANGELOG](./CHANGELOG.md) | All release history (latest: v1.8.3 — tool_calls probe also thinking-aware + adapter strips `reasoning_content` / llama.cpp direct backend supported) |
 | **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
 
 English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
@@ -134,7 +134,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ…
 
 ## Quickstart (3 commands)
 
-**Published to PyPI in v1.7.0**, **v1.8.0 added 4 use-case profiles + Z.AI/GLM…
+**Published to PyPI in v1.7.0**, **v1.8.0 added 4 use-case profiles + Z.AI/GLM integration**, and **v1.8.2 made the doctor probes thinking-model-aware**. Runs with a single `uvx` (Python 3.12+ required):
 
 ```bash
 # 1. Drop a sample config
@@ -164,7 +164,7 @@ uv run coderouter serve --port 8088
 
 > **Note**: the PyPI package name is `coderouter-cli`, but the command and Python import name remain `coderouter`. See [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) for details.
 >
->…
+> **If you use the `--apply` automation** (v1.8.0+): install `ruamel.yaml` alongside as an optional dep (`pip install 'coderouter-cli[doctor]'` or `uv pip install ruamel.yaml`). Not needed for the base features.
 
 Then just point any OpenAI client at `http://127.0.0.1:8088`:
--- coderouter_cli-1.8.2/coderouter/adapters/openai_compat.py
+++ coderouter_cli-1.8.3/coderouter/adapters/openai_compat.py
@@ -48,14 +48,25 @@ logger = get_logger(__name__)
 _RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
 
 
+# v1.8.3: non-standard reasoning fields emitted by various upstreams.
+# Different runtimes use different field names for the same concept:
+#   * ``reasoning`` — OpenRouter free models (gpt-oss-120b:free
+#     confirmed 2026-04-20), Ollama
+#   * ``reasoning_content`` — llama.cpp ``llama-server`` (Qwen3.6 etc.,
+#     confirmed 2026-04-26 with Unsloth GGUF)
+# Strict OpenAI clients reject either as an unknown key. The strip
+# function below removes both at the adapter boundary so downstream
+# layers never see them, regardless of which runtime fronts the model.
+_NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")
+
+
 def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
-    """Remove non-standard…
+    """Remove non-standard reasoning keys from a choices list, in place.
 
-    v0.5-C…
-    …
-    …
-    …
-    We strip it at the adapter boundary so downstream layers never see it.
+    v0.5-C originally targeted OpenRouter's ``reasoning`` field. v1.8.3
+    extends the strip to ``reasoning_content`` (llama.cpp ``llama-server``
+    naming) since both denote the same hidden chain-of-thought trace and
+    neither is part of the OpenAI Chat Completions spec.
 
     Args:
         choices: The ``choices`` list from the response body or stream chunk.
@@ -64,7 +75,7 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b…
         ``False`` for non-streaming responses (look in ``choice["message"]``).
 
     Returns:
-        True iff at least one…
+        True iff at least one reasoning key was removed. Callers use
         this to decide whether to emit a one-shot log line.
     """
     if not choices:
@@ -75,9 +86,12 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b…
         if not isinstance(choice, dict):
             continue
         inner = choice.get(inner_key)
-        if isinstance(inner, dict)…
-        …
-        …
+        if not isinstance(inner, dict):
+            continue
+        for key in _NON_STANDARD_REASONING_KEYS:
+            if key in inner:
+                inner.pop(key, None)
+                stripped = True
     return stripped
 
 
@@ -235,15 +249,17 @@ class OpenAICompatAdapter(BaseAdapter):
                 retryable=False,
             ) from exc
 
-        # v0.5-C: passive strip of non-standard…
-        #…
+        # v0.5-C / v1.8.3: passive strip of non-standard reasoning fields
+        # on choices (covers both Ollama/OpenRouter ``reasoning`` and
+        # llama.cpp ``reasoning_content``). No-op when the provider opted
+        # into passthrough.
         if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
             data.get("choices"), delta_key=False
         ):
            log_capability_degraded(
                logger,
                provider=self.name,
-               dropped=…
+               dropped=list(_NON_STANDARD_REASONING_KEYS),
                reason="non-standard-field",
            )
 
@@ -344,7 +360,7 @@ class OpenAICompatAdapter(BaseAdapter):
             log_capability_degraded(
                 logger,
                 provider=self.name,
-                dropped=…
+                dropped=list(_NON_STANDARD_REASONING_KEYS),
                 reason="non-standard-field",
             )
             reasoning_logged = True
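A usage sketch of the helper as extended above, assuming coderouter-cli ≥ 1.8.3 is installed. Note that `_strip_reasoning_field` is a private, underscore-prefixed function, so this is for illustration rather than a supported API; the shapes mirror the package's own tests further down.

```python
# Exercising the extended helper shown in the hunks above
# (assumes coderouter-cli >= 1.8.3 is installed).
from coderouter.adapters.openai_compat import _strip_reasoning_field

# Non-streaming responses carry the keys under choice["message"].
choices = [{"index": 0, "message": {"role": "assistant", "content": "hi",
                                    "reasoning_content": "thinking trace"}}]
assert _strip_reasoning_field(choices, delta_key=False) is True
assert choices[0]["message"] == {"role": "assistant", "content": "hi"}

# Stream chunks carry them under choice["delta"] instead.
chunk = [{"index": 0, "delta": {"content": "tok", "reasoning": "trace"}}]
assert _strip_reasoning_field(chunk, delta_key=True) is True
assert chunk[0]["delta"] == {"content": "tok"}
```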
--- coderouter_cli-1.8.2/coderouter/doctor.py
+++ coderouter_cli-1.8.3/coderouter/doctor.py
@@ -458,6 +458,18 @@ _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256
 _NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024
 _STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512
 _STREAMING_PROBE_MAX_TOKENS_THINKING = 1024
+# v1.8.3: the tool_calls probe also needs a thinking-aware budget. The
+# pre-v1.8.3 default of 64 was tight even for non-thinking models
+# (the assistant often emits a brief preamble before the JSON tool
+# call), and on thinking models (Qwen3.6, Gemma 4, gpt-oss, deepseek-r1)
+# the entire 64-token budget gets consumed by ``reasoning_content``
+# before any ``tool_calls`` can surface — producing a false-positive
+# NEEDS_TUNING with the WRONG remediation (suggested patch flips
+# ``tools`` to false even though the model supports them perfectly).
+# 256/1024 brings the budget into line with the num_ctx / streaming
+# probes (same _is_reasoning_model gate).
+_TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT = 256
+_TOOL_CALLS_PROBE_MAX_TOKENS_THINKING = 1024
 # Default ``num_predict`` suggested in the emitted patch. -1 would be
 # optimal (uncapped) but "4096" communicates intent more clearly to
 # operators unfamiliar with Ollama's sentinel value, and covers Claude
@@ -531,9 +543,7 @@ def _is_reasoning_model(
         return True
     if resolved.thinking is True:
         return True
-    …
-        return True
-    return False
+    return resolved.reasoning_passthrough is True
 
 
 _PROBE_BASIC_USER_PROMPT = "Reply with exactly the single word: PONG"
@@ -1093,6 +1103,16 @@ async def _probe_tool_calls(
     If declaration says True → NEEDS_TUNING (flip to False). If
     False → OK.
     """
+    # v1.8.3: thinking-aware budget — the pre-v1.8.3 default of 64 was
+    # consumed by ``reasoning_content`` on thinking models (Qwen3.6,
+    # Gemma 4, gpt-oss, deepseek-r1) before any ``tool_calls`` could
+    # surface, producing a false-positive NEEDS_TUNING that recommended
+    # flipping ``tools`` to false — the exact opposite of what's needed.
+    max_tokens = (
+        _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING
+        if _is_reasoning_model(provider, resolved)
+        else _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT
+    )
     if provider.kind == "anthropic":
         # Anthropic native tools use a different wire shape; we probe
         # via the messages API. A capable model returns content blocks
@@ -1104,7 +1124,7 @@ async def _probe_tool_calls(
             "messages": [
                 {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
             ],
-            "max_tokens": …
+            "max_tokens": max_tokens,
             "tools": [_PROBE_TOOL_SPEC_ANTHROPIC],
         }
     else:
@@ -1115,7 +1135,7 @@ async def _probe_tool_calls(
             "messages": [
                 {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
             ],
-            "max_tokens": …
+            "max_tokens": max_tokens,
             "temperature": 0,
             "tools": [_PROBE_TOOL_SPEC_OPENAI],
         }
@@ -1439,7 +1459,12 @@ async def _probe_reasoning_leak(
     )
 
     msg = _extract_openai_assistant_choice(parsed)
-    …
+    # v1.8.3: detect llama.cpp's ``reasoning_content`` alongside Ollama /
+    # OpenRouter's ``reasoning`` — they're the same concept under different
+    # field names, and the openai_compat adapter strips both since v1.8.3.
+    has_reasoning = bool(
+        msg and ("reasoning" in msg or "reasoning_content" in msg)
+    )
 
     # v1.0-A: content-embedded marker detection.
     content = (msg.get("content") if isinstance(msg, dict) else None) or ""
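Reduced to its core, the budget fix is just a gated constant choice. A minimal sketch: the constant names and the 256/1024 values come from the diff above, while the boolean parameter stands in for the real `_is_reasoning_model(provider, resolved)` call.

```python
# Reduced sketch of the v1.8.3 probe budget selection. Constants/values
# are from the diff above; `is_thinking` stands in for the real
# _is_reasoning_model(provider, resolved) gate.
_TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT = 256
_TOOL_CALLS_PROBE_MAX_TOKENS_THINKING = 1024

def tool_calls_probe_budget(is_thinking: bool) -> int:
    """Pick max_tokens so a reasoning trace cannot starve the tool call."""
    if is_thinking:
        return _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING
    return _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT

assert tool_calls_probe_budget(False) == 256   # plain models
assert tool_calls_probe_budget(True) == 1024   # Qwen3.6, Gemma 4, gpt-oss, deepseek-r1
```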
--- coderouter_cli-1.8.2/docs/troubleshooting.en.md
+++ coderouter_cli-1.8.3/docs/troubleshooting.en.md
@@ -282,7 +282,28 @@ profiles:
 
 > **Background on per-model tool-call behavior**: Llama-3.3-70B's tendency to rewrite plain text into tool calls comes from its aggressive agentic-tuning RLHF signal interacting with Claude Code's system prompt. Unsloth's [Tool calling guide for local LLMs](https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms) (Japanese) covers this and other model-specific quirks well — useful background for the v1.8.0 `claude_code_suitability` heuristic.
 
-### 4-2. …
+### 4-2. Local Ollama known pitfalls (added in v1.8.1, revised in v1.8.2)
+
+A 2026-04-26 real-machine session (M3 Max 64GB / Ollama 0.21.2 / CodeRouter v1.8.0 → v1.8.2) surfaced three classes of "highly-rated on note / HF, doesn't actually work via Ollama" cases. The Japanese troubleshooting doc has the full breakdown — this is a brief summary for English readers.
+
+**4-2-A. Qwen3.6:27b / 35b — `tool_calls [NEEDS TUNING]` is the real issue.**
+After v1.8.2's thinking-aware probe budgets, `num_ctx` and `streaming` clear, but Ollama's Qwen3.6 chat-template / tool-spec is still immature and produces neither native `tool_calls` nor repairable JSON. The bundled `model-capabilities.yaml` withdrew the `claude_code_suitability: ok` declaration in v1.8.1; keep Qwen3.6 out of the `coding` profile primary slot until upstream tooling stabilizes.
+
+**4-2-B. Qwen3.5-based HF distillations (e.g. Qwopus3.5) won't load.**
+`ollama pull` succeeds but `ollama run` returns 500 with `unknown model architecture: 'qwen35'` because llama.cpp doesn't yet support Qwen3.5's hybrid Transformer-SSM architecture. Wait for upstream framework support; not addressable from CodeRouter.
+
+**4-2-C. Gemma 4 26B works end-to-end (confirmed in v1.8.2).**
+The doctor probe NEEDS_TUNING readings on `num_ctx` / `streaming` reported in v1.8.1 turned out to be **probe false positives** — Gemma 4 is a thinking model that emits a `reasoning` field, and the v1.8.1 probe budgets (`max_tokens=32` / `128`) were consumed entirely by the reasoning trace before any visible `content` could surface. A real-machine `/v1/messages` round-trip returns "Hello." in 2 seconds; `tool_calls` work natively. v1.8.2 widened the probe budget to 1024 tokens for thinking-flagged models.
+
+**4-2-D. Best practice — "boring models + observation tools."**
+Lead the `coding` chain with `qwen2.5-coder:14b` / `gemma4:26b`, run `coderouter doctor --check-model <name>` for a 6-probe sanity pass, and let the fallback chain pick up paid / cloud free tiers for spillover.
+
+**4-2-E. Doctor probe limits — thinking model awareness (v1.8.2).**
+Pre-v1.8.2 `num_ctx` / `streaming` probes used `max_tokens=32` / `128`, sufficient for non-thinking models but consumed entirely by `reasoning` token output on thinking models. v1.8.2 introduced `_is_reasoning_model(provider, resolved)`, which checks `provider.capabilities.thinking`, `provider.capabilities.reasoning_passthrough`, and the registry-resolved equivalents — when any signals true, the probe budget bumps to 1024. The bundled `model-capabilities.yaml` declares `thinking: true` for `gemma4:*` and `qwen3.6:*`, so users see the corrected behavior without editing their `providers.yaml`. **Meta lesson**: diagnostic tools themselves need to keep being diagnosed (a corollary of plan.md §5.4's "real-machine evidence first" principle).
+
+For full detail and curl reproductions see [`troubleshooting.md` §4-2](./troubleshooting.md#4-2).
+
+### 4-3. `UserPromptSubmit hook error` (third-party Claude Code plugins)
 
 ```
 ❯ hello
@@ -305,7 +326,7 @@ This is a structural mismatch on the plugin side, not a CodeRouter bug.
 
 If the error vanishes, it's the plugin. The proper fix is upstream — file feedback asking the plugin author to support OpenAI-compat / non-Anthropic backends.
 
-### 4-…
+### 4-4. "Compacting conversation…" takes ages
 
 ```
 ✻ Compacting conversation… (34s)
@@ -315,7 +336,7 @@ Claude Code's auto-compact (summarize old turns to compress context) being slow…
 
 `DISABLE_CLAUDE_CODE_SM_COMPACT=1` disables the LLM-based smart compact, but a truncate-based fallback still runs. Manual `/compact` / `/clear` is the most reliable workaround.
 
-### 4-…
+### 4-5. Open the dashboard, every gotcha becomes visible in 10 seconds
 
 Open `http://localhost:8088/dashboard` in a separate tab while you work. All of the above become directly visible:
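Where §4-2-D's sanity pass needs to run unattended (CI, a provisioning script), the doctor invocation can be wrapped. A hedged sketch: the command is taken verbatim from the section above, but the model name and the non-zero-exit-on-failure convention are assumptions.

```python
# Scripted form of the 4-2-D sanity pass. The CLI command comes from the
# doc above; the model name and exit-code convention are assumptions.
import subprocess

result = subprocess.run(
    ["coderouter", "doctor", "--check-model", "gemma4:26b"],
    capture_output=True,
    text=True,
)
print(result.stdout)  # per-probe verdicts, e.g. tool_calls [OK]
if result.returncode != 0:  # assumed: non-zero when a probe needs tuning
    raise SystemExit("doctor flagged problems; review the output above")
```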
--- coderouter_cli-1.8.2/docs/troubleshooting.md
+++ coderouter_cli-1.8.3/docs/troubleshooting.md
@@ -303,6 +303,46 @@ v1.8.2 の偽陽性除去後、`coderouter doctor --check-model ollama-qwen3-6-2…
 
 **Workaround**: don't put Qwen3.6 in the `claude-code-nim` profile primary slot; **rank Gemma 4 26B or Qwen2.5-Coder 14b above it**. The bundled `model-capabilities.yaml` also withdrew `claude_code_suitability: ok` for `qwen3.6:*` in v1.8.1 (a lesson in over-trusting declarations) and added `thinking: true` in v1.8.2 (to remove the doctor probe false positives).
 
+> **Community evidence (2026-04-26 recon)**: X / Reddit (r/ollama, r/LocalLLaMA) confirm that **the Qwen3.6 + Ollama combination is currently stuck community-wide**:
+>
+> - Hard crashes / reboots with Qwen3.6 35B-A3B (Mac Metal, multiple reports)
+> - Load failures on "insufficient available memory" (an Ollama memory-accounting bug), partly improved in the latest Ollama
+> - Timeouts, dropped context, and loops in Claude Code / OpenCode integration
+> - Structured-output bugs when `think=False`
+>
+> Several workaround paths:
+>
+> 1. **Bake the context in with Modelfile `PARAMETER num_ctx 131072`** (the minimal change if you stay on Ollama)
+> 2. **Drive the model directly with Unsloth GGUF + llama.cpp / llama-server** (the strongest option, with multiple "this solved it" reports) — CodeRouter can connect via `kind: openai_compat` + `base_url: http://localhost:8080/v1`
+> 3. Try a lower quant (Q4_K_M) / a coding-focused tag
+> 4. Update to the latest Ollama (partial fix for the memory bug)
+>
+> A roadmap item to add a **providers.yaml example for Unsloth GGUF + llama.cpp direct** is recorded in plan.md under "v1.8.x patch candidates — llama.cpp direct backend verification" (to be enabled once real-machine verification passes).
+
+> **v1.8.3 update (2026-04-26)**: real-machine verification complete. **Native `tool_calls` confirmed working perfectly with Qwen3.6:35b-a3b driven directly through llama.cpp**: the response carries `finish_reason: "tool_calls"` plus a proper OpenAI `tool_calls[]` array. **Root cause of the Ollama dead end fully confirmed = Ollama's immature chat template / tool spec; the model itself is healthy.** Verified recipe:
+>
+> ```bash
+> # 1. Build llama.cpp (Metal)
+> git clone https://github.com/ggml-org/llama.cpp ~/llama.cpp
+> cd ~/llama.cpp && cmake -B build -DGGML_METAL=ON -DLLAMA_CURL=ON
+> cmake --build build --config Release -j
+>
+> # 2. Unsloth Dynamic Quantization GGUF (~22GB)
+> huggingface-cli download unsloth/Qwen3.6-35B-A3B-GGUF \
+>   --include "*UD-Q4_K_M*" "*tokenizer*" "*chat_template*" \
+>   --local-dir ~/models/qwen3.6-35b-a3b-unsloth
+>
+> # 3. Start llama-server
+> ./build/bin/llama-server \
+>   --model ~/models/qwen3.6-35b-a3b-unsloth/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf \
+>   --port 8080 --ctx-size 32768 --n-predict 4096 \
+>   --jinja --threads 8 -ngl 999 --host 127.0.0.1
+> ```
+>
+> Connect from CodeRouter `providers.yaml` with `kind: openai_compat` + `base_url: http://localhost:8080/v1`. Declare `capabilities.thinking: true` and the doctor probe uses the thinking-aware budget (1024), so `tool_calls [OK]` comes out.
+>
+> v1.8.3 lands (a) **the `tool_calls` probe made thinking-aware too** (the old `max_tokens=64` produced a false-positive NEEDS_TUNING, and the suggested patch even recommended the exact opposite, `tools: false` — an actively harmful misdiagnosis, now fixed), and (b) **the adapter stripping `reasoning_content` (llama.cpp naming) alongside `reasoning`**. The llama.cpp direct path is now properly supported by CodeRouter.
+
 #### 4-2-B. **Qwen3.5-based HF distillation models** (Qwopus3.5 etc.) are not yet supported by llama.cpp
 
 For example, pulling `Jackrong/Qwopus3.5-9B-v3-GGUF` (Qwen3.5-VL base + Claude Opus distillation, Apache-2.0, Vision) with `ollama pull` **downloads the blob completely**, but `ollama run` gives:
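The providers.yaml hookup described in the v1.8.3 update above amounts to one provider entry. Below is a sketch of that entry written as the equivalent Python mapping rather than YAML; the field names follow the note (`kind`, `base_url`, `capabilities.thinking`), and the provider/model names mirror the fixture in `tests/test_doctor.py` further down. The exact YAML schema and nesting may differ, so treat `examples/providers.yaml` in the package as the authoritative reference.

```python
# Sketch of the llama.cpp-direct provider entry described in the note above,
# expressed as the Python equivalent of a providers.yaml block. Field names
# come from the note; exact schema may differ (see examples/providers.yaml).
llamacpp_provider = {
    "name": "llamacpp-qwen3-6-35b-a3b",      # mirrors tests/test_doctor.py
    "kind": "openai_compat",                 # plain OpenAI-compatible backend
    "base_url": "http://localhost:8080/v1",  # llama-server from the recipe
    "model": "qwen3.6",
    "capabilities": {
        "thinking": True,  # makes doctor use the 1024-token probe budget
        "tools": True,     # native tool_calls confirmed on llama.cpp
    },
}
```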
--- coderouter_cli-1.8.2/pyproject.toml
+++ coderouter_cli-1.8.3/pyproject.toml
@@ -11,7 +11,7 @@
 # in plan.md §11.B; once granted, this name will become an alias and
 # `coderouter` will become the canonical distribution name.
 name = "coderouter-cli"
-version = "1.8.2"
+version = "1.8.3"
 description = "Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible."
 readme = "README.md"
 requires-python = ">=3.12"
--- coderouter_cli-1.8.2/tests/test_doctor.py
+++ coderouter_cli-1.8.3/tests/test_doctor.py
@@ -882,6 +882,84 @@ async def test_num_ctx_max_tokens_bumped_when_registry_says_thinking(
     assert num_ctx_body["max_tokens"] == 1024
 
 
+@pytest.mark.asyncio
+async def test_tool_calls_max_tokens_bumped_for_thinking_provider(
+    httpx_mock: HTTPXMock,
+) -> None:
+    """v1.8.3: tool_calls probe uses thinking-aware budget too.
+
+    Pre-v1.8.3 the probe sent ``max_tokens=64``, which thinking models
+    (Qwen3.6, Gemma 4, gpt-oss, deepseek-r1) consume entirely on the
+    ``reasoning_content`` field before any ``tool_calls`` can surface —
+    producing a false-positive NEEDS_TUNING that recommended flipping
+    ``tools`` to false despite the model supporting tools perfectly
+    (observed 2026-04-26 with Qwen3.6:35b-a3b on llama-server).
+    """
+    thinking_caps = Capabilities(thinking=True, tools=True)
+    provider = _oa_provider(
+        name="llamacpp-qwen3-6-35b-a3b",
+        base_url="http://localhost:8080/v1",
+        model="qwen3.6",
+        caps=thinking_caps,
+    )
+    captured: list[httpx.Request] = []
+
+    def _capture(request: httpx.Request) -> httpx.Response:
+        captured.append(request)
+        body = json.loads(request.content.decode("utf-8"))
+        if "tools" in body:
+            # tool_calls probe — return a native tool_calls structure.
+            return httpx.Response(
+                200,
+                json={
+                    "id": "chatcmpl-probe",
+                    "object": "chat.completion",
+                    "created": 0,
+                    "model": "probe",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "message": {
+                                "role": "assistant",
+                                "content": "",
+                                "tool_calls": [
+                                    {
+                                        "id": "call_1",
+                                        "type": "function",
+                                        "function": {
+                                            "name": "echo",
+                                            "arguments": '{"message":"probe"}',
+                                        },
+                                    }
+                                ],
+                            },
+                            "finish_reason": "tool_calls",
+                        }
+                    ],
+                    "usage": {"prompt_tokens": 8, "completion_tokens": 1},
+                },
+            )
+        return httpx.Response(200, json=_openai_ok_response(content="PONG"))
+
+    httpx_mock.add_callback(
+        _capture,
+        url="http://localhost:8080/v1/chat/completions",
+        method="POST",
+        is_reusable=True,
+    )
+    await check_model(
+        _config_for([provider]), provider.name, registry=_empty_registry()
+    )
+    # Identify the tool_calls probe request by the presence of ``tools``.
+    tool_calls_bodies = [
+        json.loads(req.content.decode("utf-8"))
+        for req in captured
+        if "tools" in json.loads(req.content.decode("utf-8"))
+    ]
+    assert len(tool_calls_bodies) == 1
+    assert tool_calls_bodies[0]["max_tokens"] == 1024
+
+
 @pytest.mark.asyncio
 async def test_streaming_max_tokens_bumped_for_thinking_provider(
     httpx_mock: HTTPXMock,
--- coderouter_cli-1.8.2/tests/test_reasoning_strip.py
+++ coderouter_cli-1.8.3/tests/test_reasoning_strip.py
@@ -117,6 +117,72 @@ def test_strip_helper_handles_multiple_choices() -> None:
     assert "reasoning" not in choices[2]["message"]
 
 
+def test_strip_helper_removes_reasoning_content_field() -> None:
+    """v1.8.3: llama.cpp's ``reasoning_content`` is treated the same as
+    ``reasoning`` — both are non-standard chain-of-thought fields with
+    different vendor naming.
+
+    Confirmed 2026-04-26 with Qwen3.6:35b-a3b on llama-server: the
+    response shape is::
+
+        {"message": {"role": "assistant", "content": "...",
+                     "reasoning_content": "<thinking trace>"}}
+
+    Strict OpenAI clients reject the unknown ``reasoning_content`` key
+    just as they would reject ``reasoning``.
+    """
+    choices = [
+        {
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": "Hello",
+                "reasoning_content": "Here's a thinking process: ...",
+            },
+            "finish_reason": "stop",
+        }
+    ]
+    stripped = _strip_reasoning_field(choices, delta_key=False)
+    assert stripped is True
+    assert choices[0]["message"] == {"role": "assistant", "content": "Hello"}
+
+
+def test_strip_helper_removes_both_reasoning_and_reasoning_content() -> None:
+    """When a single message carries both keys (defensive — unlikely in
+    practice but possible if a proxy merges OpenRouter + llama.cpp
+    upstreams), the strip removes both in one pass.
+    """
+    choices = [
+        {
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": "answer",
+                "reasoning": "ollama-style trace",
+                "reasoning_content": "llama-cpp-style trace",
+            },
+        }
+    ]
+    stripped = _strip_reasoning_field(choices, delta_key=False)
+    assert stripped is True
+    assert choices[0]["message"] == {"role": "assistant", "content": "answer"}
+
+
+def test_strip_helper_removes_reasoning_content_from_delta() -> None:
+    """Stream chunks may carry ``reasoning_content`` in ``delta`` too —
+    llama-server's streaming path emits it incrementally.
+    """
+    choices = [
+        {
+            "index": 0,
+            "delta": {"content": "tok", "reasoning_content": "thinking..."},
+        }
+    ]
+    stripped = _strip_reasoning_field(choices, delta_key=True)
+    assert stripped is True
+    assert choices[0]["delta"] == {"content": "tok"}
+
+
 # ======================================================================
 # Adapter tests — generate() (non-streaming)
 # ======================================================================
@@ -181,7 +247,9 @@ async def test_generate_strips_reasoning_from_message(
     ]
     assert len(recs) == 1
     assert recs[0].provider == "openrouter-gpt-oss-free"
-    assert recs[0].dropped == ["reasoning"]
+    # v1.8.3: log dropped now lists both Ollama/OpenRouter (`reasoning`)
+    # and llama.cpp (`reasoning_content`) since the same strip handles both.
+    assert recs[0].dropped == ["reasoning", "reasoning_content"]
 
 
 @pytest.mark.asyncio
@@ -357,7 +425,9 @@ async def test_stream_strips_reasoning_from_each_delta(
     ]
     assert len(recs) == 1
     assert recs[0].provider == "openrouter-gpt-oss-free"
-    assert recs[0].dropped == ["reasoning"]
+    # v1.8.3: log dropped now lists both Ollama/OpenRouter (`reasoning`)
+    # and llama.cpp (`reasoning_content`) since the same strip handles both.
+    assert recs[0].dropped == ["reasoning", "reasoning_content"]
 
 
 @pytest.mark.asyncio