coderouter-cli 1.8.3__tar.gz → 1.8.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/CHANGELOG.md +64 -3
  2. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/PKG-INFO +11 -7
  3. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/README.en.md +12 -7
  4. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/README.md +10 -6
  5. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/doctor.py +63 -8
  6. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/free-tier-guide.en.md +4 -1
  7. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/free-tier-guide.md +3 -1
  8. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/hf-ollama-models.md +17 -3
  9. coderouter_cli-1.8.5/docs/llamacpp-direct.en.md +232 -0
  10. coderouter_cli-1.8.5/docs/llamacpp-direct.md +285 -0
  11. coderouter_cli-1.8.5/docs/lmstudio-direct.en.md +405 -0
  12. coderouter_cli-1.8.5/docs/lmstudio-direct.md +407 -0
  13. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/quickstart.en.md +15 -7
  14. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/quickstart.md +12 -6
  15. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/providers.yaml +181 -2
  16. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/pyproject.toml +1 -1
  17. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_setup_sh.py +6 -6
  18. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/.gitignore +0 -0
  19. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/LICENSE +0 -0
  20. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/__init__.py +0 -0
  21. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/__main__.py +0 -0
  22. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/adapters/__init__.py +0 -0
  23. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/adapters/anthropic_native.py +0 -0
  24. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/adapters/base.py +0 -0
  25. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/adapters/openai_compat.py +0 -0
  26. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/adapters/registry.py +0 -0
  27. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/cli.py +0 -0
  28. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/cli_stats.py +0 -0
  29. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/config/__init__.py +0 -0
  30. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/config/capability_registry.py +0 -0
  31. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/config/env_file.py +0 -0
  32. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/config/loader.py +0 -0
  33. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/config/schemas.py +0 -0
  34. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/data/__init__.py +0 -0
  35. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/data/model-capabilities.yaml +0 -0
  36. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/doctor_apply.py +0 -0
  37. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/env_security.py +0 -0
  38. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/errors.py +0 -0
  39. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/__init__.py +0 -0
  40. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/anthropic_routes.py +0 -0
  41. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/app.py +0 -0
  42. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/dashboard_routes.py +0 -0
  43. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/metrics_routes.py +0 -0
  44. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/ingress/openai_routes.py +0 -0
  45. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/logging.py +0 -0
  46. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/metrics/__init__.py +0 -0
  47. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/metrics/collector.py +0 -0
  48. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/metrics/prometheus.py +0 -0
  49. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/output_filters.py +0 -0
  50. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/routing/__init__.py +0 -0
  51. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/routing/auto_router.py +0 -0
  52. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/routing/capability.py +0 -0
  53. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/routing/fallback.py +0 -0
  54. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/translation/__init__.py +0 -0
  55. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/translation/anthropic.py +0 -0
  56. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/translation/convert.py +0 -0
  57. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/coderouter/translation/tool_repair.py +0 -0
  58. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/assets/dashboard-demo.png +0 -0
  59. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/designs/v1.5-dashboard-mockup.html +0 -0
  60. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/designs/v1.6-auto-router-verification.md +0 -0
  61. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/designs/v1.6-auto-router.md +0 -0
  62. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/openrouter-roster/README.md +0 -0
  63. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/openrouter-roster/latest.json +0 -0
  64. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v0.4.md +0 -0
  65. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v0.5-verify.md +0 -0
  66. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v0.5.md +0 -0
  67. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v0.6.md +0 -0
  68. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v0.7.md +0 -0
  69. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v1.0-verify.md +0 -0
  70. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/retrospectives/v1.0.md +0 -0
  71. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/security.en.md +0 -0
  72. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/security.md +0 -0
  73. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/troubleshooting.en.md +0 -0
  74. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/troubleshooting.md +0 -0
  75. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/usage-guide.en.md +0 -0
  76. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/usage-guide.md +0 -0
  77. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/when-do-i-need-coderouter.en.md +0 -0
  78. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/docs/when-do-i-need-coderouter.md +0 -0
  79. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/.env.example +0 -0
  80. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/providers.auto-custom.yaml +0 -0
  81. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/providers.auto.yaml +0 -0
  82. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/providers.note-2026.yaml +0 -0
  83. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/examples/providers.nvidia-nim.yaml +0 -0
  84. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/scripts/demo_traffic.sh +0 -0
  85. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/scripts/openrouter_roster_diff.py +0 -0
  86. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/scripts/verify_v0_5.sh +0 -0
  87. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/scripts/verify_v1_0.sh +0 -0
  88. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/__init__.py +0 -0
  89. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/conftest.py +0 -0
  90. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_adapter_anthropic.py +0 -0
  91. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_auto_router.py +0 -0
  92. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_capability.py +0 -0
  93. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_capability_degraded_payload.py +0 -0
  94. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_capability_registry.py +0 -0
  95. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_claude_code_suitability.py +0 -0
  96. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_cli.py +0 -0
  97. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_cli_stats.py +0 -0
  98. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_config.py +0 -0
  99. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_dashboard_endpoint.py +0 -0
  100. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_doctor.py +0 -0
  101. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_doctor_apply.py +0 -0
  102. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_env_file.py +0 -0
  103. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_env_security.py +0 -0
  104. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_errors.py +0 -0
  105. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_examples_yaml.py +0 -0
  106. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback.py +0 -0
  107. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback_anthropic.py +0 -0
  108. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback_cache_control.py +0 -0
  109. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback_misconfig_warn.py +0 -0
  110. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback_paid_gate.py +0 -0
  111. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_fallback_thinking.py +0 -0
  112. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_ingress_anthropic.py +0 -0
  113. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_ingress_profile.py +0 -0
  114. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_metrics_collector.py +0 -0
  115. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_metrics_endpoint.py +0 -0
  116. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_metrics_jsonl.py +0 -0
  117. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_metrics_prometheus.py +0 -0
  118. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_openai_compat.py +0 -0
  119. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_openrouter_roster_diff.py +0 -0
  120. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_output_filters.py +0 -0
  121. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_output_filters_adapters.py +0 -0
  122. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_reasoning_strip.py +0 -0
  123. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_tool_repair.py +0 -0
  124. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_translation_anthropic.py +0 -0
  125. {coderouter_cli-1.8.3 → coderouter_cli-1.8.5}/tests/test_translation_reverse.py +0 -0
@@ -6,6 +6,55 @@ versioning follows [SemVer](https://semver.org/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [v1.8.5] — 2026-04-28 (doctor NEEDS_TUNING メッセージを v1.8.3 thinking-aware budget の事実に揃える + `docs/lmstudio-direct.md` 新規)
10
+
11
+ **Theme: 文言の整合 patch + ドキュメント補完。**v1.8.3 で `tool_calls` / `num_ctx` / `streaming` の 3 probe に thinking-aware budget (256 / 1024) を入れた。今回はその事実を NEEDS_TUNING 時の detail メッセージに反映し、operator が「probe budget が小さすぎたのでは」と疑う余地をなくす。あわせて v1.8.4 で実機検証した LM Studio 0.4.12 経由経路を `docs/llamacpp-direct.md` と対をなす形で `docs/lmstudio-direct.md` (+ `.en.md`) として正式化。
12
+
13
+ - Tests: 737 → 737 (既存 assert は phrase-substring を見ていないので追従不要、新規 assertion は不足分を 1 件追加)
14
+ - Runtime deps: 5 → 5 (22 sub-release 連続据え置き)
15
+ - Backward compat: 完全互換、`providers.yaml` / `~/.coderouter/model-capabilities.yaml` / コード側 API 変更なし
16
+
17
+ ### Changes
18
+
19
+ #### Doctor NEEDS_TUNING 文言更新 (suggestion を thinking-aware budget 前提に揃える)
20
+
21
+ - **`coderouter/doctor.py` `_probe_tool_calls`**: 「Common for quantized small models」を残しつつ、thinking モデル時は `Probed with thinking-aware budget (1024 tokens, covers reasoning_content plus the call) — this is a true tools=false case, not budget exhaustion.` を前置。非 thinking 時は `Probed with default budget (256 tokens) — the model produced no tool-shaped output at all.` を前置
22
+ - **`coderouter/doctor.py` `_probe_streaming`**: `finish_reason='length'` 偽陽性回避のため、thinking 時は `Probe sent max_tokens=1024 (thinking-aware), so the cap is server-side options.num_predict rather than the probe budget.` を前置。非 thinking 時は `Probe sent max_tokens=512;` 系を前置
23
+ - **`coderouter/doctor.py` `_probe_num_ctx`**: 「canary missing」3 ケース (declared=None / declared<threshold / declared>=threshold) すべてに、thinking モデル時は `Probe sent max_tokens=1024 (thinking-aware), so the miss is prompt-side truncation rather than reply truncation.` の budget note を追加。これで operator が「probe の reply budget が足りなかったのでは」という疑問を即座に消せる
24
+
25
+ #### Documentation 補完: `docs/lmstudio-direct.md` 新規
26
+
27
+ - **`docs/lmstudio-direct.md` / `.en.md` 新規** — v1.8.4 で実機検証した LM Studio 0.4.12 経由経路を `docs/llamacpp-direct.md` と対をなす形で 7 step + Troubleshooting で。M3 Max 64GB / Q4_K_M / Metal 想定 + GUI 操作前提の canonical recipe
28
+ - Step 1: LM Studio install & Discover タブで Q4_K_M モデルダウンロード (Qwen3.5 9B / Qwen3.6 35B-A3B / Jackrong/Qwopus3.5-9B-v3-GGUF)
29
+ - Step 2: Chat タブで Load Model (Context 32768 / GPU max / Flash Attention ON)
30
+ - Step 3: Local Server タブで Port 1234 / Just-in-time Model Loading: ON / Start Server
31
+ - Step 4: curl 直叩き (OpenAI 互換 + Anthropic 互換 両ルート、native tool_calls / native tool_use 両方確認)
32
+ - Step 5: CodeRouter に provider 登録 (`kind: openai_compat` 経路 + `kind: anthropic` 経路の 2 種)
33
+ - Step 6: doctor 6 probe で動作確認 (両ルートとも全 probe OK)
34
+ - Step 7: CodeRouter 経由 end-to-end (Anthropic prompt caching `cache_read_input_tokens: 280` 観測も含む)
35
+
36
+ ### Why
37
+
38
+ v1.8.3 で `tool_calls` probe の active-harmful 誤診断 (thinking モデルに対して `tools: false` 提案) を fix したが、メッセージ文面はそのまま v1.8.2 以前の言い回し (「Common for quantized small models」のみ) を残していた。operator が NEEDS_TUNING を見たときに「probe budget が小さすぎたのでは」「v1.8.2 のバグの再発では」と疑う余地が文面上残っていたのを、**実装が既に thinking-aware なので断定できる** という事実に文言を揃える。診断ツールの出力は実装の confidence を反映すべき。
39
+
40
+ `docs/lmstudio-direct.md` は v1.8.4 で実機検証 + `examples/providers.yaml` に provider 例追加までは済ませていたが、`docs/llamacpp-direct.md` と並ぶレベルの canonical recipe ドキュメントが欠けていた。LM Studio 経由が現時点で最も `qwen35` / `qwen35moe` architecture を安定して動かせる経路 (Anthropic prompt caching まで透過) なので、operator が辿り着けるドキュメントとして正式化。
41
+
42
+ ### Migration
43
+
44
+ `pyproject.toml version 1.8.3 → 1.8.5`、`coderouter --version` は 1.8.5 を返す。**手元の `~/.coderouter/providers.yaml` は触らない限り完全に変化なし**。doctor 出力の文面が変わるが verdict と suggested_patch の semantic は完全互換。
45
+
46
+ ### Files touched
47
+
48
+ ```
49
+ M CHANGELOG.md
50
+ M coderouter/doctor.py
51
+ M pyproject.toml
52
+ A docs/lmstudio-direct.md
53
+ A docs/lmstudio-direct.en.md
54
+ ```
55
+
56
+ ---
57
+
9
58
  ## [v1.8.3] — 2026-04-26 (tool_calls probe も thinking モデル対応 + adapter で `reasoning_content` strip — llama.cpp 直叩き対応)
10
59
 
11
60
  **Theme: v1.8.2 と同日リリースの第 2 弾 patch。Qwen3.6:35b-a3b on llama.cpp の実機検証で発見した 2 つの追加課題 — `tool_calls` probe の thinking モデル偽陽性 + llama.cpp が emit する `reasoning_content` フィールドの adapter strip 不足 — を解消。**
@@ -70,6 +119,18 @@ M tests/test_doctor.py
70
119
  M tests/test_reasoning_strip.py
71
120
  ```
72
121
 
122
+ ### Post-release docs followup (同 commit ではなく追加 commit で)
123
+
124
+ llama.cpp 直叩き経路を canonical な救済路として正式採用したのを受け、関連ドキュメントを v1.8.3 後に整理:
125
+
126
+ - **`docs/llamacpp-direct.md` / `.en.md` 新規** — `llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step + Troubleshooting で。M3 Max 64GB / Q4_K_M / Metal 想定の canonical recipe
127
+ - **`setup.sh`**: 48 GB+ tier の推奨を旧 `qwen3.6:35b` → `gemma4:26b` に変更 (Ollama 経由詰みのため)。upgrade hint からも Qwen3.6 系を撤去、代わりに `docs/llamacpp-direct.md` への誘導を追加
128
+ - **`docs/quickstart.md` / `.en.md`**: 「より良いモデル」セクションの `ollama pull qwen3.6:35b` を撤去、`docs/llamacpp-direct.md` への誘導追加
129
+ - **`docs/hf-ollama-models.md`**: `ollama pull qwen3.6:35b` を「⚠️ Qwen3.6 系は Ollama 経由で詰みやすい」警告に置換、llama.cpp 直叩き経路の案内を追加
130
+ - **`README.md` / `.en.md`**: ドキュメント目次に「llama.cpp 直叩きガイド」行を追加、英語版言語スイッチャーにも `llama.cpp direct` リンクを追加
131
+ - **`examples/providers.yaml`**: `llamacpp-qwen3-6-35b-a3b` provider 例を追加 + `coding` profile chain primary に組み込み (詳細コメント付き)。Qwen3.6 系 Ollama 経路のコメントも v1.8.3 結果反映で更新
132
+ - **`tests/test_setup_sh.py`**: 48 GB / 64 GB tier の expected_model assertion を `qwen3.6:35b` → `gemma4:26b` に追従更新
133
+
73
134
  ---
74
135
 
75
136
  ## [v1.8.2] — 2026-04-26 (doctor probe を thinking モデル対応に — Gemma 4 偽陽性の解消)
@@ -399,7 +460,7 @@ $ curl -sI "https://pypi.org/pypi/coderouter-cli/json" | head -1
399
460
  HTTP/2 200
400
461
  ```
401
462
 
402
- CDN 伝播後に `uvx coderouter-cli --version` で本物の PyPI 経由インストールも確認済み。
463
+ CDN 伝播後に `uvx --from coderouter-cli coderouter --version` で本物の PyPI 経由インストールも確認済み (uv 0.11+ では package 名 ≠ executable 名のとき `--from` 必須、Issue #10 で報告者から fb)。
403
464
 
404
465
  ### Migration
405
466
 
@@ -409,8 +470,8 @@ CDN 伝播後に `uvx coderouter-cli --version` で本物の PyPI 経由イン
409
470
  # 旧 (引き続き有効)
410
471
  uv tool install --from git+https://github.com/zephel01/CodeRouter.git coderouter-cli
411
472
 
412
- # 新 (PyPI から、コマンド 1 行)
413
- uvx coderouter-cli serve --port 8088
473
+ # 新 (PyPI から、コマンド 1 行 — uv 0.11+ canonical 形式)
474
+ uvx --from coderouter-cli coderouter serve --port 8088
414
475
  # あるいは恒久的に:
415
476
  uv tool install coderouter-cli
416
477
  ```
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 1.8.3
3
+ Version: 1.8.5
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  <p align="center">
61
61
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
62
62
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
63
- <a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
63
+ <a href=""><img src="https://img.shields.io/badge/version-1.8.5-blue" alt="version"></a>
64
64
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
65
65
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
66
66
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -99,11 +99,13 @@ Description-Content-Type: text/markdown
99
99
  | **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
100
100
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
101
101
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
102
+ | **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
103
+ | **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
102
104
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
103
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.3 — tool_calls probe thinking 対応 + adapter で `reasoning_content` strip / llama.cpp 直叩き対応) |
105
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.5 — doctor NEEDS_TUNING メッセージを v1.8.3 thinking-aware budget の事実に揃える + `docs/lmstudio-direct.md` 新規) |
104
106
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
105
107
 
106
- English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
108
+ English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
107
109
 
108
110
  ## CodeRouter で何が楽になるか
109
111
 
@@ -184,7 +186,9 @@ curl -fsSL https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/p
184
186
  > ~/.coderouter/providers.yaml
185
187
 
186
188
  # 2. uvx で起動 (インストール + 起動が 1 行)
187
- uvx coderouter-cli serve --port 8088
189
# PyPI 配布名 (coderouter-cli) と console script 名 (coderouter) が異なるため、
190
+ # uv 0.11+ では --from 形式が必須 (旧 uv でも動く canonical 形式)
191
+ uvx --from coderouter-cli coderouter serve --port 8088
188
192
  ```
189
193
 
190
194
  恒久的にインストールしておきたい場合:
@@ -257,7 +261,7 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
257
261
 
258
262
  **リリース単位の詳細が欲しい?** v0.x と v1.0-A/B/C の各スライス — 何が入り、何本のテストが増え、なぜ必要だったのか — は [CHANGELOG.md](./CHANGELOG.md) に揃っています。設計の不変項と今後のロードマップは [plan.md](./plan.md)。
259
263
 
260
- **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx coderouter-cli`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
264
+ **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx --from coderouter-cli coderouter`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
261
265
 
262
266
  ### Claude Code と一緒に使う
263
267
 
@@ -432,7 +436,7 @@ suggested patch for ~/.coderouter/providers.yaml:
432
436
  - v1.0 ✅ — 14 ケースのリグレッションスイート、Code Mode (スリム版 Claude Code ハーネス); 出力クリーニングは **v1.0-A** で `output_filters` チェーンとして完了
433
437
  - v1.5 ✅ — **メトリクスダッシュボード(出荷済み)** — `MetricsCollector` + `GET /metrics.json` + `GET /metrics` (Prometheus) + `GET /dashboard` (HTML 1 ページ) + `coderouter stats` curses TUI + `scripts/demo_traffic.sh` トラフィックジェネレータ + `display_timezone` 設定
434
438
  - v1.6 ✅ — `auto_router` (task-aware routing、`default_profile: auto` で画像/コード濃度/その他を自動振り分け) + NVIDIA NIM 無料枠 8 段チェーン + ドキュメント言語スワップ (JA primary) + トラブルシュート独立ドキュメント + `--env-file` / `doctor --check-env`
435
- - v1.7 ✅ — PyPI 公開 (`uvx coderouter-cli` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
439
+ - v1.7 ✅ — PyPI 公開 (`uvx --from coderouter-cli coderouter` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
436
440
  - v1.8 ✅ — **用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化**: `multi` (default) / `coding` / `general` / `reasoning` の 4 プロファイル + 全プロファイルに `append_system_prompt` で Claude 風応答 nudge + `mode_aliases` (default/fast/vision/think/cheap)、Ollama 公式 tag 化された `gemma4:e4b/26b/31b` / `qwen3.6:27b/35b` を active stanza に格上げ、Z.AI を OpenAI-compat で 2 base_url 提供 (Coding Plan / General API)、`coderouter doctor --check-model --apply` で YAML パッチを非破壊書き戻し (`ruamel.yaml` round-trip でコメント・key 順序保持、冪等)、`setup.sh` onboarding ウィザード、`claude_code_suitability` startup チェック (Llama-3.3-70B 系を `claude-code-*` profile で WARN)。残り (v1.9 以降): `coderouter doctor --network` (CI 用)、launcher スクリプト (`.command` / `.sh` / `.bat`)、opt-in 起動時アップデートチェック
437
441
 
438
442
  ## `kind: openai_compat` と `kind: anthropic` の選び方
@@ -20,7 +20,7 @@
20
20
  <p align="center">
21
21
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
22
22
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
23
- <a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
23
+ <a href=""><img src="https://img.shields.io/badge/version-1.8.5-blue" alt="version"></a>
24
24
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
25
25
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
26
26
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -58,11 +58,13 @@
58
58
  | **Run for free** | [Free-tier guide](./docs/free-tier-guide.en.md) | Stacking NVIDIA NIM (40 req/min) with OpenRouter free: live-verified roster and five footguns |
59
59
  | **Decide if you need it** | [Decision guide](./docs/when-do-i-need-coderouter.en.md) | Agent × model matrix to figure out whether CodeRouter fits your setup at all |
60
60
  | **When stuck** | [Troubleshooting](./docs/troubleshooting.en.md) | How to use `doctor`, why `.env` needs `export`, the 5 Ollama silent-fail symptoms, Claude Code integration gotchas |
61
+ | **llama.cpp direct** | [llama.cpp direct guide](./docs/llamacpp-direct.en.md) | Rescue path for Qwen3.6 (Ollama is brittle). 7-step recipe: `llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter wiring. Real-machine verified in v1.8.3. |
62
+ | **LM Studio direct** | [LM Studio direct guide](./docs/lmstudio-direct.en.md) | Second rescue path for `qwen35` / `qwen35moe`. LM Studio 0.4.12+ Local Server with both OpenAI-compatible and Anthropic-compatible (`/v1/messages`) routes — prompt caching survives end-to-end. Real-machine verified in v1.8.4. |
61
63
  | **Operate safely** | [Security](./docs/security.en.md) | Threat model, secret handling, vulnerability reporting |
62
- | **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.3 — tool_calls probe also thinking-aware + adapter strips `reasoning_content` / llama.cpp direct backend supported) |
64
+ | **History** | [CHANGELOG](./CHANGELOG.md) | All releases (latest: v1.8.5 — doctor NEEDS_TUNING messages aligned with v1.8.3 thinking-aware budget + new `docs/lmstudio-direct.md`) |
63
65
  | **Track the design** | [plan.md](./plan.md) | Design invariants, milestones, roadmap |
64
66
 
65
- 日本語版: [Quickstart](./docs/quickstart.md) · [利用ガイド](./docs/usage-guide.md) · [無料枠ガイド](./docs/free-tier-guide.md) · [要否判定](./docs/when-do-i-need-coderouter.md) · [トラブルシューティング](./docs/troubleshooting.md) · [Security](./docs/security.md)
67
+ 日本語版: [Quickstart](./docs/quickstart.md) · [利用ガイド](./docs/usage-guide.md) · [無料枠ガイド](./docs/free-tier-guide.md) · [要否判定](./docs/when-do-i-need-coderouter.md) · [トラブルシューティング](./docs/troubleshooting.md) · [LM Studio 直接](./docs/lmstudio-direct.md) · [Security](./docs/security.md)
66
68
 
67
69
  ## What gets easier with CodeRouter
68
70
 
@@ -142,8 +144,11 @@ mkdir -p ~/.coderouter
142
144
  curl -fsSL https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/providers.yaml \
143
145
  > ~/.coderouter/providers.yaml
144
146
 
145
- # 2. Install + run, in one line
146
- uvx coderouter-cli serve --port 8088
147
+ # 2. Install + run, in one line.
148
+ # The PyPI distribution name (coderouter-cli) differs from the console
149
+ # script name (coderouter), so uv 0.11+ requires the --from form. (It
150
+ # also works on older uv, so this is the canonical incantation.)
151
+ uvx --from coderouter-cli coderouter serve --port 8088
147
152
  ```
148
153
 
149
154
  Want a permanent install?
@@ -216,7 +221,7 @@ What CodeRouter can do for you today:
216
221
 
217
222
  **Want the per-release detail?** Every v0.x and v1.0-A/B/C slice — what shipped, how many tests it added, why it was needed — is in [CHANGELOG.md](./CHANGELOG.md). Design invariants and the forward roadmap live in [plan.md](./plan.md).
218
223
 
219
- **Coming next** (see [plan.md §10](./plan.md) for v1.0, §18 for v1.0+): v1.5 ✅ metrics / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`. v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM free tier + troubleshooting doc split + `--env-file` / `doctor --check-env`. v1.7 ✅ PyPI publish (`uvx coderouter-cli`). v1.8 ✅ Use-case-aware 4 profiles (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) registration + `setup.sh` wizard + `coderouter doctor --check-model --apply` (non-destructive YAML write-back) + `claude_code_suitability` startup check + Trusted Publishing automation. Remaining for v1.9+: `coderouter doctor --network` (CI), launcher scripts, opt-in update check.
224
+ **Coming next** (see [plan.md §10](./plan.md) for v1.0, §18 for v1.0+): v1.5 ✅ metrics / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`. v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM free tier + troubleshooting doc split + `--env-file` / `doctor --check-env`. v1.7 ✅ PyPI publish (`uvx --from coderouter-cli coderouter`). v1.8 ✅ Use-case-aware 4 profiles (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) registration + `setup.sh` wizard + `coderouter doctor --check-model --apply` (non-destructive YAML write-back) + `claude_code_suitability` startup check + Trusted Publishing automation. Remaining for v1.9+: `coderouter doctor --network` (CI), launcher scripts, opt-in update check.
220
225
 
221
226
  ### Use it with Claude Code
222
227
 
@@ -393,7 +398,7 @@ Coming next (see [plan.md §10](./plan.md) for v1.0, §18 for v1.0+):
393
398
  - v1.0 ✅ — 14-case regression suite, Code Mode (slim Claude Code harness); output cleaning shipped in **v1.0-A** (`output_filters` chain, done)
394
399
  - v1.5 ✅ — **Metrics dashboard (shipped)** — `MetricsCollector` + `GET /metrics.json` + `GET /metrics` (Prometheus) + `GET /dashboard` (HTML one-pager) + `coderouter stats` curses TUI + `scripts/demo_traffic.sh` traffic generator + `display_timezone` config
395
400
  - v1.6 ✅ — `auto_router` (task-aware routing; `default_profile: auto` dispatches by image attachment / code-fence ratio / else) + NVIDIA NIM free-tier 8-step chain + doc language swap (JA primary) + troubleshooting page split + `--env-file` / `doctor --check-env`
396
- - v1.7 ✅ — PyPI publish (`uvx coderouter-cli` one-line bootstrap) + Trusted Publishing path (release.yml auto-publish on tag push)
401
+ - v1.7 ✅ — PyPI publish (`uvx --from coderouter-cli coderouter` one-line bootstrap) + Trusted Publishing path (release.yml auto-publish on tag push)
397
402
  - v1.8 ✅ — **Use-case-aware 4 profiles + GLM/Gemma 4/Qwen3.6 official tags + apply automation**: `multi` (default) / `coding` / `general` / `reasoning` profiles + `append_system_prompt` per profile to nudge non-Claude models toward Claude-style replies + `mode_aliases` (default/fast/vision/think/cheap), Ollama-official `gemma4:e4b/26b/31b` and `qwen3.6:27b/35b` promoted to active stanzas, Z.AI provided as OpenAI-compat with two base URLs (Coding Plan / General API), `coderouter doctor --check-model --apply` writes YAML patches non-destructively (`ruamel.yaml` round-trip preserves comments + key order; idempotent), `setup.sh` onboarding wizard, `claude_code_suitability` startup check (Llama-3.3-70B in `claude-code-*` profiles emits a structured WARN). Remaining for v1.9+: `coderouter doctor --network` (CI-friendly), launcher scripts (`.command` / `.sh` / `.bat`), opt-in startup update check
398
403
 
399
404
  ## Choosing `kind: openai_compat` vs `kind: anthropic`
@@ -19,7 +19,7 @@
19
19
  <p align="center">
20
20
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
21
21
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
22
- <a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
22
+ <a href=""><img src="https://img.shields.io/badge/version-1.8.5-blue" alt="version"></a>
23
23
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
24
24
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
25
25
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -58,11 +58,13 @@
58
58
  | **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
59
59
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
60
60
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
61
+ | **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
62
+ | **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
61
63
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
62
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.3 — tool_calls probe thinking 対応 + adapter で `reasoning_content` strip / llama.cpp 直叩き対応) |
64
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.5 — doctor NEEDS_TUNING メッセージを v1.8.3 thinking-aware budget の事実に揃える + `docs/lmstudio-direct.md` 新規) |
63
65
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
64
66
 
65
- English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
67
+ English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
66
68
 
67
69
  ## CodeRouter で何が楽になるか
68
70
 
@@ -143,7 +145,9 @@ curl -fsSL https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/p
143
145
  > ~/.coderouter/providers.yaml
144
146
 
145
147
  # 2. uvx で起動 (インストール + 起動が 1 行)
146
- uvx coderouter-cli serve --port 8088
148
+ # PyPI 配布名 (coderouter-cli) と console script 名 (coderouter) が異なるため、
149
+ # uv 0.11+ では --from 形式が必須 (旧 uv でも動く canonical 形式)
150
+ uvx --from coderouter-cli coderouter serve --port 8088
147
151
  ```
148
152
 
149
153
  恒久的にインストールしておきたい場合:
@@ -216,7 +220,7 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
216
220
 
217
221
  **リリース単位の詳細が欲しい?** v0.x と v1.0-A/B/C の各スライス — 何が入り、何本のテストが増え、なぜ必要だったのか — は [CHANGELOG.md](./CHANGELOG.md) に揃っています。設計の不変項と今後のロードマップは [plan.md](./plan.md)。
218
222
 
219
- **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx coderouter-cli`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
223
+ **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx --from coderouter-cli coderouter`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
220
224
 
221
225
  ### Claude Code と一緒に使う
222
226
 
@@ -391,7 +395,7 @@ suggested patch for ~/.coderouter/providers.yaml:
391
395
  - v1.0 ✅ — 14 ケースのリグレッションスイート、Code Mode (スリム版 Claude Code ハーネス); 出力クリーニングは **v1.0-A** で `output_filters` チェーンとして完了
392
396
  - v1.5 ✅ — **メトリクスダッシュボード(出荷済み)** — `MetricsCollector` + `GET /metrics.json` + `GET /metrics` (Prometheus) + `GET /dashboard` (HTML 1 ページ) + `coderouter stats` curses TUI + `scripts/demo_traffic.sh` トラフィックジェネレータ + `display_timezone` 設定
393
397
  - v1.6 ✅ — `auto_router` (task-aware routing、`default_profile: auto` で画像/コード濃度/その他を自動振り分け) + NVIDIA NIM 無料枠 8 段チェーン + ドキュメント言語スワップ (JA primary) + トラブルシュート独立ドキュメント + `--env-file` / `doctor --check-env`
394
- - v1.7 ✅ — PyPI 公開 (`uvx coderouter-cli` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
398
+ - v1.7 ✅ — PyPI 公開 (`uvx --from coderouter-cli coderouter` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
395
399
  - v1.8 ✅ — **用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化**: `multi` (default) / `coding` / `general` / `reasoning` の 4 プロファイル + 全プロファイルに `append_system_prompt` で Claude 風応答 nudge + `mode_aliases` (default/fast/vision/think/cheap)、Ollama 公式 tag 化された `gemma4:e4b/26b/31b` / `qwen3.6:27b/35b` を active stanza に格上げ、Z.AI を OpenAI-compat で 2 base_url 提供 (Coding Plan / General API)、`coderouter doctor --check-model --apply` で YAML パッチを非破壊書き戻し (`ruamel.yaml` round-trip でコメント・key 順序保持、冪等)、`setup.sh` onboarding ウィザード、`claude_code_suitability` startup チェック (Llama-3.3-70B 系を `claude-code-*` profile で WARN)。残り (v1.9 以降): `coderouter doctor --network` (CI 用)、launcher スクリプト (`.command` / `.sh` / `.bat`)、opt-in 起動時アップデートチェック
396
400
 
397
401
  ## `kind: openai_compat` と `kind: anthropic` の選び方
@@ -836,6 +836,22 @@ async def _probe_num_ctx(
836
836
  )
837
837
 
838
838
  # Canary missing → truncation occurred.
839
+ #
840
+ # v1.8.5: with the v1.8.3 thinking-aware response budget already
841
+ # applied (max_tokens=1024 for reasoning models — see
842
+ # ``_NUM_CTX_PROBE_MAX_TOKENS_THINKING``), a missing canary cannot
843
+ # be blamed on an under-sized reply. The fault is genuinely on the
844
+ # prompt side: the upstream truncated the input before the model
845
+ # could see the canary token at the head. This sharpens the
846
+ # remediation — we are confident bumping ``num_ctx`` is the right
847
+ # fix, not bumping the response budget.
848
+ thinking = _is_reasoning_model(provider, resolved)
849
+ budget_note = (
850
+ f" Probe sent max_tokens={max_tokens} (thinking-aware), so the "
851
+ "miss is prompt-side truncation rather than reply truncation."
852
+ if thinking
853
+ else ""
854
+ )
839
855
  if declared is None:
840
856
  return ProbeResult(
841
857
  name="num_ctx",
@@ -845,7 +861,7 @@ async def _probe_num_ctx(
845
861
  "upstream truncated the prompt. No `extra_body.options.num_ctx` "
846
862
  "is declared, so Ollama is running at its 2048-token default, "
847
863
  "which cannot hold Claude Code's system + tool prompts "
848
- "(plan.md §9.4 symptom #1)."
864
+ f"(plan.md §9.4 symptom #1).{budget_note}"
849
865
  ),
850
866
  target_file="providers.yaml",
851
867
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -857,7 +873,8 @@ async def _probe_num_ctx(
857
873
  detail=(
858
874
  f"canary missing — declared num_ctx={declared} is below "
859
875
  f"the {_NUM_CTX_ADEQUATE_THRESHOLD}-token threshold needed "
860
- "for Claude Code prompts. Bump it (plan.md §9.4 symptom #1)."
876
+ f"for Claude Code prompts. Bump it (plan.md §9.4 symptom "
877
+ f"#1).{budget_note}"
861
878
  ),
862
879
  target_file="providers.yaml",
863
880
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -875,7 +892,7 @@ async def _probe_num_ctx(
875
892
  "declared value, or the upstream is silently capping it — "
876
893
  "verify with the model card / server logs. The suggested "
877
894
  "patch still emits 32768 as a starting point; dial down if "
878
- "the host is memory-constrained."
895
+ f"the host is memory-constrained.{budget_note}"
879
896
  ),
880
897
  target_file="providers.yaml",
881
898
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -1041,6 +1058,24 @@ async def _probe_streaming(
1041
1058
  # mid-word". Since we're already Ollama-shape-gated, the
1042
1059
  # remediation is always the ``extra_body.options.num_predict``
1043
1060
  # bump.
1061
+ #
1062
+ # v1.8.5: with v1.8.3's thinking-aware probe budget already
1063
+ # applied (max_tokens=1024 for reasoning models), a length cap
1064
+ # here cannot be blamed on the probe budget — the upstream is
1065
+ # the one capping. Surface the budget used so the operator can
1066
+ # rule it out at a glance.
1067
+ thinking = _is_reasoning_model(provider, resolved)
1068
+ budget_note = (
1069
+ f"Probe sent max_tokens={max_tokens} (thinking-aware), so "
1070
+ "the cap is server-side `options.num_predict` rather than "
1071
+ "the probe budget."
1072
+ if thinking
1073
+ else (
1074
+ f"Probe sent max_tokens={max_tokens}; the cap is "
1075
+ "server-side `options.num_predict` rather than the "
1076
+ "probe budget."
1077
+ )
1078
+ )
1044
1079
  return ProbeResult(
1045
1080
  name="streaming",
1046
1081
  verdict=ProbeVerdict.NEEDS_TUNING,
@@ -1048,9 +1083,9 @@ async def _probe_streaming(
1048
1083
  f"stream closed with `finish_reason='length'` after only "
1049
1084
  f"{len(content)} chars (expected ≥ "
1050
1085
  f"{_STREAMING_PROBE_MIN_EXPECTED_CHARS}). Upstream is "
1051
- "capping output — most likely `options.num_predict`. "
1052
- "Bump it via `extra_body` (plan.md §9.4 symptom #1 "
1053
- "streaming variant)."
1086
+ f"capping output — most likely `options.num_predict`. "
1087
+ f"{budget_note} Bump it via `extra_body` (plan.md §9.4 "
1088
+ "symptom #1 streaming variant)."
1054
1089
  ),
1055
1090
  target_file="providers.yaml",
1056
1091
  suggested_patch=_patch_providers_yaml_num_predict(
@@ -1239,13 +1274,33 @@ async def _probe_tool_calls(
1239
1274
 
1240
1275
  # Nothing tool-shaped at all.
1241
1276
  if declared:
1277
+ # v1.8.5: with the v1.8.3 thinking-aware budget already applied,
1278
+ # we can speak with confidence here: the model genuinely did not
1279
+ # emit tool_calls (it's not a budget-exhaustion false-positive
1280
+ # like the pre-v1.8.3 64-token cap used to produce). For thinking
1281
+ # models specifically, the 1024-token budget covers
1282
+ # ``reasoning_content`` *and* a tool call — so a missing
1283
+ # ``tool_calls`` here is real. Surface the budget that was used
1284
+ # so operators reading the message understand what was probed.
1285
+ thinking = _is_reasoning_model(provider, resolved)
1286
+ budget_note = (
1287
+ f"Probed with thinking-aware budget ({max_tokens} tokens, "
1288
+ "covers `reasoning_content` plus the call) — this is a true "
1289
+ "tools=false case, not budget exhaustion."
1290
+ if thinking
1291
+ else (
1292
+ f"Probed with default budget ({max_tokens} tokens) — "
1293
+ "the model produced no tool-shaped output at all."
1294
+ )
1295
+ )
1242
1296
  return ProbeResult(
1243
1297
  name="tool_calls",
1244
1298
  verdict=ProbeVerdict.NEEDS_TUNING,
1245
1299
  detail=(
1246
1300
  "declaration says tools=true but model produced neither "
1247
- "native `tool_calls` nor repairable tool JSON. Common for "
1248
- "quantized small models (plan.md §9.4 symptom #2)."
1301
+ "native `tool_calls` nor repairable tool JSON. "
1302
+ f"{budget_note} Common for quantized small models "
1303
+ "(plan.md §9.4 symptom #2)."
1249
1304
  ),
1250
1305
  target_file="providers.yaml",
1251
1306
  suggested_patch=_patch_providers_yaml_capability(provider.name, "tools", False),
@@ -115,7 +115,10 @@ curl -fsSL -o ~/.coderouter/providers.yaml \
115
115
  https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/providers.nvidia-nim.yaml
116
116
 
117
117
  # uvx fetches and runs in one shot
118
- uvx coderouter-cli serve --mode claude-code-nim --port 8088
118
+ # Note: PyPI distribution name (coderouter-cli) differs from the console
119
+ # script name (coderouter), so uv 0.11+ requires the --from form. It also
120
+ # works on older uv, so this is the canonical incantation.
121
+ uvx --from coderouter-cli coderouter serve --mode claude-code-nim --port 8088
119
122
  ```
120
123
 
121
124
  For a permanent install:
@@ -115,7 +115,9 @@ curl -fsSL -o ~/.coderouter/providers.yaml \
115
115
  https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/providers.nvidia-nim.yaml
116
116
 
117
117
  # uvx で起動 (PyPI から都度取得 + 実行)
118
- uvx coderouter-cli serve --mode claude-code-nim --port 8088
118
+ # 注: PyPI 配布名 (coderouter-cli) と console script (coderouter) が
119
+ # 異なるため、uv 0.11+ では --from 形式が必須 (旧 uv でも動く)
120
+ uvx --from coderouter-cli coderouter serve --mode claude-code-nim --port 8088
119
121
  ```
120
122
 
121
123
  恒久的にインストールしておく場合:
@@ -137,9 +137,23 @@ ollama cp hf.co/unsloth/Qwen3-Coder-480B-A35B-Instruct-GGUF:Q4_K_M qwen3-coder:
137
137
  ### 雑用 / 一般向け(note 記事推奨)
138
138
 
139
139
  > **2026-04 update**: Gemma 4 / Qwen3.6 は Ollama 公式 registry に
140
- > 登録されました。HF 経由は不要です。`ollama pull gemma4:26b` / `ollama
141
- > pull qwen3.6:35b` でそのまま使えます。providers.yaml の
142
- > `ollama-gemma4-*` / `ollama-qwen3-6-*` stanza は既に有効化されています。
140
+ > 登録されました。HF 経由は不要です。`ollama pull gemma4:26b` でそのまま
141
+ > 使えます。providers.yaml の `ollama-gemma4-*` stanza は既に有効化されて
142
+ > います。
143
+
144
+ > **⚠️ Qwen3.6 系 (qwen3.6:35b / qwen3.6:27b) は Ollama 経由で詰みやすい
145
+ > (v1.8.1 〜 v1.8.3 の実機検証 + X / Reddit のコミュニティ報告で確認)**:
146
+ >
147
+ > - `tool_calls [NEEDS TUNING]` (Ollama の chat template / tool 仕様未成熟)
148
+ > - hard crash / リブート / メモリ計算バグ等の幅広い報告 (主に Mac Metal)
149
+ > - `qwen3.6:35b-a3b-coding-nvfp4` 等の variant は MLX backend で 500 エラー
150
+ >
151
+ > **Qwen3.6 を Sonnet 級として狙うなら Ollama ではなく llama.cpp 直叩きを
152
+ > 推奨**: `Unsloth/Qwen3.6-35B-A3B-GGUF` (UD-Q4_K_M) + `llama-server` で
153
+ > native `tool_calls` が完璧動作。手順は
154
+ > [`docs/llamacpp-direct.md`](./llamacpp-direct.md) を参照
155
+ > (CodeRouter v1.8.3 で実機検証済、`examples/providers.yaml` に
156
+ > `llamacpp-qwen3-6-35b-a3b` provider 例も同梱)。
143
157
 
144
158
  ### Reasoning 向け(GLM / Opus 蒸留)
145
159