coderouter-cli 1.8.3__py3-none-any.whl → 1.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/doctor.py +63 -8
- {coderouter_cli-1.8.3.dist-info → coderouter_cli-1.8.5.dist-info}/METADATA +11 -7
- {coderouter_cli-1.8.3.dist-info → coderouter_cli-1.8.5.dist-info}/RECORD +6 -6
- {coderouter_cli-1.8.3.dist-info → coderouter_cli-1.8.5.dist-info}/WHEEL +0 -0
- {coderouter_cli-1.8.3.dist-info → coderouter_cli-1.8.5.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-1.8.3.dist-info → coderouter_cli-1.8.5.dist-info}/licenses/LICENSE +0 -0
coderouter/doctor.py
CHANGED
|
@@ -836,6 +836,22 @@ async def _probe_num_ctx(
|
|
|
836
836
|
)
|
|
837
837
|
|
|
838
838
|
# Canary missing → truncation occurred.
|
|
839
|
+
#
|
|
840
|
+
# v1.8.5: with the v1.8.3 thinking-aware response budget already
|
|
841
|
+
# applied (max_tokens=1024 for reasoning models — see
|
|
842
|
+
# ``_NUM_CTX_PROBE_MAX_TOKENS_THINKING``), a missing canary cannot
|
|
843
|
+
# be blamed on an under-sized reply. The fault is genuinely on the
|
|
844
|
+
# prompt side: the upstream truncated the input before the model
|
|
845
|
+
# could see the canary token at the head. This sharpens the
|
|
846
|
+
# remediation — we are confident bumping ``num_ctx`` is the right
|
|
847
|
+
# fix, not bumping the response budget.
|
|
848
|
+
thinking = _is_reasoning_model(provider, resolved)
|
|
849
|
+
budget_note = (
|
|
850
|
+
f" Probe sent max_tokens={max_tokens} (thinking-aware), so the "
|
|
851
|
+
"miss is prompt-side truncation rather than reply truncation."
|
|
852
|
+
if thinking
|
|
853
|
+
else ""
|
|
854
|
+
)
|
|
839
855
|
if declared is None:
|
|
840
856
|
return ProbeResult(
|
|
841
857
|
name="num_ctx",
|
|
@@ -845,7 +861,7 @@ async def _probe_num_ctx(
|
|
|
845
861
|
"upstream truncated the prompt. No `extra_body.options.num_ctx` "
|
|
846
862
|
"is declared, so Ollama is running at its 2048-token default, "
|
|
847
863
|
"which cannot hold Claude Code's system + tool prompts "
|
|
848
|
-
"(plan.md §9.4 symptom #1)."
|
|
864
|
+
f"(plan.md §9.4 symptom #1).{budget_note}"
|
|
849
865
|
),
|
|
850
866
|
target_file="providers.yaml",
|
|
851
867
|
suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
|
|
@@ -857,7 +873,8 @@ async def _probe_num_ctx(
|
|
|
857
873
|
detail=(
|
|
858
874
|
f"canary missing — declared num_ctx={declared} is below "
|
|
859
875
|
f"the {_NUM_CTX_ADEQUATE_THRESHOLD}-token threshold needed "
|
|
860
|
-
"for Claude Code prompts. Bump it (plan.md §9.4 symptom
|
|
876
|
+
f"for Claude Code prompts. Bump it (plan.md §9.4 symptom "
|
|
877
|
+
f"#1).{budget_note}"
|
|
861
878
|
),
|
|
862
879
|
target_file="providers.yaml",
|
|
863
880
|
suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
|
|
@@ -875,7 +892,7 @@ async def _probe_num_ctx(
|
|
|
875
892
|
"declared value, or the upstream is silently capping it — "
|
|
876
893
|
"verify with the model card / server logs. The suggested "
|
|
877
894
|
"patch still emits 32768 as a starting point; dial down if "
|
|
878
|
-
"the host is memory-constrained."
|
|
895
|
+
f"the host is memory-constrained.{budget_note}"
|
|
879
896
|
),
|
|
880
897
|
target_file="providers.yaml",
|
|
881
898
|
suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
|
|
@@ -1041,6 +1058,24 @@ async def _probe_streaming(
|
|
|
1041
1058
|
# mid-word". Since we're already Ollama-shape-gated, the
|
|
1042
1059
|
# remediation is always the ``extra_body.options.num_predict``
|
|
1043
1060
|
# bump.
|
|
1061
|
+
#
|
|
1062
|
+
# v1.8.5: with v1.8.3's thinking-aware probe budget already
|
|
1063
|
+
# applied (max_tokens=1024 for reasoning models), a length cap
|
|
1064
|
+
# here cannot be blamed on the probe budget — the upstream is
|
|
1065
|
+
# the one capping. Surface the budget used so the operator can
|
|
1066
|
+
# rule it out at a glance.
|
|
1067
|
+
thinking = _is_reasoning_model(provider, resolved)
|
|
1068
|
+
budget_note = (
|
|
1069
|
+
f"Probe sent max_tokens={max_tokens} (thinking-aware), so "
|
|
1070
|
+
"the cap is server-side `options.num_predict` rather than "
|
|
1071
|
+
"the probe budget."
|
|
1072
|
+
if thinking
|
|
1073
|
+
else (
|
|
1074
|
+
f"Probe sent max_tokens={max_tokens}; the cap is "
|
|
1075
|
+
"server-side `options.num_predict` rather than the "
|
|
1076
|
+
"probe budget."
|
|
1077
|
+
)
|
|
1078
|
+
)
|
|
1044
1079
|
return ProbeResult(
|
|
1045
1080
|
name="streaming",
|
|
1046
1081
|
verdict=ProbeVerdict.NEEDS_TUNING,
|
|
@@ -1048,9 +1083,9 @@ async def _probe_streaming(
|
|
|
1048
1083
|
f"stream closed with `finish_reason='length'` after only "
|
|
1049
1084
|
f"{len(content)} chars (expected ≥ "
|
|
1050
1085
|
f"{_STREAMING_PROBE_MIN_EXPECTED_CHARS}). Upstream is "
|
|
1051
|
-
"capping output — most likely `options.num_predict`. "
|
|
1052
|
-
"Bump it via `extra_body` (plan.md §9.4
|
|
1053
|
-
"streaming variant)."
|
|
1086
|
+
f"capping output — most likely `options.num_predict`. "
|
|
1087
|
+
f"{budget_note} Bump it via `extra_body` (plan.md §9.4 "
|
|
1088
|
+
"symptom #1 streaming variant)."
|
|
1054
1089
|
),
|
|
1055
1090
|
target_file="providers.yaml",
|
|
1056
1091
|
suggested_patch=_patch_providers_yaml_num_predict(
|
|
@@ -1239,13 +1274,33 @@ async def _probe_tool_calls(
|
|
|
1239
1274
|
|
|
1240
1275
|
# Nothing tool-shaped at all.
|
|
1241
1276
|
if declared:
|
|
1277
|
+
# v1.8.5: with the v1.8.3 thinking-aware budget already applied,
|
|
1278
|
+
# we can speak with confidence here: the model genuinely did not
|
|
1279
|
+
# emit tool_calls (it's not a budget-exhaustion false-positive
|
|
1280
|
+
# like the pre-v1.8.3 64-token cap used to produce). For thinking
|
|
1281
|
+
# models specifically, the 1024-token budget covers
|
|
1282
|
+
# ``reasoning_content`` *and* a tool call — so a missing
|
|
1283
|
+
# ``tool_calls`` here is real. Surface the budget that was used
|
|
1284
|
+
# so operators reading the message understand what was probed.
|
|
1285
|
+
thinking = _is_reasoning_model(provider, resolved)
|
|
1286
|
+
budget_note = (
|
|
1287
|
+
f"Probed with thinking-aware budget ({max_tokens} tokens, "
|
|
1288
|
+
"covers `reasoning_content` plus the call) — this is a true "
|
|
1289
|
+
"tools=false case, not budget exhaustion."
|
|
1290
|
+
if thinking
|
|
1291
|
+
else (
|
|
1292
|
+
f"Probed with default budget ({max_tokens} tokens) — "
|
|
1293
|
+
"the model produced no tool-shaped output at all."
|
|
1294
|
+
)
|
|
1295
|
+
)
|
|
1242
1296
|
return ProbeResult(
|
|
1243
1297
|
name="tool_calls",
|
|
1244
1298
|
verdict=ProbeVerdict.NEEDS_TUNING,
|
|
1245
1299
|
detail=(
|
|
1246
1300
|
"declaration says tools=true but model produced neither "
|
|
1247
|
-
"native `tool_calls` nor repairable tool JSON.
|
|
1248
|
-
"quantized small models
|
|
1301
|
+
"native `tool_calls` nor repairable tool JSON. "
|
|
1302
|
+
f"{budget_note} Common for quantized small models "
|
|
1303
|
+
"(plan.md §9.4 symptom #2)."
|
|
1249
1304
|
),
|
|
1250
1305
|
target_file="providers.yaml",
|
|
1251
1306
|
suggested_patch=_patch_providers_yaml_capability(provider.name, "tools", False),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 1.8.3
|
|
3
|
+
Version: 1.8.5
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
|
|
|
60
60
|
<p align="center">
|
|
61
61
|
<a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
|
|
62
62
|
<a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
|
|
63
|
-
<a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
|
|
63
|
+
<a href=""><img src="https://img.shields.io/badge/version-1.8.5-blue" alt="version"></a>
|
|
64
64
|
<a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
|
|
65
65
|
<a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
|
|
66
66
|
<a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
|
|
@@ -99,11 +99,13 @@ Description-Content-Type: text/markdown
|
|
|
99
99
|
| **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
|
|
100
100
|
| **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
|
|
101
101
|
| **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
|
|
102
|
+
| **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
|
|
103
|
+
| **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
|
|
102
104
|
| **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
|
|
103
|
-
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.
|
|
105
|
+
| **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.5 — doctor NEEDS_TUNING メッセージを v1.8.3 thinking-aware budget の事実に揃える + `docs/lmstudio-direct.md` 新規) |
|
|
104
106
|
| **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
|
|
105
107
|
|
|
106
|
-
English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
|
|
108
|
+
English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
|
|
107
109
|
|
|
108
110
|
## CodeRouter で何が楽になるか
|
|
109
111
|
|
|
@@ -184,7 +186,9 @@ curl -fsSL https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/p
|
|
|
184
186
|
> ~/.coderouter/providers.yaml
|
|
185
187
|
|
|
186
188
|
# 2. uvx で起動 (インストール + 起動が 1 行)
|
|
187
|
-
|
|
189
|
+
# PyPI 配布名 (coderouter-cli) と console script 名 (coderouter) が異なるため、
|
|
190
|
+
# uv 0.11+ では --from 形式が必須 (旧 uv でも動く canonical 形式)
|
|
191
|
+
uvx --from coderouter-cli coderouter serve --port 8088
|
|
188
192
|
```
|
|
189
193
|
|
|
190
194
|
恒久的にインストールしておきたい場合:
|
|
@@ -257,7 +261,7 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
|
|
|
257
261
|
|
|
258
262
|
**リリース単位の詳細が欲しい?** v0.x と v1.0-A/B/C の各スライス — 何が入り、何本のテストが増え、なぜ必要だったのか — は [CHANGELOG.md](./CHANGELOG.md) に揃っています。設計の不変項と今後のロードマップは [plan.md](./plan.md)。
|
|
259
263
|
|
|
260
|
-
**次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx coderouter-cli`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
|
|
264
|
+
**次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx --from coderouter-cli coderouter`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
|
|
261
265
|
|
|
262
266
|
### Claude Code と一緒に使う
|
|
263
267
|
|
|
@@ -432,7 +436,7 @@ suggested patch for ~/.coderouter/providers.yaml:
|
|
|
432
436
|
- v1.0 ✅ — 14 ケースのリグレッションスイート、Code Mode (スリム版 Claude Code ハーネス); 出力クリーニングは **v1.0-A** で `output_filters` チェーンとして完了
|
|
433
437
|
- v1.5 ✅ — **メトリクスダッシュボード(出荷済み)** — `MetricsCollector` + `GET /metrics.json` + `GET /metrics` (Prometheus) + `GET /dashboard` (HTML 1 ページ) + `coderouter stats` curses TUI + `scripts/demo_traffic.sh` トラフィックジェネレータ + `display_timezone` 設定
|
|
434
438
|
- v1.6 ✅ — `auto_router` (task-aware routing、`default_profile: auto` で画像/コード濃度/その他を自動振り分け) + NVIDIA NIM 無料枠 8 段チェーン + ドキュメント言語スワップ (JA primary) + トラブルシュート独立ドキュメント + `--env-file` / `doctor --check-env`
|
|
435
|
-
- v1.7 ✅ — PyPI 公開 (`uvx coderouter-cli` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
|
|
439
|
+
- v1.7 ✅ — PyPI 公開 (`uvx --from coderouter-cli coderouter` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
|
|
436
440
|
- v1.8 ✅ — **用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化**: `multi` (default) / `coding` / `general` / `reasoning` の 4 プロファイル + 全プロファイルに `append_system_prompt` で Claude 風応答 nudge + `mode_aliases` (default/fast/vision/think/cheap)、Ollama 公式 tag 化された `gemma4:e4b/26b/31b` / `qwen3.6:27b/35b` を active stanza に格上げ、Z.AI を OpenAI-compat で 2 base_url 提供 (Coding Plan / General API)、`coderouter doctor --check-model --apply` で YAML パッチを非破壊書き戻し (`ruamel.yaml` round-trip でコメント・key 順序保持、冪等)、`setup.sh` onboarding ウィザード、`claude_code_suitability` startup チェック (Llama-3.3-70B 系を `claude-code-*` profile で WARN)。残り (v1.9 以降): `coderouter doctor --network` (CI 用)、launcher スクリプト (`.command` / `.sh` / `.bat`)、opt-in 起動時アップデートチェック
|
|
437
441
|
|
|
438
442
|
## `kind: openai_compat` と `kind: anthropic` の選び方
|
|
@@ -2,7 +2,7 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
|
|
|
2
2
|
coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
|
|
3
3
|
coderouter/cli.py,sha256=vI1-dv10t4-xG6Zpt7zi_3U8xGgq54Qa8XIMUYpfOV8,19859
|
|
4
4
|
coderouter/cli_stats.py,sha256=ae20xUr_hjX09Ms3fBZGZsUS52o44JC57EpbWLBOCO0,27750
|
|
5
|
-
coderouter/doctor.py,sha256=
|
|
5
|
+
coderouter/doctor.py,sha256=Gs9KIYHvXBwhnNlt5rzx6vceEliV5gJ6cBuf_pMLQ6A,72417
|
|
6
6
|
coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
|
|
7
7
|
coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
|
|
8
8
|
coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
|
|
@@ -37,8 +37,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
|
|
|
37
37
|
coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
|
|
38
38
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
39
39
|
coderouter/translation/tool_repair.py,sha256=fyxDb4kWHytO5JWq5y0i4tinJUtWqhMCkyfoCf5BjeM,8314
|
|
40
|
-
coderouter_cli-1.8.
|
|
41
|
-
coderouter_cli-1.8.
|
|
42
|
-
coderouter_cli-1.8.
|
|
43
|
-
coderouter_cli-1.8.
|
|
44
|
-
coderouter_cli-1.8.
|
|
40
|
+
coderouter_cli-1.8.5.dist-info/METADATA,sha256=7WObVqSwWtedbYe0LUFR7ZoXR_r0tQRquER992PFAKM,45119
|
|
41
|
+
coderouter_cli-1.8.5.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
42
|
+
coderouter_cli-1.8.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
43
|
+
coderouter_cli-1.8.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
44
|
+
coderouter_cli-1.8.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|