coderouter-cli 1.8.2__py3-none-any.whl → 1.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,14 +48,25 @@ logger = get_logger(__name__)
48
48
  _RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
49
49
 
50
50
 
51
+ # v1.8.3: non-standard reasoning fields emitted by various upstreams.
52
+ # Different runtimes use different field names for the same concept:
53
+ # * ``reasoning`` — OpenRouter free models (gpt-oss-120b:free
54
+ # confirmed 2026-04-20), Ollama
55
+ # * ``reasoning_content`` — llama.cpp ``llama-server`` (Qwen3.6 etc.,
56
+ # confirmed 2026-04-26 with Unsloth GGUF)
57
+ # Strict OpenAI clients reject either as an unknown key. The strip
58
+ # function below removes both at the adapter boundary so downstream
59
+ # layers never see them, regardless of which runtime fronts the model.
60
+ _NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")
61
+
62
+
51
63
  def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
52
- """Remove non-standard ``reasoning`` keys from a choices list, in place.
64
+ """Remove non-standard reasoning keys from a choices list, in place.
53
65
 
54
- v0.5-C: Some OpenRouter free models (confirmed on
55
- ``openai/gpt-oss-120b:free`` 2026-04-20) return a ``reasoning`` field
56
- alongside ``content`` on each choice. The field is not in the OpenAI
57
- Chat Completions spec and strict clients can reject the unknown key.
58
- We strip it at the adapter boundary so downstream layers never see it.
66
+ v0.5-C originally targeted OpenRouter's ``reasoning`` field. v1.8.3
67
+ extends the strip to ``reasoning_content`` (llama.cpp ``llama-server``
68
+ naming) since both denote the same hidden chain-of-thought trace and
69
+ neither is part of the OpenAI Chat Completions spec.
59
70
 
60
71
  Args:
61
72
  choices: The ``choices`` list from the response body or stream chunk.
@@ -64,7 +75,7 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
64
75
  ``False`` for non-streaming responses (look in ``choice["message"]``).
65
76
 
66
77
  Returns:
67
- True iff at least one ``reasoning`` key was removed. Callers use
78
+ True iff at least one reasoning key was removed. Callers use
68
79
  this to decide whether to emit a one-shot log line.
69
80
  """
70
81
  if not choices:
@@ -75,9 +86,12 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
75
86
  if not isinstance(choice, dict):
76
87
  continue
77
88
  inner = choice.get(inner_key)
78
- if isinstance(inner, dict) and "reasoning" in inner:
79
- inner.pop("reasoning", None)
80
- stripped = True
89
+ if not isinstance(inner, dict):
90
+ continue
91
+ for key in _NON_STANDARD_REASONING_KEYS:
92
+ if key in inner:
93
+ inner.pop(key, None)
94
+ stripped = True
81
95
  return stripped
82
96
 
83
97
 
@@ -235,15 +249,17 @@ class OpenAICompatAdapter(BaseAdapter):
235
249
  retryable=False,
236
250
  ) from exc
237
251
 
238
- # v0.5-C: passive strip of non-standard `reasoning` field on choices.
239
- # No-op when the provider opted into passthrough.
252
+ # v0.5-C / v1.8.3: passive strip of non-standard reasoning fields
253
+ # on choices (covers both Ollama/OpenRouter ``reasoning`` and
254
+ # llama.cpp ``reasoning_content``). No-op when the provider opted
255
+ # into passthrough.
240
256
  if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
241
257
  data.get("choices"), delta_key=False
242
258
  ):
243
259
  log_capability_degraded(
244
260
  logger,
245
261
  provider=self.name,
246
- dropped=["reasoning"],
262
+ dropped=list(_NON_STANDARD_REASONING_KEYS),
247
263
  reason="non-standard-field",
248
264
  )
249
265
 
@@ -344,7 +360,7 @@ class OpenAICompatAdapter(BaseAdapter):
344
360
  log_capability_degraded(
345
361
  logger,
346
362
  provider=self.name,
347
- dropped=["reasoning"],
363
+ dropped=list(_NON_STANDARD_REASONING_KEYS),
348
364
  reason="non-standard-field",
349
365
  )
350
366
  reasoning_logged = True
coderouter/doctor.py CHANGED
@@ -458,6 +458,18 @@ _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256
458
458
  _NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024
459
459
  _STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512
460
460
  _STREAMING_PROBE_MAX_TOKENS_THINKING = 1024
461
+ # v1.8.3: tool_calls probe also needs thinking-aware budget. The
462
+ # pre-v1.8.3 default of 64 was tight even for non-thinking models
463
+ # (the assistant often emits a brief preamble before the JSON tool
464
+ # call), and on thinking models (Qwen3.6, Gemma 4, gpt-oss, deepseek-r1)
465
+ # the entire 64-token budget gets consumed by ``reasoning_content``
466
+ # before any ``tool_calls`` can surface — producing a false-positive
467
+ # NEEDS_TUNING with the WRONG remediation (suggested patch flips
468
+ # ``tools`` to false even though the model supports them perfectly).
469
+ # 256/1024 brings the budget into line with the num_ctx / streaming
470
+ # probes (same _is_reasoning_model gate).
471
+ _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT = 256
472
+ _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING = 1024
461
473
  # Default ``num_predict`` suggested in the emitted patch. -1 would be
462
474
  # optimal (uncapped) but "4096" communicates intent more clearly to
463
475
  # operators unfamiliar with Ollama's sentinel value, and covers Claude
@@ -531,9 +543,7 @@ def _is_reasoning_model(
531
543
  return True
532
544
  if resolved.thinking is True:
533
545
  return True
534
- if resolved.reasoning_passthrough is True:
535
- return True
536
- return False
546
+ return resolved.reasoning_passthrough is True
537
547
 
538
548
 
539
549
  _PROBE_BASIC_USER_PROMPT = "Reply with exactly the single word: PONG"
@@ -826,6 +836,22 @@ async def _probe_num_ctx(
826
836
  )
827
837
 
828
838
  # Canary missing → truncation occurred.
839
+ #
840
+ # v1.8.5: with the v1.8.3 thinking-aware response budget already
841
+ # applied (max_tokens=1024 for reasoning models — see
842
+ # ``_NUM_CTX_PROBE_MAX_TOKENS_THINKING``), a missing canary cannot
843
+ # be blamed on an under-sized reply. The fault is genuinely on the
844
+ # prompt side: the upstream truncated the input before the model
845
+ # could see the canary token at the head. This sharpens the
846
+ # remediation — we are confident bumping ``num_ctx`` is the right
847
+ # fix, not bumping the response budget.
848
+ thinking = _is_reasoning_model(provider, resolved)
849
+ budget_note = (
850
+ f" Probe sent max_tokens={max_tokens} (thinking-aware), so the "
851
+ "miss is prompt-side truncation rather than reply truncation."
852
+ if thinking
853
+ else ""
854
+ )
829
855
  if declared is None:
830
856
  return ProbeResult(
831
857
  name="num_ctx",
@@ -835,7 +861,7 @@ async def _probe_num_ctx(
835
861
  "upstream truncated the prompt. No `extra_body.options.num_ctx` "
836
862
  "is declared, so Ollama is running at its 2048-token default, "
837
863
  "which cannot hold Claude Code's system + tool prompts "
838
- "(plan.md §9.4 symptom #1)."
864
+ f"(plan.md §9.4 symptom #1).{budget_note}"
839
865
  ),
840
866
  target_file="providers.yaml",
841
867
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -847,7 +873,8 @@ async def _probe_num_ctx(
847
873
  detail=(
848
874
  f"canary missing — declared num_ctx={declared} is below "
849
875
  f"the {_NUM_CTX_ADEQUATE_THRESHOLD}-token threshold needed "
850
- "for Claude Code prompts. Bump it (plan.md §9.4 symptom #1)."
876
+ f"for Claude Code prompts. Bump it (plan.md §9.4 symptom "
877
+ f"#1).{budget_note}"
851
878
  ),
852
879
  target_file="providers.yaml",
853
880
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -865,7 +892,7 @@ async def _probe_num_ctx(
865
892
  "declared value, or the upstream is silently capping it — "
866
893
  "verify with the model card / server logs. The suggested "
867
894
  "patch still emits 32768 as a starting point; dial down if "
868
- "the host is memory-constrained."
895
+ f"the host is memory-constrained.{budget_note}"
869
896
  ),
870
897
  target_file="providers.yaml",
871
898
  suggested_patch=_patch_providers_yaml_num_ctx(provider.name, 32768),
@@ -1031,6 +1058,24 @@ async def _probe_streaming(
1031
1058
  # mid-word". Since we're already Ollama-shape-gated, the
1032
1059
  # remediation is always the ``extra_body.options.num_predict``
1033
1060
  # bump.
1061
+ #
1062
+ # v1.8.5: with v1.8.3's thinking-aware probe budget already
1063
+ # applied (max_tokens=1024 for reasoning models), a length cap
1064
+ # here cannot be blamed on the probe budget — the upstream is
1065
+ # the one capping. Surface the budget used so the operator can
1066
+ # rule it out at a glance.
1067
+ thinking = _is_reasoning_model(provider, resolved)
1068
+ budget_note = (
1069
+ f"Probe sent max_tokens={max_tokens} (thinking-aware), so "
1070
+ "the cap is server-side `options.num_predict` rather than "
1071
+ "the probe budget."
1072
+ if thinking
1073
+ else (
1074
+ f"Probe sent max_tokens={max_tokens}; the cap is "
1075
+ "server-side `options.num_predict` rather than the "
1076
+ "probe budget."
1077
+ )
1078
+ )
1034
1079
  return ProbeResult(
1035
1080
  name="streaming",
1036
1081
  verdict=ProbeVerdict.NEEDS_TUNING,
@@ -1038,9 +1083,9 @@ async def _probe_streaming(
1038
1083
  f"stream closed with `finish_reason='length'` after only "
1039
1084
  f"{len(content)} chars (expected ≥ "
1040
1085
  f"{_STREAMING_PROBE_MIN_EXPECTED_CHARS}). Upstream is "
1041
- "capping output — most likely `options.num_predict`. "
1042
- "Bump it via `extra_body` (plan.md §9.4 symptom #1 "
1043
- "streaming variant)."
1086
+ f"capping output — most likely `options.num_predict`. "
1087
+ f"{budget_note} Bump it via `extra_body` (plan.md §9.4 "
1088
+ "symptom #1 streaming variant)."
1044
1089
  ),
1045
1090
  target_file="providers.yaml",
1046
1091
  suggested_patch=_patch_providers_yaml_num_predict(
@@ -1093,6 +1138,16 @@ async def _probe_tool_calls(
1093
1138
  If declaration says True → NEEDS_TUNING (flip to False). If
1094
1139
  False → OK.
1095
1140
  """
1141
+ # v1.8.3: thinking-aware budget — the pre-v1.8.3 default of 64 was
1142
+ # consumed by ``reasoning_content`` on thinking models (Qwen3.6,
1143
+ # Gemma 4, gpt-oss, deepseek-r1) before any ``tool_calls`` could
1144
+ # surface, producing a false-positive NEEDS_TUNING that recommended
1145
+ # flipping ``tools`` to false — the exact opposite of what's needed.
1146
+ max_tokens = (
1147
+ _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING
1148
+ if _is_reasoning_model(provider, resolved)
1149
+ else _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT
1150
+ )
1096
1151
  if provider.kind == "anthropic":
1097
1152
  # Anthropic native tools use a different wire shape; we probe
1098
1153
  # via the messages API. A capable model returns content blocks
@@ -1104,7 +1159,7 @@ async def _probe_tool_calls(
1104
1159
  "messages": [
1105
1160
  {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
1106
1161
  ],
1107
- "max_tokens": 64,
1162
+ "max_tokens": max_tokens,
1108
1163
  "tools": [_PROBE_TOOL_SPEC_ANTHROPIC],
1109
1164
  }
1110
1165
  else:
@@ -1115,7 +1170,7 @@ async def _probe_tool_calls(
1115
1170
  "messages": [
1116
1171
  {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
1117
1172
  ],
1118
- "max_tokens": 64,
1173
+ "max_tokens": max_tokens,
1119
1174
  "temperature": 0,
1120
1175
  "tools": [_PROBE_TOOL_SPEC_OPENAI],
1121
1176
  }
@@ -1219,13 +1274,33 @@ async def _probe_tool_calls(
1219
1274
 
1220
1275
  # Nothing tool-shaped at all.
1221
1276
  if declared:
1277
+ # v1.8.5: with the v1.8.3 thinking-aware budget already applied,
1278
+ # we can speak with confidence here: the model genuinely did not
1279
+ # emit tool_calls (it's not a budget-exhaustion false-positive
1280
+ # like the pre-v1.8.3 64-token cap used to produce). For thinking
1281
+ # models specifically, the 1024-token budget covers
1282
+ # ``reasoning_content`` *and* a tool call — so a missing
1283
+ # ``tool_calls`` here is real. Surface the budget that was used
1284
+ # so operators reading the message understand what was probed.
1285
+ thinking = _is_reasoning_model(provider, resolved)
1286
+ budget_note = (
1287
+ f"Probed with thinking-aware budget ({max_tokens} tokens, "
1288
+ "covers `reasoning_content` plus the call) — this is a true "
1289
+ "tools=false case, not budget exhaustion."
1290
+ if thinking
1291
+ else (
1292
+ f"Probed with default budget ({max_tokens} tokens) — "
1293
+ "the model produced no tool-shaped output at all."
1294
+ )
1295
+ )
1222
1296
  return ProbeResult(
1223
1297
  name="tool_calls",
1224
1298
  verdict=ProbeVerdict.NEEDS_TUNING,
1225
1299
  detail=(
1226
1300
  "declaration says tools=true but model produced neither "
1227
- "native `tool_calls` nor repairable tool JSON. Common for "
1228
- "quantized small models (plan.md §9.4 symptom #2)."
1301
+ "native `tool_calls` nor repairable tool JSON. "
1302
+ f"{budget_note} Common for quantized small models "
1303
+ "(plan.md §9.4 symptom #2)."
1229
1304
  ),
1230
1305
  target_file="providers.yaml",
1231
1306
  suggested_patch=_patch_providers_yaml_capability(provider.name, "tools", False),
@@ -1439,7 +1514,12 @@ async def _probe_reasoning_leak(
1439
1514
  )
1440
1515
 
1441
1516
  msg = _extract_openai_assistant_choice(parsed)
1442
- has_reasoning = bool(msg and "reasoning" in msg)
1517
+ # v1.8.3: detect llama.cpp's ``reasoning_content`` alongside Ollama /
1518
+ # OpenRouter's ``reasoning`` — they're the same concept under different
1519
+ # field names, and the openai_compat adapter strips both since v1.8.3.
1520
+ has_reasoning = bool(
1521
+ msg and ("reasoning" in msg or "reasoning_content" in msg)
1522
+ )
1443
1523
 
1444
1524
  # v1.0-A: content-embedded marker detection.
1445
1525
  content = (msg.get("content") if isinstance(msg, dict) else None) or ""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 1.8.2
3
+ Version: 1.8.5
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  <p align="center">
61
61
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
62
62
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
63
- <a href=""><img src="https://img.shields.io/badge/version-1.8.0-blue" alt="version"></a>
63
+ <a href=""><img src="https://img.shields.io/badge/version-1.8.5-blue" alt="version"></a>
64
64
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
65
65
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
66
66
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -99,11 +99,13 @@ Description-Content-Type: text/markdown
99
99
  | **無料で回す** | [無料枠ガイド](./docs/free-tier-guide.md) | NVIDIA NIM 40 req/min × OpenRouter 無料枠の使い分け・live 検証済みモデル表・地雷 5 点 |
100
100
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
101
101
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
102
+ | **llama.cpp 直叩き** | [llama.cpp 直叩きガイド](./docs/llamacpp-direct.md) | Qwen3.6 を Ollama 詰みから救出する経路。`llama.cpp` build → Unsloth GGUF → `llama-server` → CodeRouter 接続を 7 step で(v1.8.3 実機検証済)|
103
+ | **LM Studio 直接** | [LM Studio 直接ガイド](./docs/lmstudio-direct.md) | `qwen35` / `qwen35moe` を救う第 2 経路。LM Studio 0.4.12+ Local Server 経由で OpenAI 互換 + Anthropic 互換 (`/v1/messages`) 両対応、prompt caching 透過(v1.8.4 実機検証済)|
102
104
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
103
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.0: 用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化) |
105
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.5: doctor NEEDS_TUNING メッセージを v1.8.3 thinking-aware budget の事実に揃える + `docs/lmstudio-direct.md` 新規) |
104
106
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
105
107
 
106
- English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
108
+ English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [llama.cpp direct](./docs/llamacpp-direct.en.md) · [LM Studio direct](./docs/lmstudio-direct.en.md) · [Security](./docs/security.en.md)
107
109
 
108
110
  ## CodeRouter で何が楽になるか
109
111
 
@@ -175,7 +177,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ
175
177
 
176
178
  ## クイックスタート(3 コマンド)
177
179
 
178
- **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加しました。`uvx` 一発で動きます (Python 3.12 以上必須):
180
+ **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加、**v1.8.2 で doctor probe を thinking モデル対応**にしました。`uvx` 一発で動きます (Python 3.12 以上必須):
179
181
 
180
182
  ```bash
181
183
  # 1. サンプル設定を置く
@@ -184,7 +186,9 @@ curl -fsSL https://raw.githubusercontent.com/zephel01/CodeRouter/main/examples/p
184
186
  > ~/.coderouter/providers.yaml
185
187
 
186
188
  # 2. uvx で起動 (インストール + 起動が 1 行)
187
- uvx coderouter-cli serve --port 8088
189
+ # PyPI 配布名 (coderouter-cli) と console script 名 (coderouter) が異なるため、
190
+ # uv 0.11+ では --from 形式が必須 (旧 uv でも動く canonical 形式)
191
+ uvx --from coderouter-cli coderouter serve --port 8088
188
192
  ```
189
193
 
190
194
  恒久的にインストールしておきたい場合:
@@ -205,7 +209,7 @@ uv run coderouter serve --port 8088
205
209
 
206
210
  > **注**: PyPI 上のパッケージ名は `coderouter-cli` ですが、コマンド名と Python import 名は `coderouter` のままです。詳しくは [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) 参照。
207
211
  >
208
- > **v1.8.0 の `--apply` 自動化を使う場合**: `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
212
+ > **`--apply` 自動化を使う場合** (v1.8.0+): `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
209
213
 
210
214
  あとは任意の OpenAI クライアントを `http://127.0.0.1:8088` に向けるだけです:
211
215
 
@@ -257,7 +261,7 @@ CodeRouter 自体は純 Python 3.12+ で、実質的な OS 対応範囲は `min(
257
261
 
258
262
  **リリース単位の詳細が欲しい?** v0.x と v1.0-A/B/C の各スライス — 何が入り、何本のテストが増え、なぜ必要だったのか — は [CHANGELOG.md](./CHANGELOG.md) に揃っています。設計の不変項と今後のロードマップは [plan.md](./plan.md)。
259
263
 
260
- **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx coderouter-cli`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
264
+ **次の予定**(v1.0 は [plan.md §10](./plan.md)、v1.0+ は §18): v1.5 ✅ メトリクス / `/dashboard` / `coderouter stats` TUI / `scripts/demo_traffic.sh`、v1.6 ✅ `auto_router` (task-aware routing) + NVIDIA NIM 無料枠 + トラブルシュートドキュメント分離 + `--env-file` / `doctor --check-env`、v1.7 ✅ PyPI 公開 (`uvx --from coderouter-cli coderouter`)、v1.8 ✅ 用途別 4 プロファイル (multi/coding/general/reasoning) + Gemma 4 / Qwen3.6 / Z.AI (GLM) 登録 + `setup.sh` onboarding ウィザード + `coderouter doctor --check-model --apply` (非破壊 YAML 書き戻し) + `claude_code_suitability` startup チェック + Trusted Publishing 自動化。残り (v1.9 候補) は `coderouter doctor --network` (CI 用) / launcher スクリプト / 起動時アップデートチェック (opt-in)。
261
265
 
262
266
  ### Claude Code と一緒に使う
263
267
 
@@ -432,7 +436,7 @@ suggested patch for ~/.coderouter/providers.yaml:
432
436
  - v1.0 ✅ — 14 ケースのリグレッションスイート、Code Mode (スリム版 Claude Code ハーネス); 出力クリーニングは **v1.0-A** で `output_filters` チェーンとして完了
433
437
  - v1.5 ✅ — **メトリクスダッシュボード(出荷済み)** — `MetricsCollector` + `GET /metrics.json` + `GET /metrics` (Prometheus) + `GET /dashboard` (HTML 1 ページ) + `coderouter stats` curses TUI + `scripts/demo_traffic.sh` トラフィックジェネレータ + `display_timezone` 設定
434
438
  - v1.6 ✅ — `auto_router` (task-aware routing、`default_profile: auto` で画像/コード濃度/その他を自動振り分け) + NVIDIA NIM 無料枠 8 段チェーン + ドキュメント言語スワップ (JA primary) + トラブルシュート独立ドキュメント + `--env-file` / `doctor --check-env`
435
- - v1.7 ✅ — PyPI 公開 (`uvx coderouter-cli` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
439
+ - v1.7 ✅ — PyPI 公開 (`uvx --from coderouter-cli coderouter` で 1 行起動) + Trusted Publishing 経路 (release.yml で自動 publish)
436
440
  - v1.8 ✅ — **用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化**: `multi` (default) / `coding` / `general` / `reasoning` の 4 プロファイル + 全プロファイルに `append_system_prompt` で Claude 風応答 nudge + `mode_aliases` (default/fast/vision/think/cheap)、Ollama 公式 tag 化された `gemma4:e4b/26b/31b` / `qwen3.6:27b/35b` を active stanza に格上げ、Z.AI を OpenAI-compat で 2 base_url 提供 (Coding Plan / General API)、`coderouter doctor --check-model --apply` で YAML パッチを非破壊書き戻し (`ruamel.yaml` round-trip でコメント・key 順序保持、冪等)、`setup.sh` onboarding ウィザード、`claude_code_suitability` startup チェック (Llama-3.3-70B 系を `claude-code-*` profile で WARN)。残り (v1.9 以降): `coderouter doctor --network` (CI 用)、launcher スクリプト (`.command` / `.sh` / `.bat`)、opt-in 起動時アップデートチェック
437
441
 
438
442
  ## `kind: openai_compat` と `kind: anthropic` の選び方
@@ -2,7 +2,7 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
2
2
  coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
3
3
  coderouter/cli.py,sha256=vI1-dv10t4-xG6Zpt7zi_3U8xGgq54Qa8XIMUYpfOV8,19859
4
4
  coderouter/cli_stats.py,sha256=ae20xUr_hjX09Ms3fBZGZsUS52o44JC57EpbWLBOCO0,27750
5
- coderouter/doctor.py,sha256=atYOr73LLI3lKHjhFDY0lea41_0jolfiY2zb15La_O8,68116
5
+ coderouter/doctor.py,sha256=Gs9KIYHvXBwhnNlt5rzx6vceEliV5gJ6cBuf_pMLQ6A,72417
6
6
  coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
7
7
  coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
8
8
  coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
@@ -11,7 +11,7 @@ coderouter/output_filters.py,sha256=rI4YgKVv5vviDBl3Xkf7rp6LaSSkdWyEV004q6HrkB0,
11
11
  coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
12
12
  coderouter/adapters/anthropic_native.py,sha256=qfdjxy4YyLt-0Fj7hUYn1oi1SFjEEbSvpaRBUC2hMf4,21903
13
13
  coderouter/adapters/base.py,sha256=H4uM6r_-95Xs1hCM_X4Zv3tq-xN3cXWLj83F-QjPNLw,8265
14
- coderouter/adapters/openai_compat.py,sha256=EC9zNYPGgSOVZyaH1dXRXO1VMN1RjBX5FZ2vEgTkVD8,17100
14
+ coderouter/adapters/openai_compat.py,sha256=9qoJfLR2vVnyM8isb9G4j-Dk5QBHFlneOaBSY-P4UAg,17941
15
15
  coderouter/adapters/registry.py,sha256=Syt3eDljWZAK5mfiJGvUMKaZYAfCRScp7PvV6pYt7mc,683
16
16
  coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g,274
17
17
  coderouter/config/capability_registry.py,sha256=oypl6Z-YjvNoC87AdSIm1C7XE_MZoFq_7Ivm3eRH3cI,14379
@@ -37,8 +37,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
37
37
  coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
38
38
  coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
39
39
  coderouter/translation/tool_repair.py,sha256=fyxDb4kWHytO5JWq5y0i4tinJUtWqhMCkyfoCf5BjeM,8314
40
- coderouter_cli-1.8.2.dist-info/METADATA,sha256=MBkIOwnySR2wfw-JhBtGFipk_MqbV8J_4aAqWZ65A7g,44136
41
- coderouter_cli-1.8.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
42
- coderouter_cli-1.8.2.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
43
- coderouter_cli-1.8.2.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
44
- coderouter_cli-1.8.2.dist-info/RECORD,,
40
+ coderouter_cli-1.8.5.dist-info/METADATA,sha256=7WObVqSwWtedbYe0LUFR7ZoXR_r0tQRquER992PFAKM,45119
41
+ coderouter_cli-1.8.5.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
42
+ coderouter_cli-1.8.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
43
+ coderouter_cli-1.8.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
44
+ coderouter_cli-1.8.5.dist-info/RECORD,,