coderouter-cli 1.8.1__py3-none-any.whl → 1.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,14 +48,25 @@ logger = get_logger(__name__)
48
48
  _RETRYABLE_STATUSES = {404, 408, 425, 429, 500, 502, 503, 504}
49
49
 
50
50
 
51
+ # v1.8.3: non-standard reasoning fields emitted by various upstreams.
52
+ # Different runtimes use different field names for the same concept:
53
+ # * ``reasoning`` — OpenRouter free models (gpt-oss-120b:free
54
+ # confirmed 2026-04-20), Ollama
55
+ # * ``reasoning_content`` — llama.cpp ``llama-server`` (Qwen3.6 etc.,
56
+ # confirmed 2026-04-26 with Unsloth GGUF)
57
+ # Strict OpenAI clients reject either as an unknown key. The strip
58
+ # function below removes both at the adapter boundary so downstream
59
+ # layers never see them, regardless of which runtime fronts the model.
60
+ _NON_STANDARD_REASONING_KEYS = ("reasoning", "reasoning_content")
61
+
62
+
51
63
  def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: bool) -> bool:
52
- """Remove non-standard ``reasoning`` keys from a choices list, in place.
64
+ """Remove non-standard reasoning keys from a choices list, in place.
53
65
 
54
- v0.5-C: Some OpenRouter free models (confirmed on
55
- ``openai/gpt-oss-120b:free`` 2026-04-20) return a ``reasoning`` field
56
- alongside ``content`` on each choice. The field is not in the OpenAI
57
- Chat Completions spec and strict clients can reject the unknown key.
58
- We strip it at the adapter boundary so downstream layers never see it.
66
+ v0.5-C originally targeted OpenRouter's ``reasoning`` field. v1.8.3
67
+ extends the strip to ``reasoning_content`` (llama.cpp ``llama-server``
68
+ naming) since both denote the same hidden chain-of-thought trace and
69
+ neither is part of the OpenAI Chat Completions spec.
59
70
 
60
71
  Args:
61
72
  choices: The ``choices`` list from the response body or stream chunk.
@@ -64,7 +75,7 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
64
75
  ``False`` for non-streaming responses (look in ``choice["message"]``).
65
76
 
66
77
  Returns:
67
- True iff at least one ``reasoning`` key was removed. Callers use
78
+ True iff at least one reasoning key was removed. Callers use
68
79
  this to decide whether to emit a one-shot log line.
69
80
  """
70
81
  if not choices:
@@ -75,9 +86,12 @@ def _strip_reasoning_field(choices: list[dict[str, Any]] | None, *, delta_key: b
75
86
  if not isinstance(choice, dict):
76
87
  continue
77
88
  inner = choice.get(inner_key)
78
- if isinstance(inner, dict) and "reasoning" in inner:
79
- inner.pop("reasoning", None)
80
- stripped = True
89
+ if not isinstance(inner, dict):
90
+ continue
91
+ for key in _NON_STANDARD_REASONING_KEYS:
92
+ if key in inner:
93
+ inner.pop(key, None)
94
+ stripped = True
81
95
  return stripped
82
96
 
83
97
 
@@ -235,15 +249,17 @@ class OpenAICompatAdapter(BaseAdapter):
235
249
  retryable=False,
236
250
  ) from exc
237
251
 
238
- # v0.5-C: passive strip of non-standard `reasoning` field on choices.
239
- # No-op when the provider opted into passthrough.
252
+ # v0.5-C / v1.8.3: passive strip of non-standard reasoning fields
253
+ # on choices (covers both Ollama/OpenRouter ``reasoning`` and
254
+ # llama.cpp ``reasoning_content``). No-op when the provider opted
255
+ # into passthrough.
240
256
  if not self.config.capabilities.reasoning_passthrough and _strip_reasoning_field(
241
257
  data.get("choices"), delta_key=False
242
258
  ):
243
259
  log_capability_degraded(
244
260
  logger,
245
261
  provider=self.name,
246
- dropped=["reasoning"],
262
+ dropped=list(_NON_STANDARD_REASONING_KEYS),
247
263
  reason="non-standard-field",
248
264
  )
249
265
 
@@ -344,7 +360,7 @@ class OpenAICompatAdapter(BaseAdapter):
344
360
  log_capability_degraded(
345
361
  logger,
346
362
  provider=self.name,
347
- dropped=["reasoning"],
363
+ dropped=list(_NON_STANDARD_REASONING_KEYS),
348
364
  reason="non-standard-field",
349
365
  )
350
366
  reasoning_logged = True
@@ -176,47 +176,59 @@ rules:
176
176
  # 「Claude Code 代替として最高」「local champ」と評価されている。
177
177
  #
178
178
  # ただし v1.8.0 までで `claude_code_suitability: ok` を declare していた
179
- # のは note 記事の伝聞ベースの先回り宣言で、v1.8.1 (2026-04-26)
180
- # 実機検証 (M3 Max 32GB / Ollama 0.21.2) で次の課題が判明:
181
- # - num_ctx declare 32768 しても Ollama 側で silent に縮められる
182
- # (canary echo-back probe 失敗)
179
+ # のは note 記事の伝聞ベースの先回り宣言で、v1.8.1 〜 v1.8.2
180
+ # (2026-04-26) の実機検証 (M3 Max 64GB / Ollama 0.21.2) で:
181
+ # - num_ctx / streaming の NEEDS_TUNING は、v1.8.2 の thinking モデル向け
182
+ # probe バジェット拡大により偽陽性と判明 (doctor 側の課題)
183
183
  # - tool_calls probe が native tool_calls / 修復可能 JSON のいずれも
184
- # 返さず NEEDS_TUNING
185
- # - streaming probe finish_reason='length' で 0 chars 打ち切り
186
- # これらは Ollama 経由特有の問題で、HF / vLLM 直接ロードなら違う可能性。
187
- # 確証ない以上、`claude_code_suitability` は撤回し `tools` 宣言だけ残す。
184
+ # 返さない真の課題が残る (Qwen3.6 系の Ollama 経由 tool 仕様未成熟)
185
+ # tool_calls 不全が解消されるまで `claude_code_suitability` は撤回。
188
186
  # 実機で動いたユーザーは `~/.coderouter/model-capabilities.yaml` で
189
187
  # `claude_code_suitability: ok` を上書きできる。
190
188
  # ------------------------------------------------------------------
191
189
 
190
+ # v1.8.2: thinking: true は doctor probe (num_ctx / streaming) が reasoning
191
+ # トークン消費分の max_tokens 余裕を確保するためのヒント。Qwen3 系は
192
+ # /think モードで thinking トークンを吐く設計なので true 宣言。
192
193
  - match: "qwen3.6:*"
193
194
  kind: openai_compat
194
195
  capabilities:
195
196
  tools: true
197
+ thinking: true
196
198
 
197
199
  - match: "qwen/qwen3.6-*"
198
200
  kind: openai_compat
199
201
  capabilities:
200
202
  tools: true
203
+ thinking: true
201
204
 
202
205
  # ------------------------------------------------------------------
203
- # Gemma 4 family (v1.7-B 追加)
206
+ # Gemma 4 family (v1.7-B 追加、v1.8.2 で thinking: true 宣言)
204
207
  #
205
208
  # Google 公式 Gemma 4。Ollama 公式 tag は gemma4:e2b / e4b / 26b / 31b、
206
209
  # 全 variant が tools+vision+thinking 対応、E2B/E4B は audio もサポート。
207
210
  # MoE (26b は active 3.8B / total 25.2B)。note 記事で「日常・バランスの
208
211
  # 王者」と評価。Claude Haiku 互換性に近い簡潔な応答スタイル。
212
+ #
213
+ # v1.8.2 (2026-04-26): 実機検証 (M3 Max 64GB / Ollama 0.21.2 / gemma4:26b)
214
+ # で `reasoning` フィールドにかなりの量のトークンを吐く thinking モデル
215
+ # と確認。doctor probe の max_tokens=32 / 128 が thinking トークンに
216
+ # 食い切られて偽陽性 NEEDS_TUNING を出していた。registry で
217
+ # `thinking: true` を宣言すると doctor が probe バジェットを 1024 まで
218
+ # 引き上げて偽陽性を回避する。
209
219
  # ------------------------------------------------------------------
210
220
 
211
221
  - match: "gemma4:*"
212
222
  kind: openai_compat
213
223
  capabilities:
214
224
  tools: true
225
+ thinking: true
215
226
 
216
227
  - match: "google/gemma-4*"
217
228
  kind: openai_compat
218
229
  capabilities:
219
230
  tools: true
231
+ thinking: true
220
232
 
221
233
  # ------------------------------------------------------------------
222
234
  # GLM family (Z.AI / Zhipu AI、v1.7-B 追加)
coderouter/doctor.py CHANGED
@@ -433,6 +433,43 @@ _STREAMING_PROBE_USER_PROMPT = (
433
433
  # truncated". "1\n2\n...\n30" is ~80 chars; 40 chars covers the halfway
434
434
  # mark (1..20) which is already obviously-truncated territory.
435
435
  _STREAMING_PROBE_MIN_EXPECTED_CHARS = 40
436
+
437
+ # v1.8.2: probe response budgets.
438
+ #
439
+ # Both num_ctx and streaming probes ask the model for a *short* answer
440
+ # (the canary token / "1..30"). The original budgets (32 / 128 tokens)
441
+ # assumed a non-thinking model that emits the answer immediately. On a
442
+ # thinking model — Gemma 4 26B, Qwen3.6, gpt-oss, deepseek-r1 — the
443
+ # upstream burns the entire budget on a hidden ``reasoning`` field
444
+ # *before* emitting any visible ``content``, producing a false-positive
445
+ # NEEDS_TUNING (canary missing / 0 chars streamed). Bumping the budget
446
+ # is the cleanest fix: non-thinking models stop early at their natural
447
+ # stop token (no waste), thinking models get headroom for the reasoning
448
+ # trace plus the actual answer.
449
+ #
450
+ # Numbers picked from the v1.8.1 reality-check session
451
+ # (docs/articles/note-v1-8-1-reality-check.md):
452
+ # * Gemma 4 26B reasoning prefix observed at ~150-300 tokens before
453
+ # content starts → 1024 covers reasoning + 30-line count comfortably.
454
+ # * Non-thinking baseline kept conservative-but-non-tight (256/512) to
455
+ # absorb stylistic preambles ("Sure, the answer is...") without
456
+ # burning extra cloud quota when the operator probes a paid endpoint.
457
+ _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256
458
+ _NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024
459
+ _STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512
460
+ _STREAMING_PROBE_MAX_TOKENS_THINKING = 1024
461
+ # v1.8.3: tool_calls probe also needs thinking-aware budget. The
462
+ # pre-v1.8.3 default of 64 was tight even for non-thinking models
463
+ # (the assistant often emits a brief preamble before the JSON tool
464
+ # call), and on thinking models (Qwen3.6, Gemma 4, gpt-oss, deepseek-r1)
465
+ # the entire 64-token budget gets consumed by ``reasoning_content``
466
+ # before any ``tool_calls`` can surface — producing a false-positive
467
+ # NEEDS_TUNING with the WRONG remediation (suggested patch flips
468
+ # ``tools`` to false even though the model supports them perfectly).
469
+ # 256/1024 brings the budget into line with the num_ctx / streaming
470
+ # probes (same _is_reasoning_model gate).
471
+ _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT = 256
472
+ _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING = 1024
436
473
  # Default ``num_predict`` suggested in the emitted patch. -1 would be
437
474
  # optimal (uncapped) but "4096" communicates intent more clearly to
438
475
  # operators unfamiliar with Ollama's sentinel value, and covers Claude
@@ -475,6 +512,40 @@ def _declared_num_ctx(provider: ProviderConfig) -> int | None:
475
512
  return val if isinstance(val, int) else None
476
513
 
477
514
 
515
+ def _is_reasoning_model(
516
+ provider: ProviderConfig, resolved: ResolvedCapabilities
517
+ ) -> bool:
518
+ """v1.8.2: True iff the model is known to emit a hidden reasoning trace.
519
+
520
+ Thinking models (Gemma 4, Qwen3-with-/think, gpt-oss, deepseek-r1,
521
+ Claude Sonnet 4.5+ in extended-thinking mode) burn output tokens on a
522
+ ``reasoning`` field before any visible ``content`` is produced. The
523
+ num_ctx / streaming / tool_calls probes use small response budgets that get fully
524
+ consumed by the reasoning prefix, producing a false-positive
525
+ NEEDS_TUNING. Callers use this to choose a generous probe budget.
526
+
527
+ Any one of four signals is sufficient:
528
+ * provider declared ``capabilities.thinking: true`` in providers.yaml
529
+ * provider declared ``capabilities.reasoning_passthrough: true``
530
+ (the operator opted in to passing the raw reasoning to the client,
531
+ which is only meaningful for models that emit it)
532
+ * registry resolved ``thinking: true`` or ``reasoning_passthrough: true`` for this (kind, model) pair
533
+
534
+ Conservative bias — when both provider declaration and registry are
535
+ silent, treat as non-reasoning. The probe still completes for thinking
536
+ models in that case (they just hit ``finish_reason='length'`` like
537
+ they did pre-v1.8.2), but at least the new generous default budget
538
+ (256 / 512) gives more headroom than the old 32 / 128.
539
+ """
540
+ if provider.capabilities.thinking is True:
541
+ return True
542
+ if provider.capabilities.reasoning_passthrough is True:
543
+ return True
544
+ if resolved.thinking is True:
545
+ return True
546
+ return resolved.reasoning_passthrough is True
547
+
548
+
478
549
  _PROBE_BASIC_USER_PROMPT = "Reply with exactly the single word: PONG"
479
550
  _PROBE_TOOLS_USER_PROMPT = (
480
551
  "You have one tool named `echo`. Call it with the argument "
@@ -617,7 +688,9 @@ def _extract_openai_assistant_choice(
617
688
  return msg if isinstance(msg, dict) else None
618
689
 
619
690
 
620
- async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
691
+ async def _probe_num_ctx(
692
+ provider: ProviderConfig, resolved: ResolvedCapabilities
693
+ ) -> ProbeResult:
621
694
  """v1.0-B Probe — direct detection of Ollama ``num_ctx`` truncation.
622
695
 
623
696
  Addresses plan.md §9.4 symptom #1 (空応答 / 意味不明応答). Prior to
@@ -683,11 +756,21 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
683
756
  # whatever ``options.num_ctx`` the operator has declared. Request
684
757
  # fields win over extra_body, matching the adapter's merge order.
685
758
  body: dict[str, Any] = dict(provider.extra_body)
759
+ # v1.8.2: thinking models burn output tokens on a hidden ``reasoning``
760
+ # trace before emitting any ``content``. The pre-v1.8.2 default of 32
761
+ # was tight for any preamble at all; on Gemma 4 26B it caused
762
+ # ``finish_reason='length'`` with content="" before the canary could
763
+ # surface, producing a false-positive NEEDS_TUNING.
764
+ max_tokens = (
765
+ _NUM_CTX_PROBE_MAX_TOKENS_THINKING
766
+ if _is_reasoning_model(provider, resolved)
767
+ else _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT
768
+ )
686
769
  body.update(
687
770
  {
688
771
  "model": provider.model,
689
772
  "messages": [{"role": "user", "content": user_prompt}],
690
- "max_tokens": 32,
773
+ "max_tokens": max_tokens,
691
774
  "temperature": 0,
692
775
  }
693
776
  )
@@ -799,7 +882,9 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
799
882
  )
800
883
 
801
884
 
802
- async def _probe_streaming(provider: ProviderConfig) -> ProbeResult:
885
+ async def _probe_streaming(
886
+ provider: ProviderConfig, resolved: ResolvedCapabilities
887
+ ) -> ProbeResult:
803
888
  """v1.0-C Probe — streaming completion path integrity.
804
889
 
805
890
  Addresses plan.md §9.4 symptom #1 from the **output** side. The v1.0-B
@@ -868,11 +953,18 @@ async def _probe_streaming(provider: ProviderConfig) -> ProbeResult:
868
953
  # probing. Top-level probe fields win on collision, matching adapter
869
954
  # merge order.
870
955
  body: dict[str, Any] = dict(provider.extra_body)
956
+ # v1.8.2: same thinking-model rationale as num_ctx probe — give
957
+ # reasoning a budget so the visible content has a chance to surface.
958
+ max_tokens = (
959
+ _STREAMING_PROBE_MAX_TOKENS_THINKING
960
+ if _is_reasoning_model(provider, resolved)
961
+ else _STREAMING_PROBE_MAX_TOKENS_DEFAULT
962
+ )
871
963
  body.update(
872
964
  {
873
965
  "model": provider.model,
874
966
  "messages": [{"role": "user", "content": _STREAMING_PROBE_USER_PROMPT}],
875
- "max_tokens": 128,
967
+ "max_tokens": max_tokens,
876
968
  "temperature": 0,
877
969
  "stream": True,
878
970
  }
@@ -1011,6 +1103,16 @@ async def _probe_tool_calls(
1011
1103
  If declaration says True → NEEDS_TUNING (flip to False). If
1012
1104
  False → OK.
1013
1105
  """
1106
+ # v1.8.3: thinking-aware budget — the pre-v1.8.3 default of 64 was
1107
+ # consumed by ``reasoning_content`` on thinking models (Qwen3.6,
1108
+ # Gemma 4, gpt-oss, deepseek-r1) before any ``tool_calls`` could
1109
+ # surface, producing a false-positive NEEDS_TUNING that recommended
1110
+ # flipping ``tools`` to false — the exact opposite of what's needed.
1111
+ max_tokens = (
1112
+ _TOOL_CALLS_PROBE_MAX_TOKENS_THINKING
1113
+ if _is_reasoning_model(provider, resolved)
1114
+ else _TOOL_CALLS_PROBE_MAX_TOKENS_DEFAULT
1115
+ )
1014
1116
  if provider.kind == "anthropic":
1015
1117
  # Anthropic native tools use a different wire shape; we probe
1016
1118
  # via the messages API. A capable model returns content blocks
@@ -1022,7 +1124,7 @@ async def _probe_tool_calls(
1022
1124
  "messages": [
1023
1125
  {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
1024
1126
  ],
1025
- "max_tokens": 64,
1127
+ "max_tokens": max_tokens,
1026
1128
  "tools": [_PROBE_TOOL_SPEC_ANTHROPIC],
1027
1129
  }
1028
1130
  else:
@@ -1033,7 +1135,7 @@ async def _probe_tool_calls(
1033
1135
  "messages": [
1034
1136
  {"role": "user", "content": _PROBE_TOOLS_USER_PROMPT},
1035
1137
  ],
1036
- "max_tokens": 64,
1138
+ "max_tokens": max_tokens,
1037
1139
  "temperature": 0,
1038
1140
  "tools": [_PROBE_TOOL_SPEC_OPENAI],
1039
1141
  }
@@ -1357,7 +1459,12 @@ async def _probe_reasoning_leak(
1357
1459
  )
1358
1460
 
1359
1461
  msg = _extract_openai_assistant_choice(parsed)
1360
- has_reasoning = bool(msg and "reasoning" in msg)
1462
+ # v1.8.3: detect llama.cpp's ``reasoning_content`` alongside Ollama /
1463
+ # OpenRouter's ``reasoning`` — they're the same concept under different
1464
+ # field names, and the openai_compat adapter strips both since v1.8.3.
1465
+ has_reasoning = bool(
1466
+ msg and ("reasoning" in msg or "reasoning_content" in msg)
1467
+ )
1361
1468
 
1362
1469
  # v1.0-A: content-embedded marker detection.
1363
1470
  content = (msg.get("content") if isinstance(msg, dict) else None) or ""
@@ -1506,11 +1613,11 @@ async def check_model(
1506
1613
  # declaration probes (tool_calls / thinking / reasoning-leak) should
1507
1614
  # dominate the report — streaming is the output-side sibling of
1508
1615
  # num_ctx and its NEEDS_TUNING verdict is orthogonal to the others.
1509
- report.results.append(await _probe_num_ctx(provider))
1616
+ report.results.append(await _probe_num_ctx(provider, resolved))
1510
1617
  report.results.append(await _probe_tool_calls(provider, resolved))
1511
1618
  report.results.append(await _probe_thinking(provider, resolved))
1512
1619
  report.results.append(await _probe_reasoning_leak(provider, resolved))
1513
- report.results.append(await _probe_streaming(provider))
1620
+ report.results.append(await _probe_streaming(provider, resolved))
1514
1621
  return report
1515
1622
 
1516
1623
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 1.8.1
3
+ Version: 1.8.3
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -60,7 +60,7 @@ Description-Content-Type: text/markdown
60
60
  <p align="center">
61
61
  <a href="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml"><img src="https://github.com/zephel01/CodeRouter/actions/workflows/ci.yml/badge.svg?branch=main" alt="CI"></a>
62
62
  <a href=""><img src="https://img.shields.io/badge/status-stable-brightgreen" alt="status"></a>
63
- <a href=""><img src="https://img.shields.io/badge/version-1.8.0-blue" alt="version"></a>
63
+ <a href=""><img src="https://img.shields.io/badge/version-1.8.3-blue" alt="version"></a>
64
64
  <a href=""><img src="https://img.shields.io/badge/python-3.12%2B-blue" alt="python"></a>
65
65
  <a href=""><img src="https://img.shields.io/badge/runtime%20deps-5-brightgreen" alt="deps"></a>
66
66
  <a href=""><img src="https://img.shields.io/badge/license-MIT-yellow" alt="license"></a>
@@ -100,7 +100,7 @@ Description-Content-Type: text/markdown
100
100
  | **要るか判断する** | [要否判定ガイド](./docs/when-do-i-need-coderouter.md) | エージェント × モデルの詳細マトリクスで「そもそも自分に必要か」を決める |
101
101
  | **詰まったとき** | [トラブルシューティング](./docs/troubleshooting.md) | `doctor` の使い方、`.env` の export 必須、Ollama サイレント失敗 5 症状、Claude Code 連携の罠 |
102
102
  | **安全に使う** | [セキュリティ方針](./docs/security.md) | 脅威モデル・秘密情報の扱い・脆弱性報告経路 |
103
- | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.0用途別 4 プロファイル + GLM/Gemma 4/Qwen3.6 公式化 + apply 自動化) |
103
+ | **履歴** | [CHANGELOG](./CHANGELOG.md) | 全リリース履歴(最新: v1.8.3 — tool_calls probe thinking 対応 + adapter `reasoning_content` strip / llama.cpp 直叩き対応) |
104
104
  | **設計を追う** | [plan.md](./plan.md) | 設計不変項・マイルストーン・今後のロードマップ |
105
105
 
106
106
  English versions: [Quickstart](./docs/quickstart.en.md) · [Usage guide](./docs/usage-guide.en.md) · [Free-tier guide](./docs/free-tier-guide.en.md) · [When you need it](./docs/when-do-i-need-coderouter.en.md) · [Troubleshooting](./docs/troubleshooting.en.md) · [Security](./docs/security.en.md)
@@ -175,7 +175,7 @@ OpenAI 互換エージェント + お行儀の良いモデル + フォールバ
175
175
 
176
176
  ## クイックスタート(3 コマンド)
177
177
 
178
- **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加しました。`uvx` 一発で動きます (Python 3.12 以上必須):
178
+ **v1.7.0 で PyPI 公開**、**v1.8.0 で用途別 4 プロファイル + Z.AI/GLM 連携**を追加、**v1.8.2 で doctor probe を thinking モデル対応**にしました。`uvx` 一発で動きます (Python 3.12 以上必須):
179
179
 
180
180
  ```bash
181
181
  # 1. サンプル設定を置く
@@ -205,7 +205,7 @@ uv run coderouter serve --port 8088
205
205
 
206
206
  > **注**: PyPI 上のパッケージ名は `coderouter-cli` ですが、コマンド名と Python import 名は `coderouter` のままです。詳しくは [CHANGELOG `[v1.7.0]`](./CHANGELOG.md#v170--2026-04-25-pypi-公開-uvx-coderouter-cli-一発で動く) 参照。
207
207
  >
208
- > **v1.8.0 の `--apply` 自動化を使う場合**: `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
208
+ > **`--apply` 自動化を使う場合** (v1.8.0+): `ruamel.yaml` を optional dep として一緒に入れます (`pip install 'coderouter-cli[doctor]'` または `uv pip install ruamel.yaml`)。基本機能には不要です。
209
209
 
210
210
  あとは任意の OpenAI クライアントを `http://127.0.0.1:8088` に向けるだけです:
211
211
 
@@ -2,7 +2,7 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
2
2
  coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
3
3
  coderouter/cli.py,sha256=vI1-dv10t4-xG6Zpt7zi_3U8xGgq54Qa8XIMUYpfOV8,19859
4
4
  coderouter/cli_stats.py,sha256=ae20xUr_hjX09Ms3fBZGZsUS52o44JC57EpbWLBOCO0,27750
5
- coderouter/doctor.py,sha256=Uf4R1t2-50hRx6UpV9UBe_UeiJ2UYBg7e7zkPUHegWE,64217
5
+ coderouter/doctor.py,sha256=F6f9vl99KTGgCja6N9w_QlDIFFCEqY0mxoAhKEI5yTI,69643
6
6
  coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
7
7
  coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
8
8
  coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
@@ -11,7 +11,7 @@ coderouter/output_filters.py,sha256=rI4YgKVv5vviDBl3Xkf7rp6LaSSkdWyEV004q6HrkB0,
11
11
  coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
12
12
  coderouter/adapters/anthropic_native.py,sha256=qfdjxy4YyLt-0Fj7hUYn1oi1SFjEEbSvpaRBUC2hMf4,21903
13
13
  coderouter/adapters/base.py,sha256=H4uM6r_-95Xs1hCM_X4Zv3tq-xN3cXWLj83F-QjPNLw,8265
14
- coderouter/adapters/openai_compat.py,sha256=EC9zNYPGgSOVZyaH1dXRXO1VMN1RjBX5FZ2vEgTkVD8,17100
14
+ coderouter/adapters/openai_compat.py,sha256=9qoJfLR2vVnyM8isb9G4j-Dk5QBHFlneOaBSY-P4UAg,17941
15
15
  coderouter/adapters/registry.py,sha256=Syt3eDljWZAK5mfiJGvUMKaZYAfCRScp7PvV6pYt7mc,683
16
16
  coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g,274
17
17
  coderouter/config/capability_registry.py,sha256=oypl6Z-YjvNoC87AdSIm1C7XE_MZoFq_7Ivm3eRH3cI,14379
@@ -19,7 +19,7 @@ coderouter/config/env_file.py,sha256=CoMK27fuAXm-NtoLzXb8yN2E-wDFjHQuFwiIlmgTBQw
19
19
  coderouter/config/loader.py,sha256=FUEe8m4Tnmj_aul0vSctD8vKvNW-oLRoMRbTpSKqSmc,4077
20
20
  coderouter/config/schemas.py,sha256=NMWKstAzDKc6DT7qKr-GKlUFrsfE4HtEG7zkz9Pnq78,21597
21
21
  coderouter/data/__init__.py,sha256=uNyfD9jaCvTWsBAWtaw1Fr25OSxzv3psGMfBjT1z0Cc,328
22
- coderouter/data/model-capabilities.yaml,sha256=8g11ysbjEwt9Rc9Q9o4VCA0de2RmnH-mJEbnsueET_A,13949
22
+ coderouter/data/model-capabilities.yaml,sha256=9f9CDIoWBJYtppMRh1BOS8FzRKzICV4uT-uANovGmzs,14718
23
23
  coderouter/ingress/__init__.py,sha256=WQsCH2CGJCAhy0mS6GSEdeYZRkkQu2OHDsP4CJWTLug,155
24
24
  coderouter/ingress/anthropic_routes.py,sha256=vuylsn7klFN-Dz3cBS7LrhnnSRGr6agipgMrr9gxq7k,8261
25
25
  coderouter/ingress/app.py,sha256=b9s6NpBGckaTQJCaw-6MZaY7Eq3rxgtX_cCkgWq-0Ig,6620
@@ -37,8 +37,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
37
37
  coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
38
38
  coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
39
39
  coderouter/translation/tool_repair.py,sha256=fyxDb4kWHytO5JWq5y0i4tinJUtWqhMCkyfoCf5BjeM,8314
40
- coderouter_cli-1.8.1.dist-info/METADATA,sha256=sb0tldZ1vqseJCuO-oExjVXwnmOslwQt9MN9Rh5PVS8,44136
41
- coderouter_cli-1.8.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
42
- coderouter_cli-1.8.1.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
43
- coderouter_cli-1.8.1.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
44
- coderouter_cli-1.8.1.dist-info/RECORD,,
40
+ coderouter_cli-1.8.3.dist-info/METADATA,sha256=Nm6kOVjXop9D5aISl3H0OcH-gi4fkGT0fmhuyoKTcHU,44221
41
+ coderouter_cli-1.8.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
42
+ coderouter_cli-1.8.3.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
43
+ coderouter_cli-1.8.3.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
44
+ coderouter_cli-1.8.3.dist-info/RECORD,,