coderouter-cli 1.8.0__py3-none-any.whl → 1.8.2__py3-none-any.whl
This diff shows the changes between publicly available package versions, as released to one of the supported registries and as they appear in their respective public registries. It is provided for informational purposes only.
- coderouter/config/loader.py +27 -0
- coderouter/data/model-capabilities.yaml +126 -8
- coderouter/doctor.py +88 -6
- {coderouter_cli-1.8.0.dist-info → coderouter_cli-1.8.2.dist-info}/METADATA +1 -1
- {coderouter_cli-1.8.0.dist-info → coderouter_cli-1.8.2.dist-info}/RECORD +8 -8
- {coderouter_cli-1.8.0.dist-info → coderouter_cli-1.8.2.dist-info}/WHEEL +0 -0
- {coderouter_cli-1.8.0.dist-info → coderouter_cli-1.8.2.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-1.8.0.dist-info → coderouter_cli-1.8.2.dist-info}/licenses/LICENSE +0 -0
coderouter/config/loader.py
CHANGED
|
@@ -49,8 +49,35 @@ def load_config(path: str | os.PathLike[str] | None = None) -> CodeRouterConfig:
|
|
|
49
49
|
# fail can be rescued by an explicit env-set mode, and (b) the model-
|
|
50
50
|
# validator's "default_profile must exist in profiles" check applies to the
|
|
51
51
|
# *effective* mode the engine will see, not the pre-override YAML value.
|
|
52
|
+
#
|
|
53
|
+
# v1.8.0+: also resolve env_mode through ``mode_aliases`` before assigning,
|
|
54
|
+
# so that startup-time ``--mode coding`` (env CODEROUTER_MODE=coding)
|
|
55
|
+
# behaves symmetrically with the runtime ``X-CodeRouter-Mode: coding``
|
|
56
|
+
# header — both should accept short intent names like ``coding`` /
|
|
57
|
+
# ``general`` / ``reasoning`` and resolve them to the underlying profile
|
|
58
|
+
# (e.g. ``claude-code-nim`` in providers.nvidia-nim.yaml). Without this,
|
|
59
|
+
# users on the NIM example yaml hit
|
|
60
|
+
# "default_profile 'coding' is not declared in profiles:
|
|
61
|
+
# known=['claude-code-nim', ...]"
|
|
62
|
+
# because mode_aliases only fired at request time, not at startup.
|
|
52
63
|
env_mode = os.environ.get("CODEROUTER_MODE", "").strip()
|
|
53
64
|
if env_mode:
|
|
65
|
+
# Pre-validation alias resolution: if env_mode isn't directly a
|
|
66
|
+
# profile name but matches an entry in raw["mode_aliases"], swap it
|
|
67
|
+
# for the underlying profile name. This avoids forcing every example
|
|
68
|
+
# yaml to mirror the v1.8.0 four-profile names (multi/coding/general
|
|
69
|
+
# /reasoning) just to accept the canonical short --mode flags.
|
|
70
|
+
raw_profiles = raw.get("profiles", []) or []
|
|
71
|
+
profile_names = {
|
|
72
|
+
p.get("name") for p in raw_profiles if isinstance(p, dict)
|
|
73
|
+
}
|
|
74
|
+
raw_aliases = raw.get("mode_aliases", {}) or {}
|
|
75
|
+
if (
|
|
76
|
+
env_mode not in profile_names
|
|
77
|
+
and isinstance(raw_aliases, dict)
|
|
78
|
+
and env_mode in raw_aliases
|
|
79
|
+
):
|
|
80
|
+
env_mode = raw_aliases[env_mode]
|
|
54
81
|
raw["default_profile"] = env_mode
|
|
55
82
|
|
|
56
83
|
config = CodeRouterConfig.model_validate(raw)
|
|
@@ -168,45 +168,67 @@ rules:
|
|
|
168
168
|
claude_code_suitability: ok
|
|
169
169
|
|
|
170
170
|
# ------------------------------------------------------------------
|
|
171
|
-
# Qwen3.6 family (v1.7-B
|
|
171
|
+
# Qwen3.6 family (v1.7-B 追加、v1.8.1 で suitability 撤回)
|
|
172
172
|
#
|
|
173
173
|
# 2026-04 リリースの Qwen3.6 シリーズ。Ollama 公式 tag は
|
|
174
|
-
# qwen3.6:27b / qwen3.6:35b
|
|
175
|
-
# 256K context
|
|
176
|
-
# 「Claude Code 代替として最高」「local champ
|
|
177
|
-
#
|
|
174
|
+
# qwen3.6:27b / qwen3.6:35b、metadata 上は tools+vision+thinking 対応、
|
|
175
|
+
# 256K context を宣言。note 記事 (r/LocalLLaMA 2026-04 Megathread) で
|
|
176
|
+
# 「Claude Code 代替として最高」「local champ」と評価されている。
|
|
177
|
+
#
|
|
178
|
+
# ただし v1.8.0 までで `claude_code_suitability: ok` を declare していた
|
|
179
|
+
# のは note 記事の伝聞ベースの先回り宣言で、v1.8.1 〜 v1.8.2
|
|
180
|
+
# (2026-04-26) の実機検証 (M3 Max 64GB / Ollama 0.21.2) で:
|
|
181
|
+
# - num_ctx と streaming の NEEDS_TUNING は v1.8.2 で thinking モデル
|
|
182
|
+
# 用 probe バジェット拡大により偽陽性と判明 (doctor 側の課題)
|
|
183
|
+
# - tool_calls probe が native tool_calls / 修復可能 JSON のいずれも
|
|
184
|
+
# 返さない真の課題が残る (Qwen3.6 系の Ollama 経由 tool 仕様未成熟)
|
|
185
|
+
# tool_calls 不全が解消されるまで `claude_code_suitability` は撤回。
|
|
186
|
+
# 実機で動いたユーザーは `~/.coderouter/model-capabilities.yaml` で
|
|
187
|
+
# `claude_code_suitability: ok` を上書きできる。
|
|
178
188
|
# ------------------------------------------------------------------
|
|
179
189
|
|
|
190
|
+
# v1.8.2: thinking: true は doctor probe (num_ctx / streaming) が reasoning
|
|
191
|
+
# トークン消費分の max_tokens 余裕を確保するためのヒント。Qwen3 系は
|
|
192
|
+
# /think モードで thinking トークンを吐く設計なので true 宣言。
|
|
180
193
|
- match: "qwen3.6:*"
|
|
181
194
|
kind: openai_compat
|
|
182
195
|
capabilities:
|
|
183
196
|
tools: true
|
|
184
|
-
|
|
197
|
+
thinking: true
|
|
185
198
|
|
|
186
199
|
- match: "qwen/qwen3.6-*"
|
|
187
200
|
kind: openai_compat
|
|
188
201
|
capabilities:
|
|
189
202
|
tools: true
|
|
190
|
-
|
|
203
|
+
thinking: true
|
|
191
204
|
|
|
192
205
|
# ------------------------------------------------------------------
|
|
193
|
-
# Gemma 4 family (v1.7-B
|
|
206
|
+
# Gemma 4 family (v1.7-B 追加、v1.8.2 で thinking: true 宣言)
|
|
194
207
|
#
|
|
195
208
|
# Google 公式 Gemma 4。Ollama 公式 tag は gemma4:e2b / e4b / 26b / 31b、
|
|
196
209
|
# 全 variant が tools+vision+thinking 対応、E2B/E4B は audio もサポート。
|
|
197
210
|
# MoE (26b は active 3.8B / total 25.2B)。note 記事で「日常・バランスの
|
|
198
211
|
# 王者」と評価。Claude Haiku 互換性に近い簡潔な応答スタイル。
|
|
212
|
+
#
|
|
213
|
+
# v1.8.2 (2026-04-26): 実機検証 (M3 Max 64GB / Ollama 0.21.2 / gemma4:26b)
|
|
214
|
+
# で `reasoning` フィールドにかなりの量のトークンを吐く thinking モデル
|
|
215
|
+
# と確認。doctor probe の max_tokens=32 / 128 が thinking トークンに
|
|
216
|
+
# 食い切られて偽陽性 NEEDS_TUNING を出していた。registry で
|
|
217
|
+
# `thinking: true` を宣言すると doctor が probe バジェットを 1024 まで
|
|
218
|
+
# 引き上げて偽陽性を回避する。
|
|
199
219
|
# ------------------------------------------------------------------
|
|
200
220
|
|
|
201
221
|
- match: "gemma4:*"
|
|
202
222
|
kind: openai_compat
|
|
203
223
|
capabilities:
|
|
204
224
|
tools: true
|
|
225
|
+
thinking: true
|
|
205
226
|
|
|
206
227
|
- match: "google/gemma-4*"
|
|
207
228
|
kind: openai_compat
|
|
208
229
|
capabilities:
|
|
209
230
|
tools: true
|
|
231
|
+
thinking: true
|
|
210
232
|
|
|
211
233
|
# ------------------------------------------------------------------
|
|
212
234
|
# GLM family (Z.AI / Zhipu AI、v1.7-B 追加)
|
|
@@ -233,3 +255,99 @@ rules:
|
|
|
233
255
|
kind: openai_compat
|
|
234
256
|
capabilities:
|
|
235
257
|
tools: true
|
|
258
|
+
|
|
259
|
+
# ------------------------------------------------------------------
|
|
260
|
+
# Kimi K2 family (Moonshot AI、v1.8.0 追加)
|
|
261
|
+
#
|
|
262
|
+
# NVIDIA NIM 経由で実機検証済み (2026-04-23) の tool-capable モデル。
|
|
263
|
+
# examples/providers.nvidia-nim.yaml の `nim-kimi-k2` / `nim-kimi-k2-thinking`
|
|
264
|
+
# で運用実績あり。Unsloth tool-calling guide にも tool calling 対応モデル
|
|
265
|
+
# として掲載 (Kimi K2.5 / K2 Thinking)。providers.yaml 側で個別の
|
|
266
|
+
# `capabilities.tools: true` 宣言を省略可能にするのが目的。
|
|
267
|
+
# ------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
- match: "moonshotai/kimi-k2*"
|
|
270
|
+
kind: openai_compat
|
|
271
|
+
capabilities:
|
|
272
|
+
tools: true
|
|
273
|
+
|
|
274
|
+
- match: "moonshotai/Kimi-K2*"
|
|
275
|
+
kind: openai_compat
|
|
276
|
+
capabilities:
|
|
277
|
+
tools: true
|
|
278
|
+
|
|
279
|
+
# ------------------------------------------------------------------
|
|
280
|
+
# gpt-oss family (OpenAI 117B MoE オープンウェイト、v1.8.0 追加)
|
|
281
|
+
#
|
|
282
|
+
# OpenRouter free 経由で実機検証済み (`openai/gpt-oss-120b:free` を
|
|
283
|
+
# examples/providers.yaml の `openrouter-gpt-oss-free` で運用)。
|
|
284
|
+
# native tool calling 設計、131K context、Unsloth tool-calling guide
|
|
285
|
+
# にも tool calling 対応モデルとして掲載 (gpt-oss)。
|
|
286
|
+
# ------------------------------------------------------------------
|
|
287
|
+
|
|
288
|
+
- match: "openai/gpt-oss-*"
|
|
289
|
+
kind: openai_compat
|
|
290
|
+
capabilities:
|
|
291
|
+
tools: true
|
|
292
|
+
|
|
293
|
+
- match: "gpt-oss-*"
|
|
294
|
+
kind: openai_compat
|
|
295
|
+
capabilities:
|
|
296
|
+
tools: true
|
|
297
|
+
|
|
298
|
+
# ------------------------------------------------------------------
|
|
299
|
+
# 先回り宣言 family (Unsloth tool-calling guide 掲載、v1.8.0 追加)
|
|
300
|
+
#
|
|
301
|
+
# Unsloth のローカル LLM tool-calling ガイド
|
|
302
|
+
# (https://unsloth.ai/docs/jp/ji-ben/tool-calling-guide-for-local-llms)
|
|
303
|
+
# で tool-calling 対応モデルとして掲載されているが、CodeRouter 側で
|
|
304
|
+
# 実機検証は未実施。tools=true の事前宣言だけ入れて、providers.yaml で
|
|
305
|
+
# これらを使う際の `capabilities.tools: true` 明示宣言を不要にする。
|
|
306
|
+
# claude_code_suitability は実機検証後に追加判断 — それまでは "意見なし"。
|
|
307
|
+
# 不具合があれば user-side の `~/.coderouter/model-capabilities.yaml` で
|
|
308
|
+
# `tools: false` を declare して上書き可能 (first-match-per-flag)。
|
|
309
|
+
# ------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
# DeepSeek-V3.x — DeepSeek-AI の主力 (V3.1 / V3.2 等)
|
|
312
|
+
- match: "deepseek-ai/DeepSeek-V3*"
|
|
313
|
+
kind: openai_compat
|
|
314
|
+
capabilities:
|
|
315
|
+
tools: true
|
|
316
|
+
|
|
317
|
+
- match: "deepseek/deepseek-v3*"
|
|
318
|
+
kind: openai_compat
|
|
319
|
+
capabilities:
|
|
320
|
+
tools: true
|
|
321
|
+
|
|
322
|
+
# MiniMax — MiniMaxAI の MoE 系
|
|
323
|
+
- match: "MiniMaxAI/MiniMax-*"
|
|
324
|
+
kind: openai_compat
|
|
325
|
+
capabilities:
|
|
326
|
+
tools: true
|
|
327
|
+
|
|
328
|
+
- match: "minimax/minimax-*"
|
|
329
|
+
kind: openai_compat
|
|
330
|
+
capabilities:
|
|
331
|
+
tools: true
|
|
332
|
+
|
|
333
|
+
# NVIDIA Nemotron 3 — Nano 系の小型モデル
|
|
334
|
+
- match: "nvidia/nemotron-3-*"
|
|
335
|
+
kind: openai_compat
|
|
336
|
+
capabilities:
|
|
337
|
+
tools: true
|
|
338
|
+
|
|
339
|
+
- match: "nvidia/Nemotron-3-*"
|
|
340
|
+
kind: openai_compat
|
|
341
|
+
capabilities:
|
|
342
|
+
tools: true
|
|
343
|
+
|
|
344
|
+
# Devstral 2 — Mistral AI の coding 特化 fine-tune
|
|
345
|
+
- match: "mistralai/Devstral-*"
|
|
346
|
+
kind: openai_compat
|
|
347
|
+
capabilities:
|
|
348
|
+
tools: true
|
|
349
|
+
|
|
350
|
+
- match: "mistral/devstral*"
|
|
351
|
+
kind: openai_compat
|
|
352
|
+
capabilities:
|
|
353
|
+
tools: true
|
coderouter/doctor.py
CHANGED
|
@@ -433,6 +433,31 @@ _STREAMING_PROBE_USER_PROMPT = (
|
|
|
433
433
|
# truncated". "1\n2\n...\n30" is ~80 chars; 40 chars covers the halfway
|
|
434
434
|
# mark (1..20) which is already obviously-truncated territory.
|
|
435
435
|
_STREAMING_PROBE_MIN_EXPECTED_CHARS = 40
|
|
436
|
+
|
|
437
|
+
# v1.8.2: probe response budgets.
|
|
438
|
+
#
|
|
439
|
+
# Both num_ctx and streaming probes ask the model for a *short* answer
|
|
440
|
+
# (the canary token / "1..30"). The original budgets (32 / 128 tokens)
|
|
441
|
+
# assumed a non-thinking model that emits the answer immediately. On a
|
|
442
|
+
# thinking model — Gemma 4 26B, Qwen3.6, gpt-oss, deepseek-r1 — the
|
|
443
|
+
# upstream burns the entire budget on a hidden ``reasoning`` field
|
|
444
|
+
# *before* emitting any visible ``content``, producing a false-positive
|
|
445
|
+
# NEEDS_TUNING (canary missing / 0 chars streamed). Bumping the budget
|
|
446
|
+
# is the cleanest fix: non-thinking models stop early at their natural
|
|
447
|
+
# stop token (no waste), thinking models get headroom for the reasoning
|
|
448
|
+
# trace plus the actual answer.
|
|
449
|
+
#
|
|
450
|
+
# Numbers picked from the v1.8.1 reality-check session
|
|
451
|
+
# (docs/articles/note-v1-8-1-reality-check.md):
|
|
452
|
+
# * Gemma 4 26B reasoning prefix observed at ~150-300 tokens before
|
|
453
|
+
# content starts → 1024 covers reasoning + 30-line count comfortably.
|
|
454
|
+
# * Non-thinking baseline kept conservative-but-non-tight (256/512) to
|
|
455
|
+
# absorb stylistic preambles ("Sure, the answer is...") without
|
|
456
|
+
# burning extra cloud quota when the operator probes a paid endpoint.
|
|
457
|
+
_NUM_CTX_PROBE_MAX_TOKENS_DEFAULT = 256
|
|
458
|
+
_NUM_CTX_PROBE_MAX_TOKENS_THINKING = 1024
|
|
459
|
+
_STREAMING_PROBE_MAX_TOKENS_DEFAULT = 512
|
|
460
|
+
_STREAMING_PROBE_MAX_TOKENS_THINKING = 1024
|
|
436
461
|
# Default ``num_predict`` suggested in the emitted patch. -1 would be
|
|
437
462
|
# optimal (uncapped) but "4096" communicates intent more clearly to
|
|
438
463
|
# operators unfamiliar with Ollama's sentinel value, and covers Claude
|
|
@@ -475,6 +500,42 @@ def _declared_num_ctx(provider: ProviderConfig) -> int | None:
|
|
|
475
500
|
return val if isinstance(val, int) else None
|
|
476
501
|
|
|
477
502
|
|
|
503
|
+
def _is_reasoning_model(
|
|
504
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
505
|
+
) -> bool:
|
|
506
|
+
"""v1.8.2: True iff the model is known to emit a hidden reasoning trace.
|
|
507
|
+
|
|
508
|
+
Thinking models (Gemma 4, Qwen3-with-/think, gpt-oss, deepseek-r1,
|
|
509
|
+
Claude Sonnet 4.5+ in extended-thinking mode) burn output tokens on a
|
|
510
|
+
``reasoning`` field before any visible ``content`` is produced. The
|
|
511
|
+
num_ctx / streaming probes use small response budgets that get fully
|
|
512
|
+
consumed by the reasoning prefix, producing a false-positive
|
|
513
|
+
NEEDS_TUNING. Callers use this to choose a generous probe budget.
|
|
514
|
+
|
|
515
|
+
Three signals fire:
|
|
516
|
+
* provider declared ``capabilities.thinking: true`` in providers.yaml
|
|
517
|
+
* provider declared ``capabilities.reasoning_passthrough: true``
|
|
518
|
+
(the operator opted in to passing the raw reasoning to the client,
|
|
519
|
+
which is only meaningful for models that emit it)
|
|
520
|
+
* registry resolved ``thinking: true`` or ``reasoning_passthrough: true`` for this (kind, model) pair
|
|
521
|
+
|
|
522
|
+
Conservative bias — when both provider declaration and registry are
|
|
523
|
+
silent, treat as non-reasoning. The probe still completes for thinking
|
|
524
|
+
models in that case (they just hit ``finish_reason='length'`` like
|
|
525
|
+
they did pre-v1.8.2), but at least the new generous default budget
|
|
526
|
+
(256 / 512) gives more headroom than the old 32 / 128.
|
|
527
|
+
"""
|
|
528
|
+
if provider.capabilities.thinking is True:
|
|
529
|
+
return True
|
|
530
|
+
if provider.capabilities.reasoning_passthrough is True:
|
|
531
|
+
return True
|
|
532
|
+
if resolved.thinking is True:
|
|
533
|
+
return True
|
|
534
|
+
if resolved.reasoning_passthrough is True:
|
|
535
|
+
return True
|
|
536
|
+
return False
|
|
537
|
+
|
|
538
|
+
|
|
478
539
|
_PROBE_BASIC_USER_PROMPT = "Reply with exactly the single word: PONG"
|
|
479
540
|
_PROBE_TOOLS_USER_PROMPT = (
|
|
480
541
|
"You have one tool named `echo`. Call it with the argument "
|
|
@@ -617,7 +678,9 @@ def _extract_openai_assistant_choice(
|
|
|
617
678
|
return msg if isinstance(msg, dict) else None
|
|
618
679
|
|
|
619
680
|
|
|
620
|
-
async def _probe_num_ctx(
|
|
681
|
+
async def _probe_num_ctx(
|
|
682
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
683
|
+
) -> ProbeResult:
|
|
621
684
|
"""v1.0-B Probe — direct detection of Ollama ``num_ctx`` truncation.
|
|
622
685
|
|
|
623
686
|
Addresses plan.md §9.4 symptom #1 (空応答 / 意味不明応答). Prior to
|
|
@@ -683,11 +746,21 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
|
|
|
683
746
|
# whatever ``options.num_ctx`` the operator has declared. Request
|
|
684
747
|
# fields win over extra_body, matching the adapter's merge order.
|
|
685
748
|
body: dict[str, Any] = dict(provider.extra_body)
|
|
749
|
+
# v1.8.2: thinking models burn output tokens on a hidden ``reasoning``
|
|
750
|
+
# trace before emitting any ``content``. The pre-v1.8.2 default of 32
|
|
751
|
+
# was tight for any preamble at all; on Gemma 4 26B it caused
|
|
752
|
+
# ``finish_reason='length'`` with content="" before the canary could
|
|
753
|
+
# surface, producing a false-positive NEEDS_TUNING.
|
|
754
|
+
max_tokens = (
|
|
755
|
+
_NUM_CTX_PROBE_MAX_TOKENS_THINKING
|
|
756
|
+
if _is_reasoning_model(provider, resolved)
|
|
757
|
+
else _NUM_CTX_PROBE_MAX_TOKENS_DEFAULT
|
|
758
|
+
)
|
|
686
759
|
body.update(
|
|
687
760
|
{
|
|
688
761
|
"model": provider.model,
|
|
689
762
|
"messages": [{"role": "user", "content": user_prompt}],
|
|
690
|
-
"max_tokens":
|
|
763
|
+
"max_tokens": max_tokens,
|
|
691
764
|
"temperature": 0,
|
|
692
765
|
}
|
|
693
766
|
)
|
|
@@ -799,7 +872,9 @@ async def _probe_num_ctx(provider: ProviderConfig) -> ProbeResult:
|
|
|
799
872
|
)
|
|
800
873
|
|
|
801
874
|
|
|
802
|
-
async def _probe_streaming(
|
|
875
|
+
async def _probe_streaming(
|
|
876
|
+
provider: ProviderConfig, resolved: ResolvedCapabilities
|
|
877
|
+
) -> ProbeResult:
|
|
803
878
|
"""v1.0-C Probe — streaming completion path integrity.
|
|
804
879
|
|
|
805
880
|
Addresses plan.md §9.4 symptom #1 from the **output** side. The v1.0-B
|
|
@@ -868,11 +943,18 @@ async def _probe_streaming(provider: ProviderConfig) -> ProbeResult:
|
|
|
868
943
|
# probing. Top-level probe fields win on collision, matching adapter
|
|
869
944
|
# merge order.
|
|
870
945
|
body: dict[str, Any] = dict(provider.extra_body)
|
|
946
|
+
# v1.8.2: same thinking-model rationale as num_ctx probe — give
|
|
947
|
+
# reasoning a budget so the visible content has a chance to surface.
|
|
948
|
+
max_tokens = (
|
|
949
|
+
_STREAMING_PROBE_MAX_TOKENS_THINKING
|
|
950
|
+
if _is_reasoning_model(provider, resolved)
|
|
951
|
+
else _STREAMING_PROBE_MAX_TOKENS_DEFAULT
|
|
952
|
+
)
|
|
871
953
|
body.update(
|
|
872
954
|
{
|
|
873
955
|
"model": provider.model,
|
|
874
956
|
"messages": [{"role": "user", "content": _STREAMING_PROBE_USER_PROMPT}],
|
|
875
|
-
"max_tokens":
|
|
957
|
+
"max_tokens": max_tokens,
|
|
876
958
|
"temperature": 0,
|
|
877
959
|
"stream": True,
|
|
878
960
|
}
|
|
@@ -1506,11 +1588,11 @@ async def check_model(
|
|
|
1506
1588
|
# declaration probes (tool_calls / thinking / reasoning-leak) should
|
|
1507
1589
|
# dominate the report — streaming is the output-side sibling of
|
|
1508
1590
|
# num_ctx and its NEEDS_TUNING verdict is orthogonal to the others.
|
|
1509
|
-
report.results.append(await _probe_num_ctx(provider))
|
|
1591
|
+
report.results.append(await _probe_num_ctx(provider, resolved))
|
|
1510
1592
|
report.results.append(await _probe_tool_calls(provider, resolved))
|
|
1511
1593
|
report.results.append(await _probe_thinking(provider, resolved))
|
|
1512
1594
|
report.results.append(await _probe_reasoning_leak(provider, resolved))
|
|
1513
|
-
report.results.append(await _probe_streaming(provider))
|
|
1595
|
+
report.results.append(await _probe_streaming(provider, resolved))
|
|
1514
1596
|
return report
|
|
1515
1597
|
|
|
1516
1598
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 1.8.
|
|
3
|
+
Version: 1.8.2
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -2,7 +2,7 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
|
|
|
2
2
|
coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
|
|
3
3
|
coderouter/cli.py,sha256=vI1-dv10t4-xG6Zpt7zi_3U8xGgq54Qa8XIMUYpfOV8,19859
|
|
4
4
|
coderouter/cli_stats.py,sha256=ae20xUr_hjX09Ms3fBZGZsUS52o44JC57EpbWLBOCO0,27750
|
|
5
|
-
coderouter/doctor.py,sha256=
|
|
5
|
+
coderouter/doctor.py,sha256=atYOr73LLI3lKHjhFDY0lea41_0jolfiY2zb15La_O8,68116
|
|
6
6
|
coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
|
|
7
7
|
coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
|
|
8
8
|
coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
|
|
@@ -16,10 +16,10 @@ coderouter/adapters/registry.py,sha256=Syt3eDljWZAK5mfiJGvUMKaZYAfCRScp7PvV6pYt7
|
|
|
16
16
|
coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g,274
|
|
17
17
|
coderouter/config/capability_registry.py,sha256=oypl6Z-YjvNoC87AdSIm1C7XE_MZoFq_7Ivm3eRH3cI,14379
|
|
18
18
|
coderouter/config/env_file.py,sha256=CoMK27fuAXm-NtoLzXb8yN2E-wDFjHQuFwiIlmgTBQw,10356
|
|
19
|
-
coderouter/config/loader.py,sha256=
|
|
19
|
+
coderouter/config/loader.py,sha256=FUEe8m4Tnmj_aul0vSctD8vKvNW-oLRoMRbTpSKqSmc,4077
|
|
20
20
|
coderouter/config/schemas.py,sha256=NMWKstAzDKc6DT7qKr-GKlUFrsfE4HtEG7zkz9Pnq78,21597
|
|
21
21
|
coderouter/data/__init__.py,sha256=uNyfD9jaCvTWsBAWtaw1Fr25OSxzv3psGMfBjT1z0Cc,328
|
|
22
|
-
coderouter/data/model-capabilities.yaml,sha256=
|
|
22
|
+
coderouter/data/model-capabilities.yaml,sha256=9f9CDIoWBJYtppMRh1BOS8FzRKzICV4uT-uANovGmzs,14718
|
|
23
23
|
coderouter/ingress/__init__.py,sha256=WQsCH2CGJCAhy0mS6GSEdeYZRkkQu2OHDsP4CJWTLug,155
|
|
24
24
|
coderouter/ingress/anthropic_routes.py,sha256=vuylsn7klFN-Dz3cBS7LrhnnSRGr6agipgMrr9gxq7k,8261
|
|
25
25
|
coderouter/ingress/app.py,sha256=b9s6NpBGckaTQJCaw-6MZaY7Eq3rxgtX_cCkgWq-0Ig,6620
|
|
@@ -37,8 +37,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
|
|
|
37
37
|
coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
|
|
38
38
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
39
39
|
coderouter/translation/tool_repair.py,sha256=fyxDb4kWHytO5JWq5y0i4tinJUtWqhMCkyfoCf5BjeM,8314
|
|
40
|
-
coderouter_cli-1.8.
|
|
41
|
-
coderouter_cli-1.8.
|
|
42
|
-
coderouter_cli-1.8.
|
|
43
|
-
coderouter_cli-1.8.
|
|
44
|
-
coderouter_cli-1.8.
|
|
40
|
+
coderouter_cli-1.8.2.dist-info/METADATA,sha256=MBkIOwnySR2wfw-JhBtGFipk_MqbV8J_4aAqWZ65A7g,44136
|
|
41
|
+
coderouter_cli-1.8.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
42
|
+
coderouter_cli-1.8.2.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
43
|
+
coderouter_cli-1.8.2.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
44
|
+
coderouter_cli-1.8.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|