coderouter-cli 2.5.4__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/config/schemas.py +31 -0
- coderouter/cost.py +32 -0
- coderouter/ingress/dashboard_routes.py +42 -0
- coderouter/language_tax.py +244 -0
- coderouter/logging.py +8 -0
- coderouter/metrics/collector.py +45 -0
- coderouter/routing/auto_router.py +7 -0
- coderouter/routing/fallback.py +30 -0
- coderouter/token_estimation.py +47 -0
- coderouter/translation/anthropic.py +124 -1
- {coderouter_cli-2.5.4.dist-info → coderouter_cli-2.6.0.dist-info}/METADATA +1 -1
- {coderouter_cli-2.5.4.dist-info → coderouter_cli-2.6.0.dist-info}/RECORD +15 -14
- {coderouter_cli-2.5.4.dist-info → coderouter_cli-2.6.0.dist-info}/WHEEL +0 -0
- {coderouter_cli-2.5.4.dist-info → coderouter_cli-2.6.0.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.5.4.dist-info → coderouter_cli-2.6.0.dist-info}/licenses/LICENSE +0 -0
coderouter/config/schemas.py
CHANGED
|
@@ -185,6 +185,19 @@ class ProviderConfig(BaseModel):
|
|
|
185
185
|
)
|
|
186
186
|
timeout_s: float = Field(default=30.0, ge=1.0, le=600.0)
|
|
187
187
|
|
|
188
|
+
# v2.6 language-tax track: path to a LOCAL ``tokenizer.json`` for this
|
|
189
|
+
# provider's model, used to measure the CJK over-count vs the char/4
|
|
190
|
+
# baseline (see ``coderouter.language_tax``). Loaded local-file-only —
|
|
191
|
+
# never contacts the HuggingFace Hub. When unset, language-tax falls
|
|
192
|
+
# back to char/4 (multiplier 1.0) and the feature is silently inert.
|
|
193
|
+
tokenizer_path: str | None = Field(
|
|
194
|
+
default=None,
|
|
195
|
+
description=(
|
|
196
|
+
"Local tokenizer.json for accurate (language-tax) token "
|
|
197
|
+
"counting. No network access. Requires the 'accuracy' extra."
|
|
198
|
+
),
|
|
199
|
+
)
|
|
200
|
+
|
|
188
201
|
# Provider-specific extras merged into the outbound request body.
|
|
189
202
|
# Use for non-standard fields like Ollama's `think: false`, `keep_alive`,
|
|
190
203
|
# `options.num_ctx`, or any vendor-specific toggle. User-supplied request
|
|
@@ -763,6 +776,16 @@ class RuleMatcher(BaseModel):
|
|
|
763
776
|
``request.tools`` set). The ``has_tools`` matcher is the
|
|
764
777
|
profile-level lever for steering tool-laden traffic to the right
|
|
765
778
|
chain entirely.
|
|
779
|
+
|
|
780
|
+
Variants (v2.6 / language-tax routing):
|
|
781
|
+
|
|
782
|
+
- ``cjk_ratio_min: 0.3`` — CJK character ratio of the latest user
|
|
783
|
+
message is ``>=`` this threshold. Routes CJK-heavy turns (which
|
|
784
|
+
pay the cloud "language tax" of ~1.2-1.5x more tokens) to a local
|
|
785
|
+
model that bills nothing per token, while ASCII/code turns fall
|
|
786
|
+
through to the cloud chain. Per-turn property like
|
|
787
|
+
``code_fence_ratio_min``; see
|
|
788
|
+
:func:`coderouter.language_tax.cjk_char_ratio`.
|
|
766
789
|
"""
|
|
767
790
|
|
|
768
791
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -773,6 +796,13 @@ class RuleMatcher(BaseModel):
|
|
|
773
796
|
content_regex: str | None = None
|
|
774
797
|
model_pattern: str | None = None
|
|
775
798
|
content_token_count_min: int | None = Field(default=None, ge=1)
|
|
799
|
+
# v2.6 language-tax routing: CJK character ratio of the latest user
|
|
800
|
+
# message >= this threshold. Lets operators steer CJK-heavy traffic
|
|
801
|
+
# (which carries the cloud language tax) to a local model that bills
|
|
802
|
+
# nothing per token. Operates on the latest user message like
|
|
803
|
+
# ``code_fence_ratio_min`` (a per-turn property), not the whole
|
|
804
|
+
# request. See ``coderouter.language_tax.cjk_char_ratio``.
|
|
805
|
+
cjk_ratio_min: float | None = Field(default=None, ge=0.0, le=1.0)
|
|
776
806
|
# [Unreleased]: tool-aware routing (OpenClaw + Raspberry Pi 由来).
|
|
777
807
|
# See class docstring "Variants ([Unreleased] / tool-aware routing)"
|
|
778
808
|
# above for the full rationale. Boolean shape mirrors ``has_image`` —
|
|
@@ -789,6 +819,7 @@ class RuleMatcher(BaseModel):
|
|
|
789
819
|
"model_pattern",
|
|
790
820
|
"content_token_count_min",
|
|
791
821
|
"has_tools",
|
|
822
|
+
"cjk_ratio_min",
|
|
792
823
|
)
|
|
793
824
|
|
|
794
825
|
@model_validator(mode="after")
|
coderouter/cost.py
CHANGED
|
@@ -58,9 +58,13 @@ in the cost calc.
|
|
|
58
58
|
from __future__ import annotations
|
|
59
59
|
|
|
60
60
|
from dataclasses import dataclass
|
|
61
|
+
from typing import TYPE_CHECKING
|
|
61
62
|
|
|
62
63
|
from coderouter.config.schemas import CostConfig
|
|
63
64
|
|
|
65
|
+
if TYPE_CHECKING: # avoid an import cycle at runtime; used only for typing
|
|
66
|
+
from coderouter.language_tax import LanguageTaxBreakdown
|
|
67
|
+
|
|
64
68
|
|
|
65
69
|
@dataclass(frozen=True)
|
|
66
70
|
class CostBreakdown:
|
|
@@ -82,6 +86,12 @@ class CostBreakdown:
|
|
|
82
86
|
chart. ``input_usd`` is "fresh input only" (does not
|
|
83
87
|
include cache buckets); cache_read_usd / cache_creation_usd
|
|
84
88
|
are the post-discount / post-premium values.
|
|
89
|
+
language_tax_multiplier: ``tokens_accurate / tokens_heuristic``
|
|
90
|
+
for the request text (v2.6 language-tax track). 1.0 when no
|
|
91
|
+
tax is measurable (English/code, or no accurate tokenizer).
|
|
92
|
+
language_tax_usd: USD share of ``total_usd`` attributable to the
|
|
93
|
+
CJK over-count vs CodeRouter's char/4 English baseline.
|
|
94
|
+
0.0 for free / local providers. See :mod:`coderouter.language_tax`.
|
|
85
95
|
"""
|
|
86
96
|
|
|
87
97
|
total_usd: float = 0.0
|
|
@@ -90,6 +100,10 @@ class CostBreakdown:
|
|
|
90
100
|
output_usd: float = 0.0
|
|
91
101
|
cache_read_usd: float = 0.0
|
|
92
102
|
cache_creation_usd: float = 0.0
|
|
103
|
+
# v2.6 language-tax track (additive; defaults keep pre-v2.6 behaviour
|
|
104
|
+
# and equality with a bare ``CostBreakdown()``).
|
|
105
|
+
language_tax_multiplier: float = 1.0
|
|
106
|
+
language_tax_usd: float = 0.0
|
|
93
107
|
|
|
94
108
|
|
|
95
109
|
_PER_MILLION: float = 1_000_000.0
|
|
@@ -102,6 +116,7 @@ def compute_cost_for_attempt(
|
|
|
102
116
|
output_tokens: int,
|
|
103
117
|
cache_read_input_tokens: int,
|
|
104
118
|
cache_creation_input_tokens: int,
|
|
119
|
+
language_tax: LanguageTaxBreakdown | None = None,
|
|
105
120
|
) -> CostBreakdown:
|
|
106
121
|
"""Translate per-attempt token counts into a USD :class:`CostBreakdown`.
|
|
107
122
|
|
|
@@ -144,6 +159,21 @@ def compute_cost_for_attempt(
|
|
|
144
159
|
full_rate_for_cache_read = safe_read * input_rate
|
|
145
160
|
savings_usd = full_rate_for_cache_read - cache_read_usd
|
|
146
161
|
|
|
162
|
+
# v2.6 language tax: the share of fresh-input spend attributable to
|
|
163
|
+
# the CJK over-count vs the char/4 English baseline. Defaults to a
|
|
164
|
+
# 1.0 multiplier / $0 when no LanguageTaxBreakdown is supplied, so
|
|
165
|
+
# the pre-v2.6 call shape is unchanged.
|
|
166
|
+
lt_multiplier = 1.0
|
|
167
|
+
lt_usd = 0.0
|
|
168
|
+
if language_tax is not None:
|
|
169
|
+
lt_multiplier = language_tax.tax_multiplier
|
|
170
|
+
from coderouter.language_tax import language_tax_usd
|
|
171
|
+
|
|
172
|
+
lt_usd = language_tax_usd(
|
|
173
|
+
language_tax.extra_tokens,
|
|
174
|
+
input_tokens_per_million=cost_config.input_tokens_per_million,
|
|
175
|
+
)
|
|
176
|
+
|
|
147
177
|
return CostBreakdown(
|
|
148
178
|
total_usd=total_usd,
|
|
149
179
|
savings_usd=max(savings_usd, 0.0),
|
|
@@ -151,4 +181,6 @@ def compute_cost_for_attempt(
|
|
|
151
181
|
output_usd=output_usd,
|
|
152
182
|
cache_read_usd=cache_read_usd,
|
|
153
183
|
cache_creation_usd=cache_creation_usd,
|
|
184
|
+
language_tax_multiplier=lt_multiplier,
|
|
185
|
+
language_tax_usd=lt_usd,
|
|
154
186
|
)
|
|
@@ -165,6 +165,26 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
165
165
|
</main>
|
|
166
166
|
|
|
167
167
|
<footer class="max-w-7xl mx-auto px-4 md:px-6 pb-8">
|
|
168
|
+
<!-- Panel: Cost & Language Tax (v2.6) -->
|
|
169
|
+
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 mb-4">
|
|
170
|
+
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Cost & Language Tax</h2>
|
|
171
|
+
<div class="grid grid-cols-3 gap-3">
|
|
172
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
173
|
+
<div class="text-xs text-slate-400">Total spend</div>
|
|
174
|
+
<div class="text-2xl font-semibold tabnum" data-bind="cost_total">$0.00</div>
|
|
175
|
+
</div>
|
|
176
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
177
|
+
<div class="text-xs text-slate-400">Cache savings</div>
|
|
178
|
+
<div class="text-2xl font-semibold tabnum text-green-400" data-bind="cost_savings">$0.00</div>
|
|
179
|
+
</div>
|
|
180
|
+
<div class="rounded-md bg-slate-800/50 p-3">
|
|
181
|
+
<div class="text-xs text-slate-400">Language tax (CJK)</div>
|
|
182
|
+
<div class="text-2xl font-semibold tabnum text-amber-400" data-bind="language_tax_total">$0.00</div>
|
|
183
|
+
<div class="text-xs text-slate-500" data-bind="language_tax_hint">no tokenizer configured</div>
|
|
184
|
+
</div>
|
|
185
|
+
</div>
|
|
186
|
+
<div id="language-tax-by-provider" class="text-xs text-slate-400 tabnum mt-3"></div>
|
|
187
|
+
</section>
|
|
168
188
|
<section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4">
|
|
169
189
|
<h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Usage Mix</h2>
|
|
170
190
|
<div id="usage-bar" class="flex h-3 rounded-full overflow-hidden bg-slate-800" role="img" aria-label="usage mix"></div>
|
|
@@ -435,6 +455,27 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
435
455
|
{"&": "&", "<": "<", ">": ">", '"': """, "'": "'"}[c]
|
|
436
456
|
));
|
|
437
457
|
|
|
458
|
+
// v2.6: cost + language-tax panel. The collector zero-fills these, so
|
|
459
|
+
// a fresh/local-only deployment shows $0.00 across the board.
|
|
460
|
+
const renderCostTax = (snap) => {
|
|
461
|
+
const c = snap.counters || {};
|
|
462
|
+
const usd = (x) => "$" + (Number(x) || 0).toFixed(4);
|
|
463
|
+
setBind("cost_total", usd(c.cost_total_usd_aggregate));
|
|
464
|
+
setBind("cost_savings", usd(c.cost_savings_usd_aggregate));
|
|
465
|
+
const taxTotal = Number(c.language_tax_usd_aggregate) || 0;
|
|
466
|
+
setBind("language_tax_total", usd(taxTotal));
|
|
467
|
+
setBind("language_tax_hint",
|
|
468
|
+
taxTotal > 0 ? "extra paid for CJK vs char/4 baseline"
|
|
469
|
+
: "no tax measured (set provider tokenizer_path)");
|
|
470
|
+
const byProv = c.language_tax_usd || {};
|
|
471
|
+
const el = document.getElementById("language-tax-by-provider");
|
|
472
|
+
const rows = Object.entries(byProv).filter(([, v]) => Number(v) > 0);
|
|
473
|
+
el.innerHTML = rows.length === 0 ? "" :
|
|
474
|
+
rows.map(([n, v]) =>
|
|
475
|
+
'<span class="mr-4"><span class="text-slate-500">' + escapeHTML(n) +
|
|
476
|
+
'</span> ' + usd(v) + '</span>').join("");
|
|
477
|
+
};
|
|
478
|
+
|
|
438
479
|
const renderSnapshot = (snap) => {
|
|
439
480
|
const startup = snap.startup || {};
|
|
440
481
|
const cfg = snap.config || {};
|
|
@@ -451,6 +492,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
|
|
|
451
492
|
renderSparkline(snap);
|
|
452
493
|
renderRecent(snap);
|
|
453
494
|
renderUsageMix(snap);
|
|
495
|
+
renderCostTax(snap);
|
|
454
496
|
};
|
|
455
497
|
|
|
456
498
|
const renderError = (msg) => {
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Language-tax measurement (Phase 1 PoC, 5-deps invariant).
|
|
2
|
+
|
|
3
|
+
Why this module exists
|
|
4
|
+
======================
|
|
5
|
+
|
|
6
|
+
Cloud LLM tokenizers charge CJK text far more tokens-per-character
|
|
7
|
+
than English. CodeRouter's core router uses a ``char/4`` heuristic
|
|
8
|
+
(:mod:`coderouter.token_estimation`) which is *conservative for CJK*
|
|
9
|
+
— i.e. it **under-counts** Japanese/Chinese/Korean text. That gap is
|
|
10
|
+
the "language tax": a Japanese prompt that the heuristic prices at N
|
|
11
|
+
tokens is actually billed at ~1.2-1.5x N by the cloud provider.
|
|
12
|
+
|
|
13
|
+
Local models are unaffected (no per-token billing), so the tax only
|
|
14
|
+
matters on the cloud leg. This module quantifies it so the cost
|
|
15
|
+
tracker / dashboard can surface "how much extra am I paying to work
|
|
16
|
+
in Japanese?".
|
|
17
|
+
|
|
18
|
+
Design constraints (mirrors token_estimation_accurate.py)
|
|
19
|
+
=========================================================
|
|
20
|
+
|
|
21
|
+
* **No new core dependency.** CJK detection is pure ``str`` + Unicode
|
|
22
|
+
range checks (stdlib only). The *accurate* token count is delegated
|
|
23
|
+
to :func:`coderouter.token_estimation_accurate.count_tokens`, whose
|
|
24
|
+
precise backend (HuggingFace ``tokenizers``) is the existing
|
|
25
|
+
optional ``accuracy`` extra. When that backend is absent every
|
|
26
|
+
function still returns a sane value — the tax_multiplier simply
|
|
27
|
+
collapses to 1.0 because both legs use char/4.
|
|
28
|
+
* **Local only / no network.** No tokenizer is ever downloaded; we
|
|
29
|
+
only pass through a caller-supplied local ``tokenizer.json`` path.
|
|
30
|
+
* **Leaf module.** Imports only ``token_estimation`` /
|
|
31
|
+
``token_estimation_accurate`` (both leaves), never the engine or
|
|
32
|
+
collector — keeps it trivially testable and circular-import-free.
|
|
33
|
+
|
|
34
|
+
The tax multiplier, defined
|
|
35
|
+
===========================
|
|
36
|
+
|
|
37
|
+
``tax_multiplier = tokens_accurate / tokens_heuristic``
|
|
38
|
+
|
|
39
|
+
where ``tokens_heuristic`` is the char/4 estimate (CodeRouter's
|
|
40
|
+
English-calibrated baseline) and ``tokens_accurate`` is the real
|
|
41
|
+
tokenizer count. Reading it:
|
|
42
|
+
|
|
43
|
+
* English / code text → real tokenizers land near char/4, so the
|
|
44
|
+
multiplier is ~1.0 (no tax).
|
|
45
|
+
* Japanese prose → real tokenizers emit ~0.5-1.0 tokens/char vs the
|
|
46
|
+
0.25 the heuristic assumes, so the multiplier lands ~2.0-4.0 on
|
|
47
|
+
*pure* CJK and ~1.2-1.5 on realistic mixed coding prompts (CJK
|
|
48
|
+
comments/instructions + ASCII code/identifiers).
|
|
49
|
+
|
|
50
|
+
Confidence: **MODERATE.** char/4 is itself an approximation of
|
|
51
|
+
English, so the multiplier is "tax relative to CodeRouter's own
|
|
52
|
+
English baseline", not a lab-grade JA-vs-EN figure. It is, however,
|
|
53
|
+
fully measurable with zero network and no guessing — which is why we
|
|
54
|
+
prefer it to a translate-and-compare counterfactual.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
from __future__ import annotations
|
|
58
|
+
|
|
59
|
+
from dataclasses import dataclass
|
|
60
|
+
from pathlib import Path
|
|
61
|
+
from typing import Any
|
|
62
|
+
|
|
63
|
+
from coderouter.token_estimation import (
|
|
64
|
+
CHARS_PER_TOKEN_HEURISTIC,
|
|
65
|
+
extract_text_from_anthropic_request,
|
|
66
|
+
)
|
|
67
|
+
from coderouter.token_estimation_accurate import count_tokens
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# CJK Unicode ranges
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
#
|
|
73
|
+
# We count a character as "CJK" when it falls in one of the blocks that
|
|
74
|
+
# real tokenizers fragment heavily. Latin, digits, punctuation and
|
|
75
|
+
# whitespace are excluded so that an ASCII-only prompt scores 0.0 and a
|
|
76
|
+
# pure-Japanese prompt scores ~1.0. Half-width katakana and full-width
|
|
77
|
+
# forms are included because they tokenize like their full-width kin.
|
|
78
|
+
#
|
|
79
|
+
# Ranges are (low, high) inclusive code points.
|
|
80
|
+
# Inclusive (first, last) code-point pairs treated as "CJK" for ratio
# purposes. Latin letters, digits, ASCII punctuation and whitespace are
# deliberately absent, so ASCII-only text scores 0.0.
_CJK_RANGES: tuple[tuple[int, int], ...] = (
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs (common Kanji/Hanzi)
    (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
    (0xFF00, 0xFFEF),  # Half-width / full-width forms
    (0x3000, 0x303F),  # CJK symbols & punctuation (、。「」etc.)
    (0xAC00, 0xD7A3),  # Hangul syllables (Korean)
    (0x1100, 0x11FF),  # Hangul Jamo
    (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
)


def _is_cjk(cp: int) -> bool:
    """Return True when code point ``cp`` lies inside any ``_CJK_RANGES`` pair."""
    for first, last in _CJK_RANGES:
        if first <= cp <= last:
            return True
    return False


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def cjk_char_ratio(text: str) -> float:
    """Return the share of non-whitespace characters in ``text`` that are CJK.

    Characters for which ``str.isspace()`` is true are ignored entirely,
    so indentation and blank lines do not dilute the score. The result is
    ``0.0`` for empty, whitespace-only, or CJK-free text and ``1.0`` when
    every remaining character falls in ``_CJK_RANGES``.
    """
    visible = [ch for ch in text if not ch.isspace()]
    if not visible:
        # Empty or whitespace-only input: nothing to measure.
        return 0.0
    cjk_hits = sum(1 for ch in visible if _is_cjk(ord(ch)))
    return cjk_hits / len(visible)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass(frozen=True)
class LanguageTaxBreakdown:
    """Immutable result of one language-tax measurement.

    A bare ``LanguageTaxBreakdown()`` is the "nothing measured" value:
    zero counts, multiplier ``1.0``, no extra tokens.

    Fields
        char_count: total length of the measured text in characters
            (``len(text)``, whitespace included).
        cjk_ratio: CJK share of the non-whitespace characters, in
            the range 0.0-1.0 (see :func:`cjk_char_ratio`).
        tokens_heuristic: the char/4 estimate, i.e. CodeRouter's
            English-calibrated baseline.
        tokens_accurate: token count from the configured tokenizer;
            falls back to the char/4 estimate when no precise count
            is available.
        accurate_available: True when a ``tokenizer_path`` was given
            and the accurate count differs from the heuristic.
        tax_multiplier: ``tokens_accurate / tokens_heuristic``; 1.0
            when the heuristic is zero or no tax is measurable.
        extra_tokens: ``tokens_accurate - tokens_heuristic`` clamped
            to be non-negative — the visible "tax" in tokens.
    """

    # Size and language mix of the measured text.
    char_count: int = 0
    cjk_ratio: float = 0.0
    # The two token counts being compared.
    tokens_heuristic: int = 0
    tokens_accurate: int = 0
    accurate_available: bool = False
    # Derived tax figures.
    tax_multiplier: float = 1.0
    extra_tokens: int = 0
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def estimate_language_tax(
    text: str,
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Measure the language tax of ``text``.

    The heuristic leg is always ``len(text) // CHARS_PER_TOKEN_HEURISTIC``.
    When ``tokenizer_path`` is given, the accurate leg is whatever
    :func:`count_tokens` returns for that tokenizer. When it is ``None``
    the accurate leg is pinned to the heuristic, so the result is inert
    (multiplier ``1.0``, ``extra_tokens`` ``0``) regardless of how
    ``count_tokens`` rounds its own fallback.

    Empty ``text`` yields the default :class:`LanguageTaxBreakdown`.

    NOTE: ``accurate_available`` is inferred from the two counts
    differing, so a real tokenizer count that happens to equal the
    heuristic is reported as ``False``.
    """
    if not text:
        return LanguageTaxBreakdown()

    heuristic = len(text) // CHARS_PER_TOKEN_HEURISTIC

    if tokenizer_path is None:
        # No tokenizer configured: by contract both legs are char/4. Do not
        # depend on count_tokens' fallback matching our floor division.
        accurate = heuristic
    else:
        accurate = count_tokens(text, tokenizer_path=tokenizer_path)

    # With a tokenizer_path but no precise backend, count_tokens is expected
    # to return the char/4 value, so equality is read as "no accurate count".
    accurate_available = tokenizer_path is not None and accurate != heuristic

    # A zero heuristic (text shorter than one heuristic token) would divide
    # by zero; report a neutral multiplier and let extra_tokens carry the tax.
    multiplier = accurate / heuristic if heuristic > 0 else 1.0

    return LanguageTaxBreakdown(
        char_count=len(text),
        cjk_ratio=cjk_char_ratio(text),
        tokens_heuristic=heuristic,
        tokens_accurate=accurate,
        accurate_available=accurate_available,
        tax_multiplier=multiplier,
        # Never report a negative tax when the tokenizer beats char/4.
        extra_tokens=max(accurate - heuristic, 0),
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def language_tax_usd(
    extra_tokens: int,
    *,
    input_tokens_per_million: float | None,
) -> float:
    """Convert a language-tax token delta into USD for one request leg.

    ``extra_tokens`` is priced at ``input_tokens_per_million`` (USD per
    one million input tokens). The result is ``0.0`` when the rate is
    ``None`` or zero, or when ``extra_tokens`` is not positive, so callers
    need no special case for unpriced providers.
    """
    # Unpriced provider (rate is None or 0): nothing to charge.
    if not input_tokens_per_million:
        return 0.0
    # No over-count measured: no tax.
    if extra_tokens <= 0:
        return 0.0
    usd_per_token = input_tokens_per_million / 1_000_000.0
    return extra_tokens * usd_per_token
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def estimate_language_tax_for_request(
    system: Any,
    messages: list[Any],
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Measure the language tax of an Anthropic-shaped request.

    Thin wrapper: the request text is obtained from
    :func:`extract_text_from_anthropic_request` (given ``system`` and
    ``messages``) and handed to :func:`estimate_language_tax` together
    with ``tokenizer_path``. The returned breakdown is exactly what
    :func:`estimate_language_tax` produces for that text.
    """
    return estimate_language_tax(
        extract_text_from_anthropic_request(system=system, messages=messages),
        tokenizer_path=tokenizer_path,
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
__all__ = [
|
|
239
|
+
"LanguageTaxBreakdown",
|
|
240
|
+
"cjk_char_ratio",
|
|
241
|
+
"estimate_language_tax",
|
|
242
|
+
"estimate_language_tax_for_request",
|
|
243
|
+
"language_tax_usd",
|
|
244
|
+
]
|
coderouter/logging.py
CHANGED
|
@@ -971,6 +971,10 @@ class CacheObservedPayload(TypedDict):
|
|
|
971
971
|
streaming: bool
|
|
972
972
|
cost_usd: float
|
|
973
973
|
cost_savings_usd: float
|
|
974
|
+
# v2.6 language-tax track (optional; default 0.0 / 1.0 at the emit
|
|
975
|
+
# site keeps pre-v2.6 callers and log consumers working unchanged).
|
|
976
|
+
language_tax_usd: float
|
|
977
|
+
language_tax_multiplier: float
|
|
974
978
|
|
|
975
979
|
|
|
976
980
|
def log_cache_observed(
|
|
@@ -986,6 +990,8 @@ def log_cache_observed(
|
|
|
986
990
|
streaming: bool,
|
|
987
991
|
cost_usd: float = 0.0,
|
|
988
992
|
cost_savings_usd: float = 0.0,
|
|
993
|
+
language_tax_usd: float = 0.0,
|
|
994
|
+
language_tax_multiplier: float = 1.0,
|
|
989
995
|
) -> None:
|
|
990
996
|
"""Emit a ``cache-observed`` info record with the unified shape.
|
|
991
997
|
|
|
@@ -1013,6 +1019,8 @@ def log_cache_observed(
|
|
|
1013
1019
|
"streaming": streaming,
|
|
1014
1020
|
"cost_usd": cost_usd,
|
|
1015
1021
|
"cost_savings_usd": cost_savings_usd,
|
|
1022
|
+
"language_tax_usd": language_tax_usd,
|
|
1023
|
+
"language_tax_multiplier": language_tax_multiplier,
|
|
1016
1024
|
}
|
|
1017
1025
|
logger.info("cache-observed", extra=payload)
|
|
1018
1026
|
|
coderouter/metrics/collector.py
CHANGED
|
@@ -190,6 +190,13 @@ class MetricsCollector(logging.Handler):
|
|
|
190
190
|
self._cost_total_usd_aggregate: float = 0.0
|
|
191
191
|
self._cost_savings_usd_aggregate: float = 0.0
|
|
192
192
|
|
|
193
|
+
# v2.6: per-provider language-tax spend — the USD share of input
|
|
194
|
+
# cost attributable to the CJK over-count vs the char/4 baseline.
|
|
195
|
+
# Zero for English/code workloads and for providers without a
|
|
196
|
+
# configured tokenizer_path. Surfaced alongside cost_total_usd.
|
|
197
|
+
self._language_tax_usd: dict[str, float] = {}
|
|
198
|
+
self._language_tax_usd_aggregate: float = 0.0
|
|
199
|
+
|
|
193
200
|
# v2.0-F (L1): context budget guard counters. Per-profile counts
|
|
194
201
|
# of warnings (over warn threshold) and trims (messages removed).
|
|
195
202
|
# The ``latest_usage_ratio`` dict records the most recent ratio
|
|
@@ -388,6 +395,22 @@ class MetricsCollector(logging.Handler):
|
|
|
388
395
|
self._cost_savings_usd.get(provider, 0.0) + savings_usd
|
|
389
396
|
)
|
|
390
397
|
self._cost_savings_usd_aggregate += savings_usd
|
|
398
|
+
|
|
399
|
+
# v2.6: language-tax spend. Same defensive coercion as the
|
|
400
|
+
# cost fields; defaults to 0.0 for pre-v2.6 log lines and
|
|
401
|
+
# English/code traffic, so the aggregate only moves on
|
|
402
|
+
# CJK-heavy requests against a tokenizer-configured provider.
|
|
403
|
+
lt_usd_raw = extras.get("language_tax_usd", 0.0)
|
|
404
|
+
lt_usd = (
|
|
405
|
+
float(lt_usd_raw)
|
|
406
|
+
if isinstance(lt_usd_raw, int | float)
|
|
407
|
+
else 0.0
|
|
408
|
+
)
|
|
409
|
+
if lt_usd > 0.0:
|
|
410
|
+
self._language_tax_usd[provider] = (
|
|
411
|
+
self._language_tax_usd.get(provider, 0.0) + lt_usd
|
|
412
|
+
)
|
|
413
|
+
self._language_tax_usd_aggregate += lt_usd
|
|
391
414
|
elif event == "context-budget-warning":
|
|
392
415
|
# v2.0-F (L1): context usage exceeded the warn threshold.
|
|
393
416
|
# Track per-profile and aggregate, plus latest ratio gauge.
|
|
@@ -522,6 +545,10 @@ class MetricsCollector(logging.Handler):
|
|
|
522
545
|
"savings_usd": round(
|
|
523
546
|
self._cost_savings_usd.get(name, 0.0), 6
|
|
524
547
|
),
|
|
548
|
+
# v2.6: per-provider language-tax spend.
|
|
549
|
+
"language_tax_usd": round(
|
|
550
|
+
self._language_tax_usd.get(name, 0.0), 6
|
|
551
|
+
),
|
|
525
552
|
},
|
|
526
553
|
}
|
|
527
554
|
for name in providers
|
|
@@ -589,6 +616,14 @@ class MetricsCollector(logging.Handler):
|
|
|
589
616
|
"cost_savings_usd_aggregate": round(
|
|
590
617
|
self._cost_savings_usd_aggregate, 6
|
|
591
618
|
),
|
|
619
|
+
# v2.6: per-provider + aggregate language-tax spend.
|
|
620
|
+
"language_tax_usd": {
|
|
621
|
+
n: round(v, 6)
|
|
622
|
+
for n, v in self._language_tax_usd.items()
|
|
623
|
+
},
|
|
624
|
+
"language_tax_usd_aggregate": round(
|
|
625
|
+
self._language_tax_usd_aggregate, 6
|
|
626
|
+
),
|
|
592
627
|
# v2.0-F (L1): context budget guard aggregate counters.
|
|
593
628
|
"context_budget_warnings_total": self._context_budget_warnings_total,
|
|
594
629
|
"context_budget_trims_total": self._context_budget_trims_total,
|
|
@@ -682,6 +717,13 @@ class MetricsCollector(logging.Handler):
|
|
|
682
717
|
self._cost_savings_usd_aggregate += float(
|
|
683
718
|
state.get("cost_savings_usd_aggregate", 0.0)
|
|
684
719
|
)
|
|
720
|
+
for k, v in (state.get("language_tax_usd") or {}).items():
|
|
721
|
+
self._language_tax_usd[k] = (
|
|
722
|
+
self._language_tax_usd.get(k, 0.0) + float(v)
|
|
723
|
+
)
|
|
724
|
+
self._language_tax_usd_aggregate += float(
|
|
725
|
+
state.get("language_tax_usd_aggregate", 0.0)
|
|
726
|
+
)
|
|
685
727
|
self._chain_paid_gate_blocked_total += int(
|
|
686
728
|
state.get("chain_paid_gate_blocked_total", 0)
|
|
687
729
|
)
|
|
@@ -737,6 +779,9 @@ class MetricsCollector(logging.Handler):
|
|
|
737
779
|
self._cost_savings_usd.clear()
|
|
738
780
|
self._cost_total_usd_aggregate = 0.0
|
|
739
781
|
self._cost_savings_usd_aggregate = 0.0
|
|
782
|
+
# v2.6
|
|
783
|
+
self._language_tax_usd.clear()
|
|
784
|
+
self._language_tax_usd_aggregate = 0.0
|
|
740
785
|
# v2.0-H (L6)
|
|
741
786
|
self._partial_stitch_surfaced_total = 0
|
|
742
787
|
# v2.0-I
|
|
@@ -39,6 +39,7 @@ import re
|
|
|
39
39
|
from typing import TYPE_CHECKING, Any
|
|
40
40
|
|
|
41
41
|
from coderouter.config.schemas import AutoRouterConfig, AutoRouteRule, RuleMatcher
|
|
42
|
+
from coderouter.language_tax import cjk_char_ratio
|
|
42
43
|
from coderouter.token_estimation import estimate_tokens_from_body as _estimate_total_tokens
|
|
43
44
|
|
|
44
45
|
if TYPE_CHECKING:
|
|
@@ -181,6 +182,12 @@ def _match_rule(
|
|
|
181
182
|
return message is not None and _has_image(message)
|
|
182
183
|
if m.code_fence_ratio_min is not None:
|
|
183
184
|
return _code_fence_ratio(text) >= m.code_fence_ratio_min
|
|
185
|
+
if m.cjk_ratio_min is not None:
|
|
186
|
+
# v2.6: language-tax routing. CJK ratio of the latest user
|
|
187
|
+
# message — a per-turn property like code_fence_ratio_min, so it
|
|
188
|
+
# reuses ``text`` (latest user message) rather than walking the
|
|
189
|
+
# whole request. Steers CJK-heavy turns to a local, tax-free model.
|
|
190
|
+
return cjk_char_ratio(text) >= m.cjk_ratio_min
|
|
184
191
|
if m.content_contains is not None:
|
|
185
192
|
return m.content_contains in text
|
|
186
193
|
if m.content_regex is not None:
|
coderouter/routing/fallback.py
CHANGED
|
@@ -61,6 +61,10 @@ from coderouter.guards.tool_loop import (
|
|
|
61
61
|
detect_tool_loop,
|
|
62
62
|
inject_loop_break_hint,
|
|
63
63
|
)
|
|
64
|
+
from coderouter.language_tax import (
|
|
65
|
+
LanguageTaxBreakdown,
|
|
66
|
+
estimate_language_tax_for_request,
|
|
67
|
+
)
|
|
64
68
|
from coderouter.logging import (
|
|
65
69
|
classify_cache_outcome,
|
|
66
70
|
get_logger,
|
|
@@ -372,6 +376,7 @@ def _emit_cache_observed(
|
|
|
372
376
|
streaming: bool,
|
|
373
377
|
provider_config: ProviderConfig | None = None,
|
|
374
378
|
budget: BudgetTracker | None = None,
|
|
379
|
+
language_tax: LanguageTaxBreakdown | None = None,
|
|
375
380
|
) -> None:
|
|
376
381
|
"""Extract usage / cache fields from an AnthropicResponse and log them.
|
|
377
382
|
|
|
@@ -432,6 +437,7 @@ def _emit_cache_observed(
|
|
|
432
437
|
output_tokens=usage.output_tokens,
|
|
433
438
|
cache_read_input_tokens=cache_read,
|
|
434
439
|
cache_creation_input_tokens=cache_creation,
|
|
440
|
+
language_tax=language_tax,
|
|
435
441
|
)
|
|
436
442
|
|
|
437
443
|
# v1.10: feed the per-provider monthly running total. The
|
|
@@ -452,6 +458,8 @@ def _emit_cache_observed(
|
|
|
452
458
|
streaming=streaming,
|
|
453
459
|
cost_usd=cost.total_usd,
|
|
454
460
|
cost_savings_usd=cost.savings_usd,
|
|
461
|
+
language_tax_usd=cost.language_tax_usd,
|
|
462
|
+
language_tax_multiplier=cost.language_tax_multiplier,
|
|
455
463
|
)
|
|
456
464
|
|
|
457
465
|
|
|
@@ -629,6 +637,7 @@ def _emit_cache_observed_streaming(
|
|
|
629
637
|
request_had_cache_control: bool,
|
|
630
638
|
provider_config: ProviderConfig | None = None,
|
|
631
639
|
budget: BudgetTracker | None = None,
|
|
640
|
+
language_tax: LanguageTaxBreakdown | None = None,
|
|
632
641
|
) -> None:
|
|
633
642
|
"""Streaming counterpart of :func:`_emit_cache_observed` (v1.9-B2).
|
|
634
643
|
|
|
@@ -661,6 +670,7 @@ def _emit_cache_observed_streaming(
|
|
|
661
670
|
output_tokens=output_tokens,
|
|
662
671
|
cache_read_input_tokens=cache_read,
|
|
663
672
|
cache_creation_input_tokens=cache_creation,
|
|
673
|
+
language_tax=language_tax,
|
|
664
674
|
)
|
|
665
675
|
|
|
666
676
|
# v1.10: same monthly-budget bookkeeping as the non-streaming
|
|
@@ -681,6 +691,8 @@ def _emit_cache_observed_streaming(
|
|
|
681
691
|
streaming=True,
|
|
682
692
|
cost_usd=cost.total_usd,
|
|
683
693
|
cost_savings_usd=cost.savings_usd,
|
|
694
|
+
language_tax_usd=cost.language_tax_usd,
|
|
695
|
+
language_tax_multiplier=cost.language_tax_multiplier,
|
|
684
696
|
)
|
|
685
697
|
|
|
686
698
|
|
|
@@ -2126,6 +2138,14 @@ class FallbackEngine:
|
|
|
2126
2138
|
# outcome=unknown.
|
|
2127
2139
|
# v1.9-D: also enrich the log line with per-attempt
|
|
2128
2140
|
# USD cost + cache savings via the provider's CostConfig.
|
|
2141
|
+
# v2.6 language tax: only measured when the provider declares
|
|
2142
|
+
# a local tokenizer.json (else inert — no extra work, mult=1.0).
|
|
2143
|
+
_lt = None
|
|
2144
|
+
_tok = getattr(adapter.config, "tokenizer_path", None)
|
|
2145
|
+
if _tok:
|
|
2146
|
+
_lt = estimate_language_tax_for_request(
|
|
2147
|
+
request.system, request.messages, tokenizer_path=_tok
|
|
2148
|
+
)
|
|
2129
2149
|
_emit_cache_observed(
|
|
2130
2150
|
resp,
|
|
2131
2151
|
provider=adapter.name,
|
|
@@ -2133,6 +2153,7 @@ class FallbackEngine:
|
|
|
2133
2153
|
streaming=False,
|
|
2134
2154
|
provider_config=adapter.config,
|
|
2135
2155
|
budget=self._budget,
|
|
2156
|
+
language_tax=_lt,
|
|
2136
2157
|
)
|
|
2137
2158
|
# v2.3.0: observer plugin fanout — fire-and-forget, never
|
|
2138
2159
|
# blocks the engine response. Latency in ms uses the same
|
|
@@ -2359,12 +2380,21 @@ class FallbackEngine:
|
|
|
2359
2380
|
# both go through ``classify_cache_outcome`` /
|
|
2360
2381
|
# ``compute_cost_for_attempt`` for symmetric outcome and
|
|
2361
2382
|
# cost reporting.
|
|
2383
|
+
# v2.6 language tax: same opt-in measurement as the
|
|
2384
|
+
# non-streaming sibling (inert unless tokenizer_path is set).
|
|
2385
|
+
_lt_s = None
|
|
2386
|
+
_tok_s = getattr(adapter.config, "tokenizer_path", None)
|
|
2387
|
+
if _tok_s:
|
|
2388
|
+
_lt_s = estimate_language_tax_for_request(
|
|
2389
|
+
request.system, request.messages, tokenizer_path=_tok_s
|
|
2390
|
+
)
|
|
2362
2391
|
_emit_cache_observed_streaming(
|
|
2363
2392
|
acc,
|
|
2364
2393
|
provider=adapter.name,
|
|
2365
2394
|
request_had_cache_control=request_had_cache_control,
|
|
2366
2395
|
provider_config=adapter.config,
|
|
2367
2396
|
budget=self._budget,
|
|
2397
|
+
language_tax=_lt_s,
|
|
2368
2398
|
)
|
|
2369
2399
|
# v2.3.0: streaming observer fanout fires once, after the
|
|
2370
2400
|
# SSE terminates successfully. We hand the accumulator's
|
coderouter/token_estimation.py
CHANGED
|
@@ -91,6 +91,21 @@ def _count_system_chars(system: Any) -> int:
|
|
|
91
91
|
return 0
|
|
92
92
|
|
|
93
93
|
|
|
94
|
+
def _extract_system_text(system: Any) -> str:
|
|
95
|
+
"""Concatenate the system prompt text (str or list-of-blocks form)."""
|
|
96
|
+
if isinstance(system, str):
|
|
97
|
+
return system
|
|
98
|
+
if isinstance(system, list):
|
|
99
|
+
pieces: list[str] = []
|
|
100
|
+
for block in system:
|
|
101
|
+
if isinstance(block, dict):
|
|
102
|
+
text = block.get("text")
|
|
103
|
+
if isinstance(text, str):
|
|
104
|
+
pieces.append(text)
|
|
105
|
+
return "\n".join(pieces)
|
|
106
|
+
return ""
|
|
107
|
+
|
|
108
|
+
|
|
94
109
|
# ---------------------------------------------------------------------------
|
|
95
110
|
# Public API
|
|
96
111
|
# ---------------------------------------------------------------------------
|
|
@@ -153,9 +168,41 @@ def estimate_tokens_from_anthropic_request(
|
|
|
153
168
|
return total_chars // CHARS_PER_TOKEN_HEURISTIC
|
|
154
169
|
|
|
155
170
|
|
|
171
|
+
def extract_text_from_anthropic_request(
    *,
    system: Any,
    messages: list[Any],
) -> str:
    """Gather the text of an Anthropic-shaped request into one string.

    The system prompt text (via ``_extract_system_text``) comes first,
    followed by each message's text (via ``_extract_text_from_content``)
    in message order. Empty pieces are skipped and the remainder is joined
    with newlines. Used by :mod:`coderouter.language_tax` to feed a
    tokenizer the raw text instead of a char/4 count.

    A message's content is read from its ``content`` attribute when it has
    one, otherwise from ``dict.get("content")``; entries that are neither
    are ignored.
    """

    def _message_bodies():
        # Yield the raw ``content`` of every message we know how to read.
        for message in messages:
            if hasattr(message, "content"):
                yield message.content
            elif isinstance(message, dict):
                yield message.get("content")

    candidates = [_extract_system_text(system)]
    candidates.extend(_extract_text_from_content(body) for body in _message_bodies())
    return "\n".join(text for text in candidates if text)
|
|
200
|
+
|
|
201
|
+
|
|
156
202
|
__all__ = [
|
|
157
203
|
"CHARS_PER_TOKEN_HEURISTIC",
|
|
158
204
|
"DEFAULT_MAX_CONTEXT_TOKENS",
|
|
159
205
|
"estimate_tokens_from_anthropic_request",
|
|
160
206
|
"estimate_tokens_from_body",
|
|
207
|
+
"extract_text_from_anthropic_request",
|
|
161
208
|
]
|
|
@@ -10,9 +10,12 @@ through unchanged if a client sends them.
|
|
|
10
10
|
|
|
11
11
|
from __future__ import annotations
|
|
12
12
|
|
|
13
|
+
import logging
|
|
13
14
|
from typing import Any, Literal
|
|
14
15
|
|
|
15
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
16
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
16
19
|
|
|
17
20
|
# ============================================================
|
|
18
21
|
# Content blocks
|
|
@@ -105,6 +108,113 @@ class AnthropicTool(BaseModel):
|
|
|
105
108
|
input_schema: dict[str, Any] = Field(default_factory=dict)
|
|
106
109
|
|
|
107
110
|
|
|
111
|
+
# ============================================================
|
|
112
|
+
# Role normalization (Claude Code CLI >= 2.1.154 workaround)
|
|
113
|
+
# ============================================================
|
|
114
|
+
|
|
115
|
+
# Roles accepted as-is inside ``messages``; any other role is rewritten by
# ``normalize_message_roles``.
_SPEC_MESSAGE_ROLES = frozenset({"user", "assistant"})
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _content_as_text(content: Any) -> str:
|
|
119
|
+
"""Best-effort plain-text extraction from a message ``content`` field.
|
|
120
|
+
|
|
121
|
+
Strings pass through; block lists contribute their ``text`` blocks
|
|
122
|
+
joined with newlines; anything else yields "".
|
|
123
|
+
"""
|
|
124
|
+
if isinstance(content, str):
|
|
125
|
+
return content
|
|
126
|
+
if isinstance(content, list):
|
|
127
|
+
parts: list[str] = []
|
|
128
|
+
for block in content:
|
|
129
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
130
|
+
parts.append(str(block.get("text", "")))
|
|
131
|
+
return "\n".join(p for p in parts if p)
|
|
132
|
+
return ""
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def normalize_message_roles(payload: dict[str, Any]) -> dict[str, Any]:
    """Normalize non-spec roles inside ``messages`` before validation.

    Claude Code CLI >= 2.1.154 has a regression where it emits messages
    with ``role: "system"`` (and reportedly ``ctx`` / ``msg``) inside the
    ``messages`` array. The Anthropic Messages API spec allows only
    ``user`` / ``assistant`` there, so without this hop those requests
    die in validation with "Input should be 'user' or 'assistant'"
    (see anthropics/claude-code#63469, vllm-project/vllm#44000).

    Policy:
      - ``role: "system"`` -> text content merged into the top-level
        ``system`` field (appended after any existing system prompt).
      - any other non-spec role (``ctx``, ``msg``, ...) -> coerced to
        ``user`` so conversation position is preserved.
      - messages whose salvaged content is empty are dropped entirely.

    Args:
        payload: Raw request body as a dict.

    Returns:
        ``payload`` itself when ``messages`` is not a list or no role had
        to be rewritten; otherwise a shallow copy with rewritten
        ``messages`` (and possibly ``system``). The caller's dict is not
        mutated. Non-dict message entries pass through untouched.
    """
    messages = payload.get("messages")
    if not isinstance(messages, list):
        return payload

    system_texts: list[str] = []
    messages_out: list[Any] = []
    coerced_roles: list[str] = []

    for msg in messages:
        if not isinstance(msg, dict):
            # Already a validated message object (internal construction
            # path) — pass through unchanged.
            messages_out.append(msg)
            continue
        role = msg.get("role")
        if role in _SPEC_MESSAGE_ROLES:
            messages_out.append(msg)
            continue
        text = _content_as_text(msg.get("content"))
        if role == "system":
            # Lifted out of the conversation; merged into ``system`` below.
            coerced_roles.append("system")
            if text:
                system_texts.append(text)
            continue
        # Unknown role (ctx / msg / future surprises): keep its position
        # in the conversation as a user turn; drop if nothing salvageable.
        coerced_roles.append(str(role))
        if text:
            messages_out.append({"role": "user", "content": text})

    if not coerced_roles:
        return payload

    out = dict(payload)
    out["messages"] = messages_out

    # Track whether the salvaged system text really landed in ``system`` so
    # the log line below does not claim a merge that never happened.
    system_merged = False
    if system_texts:
        joined = "\n".join(system_texts)
        existing = out.get("system")
        if existing is None:
            out["system"] = joined
            system_merged = True
        elif isinstance(existing, str):
            out["system"] = f"{existing}\n{joined}" if existing else joined
            system_merged = True
        elif isinstance(existing, list):
            out["system"] = [*existing, {"type": "text", "text": joined}]
            system_merged = True
        # Any other shape: leave the client's ``system`` value untouched
        # (already present in the shallow copy); nothing is merged.

    logger.warning(
        "normalized-nonspec-message-roles",
        extra={
            "roles": coerced_roles,
            "system_merged": system_merged,
            "hint": "client is likely Claude Code CLI >= 2.1.154 (known regression)",
        },
    )
    return out
|
|
216
|
+
|
|
217
|
+
|
|
108
218
|
# ============================================================
|
|
109
219
|
# Request
|
|
110
220
|
# ============================================================
|
|
@@ -147,6 +257,19 @@ class AnthropicRequest(BaseModel):
|
|
|
147
257
|
# `thinking` beyond what the default minor version accepts.
|
|
148
258
|
anthropic_beta: str | None = Field(default=None, exclude=True)
|
|
149
259
|
|
|
260
|
+
@model_validator(mode="before")
@classmethod
def _normalize_roles(cls, data: Any) -> Any:
    """Rewrite non-spec message roles before field validation runs.

    Claude Code >= 2.1.154 sends system/ctx/msg roles in ``messages``.
    Dict input is routed through ``normalize_message_roles`` so such a
    request does not 422 at ingress (or 400 upstream via the native
    adapter); any other input is returned untouched.
    """
    return normalize_message_roles(data) if isinstance(data, dict) else data
|
|
272
|
+
|
|
150
273
|
|
|
151
274
|
# ============================================================
|
|
152
275
|
# Response
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.5.4
|
|
3
|
+
Version: 2.6.0
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -2,16 +2,17 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
|
|
|
2
2
|
coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
|
|
3
3
|
coderouter/cli.py,sha256=KE49IACJVw692H6dlfu1tAah1jQgbwH92F4lCkhRk6U,28168
|
|
4
4
|
coderouter/cli_stats.py,sha256=CCjzc1G4hTRHZ2gG1XhxhDpUkJnnl3NXbcbp1T18jpg,29894
|
|
5
|
-
coderouter/cost.py,sha256=
|
|
5
|
+
coderouter/cost.py,sha256=32h6uzb4nxh2eA5d2Hn3kD9yJbtis6CFDAbeIy5KRkM,7431
|
|
6
6
|
coderouter/doctor.py,sha256=2luNk6BHSRvpQStJnHcqzNvNi-SKdOuKV0WZdorZhVk,82854
|
|
7
7
|
coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
|
|
8
8
|
coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
|
|
9
9
|
coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
|
|
10
10
|
coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
|
|
11
11
|
coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
|
|
12
|
-
coderouter/
|
|
12
|
+
coderouter/language_tax.py,sha256=LTbE3tIfoJuV2O3T0NixRKhzq_dEOTUuPEerJv2q9uk,9360
|
|
13
|
+
coderouter/logging.py,sha256=63_aaXuZwk_jboDGUrfFjZV65SGvcqhW3dcj12AqUcA,53126
|
|
13
14
|
coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
|
|
14
|
-
coderouter/token_estimation.py,sha256=
|
|
15
|
+
coderouter/token_estimation.py,sha256=iz22vZEEW2P7uKLB2pYvPNpIbZGbgXRO5MtfkS_-9Sk,7531
|
|
15
16
|
coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
|
|
16
17
|
coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
|
|
17
18
|
coderouter/adapters/anthropic_native.py,sha256=qfdjxy4YyLt-0Fj7hUYn1oi1SFjEEbSvpaRBUC2hMf4,21903
|
|
@@ -22,7 +23,7 @@ coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g
|
|
|
22
23
|
coderouter/config/capability_registry.py,sha256=QRJLlzqKTdb2ndpWAHGZQJq23wczEfzSVanKnoqJrg4,15815
|
|
23
24
|
coderouter/config/env_file.py,sha256=CoMK27fuAXm-NtoLzXb8yN2E-wDFjHQuFwiIlmgTBQw,10356
|
|
24
25
|
coderouter/config/loader.py,sha256=FUEe8m4Tnmj_aul0vSctD8vKvNW-oLRoMRbTpSKqSmc,4077
|
|
25
|
-
coderouter/config/schemas.py,sha256=
|
|
26
|
+
coderouter/config/schemas.py,sha256=ROFU3TdH4WanK63US6PRCLAMRUZBKFNlIoguXTYo6L0,62026
|
|
26
27
|
coderouter/data/__init__.py,sha256=uNyfD9jaCvTWsBAWtaw1Fr25OSxzv3psGMfBjT1z0Cc,328
|
|
27
28
|
coderouter/data/model-capabilities.yaml,sha256=S9jt6SC6-3s2-icZ_n-a14iEMnc2yB1C2R6q-N_tZWQ,19309
|
|
28
29
|
coderouter/guards/__init__.py,sha256=5qliYBqygvVPneej7nx0uSjxDKsz7t8VzvrDgVBJlvU,1170
|
|
@@ -39,12 +40,12 @@ coderouter/guards/tool_loop.py,sha256=EzeMcmU7BLeTW2jsRVevU81l5rhWcn1oUr7EpzgXjV
|
|
|
39
40
|
coderouter/ingress/__init__.py,sha256=WQsCH2CGJCAhy0mS6GSEdeYZRkkQu2OHDsP4CJWTLug,155
|
|
40
41
|
coderouter/ingress/anthropic_routes.py,sha256=It2f7XGe3fgKQX01J2F5JOCoZr96t_Tx_kY2om99MVo,16894
|
|
41
42
|
coderouter/ingress/app.py,sha256=PcuTvUFNjr04EbsUOu8qdyKTdBzxkIJYB4xpz8dFfMo,12635
|
|
42
|
-
coderouter/ingress/dashboard_routes.py,sha256=
|
|
43
|
+
coderouter/ingress/dashboard_routes.py,sha256=tEIayMHxCzlmpnLyKHgpqrE4W24DTJM97ewTlYvkKqI,24238
|
|
43
44
|
coderouter/ingress/launcher_routes.py,sha256=Jh-E6qFmHnr7ON4W6QanafxQIoojT4F034mybLvhTyQ,47548
|
|
44
45
|
coderouter/ingress/metrics_routes.py,sha256=M22dwOGn24P05Ge4W3c7d7mYytSGWjIR-pPSPOAiHJY,3965
|
|
45
46
|
coderouter/ingress/openai_routes.py,sha256=Zw1efPw9DI6GgV8ZcLrzS6Cda0KLrFkKn2GBZWSe6Vo,6322
|
|
46
47
|
coderouter/metrics/__init__.py,sha256=7Es351DPS7yLM0yVF_F0eesmiD83n7Zzhie44chht38,1465
|
|
47
|
-
coderouter/metrics/collector.py,sha256=
|
|
48
|
+
coderouter/metrics/collector.py,sha256=9lKnaFpdlu8R9mRUeyAeJWXR1urRCKt_6sUFn_9ybss,49657
|
|
48
49
|
coderouter/metrics/prometheus.py,sha256=YRqyT931s40zVkIj07D-M2UNfDhIEElVFRz3izdJcnQ,24419
|
|
49
50
|
coderouter/plugins/__init__.py,sha256=76hMLe5dV_ilripHXzWn3HSYoIALjzlw4EJVyI-GyIM,1974
|
|
50
51
|
coderouter/plugins/base.py,sha256=n9hsck2NCSqi6oeHIumKC5zhQ8JGwCXUz7J5AZQCQss,5772
|
|
@@ -52,10 +53,10 @@ coderouter/plugins/loader.py,sha256=xAIf6bIuth0QXCzwxO_ja6aSUlLzIqZNbrbQNJDgSE8,
|
|
|
52
53
|
coderouter/plugins/registry.py,sha256=Tx0QHJHozZ5LTUliGylBdNVcdzHTBV0nedCUwGlbLMM,3236
|
|
53
54
|
coderouter/routing/__init__.py,sha256=g2vhutbozRx5QBThReqwPN3imk5qXdpDiaogILd3IRc,257
|
|
54
55
|
coderouter/routing/adaptive.py,sha256=G2o377twGSjbUh65wiIFx6klnpFGjsD_nI3oDvcBwhY,21257
|
|
55
|
-
coderouter/routing/auto_router.py,sha256=
|
|
56
|
+
coderouter/routing/auto_router.py,sha256=y4v0c8u5F9f98Vmhx1vRcKPiOgAvpzbFqr6TIh058h0,13341
|
|
56
57
|
coderouter/routing/budget.py,sha256=PblmVKJGs_BwNa9uDHAA8hmZ4XIVKv38mHAeU0V3OMs,8451
|
|
57
58
|
coderouter/routing/capability.py,sha256=DCDmiQ-78dkYonCM1WQBCMf6e6XI6VIv_cnuz9hdWT0,18443
|
|
58
|
-
coderouter/routing/fallback.py,sha256=
|
|
59
|
+
coderouter/routing/fallback.py,sha256=Tu7vNDvGDD9EeGDEvAVZfJB2KmXM3ZulZbYlB9F-zdM,105962
|
|
59
60
|
coderouter/state/__init__.py,sha256=XoGcPmmBQSiZWML2S0juSveQ78xfhtdeCliNnVyzu7E,1088
|
|
60
61
|
coderouter/state/audit_log.py,sha256=JwGd0OkkDlkh0Fdc6SmnuyViwKzEaFA7Ux_VqHzakWE,8358
|
|
61
62
|
coderouter/state/replay.py,sha256=Z_YHKroTKZdrL8qObFxcoLOAQWWXZvXFdLfxzvBhEJg,11230
|
|
@@ -63,11 +64,11 @@ coderouter/state/request_log.py,sha256=bR814sOn--U_sKVtbezwS3bkZaNt4FGnboX75_2LL
|
|
|
63
64
|
coderouter/state/store.py,sha256=h-rsMJq8GILsOfCP94nI40cuHaj4Vqycsm9UNN77REI,7445
|
|
64
65
|
coderouter/state/suggest_rules.py,sha256=FvdhEvao5NvdKp9zs8AkcoFKHY4yqqXY2HekvSjpDFA,16670
|
|
65
66
|
coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8IYOtG8,1788
|
|
66
|
-
coderouter/translation/anthropic.py,sha256=
|
|
67
|
+
coderouter/translation/anthropic.py,sha256=aZkcYH4x82b0x7efJgJb9RWn9Hbyc9pEOthXe4vjUdU,11113
|
|
67
68
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
68
69
|
coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
|
|
69
|
-
coderouter_cli-2.
|
|
70
|
-
coderouter_cli-2.
|
|
71
|
-
coderouter_cli-2.
|
|
72
|
-
coderouter_cli-2.
|
|
73
|
-
coderouter_cli-2.
|
|
70
|
+
coderouter_cli-2.6.0.dist-info/METADATA,sha256=us2o2_EtIlzd2EjQqAqtKIX1ocpAD3YcaDiZKOG6ktE,11674
|
|
71
|
+
coderouter_cli-2.6.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
72
|
+
coderouter_cli-2.6.0.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
73
|
+
coderouter_cli-2.6.0.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
74
|
+
coderouter_cli-2.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|