coderouter-cli 2.5.5__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -185,6 +185,19 @@ class ProviderConfig(BaseModel):
185
185
  )
186
186
  timeout_s: float = Field(default=30.0, ge=1.0, le=600.0)
187
187
 
188
+ # v2.6 language-tax track: path to a LOCAL ``tokenizer.json`` for this
189
+ # provider's model, used to measure the CJK over-count vs the char/4
190
+ # baseline (see ``coderouter.language_tax``). Loaded local-file-only —
191
+ # never contacts the HuggingFace Hub. When unset, language-tax falls
192
+ # back to char/4 (multiplier 1.0) and the feature is silently inert.
193
+ tokenizer_path: str | None = Field(
194
+ default=None,
195
+ description=(
196
+ "Local tokenizer.json for accurate (language-tax) token "
197
+ "counting. No network access. Requires the 'accuracy' extra."
198
+ ),
199
+ )
200
+
188
201
  # Provider-specific extras merged into the outbound request body.
189
202
  # Use for non-standard fields like Ollama's `think: false`, `keep_alive`,
190
203
  # `options.num_ctx`, or any vendor-specific toggle. User-supplied request
@@ -763,6 +776,16 @@ class RuleMatcher(BaseModel):
763
776
  ``request.tools`` set). The ``has_tools`` matcher is the
764
777
  profile-level lever for steering tool-laden traffic to the right
765
778
  chain entirely.
779
+
780
+ Variants (v2.6 / language-tax routing):
781
+
782
+ - ``cjk_ratio_min: 0.3`` — CJK character ratio of the latest user
783
+ message is ``>=`` this threshold. Routes CJK-heavy turns (which
784
+ pay the cloud "language tax" of ~1.2-1.5x more tokens) to a local
785
+ model that bills nothing per token, while ASCII/code turns fall
786
+ through to the cloud chain. Per-turn property like
787
+ ``code_fence_ratio_min``; see
788
+ :func:`coderouter.language_tax.cjk_char_ratio`.
766
789
  """
767
790
 
768
791
  model_config = ConfigDict(extra="forbid")
@@ -773,6 +796,13 @@ class RuleMatcher(BaseModel):
773
796
  content_regex: str | None = None
774
797
  model_pattern: str | None = None
775
798
  content_token_count_min: int | None = Field(default=None, ge=1)
799
+ # v2.6 language-tax routing: CJK character ratio of the latest user
800
+ # message >= this threshold. Lets operators steer CJK-heavy traffic
801
+ # (which carries the cloud language tax) to a local model that bills
802
+ # nothing per token. Operates on the latest user message like
803
+ # ``code_fence_ratio_min`` (a per-turn property), not the whole
804
+ # request. See ``coderouter.language_tax.cjk_char_ratio``.
805
+ cjk_ratio_min: float | None = Field(default=None, ge=0.0, le=1.0)
776
806
  # [Unreleased]: tool-aware routing (originating from OpenClaw + Raspberry Pi).
777
807
  # See class docstring "Variants ([Unreleased] / tool-aware routing)"
778
808
  # above for the full rationale. Boolean shape mirrors ``has_image`` —
@@ -789,6 +819,7 @@ class RuleMatcher(BaseModel):
789
819
  "model_pattern",
790
820
  "content_token_count_min",
791
821
  "has_tools",
822
+ "cjk_ratio_min",
792
823
  )
793
824
 
794
825
  @model_validator(mode="after")
coderouter/cost.py CHANGED
@@ -58,9 +58,13 @@ in the cost calc.
58
58
  from __future__ import annotations
59
59
 
60
60
  from dataclasses import dataclass
61
+ from typing import TYPE_CHECKING
61
62
 
62
63
  from coderouter.config.schemas import CostConfig
63
64
 
65
+ if TYPE_CHECKING: # avoid an import cycle at runtime; used only for typing
66
+ from coderouter.language_tax import LanguageTaxBreakdown
67
+
64
68
 
65
69
  @dataclass(frozen=True)
66
70
  class CostBreakdown:
@@ -82,6 +86,12 @@ class CostBreakdown:
82
86
  chart. ``input_usd`` is "fresh input only" (does not
83
87
  include cache buckets); cache_read_usd / cache_creation_usd
84
88
  are the post-discount / post-premium values.
89
+ language_tax_multiplier: ``tokens_accurate / tokens_heuristic``
90
+ for the request text (v2.6 language-tax track). 1.0 when no
91
+ tax is measurable (English/code, or no accurate tokenizer).
92
+ language_tax_usd: USD share of ``total_usd`` attributable to the
93
+ CJK over-count vs CodeRouter's char/4 English baseline.
94
+ 0.0 for free / local providers. See :mod:`coderouter.language_tax`.
85
95
  """
86
96
 
87
97
  total_usd: float = 0.0
@@ -90,6 +100,10 @@ class CostBreakdown:
90
100
  output_usd: float = 0.0
91
101
  cache_read_usd: float = 0.0
92
102
  cache_creation_usd: float = 0.0
103
+ # v2.6 language-tax track (additive; defaults keep pre-v2.6 behaviour
104
+ # and equality with a bare ``CostBreakdown()``).
105
+ language_tax_multiplier: float = 1.0
106
+ language_tax_usd: float = 0.0
93
107
 
94
108
 
95
109
  _PER_MILLION: float = 1_000_000.0
@@ -102,6 +116,7 @@ def compute_cost_for_attempt(
102
116
  output_tokens: int,
103
117
  cache_read_input_tokens: int,
104
118
  cache_creation_input_tokens: int,
119
+ language_tax: LanguageTaxBreakdown | None = None,
105
120
  ) -> CostBreakdown:
106
121
  """Translate per-attempt token counts into a USD :class:`CostBreakdown`.
107
122
 
@@ -144,6 +159,21 @@ def compute_cost_for_attempt(
144
159
  full_rate_for_cache_read = safe_read * input_rate
145
160
  savings_usd = full_rate_for_cache_read - cache_read_usd
146
161
 
162
+ # v2.6 language tax: the share of fresh-input spend attributable to
163
+ # the CJK over-count vs the char/4 English baseline. Defaults to a
164
+ # 1.0 multiplier / $0 when no LanguageTaxBreakdown is supplied, so
165
+ # the pre-v2.6 call shape is unchanged.
166
+ lt_multiplier = 1.0
167
+ lt_usd = 0.0
168
+ if language_tax is not None:
169
+ lt_multiplier = language_tax.tax_multiplier
170
+ from coderouter.language_tax import language_tax_usd
171
+
172
+ lt_usd = language_tax_usd(
173
+ language_tax.extra_tokens,
174
+ input_tokens_per_million=cost_config.input_tokens_per_million,
175
+ )
176
+
147
177
  return CostBreakdown(
148
178
  total_usd=total_usd,
149
179
  savings_usd=max(savings_usd, 0.0),
@@ -151,4 +181,6 @@ def compute_cost_for_attempt(
151
181
  output_usd=output_usd,
152
182
  cache_read_usd=cache_read_usd,
153
183
  cache_creation_usd=cache_creation_usd,
184
+ language_tax_multiplier=lt_multiplier,
185
+ language_tax_usd=lt_usd,
154
186
  )
@@ -165,6 +165,26 @@ _DASHBOARD_HTML = r"""<!doctype html>
165
165
  </main>
166
166
 
167
167
  <footer class="max-w-7xl mx-auto px-4 md:px-6 pb-8">
168
+ <!-- Panel: Cost & Language Tax (v2.6) -->
169
+ <section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4 mb-4">
170
+ <h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Cost &amp; Language Tax</h2>
171
+ <div class="grid grid-cols-3 gap-3">
172
+ <div class="rounded-md bg-slate-800/50 p-3">
173
+ <div class="text-xs text-slate-400">Total spend</div>
174
+ <div class="text-2xl font-semibold tabnum" data-bind="cost_total">$0.00</div>
175
+ </div>
176
+ <div class="rounded-md bg-slate-800/50 p-3">
177
+ <div class="text-xs text-slate-400">Cache savings</div>
178
+ <div class="text-2xl font-semibold tabnum text-green-400" data-bind="cost_savings">$0.00</div>
179
+ </div>
180
+ <div class="rounded-md bg-slate-800/50 p-3">
181
+ <div class="text-xs text-slate-400">Language tax (CJK)</div>
182
+ <div class="text-2xl font-semibold tabnum text-amber-400" data-bind="language_tax_total">$0.00</div>
183
+ <div class="text-xs text-slate-500" data-bind="language_tax_hint">no tokenizer configured</div>
184
+ </div>
185
+ </div>
186
+ <div id="language-tax-by-provider" class="text-xs text-slate-400 tabnum mt-3"></div>
187
+ </section>
168
188
  <section class="bg-slate-900/60 border border-slate-800 rounded-lg p-4">
169
189
  <h2 class="text-sm font-semibold uppercase tracking-wider text-slate-400 mb-3">Usage Mix</h2>
170
190
  <div id="usage-bar" class="flex h-3 rounded-full overflow-hidden bg-slate-800" role="img" aria-label="usage mix"></div>
@@ -435,6 +455,27 @@ _DASHBOARD_HTML = r"""<!doctype html>
435
455
  {"&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;"}[c]
436
456
  ));
437
457
 
458
+ // v2.6: cost + language-tax panel. The collector zero-fills these, so
459
+ // a fresh/local-only deployment shows $0.00 across the board.
460
+ const renderCostTax = (snap) => {
461
+ const c = snap.counters || {};
462
+ const usd = (x) => "$" + (Number(x) || 0).toFixed(4);
463
+ setBind("cost_total", usd(c.cost_total_usd_aggregate));
464
+ setBind("cost_savings", usd(c.cost_savings_usd_aggregate));
465
+ const taxTotal = Number(c.language_tax_usd_aggregate) || 0;
466
+ setBind("language_tax_total", usd(taxTotal));
467
+ setBind("language_tax_hint",
468
+ taxTotal > 0 ? "extra paid for CJK vs char/4 baseline"
469
+ : "no tax measured (set provider tokenizer_path)");
470
+ const byProv = c.language_tax_usd || {};
471
+ const el = document.getElementById("language-tax-by-provider");
472
+ const rows = Object.entries(byProv).filter(([, v]) => Number(v) > 0);
473
+ el.innerHTML = rows.length === 0 ? "" :
474
+ rows.map(([n, v]) =>
475
+ '<span class="mr-4"><span class="text-slate-500">' + escapeHTML(n) +
476
+ '</span> ' + usd(v) + '</span>').join("");
477
+ };
478
+
438
479
  const renderSnapshot = (snap) => {
439
480
  const startup = snap.startup || {};
440
481
  const cfg = snap.config || {};
@@ -451,6 +492,7 @@ _DASHBOARD_HTML = r"""<!doctype html>
451
492
  renderSparkline(snap);
452
493
  renderRecent(snap);
453
494
  renderUsageMix(snap);
495
+ renderCostTax(snap);
454
496
  };
455
497
 
456
498
  const renderError = (msg) => {
@@ -0,0 +1,244 @@
1
+ """Language-tax measurement (Phase 1 PoC, 5-deps invariant).
2
+
3
+ Why this module exists
4
+ ======================
5
+
6
+ Cloud LLM tokenizers charge CJK text far more tokens-per-character
7
+ than English. CodeRouter's core router uses a ``char/4`` heuristic
8
+ (:mod:`coderouter.token_estimation`) which is *optimistic for CJK*
9
+ — i.e. it **under-counts** Japanese/Chinese/Korean text. That gap is
10
+ the "language tax": a Japanese prompt that the heuristic prices at N
11
+ tokens is actually billed at ~1.2-1.5x N by the cloud provider.
12
+
13
+ Local models are unaffected (no per-token billing), so the tax only
14
+ matters on the cloud leg. This module quantifies it so the cost
15
+ tracker / dashboard can surface "how much extra am I paying to work
16
+ in Japanese?".
17
+
18
+ Design constraints (mirrors token_estimation_accurate.py)
19
+ =========================================================
20
+
21
+ * **No new core dependency.** CJK detection is pure ``str`` + Unicode
22
+ range checks (stdlib only). The *accurate* token count is delegated
23
+ to :func:`coderouter.token_estimation_accurate.count_tokens`, whose
24
+ precise backend (HuggingFace ``tokenizers``) is the existing
25
+ optional ``accuracy`` extra. When that backend is absent every
26
+ function still returns a sane value — the tax_multiplier simply
27
+ collapses to 1.0 because both legs use char/4.
28
+ * **Local only / no network.** No tokenizer is ever downloaded; we
29
+ only pass through a caller-supplied local ``tokenizer.json`` path.
30
+ * **Leaf module.** Imports only ``token_estimation`` /
31
+ ``token_estimation_accurate`` (both leaves), never the engine or
32
+ collector — keeps it trivially testable and circular-import-free.
33
+
34
+ The tax multiplier, defined
35
+ ===========================
36
+
37
+ ``tax_multiplier = tokens_accurate / tokens_heuristic``
38
+
39
+ where ``tokens_heuristic`` is the char/4 estimate (CodeRouter's
40
+ English-calibrated baseline) and ``tokens_accurate`` is the real
41
+ tokenizer count. Reading it:
42
+
43
+ * English / code text → real tokenizers land near char/4, so the
44
+ multiplier is ~1.0 (no tax).
45
+ * Japanese prose → real tokenizers emit ~0.5-1.0 tokens/char vs the
46
+ 0.25 the heuristic assumes, so the multiplier lands ~2.0-4.0 on
47
+ *pure* CJK and ~1.2-1.5 on realistic mixed coding prompts (CJK
48
+ comments/instructions + ASCII code/identifiers).
49
+
50
+ Confidence: **MODERATE.** char/4 is itself an approximation of
51
+ English, so the multiplier is "tax relative to CodeRouter's own
52
+ English baseline", not a lab-grade JA-vs-EN figure. It is, however,
53
+ fully measurable with zero network and no guessing — which is why we
54
+ prefer it to a translate-and-compare counterfactual.
55
+ """
56
+
57
+ from __future__ import annotations
58
+
59
+ from dataclasses import dataclass
60
+ from pathlib import Path
61
+ from typing import Any
62
+
63
+ from coderouter.token_estimation import (
64
+ CHARS_PER_TOKEN_HEURISTIC,
65
+ extract_text_from_anthropic_request,
66
+ )
67
+ from coderouter.token_estimation_accurate import count_tokens
68
+
69
+ # ---------------------------------------------------------------------------
70
+ # CJK Unicode ranges
71
+ # ---------------------------------------------------------------------------
72
+ #
73
+ # We count a character as "CJK" when it falls in one of the blocks that
74
+ # real tokenizers fragment heavily. ASCII Latin, digits, punctuation and
75
+ # whitespace are excluded so that an ASCII-only prompt scores 0.0 and a
76
+ # pure-Japanese prompt scores ~1.0. Half-width katakana and full-width
77
+ # forms are included because they tokenize like their full-width kin.
78
+ #
79
+ # Ranges are (low, high) inclusive code points.
80
_CJK_RANGES: tuple[tuple[int, int], ...] = (
    (0x3040, 0x309F),  # Hiragana
    (0x30A0, 0x30FF),  # Katakana
    (0x31F0, 0x31FF),  # Katakana Phonetic Extensions (small kana)
    (0x3400, 0x4DBF),  # CJK Unified Ideographs Extension A
    (0x4E00, 0x9FFF),  # CJK Unified Ideographs (common Kanji/Hanzi)
    (0xF900, 0xFAFF),  # CJK Compatibility Ideographs
    (0xFF00, 0xFFEF),  # Half/Full-width forms (full-width punct, half kana)
    (0x3000, 0x303F),  # CJK symbols & punctuation
    (0xAC00, 0xD7A3),  # Hangul syllables (Korean)
    (0x1100, 0x11FF),  # Hangul Jamo (conjoining)
    (0x3130, 0x318F),  # Hangul Compatibility Jamo (standalone jamo)
    (0x20000, 0x2A6DF),  # CJK Ext. B (rare ideographs)
)

# Lowest code point covered by any range above. Anything below it (all of
# ASCII and Latin-1 in particular) can be rejected without scanning the table.
_CJK_MIN_CODE_POINT: int = min(low for low, _ in _CJK_RANGES)


def _is_cjk(cp: int) -> bool:
    """Return True when code point ``cp`` lies in one of ``_CJK_RANGES``.

    Args:
        cp: Unicode code point (e.g. ``ord(ch)``).

    Returns:
        ``True`` if ``cp`` falls inside any inclusive ``(low, high)`` pair
        of ``_CJK_RANGES``, ``False`` otherwise.
    """
    # Fast path: ASCII / Latin text dominates code-heavy prompts and sits
    # below every listed block, so skip the range scan entirely.
    if cp < _CJK_MIN_CODE_POINT:
        return False
    return any(low <= cp <= high for low, high in _CJK_RANGES)
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Public API
100
+ # ---------------------------------------------------------------------------
101
+
102
+
103
def cjk_char_ratio(text: str) -> float:
    """Return the share of non-whitespace characters in ``text`` that are CJK.

    Whitespace never enters the denominator, so indentation and blank
    lines inside a code block cannot dilute the score.

    Args:
        text: Arbitrary text; may be empty.

    Returns:
        A float in ``[0.0, 1.0]``. ``0.0`` for empty or whitespace-only
        input (nothing to measure), ``1.0`` when every non-whitespace
        character is classified as CJK by ``_is_cjk``.
    """
    visible = [ch for ch in text if not ch.isspace()]
    if not visible:
        # Empty or whitespace-only: avoid a zero division, report "no CJK".
        return 0.0
    cjk_count = sum(1 for ch in visible if _is_cjk(ord(ch)))
    return cjk_count / len(visible)
125
+
126
+
127
@dataclass(frozen=True)
class LanguageTaxBreakdown:
    """Per-text language-tax measurement (immutable value object).

    A default-constructed instance means "nothing measured": all counts
    are zero and the multiplier is 1.0.

    Fields
        char_count: total length of the measured text in characters
            (``len(text)``), whitespace included.
        cjk_ratio: see :func:`cjk_char_ratio` (0.0-1.0).
        tokens_heuristic: char/4 estimate (CodeRouter's English
            baseline). Always available.
        tokens_accurate: real tokenizer count when a ``tokenizer_path``
            was supplied *and* the optional backend is installed;
            otherwise equals ``tokens_heuristic`` (graceful fallback).
        accurate_available: whether ``tokens_accurate`` is believed to
            come from the precise backend (True) or from the char/4
            fallback (False).
        tax_multiplier: ``tokens_accurate / tokens_heuristic``; 1.0
            when no tax is measurable (including a zero heuristic).
            See the module docstring for the MODERATE-confidence caveat.
        extra_tokens: ``tokens_accurate - tokens_heuristic`` clamped
            at 0 — the visible "tax" in tokens. Never negative.
    """

    char_count: int = 0
    cjk_ratio: float = 0.0
    tokens_heuristic: int = 0
    tokens_accurate: int = 0
    accurate_available: bool = False
    tax_multiplier: float = 1.0
    extra_tokens: int = 0
155
+
156
+
157
def estimate_language_tax(
    text: str,
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Compare the char/4 estimate of ``text`` with ``count_tokens``.

    Args:
        text: Text to measure. Empty text short-circuits to a default
            (all-zero, multiplier 1.0) breakdown.
        tokenizer_path: Optional path to a local ``tokenizer.json``,
            forwarded unchanged to ``count_tokens``.

    Returns:
        A :class:`LanguageTaxBreakdown` holding both token counts, their
        ratio and the non-negative token surplus.
    """
    if not text:
        return LanguageTaxBreakdown()

    baseline = len(text) // CHARS_PER_TOKEN_HEURISTIC
    measured = count_tokens(text, tokenizer_path=tokenizer_path)

    if baseline > 0:
        ratio = measured / baseline
        surplus = measured - baseline
    else:
        # Text too short for the heuristic to yield a single token: a
        # ratio against zero is meaningless, so pin it to 1.0 and treat
        # the whole measured count as surplus.
        ratio = 1.0
        surplus = measured

    # NOTE: availability is inferred, not reported by the backend — a
    # tokenizer path was given AND the count differs from char/4. A real
    # tokenizer that happens to agree with char/4 is reported as False.
    used_precise_backend = tokenizer_path is not None and measured != baseline

    return LanguageTaxBreakdown(
        char_count=len(text),
        cjk_ratio=cjk_char_ratio(text),
        tokens_heuristic=baseline,
        tokens_accurate=measured,
        accurate_available=used_precise_backend,
        tax_multiplier=ratio,
        extra_tokens=max(surplus, 0),  # tax is never negative
    )
199
+
200
+
201
def language_tax_usd(
    extra_tokens: int,
    *,
    input_tokens_per_million: float | None,
) -> float:
    """Price a language-tax token surplus at the provider's input rate.

    Args:
        extra_tokens: Token surplus (see
            :attr:`LanguageTaxBreakdown.extra_tokens`).
        input_tokens_per_million: USD per one million input tokens, or
            ``None`` / ``0`` for a free or unpriced provider.

    Returns:
        ``extra_tokens`` multiplied by the per-token rate, or ``0.0``
        when the rate is missing/zero or there is no positive surplus.
    """
    # Rate is checked first so an unpriced provider returns before the
    # surplus is even inspected.
    if not (input_tokens_per_million and extra_tokens > 0):
        return 0.0
    usd_per_token = input_tokens_per_million / 1_000_000.0
    return extra_tokens * usd_per_token
217
+
218
+
219
def estimate_language_tax_for_request(
    system: Any,
    messages: list[Any],
    *,
    tokenizer_path: str | Path | None = None,
) -> LanguageTaxBreakdown:
    """Measure the language tax of an entire Anthropic-shaped request.

    Flattens the request to one string with
    ``extract_text_from_anthropic_request`` and hands that string to
    :func:`estimate_language_tax`.

    Args:
        system: The request's system prompt, passed through untouched to
            the text extractor.
        messages: The request's message list, likewise passed through.
        tokenizer_path: Optional local ``tokenizer.json`` path forwarded
            to :func:`estimate_language_tax`.

    Returns:
        The :class:`LanguageTaxBreakdown` for the concatenated text.
    """
    return estimate_language_tax(
        extract_text_from_anthropic_request(system=system, messages=messages),
        tokenizer_path=tokenizer_path,
    )
236
+
237
+
238
+ __all__ = [
239
+ "LanguageTaxBreakdown",
240
+ "cjk_char_ratio",
241
+ "estimate_language_tax",
242
+ "estimate_language_tax_for_request",
243
+ "language_tax_usd",
244
+ ]
coderouter/logging.py CHANGED
@@ -971,6 +971,10 @@ class CacheObservedPayload(TypedDict):
971
971
  streaming: bool
972
972
  cost_usd: float
973
973
  cost_savings_usd: float
974
+ # v2.6 language-tax track (optional; default 0.0 / 1.0 at the emit
975
+ # site keeps pre-v2.6 callers and log consumers working unchanged).
976
+ language_tax_usd: float
977
+ language_tax_multiplier: float
974
978
 
975
979
 
976
980
  def log_cache_observed(
@@ -986,6 +990,8 @@ def log_cache_observed(
986
990
  streaming: bool,
987
991
  cost_usd: float = 0.0,
988
992
  cost_savings_usd: float = 0.0,
993
+ language_tax_usd: float = 0.0,
994
+ language_tax_multiplier: float = 1.0,
989
995
  ) -> None:
990
996
  """Emit a ``cache-observed`` info record with the unified shape.
991
997
 
@@ -1013,6 +1019,8 @@ def log_cache_observed(
1013
1019
  "streaming": streaming,
1014
1020
  "cost_usd": cost_usd,
1015
1021
  "cost_savings_usd": cost_savings_usd,
1022
+ "language_tax_usd": language_tax_usd,
1023
+ "language_tax_multiplier": language_tax_multiplier,
1016
1024
  }
1017
1025
  logger.info("cache-observed", extra=payload)
1018
1026
 
@@ -190,6 +190,13 @@ class MetricsCollector(logging.Handler):
190
190
  self._cost_total_usd_aggregate: float = 0.0
191
191
  self._cost_savings_usd_aggregate: float = 0.0
192
192
 
193
+ # v2.6: per-provider language-tax spend — the USD share of input
194
+ # cost attributable to the CJK over-count vs the char/4 baseline.
195
+ # Zero for English/code workloads and for providers without a
196
+ # configured tokenizer_path. Surfaced alongside cost_total_usd.
197
+ self._language_tax_usd: dict[str, float] = {}
198
+ self._language_tax_usd_aggregate: float = 0.0
199
+
193
200
  # v2.0-F (L1): context budget guard counters. Per-profile counts
194
201
  # of warnings (over warn threshold) and trims (messages removed).
195
202
  # The ``latest_usage_ratio`` dict records the most recent ratio
@@ -388,6 +395,22 @@ class MetricsCollector(logging.Handler):
388
395
  self._cost_savings_usd.get(provider, 0.0) + savings_usd
389
396
  )
390
397
  self._cost_savings_usd_aggregate += savings_usd
398
+
399
+ # v2.6: language-tax spend. Same defensive coercion as the
400
+ # cost fields; defaults to 0.0 for pre-v2.6 log lines and
401
+ # English/code traffic, so the aggregate only moves on
402
+ # CJK-heavy requests against a tokenizer-configured provider.
403
+ lt_usd_raw = extras.get("language_tax_usd", 0.0)
404
+ lt_usd = (
405
+ float(lt_usd_raw)
406
+ if isinstance(lt_usd_raw, int | float)
407
+ else 0.0
408
+ )
409
+ if lt_usd > 0.0:
410
+ self._language_tax_usd[provider] = (
411
+ self._language_tax_usd.get(provider, 0.0) + lt_usd
412
+ )
413
+ self._language_tax_usd_aggregate += lt_usd
391
414
  elif event == "context-budget-warning":
392
415
  # v2.0-F (L1): context usage exceeded the warn threshold.
393
416
  # Track per-profile and aggregate, plus latest ratio gauge.
@@ -522,6 +545,10 @@ class MetricsCollector(logging.Handler):
522
545
  "savings_usd": round(
523
546
  self._cost_savings_usd.get(name, 0.0), 6
524
547
  ),
548
+ # v2.6: per-provider language-tax spend.
549
+ "language_tax_usd": round(
550
+ self._language_tax_usd.get(name, 0.0), 6
551
+ ),
525
552
  },
526
553
  }
527
554
  for name in providers
@@ -589,6 +616,14 @@ class MetricsCollector(logging.Handler):
589
616
  "cost_savings_usd_aggregate": round(
590
617
  self._cost_savings_usd_aggregate, 6
591
618
  ),
619
+ # v2.6: per-provider + aggregate language-tax spend.
620
+ "language_tax_usd": {
621
+ n: round(v, 6)
622
+ for n, v in self._language_tax_usd.items()
623
+ },
624
+ "language_tax_usd_aggregate": round(
625
+ self._language_tax_usd_aggregate, 6
626
+ ),
592
627
  # v2.0-F (L1): context budget guard aggregate counters.
593
628
  "context_budget_warnings_total": self._context_budget_warnings_total,
594
629
  "context_budget_trims_total": self._context_budget_trims_total,
@@ -682,6 +717,13 @@ class MetricsCollector(logging.Handler):
682
717
  self._cost_savings_usd_aggregate += float(
683
718
  state.get("cost_savings_usd_aggregate", 0.0)
684
719
  )
720
+ for k, v in (state.get("language_tax_usd") or {}).items():
721
+ self._language_tax_usd[k] = (
722
+ self._language_tax_usd.get(k, 0.0) + float(v)
723
+ )
724
+ self._language_tax_usd_aggregate += float(
725
+ state.get("language_tax_usd_aggregate", 0.0)
726
+ )
685
727
  self._chain_paid_gate_blocked_total += int(
686
728
  state.get("chain_paid_gate_blocked_total", 0)
687
729
  )
@@ -737,6 +779,9 @@ class MetricsCollector(logging.Handler):
737
779
  self._cost_savings_usd.clear()
738
780
  self._cost_total_usd_aggregate = 0.0
739
781
  self._cost_savings_usd_aggregate = 0.0
782
+ # v2.6
783
+ self._language_tax_usd.clear()
784
+ self._language_tax_usd_aggregate = 0.0
740
785
  # v2.0-H (L6)
741
786
  self._partial_stitch_surfaced_total = 0
742
787
  # v2.0-I
@@ -39,6 +39,7 @@ import re
39
39
  from typing import TYPE_CHECKING, Any
40
40
 
41
41
  from coderouter.config.schemas import AutoRouterConfig, AutoRouteRule, RuleMatcher
42
+ from coderouter.language_tax import cjk_char_ratio
42
43
  from coderouter.token_estimation import estimate_tokens_from_body as _estimate_total_tokens
43
44
 
44
45
  if TYPE_CHECKING:
@@ -181,6 +182,12 @@ def _match_rule(
181
182
  return message is not None and _has_image(message)
182
183
  if m.code_fence_ratio_min is not None:
183
184
  return _code_fence_ratio(text) >= m.code_fence_ratio_min
185
+ if m.cjk_ratio_min is not None:
186
+ # v2.6: language-tax routing. CJK ratio of the latest user
187
+ # message — a per-turn property like code_fence_ratio_min, so it
188
+ # reuses ``text`` (latest user message) rather than walking the
189
+ # whole request. Steers CJK-heavy turns to a local, tax-free model.
190
+ return cjk_char_ratio(text) >= m.cjk_ratio_min
184
191
  if m.content_contains is not None:
185
192
  return m.content_contains in text
186
193
  if m.content_regex is not None:
@@ -61,6 +61,10 @@ from coderouter.guards.tool_loop import (
61
61
  detect_tool_loop,
62
62
  inject_loop_break_hint,
63
63
  )
64
+ from coderouter.language_tax import (
65
+ LanguageTaxBreakdown,
66
+ estimate_language_tax_for_request,
67
+ )
64
68
  from coderouter.logging import (
65
69
  classify_cache_outcome,
66
70
  get_logger,
@@ -372,6 +376,7 @@ def _emit_cache_observed(
372
376
  streaming: bool,
373
377
  provider_config: ProviderConfig | None = None,
374
378
  budget: BudgetTracker | None = None,
379
+ language_tax: LanguageTaxBreakdown | None = None,
375
380
  ) -> None:
376
381
  """Extract usage / cache fields from an AnthropicResponse and log them.
377
382
 
@@ -432,6 +437,7 @@ def _emit_cache_observed(
432
437
  output_tokens=usage.output_tokens,
433
438
  cache_read_input_tokens=cache_read,
434
439
  cache_creation_input_tokens=cache_creation,
440
+ language_tax=language_tax,
435
441
  )
436
442
 
437
443
  # v1.10: feed the per-provider monthly running total. The
@@ -452,6 +458,8 @@ def _emit_cache_observed(
452
458
  streaming=streaming,
453
459
  cost_usd=cost.total_usd,
454
460
  cost_savings_usd=cost.savings_usd,
461
+ language_tax_usd=cost.language_tax_usd,
462
+ language_tax_multiplier=cost.language_tax_multiplier,
455
463
  )
456
464
 
457
465
 
@@ -629,6 +637,7 @@ def _emit_cache_observed_streaming(
629
637
  request_had_cache_control: bool,
630
638
  provider_config: ProviderConfig | None = None,
631
639
  budget: BudgetTracker | None = None,
640
+ language_tax: LanguageTaxBreakdown | None = None,
632
641
  ) -> None:
633
642
  """Streaming counterpart of :func:`_emit_cache_observed` (v1.9-B2).
634
643
 
@@ -661,6 +670,7 @@ def _emit_cache_observed_streaming(
661
670
  output_tokens=output_tokens,
662
671
  cache_read_input_tokens=cache_read,
663
672
  cache_creation_input_tokens=cache_creation,
673
+ language_tax=language_tax,
664
674
  )
665
675
 
666
676
  # v1.10: same monthly-budget bookkeeping as the non-streaming
@@ -681,6 +691,8 @@ def _emit_cache_observed_streaming(
681
691
  streaming=True,
682
692
  cost_usd=cost.total_usd,
683
693
  cost_savings_usd=cost.savings_usd,
694
+ language_tax_usd=cost.language_tax_usd,
695
+ language_tax_multiplier=cost.language_tax_multiplier,
684
696
  )
685
697
 
686
698
 
@@ -2126,6 +2138,14 @@ class FallbackEngine:
2126
2138
  # outcome=unknown.
2127
2139
  # v1.9-D: also enrich the log line with per-attempt
2128
2140
  # USD cost + cache savings via the provider's CostConfig.
2141
+ # v2.6 language tax: only measured when the provider declares
2142
+ # a local tokenizer.json (else inert — no extra work, mult=1.0).
2143
+ _lt = None
2144
+ _tok = getattr(adapter.config, "tokenizer_path", None)
2145
+ if _tok:
2146
+ _lt = estimate_language_tax_for_request(
2147
+ request.system, request.messages, tokenizer_path=_tok
2148
+ )
2129
2149
  _emit_cache_observed(
2130
2150
  resp,
2131
2151
  provider=adapter.name,
@@ -2133,6 +2153,7 @@ class FallbackEngine:
2133
2153
  streaming=False,
2134
2154
  provider_config=adapter.config,
2135
2155
  budget=self._budget,
2156
+ language_tax=_lt,
2136
2157
  )
2137
2158
  # v2.3.0: observer plugin fanout — fire-and-forget, never
2138
2159
  # blocks the engine response. Latency in ms uses the same
@@ -2359,12 +2380,21 @@ class FallbackEngine:
2359
2380
  # both go through ``classify_cache_outcome`` /
2360
2381
  # ``compute_cost_for_attempt`` for symmetric outcome and
2361
2382
  # cost reporting.
2383
+ # v2.6 language tax: same opt-in measurement as the
2384
+ # non-streaming sibling (inert unless tokenizer_path is set).
2385
+ _lt_s = None
2386
+ _tok_s = getattr(adapter.config, "tokenizer_path", None)
2387
+ if _tok_s:
2388
+ _lt_s = estimate_language_tax_for_request(
2389
+ request.system, request.messages, tokenizer_path=_tok_s
2390
+ )
2362
2391
  _emit_cache_observed_streaming(
2363
2392
  acc,
2364
2393
  provider=adapter.name,
2365
2394
  request_had_cache_control=request_had_cache_control,
2366
2395
  provider_config=adapter.config,
2367
2396
  budget=self._budget,
2397
+ language_tax=_lt_s,
2368
2398
  )
2369
2399
  # v2.3.0: streaming observer fanout fires once, after the
2370
2400
  # SSE terminates successfully. We hand the accumulator's
@@ -91,6 +91,21 @@ def _count_system_chars(system: Any) -> int:
91
91
  return 0
92
92
 
93
93
 
94
def _extract_system_text(system: Any) -> str:
    """Flatten a system prompt into a single string.

    Args:
        system: Either a plain string, or a list of blocks. Only ``dict``
            blocks whose ``"text"`` value is a ``str`` contribute.

    Returns:
        The string itself for ``str`` input; the newline-joined block
        texts for ``list`` input; ``""`` for anything else.
    """
    if isinstance(system, str):
        return system
    if not isinstance(system, list):
        return ""
    # Non-dict blocks and non-string "text" values are silently skipped.
    candidates = (
        block.get("text") for block in system if isinstance(block, dict)
    )
    return "\n".join(piece for piece in candidates if isinstance(piece, str))
107
+
108
+
94
109
  # ---------------------------------------------------------------------------
95
110
  # Public API
96
111
  # ---------------------------------------------------------------------------
@@ -153,9 +168,41 @@ def estimate_tokens_from_anthropic_request(
153
168
  return total_chars // CHARS_PER_TOKEN_HEURISTIC
154
169
 
155
170
 
171
def extract_text_from_anthropic_request(
    *,
    system: Any,
    messages: list[Any],
) -> str:
    """Return the text of an Anthropic-shaped request as one string.

    The system prompt (via ``_extract_system_text``) comes first, followed
    by the text of each message's content (via
    ``_extract_text_from_content``), all joined with newlines. Empty
    pieces are dropped.

    Args:
        system: System prompt in whatever form ``_extract_system_text``
            accepts.
        messages: Message items. An item exposing a ``content`` attribute
            is read through it; otherwise a ``dict`` is read through its
            ``"content"`` key; any other item is skipped.

    Returns:
        The newline-joined text, or ``""`` when nothing textual is found.
    """
    chunks: list[str] = []

    if system_text := _extract_system_text(system):
        chunks.append(system_text)

    for message in messages:
        # Attribute access takes precedence over mapping access.
        if hasattr(message, "content"):
            payload = message.content
        elif isinstance(message, dict):
            payload = message.get("content")
        else:
            continue
        if message_text := _extract_text_from_content(payload):
            chunks.append(message_text)

    return "\n".join(chunks)
200
+
201
+
156
202
  __all__ = [
157
203
  "CHARS_PER_TOKEN_HEURISTIC",
158
204
  "DEFAULT_MAX_CONTEXT_TOKENS",
159
205
  "estimate_tokens_from_anthropic_request",
160
206
  "estimate_tokens_from_body",
207
+ "extract_text_from_anthropic_request",
161
208
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 2.5.5
3
+ Version: 2.6.0
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -2,16 +2,17 @@ coderouter/__init__.py,sha256=ghdjPrLtnRzY8fyQ4CJZI1UJKADyNTLtA3G7se8H7Ns,696
2
2
  coderouter/__main__.py,sha256=-LCgxJnvgUV240HjQKv7ly-mn2NuKHpC4nCpvTHjeSU,130
3
3
  coderouter/cli.py,sha256=KE49IACJVw692H6dlfu1tAah1jQgbwH92F4lCkhRk6U,28168
4
4
  coderouter/cli_stats.py,sha256=CCjzc1G4hTRHZ2gG1XhxhDpUkJnnl3NXbcbp1T18jpg,29894
5
- coderouter/cost.py,sha256=jf70XTTjEfWddHkOohErqSo5TfmIG42zQpBT_5faPNA,5881
5
+ coderouter/cost.py,sha256=32h6uzb4nxh2eA5d2Hn3kD9yJbtis6CFDAbeIy5KRkM,7431
6
6
  coderouter/doctor.py,sha256=2luNk6BHSRvpQStJnHcqzNvNi-SKdOuKV0WZdorZhVk,82854
7
7
  coderouter/doctor_apply.py,sha256=r_J6xbu5-HivofPNriw4_vjNYs_VRs7GsGTS0oMEX10,24209
8
8
  coderouter/env_security.py,sha256=FEBZnXfJ0xE39kmMMn39zk0W_DRRnmcB_REmP9f4xWo,14796
9
9
  coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
10
10
  coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
11
11
  coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
12
- coderouter/logging.py,sha256=U7QiGRaoQXTSGijc-jV9TebnbbzrD-snfnoZy73Nvwo,52737
12
+ coderouter/language_tax.py,sha256=LTbE3tIfoJuV2O3T0NixRKhzq_dEOTUuPEerJv2q9uk,9360
13
+ coderouter/logging.py,sha256=63_aaXuZwk_jboDGUrfFjZV65SGvcqhW3dcj12AqUcA,53126
13
14
  coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
14
- coderouter/token_estimation.py,sha256=1Ai1uT68hahpyr4LBhNyVRGq7y4yXItd6J4k5ApGX7M,5995
15
+ coderouter/token_estimation.py,sha256=iz22vZEEW2P7uKLB2pYvPNpIbZGbgXRO5MtfkS_-9Sk,7531
15
16
  coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
16
17
  coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
17
18
  coderouter/adapters/anthropic_native.py,sha256=qfdjxy4YyLt-0Fj7hUYn1oi1SFjEEbSvpaRBUC2hMf4,21903
@@ -22,7 +23,7 @@ coderouter/config/__init__.py,sha256=FODEn74fN-qZnt4INPSHswqhOlEgpL6-_onxsitSx8g
22
23
  coderouter/config/capability_registry.py,sha256=QRJLlzqKTdb2ndpWAHGZQJq23wczEfzSVanKnoqJrg4,15815
23
24
  coderouter/config/env_file.py,sha256=CoMK27fuAXm-NtoLzXb8yN2E-wDFjHQuFwiIlmgTBQw,10356
24
25
  coderouter/config/loader.py,sha256=FUEe8m4Tnmj_aul0vSctD8vKvNW-oLRoMRbTpSKqSmc,4077
25
- coderouter/config/schemas.py,sha256=XsOGooItlHSXIfkE6LJbZY6vOo3uRkxUoTOt03mQARI,60423
26
+ coderouter/config/schemas.py,sha256=ROFU3TdH4WanK63US6PRCLAMRUZBKFNlIoguXTYo6L0,62026
26
27
  coderouter/data/__init__.py,sha256=uNyfD9jaCvTWsBAWtaw1Fr25OSxzv3psGMfBjT1z0Cc,328
27
28
  coderouter/data/model-capabilities.yaml,sha256=S9jt6SC6-3s2-icZ_n-a14iEMnc2yB1C2R6q-N_tZWQ,19309
28
29
  coderouter/guards/__init__.py,sha256=5qliYBqygvVPneej7nx0uSjxDKsz7t8VzvrDgVBJlvU,1170
@@ -39,12 +40,12 @@ coderouter/guards/tool_loop.py,sha256=EzeMcmU7BLeTW2jsRVevU81l5rhWcn1oUr7EpzgXjV
39
40
  coderouter/ingress/__init__.py,sha256=WQsCH2CGJCAhy0mS6GSEdeYZRkkQu2OHDsP4CJWTLug,155
40
41
  coderouter/ingress/anthropic_routes.py,sha256=It2f7XGe3fgKQX01J2F5JOCoZr96t_Tx_kY2om99MVo,16894
41
42
  coderouter/ingress/app.py,sha256=PcuTvUFNjr04EbsUOu8qdyKTdBzxkIJYB4xpz8dFfMo,12635
42
- coderouter/ingress/dashboard_routes.py,sha256=rscoj89weHTfc8QmYk-fof-7062rhKFHVHRA8cDImDI,21931
43
+ coderouter/ingress/dashboard_routes.py,sha256=tEIayMHxCzlmpnLyKHgpqrE4W24DTJM97ewTlYvkKqI,24238
43
44
  coderouter/ingress/launcher_routes.py,sha256=Jh-E6qFmHnr7ON4W6QanafxQIoojT4F034mybLvhTyQ,47548
44
45
  coderouter/ingress/metrics_routes.py,sha256=M22dwOGn24P05Ge4W3c7d7mYytSGWjIR-pPSPOAiHJY,3965
45
46
  coderouter/ingress/openai_routes.py,sha256=Zw1efPw9DI6GgV8ZcLrzS6Cda0KLrFkKn2GBZWSe6Vo,6322
46
47
  coderouter/metrics/__init__.py,sha256=7Es351DPS7yLM0yVF_F0eesmiD83n7Zzhie44chht38,1465
47
- coderouter/metrics/collector.py,sha256=Q0_CY0orX8_i0EICBME5sYW2RqL2VD4SpNs8qfCnBM0,47432
48
+ coderouter/metrics/collector.py,sha256=9lKnaFpdlu8R9mRUeyAeJWXR1urRCKt_6sUFn_9ybss,49657
48
49
  coderouter/metrics/prometheus.py,sha256=YRqyT931s40zVkIj07D-M2UNfDhIEElVFRz3izdJcnQ,24419
49
50
  coderouter/plugins/__init__.py,sha256=76hMLe5dV_ilripHXzWn3HSYoIALjzlw4EJVyI-GyIM,1974
50
51
  coderouter/plugins/base.py,sha256=n9hsck2NCSqi6oeHIumKC5zhQ8JGwCXUz7J5AZQCQss,5772
@@ -52,10 +53,10 @@ coderouter/plugins/loader.py,sha256=xAIf6bIuth0QXCzwxO_ja6aSUlLzIqZNbrbQNJDgSE8,
52
53
  coderouter/plugins/registry.py,sha256=Tx0QHJHozZ5LTUliGylBdNVcdzHTBV0nedCUwGlbLMM,3236
53
54
  coderouter/routing/__init__.py,sha256=g2vhutbozRx5QBThReqwPN3imk5qXdpDiaogILd3IRc,257
54
55
  coderouter/routing/adaptive.py,sha256=G2o377twGSjbUh65wiIFx6klnpFGjsD_nI3oDvcBwhY,21257
55
- coderouter/routing/auto_router.py,sha256=4_sQR0ztSED9FgQSvQqgqSiydyQVY_qOSRvwyZ5BfRc,12909
56
+ coderouter/routing/auto_router.py,sha256=y4v0c8u5F9f98Vmhx1vRcKPiOgAvpzbFqr6TIh058h0,13341
56
57
  coderouter/routing/budget.py,sha256=PblmVKJGs_BwNa9uDHAA8hmZ4XIVKv38mHAeU0V3OMs,8451
57
58
  coderouter/routing/capability.py,sha256=DCDmiQ-78dkYonCM1WQBCMf6e6XI6VIv_cnuz9hdWT0,18443
58
- coderouter/routing/fallback.py,sha256=P3f6Yna1EGnLAT-ZS5ADrrZ-qRWc-M5xvwEuan4rmcs,104568
59
+ coderouter/routing/fallback.py,sha256=Tu7vNDvGDD9EeGDEvAVZfJB2KmXM3ZulZbYlB9F-zdM,105962
59
60
  coderouter/state/__init__.py,sha256=XoGcPmmBQSiZWML2S0juSveQ78xfhtdeCliNnVyzu7E,1088
60
61
  coderouter/state/audit_log.py,sha256=JwGd0OkkDlkh0Fdc6SmnuyViwKzEaFA7Ux_VqHzakWE,8358
61
62
  coderouter/state/replay.py,sha256=Z_YHKroTKZdrL8qObFxcoLOAQWWXZvXFdLfxzvBhEJg,11230
@@ -66,8 +67,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
66
67
  coderouter/translation/anthropic.py,sha256=aZkcYH4x82b0x7efJgJb9RWn9Hbyc9pEOthXe4vjUdU,11113
67
68
  coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
68
69
  coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
69
- coderouter_cli-2.5.5.dist-info/METADATA,sha256=1A8zDyh8_kEIFafq1l3uKVyJikkJ8QOmwOlaEaSz_qI,11674
70
- coderouter_cli-2.5.5.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
71
- coderouter_cli-2.5.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
72
- coderouter_cli-2.5.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
73
- coderouter_cli-2.5.5.dist-info/RECORD,,
70
+ coderouter_cli-2.6.0.dist-info/METADATA,sha256=us2o2_EtIlzd2EjQqAqtKIX1ocpAD3YcaDiZKOG6ktE,11674
71
+ coderouter_cli-2.6.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
72
+ coderouter_cli-2.6.0.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
73
+ coderouter_cli-2.6.0.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
74
+ coderouter_cli-2.6.0.dist-info/RECORD,,