coderouter-cli 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,7 @@ Reference: plan.md §10.2 "出力クリーニング" / docs/retrospectives/v0.7.
43
43
 
44
44
  from __future__ import annotations
45
45
 
46
+ import re
46
47
  from typing import Protocol
47
48
 
48
49
  __all__ = [
@@ -50,6 +51,7 @@ __all__ = [
50
51
  "KNOWN_FILTERS",
51
52
  "OutputFilter",
52
53
  "OutputFilterChain",
54
+ "RepairByteFallbackFilter",
53
55
  "StripStopMarkersFilter",
54
56
  "StripThinkingFilter",
55
57
  "StripToolCallXmlFilter",
@@ -382,6 +384,151 @@ class StripToolCallXmlFilter:
382
384
  return "".join(out_parts)
383
385
 
384
386
 
387
+ # ---------------------------------------------------------------------------
388
+ # repair_byte_fallback (v2.x)
389
+ # ---------------------------------------------------------------------------
390
+
391
+
392
# One complete byte-fallback token: ``<0x`` + exactly two hex digits (either
# case) + ``>``. Group 1 captures the two hex digits.
_BYTE_RE = re.compile(r"<0x([0-9A-Fa-f]{2})>")

# Meant to be used with ``fullmatch``: succeeds when the whole remaining buffer
# is a *proper prefix* of some ``<0xHH>`` token, i.e. it could still complete
# (and continue a run) on the next feed:
# ``<`` / ``<0`` / ``<0x`` / ``<0xH`` / ``<0xHH`` (closing ``>`` not yet seen).
_PREFIX_RE = re.compile(r"<(0(x[0-9A-Fa-f]{0,2})?)?")

# Literal opening of a byte-fallback token.
_BYTE_TOKEN_START = "<0x"
401
+
402
+
403
+ def _decode_byte_run(buf: bytes) -> str:
404
+ """Decode a run of fallback bytes to text, losslessly.
405
+
406
+ Decodes the maximal valid UTF-8 prefix; any byte that cannot start or
407
+ continue a valid sequence is re-emitted as its ``<0xHH>`` token and
408
+ decoding resumes after it. So ``b"\\xe3\\x80\\x80"`` -> ``" "`` while a
409
+ stray ``b"\\xff"`` round-trips to ``"<0xFF>"`` — we never make the stream
410
+ worse than llama.cpp already did.
411
+ """
412
+ parts: list[str] = []
413
+ i = 0
414
+ n = len(buf)
415
+ while i < n:
416
+ try:
417
+ parts.append(buf[i:].decode("utf-8"))
418
+ break
419
+ except UnicodeDecodeError as exc:
420
+ good_end = i + exc.start
421
+ if good_end > i:
422
+ parts.append(buf[i:good_end].decode("utf-8"))
423
+ parts.append(f"<0x{buf[good_end]:02X}>")
424
+ i = good_end + 1
425
+ return "".join(parts)
426
+
427
+
428
class RepairByteFallbackFilter:
    """Reassemble llama.cpp ``<0xNN>`` byte-fallback leaks into UTF-8 text.

    Ollama 0.30 unified its GGUF runtime onto llama.cpp
    (``ollama/ollama#16031``). For gemma4 the detokenizer changed, and
    multi-byte characters it cannot assemble now leak as llama.cpp's
    byte-fallback notation::

        full-width space (U+3000)  ->  ``<0xE3><0x80><0x80>``
        rare kanji ``躙``           ->  ``<0xE8><0xBA><0x99>``

    These corrupt Japanese prose AND tool-call JSON arguments (a stray
    ``<0xNN>`` inside an argument string breaks JSON parsing). This filter
    reassembles runs of consecutive ``<0xNN>`` tokens back into UTF-8.

    The filter is stateful across ``feed`` calls, so both a token split
    across SSE deltas (``<0x`` | ``E3>``) and a multi-byte run split across
    deltas (``<0xE3>`` | ``<0x80><0x80>``) reassemble correctly. A pending
    byte run is flushed only once it has certainly ended (confirmed normal
    text follows, or ``eof``) — never at a bare chunk boundary, where the run
    might continue in the next delta. Bytes that cannot form valid UTF-8 are
    re-emitted verbatim (lossless).

    ``modified`` flips True the first time any ``<0xNN>`` token is consumed —
    the adapter uses it to gate the log-once "output-filter-applied" line.

    Ordering note: place this BEFORE ``tool_repair`` / the tool-call XML
    strip so byte-fallback inside tool-call argument strings is restored
    before JSON extraction.
    """

    name = "repair_byte_fallback"

    def __init__(self) -> None:
        """Start with no buffered text, no pending bytes, nothing modified."""
        self.modified: bool = False
        # Text received but not yet classified (may end in a partial token).
        self._buffer: str = ""
        # Byte values of the current run of consecutive ``<0xNN>`` tokens.
        self._pending = bytearray()

    def _flush_pending(self, out: list[str]) -> None:
        """Append the decoded pending byte run to ``out`` and reset the run."""
        if not self._pending:
            return
        out.append(_decode_byte_run(bytes(self._pending)))
        self._pending.clear()

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Consume ``text`` and return the portion that is safe to emit now.

        Args:
            text: The next chunk of model output.
            eof: True on the final call; everything still held is released.

        Returns:
            Output text with completed byte runs decoded. May be empty while
            a partial token or an unfinished run is being held back.
        """
        self._buffer += text
        emitted: list[str] = []

        while self._buffer:
            token = _BYTE_RE.match(self._buffer)
            if token is not None:
                # A complete token sits at the front: it extends the run.
                self._pending.append(int(token.group(1), 16))
                self._buffer = self._buffer[token.end() :]
                self.modified = True
                continue

            start = self._buffer.find(_BYTE_TOKEN_START)

            if start > 0:
                # Ordinary text comes before the next ``<0x`` — the run is
                # over. Emit run + text, then re-examine from the ``<0x``.
                self._flush_pending(emitted)
                emitted.append(self._buffer[:start])
                self._buffer = self._buffer[start:]
                continue

            if start == 0:
                # Front is ``<0x...`` but not (yet) a complete token.
                if not eof and _PREFIX_RE.fullmatch(self._buffer):
                    # Might complete on the next feed: hold both the partial
                    # token and the pending run.
                    break
                # Malformed (non-hex) or stuck at eof: the ``<`` is plain
                # text, which also terminates any pending run.
                self._flush_pending(emitted)
                emitted.append("<")
                self._buffer = self._buffer[1:]
                continue

            # start == -1: no ``<0x`` anywhere. Keep back only a trailing
            # fragment of ``<0x`` (it could complete — and CONTINUE the run —
            # next feed); everything before it is confirmed normal text.
            held = 0 if eof else _max_suffix_overlap(self._buffer, _BYTE_TOKEN_START)
            ready = self._buffer[:-held] if held else self._buffer
            if ready:
                self._flush_pending(emitted)
                emitted.append(ready)
                self._buffer = self._buffer[len(ready) :]
            # Otherwise the whole buffer is a token-start fragment: leave the
            # pending run alone and wait for more input.
            break

        if eof:
            # Final call: release whatever is still being held.
            self._flush_pending(emitted)
            if self._buffer:
                emitted.append(self._buffer)
                self._buffer = ""

        return "".join(emitted)
530
+
531
+
385
532
  # ---------------------------------------------------------------------------
386
533
  # Registry + chain
387
534
  # ---------------------------------------------------------------------------
@@ -391,6 +538,7 @@ KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
391
538
  StripThinkingFilter.name: StripThinkingFilter,
392
539
  StripStopMarkersFilter.name: StripStopMarkersFilter,
393
540
  StripToolCallXmlFilter.name: StripToolCallXmlFilter,
541
+ RepairByteFallbackFilter.name: RepairByteFallbackFilter,
394
542
  }
395
543
  """Registry of string-name → filter class.
396
544
 
@@ -10,9 +10,12 @@ through unchanged if a client sends them.
10
10
 
11
11
  from __future__ import annotations
12
12
 
13
+ import logging
13
14
  from typing import Any, Literal
14
15
 
15
- from pydantic import BaseModel, ConfigDict, Field
16
+ from pydantic import BaseModel, ConfigDict, Field, model_validator
17
+
18
+ logger = logging.getLogger(__name__)
16
19
 
17
20
  # ============================================================
18
21
  # Content blocks
@@ -105,6 +108,113 @@ class AnthropicTool(BaseModel):
105
108
  input_schema: dict[str, Any] = Field(default_factory=dict)
106
109
 
107
110
 
111
+ # ============================================================
112
+ # Role normalization (Claude Code CLI >= 2.1.154 workaround)
113
+ # ============================================================
114
+
115
+ _SPEC_MESSAGE_ROLES = frozenset({"user", "assistant"})
116
+
117
+
118
+ def _content_as_text(content: Any) -> str:
119
+ """Best-effort plain-text extraction from a message ``content`` field.
120
+
121
+ Strings pass through; block lists contribute their ``text`` blocks
122
+ joined with newlines; anything else yields "".
123
+ """
124
+ if isinstance(content, str):
125
+ return content
126
+ if isinstance(content, list):
127
+ parts: list[str] = []
128
+ for block in content:
129
+ if isinstance(block, dict) and block.get("type") == "text":
130
+ parts.append(str(block.get("text", "")))
131
+ return "\n".join(p for p in parts if p)
132
+ return ""
133
+
134
+
135
def normalize_message_roles(payload: dict[str, Any]) -> dict[str, Any]:
    """Normalize non-spec roles inside ``messages`` before validation.

    Claude Code CLI >= 2.1.154 has a regression where it emits messages
    with ``role: "system"`` (and reportedly ``ctx`` / ``msg``) inside the
    ``messages`` array. The Anthropic Messages API spec allows only
    ``user`` / ``assistant`` there, so without this hop those requests
    die in validation with "Input should be 'user' or 'assistant'"
    (see anthropics/claude-code#63469, vllm-project/vllm#44000).

    Policy:
      - ``role: "system"`` → text content merged into the top-level
        ``system`` field (appended after any existing system prompt;
        same join rule as ``convert.to_anthropic_request``).
      - any other non-spec role (``ctx``, ``msg``, a missing or non-string
        role, ...) → coerced to ``user`` so conversation position is
        preserved. Anthropic merges consecutive same-role turns, so this
        is safe.
      - messages whose salvaged content is empty are dropped entirely
        (Anthropic rejects empty turns).

    Args:
        payload: The raw request body as a dict.

    Returns:
        ``payload`` itself when nothing needed normalizing; otherwise a
        shallow copy with rewritten ``messages`` (and possibly ``system``).
        The caller's dict is never mutated. Non-dict message entries
        (already-validated models) pass through unchanged.
    """
    messages = payload.get("messages")
    if not isinstance(messages, list):
        return payload

    system_texts: list[str] = []
    messages_out: list[Any] = []
    coerced_roles: list[str] = []

    for msg in messages:
        if not isinstance(msg, dict):
            # Already a validated AnthropicMessage (internal construction
            # path, e.g. convert.to_anthropic_request) — spec roles only.
            messages_out.append(msg)
            continue

        role = msg.get("role")
        # Check the type before the set lookup: an unhashable role (a JSON
        # list or object from a malformed client) would make ``in`` raise
        # TypeError, which escapes as a crash rather than a validation error.
        if isinstance(role, str) and role in _SPEC_MESSAGE_ROLES:
            messages_out.append(msg)
            continue

        text = _content_as_text(msg.get("content"))
        coerced_roles.append(str(role))

        if role == "system":
            # Lifted out of the conversation; merged into ``system`` below.
            if text:
                system_texts.append(text)
            continue

        # Unknown role (ctx / msg / future surprises): keep its position
        # in the conversation as a user turn; drop if nothing salvageable.
        if text:
            messages_out.append({"role": "user", "content": text})

    if not coerced_roles:
        # Every message already had a spec role — hand back the input as-is.
        return payload

    out = dict(payload)
    out["messages"] = messages_out

    if system_texts:
        joined = "\n".join(system_texts)
        existing = out.get("system")
        if existing is None:
            out["system"] = joined
        elif isinstance(existing, str):
            out["system"] = f"{existing}\n{joined}" if existing else joined
        elif isinstance(existing, list):
            out["system"] = [*existing, {"type": "text", "text": joined}]
        else:  # unexpected shape — don't lose the client's value
            out["system"] = existing

    logger.warning(
        "normalized-nonspec-message-roles",
        extra={
            "roles": coerced_roles,
            "system_merged": bool(system_texts),
            "hint": "client is likely Claude Code CLI >= 2.1.154 (known regression)",
        },
    )
    return out
216
+
217
+
108
218
  # ============================================================
109
219
  # Request
110
220
  # ============================================================
@@ -147,6 +257,19 @@ class AnthropicRequest(BaseModel):
147
257
  # `thinking` beyond what the default minor version accepts.
148
258
  anthropic_beta: str | None = Field(default=None, exclude=True)
149
259
 
260
@model_validator(mode="before")
@classmethod
def _normalize_roles(cls, data: Any) -> Any:
    """Rewrite non-spec message roles before field validation runs.

    Claude Code >= 2.1.154 sends system/ctx/msg roles in ``messages``.
    Normalizing them here keeps the request from failing with a 422 at
    ingress (and from a 400 upstream at api.anthropic.com via the native
    adapter). See ``normalize_message_roles`` for the policy.
    """
    # Only dict payloads are normalized; any other input is returned
    # untouched for the regular field validation to deal with.
    return normalize_message_roles(data) if isinstance(data, dict) else data
272
+
150
273
 
151
274
  # ============================================================
152
275
  # Response
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 2.5.3
3
+ Version: 2.5.5
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -10,7 +10,7 @@ coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
10
10
  coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
11
11
  coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
12
12
  coderouter/logging.py,sha256=U7QiGRaoQXTSGijc-jV9TebnbbzrD-snfnoZy73Nvwo,52737
13
- coderouter/output_filters.py,sha256=LOOh68Kcn2LFDy1wPFynA6O_HGazV756q_79Z0_4Jww,19350
13
+ coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
14
14
  coderouter/token_estimation.py,sha256=1Ai1uT68hahpyr4LBhNyVRGq7y4yXItd6J4k5ApGX7M,5995
15
15
  coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
16
16
  coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
@@ -63,11 +63,11 @@ coderouter/state/request_log.py,sha256=bR814sOn--U_sKVtbezwS3bkZaNt4FGnboX75_2LL
63
63
  coderouter/state/store.py,sha256=h-rsMJq8GILsOfCP94nI40cuHaj4Vqycsm9UNN77REI,7445
64
64
  coderouter/state/suggest_rules.py,sha256=FvdhEvao5NvdKp9zs8AkcoFKHY4yqqXY2HekvSjpDFA,16670
65
65
  coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8IYOtG8,1788
66
- coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
66
+ coderouter/translation/anthropic.py,sha256=aZkcYH4x82b0x7efJgJb9RWn9Hbyc9pEOthXe4vjUdU,11113
67
67
  coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
68
68
  coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
69
- coderouter_cli-2.5.3.dist-info/METADATA,sha256=3q3FPL44mGgfySDAi_5gEW1Y_CaZk6i_8wH2RkQKwf0,11674
70
- coderouter_cli-2.5.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
71
- coderouter_cli-2.5.3.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
72
- coderouter_cli-2.5.3.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
73
- coderouter_cli-2.5.3.dist-info/RECORD,,
69
+ coderouter_cli-2.5.5.dist-info/METADATA,sha256=1A8zDyh8_kEIFafq1l3uKVyJikkJ8QOmwOlaEaSz_qI,11674
70
+ coderouter_cli-2.5.5.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
71
+ coderouter_cli-2.5.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
72
+ coderouter_cli-2.5.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
73
+ coderouter_cli-2.5.5.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.29.0
2
+ Generator: hatchling 1.30.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any