PyPI - coderouter-cli - Versions diffs - 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl - Mend

coderouter-cli 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

coderouter/output_filters.py CHANGED Viewed

@@ -43,6 +43,7 @@ Reference: plan.md §10.2 "出力クリーニング" / docs/retrospectives/v0.7.
 from __future__ import annotations
+import re
 from typing import Protocol
 __all__ = [
@@ -50,6 +51,7 @@ __all__ = [
     "KNOWN_FILTERS",
     "OutputFilter",
     "OutputFilterChain",
+    "RepairByteFallbackFilter",
     "StripStopMarkersFilter",
     "StripThinkingFilter",
     "StripToolCallXmlFilter",
@@ -382,6 +384,151 @@ class StripToolCallXmlFilter:
         return "".join(out_parts)
+# ---------------------------------------------------------------------------
+# repair_byte_fallback (v2.x)
+# ---------------------------------------------------------------------------
+# A complete byte-fallback token: ``<0x`` + exactly two hex digits + ``>``.
+_BYTE_RE = re.compile(r"<0x([0-9A-Fa-f]{2})>")
+# The whole remaining buffer is a *proper prefix* of some ``<0xHH>`` token,
+# i.e. it could still complete (and continue a run) on the next feed:
+# ``<`` / ``<0`` / ``<0x`` / ``<0xH`` / ``<0xHH`` (closing ``>`` not yet seen).
+_PREFIX_RE = re.compile(r"<(0(x[0-9A-Fa-f]{0,2})?)?")
+_BYTE_TOKEN_START = "<0x"
+def _decode_byte_run(buf: bytes) -> str:
+    """Decode a run of fallback bytes to text, losslessly.
+    Decodes the maximal valid UTF-8 prefix; any byte that cannot start or
+    continue a valid sequence is re-emitted as its ``<0xHH>`` token and
+    decoding resumes after it. So ``b"\\xe3\\x80\\x80"`` -> ``"　"`` while a
+    stray ``b"\\xff"`` round-trips to ``"<0xFF>"`` — we never make the stream
+    worse than llama.cpp already did.
+    """
+    parts: list[str] = []
+    i = 0
+    n = len(buf)
+    while i < n:
+        try:
+            parts.append(buf[i:].decode("utf-8"))
+            break
+        except UnicodeDecodeError as exc:
+            good_end = i + exc.start
+            if good_end > i:
+                parts.append(buf[i:good_end].decode("utf-8"))
+            parts.append(f"<0x{buf[good_end]:02X}>")
+            i = good_end + 1
+    return "".join(parts)
+class RepairByteFallbackFilter:
+    """Reassemble llama.cpp ``<0xNN>`` byte-fallback leaks into UTF-8 text.
+    Ollama 0.30 unified its GGUF runtime onto llama.cpp
+    (``ollama/ollama#16031``). For gemma4 the detokenizer changed, and
+    multi-byte characters it cannot assemble now leak as llama.cpp's
+    byte-fallback notation::
+        full-width space ``　``  -> ``<0xE3><0x80><0x80>``
+        rare kanji      ``躙``  -> ``<0xE8><0xBA><0x99>``
+    These corrupt Japanese prose AND tool-call JSON arguments (a stray
+    ``<0xNN>`` inside an argument string breaks JSON parsing). This filter
+    reassembles runs of consecutive ``<0xNN>`` tokens back into UTF-8.
+    Stateful across ``feed`` calls so a token split across SSE deltas
+    (``<0x`` | ``E3>``) and a multi-byte run split across deltas
+    (``<0xE3>`` | ``<0x80><0x80>``) both reassemble correctly. A pending byte
+    run is only flushed once we are certain it has ended (confirmed normal
+    text follows, or ``eof``) — never at a bare chunk boundary, where the run
+    might continue in the next delta. Bytes that cannot form valid UTF-8 are
+    re-emitted verbatim (lossless).
+    ``modified`` flips True the first time any ``<0xNN>`` token is consumed —
+    the adapter uses it to gate the log-once "output-filter-applied" line.
+    Ordering note: place this BEFORE ``tool_repair`` / the tool-call XML
+    strip so byte-fallback inside tool-call argument strings is restored
+    before JSON extraction.
+    """
+    name = "repair_byte_fallback"
+    def __init__(self) -> None:
+        """Initialize per-request buffer, pending byte run and state."""
+        self.modified: bool = False
+        self._buffer: str = ""
+        self._pending = bytearray()
+    def _flush_pending(self, out: list[str]) -> None:
+        """Decode and emit the accumulated byte run, then clear it."""
+        if self._pending:
+            out.append(_decode_byte_run(bytes(self._pending)))
+            self._pending.clear()
+    def feed(self, text: str, *, eof: bool = False) -> str:
+        """Consume ``text``; return the portion safe to emit now."""
+        self._buffer += text
+        out: list[str] = []
+        while self._buffer:
+            m = _BYTE_RE.match(self._buffer)
+            if m is not None:
+                # Complete byte token at position 0 — extend the run.
+                self._pending.append(int(m.group(1), 16))
+                self._buffer = self._buffer[m.end() :]
+                self.modified = True
+                continue
+            idx = self._buffer.find(_BYTE_TOKEN_START)
+            if idx == -1:
+                # No complete/started token in the buffer. Hold a trailing
+                # partial of ``<0x`` (it may complete — and CONTINUE the run —
+                # on the next feed); treat anything before it as confirmed
+                # normal text that ends the run.
+                hold = (
+                    0 if eof else _max_suffix_overlap(self._buffer, _BYTE_TOKEN_START)
+                )
+                safe = self._buffer[:-hold] if hold else self._buffer
+                if safe:
+                    self._flush_pending(out)
+                    out.append(safe)
+                    self._buffer = self._buffer[len(safe) :]
+                # else: whole buffer is a token-start prefix; keep pending
+                # (the run might continue) and wait for more input.
+                break
+            if idx > 0:
+                # Normal text precedes the next token start — run ended.
+                self._flush_pending(out)
+                out.append(self._buffer[:idx])
+                self._buffer = self._buffer[idx:]
+                continue
+            # idx == 0: buffer starts with ``<0x`` but is not a complete token.
+            if not eof and _PREFIX_RE.fullmatch(self._buffer):
+                # Could still complete next feed — hold token AND pending run.
+                break
+            # Malformed ``<0x..`` (non-hex, or stuck at eof). The ``<`` is
+            # ordinary text; the run (if any) has ended.
+            self._flush_pending(out)
+            out.append("<")
+            self._buffer = self._buffer[1:]
+        if eof:
+            self._flush_pending(out)
+            if self._buffer:
+                out.append(self._buffer)
+            self._buffer = ""
+        return "".join(out)
 # ---------------------------------------------------------------------------
 # Registry + chain
 # ---------------------------------------------------------------------------
@@ -391,6 +538,7 @@ KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
     StripThinkingFilter.name: StripThinkingFilter,
     StripStopMarkersFilter.name: StripStopMarkersFilter,
     StripToolCallXmlFilter.name: StripToolCallXmlFilter,
+    RepairByteFallbackFilter.name: RepairByteFallbackFilter,
 }
 """Registry of string-name → filter class.

coderouter/translation/anthropic.py CHANGED Viewed

@@ -10,9 +10,12 @@ through unchanged if a client sends them.
 from __future__ import annotations
+import logging
 from typing import Any, Literal
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, model_validator
+logger = logging.getLogger(__name__)
 # ============================================================
 # Content blocks
@@ -105,6 +108,113 @@ class AnthropicTool(BaseModel):
     input_schema: dict[str, Any] = Field(default_factory=dict)
+# ============================================================
+# Role normalization (Claude Code CLI >= 2.1.154 workaround)
+# ============================================================
+_SPEC_MESSAGE_ROLES = frozenset({"user", "assistant"})
+def _content_as_text(content: Any) -> str:
+    """Best-effort plain-text extraction from a message ``content`` field.
+    Strings pass through; block lists contribute their ``text`` blocks
+    joined with newlines; anything else yields "".
+    """
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for block in content:
+            if isinstance(block, dict) and block.get("type") == "text":
+                parts.append(str(block.get("text", "")))
+        return "\n".join(p for p in parts if p)
+    return ""
+def normalize_message_roles(payload: dict[str, Any]) -> dict[str, Any]:
+    """Normalize non-spec roles inside ``messages`` before validation.
+    Claude Code CLI >= 2.1.154 has a regression where it emits messages
+    with ``role: "system"`` (and reportedly ``ctx`` / ``msg``) inside the
+    ``messages`` array. The Anthropic Messages API spec allows only
+    ``user`` / ``assistant`` there, so without this hop those requests
+    die in validation with "Input should be 'user' or 'assistant'"
+    (see anthropics/claude-code#63469, vllm-project/vllm#44000).
+    Policy:
+        - ``role: "system"`` → text content merged into the top-level
+          ``system`` field (appended after any existing system prompt;
+          same join rule as ``convert.to_anthropic_request``).
+        - any other non-spec role (``ctx``, ``msg``, ...) → coerced to
+          ``user`` so conversation position is preserved. Anthropic
+          merges consecutive same-role turns, so this is safe.
+        - messages whose salvaged content is empty are dropped entirely
+          (Anthropic rejects empty turns).
+    Returns a shallow-copied payload; the caller's dict is not mutated.
+    Non-dict message entries (already-validated models) pass through.
+    """
+    messages = payload.get("messages")
+    if not isinstance(messages, list):
+        return payload
+    system_texts: list[str] = []
+    messages_out: list[Any] = []
+    coerced_roles: list[str] = []
+    for msg in messages:
+        if not isinstance(msg, dict):
+            # Already a validated AnthropicMessage (internal construction
+            # path, e.g. convert.to_anthropic_request) — spec roles only.
+            messages_out.append(msg)
+            continue
+        role = msg.get("role")
+        if role in _SPEC_MESSAGE_ROLES:
+            messages_out.append(msg)
+            continue
+        if role == "system":
+            text = _content_as_text(msg.get("content"))
+            if text:
+                system_texts.append(text)
+            coerced_roles.append("system")
+            continue
+        # Unknown role (ctx / msg / future surprises): keep its position
+        # in the conversation as a user turn; drop if nothing salvageable.
+        text = _content_as_text(msg.get("content"))
+        coerced_roles.append(str(role))
+        if text:
+            messages_out.append({"role": "user", "content": text})
+    if not coerced_roles:
+        return payload
+    out = dict(payload)
+    out["messages"] = messages_out
+    if system_texts:
+        joined = "\n".join(system_texts)
+        existing = out.get("system")
+        if existing is None:
+            out["system"] = joined
+        elif isinstance(existing, str):
+            out["system"] = f"{existing}\n{joined}" if existing else joined
+        elif isinstance(existing, list):
+            out["system"] = [*existing, {"type": "text", "text": joined}]
+        else:  # unexpected shape — don't lose the client's value
+            out["system"] = existing
+    logger.warning(
+        "normalized-nonspec-message-roles",
+        extra={
+            "roles": coerced_roles,
+            "system_merged": bool(system_texts),
+            "hint": "client is likely Claude Code CLI >= 2.1.154 (known regression)",
+        },
+    )
+    return out
 # ============================================================
 # Request
 # ============================================================
@@ -147,6 +257,19 @@ class AnthropicRequest(BaseModel):
     # `thinking` beyond what the default minor version accepts.
     anthropic_beta: str | None = Field(default=None, exclude=True)
+    @model_validator(mode="before")
+    @classmethod
+    def _normalize_roles(cls, data: Any) -> Any:
+        """Claude Code >= 2.1.154 sends system/ctx/msg roles in messages.
+        Normalize them before field validation so the request doesn't
+        422 at ingress (and doesn't 400 upstream at api.anthropic.com
+        via the native adapter). See ``normalize_message_roles``.
+        """
+        if isinstance(data, dict):
+            return normalize_message_roles(data)
+        return data
 # ============================================================
 # Response

{coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: coderouter-cli
-Version: 2.5.3
+Version: 2.5.5
 Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
 Project-URL: Homepage, https://github.com/zephel01/CodeRouter
 Project-URL: Repository, https://github.com/zephel01/CodeRouter

{coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/RECORD RENAMED Viewed

@@ -10,7 +10,7 @@ coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
 coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
 coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
 coderouter/logging.py,sha256=U7QiGRaoQXTSGijc-jV9TebnbbzrD-snfnoZy73Nvwo,52737
-coderouter/output_filters.py,sha256=LOOh68Kcn2LFDy1wPFynA6O_HGazV756q_79Z0_4Jww,19350
+coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
 coderouter/token_estimation.py,sha256=1Ai1uT68hahpyr4LBhNyVRGq7y4yXItd6J4k5ApGX7M,5995
 coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
 coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
@@ -63,11 +63,11 @@ coderouter/state/request_log.py,sha256=bR814sOn--U_sKVtbezwS3bkZaNt4FGnboX75_2LL
 coderouter/state/store.py,sha256=h-rsMJq8GILsOfCP94nI40cuHaj4Vqycsm9UNN77REI,7445
 coderouter/state/suggest_rules.py,sha256=FvdhEvao5NvdKp9zs8AkcoFKHY4yqqXY2HekvSjpDFA,16670
 coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8IYOtG8,1788
-coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
+coderouter/translation/anthropic.py,sha256=aZkcYH4x82b0x7efJgJb9RWn9Hbyc9pEOthXe4vjUdU,11113
 coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
 coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
-coderouter_cli-2.5.3.dist-info/METADATA,sha256=3q3FPL44mGgfySDAi_5gEW1Y_CaZk6i_8wH2RkQKwf0,11674
-coderouter_cli-2.5.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-coderouter_cli-2.5.3.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
-coderouter_cli-2.5.3.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
-coderouter_cli-2.5.3.dist-info/RECORD,,
+coderouter_cli-2.5.5.dist-info/METADATA,sha256=1A8zDyh8_kEIFafq1l3uKVyJikkJ8QOmwOlaEaSz_qI,11674
+coderouter_cli-2.5.5.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+coderouter_cli-2.5.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
+coderouter_cli-2.5.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
+coderouter_cli-2.5.5.dist-info/RECORD,,

{coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.29.0
+Generator: hatchling 1.30.1
 Root-Is-Purelib: true
 Tag: py3-none-any

{coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

coderouter-cli 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl

coderouter-cli 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl