coderouter-cli 2.5.3__py3-none-any.whl → 2.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderouter/output_filters.py +148 -0
- coderouter/translation/anthropic.py +124 -1
- {coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/METADATA +1 -1
- {coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/RECORD +7 -7
- {coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/WHEEL +1 -1
- {coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/entry_points.txt +0 -0
- {coderouter_cli-2.5.3.dist-info → coderouter_cli-2.5.5.dist-info}/licenses/LICENSE +0 -0
coderouter/output_filters.py
CHANGED
|
@@ -43,6 +43,7 @@ Reference: plan.md §10.2 "出力クリーニング" / docs/retrospectives/v0.7.
|
|
|
43
43
|
|
|
44
44
|
from __future__ import annotations
|
|
45
45
|
|
|
46
|
+
import re
|
|
46
47
|
from typing import Protocol
|
|
47
48
|
|
|
48
49
|
__all__ = [
|
|
@@ -50,6 +51,7 @@ __all__ = [
|
|
|
50
51
|
"KNOWN_FILTERS",
|
|
51
52
|
"OutputFilter",
|
|
52
53
|
"OutputFilterChain",
|
|
54
|
+
"RepairByteFallbackFilter",
|
|
53
55
|
"StripStopMarkersFilter",
|
|
54
56
|
"StripThinkingFilter",
|
|
55
57
|
"StripToolCallXmlFilter",
|
|
@@ -382,6 +384,151 @@ class StripToolCallXmlFilter:
|
|
|
382
384
|
return "".join(out_parts)
|
|
383
385
|
|
|
384
386
|
|
|
387
|
+
# ---------------------------------------------------------------------------
# repair_byte_fallback (v2.x)
# ---------------------------------------------------------------------------

# A complete byte-fallback token: ``<0x`` + exactly two hex digits + ``>``.
# Group 1 captures the two hex digits so the byte value can be parsed with
# ``int(..., 16)``. Both upper- and lower-case hex digits are accepted.
_BYTE_RE = re.compile(r"<0x([0-9A-Fa-f]{2})>")

# Matches any *proper prefix* of a ``<0xHH>`` token, i.e. text that could still
# turn into a complete token once more input arrives:
# ``<`` / ``<0`` / ``<0x`` / ``<0xH`` / ``<0xHH`` (closing ``>`` not yet seen).
# The pattern is unanchored on purpose; it is meant to be used with
# ``fullmatch`` so that the WHOLE remaining buffer must be such a prefix.
_PREFIX_RE = re.compile(r"<(0(x[0-9A-Fa-f]{0,2})?)?")

# Literal opening of a byte-fallback token; used to locate the next candidate
# token in the buffer and to detect a partially received opening at its tail.
_BYTE_TOKEN_START = "<0x"
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _decode_byte_run(buf: bytes) -> str:
|
|
404
|
+
"""Decode a run of fallback bytes to text, losslessly.
|
|
405
|
+
|
|
406
|
+
Decodes the maximal valid UTF-8 prefix; any byte that cannot start or
|
|
407
|
+
continue a valid sequence is re-emitted as its ``<0xHH>`` token and
|
|
408
|
+
decoding resumes after it. So ``b"\\xe3\\x80\\x80"`` -> ``" "`` while a
|
|
409
|
+
stray ``b"\\xff"`` round-trips to ``"<0xFF>"`` — we never make the stream
|
|
410
|
+
worse than llama.cpp already did.
|
|
411
|
+
"""
|
|
412
|
+
parts: list[str] = []
|
|
413
|
+
i = 0
|
|
414
|
+
n = len(buf)
|
|
415
|
+
while i < n:
|
|
416
|
+
try:
|
|
417
|
+
parts.append(buf[i:].decode("utf-8"))
|
|
418
|
+
break
|
|
419
|
+
except UnicodeDecodeError as exc:
|
|
420
|
+
good_end = i + exc.start
|
|
421
|
+
if good_end > i:
|
|
422
|
+
parts.append(buf[i:good_end].decode("utf-8"))
|
|
423
|
+
parts.append(f"<0x{buf[good_end]:02X}>")
|
|
424
|
+
i = good_end + 1
|
|
425
|
+
return "".join(parts)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class RepairByteFallbackFilter:
    """Rebuild UTF-8 text from leaked llama.cpp ``<0xNN>`` byte-fallback tokens.

    Background: Ollama 0.30 moved its GGUF runtime onto llama.cpp
    (``ollama/ollama#16031``). With gemma4 the detokenizer changed, and
    multi-byte characters it cannot assemble now surface in the output as
    byte-fallback notation, for example::

        full-width space (U+3000) -> ``<0xE3><0x80><0x80>``
        rare kanji U+8E99         -> ``<0xE8><0xBA><0x99>``

    Such tokens damage Japanese prose and also tool-call JSON arguments
    (a stray ``<0xNN>`` inside an argument string breaks JSON parsing).
    This filter collects runs of consecutive ``<0xNN>`` tokens and decodes
    them back into UTF-8 text.

    The filter keeps state between ``feed`` calls, so both a single token
    split across SSE deltas (``<0x`` | ``E3>``) and a multi-byte run split
    across deltas (``<0xE3>`` | ``<0x80><0x80>``) are handled. A collected
    run is emitted only when it is known to be over — ordinary text follows
    it, or ``eof`` is signalled — and never merely because a chunk ended.
    Bytes that do not form valid UTF-8 are written back as ``<0xHH>``
    tokens rather than dropped.

    ``modified`` becomes True as soon as any ``<0xNN>`` token is consumed;
    the adapter uses it to gate the log-once "output-filter-applied" line.

    Ordering note: run this filter BEFORE ``tool_repair`` / the tool-call
    XML strip so byte-fallback inside tool-call argument strings is
    restored before JSON extraction.
    """

    name = "repair_byte_fallback"

    def __init__(self) -> None:
        """Start with no buffered text, no collected bytes, nothing modified."""
        # Bytes gathered from consecutive complete tokens, not yet emitted.
        self._pending = bytearray()
        # Input text received but not yet classified as token or plain text.
        self._buffer: str = ""
        # Flips to True once the first byte token has been consumed.
        self.modified: bool = False

    def _flush_pending(self, out: list[str]) -> None:
        """Append the decoded byte run to ``out`` and reset the run."""
        if not self._pending:
            return
        out.append(_decode_byte_run(bytes(self._pending)))
        self._pending.clear()

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Accept the next chunk and return whatever can be emitted now.

        Args:
            text: The newly received piece of model output.
            eof: True when no further input will follow; everything still
                held back (pending bytes and buffered text) is released.

        Returns:
            The text that is safe to pass downstream at this point.
        """
        self._buffer += text
        emitted: list[str] = []

        while self._buffer:
            token = _BYTE_RE.match(self._buffer)
            if token is not None:
                # A full ``<0xHH>`` token leads the buffer: grow the run.
                self._pending.append(int(token.group(1), 16))
                self._buffer = self._buffer[token.end() :]
                self.modified = True
                continue

            start = self._buffer.find(_BYTE_TOKEN_START)

            if start > 0:
                # Plain text sits in front of the next candidate token,
                # so any run collected so far is finished.
                self._flush_pending(emitted)
                emitted.append(self._buffer[:start])
                self._buffer = self._buffer[start:]
                continue

            if start == 0:
                # Buffer opens with ``<0x`` yet is not a complete token.
                if not eof and _PREFIX_RE.fullmatch(self._buffer):
                    # It may complete on the next feed: keep both the
                    # partial token and the pending run untouched.
                    break
                # Malformed (non-hex) or cut off at eof: the ``<`` is just
                # text, and it terminates the run.
                self._flush_pending(emitted)
                emitted.append("<")
                self._buffer = self._buffer[1:]
                continue

            # No ``<0x`` anywhere. Unless this is the end of input, keep
            # back a trailing fragment that could be the beginning of
            # ``<0x`` (it might continue the run next time); the rest is
            # confirmed plain text and ends the run.
            held = 0 if eof else _max_suffix_overlap(self._buffer, _BYTE_TOKEN_START)
            ready = self._buffer[:-held] if held else self._buffer
            if ready:
                self._flush_pending(emitted)
                emitted.append(ready)
                self._buffer = self._buffer[len(ready) :]
            # When nothing is ready the entire buffer is a token-start
            # fragment: leave the pending run alone and wait for more.
            break

        if eof:
            # End of stream: release the run and any leftover buffer text.
            self._flush_pending(emitted)
            if self._buffer:
                emitted.append(self._buffer)
                self._buffer = ""

        return "".join(emitted)
|
|
530
|
+
|
|
531
|
+
|
|
385
532
|
# ---------------------------------------------------------------------------
|
|
386
533
|
# Registry + chain
|
|
387
534
|
# ---------------------------------------------------------------------------
|
|
@@ -391,6 +538,7 @@ KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
|
|
|
391
538
|
StripThinkingFilter.name: StripThinkingFilter,
|
|
392
539
|
StripStopMarkersFilter.name: StripStopMarkersFilter,
|
|
393
540
|
StripToolCallXmlFilter.name: StripToolCallXmlFilter,
|
|
541
|
+
RepairByteFallbackFilter.name: RepairByteFallbackFilter,
|
|
394
542
|
}
|
|
395
543
|
"""Registry of string-name → filter class.
|
|
396
544
|
|
|
@@ -10,9 +10,12 @@ through unchanged if a client sends them.
|
|
|
10
10
|
|
|
11
11
|
from __future__ import annotations
|
|
12
12
|
|
|
13
|
+
import logging
|
|
13
14
|
from typing import Any, Literal
|
|
14
15
|
|
|
15
|
-
from pydantic import BaseModel, ConfigDict, Field
|
|
16
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
16
19
|
|
|
17
20
|
# ============================================================
|
|
18
21
|
# Content blocks
|
|
@@ -105,6 +108,113 @@ class AnthropicTool(BaseModel):
|
|
|
105
108
|
input_schema: dict[str, Any] = Field(default_factory=dict)
|
|
106
109
|
|
|
107
110
|
|
|
111
|
+
# ============================================================
# Role normalization (Claude Code CLI >= 2.1.154 workaround)
# ============================================================

# Roles that are left untouched inside ``messages``; a message carrying any
# other role is rewritten or dropped by the normalization step.
_SPEC_MESSAGE_ROLES = frozenset({"user", "assistant"})
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _content_as_text(content: Any) -> str:
|
|
119
|
+
"""Best-effort plain-text extraction from a message ``content`` field.
|
|
120
|
+
|
|
121
|
+
Strings pass through; block lists contribute their ``text`` blocks
|
|
122
|
+
joined with newlines; anything else yields "".
|
|
123
|
+
"""
|
|
124
|
+
if isinstance(content, str):
|
|
125
|
+
return content
|
|
126
|
+
if isinstance(content, list):
|
|
127
|
+
parts: list[str] = []
|
|
128
|
+
for block in content:
|
|
129
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
130
|
+
parts.append(str(block.get("text", "")))
|
|
131
|
+
return "\n".join(p for p in parts if p)
|
|
132
|
+
return ""
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def normalize_message_roles(payload: dict[str, Any]) -> dict[str, Any]:
    """Normalize non-spec roles inside ``messages`` before validation.

    Claude Code CLI >= 2.1.154 has a regression where it emits messages
    with ``role: "system"`` (and reportedly ``ctx`` / ``msg``) inside the
    ``messages`` array. The Anthropic Messages API spec allows only
    ``user`` / ``assistant`` there, so without this hop those requests
    die in validation with "Input should be 'user' or 'assistant'"
    (see anthropics/claude-code#63469, vllm-project/vllm#44000).

    Policy:
      - ``role: "system"`` -> text content merged into the top-level
        ``system`` field (appended after any existing system prompt).
      - any other non-spec role (``ctx``, ``msg``, a missing or non-string
        role, ...) -> coerced to ``user`` so conversation position is
        preserved.
      - messages whose salvaged text is empty are dropped entirely.

    Args:
        payload: The raw request body as a dict.

    Returns:
        ``payload`` itself when nothing had to change (no ``messages`` list,
        or only spec roles / non-dict entries); otherwise a shallow copy
        with rewritten ``messages`` and, where applicable, ``system``. The
        caller's dict is never mutated. Non-dict message entries (already
        validated models) pass through unchanged.
    """
    messages = payload.get("messages")
    if not isinstance(messages, list):
        return payload

    system_texts: list[str] = []
    messages_out: list[Any] = []
    coerced_roles: list[str] = []

    for msg in messages:
        if not isinstance(msg, dict):
            # Not a raw dict (e.g. an already-built message model from an
            # internal construction path) — nothing to normalize.
            messages_out.append(msg)
            continue

        role = msg.get("role")
        # The isinstance guard matters: this runs on untrusted input, and an
        # unhashable role (list/dict) would make the frozenset membership
        # test raise TypeError instead of being handled like any other
        # non-spec role.
        if isinstance(role, str) and role in _SPEC_MESSAGE_ROLES:
            messages_out.append(msg)
            continue

        coerced_roles.append(str(role))
        text = _content_as_text(msg.get("content"))
        if not text:
            # Nothing salvageable: drop the message.
            continue
        if role == "system":
            # Hoisted into the top-level ``system`` field further down.
            system_texts.append(text)
        else:
            # Unknown role (ctx / msg / future surprises): keep its position
            # in the conversation as a user turn.
            messages_out.append({"role": "user", "content": text})

    if not coerced_roles:
        return payload

    out = dict(payload)
    out["messages"] = messages_out

    # Tracks whether the hoisted text really ended up in ``system`` so the
    # log line below reports what actually happened.
    system_merged = False
    if system_texts:
        joined = "\n".join(system_texts)
        existing = out.get("system")
        if existing is None:
            out["system"] = joined
            system_merged = True
        elif isinstance(existing, str):
            out["system"] = f"{existing}\n{joined}" if existing else joined
            system_merged = True
        elif isinstance(existing, list):
            out["system"] = [*existing, {"type": "text", "text": joined}]
            system_merged = True
        # else: unexpected shape — leave the client's value exactly as sent
        # rather than guessing how to combine it with the hoisted text.

    logger.warning(
        "normalized-nonspec-message-roles",
        extra={
            "roles": coerced_roles,
            "system_merged": system_merged,
            "hint": "client is likely Claude Code CLI >= 2.1.154 (known regression)",
        },
    )
    return out
|
|
216
|
+
|
|
217
|
+
|
|
108
218
|
# ============================================================
|
|
109
219
|
# Request
|
|
110
220
|
# ============================================================
|
|
@@ -147,6 +257,19 @@ class AnthropicRequest(BaseModel):
|
|
|
147
257
|
# `thinking` beyond what the default minor version accepts.
|
|
148
258
|
anthropic_beta: str | None = Field(default=None, exclude=True)
|
|
149
259
|
|
|
260
|
+
@model_validator(mode="before")
@classmethod
def _normalize_roles(cls, data: Any) -> Any:
    """Rewrite non-spec message roles before field validation runs.

    Claude Code >= 2.1.154 sends ``system`` / ``ctx`` / ``msg`` roles in
    ``messages``. Normalizing them here keeps the request from failing
    with a 422 at ingress (and from a 400 upstream at api.anthropic.com
    via the native adapter). The actual policy lives in
    ``normalize_message_roles``.

    Dict input is normalized; any other input is returned untouched.
    """
    return normalize_message_roles(data) if isinstance(data, dict) else data
|
|
272
|
+
|
|
150
273
|
|
|
151
274
|
# ============================================================
|
|
152
275
|
# Response
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: coderouter-cli
|
|
3
|
-
Version: 2.5.3
|
|
3
|
+
Version: 2.5.5
|
|
4
4
|
Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
|
|
5
5
|
Project-URL: Homepage, https://github.com/zephel01/CodeRouter
|
|
6
6
|
Project-URL: Repository, https://github.com/zephel01/CodeRouter
|
|
@@ -10,7 +10,7 @@ coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
|
|
|
10
10
|
coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
|
|
11
11
|
coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
|
|
12
12
|
coderouter/logging.py,sha256=U7QiGRaoQXTSGijc-jV9TebnbbzrD-snfnoZy73Nvwo,52737
|
|
13
|
-
coderouter/output_filters.py,sha256=
|
|
13
|
+
coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
|
|
14
14
|
coderouter/token_estimation.py,sha256=1Ai1uT68hahpyr4LBhNyVRGq7y4yXItd6J4k5ApGX7M,5995
|
|
15
15
|
coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
|
|
16
16
|
coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
|
|
@@ -63,11 +63,11 @@ coderouter/state/request_log.py,sha256=bR814sOn--U_sKVtbezwS3bkZaNt4FGnboX75_2LL
|
|
|
63
63
|
coderouter/state/store.py,sha256=h-rsMJq8GILsOfCP94nI40cuHaj4Vqycsm9UNN77REI,7445
|
|
64
64
|
coderouter/state/suggest_rules.py,sha256=FvdhEvao5NvdKp9zs8AkcoFKHY4yqqXY2HekvSjpDFA,16670
|
|
65
65
|
coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8IYOtG8,1788
|
|
66
|
-
coderouter/translation/anthropic.py,sha256=
|
|
66
|
+
coderouter/translation/anthropic.py,sha256=aZkcYH4x82b0x7efJgJb9RWn9Hbyc9pEOthXe4vjUdU,11113
|
|
67
67
|
coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
|
|
68
68
|
coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
|
|
69
|
-
coderouter_cli-2.5.
|
|
70
|
-
coderouter_cli-2.5.
|
|
71
|
-
coderouter_cli-2.5.
|
|
72
|
-
coderouter_cli-2.5.
|
|
73
|
-
coderouter_cli-2.5.
|
|
69
|
+
coderouter_cli-2.5.5.dist-info/METADATA,sha256=1A8zDyh8_kEIFafq1l3uKVyJikkJ8QOmwOlaEaSz_qI,11674
|
|
70
|
+
coderouter_cli-2.5.5.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
71
|
+
coderouter_cli-2.5.5.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
|
|
72
|
+
coderouter_cli-2.5.5.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
|
|
73
|
+
coderouter_cli-2.5.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|