coderouter-cli 2.5.3__py3-none-any.whl → 2.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,7 @@ Reference: plan.md §10.2 "出力クリーニング" / docs/retrospectives/v0.7.
43
43
 
44
44
  from __future__ import annotations
45
45
 
46
+ import re
46
47
  from typing import Protocol
47
48
 
48
49
  __all__ = [
@@ -50,6 +51,7 @@ __all__ = [
50
51
  "KNOWN_FILTERS",
51
52
  "OutputFilter",
52
53
  "OutputFilterChain",
54
+ "RepairByteFallbackFilter",
53
55
  "StripStopMarkersFilter",
54
56
  "StripThinkingFilter",
55
57
  "StripToolCallXmlFilter",
@@ -382,6 +384,151 @@ class StripToolCallXmlFilter:
382
384
  return "".join(out_parts)
383
385
 
384
386
 
387
+ # ---------------------------------------------------------------------------
388
+ # repair_byte_fallback (v2.x)
389
+ # ---------------------------------------------------------------------------
390
+
391
+
392
+ # A complete byte-fallback token: ``<0x`` + exactly two hex digits + ``>``.
393
+ _BYTE_RE = re.compile(r"<0x([0-9A-Fa-f]{2})>")
394
+
395
+ # The whole remaining buffer is a *proper prefix* of some ``<0xHH>`` token,
396
+ # i.e. it could still complete (and continue a run) on the next feed:
397
+ # ``<`` / ``<0`` / ``<0x`` / ``<0xH`` / ``<0xHH`` (closing ``>`` not yet seen).
398
+ _PREFIX_RE = re.compile(r"<(0(x[0-9A-Fa-f]{0,2})?)?")
399
+
400
+ _BYTE_TOKEN_START = "<0x"
401
+
402
+
403
+ def _decode_byte_run(buf: bytes) -> str:
404
+ """Decode a run of fallback bytes to text, losslessly.
405
+
406
+ Decodes the maximal valid UTF-8 prefix; any byte that cannot start or
407
+ continue a valid sequence is re-emitted as its ``<0xHH>`` token and
408
+ decoding resumes after it. So ``b"\\xe3\\x80\\x80"`` -> ``" "`` while a
409
+ stray ``b"\\xff"`` round-trips to ``"<0xFF>"`` — we never make the stream
410
+ worse than llama.cpp already did.
411
+ """
412
+ parts: list[str] = []
413
+ i = 0
414
+ n = len(buf)
415
+ while i < n:
416
+ try:
417
+ parts.append(buf[i:].decode("utf-8"))
418
+ break
419
+ except UnicodeDecodeError as exc:
420
+ good_end = i + exc.start
421
+ if good_end > i:
422
+ parts.append(buf[i:good_end].decode("utf-8"))
423
+ parts.append(f"<0x{buf[good_end]:02X}>")
424
+ i = good_end + 1
425
+ return "".join(parts)
426
+
427
+
428
class RepairByteFallbackFilter:
    """Turn leaked llama.cpp ``<0xNN>`` byte-fallback tokens back into text.

    Background: Ollama 0.30 moved its GGUF runtime onto llama.cpp
    (``ollama/ollama#16031``). With the changed gemma4 detokenizer,
    multi-byte characters it cannot assemble leak out in llama.cpp's
    byte-fallback notation, for example::

        full-width space (U+3000)  ->  ``<0xE3><0x80><0x80>``
        rare kanji (U+8E99)        ->  ``<0xE8><0xBA><0x99>``

    Such tokens damage Japanese prose and also tool-call JSON arguments (a
    stray ``<0xNN>`` inside an argument string breaks JSON parsing). This
    filter collects runs of consecutive ``<0xNN>`` tokens and decodes each
    run as UTF-8.

    The filter keeps state between ``feed`` calls, so both a single token
    split across deltas (``<0x`` | ``E3>``) and a multi-byte run split across
    deltas (``<0xE3>`` | ``<0x80><0x80>``) are reassembled. A collected run
    is emitted only once it is known to be over: ordinary text follows it,
    or ``eof`` is signalled. It is never emitted merely because a chunk
    ended, since the run may continue in the next delta. Bytes that do not
    form valid UTF-8 are written back out as ``<0xHH>`` tokens.

    ``modified`` becomes True the first time a ``<0xNN>`` token is consumed;
    the adapter uses it to gate the log-once "output-filter-applied" line.

    Ordering note: run this filter BEFORE ``tool_repair`` / the tool-call XML
    strip, so byte-fallback inside tool-call argument strings is repaired
    before JSON extraction.
    """

    name = "repair_byte_fallback"

    def __init__(self) -> None:
        """Start with no collected bytes, no buffered text, nothing modified."""
        # Bytes of the byte-fallback run collected so far and not yet emitted.
        self._pending = bytearray()
        # Text received but not yet classified as token or ordinary text.
        self._buffer: str = ""
        # Set once any ``<0xNN>`` token has been consumed.
        self.modified: bool = False

    def _flush_pending(self, out: list[str]) -> None:
        """Append the decoded pending run to ``out`` and reset it (no-op if empty)."""
        if not self._pending:
            return
        run = bytes(self._pending)
        out.append(_decode_byte_run(run))
        self._pending.clear()

    def _end_run_with(self, out: list[str], text: str) -> None:
        """Close the pending run, then emit ``text`` as ordinary output."""
        self._flush_pending(out)
        out.append(text)

    def feed(self, text: str, *, eof: bool = False) -> str:
        """Add ``text`` to the buffer and return whatever is safe to emit now.

        Args:
            text: The next piece of streamed output.
            eof: True when no more input will follow; everything still held
                (pending run and buffered text) is then emitted.

        Returns:
            The text that can be released at this point, possibly empty.
        """
        self._buffer += text
        chunks: list[str] = []

        while self._buffer:
            token = _BYTE_RE.match(self._buffer)
            if token is not None:
                # A whole token sits at the front: add its byte to the run.
                self._pending.append(int(token.group(1), 16))
                self._buffer = self._buffer[token.end() :]
                self.modified = True
                continue

            start = self._buffer.find(_BYTE_TOKEN_START)

            if start > 0:
                # Ordinary text comes before the next ``<0x``: the run is over.
                self._end_run_with(chunks, self._buffer[:start])
                self._buffer = self._buffer[start:]
                continue

            if start == 0:
                # Buffer opens with ``<0x`` but holds no complete token.
                if not eof and _PREFIX_RE.fullmatch(self._buffer):
                    # May still complete on the next feed: keep both the
                    # partial token and the pending run.
                    break
                # Malformed (non-hex) or cut off at eof: the ``<`` is plain
                # text, which also terminates any pending run.
                self._end_run_with(chunks, "<")
                self._buffer = self._buffer[1:]
                continue

            # start == -1: no ``<0x`` anywhere. Unless at eof, keep back a
            # trailing partial of ``<0x`` since it may complete (and continue
            # the run) on the next feed; what precedes it is ordinary text.
            if eof:
                held = 0
            else:
                held = _max_suffix_overlap(self._buffer, _BYTE_TOKEN_START)
            ready = self._buffer[:-held] if held else self._buffer
            if ready:
                self._end_run_with(chunks, ready)
                self._buffer = self._buffer[len(ready) :]
            # Otherwise the whole buffer is a token-start prefix: leave the
            # pending run untouched and wait for more input.
            break

        if eof:
            self._flush_pending(chunks)
            if self._buffer:
                chunks.append(self._buffer)
                self._buffer = ""

        return "".join(chunks)
530
+
531
+
385
532
  # ---------------------------------------------------------------------------
386
533
  # Registry + chain
387
534
  # ---------------------------------------------------------------------------
@@ -391,6 +538,7 @@ KNOWN_FILTERS: dict[str, type[OutputFilter]] = {
391
538
  StripThinkingFilter.name: StripThinkingFilter,
392
539
  StripStopMarkersFilter.name: StripStopMarkersFilter,
393
540
  StripToolCallXmlFilter.name: StripToolCallXmlFilter,
541
+ RepairByteFallbackFilter.name: RepairByteFallbackFilter,
394
542
  }
395
543
  """Registry of string-name → filter class.
396
544
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: coderouter-cli
3
- Version: 2.5.3
3
+ Version: 2.5.4
4
4
  Summary: Local-first, free-first, fallback-built-in LLM router. Claude Code / OpenAI compatible.
5
5
  Project-URL: Homepage, https://github.com/zephel01/CodeRouter
6
6
  Project-URL: Repository, https://github.com/zephel01/CodeRouter
@@ -10,7 +10,7 @@ coderouter/errors.py,sha256=Xmq67lheyw8iv3Ox39jh2c4tvNI5RcUR4QkoxVDN6l4,1130
10
10
  coderouter/gguf_introspect.py,sha256=FZO14STLSp94Rfo5AInGwYUOpfjiXOW6CH5RiczTWDE,9514
11
11
  coderouter/hardware.py,sha256=gn3_9qbVcGRR81yKMn1lJE_8-YDRau0LxIH_M-f7pxE,8356
12
12
  coderouter/logging.py,sha256=U7QiGRaoQXTSGijc-jV9TebnbbzrD-snfnoZy73Nvwo,52737
13
- coderouter/output_filters.py,sha256=LOOh68Kcn2LFDy1wPFynA6O_HGazV756q_79Z0_4Jww,19350
13
+ coderouter/output_filters.py,sha256=0ry_rPiS_kC-FnHgaNVP6v7e6Al2djxzu9vBzZ8kEkE,25314
14
14
  coderouter/token_estimation.py,sha256=1Ai1uT68hahpyr4LBhNyVRGq7y4yXItd6J4k5ApGX7M,5995
15
15
  coderouter/token_estimation_accurate.py,sha256=GTfzrBVnvAGjeVzmzAeUdOYZvWZKLAxcxPpFiJGlzjk,4609
16
16
  coderouter/adapters/__init__.py,sha256=7dIDSZ-FE_0iSqLSDc_lK1idRdLTKcM2hP9tCJipgPI,463
@@ -66,8 +66,8 @@ coderouter/translation/__init__.py,sha256=PYXN7XVEwpG1uC8RLy6fvnGbzEZhhrEuUapH8I
66
66
  coderouter/translation/anthropic.py,sha256=JpvIWNXHUPVqOGvps7o_6ZADhXuJuvpU7RdMqQFtwwM,6421
67
67
  coderouter/translation/convert.py,sha256=-qyzFzmmr9hhQV6_Sg75kJnvCZvHe3n7vRdaZtk_JqQ,47269
68
68
  coderouter/translation/tool_repair.py,sha256=Ok2PF947Liegc5oaytfptv5MWMkpfJYQie-zdP1y3cY,9946
69
- coderouter_cli-2.5.3.dist-info/METADATA,sha256=3q3FPL44mGgfySDAi_5gEW1Y_CaZk6i_8wH2RkQKwf0,11674
70
- coderouter_cli-2.5.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
71
- coderouter_cli-2.5.3.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
72
- coderouter_cli-2.5.3.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
73
- coderouter_cli-2.5.3.dist-info/RECORD,,
69
+ coderouter_cli-2.5.4.dist-info/METADATA,sha256=cpwFo9rILUr99bq2K1bRH62s-hhVQqmed4psTvG-XFM,11674
70
+ coderouter_cli-2.5.4.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
71
+ coderouter_cli-2.5.4.dist-info/entry_points.txt,sha256=-dnLfD1YZ2WjH2zSdNCvlO65wYltM9bsHt9Fhg3yGss,51
72
+ coderouter_cli-2.5.4.dist-info/licenses/LICENSE,sha256=wkEzoR86jFw33jvfOHjULqmkGEfxTFMgMaJnpR8mPRw,1065
73
+ coderouter_cli-2.5.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.29.0
2
+ Generator: hatchling 1.30.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any