renderers 0.1.8.dev41__tar.gz → 0.1.8.dev42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/PKG-INFO +1 -1
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/kimi_k25.py +15 -3
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/parsing.py +67 -3
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen3.py +2 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen3_vl.py +2 -0
- renderers-0.1.8.dev42/tests/test_parse_response.py +276 -0
- renderers-0.1.8.dev41/tests/test_parse_response.py +0 -137
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.gitignore +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/LICENSE +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/README.md +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/README.md +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/pyproject.toml +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/__init__.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/base.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/client.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/configs.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/deepseek_v3.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/default.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/glm45.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/glm5.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/llama_3.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/nemotron3.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen35.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/conftest.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_llama_3.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_load_tokenizer_fastokens.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_message_tool_names.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_nemotron3_ultra.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_qwen35_size_coverage.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_renderer_config_parity.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_roundtrip.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/uv.lock +0 -0
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev41'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev41')
|
|
21
|
+
__version__ = version = '0.1.8.dev42'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev42')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -42,7 +42,7 @@ from renderers.base import (
|
|
|
42
42
|
trim_to_turn_close,
|
|
43
43
|
)
|
|
44
44
|
from renderers.configs import KimiK25RendererConfig
|
|
45
|
-
from renderers.parsing import parse_kimi_k2_section
|
|
45
|
+
from renderers.parsing import _reasoning_end_token_index, parse_kimi_k2_section
|
|
46
46
|
from renderers.qwen3_vl import (
|
|
47
47
|
_image_hash,
|
|
48
48
|
_is_image_part,
|
|
@@ -452,6 +452,13 @@ def _parse_kimi_k2_response(
|
|
|
452
452
|
ids = ids[:i]
|
|
453
453
|
break
|
|
454
454
|
|
|
455
|
+
# Reasoning first: a tool-call section the model drafts *inside* its
|
|
456
|
+
# <think> trace must not be parsed as a real call (regression #78 — cf.
|
|
457
|
+
# parse_qwen3). K2.5 renders </think> as text, so locate the boundary by
|
|
458
|
+
# decoding; the section scan then starts past it. content_ids still begins
|
|
459
|
+
# at 0, so the </think> text-split below recovers reasoning unchanged.
|
|
460
|
+
reasoning_end = _reasoning_end_token_index(tokenizer, ids)
|
|
461
|
+
|
|
455
462
|
# Token-ID path — produces spans. Only run if every relevant special
|
|
456
463
|
# token resolved at init (i.e. is in the tokenizer's vocab).
|
|
457
464
|
tool_calls: list[ParsedToolCall] = []
|
|
@@ -471,6 +478,7 @@ def _parse_kimi_k2_response(
|
|
|
471
478
|
tool_call_begin_id=tool_call_begin_id,
|
|
472
479
|
tool_call_argument_begin_id=tool_call_argument_begin_id,
|
|
473
480
|
tool_call_end_id=tool_call_end_id,
|
|
481
|
+
scan_start=reasoning_end,
|
|
474
482
|
)
|
|
475
483
|
text = (
|
|
476
484
|
tokenizer.decode(content_ids, skip_special_tokens=False)
|
|
@@ -481,9 +489,13 @@ def _parse_kimi_k2_response(
|
|
|
481
489
|
text = tokenizer.decode(ids, skip_special_tokens=False) if ids else ""
|
|
482
490
|
|
|
483
491
|
# Fallback path: model emitted literal-text section delimiters (singular
|
|
484
|
-
# variant) rather than special tokens. Spans unavailable here.
|
|
492
|
+
# variant) rather than special tokens. Spans unavailable here. Start the
|
|
493
|
+
# search past the first </think> so a literal section drafted inside the
|
|
494
|
+
# reasoning trace isn't matched as a real call (regression #78).
|
|
485
495
|
if not tool_calls:
|
|
486
|
-
tc_match = _TOOL_CALLS_SECTION_RE.search(text)
|
|
496
|
+
think_close = text.find("</think>")
|
|
497
|
+
search_from = think_close + len("</think>") if think_close != -1 else 0
|
|
498
|
+
tc_match = _TOOL_CALLS_SECTION_RE.search(text, search_from)
|
|
487
499
|
if tc_match:
|
|
488
500
|
text = text[: tc_match.start()]
|
|
489
501
|
tool_section = (
|
|
@@ -133,6 +133,39 @@ def _decode(tokenizer, ids: list[int]) -> str:
|
|
|
133
133
|
return tokenizer.decode(ids, skip_special_tokens=False)
|
|
134
134
|
|
|
135
135
|
|
|
136
|
+
def _reasoning_end_token_index(
|
|
137
|
+
tokenizer, ids: list[int], marker: str = "</think>"
|
|
138
|
+
) -> int:
|
|
139
|
+
"""Token index immediately past the first ``</think>`` in ``ids``.
|
|
140
|
+
|
|
141
|
+
Returns 0 when ``ids`` has no closed reasoning region — callers treat
|
|
142
|
+
that as "scan from the start" (preserves pre-existing behavior for
|
|
143
|
+
non-thinking / truncated-reasoning completions).
|
|
144
|
+
|
|
145
|
+
Used by parsers whose ``</think>`` is *not* a single special token
|
|
146
|
+
(DeepSeek-V3, Kimi-K2.5) — where it tokenizes to several pieces and is
|
|
147
|
+
context-sensitive (the closing ``>`` merges differently depending on the
|
|
148
|
+
next char), so a token-id or fixed-subsequence search isn't reliable. We
|
|
149
|
+
instead locate the boundary in decoded text via binary search over prefix
|
|
150
|
+
decodes, which holds as long as ``decode(ids[:k])`` is prefix-stable in
|
|
151
|
+
``k`` (true for the byte-level BPE tokenizers here; ``</think>`` is clean
|
|
152
|
+
ASCII that won't straddle a byte boundary). Single-token ``</think>``
|
|
153
|
+
parsers (Qwen3) anchor on the token id directly and don't need this.
|
|
154
|
+
"""
|
|
155
|
+
if not ids or marker not in _decode(tokenizer, ids):
|
|
156
|
+
return 0
|
|
157
|
+
# Smallest prefix length (in tokens) whose decode already contains the
|
|
158
|
+
# full marker — i.e. the index just past where </think> completes.
|
|
159
|
+
lo, hi = 1, len(ids)
|
|
160
|
+
while lo < hi:
|
|
161
|
+
mid = (lo + hi) // 2
|
|
162
|
+
if marker in _decode(tokenizer, ids[:mid]):
|
|
163
|
+
hi = mid
|
|
164
|
+
else:
|
|
165
|
+
lo = mid + 1
|
|
166
|
+
return lo
|
|
167
|
+
|
|
168
|
+
|
|
136
169
|
# ── Qwen3: <tool_call> JSON </tool_call> ────────────────────────────
|
|
137
170
|
|
|
138
171
|
|
|
@@ -143,11 +176,26 @@ def parse_qwen3(
|
|
|
143
176
|
stop_ids: set[int],
|
|
144
177
|
tool_call_id: int,
|
|
145
178
|
tool_call_end_id: int,
|
|
179
|
+
reasoning_end_id: int | None = None,
|
|
146
180
|
) -> ParsedResponse:
|
|
147
181
|
"""Parse Qwen3 completion tokens. Hermes-style JSON tool calls."""
|
|
148
182
|
ids = _strip_stop_tokens(token_ids, stop_ids)
|
|
149
183
|
|
|
150
|
-
tc_start = _find(ids, tool_call_id)
|
|
184
|
+
# Reasoning is resolved before tool calls. Thinking models (e.g.
|
|
185
|
+
# Qwen3-*-Thinking) routinely draft ``<tool_call>`` blocks *inside* their
|
|
186
|
+
# ``<think>...</think>`` trace while planning; those are reasoning, not
|
|
187
|
+
# real invocations. Anchoring the tool-call scan after the ``</think>``
|
|
188
|
+
# boundary keeps in-think drafts out of ``tool_calls`` (otherwise they
|
|
189
|
+
# surface as phantom/duplicate calls) and out of the reasoning/content
|
|
190
|
+
# split. Mirrors vLLM's DelegatingParser, which runs the reasoning parser
|
|
191
|
+
# first and tool-parses only the post-``</think>`` content.
|
|
192
|
+
# ``reasoning_end_id`` is the ``</think>`` token id; when it's absent
|
|
193
|
+
# (``None``) or the model never closed its reasoning, the scan falls back
|
|
194
|
+
# to the whole stream (prior behavior).
|
|
195
|
+
reasoning_end = _find(ids, reasoning_end_id) if reasoning_end_id is not None else -1
|
|
196
|
+
scan_start = reasoning_end + 1 if reasoning_end != -1 else 0
|
|
197
|
+
|
|
198
|
+
tc_start = _find(ids, tool_call_id, scan_start)
|
|
151
199
|
tool_calls: list[ParsedToolCall] = []
|
|
152
200
|
if tc_start != -1:
|
|
153
201
|
content_ids = ids[:tc_start]
|
|
@@ -685,7 +733,15 @@ def parse_deepseek_v3(
|
|
|
685
733
|
"""
|
|
686
734
|
ids = _strip_stop_tokens(token_ids, stop_ids)
|
|
687
735
|
|
|
688
|
-
tc_section_start = _find(ids, tool_calls_begin_id)
|
|
736
|
+
# Reasoning first: skip past </think> before looking for the tool-call
|
|
737
|
+
# section, so a section the model drafts *inside* its <think> trace isn't
|
|
738
|
+
# parsed as a real call (regression #78 — cf. parse_qwen3). content_ids
|
|
739
|
+
# still starts at 0, so the </think> text-split below recovers reasoning.
|
|
740
|
+
# DeepSeek-V3 renders </think> as multi-token text, hence the decode-based
|
|
741
|
+
# boundary finder rather than a token-id anchor.
|
|
742
|
+
reasoning_end = _reasoning_end_token_index(tokenizer, ids)
|
|
743
|
+
|
|
744
|
+
tc_section_start = _find(ids, tool_calls_begin_id, reasoning_end)
|
|
689
745
|
tool_calls: list[ParsedToolCall] = []
|
|
690
746
|
if tc_section_start != -1:
|
|
691
747
|
content_ids = ids[:tc_section_start]
|
|
@@ -962,6 +1018,7 @@ def parse_kimi_k2_section(
|
|
|
962
1018
|
tool_call_begin_id: int,
|
|
963
1019
|
tool_call_argument_begin_id: int,
|
|
964
1020
|
tool_call_end_id: int,
|
|
1021
|
+
scan_start: int = 0,
|
|
965
1022
|
) -> tuple[list[int], list[ParsedToolCall]]:
|
|
966
1023
|
"""Split ``ids`` into ``(content_before_section, tool_calls)`` by finding
|
|
967
1024
|
the Kimi-style tool-call section delimiters.
|
|
@@ -973,8 +1030,15 @@ def parse_kimi_k2_section(
|
|
|
973
1030
|
of the section and a list of ``ParsedToolCall`` covering every attempted
|
|
974
1031
|
block inside it; an unclosed section is still walked to whatever the model
|
|
975
1032
|
emitted before EOS. Returns ``(ids, [])`` when no section is present.
|
|
1033
|
+
|
|
1034
|
+
``scan_start`` restricts the section search to ``ids[scan_start:]`` while
|
|
1035
|
+
keeping ``content_ids = ids[:section_start]`` and all token spans relative
|
|
1036
|
+
to the full ``ids``. Callers pass the post-``</think>`` index so a section
|
|
1037
|
+
the model drafts inside its reasoning trace isn't parsed as a real call;
|
|
1038
|
+
because ``content_ids`` still starts at 0, downstream text-based reasoning
|
|
1039
|
+
extraction is unaffected (regression #78).
|
|
976
1040
|
"""
|
|
977
|
-
section_start = _find_any(ids, tool_calls_section_begin_ids)
|
|
1041
|
+
section_start = _find_any(ids, tool_calls_section_begin_ids, scan_start)
|
|
978
1042
|
if section_start == -1:
|
|
979
1043
|
return list(ids), []
|
|
980
1044
|
content_ids = ids[:section_start]
|
|
@@ -62,6 +62,7 @@ class Qwen3Renderer:
|
|
|
62
62
|
self._tool_call_end = self._token_id("</tool_call>")
|
|
63
63
|
self._tool_response = self._token_id("<tool_response>")
|
|
64
64
|
self._tool_response_end = self._token_id("</tool_response>")
|
|
65
|
+
self._think_end = self._token_id("</think>")
|
|
65
66
|
|
|
66
67
|
def _token_id(self, token: str) -> int:
|
|
67
68
|
tid = self._tokenizer.convert_tokens_to_ids(token)
|
|
@@ -276,6 +277,7 @@ class Qwen3Renderer:
|
|
|
276
277
|
stop_ids={self._im_end, self._endoftext},
|
|
277
278
|
tool_call_id=self._tool_call,
|
|
278
279
|
tool_call_end_id=self._tool_call_end,
|
|
280
|
+
reasoning_end_id=self._think_end,
|
|
279
281
|
)
|
|
280
282
|
|
|
281
283
|
def get_stop_token_ids(self) -> list[int]:
|
|
@@ -325,6 +325,7 @@ class Qwen3VLRenderer:
|
|
|
325
325
|
self._tool_call_end = self._token_id("</tool_call>")
|
|
326
326
|
self._tool_response = self._token_id("<tool_response>")
|
|
327
327
|
self._tool_response_end = self._token_id("</tool_response>")
|
|
328
|
+
self._think_end = self._token_id("</think>")
|
|
328
329
|
self._vision_start = self._token_id("<|vision_start|>")
|
|
329
330
|
self._vision_end = self._token_id("<|vision_end|>")
|
|
330
331
|
self._image_pad = self._token_id("<|image_pad|>")
|
|
@@ -634,6 +635,7 @@ class Qwen3VLRenderer:
|
|
|
634
635
|
stop_ids={self._im_end, self._endoftext},
|
|
635
636
|
tool_call_id=self._tool_call,
|
|
636
637
|
tool_call_end_id=self._tool_call_end,
|
|
638
|
+
reasoning_end_id=self._think_end,
|
|
637
639
|
)
|
|
638
640
|
|
|
639
641
|
def get_stop_token_ids(self) -> list[int]:
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""Barrage test: renderer.parse_response() must correctly extract
|
|
2
|
+
content, reasoning_content, and tool_calls from completion tokens.
|
|
3
|
+
|
|
4
|
+
Runs against every (model, renderer) pair.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from functools import lru_cache
|
|
8
|
+
|
|
9
|
+
from renderers import create_renderer
|
|
10
|
+
from renderers.base import ToolCallParseStatus, load_tokenizer
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@lru_cache
|
|
14
|
+
def _qwen3_vl():
|
|
15
|
+
tokenizer = load_tokenizer("Qwen/Qwen3-VL-4B-Instruct")
|
|
16
|
+
renderer = create_renderer(tokenizer)
|
|
17
|
+
return tokenizer, renderer
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_parse_simple_content(model_name, tokenizer, renderer):
|
|
21
|
+
"""Plain content, no thinking."""
|
|
22
|
+
text = "Hello there!"
|
|
23
|
+
ids = tokenizer.encode(text, add_special_tokens=False)
|
|
24
|
+
parsed = renderer.parse_response(ids)
|
|
25
|
+
assert "Hello" in parsed.content
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_parse_thinking_and_content(model_name, tokenizer, renderer):
|
|
29
|
+
"""Content with <think>reasoning</think> block."""
|
|
30
|
+
text = "Let me think about this.\n</think>\n\nThe answer is 42."
|
|
31
|
+
ids = tokenizer.encode(text, add_special_tokens=False)
|
|
32
|
+
parsed = renderer.parse_response(ids)
|
|
33
|
+
# Should extract reasoning or at least not crash
|
|
34
|
+
assert (
|
|
35
|
+
"42" in parsed.content
|
|
36
|
+
or "think" in (parsed.reasoning_content or "").lower()
|
|
37
|
+
or parsed.content
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_parse_empty_completion(model_name, tokenizer, renderer):
|
|
42
|
+
"""Empty completion should not crash."""
|
|
43
|
+
parsed = renderer.parse_response([])
|
|
44
|
+
assert parsed.content is not None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_parse_response_returns_parsed_response(model_name, tokenizer, renderer):
|
|
48
|
+
"""Return type must have content, reasoning_content, tool_calls."""
|
|
49
|
+
ids = tokenizer.encode("Hello!", add_special_tokens=False)
|
|
50
|
+
parsed = renderer.parse_response(ids)
|
|
51
|
+
assert hasattr(parsed, "content")
|
|
52
|
+
assert hasattr(parsed, "reasoning_content")
|
|
53
|
+
assert hasattr(parsed, "tool_calls")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_qwen3_vl_parse_json_tool_call():
|
|
57
|
+
tokenizer, renderer = _qwen3_vl()
|
|
58
|
+
text = (
|
|
59
|
+
'Need a tool.\n<tool_call>\n{"name": "get_weather", '
|
|
60
|
+
'"arguments": {"city": "Paris"}}\n</tool_call>'
|
|
61
|
+
)
|
|
62
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
63
|
+
|
|
64
|
+
assert parsed.content == "Need a tool."
|
|
65
|
+
assert len(parsed.tool_calls) == 1
|
|
66
|
+
tc = parsed.tool_calls[0]
|
|
67
|
+
assert tc.status == ToolCallParseStatus.OK
|
|
68
|
+
assert tc.name == "get_weather"
|
|
69
|
+
assert tc.arguments == {"city": "Paris"}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_qwen3_vl_malformed_tool_call_surfaces_as_invalid_json():
|
|
73
|
+
"""A malformed ``<tool_call>`` block lands as a non-OK ``ParsedToolCall``
|
|
74
|
+
rather than getting silently merged back into ``content``.
|
|
75
|
+
|
|
76
|
+
Before the per-call status redesign, the parser mirrored vLLM's
|
|
77
|
+
hermes parser and stuffed the raw block into ``content`` to avoid
|
|
78
|
+
downstream ``EmptyModelResponseError``. That hid the malformed signal
|
|
79
|
+
from verifiers — they couldn't tell "model wrote prose" from "model
|
|
80
|
+
tried a tool call and produced broken JSON." Now the failed attempt
|
|
81
|
+
is preserved with ``status=INVALID_JSON`` and ``raw`` text, which
|
|
82
|
+
also satisfies the EmptyModelResponseError prevention contract: the
|
|
83
|
+
response is non-empty (it has a tool-call attempt) without lying
|
|
84
|
+
about what kind of output the model produced.
|
|
85
|
+
"""
|
|
86
|
+
tokenizer, renderer = _qwen3_vl()
|
|
87
|
+
# Note the trailing comma — malformed JSON
|
|
88
|
+
text = (
|
|
89
|
+
'<tool_call>\n{"name": "get_weather", '
|
|
90
|
+
'"arguments": {"city": "Paris",}}\n</tool_call>'
|
|
91
|
+
)
|
|
92
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
93
|
+
|
|
94
|
+
assert len(parsed.tool_calls) == 1
|
|
95
|
+
tc = parsed.tool_calls[0]
|
|
96
|
+
assert tc.status == ToolCallParseStatus.INVALID_JSON
|
|
97
|
+
assert "get_weather" in tc.raw
|
|
98
|
+
assert tc.token_span is not None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@lru_cache
|
|
102
|
+
def _qwen3():
|
|
103
|
+
tokenizer = load_tokenizer("Qwen/Qwen3-0.6B")
|
|
104
|
+
renderer = create_renderer(tokenizer)
|
|
105
|
+
return tokenizer, renderer
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_qwen3_in_think_tool_call_is_not_a_real_call():
|
|
109
|
+
"""A ``<tool_call>`` the model drafts *inside* its ``<think>`` trace must
|
|
110
|
+
stay reasoning — only the call emitted after ``</think>`` counts.
|
|
111
|
+
|
|
112
|
+
Regression for #78: Thinking models (e.g. Qwen3-*-Thinking-2507) draft
|
|
113
|
+
tool-call syntax while planning. Because ``<tool_call>`` is a real vocab
|
|
114
|
+
token, the parser used to scan the whole stream and emit the in-think
|
|
115
|
+
draft *and* the genuine post-``</think>`` call as two tool calls — a
|
|
116
|
+
phantom duplicate that made callers execute the same code twice. The scan
|
|
117
|
+
is now anchored after ``</think>``, mirroring vLLM's reasoning-then-tools
|
|
118
|
+
ordering.
|
|
119
|
+
"""
|
|
120
|
+
tokenizer, renderer = _qwen3()
|
|
121
|
+
text = (
|
|
122
|
+
"<think>\nLet me draft the call:\n"
|
|
123
|
+
'<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
|
|
124
|
+
"</tool_call>\nYes, that looks right.\n</think>\n"
|
|
125
|
+
'<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
|
|
126
|
+
"</tool_call>"
|
|
127
|
+
)
|
|
128
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
129
|
+
|
|
130
|
+
assert len(parsed.tool_calls) == 1
|
|
131
|
+
tc = parsed.tool_calls[0]
|
|
132
|
+
assert tc.status == ToolCallParseStatus.OK
|
|
133
|
+
assert tc.name == "execute_code"
|
|
134
|
+
assert tc.arguments == {"code": "print(1)"}
|
|
135
|
+
# The drafted call stays in the reasoning trace, not content.
|
|
136
|
+
assert parsed.reasoning_content is not None
|
|
137
|
+
assert "<tool_call>" in parsed.reasoning_content
|
|
138
|
+
assert parsed.content == ""
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def test_qwen3_distinct_parallel_calls_after_think_are_preserved():
|
|
142
|
+
"""The fix must not over-correct: two *genuine* parallel calls emitted
|
|
143
|
+
after ``</think>`` are still both returned (no dedup), preserving the
|
|
144
|
+
faithful-transcription contract for real invocations.
|
|
145
|
+
"""
|
|
146
|
+
tokenizer, renderer = _qwen3()
|
|
147
|
+
text = (
|
|
148
|
+
"<think>\nplan\n</think>\n"
|
|
149
|
+
'<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
|
|
150
|
+
"</tool_call>\n"
|
|
151
|
+
'<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(2)"}}\n'
|
|
152
|
+
"</tool_call>"
|
|
153
|
+
)
|
|
154
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
155
|
+
|
|
156
|
+
assert len(parsed.tool_calls) == 2
|
|
157
|
+
assert [tc.arguments for tc in parsed.tool_calls] == [
|
|
158
|
+
{"code": "print(1)"},
|
|
159
|
+
{"code": "print(2)"},
|
|
160
|
+
]
|
|
161
|
+
assert parsed.reasoning_content == "plan"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@lru_cache
|
|
165
|
+
def _kimi_k25():
|
|
166
|
+
tokenizer = load_tokenizer("moonshotai/Kimi-K2.5")
|
|
167
|
+
renderer = create_renderer(tokenizer)
|
|
168
|
+
return tokenizer, renderer
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def test_kimi_k25_tool_call_carries_token_span():
|
|
172
|
+
"""K2.5 was the lone parser without token spans before — its inline
|
|
173
|
+
text-walking implementation couldn't cheaply map regex hits back to
|
|
174
|
+
token offsets. We now walk token IDs via ``parse_kimi_k2_section`` for
|
|
175
|
+
the special-token path; spans must round-trip and point at a sensible
|
|
176
|
+
range within the original input token_ids.
|
|
177
|
+
"""
|
|
178
|
+
tokenizer, renderer = _kimi_k25()
|
|
179
|
+
# K2.5 tool-call wire shape: section + per-call special tokens.
|
|
180
|
+
text = (
|
|
181
|
+
"<|tool_calls_section_begin|>"
|
|
182
|
+
"<|tool_call_begin|>functions.get_weather:0"
|
|
183
|
+
"<|tool_call_argument_begin|>"
|
|
184
|
+
'{"city": "Tokyo"}'
|
|
185
|
+
"<|tool_call_end|>"
|
|
186
|
+
"<|tool_calls_section_end|>"
|
|
187
|
+
)
|
|
188
|
+
token_ids = tokenizer.encode(text, add_special_tokens=False)
|
|
189
|
+
parsed = renderer.parse_response(token_ids)
|
|
190
|
+
|
|
191
|
+
assert len(parsed.tool_calls) == 1
|
|
192
|
+
tc = parsed.tool_calls[0]
|
|
193
|
+
assert tc.status == ToolCallParseStatus.OK
|
|
194
|
+
assert tc.name == "get_weather"
|
|
195
|
+
assert tc.arguments == {"city": "Tokyo"}
|
|
196
|
+
assert tc.token_span is not None
|
|
197
|
+
start, end = tc.token_span
|
|
198
|
+
assert 0 <= start < end <= len(token_ids), (
|
|
199
|
+
f"span {tc.token_span} out of range for {len(token_ids)} input tokens"
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_kimi_k25_in_think_section_is_not_a_real_call():
|
|
204
|
+
"""A tool-call section the model drafts inside its ``<think>`` trace must
|
|
205
|
+
not be parsed — only the section after ``</think>`` counts.
|
|
206
|
+
|
|
207
|
+
Regression for #78. K2.5's failure mode differed from Qwen3's: the
|
|
208
|
+
in-think section tripped the "truncated reasoning" guard and the parser
|
|
209
|
+
*dropped every tool call* (returned zero), losing the genuine call. The
|
|
210
|
+
scan is now anchored past ``</think>``.
|
|
211
|
+
"""
|
|
212
|
+
tokenizer, renderer = _kimi_k25()
|
|
213
|
+
section = (
|
|
214
|
+
"<|tool_calls_section_begin|>"
|
|
215
|
+
"<|tool_call_begin|>functions.execute_code:0"
|
|
216
|
+
'<|tool_call_argument_begin|>{"code": "print(1)"}'
|
|
217
|
+
"<|tool_call_end|><|tool_calls_section_end|>"
|
|
218
|
+
)
|
|
219
|
+
text = f"<think>\nLet me draft:\n{section}\nlooks right.\n</think>\nGo.\n{section}"
|
|
220
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
221
|
+
|
|
222
|
+
assert len(parsed.tool_calls) == 1
|
|
223
|
+
tc = parsed.tool_calls[0]
|
|
224
|
+
assert tc.status == ToolCallParseStatus.OK
|
|
225
|
+
assert tc.name == "execute_code"
|
|
226
|
+
assert tc.arguments == {"code": "print(1)"}
|
|
227
|
+
# The drafted section stays in the reasoning trace.
|
|
228
|
+
assert parsed.reasoning_content is not None
|
|
229
|
+
assert "<|tool_calls_section_begin|>" in parsed.reasoning_content
|
|
230
|
+
assert parsed.content == "Go."
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@lru_cache
|
|
234
|
+
def _deepseek_v3():
|
|
235
|
+
tokenizer = load_tokenizer("deepseek-ai/DeepSeek-V3")
|
|
236
|
+
renderer = create_renderer(tokenizer)
|
|
237
|
+
return tokenizer, renderer
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def test_deepseek_v3_in_think_section_is_not_a_real_call():
|
|
241
|
+
"""A tool-call section drafted inside ``<think>`` must not be parsed —
|
|
242
|
+
only the section after ``</think>`` counts.
|
|
243
|
+
|
|
244
|
+
Regression for #78. DeepSeek-V3's failure mode: it returned the *wrong*
|
|
245
|
+
call (the in-think draft) and lost reasoning, because ``</think>`` is
|
|
246
|
+
multi-token text there and the scan wasn't anchored past it.
|
|
247
|
+
"""
|
|
248
|
+
tokenizer, renderer = _deepseek_v3()
|
|
249
|
+
|
|
250
|
+
def section(name: str) -> str:
|
|
251
|
+
return (
|
|
252
|
+
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
|
|
253
|
+
f'{name}\n```json\n{{"code": "print(1)"}}\n```'
|
|
254
|
+
"<|tool▁call▁end|><|tool▁calls▁end|>"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
text = (
|
|
258
|
+
f"<think>\nLet me draft:\n{section('draft_tool')}\nlooks right.\n</think>\n"
|
|
259
|
+
f"Go.\n{section('real_tool')}"
|
|
260
|
+
)
|
|
261
|
+
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
262
|
+
|
|
263
|
+
assert len(parsed.tool_calls) == 1
|
|
264
|
+
tc = parsed.tool_calls[0]
|
|
265
|
+
# Assert the *post-``</think>``* section was chosen, not the in-think draft.
|
|
266
|
+
# (Use ``startswith`` rather than ``== "real_tool"``: under transformers
|
|
267
|
+
# 5.x the DeepSeek tokenizer's decode drops the ``\n`` between name and the
|
|
268
|
+
# ```json fence, so ``_parse_deepseek_tool_calls`` folds the fence into the
|
|
269
|
+
# name — a pre-existing, #78-unrelated quirk. What matters here is *which*
|
|
270
|
+
# section won.)
|
|
271
|
+
assert tc.name is not None and tc.name.startswith("real_tool")
|
|
272
|
+
assert "draft_tool" not in tc.name
|
|
273
|
+
# The drafted section stays in the reasoning trace, not content.
|
|
274
|
+
assert parsed.reasoning_content is not None
|
|
275
|
+
assert "draft_tool" in parsed.reasoning_content
|
|
276
|
+
assert parsed.content == "Go."
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
"""Barrage test: renderer.parse_response() must correctly extract
|
|
2
|
-
content, reasoning_content, and tool_calls from completion tokens.
|
|
3
|
-
|
|
4
|
-
Runs against every (model, renderer) pair.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from functools import lru_cache
|
|
8
|
-
|
|
9
|
-
from renderers import create_renderer
|
|
10
|
-
from renderers.base import ToolCallParseStatus, load_tokenizer
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@lru_cache
|
|
14
|
-
def _qwen3_vl():
|
|
15
|
-
tokenizer = load_tokenizer("Qwen/Qwen3-VL-4B-Instruct")
|
|
16
|
-
renderer = create_renderer(tokenizer)
|
|
17
|
-
return tokenizer, renderer
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def test_parse_simple_content(model_name, tokenizer, renderer):
|
|
21
|
-
"""Plain content, no thinking."""
|
|
22
|
-
text = "Hello there!"
|
|
23
|
-
ids = tokenizer.encode(text, add_special_tokens=False)
|
|
24
|
-
parsed = renderer.parse_response(ids)
|
|
25
|
-
assert "Hello" in parsed.content
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def test_parse_thinking_and_content(model_name, tokenizer, renderer):
|
|
29
|
-
"""Content with <think>reasoning</think> block."""
|
|
30
|
-
text = "Let me think about this.\n</think>\n\nThe answer is 42."
|
|
31
|
-
ids = tokenizer.encode(text, add_special_tokens=False)
|
|
32
|
-
parsed = renderer.parse_response(ids)
|
|
33
|
-
# Should extract reasoning or at least not crash
|
|
34
|
-
assert (
|
|
35
|
-
"42" in parsed.content
|
|
36
|
-
or "think" in (parsed.reasoning_content or "").lower()
|
|
37
|
-
or parsed.content
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def test_parse_empty_completion(model_name, tokenizer, renderer):
|
|
42
|
-
"""Empty completion should not crash."""
|
|
43
|
-
parsed = renderer.parse_response([])
|
|
44
|
-
assert parsed.content is not None
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def test_parse_response_returns_parsed_response(model_name, tokenizer, renderer):
|
|
48
|
-
"""Return type must have content, reasoning_content, tool_calls."""
|
|
49
|
-
ids = tokenizer.encode("Hello!", add_special_tokens=False)
|
|
50
|
-
parsed = renderer.parse_response(ids)
|
|
51
|
-
assert hasattr(parsed, "content")
|
|
52
|
-
assert hasattr(parsed, "reasoning_content")
|
|
53
|
-
assert hasattr(parsed, "tool_calls")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def test_qwen3_vl_parse_json_tool_call():
|
|
57
|
-
tokenizer, renderer = _qwen3_vl()
|
|
58
|
-
text = (
|
|
59
|
-
'Need a tool.\n<tool_call>\n{"name": "get_weather", '
|
|
60
|
-
'"arguments": {"city": "Paris"}}\n</tool_call>'
|
|
61
|
-
)
|
|
62
|
-
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
63
|
-
|
|
64
|
-
assert parsed.content == "Need a tool."
|
|
65
|
-
assert len(parsed.tool_calls) == 1
|
|
66
|
-
tc = parsed.tool_calls[0]
|
|
67
|
-
assert tc.status == ToolCallParseStatus.OK
|
|
68
|
-
assert tc.name == "get_weather"
|
|
69
|
-
assert tc.arguments == {"city": "Paris"}
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def test_qwen3_vl_malformed_tool_call_surfaces_as_invalid_json():
|
|
73
|
-
"""A malformed ``<tool_call>`` block lands as a non-OK ``ParsedToolCall``
|
|
74
|
-
rather than getting silently merged back into ``content``.
|
|
75
|
-
|
|
76
|
-
Before the per-call status redesign, the parser mirrored vLLM's
|
|
77
|
-
hermes parser and stuffed the raw block into ``content`` to avoid
|
|
78
|
-
downstream ``EmptyModelResponseError``. That hid the malformed signal
|
|
79
|
-
from verifiers — they couldn't tell "model wrote prose" from "model
|
|
80
|
-
tried a tool call and produced broken JSON." Now the failed attempt
|
|
81
|
-
is preserved with ``status=INVALID_JSON`` and ``raw`` text, which
|
|
82
|
-
also satisfies the EmptyModelResponseError prevention contract: the
|
|
83
|
-
response is non-empty (it has a tool-call attempt) without lying
|
|
84
|
-
about what kind of output the model produced.
|
|
85
|
-
"""
|
|
86
|
-
tokenizer, renderer = _qwen3_vl()
|
|
87
|
-
# Note the trailing comma — malformed JSON
|
|
88
|
-
text = (
|
|
89
|
-
'<tool_call>\n{"name": "get_weather", '
|
|
90
|
-
'"arguments": {"city": "Paris",}}\n</tool_call>'
|
|
91
|
-
)
|
|
92
|
-
parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
|
|
93
|
-
|
|
94
|
-
assert len(parsed.tool_calls) == 1
|
|
95
|
-
tc = parsed.tool_calls[0]
|
|
96
|
-
assert tc.status == ToolCallParseStatus.INVALID_JSON
|
|
97
|
-
assert "get_weather" in tc.raw
|
|
98
|
-
assert tc.token_span is not None
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
@lru_cache
|
|
102
|
-
def _kimi_k25():
|
|
103
|
-
tokenizer = load_tokenizer("moonshotai/Kimi-K2.5")
|
|
104
|
-
renderer = create_renderer(tokenizer)
|
|
105
|
-
return tokenizer, renderer
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def test_kimi_k25_tool_call_carries_token_span():
|
|
109
|
-
"""K2.5 was the lone parser without token spans before — its inline
|
|
110
|
-
text-walking implementation couldn't cheaply map regex hits back to
|
|
111
|
-
token offsets. We now walk token IDs via ``parse_kimi_k2_section`` for
|
|
112
|
-
the special-token path; spans must round-trip and point at a sensible
|
|
113
|
-
range within the original input token_ids.
|
|
114
|
-
"""
|
|
115
|
-
tokenizer, renderer = _kimi_k25()
|
|
116
|
-
# K2.5 tool-call wire shape: section + per-call special tokens.
|
|
117
|
-
text = (
|
|
118
|
-
"<|tool_calls_section_begin|>"
|
|
119
|
-
"<|tool_call_begin|>functions.get_weather:0"
|
|
120
|
-
"<|tool_call_argument_begin|>"
|
|
121
|
-
'{"city": "Tokyo"}'
|
|
122
|
-
"<|tool_call_end|>"
|
|
123
|
-
"<|tool_calls_section_end|>"
|
|
124
|
-
)
|
|
125
|
-
token_ids = tokenizer.encode(text, add_special_tokens=False)
|
|
126
|
-
parsed = renderer.parse_response(token_ids)
|
|
127
|
-
|
|
128
|
-
assert len(parsed.tool_calls) == 1
|
|
129
|
-
tc = parsed.tool_calls[0]
|
|
130
|
-
assert tc.status == ToolCallParseStatus.OK
|
|
131
|
-
assert tc.name == "get_weather"
|
|
132
|
-
assert tc.arguments == {"city": "Tokyo"}
|
|
133
|
-
assert tc.token_span is not None
|
|
134
|
-
start, end = tc.token_span
|
|
135
|
-
assert 0 <= start < end <= len(token_ids), (
|
|
136
|
-
f"span {tc.token_span} out of range for {len(token_ids)} input tokens"
|
|
137
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/sglang/multiturn_generate_sglang.py
RENAMED
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/tinker/multiturn_generate_tinker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|