renderers 0.1.8.dev41__tar.gz → 0.1.8.dev42__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (68)
  1. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/_version.py +2 -2
  3. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/kimi_k25.py +15 -3
  4. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/parsing.py +67 -3
  5. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen3.py +2 -0
  6. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen3_vl.py +2 -0
  7. renderers-0.1.8.dev42/tests/test_parse_response.py +276 -0
  8. renderers-0.1.8.dev41/tests/test_parse_response.py +0 -137
  9. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/publish-dev.yml +0 -0
  10. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/publish.yml +0 -0
  11. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/style.yml +0 -0
  12. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.github/workflows/test.yml +0 -0
  13. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.gitignore +0 -0
  14. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/.pre-commit-config.yaml +0 -0
  15. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/LICENSE +0 -0
  16. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/README.md +0 -0
  17. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/docs/renderer-config.md +0 -0
  18. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/README.md +0 -0
  19. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/sglang/multiturn_generate_sglang.py +0 -0
  20. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/sglang/online_multiturn_sglang.py +0 -0
  21. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/tinker/multiturn_generate_tinker.py +0 -0
  22. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/transformers/multiturn_generate_transformers.py +0 -0
  23. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/examples/vllm/multiturn_generate_vllm.py +0 -0
  24. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/pyproject.toml +0 -0
  25. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/__init__.py +0 -0
  26. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/base.py +0 -0
  27. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/client.py +0 -0
  28. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/configs.py +0 -0
  29. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/deepseek_v3.py +0 -0
  30. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/default.py +0 -0
  31. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/glm45.py +0 -0
  32. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/glm5.py +0 -0
  33. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/gpt_oss.py +0 -0
  34. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/kimi_k2.py +0 -0
  35. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/laguna_xs2.py +0 -0
  36. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/llama_3.py +0 -0
  37. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/minimax_m2.py +0 -0
  38. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/nemotron3.py +0 -0
  39. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/parsers.py +0 -0
  40. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen35.py +0 -0
  41. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/renderers/qwen36.py +0 -0
  42. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/conftest.py +0 -0
  43. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_bridge.py +0 -0
  44. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_build_helpers.py +0 -0
  45. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_client.py +0 -0
  46. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_gpt_oss_harmony_parity.py +0 -0
  47. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_incremental.py +0 -0
  48. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_is_content.py +0 -0
  49. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_kimi_k25_tool_schema.py +0 -0
  50. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_llama_3.py +0 -0
  51. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_load_tokenizer.py +0 -0
  52. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_load_tokenizer_fastokens.py +0 -0
  53. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_message_indices.py +0 -0
  54. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_message_tool_names.py +0 -0
  55. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_multimodal.py +0 -0
  56. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_nemotron3_ultra.py +0 -0
  57. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_parse_response_robustness.py +0 -0
  58. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_parsers.py +0 -0
  59. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_preserve_thinking.py +0 -0
  60. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_qwen35_size_coverage.py +0 -0
  61. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_render_ids.py +0 -0
  62. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_renderer_config.py +0 -0
  63. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_renderer_config_parity.py +0 -0
  64. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_roundtrip.py +0 -0
  65. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_sampled_mask.py +0 -0
  66. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_tokens_per_message.py +0 -0
  67. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/tests/test_tool_arg_type_preservation.py +0 -0
  68. {renderers-0.1.8.dev41 → renderers-0.1.8.dev42}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev41
3
+ Version: 0.1.8.dev42
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev41'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev41')
21
+ __version__ = version = '0.1.8.dev42'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev42')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -42,7 +42,7 @@ from renderers.base import (
42
42
  trim_to_turn_close,
43
43
  )
44
44
  from renderers.configs import KimiK25RendererConfig
45
- from renderers.parsing import parse_kimi_k2_section
45
+ from renderers.parsing import _reasoning_end_token_index, parse_kimi_k2_section
46
46
  from renderers.qwen3_vl import (
47
47
  _image_hash,
48
48
  _is_image_part,
@@ -452,6 +452,13 @@ def _parse_kimi_k2_response(
452
452
  ids = ids[:i]
453
453
  break
454
454
 
455
+ # Reasoning first: a tool-call section the model drafts *inside* its
456
+ # <think> trace must not be parsed as a real call (regression #78 — cf.
457
+ # parse_qwen3). K2.5 renders </think> as text, so locate the boundary by
458
+ # decoding; the section scan then starts past it. content_ids still begins
459
+ # at 0, so the </think> text-split below recovers reasoning unchanged.
460
+ reasoning_end = _reasoning_end_token_index(tokenizer, ids)
461
+
455
462
  # Token-ID path — produces spans. Only run if every relevant special
456
463
  # token resolved at init (i.e. is in the tokenizer's vocab).
457
464
  tool_calls: list[ParsedToolCall] = []
@@ -471,6 +478,7 @@ def _parse_kimi_k2_response(
471
478
  tool_call_begin_id=tool_call_begin_id,
472
479
  tool_call_argument_begin_id=tool_call_argument_begin_id,
473
480
  tool_call_end_id=tool_call_end_id,
481
+ scan_start=reasoning_end,
474
482
  )
475
483
  text = (
476
484
  tokenizer.decode(content_ids, skip_special_tokens=False)
@@ -481,9 +489,13 @@ def _parse_kimi_k2_response(
481
489
  text = tokenizer.decode(ids, skip_special_tokens=False) if ids else ""
482
490
 
483
491
  # Fallback path: model emitted literal-text section delimiters (singular
484
- # variant) rather than special tokens. Spans unavailable here.
492
+ # variant) rather than special tokens. Spans unavailable here. Start the
493
+ # search past the first </think> so a literal section drafted inside the
494
+ # reasoning trace isn't matched as a real call (regression #78).
485
495
  if not tool_calls:
486
- tc_match = _TOOL_CALLS_SECTION_RE.search(text)
496
+ think_close = text.find("</think>")
497
+ search_from = think_close + len("</think>") if think_close != -1 else 0
498
+ tc_match = _TOOL_CALLS_SECTION_RE.search(text, search_from)
487
499
  if tc_match:
488
500
  text = text[: tc_match.start()]
489
501
  tool_section = (
@@ -133,6 +133,39 @@ def _decode(tokenizer, ids: list[int]) -> str:
133
133
  return tokenizer.decode(ids, skip_special_tokens=False)
134
134
 
135
135
 
136
+ def _reasoning_end_token_index(
137
+ tokenizer, ids: list[int], marker: str = "</think>"
138
+ ) -> int:
139
+ """Token index immediately past the first ``</think>`` in ``ids``.
140
+
141
+ Returns 0 when ``ids`` has no closed reasoning region — callers treat
142
+ that as "scan from the start" (preserves pre-existing behavior for
143
+ non-thinking / truncated-reasoning completions).
144
+
145
+ Used by parsers whose ``</think>`` is *not* a single special token
146
+ (DeepSeek-V3, Kimi-K2.5) — where it tokenizes to several pieces and is
147
+ context-sensitive (the closing ``>`` merges differently depending on the
148
+ next char), so a token-id or fixed-subsequence search isn't reliable. We
149
+ instead locate the boundary in decoded text via binary search over prefix
150
+ decodes, which holds as long as ``decode(ids[:k])`` is prefix-stable in
151
+ ``k`` (true for the byte-level BPE tokenizers here; ``</think>`` is clean
152
+ ASCII that won't straddle a byte boundary). Single-token ``</think>``
153
+ parsers (Qwen3) anchor on the token id directly and don't need this.
154
+ """
155
+ if not ids or marker not in _decode(tokenizer, ids):
156
+ return 0
157
+ # Smallest prefix length (in tokens) whose decode already contains the
158
+ # full marker — i.e. the index just past where </think> completes.
159
+ lo, hi = 1, len(ids)
160
+ while lo < hi:
161
+ mid = (lo + hi) // 2
162
+ if marker in _decode(tokenizer, ids[:mid]):
163
+ hi = mid
164
+ else:
165
+ lo = mid + 1
166
+ return lo
167
+
168
+
136
169
  # ── Qwen3: <tool_call> JSON </tool_call> ────────────────────────────
137
170
 
138
171
 
@@ -143,11 +176,26 @@ def parse_qwen3(
143
176
  stop_ids: set[int],
144
177
  tool_call_id: int,
145
178
  tool_call_end_id: int,
179
+ reasoning_end_id: int | None = None,
146
180
  ) -> ParsedResponse:
147
181
  """Parse Qwen3 completion tokens. Hermes-style JSON tool calls."""
148
182
  ids = _strip_stop_tokens(token_ids, stop_ids)
149
183
 
150
- tc_start = _find(ids, tool_call_id)
184
+ # Reasoning is resolved before tool calls. Thinking models (e.g.
185
+ # Qwen3-*-Thinking) routinely draft ``<tool_call>`` blocks *inside* their
186
+ # ``<think>...</think>`` trace while planning; those are reasoning, not
187
+ # real invocations. Anchoring the tool-call scan after the ``</think>``
188
+ # boundary keeps in-think drafts out of ``tool_calls`` (otherwise they
189
+ # surface as phantom/duplicate calls) and out of the reasoning/content
190
+ # split. Mirrors vLLM's DelegatingParser, which runs the reasoning parser
191
+ # first and tool-parses only the post-``</think>`` content.
192
+ # ``reasoning_end_id`` is the ``</think>`` token id; when it's absent
193
+ # (``None``) or the model never closed its reasoning, the scan falls back
194
+ # to the whole stream (prior behavior).
195
+ reasoning_end = _find(ids, reasoning_end_id) if reasoning_end_id is not None else -1
196
+ scan_start = reasoning_end + 1 if reasoning_end != -1 else 0
197
+
198
+ tc_start = _find(ids, tool_call_id, scan_start)
151
199
  tool_calls: list[ParsedToolCall] = []
152
200
  if tc_start != -1:
153
201
  content_ids = ids[:tc_start]
@@ -685,7 +733,15 @@ def parse_deepseek_v3(
685
733
  """
686
734
  ids = _strip_stop_tokens(token_ids, stop_ids)
687
735
 
688
- tc_section_start = _find(ids, tool_calls_begin_id)
736
+ # Reasoning first: skip past </think> before looking for the tool-call
737
+ # section, so a section the model drafts *inside* its <think> trace isn't
738
+ # parsed as a real call (regression #78 — cf. parse_qwen3). content_ids
739
+ # still starts at 0, so the </think> text-split below recovers reasoning.
740
+ # DeepSeek-V3 renders </think> as multi-token text, hence the decode-based
741
+ # boundary finder rather than a token-id anchor.
742
+ reasoning_end = _reasoning_end_token_index(tokenizer, ids)
743
+
744
+ tc_section_start = _find(ids, tool_calls_begin_id, reasoning_end)
689
745
  tool_calls: list[ParsedToolCall] = []
690
746
  if tc_section_start != -1:
691
747
  content_ids = ids[:tc_section_start]
@@ -962,6 +1018,7 @@ def parse_kimi_k2_section(
962
1018
  tool_call_begin_id: int,
963
1019
  tool_call_argument_begin_id: int,
964
1020
  tool_call_end_id: int,
1021
+ scan_start: int = 0,
965
1022
  ) -> tuple[list[int], list[ParsedToolCall]]:
966
1023
  """Split ``ids`` into ``(content_before_section, tool_calls)`` by finding
967
1024
  the Kimi-style tool-call section delimiters.
@@ -973,8 +1030,15 @@ def parse_kimi_k2_section(
973
1030
  of the section and a list of ``ParsedToolCall`` covering every attempted
974
1031
  block inside it; an unclosed section is still walked to whatever the model
975
1032
  emitted before EOS. Returns ``(ids, [])`` when no section is present.
1033
+
1034
+ ``scan_start`` restricts the section search to ``ids[scan_start:]`` while
1035
+ keeping ``content_ids = ids[:section_start]`` and all token spans relative
1036
+ to the full ``ids``. Callers pass the post-``</think>`` index so a section
1037
+ the model drafts inside its reasoning trace isn't parsed as a real call;
1038
+ because ``content_ids`` still starts at 0, downstream text-based reasoning
1039
+ extraction is unaffected (regression #78).
976
1040
  """
977
- section_start = _find_any(ids, tool_calls_section_begin_ids)
1041
+ section_start = _find_any(ids, tool_calls_section_begin_ids, scan_start)
978
1042
  if section_start == -1:
979
1043
  return list(ids), []
980
1044
  content_ids = ids[:section_start]
@@ -62,6 +62,7 @@ class Qwen3Renderer:
62
62
  self._tool_call_end = self._token_id("</tool_call>")
63
63
  self._tool_response = self._token_id("<tool_response>")
64
64
  self._tool_response_end = self._token_id("</tool_response>")
65
+ self._think_end = self._token_id("</think>")
65
66
 
66
67
  def _token_id(self, token: str) -> int:
67
68
  tid = self._tokenizer.convert_tokens_to_ids(token)
@@ -276,6 +277,7 @@ class Qwen3Renderer:
276
277
  stop_ids={self._im_end, self._endoftext},
277
278
  tool_call_id=self._tool_call,
278
279
  tool_call_end_id=self._tool_call_end,
280
+ reasoning_end_id=self._think_end,
279
281
  )
280
282
 
281
283
  def get_stop_token_ids(self) -> list[int]:
@@ -325,6 +325,7 @@ class Qwen3VLRenderer:
325
325
  self._tool_call_end = self._token_id("</tool_call>")
326
326
  self._tool_response = self._token_id("<tool_response>")
327
327
  self._tool_response_end = self._token_id("</tool_response>")
328
+ self._think_end = self._token_id("</think>")
328
329
  self._vision_start = self._token_id("<|vision_start|>")
329
330
  self._vision_end = self._token_id("<|vision_end|>")
330
331
  self._image_pad = self._token_id("<|image_pad|>")
@@ -634,6 +635,7 @@ class Qwen3VLRenderer:
634
635
  stop_ids={self._im_end, self._endoftext},
635
636
  tool_call_id=self._tool_call,
636
637
  tool_call_end_id=self._tool_call_end,
638
+ reasoning_end_id=self._think_end,
637
639
  )
638
640
 
639
641
  def get_stop_token_ids(self) -> list[int]:
@@ -0,0 +1,276 @@
1
+ """Barrage test: renderer.parse_response() must correctly extract
2
+ content, reasoning_content, and tool_calls from completion tokens.
3
+
4
+ Runs against every (model, renderer) pair.
5
+ """
6
+
7
+ from functools import lru_cache
8
+
9
+ from renderers import create_renderer
10
+ from renderers.base import ToolCallParseStatus, load_tokenizer
11
+
12
+
13
+ @lru_cache
14
+ def _qwen3_vl():
15
+ tokenizer = load_tokenizer("Qwen/Qwen3-VL-4B-Instruct")
16
+ renderer = create_renderer(tokenizer)
17
+ return tokenizer, renderer
18
+
19
+
20
+ def test_parse_simple_content(model_name, tokenizer, renderer):
21
+ """Plain content, no thinking."""
22
+ text = "Hello there!"
23
+ ids = tokenizer.encode(text, add_special_tokens=False)
24
+ parsed = renderer.parse_response(ids)
25
+ assert "Hello" in parsed.content
26
+
27
+
28
+ def test_parse_thinking_and_content(model_name, tokenizer, renderer):
29
+ """Content with <think>reasoning</think> block."""
30
+ text = "Let me think about this.\n</think>\n\nThe answer is 42."
31
+ ids = tokenizer.encode(text, add_special_tokens=False)
32
+ parsed = renderer.parse_response(ids)
33
+ # Should extract reasoning or at least not crash
34
+ assert (
35
+ "42" in parsed.content
36
+ or "think" in (parsed.reasoning_content or "").lower()
37
+ or parsed.content
38
+ )
39
+
40
+
41
+ def test_parse_empty_completion(model_name, tokenizer, renderer):
42
+ """Empty completion should not crash."""
43
+ parsed = renderer.parse_response([])
44
+ assert parsed.content is not None
45
+
46
+
47
+ def test_parse_response_returns_parsed_response(model_name, tokenizer, renderer):
48
+ """Return type must have content, reasoning_content, tool_calls."""
49
+ ids = tokenizer.encode("Hello!", add_special_tokens=False)
50
+ parsed = renderer.parse_response(ids)
51
+ assert hasattr(parsed, "content")
52
+ assert hasattr(parsed, "reasoning_content")
53
+ assert hasattr(parsed, "tool_calls")
54
+
55
+
56
+ def test_qwen3_vl_parse_json_tool_call():
57
+ tokenizer, renderer = _qwen3_vl()
58
+ text = (
59
+ 'Need a tool.\n<tool_call>\n{"name": "get_weather", '
60
+ '"arguments": {"city": "Paris"}}\n</tool_call>'
61
+ )
62
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
63
+
64
+ assert parsed.content == "Need a tool."
65
+ assert len(parsed.tool_calls) == 1
66
+ tc = parsed.tool_calls[0]
67
+ assert tc.status == ToolCallParseStatus.OK
68
+ assert tc.name == "get_weather"
69
+ assert tc.arguments == {"city": "Paris"}
70
+
71
+
72
+ def test_qwen3_vl_malformed_tool_call_surfaces_as_invalid_json():
73
+ """A malformed ``<tool_call>`` block lands as a non-OK ``ParsedToolCall``
74
+ rather than getting silently merged back into ``content``.
75
+
76
+ Before the per-call status redesign, the parser mirrored vLLM's
77
+ hermes parser and stuffed the raw block into ``content`` to avoid
78
+ downstream ``EmptyModelResponseError``. That hid the malformed signal
79
+ from verifiers — they couldn't tell "model wrote prose" from "model
80
+ tried a tool call and produced broken JSON." Now the failed attempt
81
+ is preserved with ``status=INVALID_JSON`` and ``raw`` text, which
82
+ also satisfies the EmptyModelResponseError prevention contract: the
83
+ response is non-empty (it has a tool-call attempt) without lying
84
+ about what kind of output the model produced.
85
+ """
86
+ tokenizer, renderer = _qwen3_vl()
87
+ # Note the trailing comma — malformed JSON
88
+ text = (
89
+ '<tool_call>\n{"name": "get_weather", '
90
+ '"arguments": {"city": "Paris",}}\n</tool_call>'
91
+ )
92
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
93
+
94
+ assert len(parsed.tool_calls) == 1
95
+ tc = parsed.tool_calls[0]
96
+ assert tc.status == ToolCallParseStatus.INVALID_JSON
97
+ assert "get_weather" in tc.raw
98
+ assert tc.token_span is not None
99
+
100
+
101
+ @lru_cache
102
+ def _qwen3():
103
+ tokenizer = load_tokenizer("Qwen/Qwen3-0.6B")
104
+ renderer = create_renderer(tokenizer)
105
+ return tokenizer, renderer
106
+
107
+
108
+ def test_qwen3_in_think_tool_call_is_not_a_real_call():
109
+ """A ``<tool_call>`` the model drafts *inside* its ``<think>`` trace must
110
+ stay reasoning — only the call emitted after ``</think>`` counts.
111
+
112
+ Regression for #78: Thinking models (e.g. Qwen3-*-Thinking-2507) draft
113
+ tool-call syntax while planning. Because ``<tool_call>`` is a real vocab
114
+ token, the parser used to scan the whole stream and emit the in-think
115
+ draft *and* the genuine post-``</think>`` call as two tool calls — a
116
+ phantom duplicate that made callers execute the same code twice. The scan
117
+ is now anchored after ``</think>``, mirroring vLLM's reasoning-then-tools
118
+ ordering.
119
+ """
120
+ tokenizer, renderer = _qwen3()
121
+ text = (
122
+ "<think>\nLet me draft the call:\n"
123
+ '<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
124
+ "</tool_call>\nYes, that looks right.\n</think>\n"
125
+ '<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
126
+ "</tool_call>"
127
+ )
128
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
129
+
130
+ assert len(parsed.tool_calls) == 1
131
+ tc = parsed.tool_calls[0]
132
+ assert tc.status == ToolCallParseStatus.OK
133
+ assert tc.name == "execute_code"
134
+ assert tc.arguments == {"code": "print(1)"}
135
+ # The drafted call stays in the reasoning trace, not content.
136
+ assert parsed.reasoning_content is not None
137
+ assert "<tool_call>" in parsed.reasoning_content
138
+ assert parsed.content == ""
139
+
140
+
141
+ def test_qwen3_distinct_parallel_calls_after_think_are_preserved():
142
+ """The fix must not over-correct: two *genuine* parallel calls emitted
143
+ after ``</think>`` are still both returned (no dedup), preserving the
144
+ faithful-transcription contract for real invocations.
145
+ """
146
+ tokenizer, renderer = _qwen3()
147
+ text = (
148
+ "<think>\nplan\n</think>\n"
149
+ '<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(1)"}}\n'
150
+ "</tool_call>\n"
151
+ '<tool_call>\n{"name": "execute_code", "arguments": {"code": "print(2)"}}\n'
152
+ "</tool_call>"
153
+ )
154
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
155
+
156
+ assert len(parsed.tool_calls) == 2
157
+ assert [tc.arguments for tc in parsed.tool_calls] == [
158
+ {"code": "print(1)"},
159
+ {"code": "print(2)"},
160
+ ]
161
+ assert parsed.reasoning_content == "plan"
162
+
163
+
164
+ @lru_cache
165
+ def _kimi_k25():
166
+ tokenizer = load_tokenizer("moonshotai/Kimi-K2.5")
167
+ renderer = create_renderer(tokenizer)
168
+ return tokenizer, renderer
169
+
170
+
171
+ def test_kimi_k25_tool_call_carries_token_span():
172
+ """K2.5 was the lone parser without token spans before — its inline
173
+ text-walking implementation couldn't cheaply map regex hits back to
174
+ token offsets. We now walk token IDs via ``parse_kimi_k2_section`` for
175
+ the special-token path; spans must round-trip and point at a sensible
176
+ range within the original input token_ids.
177
+ """
178
+ tokenizer, renderer = _kimi_k25()
179
+ # K2.5 tool-call wire shape: section + per-call special tokens.
180
+ text = (
181
+ "<|tool_calls_section_begin|>"
182
+ "<|tool_call_begin|>functions.get_weather:0"
183
+ "<|tool_call_argument_begin|>"
184
+ '{"city": "Tokyo"}'
185
+ "<|tool_call_end|>"
186
+ "<|tool_calls_section_end|>"
187
+ )
188
+ token_ids = tokenizer.encode(text, add_special_tokens=False)
189
+ parsed = renderer.parse_response(token_ids)
190
+
191
+ assert len(parsed.tool_calls) == 1
192
+ tc = parsed.tool_calls[0]
193
+ assert tc.status == ToolCallParseStatus.OK
194
+ assert tc.name == "get_weather"
195
+ assert tc.arguments == {"city": "Tokyo"}
196
+ assert tc.token_span is not None
197
+ start, end = tc.token_span
198
+ assert 0 <= start < end <= len(token_ids), (
199
+ f"span {tc.token_span} out of range for {len(token_ids)} input tokens"
200
+ )
201
+
202
+
203
+ def test_kimi_k25_in_think_section_is_not_a_real_call():
204
+ """A tool-call section the model drafts inside its ``<think>`` trace must
205
+ not be parsed — only the section after ``</think>`` counts.
206
+
207
+ Regression for #78. K2.5's failure mode differed from Qwen3's: the
208
+ in-think section tripped the "truncated reasoning" guard and the parser
209
+ *dropped every tool call* (returned zero), losing the genuine call. The
210
+ scan is now anchored past ``</think>``.
211
+ """
212
+ tokenizer, renderer = _kimi_k25()
213
+ section = (
214
+ "<|tool_calls_section_begin|>"
215
+ "<|tool_call_begin|>functions.execute_code:0"
216
+ '<|tool_call_argument_begin|>{"code": "print(1)"}'
217
+ "<|tool_call_end|><|tool_calls_section_end|>"
218
+ )
219
+ text = f"<think>\nLet me draft:\n{section}\nlooks right.\n</think>\nGo.\n{section}"
220
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
221
+
222
+ assert len(parsed.tool_calls) == 1
223
+ tc = parsed.tool_calls[0]
224
+ assert tc.status == ToolCallParseStatus.OK
225
+ assert tc.name == "execute_code"
226
+ assert tc.arguments == {"code": "print(1)"}
227
+ # The drafted section stays in the reasoning trace.
228
+ assert parsed.reasoning_content is not None
229
+ assert "<|tool_calls_section_begin|>" in parsed.reasoning_content
230
+ assert parsed.content == "Go."
231
+
232
+
233
+ @lru_cache
234
+ def _deepseek_v3():
235
+ tokenizer = load_tokenizer("deepseek-ai/DeepSeek-V3")
236
+ renderer = create_renderer(tokenizer)
237
+ return tokenizer, renderer
238
+
239
+
240
+ def test_deepseek_v3_in_think_section_is_not_a_real_call():
241
+ """A tool-call section drafted inside ``<think>`` must not be parsed —
242
+ only the section after ``</think>`` counts.
243
+
244
+ Regression for #78. DeepSeek-V3's failure mode: it returned the *wrong*
245
+ call (the in-think draft) and lost reasoning, because ``</think>`` is
246
+ multi-token text there and the scan wasn't anchored past it.
247
+ """
248
+ tokenizer, renderer = _deepseek_v3()
249
+
250
+ def section(name: str) -> str:
251
+ return (
252
+ "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>"
253
+ f'{name}\n```json\n{{"code": "print(1)"}}\n```'
254
+ "<|tool▁call▁end|><|tool▁calls▁end|>"
255
+ )
256
+
257
+ text = (
258
+ f"<think>\nLet me draft:\n{section('draft_tool')}\nlooks right.\n</think>\n"
259
+ f"Go.\n{section('real_tool')}"
260
+ )
261
+ parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
262
+
263
+ assert len(parsed.tool_calls) == 1
264
+ tc = parsed.tool_calls[0]
265
+ # Assert the *post-``</think>``* section was chosen, not the in-think draft.
266
+ # (Use ``startswith`` rather than ``== "real_tool"``: under transformers
267
+ # 5.x the DeepSeek tokenizer's decode drops the ``\n`` between name and the
268
+ # ```json fence, so ``_parse_deepseek_tool_calls`` folds the fence into the
269
+ # name — a pre-existing, #78-unrelated quirk. What matters here is *which*
270
+ # section won.)
271
+ assert tc.name is not None and tc.name.startswith("real_tool")
272
+ assert "draft_tool" not in tc.name
273
+ # The drafted section stays in the reasoning trace, not content.
274
+ assert parsed.reasoning_content is not None
275
+ assert "draft_tool" in parsed.reasoning_content
276
+ assert parsed.content == "Go."
@@ -1,137 +0,0 @@
1
- """Barrage test: renderer.parse_response() must correctly extract
2
- content, reasoning_content, and tool_calls from completion tokens.
3
-
4
- Runs against every (model, renderer) pair.
5
- """
6
-
7
- from functools import lru_cache
8
-
9
- from renderers import create_renderer
10
- from renderers.base import ToolCallParseStatus, load_tokenizer
11
-
12
-
13
- @lru_cache
14
- def _qwen3_vl():
15
- tokenizer = load_tokenizer("Qwen/Qwen3-VL-4B-Instruct")
16
- renderer = create_renderer(tokenizer)
17
- return tokenizer, renderer
18
-
19
-
20
- def test_parse_simple_content(model_name, tokenizer, renderer):
21
- """Plain content, no thinking."""
22
- text = "Hello there!"
23
- ids = tokenizer.encode(text, add_special_tokens=False)
24
- parsed = renderer.parse_response(ids)
25
- assert "Hello" in parsed.content
26
-
27
-
28
- def test_parse_thinking_and_content(model_name, tokenizer, renderer):
29
- """Content with <think>reasoning</think> block."""
30
- text = "Let me think about this.\n</think>\n\nThe answer is 42."
31
- ids = tokenizer.encode(text, add_special_tokens=False)
32
- parsed = renderer.parse_response(ids)
33
- # Should extract reasoning or at least not crash
34
- assert (
35
- "42" in parsed.content
36
- or "think" in (parsed.reasoning_content or "").lower()
37
- or parsed.content
38
- )
39
-
40
-
41
- def test_parse_empty_completion(model_name, tokenizer, renderer):
42
- """Empty completion should not crash."""
43
- parsed = renderer.parse_response([])
44
- assert parsed.content is not None
45
-
46
-
47
- def test_parse_response_returns_parsed_response(model_name, tokenizer, renderer):
48
- """Return type must have content, reasoning_content, tool_calls."""
49
- ids = tokenizer.encode("Hello!", add_special_tokens=False)
50
- parsed = renderer.parse_response(ids)
51
- assert hasattr(parsed, "content")
52
- assert hasattr(parsed, "reasoning_content")
53
- assert hasattr(parsed, "tool_calls")
54
-
55
-
56
- def test_qwen3_vl_parse_json_tool_call():
57
- tokenizer, renderer = _qwen3_vl()
58
- text = (
59
- 'Need a tool.\n<tool_call>\n{"name": "get_weather", '
60
- '"arguments": {"city": "Paris"}}\n</tool_call>'
61
- )
62
- parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
63
-
64
- assert parsed.content == "Need a tool."
65
- assert len(parsed.tool_calls) == 1
66
- tc = parsed.tool_calls[0]
67
- assert tc.status == ToolCallParseStatus.OK
68
- assert tc.name == "get_weather"
69
- assert tc.arguments == {"city": "Paris"}
70
-
71
-
72
- def test_qwen3_vl_malformed_tool_call_surfaces_as_invalid_json():
73
- """A malformed ``<tool_call>`` block lands as a non-OK ``ParsedToolCall``
74
- rather than getting silently merged back into ``content``.
75
-
76
- Before the per-call status redesign, the parser mirrored vLLM's
77
- hermes parser and stuffed the raw block into ``content`` to avoid
78
- downstream ``EmptyModelResponseError``. That hid the malformed signal
79
- from verifiers — they couldn't tell "model wrote prose" from "model
80
- tried a tool call and produced broken JSON." Now the failed attempt
81
- is preserved with ``status=INVALID_JSON`` and ``raw`` text, which
82
- also satisfies the EmptyModelResponseError prevention contract: the
83
- response is non-empty (it has a tool-call attempt) without lying
84
- about what kind of output the model produced.
85
- """
86
- tokenizer, renderer = _qwen3_vl()
87
- # Note the trailing comma — malformed JSON
88
- text = (
89
- '<tool_call>\n{"name": "get_weather", '
90
- '"arguments": {"city": "Paris",}}\n</tool_call>'
91
- )
92
- parsed = renderer.parse_response(tokenizer.encode(text, add_special_tokens=False))
93
-
94
- assert len(parsed.tool_calls) == 1
95
- tc = parsed.tool_calls[0]
96
- assert tc.status == ToolCallParseStatus.INVALID_JSON
97
- assert "get_weather" in tc.raw
98
- assert tc.token_span is not None
99
-
100
-
101
- @lru_cache
102
- def _kimi_k25():
103
- tokenizer = load_tokenizer("moonshotai/Kimi-K2.5")
104
- renderer = create_renderer(tokenizer)
105
- return tokenizer, renderer
106
-
107
-
108
- def test_kimi_k25_tool_call_carries_token_span():
109
- """K2.5 was the lone parser without token spans before — its inline
110
- text-walking implementation couldn't cheaply map regex hits back to
111
- token offsets. We now walk token IDs via ``parse_kimi_k2_section`` for
112
- the special-token path; spans must round-trip and point at a sensible
113
- range within the original input token_ids.
114
- """
115
- tokenizer, renderer = _kimi_k25()
116
- # K2.5 tool-call wire shape: section + per-call special tokens.
117
- text = (
118
- "<|tool_calls_section_begin|>"
119
- "<|tool_call_begin|>functions.get_weather:0"
120
- "<|tool_call_argument_begin|>"
121
- '{"city": "Tokyo"}'
122
- "<|tool_call_end|>"
123
- "<|tool_calls_section_end|>"
124
- )
125
- token_ids = tokenizer.encode(text, add_special_tokens=False)
126
- parsed = renderer.parse_response(token_ids)
127
-
128
- assert len(parsed.tool_calls) == 1
129
- tc = parsed.tool_calls[0]
130
- assert tc.status == ToolCallParseStatus.OK
131
- assert tc.name == "get_weather"
132
- assert tc.arguments == {"city": "Tokyo"}
133
- assert tc.token_span is not None
134
- start, end = tc.token_span
135
- assert 0 <= start < end <= len(token_ids), (
136
- f"span {tc.token_span} out of range for {len(token_ids)} input tokens"
137
- )
File without changes
File without changes