renderers 0.1.8.dev32__tar.gz → 0.1.8.dev34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/sglang/multiturn_generate_sglang.py +29 -10
  3. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/sglang/online_multiturn_sglang.py +29 -10
  4. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/tinker/multiturn_generate_tinker.py +29 -10
  5. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/transformers/multiturn_generate_transformers.py +28 -10
  6. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/vllm/multiturn_generate_vllm.py +29 -10
  7. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/_version.py +2 -2
  8. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/base.py +2 -2
  9. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/qwen35.py +42 -37
  10. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_qwen35_size_coverage.py +32 -9
  11. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.github/workflows/publish-dev.yml +0 -0
  12. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.github/workflows/publish.yml +0 -0
  13. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.github/workflows/style.yml +0 -0
  14. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.github/workflows/test.yml +0 -0
  15. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.gitignore +0 -0
  16. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/.pre-commit-config.yaml +0 -0
  17. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/LICENSE +0 -0
  18. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/README.md +0 -0
  19. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/docs/renderer-config.md +0 -0
  20. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/examples/README.md +0 -0
  21. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/pyproject.toml +0 -0
  22. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/__init__.py +0 -0
  23. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/client.py +0 -0
  24. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/configs.py +0 -0
  25. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/deepseek_v3.py +0 -0
  26. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/default.py +0 -0
  27. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/glm45.py +0 -0
  28. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/glm5.py +0 -0
  29. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/gpt_oss.py +0 -0
  30. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/kimi_k2.py +0 -0
  31. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/kimi_k25.py +0 -0
  32. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/laguna_xs2.py +0 -0
  33. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/minimax_m2.py +0 -0
  34. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/nemotron3.py +0 -0
  35. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/parsers.py +0 -0
  36. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/parsing.py +0 -0
  37. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/qwen3.py +0 -0
  38. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/qwen36.py +0 -0
  39. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/renderers/qwen3_vl.py +0 -0
  40. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/conftest.py +0 -0
  41. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_bridge.py +0 -0
  42. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_build_helpers.py +0 -0
  43. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_client.py +0 -0
  44. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_gpt_oss_harmony_parity.py +0 -0
  45. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_incremental.py +0 -0
  46. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_is_content.py +0 -0
  47. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_kimi_k25_tool_schema.py +0 -0
  48. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_load_tokenizer.py +0 -0
  49. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_load_tokenizer_fastokens.py +0 -0
  50. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_message_indices.py +0 -0
  51. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_multimodal.py +0 -0
  52. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_parse_response.py +0 -0
  53. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_parse_response_robustness.py +0 -0
  54. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_parsers.py +0 -0
  55. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_preserve_thinking.py +0 -0
  56. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_render_ids.py +0 -0
  57. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_renderer_config.py +0 -0
  58. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_renderer_config_parity.py +0 -0
  59. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_roundtrip.py +0 -0
  60. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_sampled_mask.py +0 -0
  61. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_tokens_per_message.py +0 -0
  62. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/tests/test_tool_arg_type_preservation.py +0 -0
  63. {renderers-0.1.8.dev32 → renderers-0.1.8.dev34}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev32
3
+ Version: 0.1.8.dev34
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -22,6 +22,7 @@ import json
22
22
  import os
23
23
 
24
24
  import sglang as sgl
25
+ from renderers.configs import Qwen35RendererConfig
25
26
  from renderers.gpt_oss import GptOssRenderer
26
27
  from renderers.qwen35 import Qwen35Renderer
27
28
  from transformers import AutoTokenizer
@@ -52,7 +53,9 @@ TOOLS = [
52
53
  def make_renderer(model: str, enable_thinking: bool | None):
53
54
  tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=False)
54
55
  if model.startswith("Qwen/Qwen3.5-"):
55
- return Qwen35Renderer(tokenizer, enable_thinking=enable_thinking)
56
+ return Qwen35Renderer(
57
+ tokenizer, Qwen35RendererConfig(enable_thinking=enable_thinking)
58
+ )
56
59
  if model == "openai/gpt-oss-20b":
57
60
  return GptOssRenderer(tokenizer)
58
61
  raise ValueError(f"unsupported demo model: {model}")
@@ -62,8 +65,9 @@ def print_parsed(label: str, turn: str, parsed) -> None:
62
65
  print(f"\n[{label}] {turn}")
63
66
  if parsed.reasoning_content:
64
67
  print(f"reasoning: {parsed.reasoning_content[:240]}")
65
- if parsed.tool_calls:
66
- print(f"tool_calls: {json.dumps(parsed.tool_calls, ensure_ascii=False)}")
68
+ for tc in parsed.tool_calls:
69
+ # ``parse_response`` returns ``ParsedToolCall`` dataclasses, not dicts.
70
+ print(f"tool_call: {tc.name}({tc.arguments}) [{tc.status.value}]")
67
71
  if parsed.content:
68
72
  print(f"content: {parsed.content}")
69
73
 
@@ -141,21 +145,33 @@ def main() -> None:
141
145
  if parsed1.reasoning_content:
142
146
  assistant["reasoning_content"] = parsed1.reasoning_content
143
147
  if parsed1.tool_calls:
144
- assistant["tool_calls"] = parsed1.tool_calls
148
+ # Convert the parsed dataclasses back to OpenAI-format tool_calls.
149
+ assistant["tool_calls"] = [
150
+ {
151
+ "id": tc.id or f"call_{idx}",
152
+ "type": "function",
153
+ "function": {
154
+ "name": tc.name,
155
+ "arguments": tc.arguments
156
+ if isinstance(tc.arguments, str)
157
+ else json.dumps(tc.arguments),
158
+ },
159
+ }
160
+ for idx, tc in enumerate(parsed1.tool_calls)
161
+ ]
145
162
  messages.append(assistant)
146
163
 
147
164
  if parsed1.tool_calls:
148
165
  new_messages = []
149
166
  for idx, tool_call in enumerate(parsed1.tool_calls):
150
- fn = tool_call.get("function") or tool_call
151
- tool_args = fn.get("arguments") or {}
167
+ tool_args = tool_call.arguments or {}
152
168
  if isinstance(tool_args, str):
153
169
  tool_args = json.loads(tool_args)
154
170
  new_messages.append(
155
171
  {
156
172
  "role": "tool",
157
- "tool_call_id": tool_call.get("id", f"call_{idx}"),
158
- "name": fn.get("name", "multiply"),
173
+ "tool_call_id": tool_call.id or f"call_{idx}",
174
+ "name": tool_call.name or "multiply",
159
175
  "content": json.dumps(
160
176
  {"result": int(tool_args["a"]) * int(tool_args["b"])}
161
177
  ),
@@ -167,11 +183,14 @@ def main() -> None:
167
183
  ]
168
184
 
169
185
  # Turn 2: bridge extends prompt_ids + completion1 exactly.
170
- bridged_ids = renderer.bridge_to_next_turn(
186
+ # ``bridge_to_next_turn`` returns a ``RenderedTokens`` (or None); the
187
+ # extended id stream is on ``.token_ids``.
188
+ bridged = renderer.bridge_to_next_turn(
171
189
  prompt_ids, completion1, new_messages, tools=TOOLS
172
190
  )
173
- if bridged_ids is None:
191
+ if bridged is None:
174
192
  raise RuntimeError("bridge_to_next_turn returned None")
193
+ bridged_ids = bridged.token_ids
175
194
  assert bridged_ids[: len(prompt_ids) + len(completion1)] == (
176
195
  prompt_ids + completion1
177
196
  )
@@ -44,6 +44,7 @@ from typing import Any
44
44
 
45
45
  import httpx
46
46
  from renderers.base import Renderer
47
+ from renderers.configs import Qwen35RendererConfig
47
48
  from renderers.gpt_oss import GptOssRenderer
48
49
  from renderers.qwen35 import Qwen35Renderer
49
50
  from transformers import AutoTokenizer
@@ -71,7 +72,9 @@ TOOLS = [
71
72
  def make_renderer(model: str, enable_thinking: bool | None) -> Renderer:
72
73
  tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=False)
73
74
  if model.startswith("Qwen/Qwen3.5-"):
74
- return Qwen35Renderer(tokenizer, enable_thinking=enable_thinking)
75
+ return Qwen35Renderer(
76
+ tokenizer, Qwen35RendererConfig(enable_thinking=enable_thinking)
77
+ )
75
78
  if model == "openai/gpt-oss-20b":
76
79
  return GptOssRenderer(tokenizer)
77
80
  raise ValueError(f"unsupported demo model: {model}")
@@ -116,8 +119,9 @@ def print_parsed(label: str, turn: str, parsed) -> None:
116
119
  print(f"\n[{label}] {turn}")
117
120
  if parsed.reasoning_content:
118
121
  print(f"reasoning: {parsed.reasoning_content[:240]}")
119
- if parsed.tool_calls:
120
- print(f"tool_calls: {json.dumps(parsed.tool_calls, ensure_ascii=False)}")
122
+ for tc in parsed.tool_calls:
123
+ # ``parse_response`` returns ``ParsedToolCall`` dataclasses, not dicts.
124
+ print(f"tool_call: {tc.name}({tc.arguments}) [{tc.status.value}]")
121
125
  if parsed.content:
122
126
  print(f"content: {parsed.content}")
123
127
 
@@ -164,21 +168,33 @@ async def run_one(
164
168
  if parsed1.reasoning_content:
165
169
  assistant["reasoning_content"] = parsed1.reasoning_content
166
170
  if parsed1.tool_calls:
167
- assistant["tool_calls"] = parsed1.tool_calls
171
+ # Convert the parsed dataclasses back to OpenAI-format tool_calls.
172
+ assistant["tool_calls"] = [
173
+ {
174
+ "id": tc.id or f"call_{idx}",
175
+ "type": "function",
176
+ "function": {
177
+ "name": tc.name,
178
+ "arguments": tc.arguments
179
+ if isinstance(tc.arguments, str)
180
+ else json.dumps(tc.arguments),
181
+ },
182
+ }
183
+ for idx, tc in enumerate(parsed1.tool_calls)
184
+ ]
168
185
  messages.append(assistant)
169
186
 
170
187
  if parsed1.tool_calls:
171
188
  new_messages: list[dict[str, Any]] = []
172
189
  for idx, tool_call in enumerate(parsed1.tool_calls):
173
- fn = tool_call.get("function") or tool_call
174
- tool_args = fn.get("arguments") or {}
190
+ tool_args = tool_call.arguments or {}
175
191
  if isinstance(tool_args, str):
176
192
  tool_args = json.loads(tool_args)
177
193
  new_messages.append(
178
194
  {
179
195
  "role": "tool",
180
- "tool_call_id": tool_call.get("id", f"call_{idx}"),
181
- "name": fn.get("name", "multiply"),
196
+ "tool_call_id": tool_call.id or f"call_{idx}",
197
+ "name": tool_call.name or "multiply",
182
198
  "content": json.dumps(
183
199
  {"result": int(tool_args["a"]) * int(tool_args["b"])}
184
200
  ),
@@ -190,11 +206,14 @@ async def run_one(
190
206
  ]
191
207
 
192
208
  # Turn 2: bridge extends prompt_ids + completion1 exactly.
193
- bridged_ids = renderer.bridge_to_next_turn(
209
+ # ``bridge_to_next_turn`` returns a ``RenderedTokens`` (or None); the
210
+ # extended id stream is on ``.token_ids``.
211
+ bridged = renderer.bridge_to_next_turn(
194
212
  prompt_ids, completion1, new_messages, tools=TOOLS
195
213
  )
196
- if bridged_ids is None:
214
+ if bridged is None:
197
215
  raise RuntimeError("bridge_to_next_turn returned None")
216
+ bridged_ids = bridged.token_ids
198
217
  assert bridged_ids[: len(prompt_ids) + len(completion1)] == (
199
218
  prompt_ids + completion1
200
219
  )
@@ -22,6 +22,7 @@ import json
22
22
  import os
23
23
 
24
24
  import tinker
25
+ from renderers.configs import Qwen35RendererConfig
25
26
  from renderers.gpt_oss import GptOssRenderer
26
27
  from renderers.qwen35 import Qwen35Renderer
27
28
  from tinker import types
@@ -53,7 +54,9 @@ TOOLS = [
53
54
  def make_renderer(model: str, enable_thinking: bool | None):
54
55
  tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=False)
55
56
  if model.startswith("Qwen/Qwen3.5-"):
56
- return Qwen35Renderer(tokenizer, enable_thinking=enable_thinking)
57
+ return Qwen35Renderer(
58
+ tokenizer, Qwen35RendererConfig(enable_thinking=enable_thinking)
59
+ )
57
60
  if model == "openai/gpt-oss-20b":
58
61
  return GptOssRenderer(tokenizer)
59
62
  raise ValueError(f"unsupported demo model: {model}")
@@ -63,8 +66,9 @@ def print_parsed(label: str, turn: str, parsed) -> None:
63
66
  print(f"\n[{label}] {turn}")
64
67
  if parsed.reasoning_content:
65
68
  print(f"reasoning: {parsed.reasoning_content[:240]}")
66
- if parsed.tool_calls:
67
- print(f"tool_calls: {json.dumps(parsed.tool_calls, ensure_ascii=False)}")
69
+ for tc in parsed.tool_calls:
70
+ # ``parse_response`` returns ``ParsedToolCall`` dataclasses, not dicts.
71
+ print(f"tool_call: {tc.name}({tc.arguments}) [{tc.status.value}]")
68
72
  if parsed.content:
69
73
  print(f"content: {parsed.content}")
70
74
 
@@ -131,21 +135,33 @@ async def main() -> None:
131
135
  if parsed1.reasoning_content:
132
136
  assistant["reasoning_content"] = parsed1.reasoning_content
133
137
  if parsed1.tool_calls:
134
- assistant["tool_calls"] = parsed1.tool_calls
138
+ # Convert the parsed dataclasses back to OpenAI-format tool_calls.
139
+ assistant["tool_calls"] = [
140
+ {
141
+ "id": tc.id or f"call_{idx}",
142
+ "type": "function",
143
+ "function": {
144
+ "name": tc.name,
145
+ "arguments": tc.arguments
146
+ if isinstance(tc.arguments, str)
147
+ else json.dumps(tc.arguments),
148
+ },
149
+ }
150
+ for idx, tc in enumerate(parsed1.tool_calls)
151
+ ]
135
152
  messages.append(assistant)
136
153
 
137
154
  if parsed1.tool_calls:
138
155
  new_messages = []
139
156
  for idx, tool_call in enumerate(parsed1.tool_calls):
140
- fn = tool_call.get("function") or tool_call
141
- tool_args = fn.get("arguments") or {}
157
+ tool_args = tool_call.arguments or {}
142
158
  if isinstance(tool_args, str):
143
159
  tool_args = json.loads(tool_args)
144
160
  new_messages.append(
145
161
  {
146
162
  "role": "tool",
147
- "tool_call_id": tool_call.get("id", f"call_{idx}"),
148
- "name": fn.get("name", "multiply"),
163
+ "tool_call_id": tool_call.id or f"call_{idx}",
164
+ "name": tool_call.name or "multiply",
149
165
  "content": json.dumps(
150
166
  {"result": int(tool_args["a"]) * int(tool_args["b"])}
151
167
  ),
@@ -157,11 +173,14 @@ async def main() -> None:
157
173
  ]
158
174
 
159
175
  # Turn 2: bridge extends prompt_ids + completion1 exactly.
160
- bridged_ids = renderer.bridge_to_next_turn(
176
+ # ``bridge_to_next_turn`` returns a ``RenderedTokens`` (or None); the
177
+ # extended id stream is on ``.token_ids``.
178
+ bridged = renderer.bridge_to_next_turn(
161
179
  prompt_ids, completion1, new_messages, tools=TOOLS
162
180
  )
163
- if bridged_ids is None:
181
+ if bridged is None:
164
182
  raise RuntimeError("bridge_to_next_turn returned None")
183
+ bridged_ids = bridged.token_ids
165
184
  assert bridged_ids[: len(prompt_ids) + len(completion1)] == (
166
185
  prompt_ids + completion1
167
186
  )
@@ -26,6 +26,7 @@ import os
26
26
  import torch
27
27
  from transformers import AutoModelForCausalLM, AutoTokenizer
28
28
 
29
+ from renderers.configs import Qwen35RendererConfig
29
30
  from renderers.gpt_oss import GptOssRenderer
30
31
  from renderers.qwen35 import Qwen35Renderer
31
32
 
@@ -55,7 +56,8 @@ TOOLS = [
55
56
  def make_renderer(model: str, enable_thinking: bool | None):
56
57
  tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=False)
57
58
  if model.startswith("Qwen/Qwen3.5-"):
58
- return Qwen35Renderer(tokenizer, enable_thinking=enable_thinking), tokenizer
59
+ config = Qwen35RendererConfig(enable_thinking=enable_thinking)
60
+ return Qwen35Renderer(tokenizer, config), tokenizer
59
61
  if model == "openai/gpt-oss-20b":
60
62
  return GptOssRenderer(tokenizer), tokenizer
61
63
  raise ValueError(f"unsupported demo model: {model}")
@@ -65,8 +67,9 @@ def print_parsed(label: str, turn: str, parsed) -> None:
65
67
  print(f"\n[{label}] {turn}")
66
68
  if parsed.reasoning_content:
67
69
  print(f"reasoning: {parsed.reasoning_content[:240]}")
68
- if parsed.tool_calls:
69
- print(f"tool_calls: {json.dumps(parsed.tool_calls, ensure_ascii=False)}")
70
+ for tc in parsed.tool_calls:
71
+ # ``parse_response`` returns ``ParsedToolCall`` dataclasses, not dicts.
72
+ print(f"tool_call: {tc.name}({tc.arguments}) [{tc.status.value}]")
70
73
  if parsed.content:
71
74
  print(f"content: {parsed.content}")
72
75
 
@@ -139,21 +142,33 @@ def main() -> None:
139
142
  if parsed1.reasoning_content:
140
143
  assistant["reasoning_content"] = parsed1.reasoning_content
141
144
  if parsed1.tool_calls:
142
- assistant["tool_calls"] = parsed1.tool_calls
145
+ # Convert the parsed dataclasses back to OpenAI-format tool_calls.
146
+ assistant["tool_calls"] = [
147
+ {
148
+ "id": tc.id or f"call_{idx}",
149
+ "type": "function",
150
+ "function": {
151
+ "name": tc.name,
152
+ "arguments": tc.arguments
153
+ if isinstance(tc.arguments, str)
154
+ else json.dumps(tc.arguments),
155
+ },
156
+ }
157
+ for idx, tc in enumerate(parsed1.tool_calls)
158
+ ]
143
159
  messages.append(assistant)
144
160
 
145
161
  if parsed1.tool_calls:
146
162
  new_messages = []
147
163
  for idx, tool_call in enumerate(parsed1.tool_calls):
148
- fn = tool_call.get("function") or tool_call
149
- tool_args = fn.get("arguments") or {}
164
+ tool_args = tool_call.arguments or {}
150
165
  if isinstance(tool_args, str):
151
166
  tool_args = json.loads(tool_args)
152
167
  new_messages.append(
153
168
  {
154
169
  "role": "tool",
155
- "tool_call_id": tool_call.get("id", f"call_{idx}"),
156
- "name": fn.get("name", "multiply"),
170
+ "tool_call_id": tool_call.id or f"call_{idx}",
171
+ "name": tool_call.name or "multiply",
157
172
  "content": json.dumps(
158
173
  {"result": int(tool_args["a"]) * int(tool_args["b"])}
159
174
  ),
@@ -165,11 +180,14 @@ def main() -> None:
165
180
  ]
166
181
 
167
182
  # Turn 2: bridge extends prompt_ids + completion1 exactly.
168
- bridged_ids = renderer.bridge_to_next_turn(
183
+ # ``bridge_to_next_turn`` returns a ``RenderedTokens`` (or None); the
184
+ # extended id stream is on ``.token_ids``.
185
+ bridged = renderer.bridge_to_next_turn(
169
186
  prompt_ids, completion1, new_messages, tools=TOOLS
170
187
  )
171
- if bridged_ids is None:
188
+ if bridged is None:
172
189
  raise RuntimeError("bridge_to_next_turn returned None")
190
+ bridged_ids = bridged.token_ids
173
191
  assert bridged_ids[: len(prompt_ids) + len(completion1)] == (
174
192
  prompt_ids + completion1
175
193
  )
@@ -21,6 +21,7 @@ import gc
21
21
  import json
22
22
  import os
23
23
 
24
+ from renderers.configs import Qwen35RendererConfig
24
25
  from renderers.gpt_oss import GptOssRenderer
25
26
  from renderers.qwen35 import Qwen35Renderer
26
27
  from transformers import AutoTokenizer
@@ -52,7 +53,9 @@ TOOLS = [
52
53
  def make_renderer(model: str, enable_thinking: bool | None):
53
54
  tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=False)
54
55
  if model.startswith("Qwen/Qwen3.5-"):
55
- return Qwen35Renderer(tokenizer, enable_thinking=enable_thinking)
56
+ return Qwen35Renderer(
57
+ tokenizer, Qwen35RendererConfig(enable_thinking=enable_thinking)
58
+ )
56
59
  if model == "openai/gpt-oss-20b":
57
60
  return GptOssRenderer(tokenizer)
58
61
  raise ValueError(f"unsupported demo model: {model}")
@@ -62,8 +65,9 @@ def print_parsed(label: str, turn: str, parsed) -> None:
62
65
  print(f"\n[{label}] {turn}")
63
66
  if parsed.reasoning_content:
64
67
  print(f"reasoning: {parsed.reasoning_content[:240]}")
65
- if parsed.tool_calls:
66
- print(f"tool_calls: {json.dumps(parsed.tool_calls, ensure_ascii=False)}")
68
+ for tc in parsed.tool_calls:
69
+ # ``parse_response`` returns ``ParsedToolCall`` dataclasses, not dicts.
70
+ print(f"tool_call: {tc.name}({tc.arguments}) [{tc.status.value}]")
67
71
  if parsed.content:
68
72
  print(f"content: {parsed.content}")
69
73
 
@@ -134,21 +138,33 @@ def main() -> None:
134
138
  if parsed1.reasoning_content:
135
139
  assistant["reasoning_content"] = parsed1.reasoning_content
136
140
  if parsed1.tool_calls:
137
- assistant["tool_calls"] = parsed1.tool_calls
141
+ # Convert the parsed dataclasses back to OpenAI-format tool_calls.
142
+ assistant["tool_calls"] = [
143
+ {
144
+ "id": tc.id or f"call_{idx}",
145
+ "type": "function",
146
+ "function": {
147
+ "name": tc.name,
148
+ "arguments": tc.arguments
149
+ if isinstance(tc.arguments, str)
150
+ else json.dumps(tc.arguments),
151
+ },
152
+ }
153
+ for idx, tc in enumerate(parsed1.tool_calls)
154
+ ]
138
155
  messages.append(assistant)
139
156
 
140
157
  if parsed1.tool_calls:
141
158
  new_messages = []
142
159
  for idx, tool_call in enumerate(parsed1.tool_calls):
143
- fn = tool_call.get("function") or tool_call
144
- tool_args = fn.get("arguments") or {}
160
+ tool_args = tool_call.arguments or {}
145
161
  if isinstance(tool_args, str):
146
162
  tool_args = json.loads(tool_args)
147
163
  new_messages.append(
148
164
  {
149
165
  "role": "tool",
150
- "tool_call_id": tool_call.get("id", f"call_{idx}"),
151
- "name": fn.get("name", "multiply"),
166
+ "tool_call_id": tool_call.id or f"call_{idx}",
167
+ "name": tool_call.name or "multiply",
152
168
  "content": json.dumps(
153
169
  {"result": int(tool_args["a"]) * int(tool_args["b"])}
154
170
  ),
@@ -160,11 +176,14 @@ def main() -> None:
160
176
  ]
161
177
 
162
178
  # Turn 2: bridge extends prompt_ids + completion1 exactly.
163
- bridged_ids = renderer.bridge_to_next_turn(
179
+ # ``bridge_to_next_turn`` returns a ``RenderedTokens`` (or None); the
180
+ # extended id stream is on ``.token_ids``.
181
+ bridged = renderer.bridge_to_next_turn(
164
182
  prompt_ids, completion1, new_messages, tools=TOOLS
165
183
  )
166
- if bridged_ids is None:
184
+ if bridged is None:
167
185
  raise RuntimeError("bridge_to_next_turn returned None")
186
+ bridged_ids = bridged.token_ids
168
187
  assert bridged_ids[: len(prompt_ids) + len(completion1)] == (
169
188
  prompt_ids + completion1
170
189
  )
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev32'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev32')
21
+ __version__ = version = '0.1.8.dev34'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev34')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -911,8 +911,8 @@ MODEL_RENDERER_MAP: dict[str, str] = {
911
911
  # ``enable_thinking=true`` (open ``<think>\n`` at the gen prompt);
912
912
  # the smaller 0.8B / 2B variants flip the polarity (default
913
913
  # ``enable_thinking=false``, empty ``<think>\n\n</think>\n\n``).
914
- # ``Qwen35Renderer`` auto-detects polarity from the tokenizer's
915
- # chat_template at construction, so all seven sizes are
914
+ # ``Qwen35Renderer`` hard-codes this polarity per model
915
+ # (``_ENABLE_THINKING_DEFAULTS``), so all seven sizes are
916
916
  # token-for-token parity-tested against their own
917
917
  # ``apply_chat_template`` — including with
918
918
  # ``add_generation_prompt=True``.
@@ -66,39 +66,44 @@ _TOOLS_INSTRUCTIONS = (
66
66
  )
67
67
 
68
68
 
69
- def _detect_enable_thinking_default(tokenizer: PreTrainedTokenizer) -> bool:
70
- """Probe the tokenizer's chat template to learn its ``enable_thinking``
71
- default polarity at the generation-prompt boundary.
72
-
73
- The Qwen3.5 family ships two template variants that differ only in the
74
- polarity of the gated branch:
75
-
76
- * Big sizes (4B / 9B / 35B-A3B / 122B-A10B / 397B-A17B) emit an open
77
- ``<think>\\n`` by default and the empty ``<think>\\n\\n</think>\\n\\n``
78
- block when ``enable_thinking`` is explicitly false.
79
- * Small sizes (0.8B / 2B) flip the polarity — they emit the empty
80
- block by default and the open ``<think>\\n`` only when
81
- ``enable_thinking`` is explicitly true.
82
-
83
- A one-shot ``apply_chat_template`` call with no flag and a minimal
84
- user message reveals which variant is in use: the empty-block tail
85
- ends with ``</think>``, the open-think tail does not. Failing the
86
- probe (no chat_template, exotic config) falls back to the big-model
87
- default of True, which matches every entry in
88
- ``MODEL_RENDERER_MAP`` that routes to ``qwen3.5`` without explicit
89
- polarity awareness.
69
+ # Per-model ``enable_thinking`` default, applied when the renderer config
70
+ # leaves it ``None``. The Qwen3.5 family ships two chat-template variants
71
+ # that differ only in the polarity of the gated thinking branch:
72
+ #
73
+ # * Big sizes (4B / 9B / 35B-A3B / 122B-A10B / 397B-A17B) default
74
+ # ``enable_thinking=true`` — an open ``<think>\n`` at the gen prompt.
75
+ # * Small sizes (0.8B / 2B) flip it — default ``false``, emitting the
76
+ # empty ``<think>\n\n</think>\n\n`` block.
77
+ #
78
+ # These are hard-coded (keyed by ``tokenizer.name_or_path``) rather than
79
+ # probed from the live ``chat_template``: probing meant calling
80
+ # ``apply_chat_template`` at construction, which pulls ``transformers`` onto
81
+ # the hot path and breaks bring-your-own-tokenizer use. The values are the
82
+ # ground truth pinned by ``tests/test_qwen35_size_coverage.py`` — both the
83
+ # polarity assertions and byte-parity against each size's own
84
+ # ``apply_chat_template``.
85
+ _ENABLE_THINKING_DEFAULTS: dict[str, bool] = {
86
+ "Qwen/Qwen3.5-0.8B": False,
87
+ "Qwen/Qwen3.5-2B": False,
88
+ "Qwen/Qwen3.5-4B": True,
89
+ "Qwen/Qwen3.5-9B": True,
90
+ "Qwen/Qwen3.5-35B-A3B": True,
91
+ "Qwen/Qwen3.5-122B-A10B": True,
92
+ "Qwen/Qwen3.5-397B-A17B": True,
93
+ # Qwen3.6 extends the Qwen3.5 template; same big-size polarity.
94
+ "Qwen/Qwen3.6-35B-A3B": True,
95
+ }
96
+
97
+
98
+ def _default_enable_thinking(tokenizer) -> bool:
99
+ """Hard-coded ``enable_thinking`` default for ``tokenizer``'s model.
100
+
101
+ Falls back to ``True`` (the big-model default, and the majority of the
102
+ family) for unknown / fine-tuned checkpoints whose ``name_or_path`` isn't
103
+ in ``_ENABLE_THINKING_DEFAULTS``; pass an explicit ``enable_thinking=`` to
104
+ a small-size fine-tune that needs ``False``.
90
105
  """
91
- try:
92
- out = tokenizer.apply_chat_template(
93
- [{"role": "user", "content": "x"}],
94
- tokenize=False,
95
- add_generation_prompt=True,
96
- )
97
- except Exception:
98
- return True
99
- if not isinstance(out, str):
100
- return True
101
- return not out.rstrip().endswith("</think>")
106
+ return _ENABLE_THINKING_DEFAULTS.get(getattr(tokenizer, "name_or_path", ""), True)
102
107
 
103
108
 
104
109
  class Qwen35Renderer:
@@ -116,13 +121,13 @@ class Qwen35Renderer:
116
121
  self._tokenizer = tokenizer
117
122
  self._processor = processor
118
123
  cfg = config or type(self)._config_cls()
119
- # ``enable_thinking=None`` defers to the tokenizer's chat-template
120
- # default (Instruct → off, Thinking → on). Materialise here so
121
- # downstream reads see a concrete bool; rebind the config with
122
- # the resolved value so introspection sees the same.
124
+ # ``enable_thinking=None`` defers to the model's known default (see
125
+ # ``_ENABLE_THINKING_DEFAULTS``). Materialise here so downstream reads
126
+ # see a concrete bool; rebind the config with the resolved value so
127
+ # introspection sees the same.
123
128
  if cfg.enable_thinking is None:
124
129
  cfg = cfg.model_copy(
125
- update={"enable_thinking": _detect_enable_thinking_default(tokenizer)}
130
+ update={"enable_thinking": _default_enable_thinking(tokenizer)}
126
131
  )
127
132
  self.config = cfg
128
133
 
@@ -5,9 +5,8 @@ Seven Qwen3.5 sizes route to ``Qwen35Renderer``. The 4B / 9B / 35B-A3B /
5
5
  ``enable_thinking=true``); the smaller 0.8B / 2B sizes ship the polarity-
6
6
  flipped variant (default ``enable_thinking=false`` → empty
7
7
  ``<think>\\n\\n</think>\\n\\n`` at the gen-prompt boundary). The renderer
8
- detects polarity from the tokenizer's chat_template at construction, so
9
- both variants render byte-identical to their own
10
- ``apply_chat_template``.
8
+ hard-codes this polarity per model (``_ENABLE_THINKING_DEFAULTS``), so
9
+ both variants render byte-identical to their own ``apply_chat_template``.
11
10
 
12
11
  These tests lock in (a) the exact set of Qwen3.5 sizes in the map and
13
12
  (b) byte parity for every one of them across representative
@@ -57,7 +56,7 @@ def test_no_other_qwen35_sizes_silently_added():
57
56
 
58
57
 
59
58
  # ---------------------------------------------------------------------------
60
- # Polarity auto-detection: 0.8B / 2B flip ``enable_thinking`` default.
59
+ # Polarity defaults: 0.8B / 2B flip ``enable_thinking`` default.
61
60
  # ---------------------------------------------------------------------------
62
61
 
63
62
 
@@ -73,10 +72,10 @@ def test_no_other_qwen35_sizes_silently_added():
73
72
  ("Qwen/Qwen3.5-397B-A17B", True),
74
73
  ],
75
74
  )
76
- def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_default):
77
- """The renderer's ``_enable_thinking`` resolves to the chat template's
78
- own default when no explicit flag is passed — so big / small sizes
79
- each match their own template at the gen-prompt boundary."""
75
+ def test_qwen35_enable_thinking_polarity_default(qwen35_model, expected_default):
76
+ """With no explicit flag, the renderer resolves ``enable_thinking`` from
77
+ the hard-coded per-model default — so big / small sizes each match their
78
+ own template at the gen-prompt boundary."""
80
79
  tok = load_tokenizer(qwen35_model)
81
80
  renderer = create_renderer(tok, Qwen35RendererConfig())
82
81
  assert isinstance(renderer, Qwen35Renderer)
@@ -86,6 +85,30 @@ def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_def
86
85
  )
87
86
 
88
87
 
88
+ def test_construction_does_not_call_apply_chat_template():
89
+ """The ``enable_thinking`` default is hard-coded per model, so building a
90
+ ``Qwen35Renderer`` must not probe ``apply_chat_template`` — a
91
+ bring-your-own tokenizer with no chat-template support still works."""
92
+
93
+ class _Stub:
94
+ name_or_path = "Qwen/Qwen3.5-0.8B"
95
+ unk_token_id = -1
96
+
97
+ def convert_tokens_to_ids(self, token):
98
+ # Any stable non-unk id per token; the renderer only needs the
99
+ # special tokens to resolve to distinct, in-vocab ids.
100
+ return abs(hash(token)) % 1_000_000 + 1
101
+
102
+ def apply_chat_template(self, *args, **kwargs):
103
+ raise AssertionError(
104
+ "apply_chat_template must not be called at construction"
105
+ )
106
+
107
+ renderer = Qwen35Renderer(_Stub())
108
+ # 0.8B is a small size → thinking defaults off, from the hard-coded table.
109
+ assert renderer.config.enable_thinking is False
110
+
111
+
89
112
  # ---------------------------------------------------------------------------
90
113
  # Byte parity for each in-map Qwen3.5 size.
91
114
  # ---------------------------------------------------------------------------
@@ -146,7 +169,7 @@ def test_qwen35_size_parity_with_apply_chat_template(
146
169
  """Each in-map Qwen3.5 size renders byte-identical to its own
147
170
  ``apply_chat_template`` output. Locks in the property that lets us
148
171
  share ``Qwen35Renderer`` across all seven sizes — the polarity
149
- flip on 0.8B / 2B is absorbed by the constructor's auto-detect."""
172
+ flip on 0.8B / 2B is absorbed by the per-model default."""
150
173
  tok = load_tokenizer(qwen35_model)
151
174
  renderer = create_renderer(tok, Qwen35RendererConfig())
152
175
  assert isinstance(renderer, Qwen35Renderer)
File without changes
File without changes