renderers 0.1.8.dev30__tar.gz → 0.1.8.dev31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/_version.py +2 -2
  3. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/base.py +38 -12
  4. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/glm45.py +55 -16
  5. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/glm5.py +55 -11
  6. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/publish-dev.yml +0 -0
  7. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/publish.yml +0 -0
  8. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/style.yml +0 -0
  9. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/test.yml +0 -0
  10. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.gitignore +0 -0
  11. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.pre-commit-config.yaml +0 -0
  12. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/LICENSE +0 -0
  13. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/README.md +0 -0
  14. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/docs/renderer-config.md +0 -0
  15. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/README.md +0 -0
  16. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/sglang/multiturn_generate_sglang.py +0 -0
  17. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/sglang/online_multiturn_sglang.py +0 -0
  18. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/tinker/multiturn_generate_tinker.py +0 -0
  19. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/transformers/multiturn_generate_transformers.py +0 -0
  20. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/vllm/multiturn_generate_vllm.py +0 -0
  21. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/pyproject.toml +0 -0
  22. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/__init__.py +0 -0
  23. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/client.py +0 -0
  24. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/configs.py +0 -0
  25. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/deepseek_v3.py +0 -0
  26. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/default.py +0 -0
  27. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/gpt_oss.py +0 -0
  28. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/kimi_k2.py +0 -0
  29. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/kimi_k25.py +0 -0
  30. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/laguna_xs2.py +0 -0
  31. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/minimax_m2.py +0 -0
  32. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/nemotron3.py +0 -0
  33. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/parsers.py +0 -0
  34. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/parsing.py +0 -0
  35. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen3.py +0 -0
  36. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen35.py +0 -0
  37. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen36.py +0 -0
  38. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen3_vl.py +0 -0
  39. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/conftest.py +0 -0
  40. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_bridge.py +0 -0
  41. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_build_helpers.py +0 -0
  42. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_client.py +0 -0
  43. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_gpt_oss_harmony_parity.py +0 -0
  44. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_incremental.py +0 -0
  45. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_is_content.py +0 -0
  46. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_kimi_k25_tool_schema.py +0 -0
  47. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_load_tokenizer.py +0 -0
  48. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_load_tokenizer_fastokens.py +0 -0
  49. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_message_indices.py +0 -0
  50. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_multimodal.py +0 -0
  51. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parse_response.py +0 -0
  52. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parse_response_robustness.py +0 -0
  53. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parsers.py +0 -0
  54. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_preserve_thinking.py +0 -0
  55. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_qwen35_size_coverage.py +0 -0
  56. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_render_ids.py +0 -0
  57. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_renderer_config.py +0 -0
  58. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_renderer_config_parity.py +0 -0
  59. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_roundtrip.py +0 -0
  60. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_sampled_mask.py +0 -0
  61. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_tokens_per_message.py +0 -0
  62. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_tool_arg_type_preservation.py +0 -0
  63. {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev30
3
+ Version: 0.1.8.dev31
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev30'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev30')
21
+ __version__ = version = '0.1.8.dev31'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev31')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -1098,8 +1098,7 @@ def _patched_load(model_name_or_path: str, **kwargs):
1098
1098
  fastokens.patch_transformers()
1099
1099
  if not _FASTOKENS_ANNOUNCED:
1100
1100
  logger.info(
1101
- "fastokens enabled — tokenizers load through the Rust BPE "
1102
- "fast path (~10x encode speedup)."
1101
+ "fastokens enabled — tokenizers load through the Rust BPE fast path (~10x encode speedup)."
1103
1102
  )
1104
1103
  _FASTOKENS_ANNOUNCED = True
1105
1104
  try:
@@ -1169,8 +1168,8 @@ def load_tokenizer(
1169
1168
  def _populate_registry():
1170
1169
  if RENDERER_REGISTRY:
1171
1170
  return
1172
- from renderers.default import DefaultRenderer
1173
1171
  from renderers.deepseek_v3 import DeepSeekV3Renderer
1172
+ from renderers.default import DefaultRenderer
1174
1173
  from renderers.glm5 import GLM5Renderer, GLM51Renderer
1175
1174
  from renderers.glm45 import GLM45Renderer
1176
1175
  from renderers.gpt_oss import GptOssRenderer
@@ -1271,8 +1270,7 @@ def create_renderer(
1271
1270
  cls = RENDERER_REGISTRY.get(config.name)
1272
1271
  if cls is None:
1273
1272
  raise ValueError(
1274
- f"Unknown renderer {config.name!r}. "
1275
- f"Available: {', '.join(sorted(RENDERER_REGISTRY))}"
1273
+ f"Unknown renderer {config.name!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}"
1276
1274
  )
1277
1275
  return cls(tokenizer, config)
1278
1276
 
@@ -1345,7 +1343,7 @@ def build_training_sample(
1345
1343
  renderer: Renderer,
1346
1344
  messages: list[Message],
1347
1345
  *,
1348
- role_to_mask: Callable[[Message], bool],
1346
+ role_to_mask: Callable[[Message], bool] | None = None,
1349
1347
  tools: list[ToolSpec] | None = None,
1350
1348
  content_sft_roles: "set[str] | frozenset[str] | None" = None,
1351
1349
  ) -> tuple[list[int], list[bool]]:
@@ -1354,15 +1352,31 @@ def build_training_sample(
1354
1352
  Single render() call + message_indices → per-token mask.
1355
1353
  Replaces build_incremental_token_mask (O(N) renders → O(1)).
1356
1354
 
1357
- When the renderer populates ``rendered.sampled_mask``, the loss mask
1358
- is the AND of role-based attribution and the sampled signal: only
1359
- tokens the model would have produced at inference are trainable.
1360
- This keeps SFT byte-aligned with the RL trajectory mask (where the
1361
- prompt / completion split achieves the same effect structurally).
1355
+ When ``role_to_mask`` is omitted, ``loss_mask`` is the renderer's
1356
+ ``sampled_mask`` directly: every token the model would have
1357
+ produced at inference is trainable, regardless of which message
1358
+ it's attributed to. This is the recommended default for renderer
1359
+ callers — the renderer owns the per-token "is this model output"
1360
+ signal, so role-level filtering becomes a downstream constraint
1361
+ rather than a precondition. (Some role markers — e.g. GLM
1362
+ ``<|user|>`` / ``<|observation|>`` after a tool-calling assistant
1363
+ turn — *are* sampled by the model at inference and live inside the
1364
+ next message's span; ``sampled_mask`` captures that, but a
1365
+ naive role filter would mask them out.)
1366
+
1367
+ When ``role_to_mask`` is provided, ``loss_mask`` is the AND of the
1368
+ role-based attribution and the sampled signal: only tokens the
1369
+ model would have produced at inference AND attributed to a
1370
+ trainable role pass through. Useful when the caller needs to
1371
+ restrict training to a specific role (e.g. assistant-only) even on
1372
+ a renderer whose ``sampled_mask`` already covers other roles.
1373
+
1362
1374
  Renderers that don't populate ``sampled_mask`` (empty list) fall
1363
1375
  back to attribution-only masking — every token attributed to a
1364
1376
  trainable role is trained on, including template-injected
1365
- ``<|im_start|>role\\n`` openers.
1377
+ ``<|im_start|>role\\n`` openers. In this fallback mode
1378
+ ``role_to_mask`` is required; calling without it raises
1379
+ ``ValueError``.
1366
1380
 
1367
1381
  ``content_sft_roles`` opts in additional roles for "body-only"
1368
1382
  supervision: for every message whose role is in this set, tokens
@@ -1393,6 +1407,13 @@ def build_training_sample(
1393
1407
  else:
1394
1408
  body_roles = frozenset()
1395
1409
 
1410
+ if role_to_mask is None and not has_sampled_info:
1411
+ raise ValueError(
1412
+ "role_to_mask is required when the renderer does not populate "
1413
+ "sampled_mask. Pass an explicit role filter (e.g. "
1414
+ "lambda m: m['role'] == 'assistant') for this renderer."
1415
+ )
1416
+
1396
1417
  loss_mask: list[bool] = []
1397
1418
  for k, msg_idx in enumerate(rendered.message_indices):
1398
1419
  if msg_idx < 0:
@@ -1408,6 +1429,11 @@ def build_training_sample(
1408
1429
  continue
1409
1430
  if has_sampled_info and not rendered.sampled_mask[k]:
1410
1431
  loss_mask.append(False)
1432
+ elif role_to_mask is None:
1433
+ # sampled_mask alone gates the loss when no role filter is
1434
+ # supplied. ``sampled_mask[k]`` is True here (handled by the
1435
+ # branch above), so this token is trainable.
1436
+ loss_mask.append(True)
1411
1437
  else:
1412
1438
  loss_mask.append(role_to_mask(msg))
1413
1439
  return rendered.token_ids, loss_mask
@@ -184,6 +184,22 @@ class GLM45Renderer:
184
184
  role = msg["role"]
185
185
  content = self._visible_text(msg.get("content"))
186
186
 
187
+ # When the previous message is an assistant, this message's
188
+ # role-opening token (``<|user|>`` / ``<|observation|>``) is
189
+ # the inference-time stop signal that closes the assistant's
190
+ # turn (see ``get_stop_token_ids``). Mark it
191
+ # ``is_sampled=True`` so the loss-mask pipeline trains the
192
+ # model to emit it after ``</tool_call>`` (instead of
193
+ # continuing with another ``<tool_call>`` block). The token
194
+ # stays attributed to this message (msg_idx=i) and remains
195
+ # ``is_content=False`` — it's a role-marker / scaffold, not
196
+ # body bytes, so ``content_mask_for_roles({"tool"})`` and
197
+ # ``content_token_spans_by_role()`` correctly exclude it
198
+ # from "tool body" views. Byte stream is unchanged.
199
+ # ``system`` only appears at the start of a GLM conversation,
200
+ # so its opener is never the closer of an assistant turn.
201
+ closes_assistant_turn = i > 0 and messages[i - 1]["role"] == "assistant"
202
+
187
203
  if role == "system":
188
204
  emit_special(self._system, i, is_sampled=False, is_content=False)
189
205
  # ``\n`` is the scaffold separator after the role tag;
@@ -193,7 +209,12 @@ class GLM45Renderer:
193
209
  )
194
210
 
195
211
  elif role == "user":
196
- emit_special(self._user, i, is_sampled=False, is_content=False)
212
+ emit_special(
213
+ self._user,
214
+ i,
215
+ is_sampled=closes_assistant_turn,
216
+ is_content=False,
217
+ )
197
218
  # ``\n`` is scaffold; ``content`` is body; the optional
198
219
  # ``/nothink`` suffix is scaffold the renderer injects
199
220
  # when ``enable_thinking=False``.
@@ -362,6 +383,21 @@ class GLM45Renderer:
362
383
  ext_sampled.append(is_sampled)
363
384
  ext_content.append(is_content)
364
385
 
386
+ # The opener-token of the first new_message may also serve as
387
+ # the close of the previous assistant turn (when the model
388
+ # failed to sample the stop token itself and the bridge has to
389
+ # synthesize the boundary above). Unlike :meth:`render`, the
390
+ # bridge emits these with ``is_sampled=False, is_content=False``
391
+ # — they are template scaffolding for the *next* step's prompt,
392
+ # not tokens the model produced *in this* step. The RL loss
393
+ # operates on ``previous_completion_ids`` (what the model
394
+ # actually sampled this round); bridge tokens belong to the
395
+ # subsequent prompt and must not be counted as "model output"
396
+ # by downstream mask consumers. This deliberate disagreement
397
+ # with ``render()`` reflects the SFT vs RL semantics: render's
398
+ # masks describe what the model *should* produce given a
399
+ # complete conversation; bridge's masks describe what it
400
+ # *actually* produced this step.
365
401
  for i, msg in enumerate(new_messages):
366
402
  role = msg.get("role")
367
403
  content = self._visible_text(msg.get("content"))
@@ -531,21 +567,24 @@ class GLM45Renderer:
531
567
  emit_text,
532
568
  emit_text_segments,
533
569
  ) -> None:
534
- # Tool messages are conversation history injected by the runtime
535
- # between assistant turns — the model never samples any of these
536
- # tokens, so every emission is is_sampled=False. The body bytes
537
- # get ``is_content=True``; the ``\n<tool_response>\n`` /
538
- # ``\n</tool_response>`` wraps and the ``<|observation|>`` role
539
- # tag are scaffold so the SFT mask for tool body never trains
540
- # the model to emit them. Single BPE pass over the joined text
541
- # preserves boundary merges (the tool body's leading/trailing
542
- # chars can merge with the wrap's ``\n``s if the tokenizer would
543
- # do so; we route through ``emit_text_segments`` so the
544
- # attribution is offset-driven and tokenizer-agnostic).
545
- prev_is_tool = msg_idx > 0 and messages[msg_idx - 1]["role"] == "tool"
546
-
547
- if not prev_is_tool:
548
- emit_special(self._observation, msg_idx, is_sampled=False, is_content=False)
570
+ # Tool body bytes get ``is_content=True``; the wraps are
571
+ # scaffold. The ``<|observation|>`` role tag is scaffold too
572
+ # (``is_content=False`` so ``content_mask_for_roles({"tool"})``
573
+ # excludes it). When the previous message is an assistant it
574
+ # doubles as the inference stop signal for that assistant's
575
+ # turn — mark it ``is_sampled=True`` so SFT trains the model to
576
+ # emit it after ``</tool_call>``. The token stays attributed to
577
+ # this tool message; byte stream is unchanged.
578
+ prev_role = messages[msg_idx - 1]["role"] if msg_idx > 0 else None
579
+ closes_assistant_turn = prev_role == "assistant"
580
+
581
+ if prev_role != "tool":
582
+ emit_special(
583
+ self._observation,
584
+ msg_idx,
585
+ is_sampled=closes_assistant_turn,
586
+ is_content=False,
587
+ )
549
588
 
550
589
  emit_text_segments(
551
590
  [
@@ -207,12 +207,33 @@ class GLM5Renderer:
207
207
  role = msg["role"]
208
208
  content = self._visible_text(msg.get("content"))
209
209
 
210
+ # When the previous message is an assistant, this message's
211
+ # role-opening token (``<|user|>`` / ``<|observation|>``) is
212
+ # the inference-time stop signal that closes the assistant's
213
+ # turn (see ``get_stop_token_ids``). Mark it
214
+ # ``is_sampled=True`` so the loss-mask pipeline trains the
215
+ # model to emit it after ``</tool_call>`` (instead of
216
+ # continuing with another ``<tool_call>`` block). The token
217
+ # stays attributed to this message (msg_idx=i) and remains
218
+ # ``is_content=False`` — it's a role-marker / scaffold, not
219
+ # body bytes, so ``content_mask_for_roles({"tool"})`` and
220
+ # ``content_token_spans_by_role()`` correctly exclude it
221
+ # from "tool body" views. Byte stream is unchanged.
222
+ # ``system`` only appears at the start of a GLM conversation,
223
+ # so its opener is never the closer of an assistant turn.
224
+ closes_assistant_turn = i > 0 and messages[i - 1]["role"] == "assistant"
225
+
210
226
  if role == "system":
211
227
  emit_special(self._system, i, is_sampled=False, is_content=False)
212
228
  emit_text(content, i, is_sampled=False, is_content=True)
213
229
 
214
230
  elif role == "user":
215
- emit_special(self._user, i, is_sampled=False, is_content=False)
231
+ emit_special(
232
+ self._user,
233
+ i,
234
+ is_sampled=closes_assistant_turn,
235
+ is_content=False,
236
+ )
216
237
  emit_text(content, i, is_sampled=False, is_content=True)
217
238
 
218
239
  elif role == "assistant":
@@ -382,6 +403,21 @@ class GLM5Renderer:
382
403
  ext_sampled.append(is_sampled)
383
404
  ext_content.append(is_content)
384
405
 
406
+ # The opener-token of the first new_message may also serve as
407
+ # the close of the previous assistant turn (when the model
408
+ # failed to sample the stop token itself and the bridge has to
409
+ # synthesize the boundary above). Unlike :meth:`render`, the
410
+ # bridge emits these with ``is_sampled=False, is_content=False``
411
+ # — they are template scaffolding for the *next* step's prompt,
412
+ # not tokens the model produced *in this* step. The RL loss
413
+ # operates on ``previous_completion_ids`` (what the model
414
+ # actually sampled this round); bridge tokens belong to the
415
+ # subsequent prompt and must not be counted as "model output"
416
+ # by downstream mask consumers. This deliberate disagreement
417
+ # with ``render()`` reflects the SFT vs RL semantics: render's
418
+ # masks describe what the model *should* produce given a
419
+ # complete conversation; bridge's masks describe what it
420
+ # *actually* produced this step.
385
421
  for i, msg in enumerate(new_messages):
386
422
  role = msg.get("role")
387
423
  content = self._visible_text(msg.get("content"))
@@ -566,16 +602,24 @@ class GLM5Renderer:
566
602
  emit_text,
567
603
  emit_text_segments,
568
604
  ) -> None:
569
- # Tool messages are conversation history injected by the runtime
570
- # between assistant turns — the model never samples any of these
571
- # tokens, so every emission is is_sampled=False. The tool body
572
- # bytes get ``is_content=True``; the ``<|observation|>`` /
573
- # ``<tool_response>`` wraps are scaffold so the SFT mask for
574
- # tool body never trains the model to emit them.
575
- prev_is_tool = msg_idx > 0 and messages[msg_idx - 1]["role"] == "tool"
576
-
577
- if not prev_is_tool:
578
- emit_special(self._observation, msg_idx, is_sampled=False, is_content=False)
605
+ # Tool body bytes get ``is_content=True``; the wraps are
606
+ # scaffold. The ``<|observation|>`` role tag is scaffold too
607
+ # (``is_content=False`` so ``content_mask_for_roles({"tool"})``
608
+ # excludes it). When the previous message is an assistant it
609
+ # doubles as the inference stop signal for that assistant's
610
+ # turn — mark it ``is_sampled=True`` so SFT trains the model to
611
+ # emit it after ``</tool_call>``. The token stays attributed to
612
+ # this tool message; byte stream is unchanged.
613
+ prev_role = messages[msg_idx - 1]["role"] if msg_idx > 0 else None
614
+ closes_assistant_turn = prev_role == "assistant"
615
+
616
+ if prev_role != "tool":
617
+ emit_special(
618
+ self._observation,
619
+ msg_idx,
620
+ is_sampled=closes_assistant_turn,
621
+ is_content=False,
622
+ )
579
623
 
580
624
  emit_special(
581
625
  self._tool_response_tok, msg_idx, is_sampled=False, is_content=False
File without changes
File without changes