renderers 0.1.8.dev42__tar.gz → 0.1.8.dev44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/PKG-INFO +2 -2
  2. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/README.md +1 -1
  3. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/__init__.py +8 -0
  4. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/_version.py +2 -2
  5. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/base.py +16 -7
  6. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/configs.py +63 -41
  7. renderers-0.1.8.dev44/renderers/deepseek_r1.py +58 -0
  8. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/deepseek_v3.py +40 -33
  9. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/nemotron3.py +201 -196
  10. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/conftest.py +12 -1
  11. renderers-0.1.8.dev44/tests/test_deepseek_r1.py +152 -0
  12. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_load_tokenizer_fastokens.py +2 -0
  13. renderers-0.1.8.dev44/tests/test_nemotron3_parity.py +676 -0
  14. renderers-0.1.8.dev44/tests/test_nemotron3_ultra.py +104 -0
  15. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_renderer_config_parity.py +13 -2
  16. renderers-0.1.8.dev42/tests/test_nemotron3_ultra.py +0 -59
  17. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/publish-dev.yml +0 -0
  18. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/publish.yml +0 -0
  19. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/style.yml +0 -0
  20. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/test.yml +0 -0
  21. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.gitignore +0 -0
  22. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.pre-commit-config.yaml +0 -0
  23. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/LICENSE +0 -0
  24. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/docs/renderer-config.md +0 -0
  25. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/README.md +0 -0
  26. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/sglang/multiturn_generate_sglang.py +0 -0
  27. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/sglang/online_multiturn_sglang.py +0 -0
  28. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/tinker/multiturn_generate_tinker.py +0 -0
  29. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/transformers/multiturn_generate_transformers.py +0 -0
  30. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/vllm/multiturn_generate_vllm.py +0 -0
  31. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/pyproject.toml +0 -0
  32. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/client.py +0 -0
  33. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/default.py +0 -0
  34. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/glm45.py +0 -0
  35. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/glm5.py +0 -0
  36. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/gpt_oss.py +0 -0
  37. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/kimi_k2.py +0 -0
  38. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/kimi_k25.py +0 -0
  39. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/laguna_xs2.py +0 -0
  40. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/llama_3.py +0 -0
  41. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/minimax_m2.py +0 -0
  42. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/parsers.py +0 -0
  43. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/parsing.py +0 -0
  44. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen3.py +0 -0
  45. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen35.py +0 -0
  46. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen36.py +0 -0
  47. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen3_vl.py +0 -0
  48. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_bridge.py +0 -0
  49. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_build_helpers.py +0 -0
  50. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_client.py +0 -0
  51. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_gpt_oss_harmony_parity.py +0 -0
  52. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_incremental.py +0 -0
  53. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_is_content.py +0 -0
  54. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_kimi_k25_tool_schema.py +0 -0
  55. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_llama_3.py +0 -0
  56. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_load_tokenizer.py +0 -0
  57. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_message_indices.py +0 -0
  58. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_message_tool_names.py +0 -0
  59. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_multimodal.py +0 -0
  60. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parse_response.py +0 -0
  61. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parse_response_robustness.py +0 -0
  62. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parsers.py +0 -0
  63. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_preserve_thinking.py +0 -0
  64. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_qwen35_size_coverage.py +0 -0
  65. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_render_ids.py +0 -0
  66. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_renderer_config.py +0 -0
  67. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_roundtrip.py +0 -0
  68. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_sampled_mask.py +0 -0
  69. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_tokens_per_message.py +0 -0
  70. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_tool_arg_type_preservation.py +0 -0
  71. {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev42
3
+ Version: 0.1.8.dev44
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -57,7 +57,7 @@ next_prompt_ids = r.bridge_to_next_turn(
57
57
  )
58
58
  ```
59
59
 
60
- Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
60
+ Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `deepseek-r1`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
61
61
 
62
62
  ## API
63
63
 
@@ -40,7 +40,7 @@ next_prompt_ids = r.bridge_to_next_turn(
40
40
  )
41
41
  ```
42
42
 
43
- Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
43
+ Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `deepseek-r1`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
44
44
 
45
45
  ## API
46
46
 
@@ -44,6 +44,7 @@ from renderers.configs import (
44
44
  BaseRendererConfig,
45
45
  config_from_name,
46
46
  DefaultRendererConfig,
47
+ DeepSeekR1RendererConfig,
47
48
  DeepSeekV3RendererConfig,
48
49
  GLM45RendererConfig,
49
50
  GLM51RendererConfig,
@@ -55,6 +56,7 @@ from renderers.configs import (
55
56
  Llama3RendererConfig,
56
57
  MiniMaxM2RendererConfig,
57
58
  Nemotron3RendererConfig,
59
+ Nemotron3UltraRendererConfig,
58
60
  Qwen35RendererConfig,
59
61
  Qwen36RendererConfig,
60
62
  Qwen3RendererConfig,
@@ -74,6 +76,7 @@ from renderers.configs import (
74
76
  # imports — ``renderers.base._populate_registry`` lazy-imports the
75
77
  # concrete classes itself when a renderer is instantiated.
76
78
  _LAZY_RENDERERS: dict[str, str] = {
79
+ "DeepSeekR1Renderer": "renderers.deepseek_r1",
77
80
  "DeepSeekV3Renderer": "renderers.deepseek_v3",
78
81
  "DefaultRenderer": "renderers.default",
79
82
  "GLM45Renderer": "renderers.glm45",
@@ -86,6 +89,7 @@ _LAZY_RENDERERS: dict[str, str] = {
86
89
  "Llama3Renderer": "renderers.llama_3",
87
90
  "MiniMaxM2Renderer": "renderers.minimax_m2",
88
91
  "Nemotron3Renderer": "renderers.nemotron3",
92
+ "Nemotron3UltraRenderer": "renderers.nemotron3",
89
93
  "Qwen35Renderer": "renderers.qwen35",
90
94
  "Qwen36Renderer": "renderers.qwen36",
91
95
  "Qwen3Renderer": "renderers.qwen3",
@@ -113,6 +117,8 @@ __all__ = [
113
117
  "BaseRendererConfig",
114
118
  "Content",
115
119
  "ContentPart",
120
+ "DeepSeekR1Renderer",
121
+ "DeepSeekR1RendererConfig",
116
122
  "DeepSeekV3Renderer",
117
123
  "DeepSeekV3RendererConfig",
118
124
  "DefaultRenderer",
@@ -142,6 +148,8 @@ __all__ = [
142
148
  "MultimodalRenderer",
143
149
  "Nemotron3Renderer",
144
150
  "Nemotron3RendererConfig",
151
+ "Nemotron3UltraRenderer",
152
+ "Nemotron3UltraRendererConfig",
145
153
  "OverlongPromptError",
146
154
  "ParsedResponse",
147
155
  "ParsedToolCall",
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev42'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev42')
21
+ __version__ = version = '0.1.8.dev44'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev44')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -1030,21 +1030,25 @@ MODEL_RENDERER_MAP: dict[str, str] = {
1030
1030
  # MiniMax.
1031
1031
  "MiniMaxAI/MiniMax-M2": "minimax-m2",
1032
1032
  "MiniMaxAI/MiniMax-M2.5": "minimax-m2",
1033
- # DeepSeek V3.
1033
+ # DeepSeek V3 (non-reasoning).
1034
1034
  "deepseek-ai/DeepSeek-V3": "deepseek-v3",
1035
1035
  "deepseek-ai/DeepSeek-V3-Base": "deepseek-v3",
1036
+ # DeepSeek R1 (reasoning).
1037
+ "deepseek-ai/DeepSeek-R1": "deepseek-r1",
1038
+ "deepseek-ai/DeepSeek-R1-0528": "deepseek-r1",
1036
1039
  # Kimi K2 (K2.5 and K2.6 share the K2.5 template, distinct from K2).
1037
1040
  "moonshotai/Kimi-K2-Instruct": "kimi-k2",
1038
1041
  "moonshotai/Kimi-K2.5": "kimi-k2.5",
1039
1042
  "moonshotai/Kimi-K2.6": "kimi-k2.5",
1040
- # Nemotron 3. Nano / Super share one chat-template variant; the Ultra
1041
- # checkpoints use the Ultra variant — the renderer auto-selects it from
1042
- # the model name (see ``nemotron3._ULTRA_DEFAULTS``). BF16 and FP8 share the
1043
+ # Nemotron 3. Nano / Super share one chat-template variant (``nemotron-3``);
1044
+ # the Ultra checkpoints use the Ultra variant (``nemotron-3-ultra``, distinct
1045
+ # ``</think>`` glue). Both route to the same Nemotron3Renderer, which selects
1046
+ # the variant from the resolved config's ``name``. BF16 and FP8 share the
1043
1047
  # same tokenizer and template.
1044
1048
  "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nemotron-3",
1045
1049
  "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": "nemotron-3",
1046
- "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3",
1047
- "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3",
1050
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3-ultra",
1051
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3-ultra",
1048
1052
  # Llama 3.2 (Instruct). Tested against the gated meta-llama repos and
1049
1053
  # the unrestricted unsloth/... mirror, which ships a byte-identical
1050
1054
  # chat template. ``Llama3Renderer`` defaults ``date_string`` to
@@ -1161,6 +1165,8 @@ FASTOKENS_INCOMPATIBLE: frozenset[str] = frozenset(
1161
1165
  # doesn't yet implement.
1162
1166
  "deepseek-ai/DeepSeek-V3",
1163
1167
  "deepseek-ai/DeepSeek-V3-Base",
1168
+ "deepseek-ai/DeepSeek-R1",
1169
+ "deepseek-ai/DeepSeek-R1-0528",
1164
1170
  }
1165
1171
  )
1166
1172
 
@@ -1334,6 +1340,7 @@ def load_tokenizer(
1334
1340
  def _populate_registry():
1335
1341
  if RENDERER_REGISTRY:
1336
1342
  return
1343
+ from renderers.deepseek_r1 import DeepSeekR1Renderer
1337
1344
  from renderers.deepseek_v3 import DeepSeekV3Renderer
1338
1345
  from renderers.default import DefaultRenderer
1339
1346
  from renderers.glm5 import GLM5Renderer, GLM51Renderer
@@ -1344,7 +1351,7 @@ def _populate_registry():
1344
1351
  from renderers.laguna_xs2 import LagunaXS2Renderer
1345
1352
  from renderers.llama_3 import Llama3Renderer
1346
1353
  from renderers.minimax_m2 import MiniMaxM2Renderer
1347
- from renderers.nemotron3 import Nemotron3Renderer
1354
+ from renderers.nemotron3 import Nemotron3Renderer, Nemotron3UltraRenderer
1348
1355
  from renderers.qwen3 import Qwen3Renderer
1349
1356
  from renderers.qwen3_vl import Qwen3VLRenderer
1350
1357
  from renderers.qwen35 import Qwen35Renderer
@@ -1362,11 +1369,13 @@ def _populate_registry():
1362
1369
  "glm-4.5": GLM45Renderer,
1363
1370
  "minimax-m2": MiniMaxM2Renderer,
1364
1371
  "deepseek-v3": DeepSeekV3Renderer,
1372
+ "deepseek-r1": DeepSeekR1Renderer,
1365
1373
  "kimi-k2": KimiK2Renderer,
1366
1374
  "kimi-k2.5": KimiK25Renderer,
1367
1375
  "laguna-xs.2": LagunaXS2Renderer,
1368
1376
  "llama-3": Llama3Renderer,
1369
1377
  "nemotron-3": Nemotron3Renderer,
1378
+ "nemotron-3-ultra": Nemotron3UltraRenderer,
1370
1379
  "gpt-oss": GptOssRenderer,
1371
1380
  }
1372
1381
  )
@@ -354,7 +354,14 @@ class MiniMaxM2RendererConfig(BaseRendererConfig):
354
354
 
355
355
 
356
356
  class Nemotron3RendererConfig(BaseRendererConfig):
357
- """Nemotron 3 renderer config."""
357
+ """Nemotron-3 **Nano / Super** renderer config.
358
+
359
+ Nano and Super share one chat-template variant; the renderer routes both
360
+ through :class:`renderers.nemotron3.Nemotron3Renderer`. The Ultra variant
361
+ has its own template (different reasoning-block glue) and config —
362
+ :class:`Nemotron3UltraRendererConfig` — and is reached via the
363
+ ``nemotron-3-ultra`` discriminator.
364
+ """
358
365
 
359
366
  name: Literal["nemotron-3"] = "nemotron-3"
360
367
 
@@ -362,26 +369,6 @@ class Nemotron3RendererConfig(BaseRendererConfig):
362
369
  """When ``True``, the generation prompt includes ``<think>``. Mirrors
363
370
  the chat template's ``enable_thinking`` kwarg."""
364
371
 
365
- ultra: bool | None = None
366
- """Select the Nemotron-3 **Ultra** chat-template variant.
367
-
368
- ``None`` (default) auto-detects from the model name (see
369
- ``renderers.nemotron3._ULTRA_DEFAULTS``): the Ultra checkpoints resolve
370
- to ``True``; Nano / Super and unknown checkpoints to ``False``. Set
371
- explicitly to force a variant — e.g. an Ultra fine-tune or a
372
- locally-pathed checkpoint whose ``name_or_path`` isn't in the table.
373
-
374
- Ultra's template differs from Nano/Super: the reasoning block is glued
375
- as ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around
376
- ``</think>``), truncated historical turns collapse to
377
- ``<think></think>{content}`` (no ``\\n``), and the thinking-truncation
378
- boundary follows the template's ``loop.index0 < last_user_idx`` rule
379
- (drop thinking on every assistant turn before the last user message).
380
-
381
- Not a chat-template kwarg — it picks which template the renderer
382
- mirrors, not a variable passed into one — so it's listed in
383
- ``_internal_fields`` and excluded from ``template_field_names()``."""
384
-
385
372
  truncate_history_thinking: bool = True
386
373
  """When ``False``, keep ``<think>{reasoning}</think>`` on past-cycle
387
374
  assistant turns instead of dropping them. Mirrors the chat
@@ -389,35 +376,64 @@ class Nemotron3RendererConfig(BaseRendererConfig):
389
376
  ``preserve_all_thinking`` / ``preserve_thinking_between_tool_calls``
390
377
  — see :class:`BaseRendererConfig` for the contract."""
391
378
 
392
- # ``ultra`` is a template-variant SELECTOR — it picks which template the
393
- # renderer mirrors (Ultra vs Nano/Super), not a variable passed into one;
394
- # there is no ``ultra`` Jinja variable. Marked internal so the parity
395
- # matrix doesn't cross it as a template field. Same ``_internal_fields``
396
- # mechanism DeepSeek-V3 uses for its no-op ``enable_thinking``, for a
397
- # different underlying reason (theirs is an ignored kwarg, this is a
398
- # variant switch).
399
- _internal_fields = frozenset({"ultra"})
379
+ low_effort: bool = False
380
+ """When ``True``, append ``\\n\\n{reasoning effort: low}`` to the last user
381
+ message, nudging the model toward shorter reasoning. Mirrors the **Super**
382
+ chat template's ``low_effort`` kwarg. A no-op on **Nano** (its template
383
+ doesn't define it) exactly as ``apply_chat_template`` ignores an undefined
384
+ template variable; the renderer distinguishes the two by model name (see
385
+ ``renderers.nemotron3._is_super``)."""
386
+
387
+
388
+ class Nemotron3UltraRendererConfig(BaseRendererConfig):
389
+ """Nemotron-3 **Ultra** renderer config — distinct discriminator so the
390
+ registry routes Ultra checkpoints to the Ultra template variant.
391
+
392
+ Ultra's template differs from Nano/Super: the reasoning block is glued as
393
+ ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around ``</think>``)
394
+ and truncated historical turns collapse to ``<think></think>{content}``
395
+ (no ``\\n``). It shares the :class:`renderers.nemotron3.Nemotron3Renderer`
396
+ implementation, which selects the variant from ``config.name``.
397
+ """
398
+
399
+ name: Literal["nemotron-3-ultra"] = "nemotron-3-ultra"
400
+
401
+ enable_thinking: bool = True
402
+ """See :class:`Nemotron3RendererConfig.enable_thinking`."""
403
+
404
+ truncate_history_thinking: bool = True
405
+ """See :class:`Nemotron3RendererConfig.truncate_history_thinking`."""
406
+
407
+ medium_effort: bool = False
408
+ """When ``True``, append ``\\n\\n{reasoning effort: efficient}`` to the last
409
+ user message. Mirrors the Ultra chat template's ``medium_effort`` kwarg."""
400
410
 
401
411
 
402
412
  class DeepSeekV3RendererConfig(BaseRendererConfig):
403
- """DeepSeek V3 renderer config.
413
+ """DeepSeek-V3 renderer config (non-reasoning).
404
414
 
405
- ``enable_thinking`` is renderer-internal here — DeepSeek-V3's chat
406
- template does not reference any thinking variable, so passing it to
407
- ``apply_chat_template`` upstream is a no-op. The renderer uses it
408
- to control the ``<think>`` prefill at the generation prompt (R1
409
- distill convention).
415
+ DeepSeek-V3 has no thinking concept: the generation prompt is a bare
416
+ ``<|Assistant|>`` and assistant content is emitted verbatim. For the
417
+ reasoning variant use :class:`DeepSeekR1RendererConfig`.
410
418
  """
411
419
 
412
420
  name: Literal["deepseek-v3"] = "deepseek-v3"
413
421
 
414
- enable_thinking: bool = True
415
- """Renderer convention for the R1-distill family: when ``True``,
416
- prefill ``<think>`` at the generation prompt. The DeepSeek-V3 Jinja
417
- template ignores this kwarg upstream; it's not a chat-template
418
- kwarg in the strict sense."""
419
422
 
420
- _internal_fields = frozenset({"enable_thinking"})
423
+ class DeepSeekR1RendererConfig(BaseRendererConfig):
424
+ """DeepSeek-R1 renderer config (reasoning).
425
+
426
+ R1 always reasons — its chat template unconditionally prefills
427
+ ``<think>\\n`` at the generation prompt and strips the reasoning trace (everything through ``</think>``) from
428
+ historical assistant turns. There is therefore no ``enable_thinking``
429
+ knob (thinking is not optional), and ``preserve_*`` flags are no-ops
430
+ (history reasoning is always dropped); both stored for protocol
431
+ uniformity. Applies to full ``deepseek-ai/DeepSeek-R1`` / ``-R1-0528``
432
+ — NOT the R1-Distill-Qwen/Llama models, which use those base
433
+ tokenizers and route to the Qwen3 / Llama-3 renderers.
434
+ """
435
+
436
+ name: Literal["deepseek-r1"] = "deepseek-r1"
421
437
 
422
438
 
423
439
  RendererConfig = Annotated[
@@ -438,7 +454,9 @@ RendererConfig = Annotated[
438
454
  Llama3RendererConfig,
439
455
  MiniMaxM2RendererConfig,
440
456
  Nemotron3RendererConfig,
457
+ Nemotron3UltraRendererConfig,
441
458
  DeepSeekV3RendererConfig,
459
+ DeepSeekR1RendererConfig,
442
460
  ],
443
461
  Field(discriminator="name"),
444
462
  ]
@@ -473,7 +491,9 @@ _CONFIG_BY_NAME: dict[str, type[BaseRendererConfig]] = {
473
491
  "llama-3": Llama3RendererConfig,
474
492
  "minimax-m2": MiniMaxM2RendererConfig,
475
493
  "nemotron-3": Nemotron3RendererConfig,
494
+ "nemotron-3-ultra": Nemotron3UltraRendererConfig,
476
495
  "deepseek-v3": DeepSeekV3RendererConfig,
496
+ "deepseek-r1": DeepSeekR1RendererConfig,
477
497
  }
478
498
 
479
499
 
@@ -505,6 +525,7 @@ __all__ = [
505
525
  "AutoRendererConfig",
506
526
  "BaseRendererConfig",
507
527
  "DefaultRendererConfig",
528
+ "DeepSeekR1RendererConfig",
508
529
  "DeepSeekV3RendererConfig",
509
530
  "GLM45RendererConfig",
510
531
  "GLM51RendererConfig",
@@ -516,6 +537,7 @@ __all__ = [
516
537
  "Llama3RendererConfig",
517
538
  "MiniMaxM2RendererConfig",
518
539
  "Nemotron3RendererConfig",
540
+ "Nemotron3UltraRendererConfig",
519
541
  "Qwen35RendererConfig",
520
542
  "Qwen36RendererConfig",
521
543
  "Qwen3RendererConfig",
@@ -0,0 +1,58 @@
1
+ """DeepSeek-R1 Renderer — the reasoning variant of the DeepSeek format.
2
+
3
+ R1 shares DeepSeek-V3's special tokens, message structure, and tool-call
4
+ wire format, so it subclasses :class:`renderers.deepseek_v3.DeepSeekV3Renderer`
5
+ and overrides only the two places its chat template diverges:
6
+
7
+ 1. Generation prompt — R1 unconditionally prefills ``<think>\\n``
8
+ (``<|Assistant|><think>\\n``) to trigger reasoning, where V3 emits a bare
9
+ ``<|Assistant|>``. Handled by ``_GEN_THINK_PREFILL``.
10
+ 2. Historical assistant turns — R1 strips the reasoning trace, keeping only
11
+ the text after ``</think>`` (``content.split('</think>')[-1]``), where V3
12
+ emits content verbatim. Handled by ``_prepare_assistant_content``.
13
+
14
+ Everything else — system handling, tool-call / tool-output rendering,
15
+ special-token resolution, and ``parse_response`` (``parse_deepseek_v3``,
16
+ shared) — is inherited unchanged.
17
+
18
+ Scope: full ``deepseek-ai/DeepSeek-R1`` and ``-R1-0528``. The R1-Distill
19
+ models (``DeepSeek-R1-Distill-Qwen/Llama``) use their base models'
20
+ tokenizers and route to the Qwen3 / Llama-3 renderers, not this one.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from renderers.base import Message
26
+ from renderers.configs import DeepSeekR1RendererConfig
27
+ from renderers.deepseek_v3 import DeepSeekV3Renderer
28
+
29
+
30
+ class DeepSeekR1Renderer(DeepSeekV3Renderer):
31
+ """Deterministic message → token renderer for DeepSeek-R1 models."""
32
+
33
+ _config_cls: type = DeepSeekR1RendererConfig
34
+ _GEN_THINK_PREFILL: str = "<think>\n"
35
+
36
+ def _prepare_assistant_content(self, msg: Message) -> str:
37
+ """Assistant content with the reasoning trace stripped, mirroring the
38
+ R1 template's ``content.split('</think>')[-1]`` on historical turns.
39
+
40
+ Structured ``thinking``/``text`` parts are reconstructed inline first
41
+ so the same ``</think>`` split applies. The separate
42
+ ``reasoning_content`` field is ignored — the R1 chat template never
43
+ reads it, and history reasoning is dropped regardless.
44
+ """
45
+ content = msg.get("content") or ""
46
+ if isinstance(content, list):
47
+ parts: list[str] = []
48
+ for p in content:
49
+ if not isinstance(p, dict):
50
+ continue
51
+ if p.get("type") == "thinking":
52
+ parts.append(f"<think>{p.get('thinking', '')}</think>")
53
+ elif p.get("type") == "text":
54
+ parts.append(p.get("text", ""))
55
+ content = "".join(parts)
56
+ if "</think>" in content:
57
+ content = content.split("</think>")[-1]
58
+ return content
@@ -41,25 +41,30 @@ def _ds_token(name: str) -> str:
41
41
 
42
42
 
43
43
  class DeepSeekV3Renderer:
44
- """Deterministic message → token renderer for DeepSeek V3 models.
45
-
46
- DeepSeek-V3's chat template does not consult any thinking-related
47
- variable; the ``enable_thinking`` field on the typed config controls
48
- the renderer's ``<think>\\n`` prefill at the generation prompt
49
- (R1-distill convention) and is intentionally not forwarded to
50
- ``apply_chat_template`` upstream — that would be a no-op. The
51
- template also always emits ``<think>{reasoning}</think>`` when
52
- ``reasoning_content`` is provided, so ``preserve_*`` flags are
53
- no-ops here too; stored for protocol uniformity.
44
+ """Deterministic message → token renderer for DeepSeek-V3 models.
45
+
46
+ DeepSeek-V3 is non-reasoning: its chat template has no ``<think>``
47
+ concept — the generation prompt is a bare ``<|Assistant|>`` and past
48
+ assistant content is emitted verbatim. The reasoning variant
49
+ (``<think>``-prefilled prompt, history reasoning stripped) lives in
50
+ :class:`renderers.deepseek_r1.DeepSeekR1Renderer`, which subclasses
51
+ this one. ``preserve_*`` flags are no-ops here (no reasoning channel),
52
+ stored for protocol uniformity.
54
53
  """
55
54
 
55
+ #: Default typed config; the R1 subclass overrides this.
56
+ _config_cls: type = DeepSeekV3RendererConfig
57
+ #: Generation-prompt reasoning prefill. Empty for V3 (bare
58
+ #: ``<|Assistant|>``); the R1 subclass overrides to ``"<think>\n"``.
59
+ _GEN_THINK_PREFILL: str = ""
60
+
56
61
  def __init__(
57
62
  self,
58
63
  tokenizer: PreTrainedTokenizer,
59
64
  config: DeepSeekV3RendererConfig | None = None,
60
65
  ):
61
66
  self._tokenizer = tokenizer
62
- self.config = config or DeepSeekV3RendererConfig()
67
+ self.config = config or type(self)._config_cls()
63
68
 
64
69
  # ── BOS / EOS ────────────────────────────────────────────────
65
70
  self._bos = self._get_special_token(f"begin{_US}of{_US}sentence")
@@ -239,8 +244,10 @@ class DeepSeekV3Renderer:
239
244
  emit_special(
240
245
  self._assistant_token, -1, is_sampled=False, is_content=False
241
246
  )
242
- if self.config.enable_thinking:
243
- emit_text("<think>\n", -1, is_sampled=False, is_content=False)
247
+ if self._GEN_THINK_PREFILL:
248
+ emit_text(
249
+ self._GEN_THINK_PREFILL, -1, is_sampled=False, is_content=False
250
+ )
244
251
 
245
252
  return RenderedTokens(
246
253
  token_ids=tokens,
@@ -382,8 +389,8 @@ class DeepSeekV3Renderer:
382
389
  last_role = new_messages[-1].get("role") if new_messages else None
383
390
  if last_role != "tool":
384
391
  emit_special(self._assistant_token, -1)
385
- if self.config.enable_thinking:
386
- emit_text("<think>\n", -1)
392
+ if self._GEN_THINK_PREFILL:
393
+ emit_text(self._GEN_THINK_PREFILL, -1)
387
394
 
388
395
  total_len = len(previous_ids) + len(ext)
389
396
  return RenderedTokens(
@@ -399,6 +406,23 @@ class DeepSeekV3Renderer:
399
406
  # Assistant rendering
400
407
  # ------------------------------------------------------------------
401
408
 
409
+ def _prepare_assistant_content(self, msg: Message) -> str:
410
+ """Assistant content as the V3 template would emit it: verbatim.
411
+
412
+ V3 is non-reasoning — its template emits ``message['content']`` as-is
413
+ and never reads ``reasoning_content``. A structured content list is
414
+ flattened to its ``text`` parts. The R1 subclass overrides this to
415
+ strip the reasoning trace (everything through ``</think>``) from history.
416
+ """
417
+ content = msg.get("content") or ""
418
+ if isinstance(content, list):
419
+ content = "".join(
420
+ p.get("text", "")
421
+ for p in content
422
+ if isinstance(p, dict) and p.get("type") == "text"
423
+ )
424
+ return content
425
+
402
426
  def _render_assistant(
403
427
  self,
404
428
  msg: Message,
@@ -414,24 +438,7 @@ class DeepSeekV3Renderer:
414
438
  # without a new <|Assistant|> token in that case.
415
439
  prev_is_tool = msg_idx > 0 and messages[msg_idx - 1]["role"] == "tool"
416
440
 
417
- content = msg.get("content") or ""
418
- # Support structured content (ThinkingPart / TextPart list).
419
- if isinstance(content, list):
420
- parts_text: list[str] = []
421
- for p in content:
422
- if not isinstance(p, dict):
423
- continue
424
- if p.get("type") == "thinking":
425
- thinking = p.get("thinking", "")
426
- parts_text.append(f"<think>{thinking}</think>")
427
- elif p.get("type") == "text":
428
- parts_text.append(p.get("text", ""))
429
- content = "".join(parts_text)
430
- # Also accept reasoning_content stored separately (OpenAI-style).
431
- elif isinstance(msg.get("reasoning_content"), str) and msg["reasoning_content"]:
432
- reasoning = msg["reasoning_content"]
433
- content = f"<think>{reasoning}</think>{content}"
434
-
441
+ content = self._prepare_assistant_content(msg)
435
442
  tool_calls = msg.get("tool_calls") or []
436
443
 
437
444
  # ``<|Assistant|>`` is template-injected scaffolding — at