renderers 0.1.8.dev42__tar.gz → 0.1.8.dev44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/PKG-INFO +2 -2
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/README.md +1 -1
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/__init__.py +8 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/base.py +16 -7
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/configs.py +63 -41
- renderers-0.1.8.dev44/renderers/deepseek_r1.py +58 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/deepseek_v3.py +40 -33
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/nemotron3.py +201 -196
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/conftest.py +12 -1
- renderers-0.1.8.dev44/tests/test_deepseek_r1.py +152 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_load_tokenizer_fastokens.py +2 -0
- renderers-0.1.8.dev44/tests/test_nemotron3_parity.py +676 -0
- renderers-0.1.8.dev44/tests/test_nemotron3_ultra.py +104 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_renderer_config_parity.py +13 -2
- renderers-0.1.8.dev42/tests/test_nemotron3_ultra.py +0 -59
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.gitignore +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/LICENSE +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/README.md +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/pyproject.toml +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/client.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/default.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/glm45.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/glm5.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/kimi_k25.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/llama_3.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen3.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen35.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/renderers/qwen3_vl.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_llama_3.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_message_tool_names.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parse_response.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_qwen35_size_coverage.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_roundtrip.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev42 → renderers-0.1.8.dev44}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: renderers
|
|
3
|
-
Version: 0.1.8.dev42
|
|
3
|
+
Version: 0.1.8.dev44
|
|
4
4
|
Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -57,7 +57,7 @@ next_prompt_ids = r.bridge_to_next_turn(
|
|
|
57
57
|
)
|
|
58
58
|
```
|
|
59
59
|
|
|
60
|
-
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
60
|
+
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
61
61
|
|
|
62
62
|
## API
|
|
63
63
|
|
|
@@ -40,7 +40,7 @@ next_prompt_ids = r.bridge_to_next_turn(
|
|
|
40
40
|
)
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
43
|
+
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
44
44
|
|
|
45
45
|
## API
|
|
46
46
|
|
|
@@ -44,6 +44,7 @@ from renderers.configs import (
|
|
|
44
44
|
BaseRendererConfig,
|
|
45
45
|
config_from_name,
|
|
46
46
|
DefaultRendererConfig,
|
|
47
|
+
DeepSeekR1RendererConfig,
|
|
47
48
|
DeepSeekV3RendererConfig,
|
|
48
49
|
GLM45RendererConfig,
|
|
49
50
|
GLM51RendererConfig,
|
|
@@ -55,6 +56,7 @@ from renderers.configs import (
|
|
|
55
56
|
Llama3RendererConfig,
|
|
56
57
|
MiniMaxM2RendererConfig,
|
|
57
58
|
Nemotron3RendererConfig,
|
|
59
|
+
Nemotron3UltraRendererConfig,
|
|
58
60
|
Qwen35RendererConfig,
|
|
59
61
|
Qwen36RendererConfig,
|
|
60
62
|
Qwen3RendererConfig,
|
|
@@ -74,6 +76,7 @@ from renderers.configs import (
|
|
|
74
76
|
# imports — ``renderers.base._populate_registry`` lazy-imports the
|
|
75
77
|
# concrete classes itself when a renderer is instantiated.
|
|
76
78
|
_LAZY_RENDERERS: dict[str, str] = {
|
|
79
|
+
"DeepSeekR1Renderer": "renderers.deepseek_r1",
|
|
77
80
|
"DeepSeekV3Renderer": "renderers.deepseek_v3",
|
|
78
81
|
"DefaultRenderer": "renderers.default",
|
|
79
82
|
"GLM45Renderer": "renderers.glm45",
|
|
@@ -86,6 +89,7 @@ _LAZY_RENDERERS: dict[str, str] = {
|
|
|
86
89
|
"Llama3Renderer": "renderers.llama_3",
|
|
87
90
|
"MiniMaxM2Renderer": "renderers.minimax_m2",
|
|
88
91
|
"Nemotron3Renderer": "renderers.nemotron3",
|
|
92
|
+
"Nemotron3UltraRenderer": "renderers.nemotron3",
|
|
89
93
|
"Qwen35Renderer": "renderers.qwen35",
|
|
90
94
|
"Qwen36Renderer": "renderers.qwen36",
|
|
91
95
|
"Qwen3Renderer": "renderers.qwen3",
|
|
@@ -113,6 +117,8 @@ __all__ = [
|
|
|
113
117
|
"BaseRendererConfig",
|
|
114
118
|
"Content",
|
|
115
119
|
"ContentPart",
|
|
120
|
+
"DeepSeekR1Renderer",
|
|
121
|
+
"DeepSeekR1RendererConfig",
|
|
116
122
|
"DeepSeekV3Renderer",
|
|
117
123
|
"DeepSeekV3RendererConfig",
|
|
118
124
|
"DefaultRenderer",
|
|
@@ -142,6 +148,8 @@ __all__ = [
|
|
|
142
148
|
"MultimodalRenderer",
|
|
143
149
|
"Nemotron3Renderer",
|
|
144
150
|
"Nemotron3RendererConfig",
|
|
151
|
+
"Nemotron3UltraRenderer",
|
|
152
|
+
"Nemotron3UltraRendererConfig",
|
|
145
153
|
"OverlongPromptError",
|
|
146
154
|
"ParsedResponse",
|
|
147
155
|
"ParsedToolCall",
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev42'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev42')
|
|
21
|
+
__version__ = version = '0.1.8.dev44'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev44')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -1030,21 +1030,25 @@ MODEL_RENDERER_MAP: dict[str, str] = {
|
|
|
1030
1030
|
# MiniMax.
|
|
1031
1031
|
"MiniMaxAI/MiniMax-M2": "minimax-m2",
|
|
1032
1032
|
"MiniMaxAI/MiniMax-M2.5": "minimax-m2",
|
|
1033
|
-
# DeepSeek V3.
|
|
1033
|
+
# DeepSeek V3 (non-reasoning).
|
|
1034
1034
|
"deepseek-ai/DeepSeek-V3": "deepseek-v3",
|
|
1035
1035
|
"deepseek-ai/DeepSeek-V3-Base": "deepseek-v3",
|
|
1036
|
+
# DeepSeek R1 (reasoning).
|
|
1037
|
+
"deepseek-ai/DeepSeek-R1": "deepseek-r1",
|
|
1038
|
+
"deepseek-ai/DeepSeek-R1-0528": "deepseek-r1",
|
|
1036
1039
|
# Kimi K2 (K2.5 and K2.6 share the K2.5 template, distinct from K2).
|
|
1037
1040
|
"moonshotai/Kimi-K2-Instruct": "kimi-k2",
|
|
1038
1041
|
"moonshotai/Kimi-K2.5": "kimi-k2.5",
|
|
1039
1042
|
"moonshotai/Kimi-K2.6": "kimi-k2.5",
|
|
1040
|
-
# Nemotron 3. Nano / Super share one chat-template variant;
|
|
1041
|
-
# checkpoints use the Ultra variant
|
|
1042
|
-
#
|
|
1043
|
+
# Nemotron 3. Nano / Super share one chat-template variant (``nemotron-3``);
|
|
1044
|
+
# the Ultra checkpoints use the Ultra variant (``nemotron-3-ultra``, distinct
|
|
1045
|
+
# ``</think>`` glue). Both route to the same Nemotron3Renderer, which selects
|
|
1046
|
+
# the variant from the resolved config's ``name``. BF16 and FP8 share the
|
|
1043
1047
|
# same tokenizer and template.
|
|
1044
1048
|
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nemotron-3",
|
|
1045
1049
|
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": "nemotron-3",
|
|
1046
|
-
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3",
|
|
1047
|
-
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3",
|
|
1050
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3-ultra",
|
|
1051
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3-ultra",
|
|
1048
1052
|
# Llama 3.2 (Instruct). Tested against the gated meta-llama repos and
|
|
1049
1053
|
# the unrestricted unsloth/... mirror, which ships a byte-identical
|
|
1050
1054
|
# chat template. ``Llama3Renderer`` defaults ``date_string`` to
|
|
@@ -1161,6 +1165,8 @@ FASTOKENS_INCOMPATIBLE: frozenset[str] = frozenset(
|
|
|
1161
1165
|
# doesn't yet implement.
|
|
1162
1166
|
"deepseek-ai/DeepSeek-V3",
|
|
1163
1167
|
"deepseek-ai/DeepSeek-V3-Base",
|
|
1168
|
+
"deepseek-ai/DeepSeek-R1",
|
|
1169
|
+
"deepseek-ai/DeepSeek-R1-0528",
|
|
1164
1170
|
}
|
|
1165
1171
|
)
|
|
1166
1172
|
|
|
@@ -1334,6 +1340,7 @@ def load_tokenizer(
|
|
|
1334
1340
|
def _populate_registry():
|
|
1335
1341
|
if RENDERER_REGISTRY:
|
|
1336
1342
|
return
|
|
1343
|
+
from renderers.deepseek_r1 import DeepSeekR1Renderer
|
|
1337
1344
|
from renderers.deepseek_v3 import DeepSeekV3Renderer
|
|
1338
1345
|
from renderers.default import DefaultRenderer
|
|
1339
1346
|
from renderers.glm5 import GLM5Renderer, GLM51Renderer
|
|
@@ -1344,7 +1351,7 @@ def _populate_registry():
|
|
|
1344
1351
|
from renderers.laguna_xs2 import LagunaXS2Renderer
|
|
1345
1352
|
from renderers.llama_3 import Llama3Renderer
|
|
1346
1353
|
from renderers.minimax_m2 import MiniMaxM2Renderer
|
|
1347
|
-
from renderers.nemotron3 import Nemotron3Renderer
|
|
1354
|
+
from renderers.nemotron3 import Nemotron3Renderer, Nemotron3UltraRenderer
|
|
1348
1355
|
from renderers.qwen3 import Qwen3Renderer
|
|
1349
1356
|
from renderers.qwen3_vl import Qwen3VLRenderer
|
|
1350
1357
|
from renderers.qwen35 import Qwen35Renderer
|
|
@@ -1362,11 +1369,13 @@ def _populate_registry():
|
|
|
1362
1369
|
"glm-4.5": GLM45Renderer,
|
|
1363
1370
|
"minimax-m2": MiniMaxM2Renderer,
|
|
1364
1371
|
"deepseek-v3": DeepSeekV3Renderer,
|
|
1372
|
+
"deepseek-r1": DeepSeekR1Renderer,
|
|
1365
1373
|
"kimi-k2": KimiK2Renderer,
|
|
1366
1374
|
"kimi-k2.5": KimiK25Renderer,
|
|
1367
1375
|
"laguna-xs.2": LagunaXS2Renderer,
|
|
1368
1376
|
"llama-3": Llama3Renderer,
|
|
1369
1377
|
"nemotron-3": Nemotron3Renderer,
|
|
1378
|
+
"nemotron-3-ultra": Nemotron3UltraRenderer,
|
|
1370
1379
|
"gpt-oss": GptOssRenderer,
|
|
1371
1380
|
}
|
|
1372
1381
|
)
|
|
@@ -354,7 +354,14 @@ class MiniMaxM2RendererConfig(BaseRendererConfig):
|
|
|
354
354
|
|
|
355
355
|
|
|
356
356
|
class Nemotron3RendererConfig(BaseRendererConfig):
|
|
357
|
-
"""Nemotron
|
|
357
|
+
"""Nemotron-3 **Nano / Super** renderer config.
|
|
358
|
+
|
|
359
|
+
Nano and Super share one chat-template variant; the renderer routes both
|
|
360
|
+
through :class:`renderers.nemotron3.Nemotron3Renderer`. The Ultra variant
|
|
361
|
+
has its own template (different reasoning-block glue) and config —
|
|
362
|
+
:class:`Nemotron3UltraRendererConfig` — and is reached via the
|
|
363
|
+
``nemotron-3-ultra`` discriminator.
|
|
364
|
+
"""
|
|
358
365
|
|
|
359
366
|
name: Literal["nemotron-3"] = "nemotron-3"
|
|
360
367
|
|
|
@@ -362,26 +369,6 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
362
369
|
"""When ``True``, the generation prompt includes ``<think>``. Mirrors
|
|
363
370
|
the chat template's ``enable_thinking`` kwarg."""
|
|
364
371
|
|
|
365
|
-
ultra: bool | None = None
|
|
366
|
-
"""Select the Nemotron-3 **Ultra** chat-template variant.
|
|
367
|
-
|
|
368
|
-
``None`` (default) auto-detects from the model name (see
|
|
369
|
-
``renderers.nemotron3._ULTRA_DEFAULTS``): the Ultra checkpoints resolve
|
|
370
|
-
to ``True``; Nano / Super and unknown checkpoints to ``False``. Set
|
|
371
|
-
explicitly to force a variant — e.g. an Ultra fine-tune or a
|
|
372
|
-
locally-pathed checkpoint whose ``name_or_path`` isn't in the table.
|
|
373
|
-
|
|
374
|
-
Ultra's template differs from Nano/Super: the reasoning block is glued
|
|
375
|
-
as ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around
|
|
376
|
-
``</think>``), truncated historical turns collapse to
|
|
377
|
-
``<think></think>{content}`` (no ``\\n``), and the thinking-truncation
|
|
378
|
-
boundary follows the template's ``loop.index0 < last_user_idx`` rule
|
|
379
|
-
(drop thinking on every assistant turn before the last user message).
|
|
380
|
-
|
|
381
|
-
Not a chat-template kwarg — it picks which template the renderer
|
|
382
|
-
mirrors, not a variable passed into one — so it's listed in
|
|
383
|
-
``_internal_fields`` and excluded from ``template_field_names()``."""
|
|
384
|
-
|
|
385
372
|
truncate_history_thinking: bool = True
|
|
386
373
|
"""When ``False``, keep ``<think>{reasoning}</think>`` on past-cycle
|
|
387
374
|
assistant turns instead of dropping them. Mirrors the chat
|
|
@@ -389,35 +376,64 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
389
376
|
``preserve_all_thinking`` / ``preserve_thinking_between_tool_calls``
|
|
390
377
|
— see :class:`BaseRendererConfig` for the contract."""
|
|
391
378
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
379
|
+
low_effort: bool = False
|
|
380
|
+
"""When ``True``, append ``\\n\\n{reasoning effort: low}`` to the last user
|
|
381
|
+
message, nudging the model toward shorter reasoning. Mirrors the **Super**
|
|
382
|
+
chat template's ``low_effort`` kwarg. A no-op on **Nano** (its template
|
|
383
|
+
doesn't define it) — exactly as ``apply_chat_template`` ignores an undefined
|
|
384
|
+
template variable; the renderer distinguishes the two by model name (see
|
|
385
|
+
``renderers.nemotron3._is_super``)."""
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class Nemotron3UltraRendererConfig(BaseRendererConfig):
|
|
389
|
+
"""Nemotron-3 **Ultra** renderer config — distinct discriminator so the
|
|
390
|
+
registry routes Ultra checkpoints to the Ultra template variant.
|
|
391
|
+
|
|
392
|
+
Ultra's template differs from Nano/Super: the reasoning block is glued as
|
|
393
|
+
``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around ``</think>``)
|
|
394
|
+
and truncated historical turns collapse to ``<think></think>{content}``
|
|
395
|
+
(no ``\\n``). It shares the :class:`renderers.nemotron3.Nemotron3Renderer`
|
|
396
|
+
implementation, which selects the variant from ``config.name``.
|
|
397
|
+
"""
|
|
398
|
+
|
|
399
|
+
name: Literal["nemotron-3-ultra"] = "nemotron-3-ultra"
|
|
400
|
+
|
|
401
|
+
enable_thinking: bool = True
|
|
402
|
+
"""See :class:`Nemotron3RendererConfig.enable_thinking`."""
|
|
403
|
+
|
|
404
|
+
truncate_history_thinking: bool = True
|
|
405
|
+
"""See :class:`Nemotron3RendererConfig.truncate_history_thinking`."""
|
|
406
|
+
|
|
407
|
+
medium_effort: bool = False
|
|
408
|
+
"""When ``True``, append ``\\n\\n{reasoning effort: efficient}`` to the last
|
|
409
|
+
user message. Mirrors the Ultra chat template's ``medium_effort`` kwarg."""
|
|
400
410
|
|
|
401
411
|
|
|
402
412
|
class DeepSeekV3RendererConfig(BaseRendererConfig):
|
|
403
|
-
"""DeepSeek
|
|
413
|
+
"""DeepSeek-V3 renderer config (non-reasoning).
|
|
404
414
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
to control the ``<think>`` prefill at the generation prompt (R1
|
|
409
|
-
distill convention).
|
|
415
|
+
DeepSeek-V3 has no thinking concept: the generation prompt is a bare
|
|
416
|
+
``<|Assistant|>`` and assistant content is emitted verbatim. For the
|
|
417
|
+
reasoning variant use :class:`DeepSeekR1RendererConfig`.
|
|
410
418
|
"""
|
|
411
419
|
|
|
412
420
|
name: Literal["deepseek-v3"] = "deepseek-v3"
|
|
413
421
|
|
|
414
|
-
enable_thinking: bool = True
|
|
415
|
-
"""Renderer convention for the R1-distill family: when ``True``,
|
|
416
|
-
prefill ``<think>`` at the generation prompt. The DeepSeek-V3 Jinja
|
|
417
|
-
template ignores this kwarg upstream; it's not a chat-template
|
|
418
|
-
kwarg in the strict sense."""
|
|
419
422
|
|
|
420
|
-
|
|
423
|
+
class DeepSeekR1RendererConfig(BaseRendererConfig):
|
|
424
|
+
"""DeepSeek-R1 renderer config (reasoning).
|
|
425
|
+
|
|
426
|
+
R1 always reasons — its chat template unconditionally prefills
|
|
427
|
+
``<think>\\n`` at the generation prompt and strips ``</think>`` from
|
|
428
|
+
historical assistant turns. There is therefore no ``enable_thinking``
|
|
429
|
+
knob (thinking is not optional), and ``preserve_*`` flags are no-ops
|
|
430
|
+
(history reasoning is always dropped); both stored for protocol
|
|
431
|
+
uniformity. Applies to full ``deepseek-ai/DeepSeek-R1`` / ``-R1-0528``
|
|
432
|
+
— NOT the R1-Distill-Qwen/Llama models, which use those base
|
|
433
|
+
tokenizers and route to the Qwen3 / Llama-3 renderers.
|
|
434
|
+
"""
|
|
435
|
+
|
|
436
|
+
name: Literal["deepseek-r1"] = "deepseek-r1"
|
|
421
437
|
|
|
422
438
|
|
|
423
439
|
RendererConfig = Annotated[
|
|
@@ -438,7 +454,9 @@ RendererConfig = Annotated[
|
|
|
438
454
|
Llama3RendererConfig,
|
|
439
455
|
MiniMaxM2RendererConfig,
|
|
440
456
|
Nemotron3RendererConfig,
|
|
457
|
+
Nemotron3UltraRendererConfig,
|
|
441
458
|
DeepSeekV3RendererConfig,
|
|
459
|
+
DeepSeekR1RendererConfig,
|
|
442
460
|
],
|
|
443
461
|
Field(discriminator="name"),
|
|
444
462
|
]
|
|
@@ -473,7 +491,9 @@ _CONFIG_BY_NAME: dict[str, type[BaseRendererConfig]] = {
|
|
|
473
491
|
"llama-3": Llama3RendererConfig,
|
|
474
492
|
"minimax-m2": MiniMaxM2RendererConfig,
|
|
475
493
|
"nemotron-3": Nemotron3RendererConfig,
|
|
494
|
+
"nemotron-3-ultra": Nemotron3UltraRendererConfig,
|
|
476
495
|
"deepseek-v3": DeepSeekV3RendererConfig,
|
|
496
|
+
"deepseek-r1": DeepSeekR1RendererConfig,
|
|
477
497
|
}
|
|
478
498
|
|
|
479
499
|
|
|
@@ -505,6 +525,7 @@ __all__ = [
|
|
|
505
525
|
"AutoRendererConfig",
|
|
506
526
|
"BaseRendererConfig",
|
|
507
527
|
"DefaultRendererConfig",
|
|
528
|
+
"DeepSeekR1RendererConfig",
|
|
508
529
|
"DeepSeekV3RendererConfig",
|
|
509
530
|
"GLM45RendererConfig",
|
|
510
531
|
"GLM51RendererConfig",
|
|
@@ -516,6 +537,7 @@ __all__ = [
|
|
|
516
537
|
"Llama3RendererConfig",
|
|
517
538
|
"MiniMaxM2RendererConfig",
|
|
518
539
|
"Nemotron3RendererConfig",
|
|
540
|
+
"Nemotron3UltraRendererConfig",
|
|
519
541
|
"Qwen35RendererConfig",
|
|
520
542
|
"Qwen36RendererConfig",
|
|
521
543
|
"Qwen3RendererConfig",
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""DeepSeek-R1 Renderer — the reasoning variant of the DeepSeek format.
|
|
2
|
+
|
|
3
|
+
R1 shares DeepSeek-V3's special tokens, message structure, and tool-call
|
|
4
|
+
wire format, so it subclasses :class:`renderers.deepseek_v3.DeepSeekV3Renderer`
|
|
5
|
+
and overrides only the two places its chat template diverges:
|
|
6
|
+
|
|
7
|
+
1. Generation prompt — R1 unconditionally prefills ``<think>\\n``
|
|
8
|
+
(``<|Assistant|><think>\\n``) to trigger reasoning, where V3 emits a bare
|
|
9
|
+
``<|Assistant|>``. Handled by ``_GEN_THINK_PREFILL``.
|
|
10
|
+
2. Historical assistant turns — R1 strips the reasoning trace, keeping only
|
|
11
|
+
the text after ``</think>`` (``content.split('</think>')[-1]``), where V3
|
|
12
|
+
emits content verbatim. Handled by ``_prepare_assistant_content``.
|
|
13
|
+
|
|
14
|
+
Everything else — system handling, tool-call / tool-output rendering,
|
|
15
|
+
special-token resolution, and ``parse_response`` (``parse_deepseek_v3``,
|
|
16
|
+
shared) — is inherited unchanged.
|
|
17
|
+
|
|
18
|
+
Scope: full ``deepseek-ai/DeepSeek-R1`` and ``-R1-0528``. The R1-Distill
|
|
19
|
+
models (``DeepSeek-R1-Distill-Qwen/Llama``) use their base models'
|
|
20
|
+
tokenizers and route to the Qwen3 / Llama-3 renderers, not this one.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from renderers.base import Message
|
|
26
|
+
from renderers.configs import DeepSeekR1RendererConfig
|
|
27
|
+
from renderers.deepseek_v3 import DeepSeekV3Renderer
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DeepSeekR1Renderer(DeepSeekV3Renderer):
|
|
31
|
+
"""Deterministic message → token renderer for DeepSeek-R1 models."""
|
|
32
|
+
|
|
33
|
+
_config_cls: type = DeepSeekR1RendererConfig
|
|
34
|
+
_GEN_THINK_PREFILL: str = "<think>\n"
|
|
35
|
+
|
|
36
|
+
def _prepare_assistant_content(self, msg: Message) -> str:
|
|
37
|
+
"""Assistant content with the reasoning trace stripped, mirroring the
|
|
38
|
+
R1 template's ``content.split('</think>')[-1]`` on historical turns.
|
|
39
|
+
|
|
40
|
+
Structured ``thinking``/``text`` parts are reconstructed inline first
|
|
41
|
+
so the same ``</think>`` split applies. The separate
|
|
42
|
+
``reasoning_content`` field is ignored — the R1 chat template never
|
|
43
|
+
reads it, and history reasoning is dropped regardless.
|
|
44
|
+
"""
|
|
45
|
+
content = msg.get("content") or ""
|
|
46
|
+
if isinstance(content, list):
|
|
47
|
+
parts: list[str] = []
|
|
48
|
+
for p in content:
|
|
49
|
+
if not isinstance(p, dict):
|
|
50
|
+
continue
|
|
51
|
+
if p.get("type") == "thinking":
|
|
52
|
+
parts.append(f"<think>{p.get('thinking', '')}</think>")
|
|
53
|
+
elif p.get("type") == "text":
|
|
54
|
+
parts.append(p.get("text", ""))
|
|
55
|
+
content = "".join(parts)
|
|
56
|
+
if "</think>" in content:
|
|
57
|
+
content = content.split("</think>")[-1]
|
|
58
|
+
return content
|
|
@@ -41,25 +41,30 @@ def _ds_token(name: str) -> str:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class DeepSeekV3Renderer:
|
|
44
|
-
"""Deterministic message → token renderer for DeepSeek
|
|
45
|
-
|
|
46
|
-
DeepSeek-V3
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
no-ops here too; stored for protocol uniformity.
|
|
44
|
+
"""Deterministic message → token renderer for DeepSeek-V3 models.
|
|
45
|
+
|
|
46
|
+
DeepSeek-V3 is non-reasoning: its chat template has no ``<think>``
|
|
47
|
+
concept — the generation prompt is a bare ``<|Assistant|>`` and past
|
|
48
|
+
assistant content is emitted verbatim. The reasoning variant
|
|
49
|
+
(``<think>``-prefilled prompt, history reasoning stripped) lives in
|
|
50
|
+
:class:`renderers.deepseek_r1.DeepSeekR1Renderer`, which subclasses
|
|
51
|
+
this one. ``preserve_*`` flags are no-ops here (no reasoning channel),
|
|
52
|
+
stored for protocol uniformity.
|
|
54
53
|
"""
|
|
55
54
|
|
|
55
|
+
#: Default typed config; the R1 subclass overrides this.
|
|
56
|
+
_config_cls: type = DeepSeekV3RendererConfig
|
|
57
|
+
#: Generation-prompt reasoning prefill. Empty for V3 (bare
|
|
58
|
+
#: ``<|Assistant|>``); the R1 subclass overrides to ``"<think>\n"``.
|
|
59
|
+
_GEN_THINK_PREFILL: str = ""
|
|
60
|
+
|
|
56
61
|
def __init__(
|
|
57
62
|
self,
|
|
58
63
|
tokenizer: PreTrainedTokenizer,
|
|
59
64
|
config: DeepSeekV3RendererConfig | None = None,
|
|
60
65
|
):
|
|
61
66
|
self._tokenizer = tokenizer
|
|
62
|
-
self.config = config or DeepSeekV3RendererConfig()
|
|
67
|
+
self.config = config or type(self)._config_cls()
|
|
63
68
|
|
|
64
69
|
# ── BOS / EOS ────────────────────────────────────────────────
|
|
65
70
|
self._bos = self._get_special_token(f"begin{_US}of{_US}sentence")
|
|
@@ -239,8 +244,10 @@ class DeepSeekV3Renderer:
|
|
|
239
244
|
emit_special(
|
|
240
245
|
self._assistant_token, -1, is_sampled=False, is_content=False
|
|
241
246
|
)
|
|
242
|
-
if self.
|
|
243
|
-
emit_text(
|
|
247
|
+
if self._GEN_THINK_PREFILL:
|
|
248
|
+
emit_text(
|
|
249
|
+
self._GEN_THINK_PREFILL, -1, is_sampled=False, is_content=False
|
|
250
|
+
)
|
|
244
251
|
|
|
245
252
|
return RenderedTokens(
|
|
246
253
|
token_ids=tokens,
|
|
@@ -382,8 +389,8 @@ class DeepSeekV3Renderer:
|
|
|
382
389
|
last_role = new_messages[-1].get("role") if new_messages else None
|
|
383
390
|
if last_role != "tool":
|
|
384
391
|
emit_special(self._assistant_token, -1)
|
|
385
|
-
if self.
|
|
386
|
-
emit_text(
|
|
392
|
+
if self._GEN_THINK_PREFILL:
|
|
393
|
+
emit_text(self._GEN_THINK_PREFILL, -1)
|
|
387
394
|
|
|
388
395
|
total_len = len(previous_ids) + len(ext)
|
|
389
396
|
return RenderedTokens(
|
|
@@ -399,6 +406,23 @@ class DeepSeekV3Renderer:
|
|
|
399
406
|
# Assistant rendering
|
|
400
407
|
# ------------------------------------------------------------------
|
|
401
408
|
|
|
409
|
+
def _prepare_assistant_content(self, msg: Message) -> str:
|
|
410
|
+
"""Assistant content as the V3 template would emit it: verbatim.
|
|
411
|
+
|
|
412
|
+
V3 is non-reasoning — its template emits ``message['content']`` as-is
|
|
413
|
+
and never reads ``reasoning_content``. A structured content list is
|
|
414
|
+
flattened to its ``text`` parts. The R1 subclass overrides this to
|
|
415
|
+
strip ``</think>`` from history.
|
|
416
|
+
"""
|
|
417
|
+
content = msg.get("content") or ""
|
|
418
|
+
if isinstance(content, list):
|
|
419
|
+
content = "".join(
|
|
420
|
+
p.get("text", "")
|
|
421
|
+
for p in content
|
|
422
|
+
if isinstance(p, dict) and p.get("type") == "text"
|
|
423
|
+
)
|
|
424
|
+
return content
|
|
425
|
+
|
|
402
426
|
def _render_assistant(
|
|
403
427
|
self,
|
|
404
428
|
msg: Message,
|
|
@@ -414,24 +438,7 @@ class DeepSeekV3Renderer:
|
|
|
414
438
|
# without a new <|Assistant|> token in that case.
|
|
415
439
|
prev_is_tool = msg_idx > 0 and messages[msg_idx - 1]["role"] == "tool"
|
|
416
440
|
|
|
417
|
-
content =
|
|
418
|
-
# Support structured content (ThinkingPart / TextPart list).
|
|
419
|
-
if isinstance(content, list):
|
|
420
|
-
parts_text: list[str] = []
|
|
421
|
-
for p in content:
|
|
422
|
-
if not isinstance(p, dict):
|
|
423
|
-
continue
|
|
424
|
-
if p.get("type") == "thinking":
|
|
425
|
-
thinking = p.get("thinking", "")
|
|
426
|
-
parts_text.append(f"<think>{thinking}</think>")
|
|
427
|
-
elif p.get("type") == "text":
|
|
428
|
-
parts_text.append(p.get("text", ""))
|
|
429
|
-
content = "".join(parts_text)
|
|
430
|
-
# Also accept reasoning_content stored separately (OpenAI-style).
|
|
431
|
-
elif isinstance(msg.get("reasoning_content"), str) and msg["reasoning_content"]:
|
|
432
|
-
reasoning = msg["reasoning_content"]
|
|
433
|
-
content = f"<think>{reasoning}</think>{content}"
|
|
434
|
-
|
|
441
|
+
content = self._prepare_assistant_content(msg)
|
|
435
442
|
tool_calls = msg.get("tool_calls") or []
|
|
436
443
|
|
|
437
444
|
# ``<|Assistant|>`` is template-injected scaffolding — at
|