renderers 0.1.8.dev43__tar.gz → 0.1.8.dev45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/PKG-INFO +2 -2
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/README.md +1 -1
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/__init__.py +4 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/base.py +45 -17
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/configs.py +42 -29
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/nemotron3.py +201 -196
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/conftest.py +2 -1
- renderers-0.1.8.dev45/tests/test_nemotron3_parity.py +676 -0
- renderers-0.1.8.dev45/tests/test_nemotron3_ultra.py +104 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_renderer_config_parity.py +12 -2
- renderers-0.1.8.dev43/tests/test_nemotron3_ultra.py +0 -59
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.gitignore +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/LICENSE +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/README.md +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/pyproject.toml +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/client.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/deepseek_r1.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/deepseek_v3.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/default.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/glm45.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/glm5.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/kimi_k25.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/llama_3.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/qwen3.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/qwen35.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/renderers/qwen3_vl.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_deepseek_r1.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_llama_3.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_load_tokenizer_fastokens.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_message_tool_names.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_parse_response.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_qwen35_size_coverage.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_roundtrip.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev43 → renderers-0.1.8.dev45}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: renderers
|
|
3
|
-
Version: 0.1.8.dev43
|
|
3
|
+
Version: 0.1.8.dev45
|
|
4
4
|
Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -57,7 +57,7 @@ next_prompt_ids = r.bridge_to_next_turn(
|
|
|
57
57
|
)
|
|
58
58
|
```
|
|
59
59
|
|
|
60
|
-
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
60
|
+
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
61
61
|
|
|
62
62
|
## API
|
|
63
63
|
|
|
@@ -40,7 +40,7 @@ next_prompt_ids = r.bridge_to_next_turn(
|
|
|
40
40
|
)
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
43
|
+
Hand-coded renderers ship for `qwen3`, `qwen3-vl`, `qwen3.5`, `qwen3.6`, `glm-5`, `glm-5.1`, `glm-4.5`, `minimax-m2`, `deepseek-v3`, `kimi-k2`, `kimi-k2.5`, `nemotron-3`, `nemotron-3-ultra`, `gpt-oss`. Anything else falls back to `DefaultRenderer`, a generic `apply_chat_template` wrapper.
|
|
44
44
|
|
|
45
45
|
## API
|
|
46
46
|
|
|
@@ -56,6 +56,7 @@ from renderers.configs import (
|
|
|
56
56
|
Llama3RendererConfig,
|
|
57
57
|
MiniMaxM2RendererConfig,
|
|
58
58
|
Nemotron3RendererConfig,
|
|
59
|
+
Nemotron3UltraRendererConfig,
|
|
59
60
|
Qwen35RendererConfig,
|
|
60
61
|
Qwen36RendererConfig,
|
|
61
62
|
Qwen3RendererConfig,
|
|
@@ -88,6 +89,7 @@ _LAZY_RENDERERS: dict[str, str] = {
|
|
|
88
89
|
"Llama3Renderer": "renderers.llama_3",
|
|
89
90
|
"MiniMaxM2Renderer": "renderers.minimax_m2",
|
|
90
91
|
"Nemotron3Renderer": "renderers.nemotron3",
|
|
92
|
+
"Nemotron3UltraRenderer": "renderers.nemotron3",
|
|
91
93
|
"Qwen35Renderer": "renderers.qwen35",
|
|
92
94
|
"Qwen36Renderer": "renderers.qwen36",
|
|
93
95
|
"Qwen3Renderer": "renderers.qwen3",
|
|
@@ -146,6 +148,8 @@ __all__ = [
|
|
|
146
148
|
"MultimodalRenderer",
|
|
147
149
|
"Nemotron3Renderer",
|
|
148
150
|
"Nemotron3RendererConfig",
|
|
151
|
+
"Nemotron3UltraRenderer",
|
|
152
|
+
"Nemotron3UltraRendererConfig",
|
|
149
153
|
"OverlongPromptError",
|
|
150
154
|
"ParsedResponse",
|
|
151
155
|
"ParsedToolCall",
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev43'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev43')
|
|
21
|
+
__version__ = version = '0.1.8.dev45'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev45')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -1040,14 +1040,15 @@ MODEL_RENDERER_MAP: dict[str, str] = {
|
|
|
1040
1040
|
"moonshotai/Kimi-K2-Instruct": "kimi-k2",
|
|
1041
1041
|
"moonshotai/Kimi-K2.5": "kimi-k2.5",
|
|
1042
1042
|
"moonshotai/Kimi-K2.6": "kimi-k2.5",
|
|
1043
|
-
# Nemotron 3. Nano / Super share one chat-template variant;
|
|
1044
|
-
# checkpoints use the Ultra variant
|
|
1045
|
-
#
|
|
1043
|
+
# Nemotron 3. Nano / Super share one chat-template variant (``nemotron-3``);
|
|
1044
|
+
# the Ultra checkpoints use the Ultra variant (``nemotron-3-ultra``, distinct
|
|
1045
|
+
# ``</think>`` glue). Both route to the same Nemotron3Renderer, which selects
|
|
1046
|
+
# the variant from the resolved config's ``name``. BF16 and FP8 share the
|
|
1046
1047
|
# same tokenizer and template.
|
|
1047
1048
|
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nemotron-3",
|
|
1048
1049
|
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": "nemotron-3",
|
|
1049
|
-
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3",
|
|
1050
|
-
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3",
|
|
1050
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3-ultra",
|
|
1051
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3-ultra",
|
|
1051
1052
|
# Llama 3.2 (Instruct). Tested against the gated meta-llama repos and
|
|
1052
1053
|
# the unrestricted unsloth/... mirror, which ships a byte-identical
|
|
1053
1054
|
# chat template. ``Llama3Renderer`` defaults ``date_string`` to
|
|
@@ -1350,7 +1351,7 @@ def _populate_registry():
|
|
|
1350
1351
|
from renderers.laguna_xs2 import LagunaXS2Renderer
|
|
1351
1352
|
from renderers.llama_3 import Llama3Renderer
|
|
1352
1353
|
from renderers.minimax_m2 import MiniMaxM2Renderer
|
|
1353
|
-
from renderers.nemotron3 import Nemotron3Renderer
|
|
1354
|
+
from renderers.nemotron3 import Nemotron3Renderer, Nemotron3UltraRenderer
|
|
1354
1355
|
from renderers.qwen3 import Qwen3Renderer
|
|
1355
1356
|
from renderers.qwen3_vl import Qwen3VLRenderer
|
|
1356
1357
|
from renderers.qwen35 import Qwen35Renderer
|
|
@@ -1374,6 +1375,7 @@ def _populate_registry():
|
|
|
1374
1375
|
"laguna-xs.2": LagunaXS2Renderer,
|
|
1375
1376
|
"llama-3": Llama3Renderer,
|
|
1376
1377
|
"nemotron-3": Nemotron3Renderer,
|
|
1378
|
+
"nemotron-3-ultra": Nemotron3UltraRenderer,
|
|
1377
1379
|
"gpt-oss": GptOssRenderer,
|
|
1378
1380
|
}
|
|
1379
1381
|
)
|
|
@@ -1706,19 +1708,45 @@ def _get_offset_tokenizer(tokenizer):
|
|
|
1706
1708
|
kwargs = {"trust_remote_code": True, "revision": revision}
|
|
1707
1709
|
else:
|
|
1708
1710
|
kwargs = {"trust_remote_code": False}
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1711
|
+
|
|
1712
|
+
def _has_offsets(tok) -> bool:
|
|
1713
|
+
if not getattr(tok, "is_fast", False):
|
|
1714
|
+
return False
|
|
1715
|
+
try:
|
|
1716
|
+
tok("a", add_special_tokens=False, return_offsets_mapping=True)
|
|
1717
|
+
return True
|
|
1718
|
+
except (NotImplementedError, ValueError, TypeError):
|
|
1719
|
+
return False
|
|
1720
|
+
|
|
1721
|
+
# We want HF's Rust tokenizer with offset tracking, not the fastokens
|
|
1722
|
+
# shim. The shim is installed by a *process-global* monkeypatch that
|
|
1723
|
+
# ``load_tokenizer`` toggles per pool-slot load, so a plain reload here
|
|
1724
|
+
# can race a concurrent slot's open patch window and silently pick up
|
|
1725
|
+
# the offset-less shim (then get cached, poisoning the process). So:
|
|
1726
|
+
# load, verify offsets, and if missing, reload with the patch forced
|
|
1727
|
+
# off — serialized against pool patch/unpatch via ``_FASTOKENS_PATCH_LOCK``
|
|
1728
|
+
# so no concurrent window can swap the shim back in mid-load — then
|
|
1729
|
+
# restore the prior patch state. Never cache a non-offset tokenizer.
|
|
1716
1730
|
offset_tok = _load_tokenizer_via_auto(name_or_path, **kwargs)
|
|
1717
|
-
if not
|
|
1731
|
+
if not _has_offsets(offset_tok):
|
|
1732
|
+
import fastokens
|
|
1733
|
+
|
|
1734
|
+
with _FASTOKENS_PATCH_LOCK:
|
|
1735
|
+
was_patched = bool(getattr(fastokens, "_patched", False))
|
|
1736
|
+
if was_patched:
|
|
1737
|
+
with contextlib.redirect_stdout(io.StringIO()):
|
|
1738
|
+
fastokens.unpatch_transformers()
|
|
1739
|
+
try:
|
|
1740
|
+
offset_tok = _load_tokenizer_via_auto(name_or_path, **kwargs)
|
|
1741
|
+
finally:
|
|
1742
|
+
if was_patched:
|
|
1743
|
+
with contextlib.redirect_stdout(io.StringIO()):
|
|
1744
|
+
fastokens.patch_transformers()
|
|
1745
|
+
if not _has_offsets(offset_tok):
|
|
1718
1746
|
raise RuntimeError(
|
|
1719
|
-
f"
|
|
1720
|
-
"
|
|
1721
|
-
"renderers require a fast tokenizer for body/scaffold "
|
|
1747
|
+
f"Could not load an offset-capable tokenizer for {name_or_path!r}: "
|
|
1748
|
+
"offset_mapping is unavailable even with the fastokens patch off. "
|
|
1749
|
+
"Hand-coded renderers require a fast tokenizer for body/scaffold "
|
|
1722
1750
|
"attribution."
|
|
1723
1751
|
)
|
|
1724
1752
|
_offset_tokenizers[name_or_path] = offset_tok
|
|
@@ -354,7 +354,14 @@ class MiniMaxM2RendererConfig(BaseRendererConfig):
|
|
|
354
354
|
|
|
355
355
|
|
|
356
356
|
class Nemotron3RendererConfig(BaseRendererConfig):
|
|
357
|
-
"""Nemotron
|
|
357
|
+
"""Nemotron-3 **Nano / Super** renderer config.
|
|
358
|
+
|
|
359
|
+
Nano and Super share one chat-template variant; the renderer routes both
|
|
360
|
+
through :class:`renderers.nemotron3.Nemotron3Renderer`. The Ultra variant
|
|
361
|
+
has its own template (different reasoning-block glue) and config —
|
|
362
|
+
:class:`Nemotron3UltraRendererConfig` — and is reached via the
|
|
363
|
+
``nemotron-3-ultra`` discriminator.
|
|
364
|
+
"""
|
|
358
365
|
|
|
359
366
|
name: Literal["nemotron-3"] = "nemotron-3"
|
|
360
367
|
|
|
@@ -362,26 +369,6 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
362
369
|
"""When ``True``, the generation prompt includes ``<think>``. Mirrors
|
|
363
370
|
the chat template's ``enable_thinking`` kwarg."""
|
|
364
371
|
|
|
365
|
-
ultra: bool | None = None
|
|
366
|
-
"""Select the Nemotron-3 **Ultra** chat-template variant.
|
|
367
|
-
|
|
368
|
-
``None`` (default) auto-detects from the model name (see
|
|
369
|
-
``renderers.nemotron3._ULTRA_DEFAULTS``): the Ultra checkpoints resolve
|
|
370
|
-
to ``True``; Nano / Super and unknown checkpoints to ``False``. Set
|
|
371
|
-
explicitly to force a variant — e.g. an Ultra fine-tune or a
|
|
372
|
-
locally-pathed checkpoint whose ``name_or_path`` isn't in the table.
|
|
373
|
-
|
|
374
|
-
Ultra's template differs from Nano/Super: the reasoning block is glued
|
|
375
|
-
as ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around
|
|
376
|
-
``</think>``), truncated historical turns collapse to
|
|
377
|
-
``<think></think>{content}`` (no ``\\n``), and the thinking-truncation
|
|
378
|
-
boundary follows the template's ``loop.index0 < last_user_idx`` rule
|
|
379
|
-
(drop thinking on every assistant turn before the last user message).
|
|
380
|
-
|
|
381
|
-
Not a chat-template kwarg — it picks which template the renderer
|
|
382
|
-
mirrors, not a variable passed into one — so it's listed in
|
|
383
|
-
``_internal_fields`` and excluded from ``template_field_names()``."""
|
|
384
|
-
|
|
385
372
|
truncate_history_thinking: bool = True
|
|
386
373
|
"""When ``False``, keep ``<think>{reasoning}</think>`` on past-cycle
|
|
387
374
|
assistant turns instead of dropping them. Mirrors the chat
|
|
@@ -389,14 +376,37 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
389
376
|
``preserve_all_thinking`` / ``preserve_thinking_between_tool_calls``
|
|
390
377
|
— see :class:`BaseRendererConfig` for the contract."""
|
|
391
378
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
379
|
+
low_effort: bool = False
|
|
380
|
+
"""When ``True``, append ``\\n\\n{reasoning effort: low}`` to the last user
|
|
381
|
+
message, nudging the model toward shorter reasoning. Mirrors the **Super**
|
|
382
|
+
chat template's ``low_effort`` kwarg. A no-op on **Nano** (its template
|
|
383
|
+
doesn't define it) — exactly as ``apply_chat_template`` ignores an undefined
|
|
384
|
+
template variable; the renderer distinguishes the two by model name (see
|
|
385
|
+
``renderers.nemotron3._is_super``)."""
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class Nemotron3UltraRendererConfig(BaseRendererConfig):
|
|
389
|
+
"""Nemotron-3 **Ultra** renderer config — distinct discriminator so the
|
|
390
|
+
registry routes Ultra checkpoints to the Ultra template variant.
|
|
391
|
+
|
|
392
|
+
Ultra's template differs from Nano/Super: the reasoning block is glued as
|
|
393
|
+
``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around ``</think>``)
|
|
394
|
+
and truncated historical turns collapse to ``<think></think>{content}``
|
|
395
|
+
(no ``\\n``). It shares the :class:`renderers.nemotron3.Nemotron3Renderer`
|
|
396
|
+
implementation, which selects the variant from ``config.name``.
|
|
397
|
+
"""
|
|
398
|
+
|
|
399
|
+
name: Literal["nemotron-3-ultra"] = "nemotron-3-ultra"
|
|
400
|
+
|
|
401
|
+
enable_thinking: bool = True
|
|
402
|
+
"""See :class:`Nemotron3RendererConfig.enable_thinking`."""
|
|
403
|
+
|
|
404
|
+
truncate_history_thinking: bool = True
|
|
405
|
+
"""See :class:`Nemotron3RendererConfig.truncate_history_thinking`."""
|
|
406
|
+
|
|
407
|
+
medium_effort: bool = False
|
|
408
|
+
"""When ``True``, append ``\\n\\n{reasoning effort: efficient}`` to the last
|
|
409
|
+
user message. Mirrors the Ultra chat template's ``medium_effort`` kwarg."""
|
|
400
410
|
|
|
401
411
|
|
|
402
412
|
class DeepSeekV3RendererConfig(BaseRendererConfig):
|
|
@@ -444,6 +454,7 @@ RendererConfig = Annotated[
|
|
|
444
454
|
Llama3RendererConfig,
|
|
445
455
|
MiniMaxM2RendererConfig,
|
|
446
456
|
Nemotron3RendererConfig,
|
|
457
|
+
Nemotron3UltraRendererConfig,
|
|
447
458
|
DeepSeekV3RendererConfig,
|
|
448
459
|
DeepSeekR1RendererConfig,
|
|
449
460
|
],
|
|
@@ -480,6 +491,7 @@ _CONFIG_BY_NAME: dict[str, type[BaseRendererConfig]] = {
|
|
|
480
491
|
"llama-3": Llama3RendererConfig,
|
|
481
492
|
"minimax-m2": MiniMaxM2RendererConfig,
|
|
482
493
|
"nemotron-3": Nemotron3RendererConfig,
|
|
494
|
+
"nemotron-3-ultra": Nemotron3UltraRendererConfig,
|
|
483
495
|
"deepseek-v3": DeepSeekV3RendererConfig,
|
|
484
496
|
"deepseek-r1": DeepSeekR1RendererConfig,
|
|
485
497
|
}
|
|
@@ -525,6 +537,7 @@ __all__ = [
|
|
|
525
537
|
"Llama3RendererConfig",
|
|
526
538
|
"MiniMaxM2RendererConfig",
|
|
527
539
|
"Nemotron3RendererConfig",
|
|
540
|
+
"Nemotron3UltraRendererConfig",
|
|
528
541
|
"Qwen35RendererConfig",
|
|
529
542
|
"Qwen36RendererConfig",
|
|
530
543
|
"Qwen3RendererConfig",
|