renderers 0.1.8.dev39__tar.gz → 0.1.8.dev40__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/PKG-INFO +1 -1
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/base.py +6 -1
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/configs.py +29 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/nemotron3.py +68 -9
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/conftest.py +2 -0
- renderers-0.1.8.dev40/tests/test_nemotron3_ultra.py +59 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_renderer_config_parity.py +3 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_roundtrip.py +3 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.gitignore +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/LICENSE +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/README.md +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/README.md +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/pyproject.toml +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/__init__.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/client.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/deepseek_v3.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/default.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/glm45.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/glm5.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/kimi_k25.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen3.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen35.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen3_vl.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_load_tokenizer_fastokens.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_message_tool_names.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parse_response.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_qwen35_size_coverage.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/uv.lock +0 -0
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev39'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev39')
|
|
21
|
+
__version__ = version = '0.1.8.dev40'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev40')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -1037,9 +1037,14 @@ MODEL_RENDERER_MAP: dict[str, str] = {
|
|
|
1037
1037
|
"moonshotai/Kimi-K2-Instruct": "kimi-k2",
|
|
1038
1038
|
"moonshotai/Kimi-K2.5": "kimi-k2.5",
|
|
1039
1039
|
"moonshotai/Kimi-K2.6": "kimi-k2.5",
|
|
1040
|
-
# Nemotron 3.
|
|
1040
|
+
# Nemotron 3. Nano / Super share one chat-template variant; the Ultra
|
|
1041
|
+
# checkpoints use the Ultra variant — the renderer auto-selects it from
|
|
1042
|
+
# the model name (see ``nemotron3._ULTRA_DEFAULTS``). BF16 and FP8 share the
|
|
1043
|
+
# same tokenizer and template.
|
|
1041
1044
|
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nemotron-3",
|
|
1042
1045
|
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": "nemotron-3",
|
|
1046
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3",
|
|
1047
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3",
|
|
1043
1048
|
# Poolside Laguna.
|
|
1044
1049
|
"poolside/Laguna-XS.2": "laguna-xs.2",
|
|
1045
1050
|
# GPT-OSS.
|
|
@@ -337,6 +337,26 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
337
337
|
"""When ``True``, the generation prompt includes ``<think>``. Mirrors
|
|
338
338
|
the chat template's ``enable_thinking`` kwarg."""
|
|
339
339
|
|
|
340
|
+
ultra: bool | None = None
|
|
341
|
+
"""Select the Nemotron-3 **Ultra** chat-template variant.
|
|
342
|
+
|
|
343
|
+
``None`` (default) auto-detects from the model name (see
|
|
344
|
+
``renderers.nemotron3._ULTRA_DEFAULTS``): the Ultra checkpoints resolve
|
|
345
|
+
to ``True``; Nano / Super and unknown checkpoints to ``False``. Set
|
|
346
|
+
explicitly to force a variant — e.g. an Ultra fine-tune or a
|
|
347
|
+
locally-pathed checkpoint whose ``name_or_path`` isn't in the table.
|
|
348
|
+
|
|
349
|
+
Ultra's template differs from Nano/Super: the reasoning block is glued
|
|
350
|
+
as ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around
|
|
351
|
+
``</think>``), truncated historical turns collapse to
|
|
352
|
+
``<think></think>{content}`` (no ``\\n``), and the thinking-truncation
|
|
353
|
+
boundary follows the template's ``loop.index0 < last_user_idx`` rule
|
|
354
|
+
(drop thinking on every assistant turn before the last user message).
|
|
355
|
+
|
|
356
|
+
Not a chat-template kwarg — it picks which template the renderer
|
|
357
|
+
mirrors, not a variable passed into one — so it's listed in
|
|
358
|
+
``_internal_fields`` and excluded from ``template_field_names()``."""
|
|
359
|
+
|
|
340
360
|
truncate_history_thinking: bool = True
|
|
341
361
|
"""When ``False``, keep ``<think>{reasoning}</think>`` on past-cycle
|
|
342
362
|
assistant turns instead of dropping them. Mirrors the chat
|
|
@@ -344,6 +364,15 @@ class Nemotron3RendererConfig(BaseRendererConfig):
|
|
|
344
364
|
``preserve_all_thinking`` / ``preserve_thinking_between_tool_calls``
|
|
345
365
|
— see :class:`BaseRendererConfig` for the contract."""
|
|
346
366
|
|
|
367
|
+
# ``ultra`` is a template-variant SELECTOR — it picks which template the
|
|
368
|
+
# renderer mirrors (Ultra vs Nano/Super), not a variable passed into one;
|
|
369
|
+
# there is no ``ultra`` Jinja variable. Marked internal so the parity
|
|
370
|
+
# matrix doesn't cross it as a template field. Same ``_internal_fields``
|
|
371
|
+
# mechanism DeepSeek-V3 uses for its no-op ``enable_thinking``, for a
|
|
372
|
+
# different underlying reason (theirs is an ignored kwarg, this is a
|
|
373
|
+
# variant switch).
|
|
374
|
+
_internal_fields = frozenset({"ultra"})
|
|
375
|
+
|
|
347
376
|
|
|
348
377
|
class DeepSeekV3RendererConfig(BaseRendererConfig):
|
|
349
378
|
"""DeepSeek V3 renderer config.
|
|
@@ -75,6 +75,35 @@ def _render_extra_keys(obj: dict[str, Any], handled_keys: set[str]) -> list[str]
|
|
|
75
75
|
return lines
|
|
76
76
|
|
|
77
77
|
|
|
78
|
+
# Per-model ``ultra`` default, applied when the renderer config leaves it
|
|
79
|
+
# ``None``. The Nemotron-3 family ships two chat-template variants: Nano /
|
|
80
|
+
# Super share one; Ultra differs in the reasoning-block glue (no ``\n`` around
|
|
81
|
+
# ``</think>``) and the thinking-truncation boundary (drop thinking on every
|
|
82
|
+
# assistant turn before the last user message). BF16 and FP8 share the same
|
|
83
|
+
# tokenizer and template. Hard-coded keyed by
|
|
84
|
+
# ``tokenizer.name_or_path`` rather than probed from the live template — the
|
|
85
|
+
# same convention as Qwen3.5's ``_ENABLE_THINKING_DEFAULTS`` (avoids pulling
|
|
86
|
+
# ``apply_chat_template`` onto the construction hot path and keeps
|
|
87
|
+
# bring-your-own-tokenizer use working).
|
|
88
|
+
_ULTRA_DEFAULTS: dict[str, bool] = {
|
|
89
|
+
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": False,
|
|
90
|
+
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": False,
|
|
91
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": True,
|
|
92
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": True,
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _default_ultra(tokenizer) -> bool:
|
|
97
|
+
"""Hard-coded ``ultra`` default for ``tokenizer``'s model.
|
|
98
|
+
|
|
99
|
+
Falls back to ``False`` (the Nano / Super template, and the majority of
|
|
100
|
+
the family) for unknown / fine-tuned checkpoints whose ``name_or_path``
|
|
101
|
+
isn't in ``_ULTRA_DEFAULTS`` — pass an explicit ``ultra=True`` for an
|
|
102
|
+
Ultra fine-tune or a locally-pathed Ultra checkpoint.
|
|
103
|
+
"""
|
|
104
|
+
return _ULTRA_DEFAULTS.get(getattr(tokenizer, "name_or_path", ""), False)
|
|
105
|
+
|
|
106
|
+
|
|
78
107
|
class Nemotron3Renderer:
|
|
79
108
|
"""Deterministic message → token renderer for Nemotron 3 models."""
|
|
80
109
|
|
|
@@ -84,7 +113,14 @@ class Nemotron3Renderer:
|
|
|
84
113
|
config: Nemotron3RendererConfig | None = None,
|
|
85
114
|
):
|
|
86
115
|
self._tokenizer = tokenizer
|
|
87
|
-
|
|
116
|
+
cfg = config or Nemotron3RendererConfig()
|
|
117
|
+
# ``ultra=None`` defers to the model's known default (see
|
|
118
|
+
# ``_ULTRA_DEFAULTS``). Materialise here so downstream reads see a
|
|
119
|
+
# concrete bool; rebind the frozen config with the resolved value so
|
|
120
|
+
# introspection sees the same.
|
|
121
|
+
if cfg.ultra is None:
|
|
122
|
+
cfg = cfg.model_copy(update={"ultra": _default_ultra(tokenizer)})
|
|
123
|
+
self.config = cfg
|
|
88
124
|
|
|
89
125
|
# Look up special token IDs from the tokenizer (not hardcoded).
|
|
90
126
|
# <|endoftext|> is optional: Nemotron-3 Nano / Super tokenizers ship
|
|
@@ -335,6 +371,17 @@ class Nemotron3Renderer:
|
|
|
335
371
|
last_plain_assistant_idx = j
|
|
336
372
|
break
|
|
337
373
|
|
|
374
|
+
# Ultra truncates thinking on every assistant turn *before the last
|
|
375
|
+
# user message* (template rule ``loop.index0 < last_user_idx``),
|
|
376
|
+
# whereas Nano/Super preserve only the last plain assistant. Compute
|
|
377
|
+
# the last-user index over the normalized ``messages`` list (a leading
|
|
378
|
+
# system never holds a user, so the relative comparison is unaffected).
|
|
379
|
+
last_user_idx_norm = -1
|
|
380
|
+
for j in range(len(messages) - 1, -1, -1):
|
|
381
|
+
if messages[j].get("role") == "user":
|
|
382
|
+
last_user_idx_norm = j
|
|
383
|
+
break
|
|
384
|
+
|
|
338
385
|
# ── 2. Iterate messages ─────────────────────────────────────
|
|
339
386
|
for i, msg in enumerate(messages):
|
|
340
387
|
role = msg["role"]
|
|
@@ -360,7 +407,10 @@ class Nemotron3Renderer:
|
|
|
360
407
|
emit_text("\n", msg_orig_idx, is_sampled=False, is_content=False)
|
|
361
408
|
|
|
362
409
|
elif role == "assistant":
|
|
363
|
-
|
|
410
|
+
if self.config.ultra:
|
|
411
|
+
is_last_turn = i >= last_user_idx_norm
|
|
412
|
+
else:
|
|
413
|
+
is_last_turn = i >= last_plain_assistant_idx
|
|
364
414
|
preserve_thinking = msg_orig_idx >= 0 and should_preserve_past_thinking(
|
|
365
415
|
original_messages,
|
|
366
416
|
msg_orig_idx,
|
|
@@ -617,6 +667,7 @@ class Nemotron3Renderer:
|
|
|
617
667
|
content = after_think_end.lstrip("\n")
|
|
618
668
|
|
|
619
669
|
reasoning_content = reasoning_content.strip()
|
|
670
|
+
ultra = self.config.ultra
|
|
620
671
|
|
|
621
672
|
# ``<|im_start|>assistant\n`` is template-injected scaffolding —
|
|
622
673
|
# at inference the chat template emits these as the generation
|
|
@@ -645,28 +696,36 @@ class Nemotron3Renderer:
|
|
|
645
696
|
or not self.config.truncate_history_thinking
|
|
646
697
|
):
|
|
647
698
|
emit_special(self._think, msg_idx, is_sampled=True, is_content=True)
|
|
699
|
+
# Ultra: <think>\n{reasoning}</think>{content} (no \n around </think>).
|
|
700
|
+
# Nano/Super: <think>\n{reasoning}\n</think>\n{content}.
|
|
648
701
|
emit_text(
|
|
649
|
-
"\n" + reasoning_content + "\n",
|
|
702
|
+
("\n" + reasoning_content)
|
|
703
|
+
if ultra
|
|
704
|
+
else ("\n" + reasoning_content + "\n"),
|
|
650
705
|
msg_idx,
|
|
651
706
|
is_sampled=True,
|
|
652
707
|
is_content=True,
|
|
653
708
|
)
|
|
654
709
|
emit_special(self._think_end, msg_idx, is_sampled=True, is_content=True)
|
|
655
|
-
# Single \n separator (not \n\n like Qwen3.5)
|
|
710
|
+
# Single \n separator (not \n\n like Qwen3.5); Ultra glues directly.
|
|
656
711
|
emit_text(
|
|
657
|
-
|
|
712
|
+
(content + content_suffix)
|
|
713
|
+
if ultra
|
|
714
|
+
else ("\n" + content + content_suffix),
|
|
658
715
|
msg_idx,
|
|
659
716
|
is_sampled=True,
|
|
660
717
|
is_content=True,
|
|
661
718
|
)
|
|
662
719
|
elif reasoning_content:
|
|
663
|
-
# Historical assistant whose reasoning got stripped
|
|
664
|
-
#
|
|
665
|
-
#
|
|
720
|
+
# Historical assistant whose reasoning got stripped. Nano/Super keep
|
|
721
|
+
# a single \n between the collapsed <think></think> and the content
|
|
722
|
+
# as a marker that reasoning existed; Ultra glues content directly.
|
|
666
723
|
emit_special(self._think, msg_idx, is_sampled=True, is_content=True)
|
|
667
724
|
emit_special(self._think_end, msg_idx, is_sampled=True, is_content=True)
|
|
668
725
|
emit_text(
|
|
669
|
-
|
|
726
|
+
(content + content_suffix)
|
|
727
|
+
if ultra
|
|
728
|
+
else ("\n" + content + content_suffix),
|
|
670
729
|
msg_idx,
|
|
671
730
|
is_sampled=True,
|
|
672
731
|
is_content=True,
|
|
@@ -33,6 +33,8 @@ RENDERER_MODELS = [
|
|
|
33
33
|
("moonshotai/Kimi-K2.6", "auto"),
|
|
34
34
|
("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
|
|
35
35
|
("nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "auto"),
|
|
36
|
+
# Ultra resolves the Ultra template variant via name (auto → ultra=True).
|
|
37
|
+
("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
|
|
36
38
|
("poolside/Laguna-XS.2", "auto"),
|
|
37
39
|
("openai/gpt-oss-20b", "gpt-oss"),
|
|
38
40
|
("Qwen/Qwen2.5-0.5B-Instruct", "default"),
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Offline wiring tests for the Nemotron-3 Ultra template variant.
|
|
2
|
+
|
|
3
|
+
Assert the name-based ``ultra`` auto-selection, the model→renderer mapping,
|
|
4
|
+
and the typed-config surface WITHOUT loading any tokenizer (no network). This
|
|
5
|
+
pins the wiring the parity matrix can't reach — in particular the FP8 entry,
|
|
6
|
+
which no test loads a tokenizer for — so it can't silently rot.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from types import SimpleNamespace
|
|
10
|
+
|
|
11
|
+
from renderers.base import MODEL_RENDERER_MAP
|
|
12
|
+
from renderers.configs import Nemotron3RendererConfig
|
|
13
|
+
from renderers.nemotron3 import _ULTRA_DEFAULTS, _default_ultra
|
|
14
|
+
|
|
15
|
+
_ULTRA_REPOS = [
|
|
16
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16",
|
|
17
|
+
"nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8",
|
|
18
|
+
]
|
|
19
|
+
_NON_ULTRA_REPOS = [
|
|
20
|
+
"nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
|
|
21
|
+
"nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _fake_tok(name):
|
|
26
|
+
return SimpleNamespace(name_or_path=name)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_ultra_and_non_ultra_models_map_to_nemotron3():
|
|
30
|
+
for repo in _ULTRA_REPOS + _NON_ULTRA_REPOS:
|
|
31
|
+
assert MODEL_RENDERER_MAP.get(repo) == "nemotron-3", repo
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_default_ultra_resolves_by_name():
|
|
35
|
+
# Ultra checkpoints (incl. the gated FP8 repo) resolve True.
|
|
36
|
+
for repo in _ULTRA_REPOS:
|
|
37
|
+
assert _ULTRA_DEFAULTS[repo] is True
|
|
38
|
+
assert _default_ultra(_fake_tok(repo)) is True
|
|
39
|
+
# Nano / Super resolve False (the shared Nano/Super template).
|
|
40
|
+
for repo in _NON_ULTRA_REPOS:
|
|
41
|
+
assert _default_ultra(_fake_tok(repo)) is False
|
|
42
|
+
# Unknown / fine-tuned / local-path checkpoints fall back to False;
|
|
43
|
+
# those must pass an explicit ultra= if they need the Ultra template.
|
|
44
|
+
assert _default_ultra(_fake_tok("acme/my-nemotron-ultra-ft")) is False
|
|
45
|
+
assert _default_ultra(_fake_tok("/home/user/local-ckpt")) is False
|
|
46
|
+
assert _default_ultra(SimpleNamespace()) is False # no name_or_path attr
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_ultra_is_not_a_template_kwarg():
|
|
50
|
+
fields = Nemotron3RendererConfig.template_field_names()
|
|
51
|
+
assert "ultra" not in fields
|
|
52
|
+
assert fields == frozenset({"enable_thinking", "truncate_history_thinking"})
|
|
53
|
+
assert "ultra" in Nemotron3RendererConfig._internal_fields
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_ultra_config_default_is_none_and_overridable():
|
|
57
|
+
assert Nemotron3RendererConfig().ultra is None # None => auto-detect by name
|
|
58
|
+
assert Nemotron3RendererConfig(ultra=True).ultra is True
|
|
59
|
+
assert Nemotron3RendererConfig(ultra=False).ultra is False
|
|
@@ -55,6 +55,9 @@ _RENDERER_MODELS = [
|
|
|
55
55
|
("moonshotai/Kimi-K2.6", "auto"),
|
|
56
56
|
("deepseek-ai/DeepSeek-V3", "auto"),
|
|
57
57
|
("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
|
|
58
|
+
# Ultra: auto-resolves to the Ultra template variant (ultra=True) via the
|
|
59
|
+
# model name; parity asserted against the Ultra apply_chat_template.
|
|
60
|
+
("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
|
|
58
61
|
("poolside/Laguna-XS.2", "auto"),
|
|
59
62
|
("openai/gpt-oss-20b", "gpt-oss"),
|
|
60
63
|
]
|
|
@@ -43,6 +43,9 @@ _ROUNDTRIP_MODELS = [
|
|
|
43
43
|
("moonshotai/Kimi-K2.6", "auto"),
|
|
44
44
|
("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
|
|
45
45
|
("nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "auto"),
|
|
46
|
+
# Ultra: parse must recover content after a </think> glued directly to it
|
|
47
|
+
# (no separating newline) — the Ultra-specific glue stresses the round-trip.
|
|
48
|
+
("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
|
|
46
49
|
("poolside/Laguna-XS.2", "auto"),
|
|
47
50
|
("openai/gpt-oss-20b", "gpt-oss"),
|
|
48
51
|
("Qwen/Qwen2.5-0.5B-Instruct", "default"),
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/sglang/multiturn_generate_sglang.py
RENAMED
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/tinker/multiturn_generate_tinker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|