renderers 0.1.8.dev39__tar.gz → 0.1.8.dev40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/_version.py +2 -2
  3. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/base.py +6 -1
  4. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/configs.py +29 -0
  5. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/nemotron3.py +68 -9
  6. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/conftest.py +2 -0
  7. renderers-0.1.8.dev40/tests/test_nemotron3_ultra.py +59 -0
  8. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_renderer_config_parity.py +3 -0
  9. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_roundtrip.py +3 -0
  10. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/publish-dev.yml +0 -0
  11. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/publish.yml +0 -0
  12. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/style.yml +0 -0
  13. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.github/workflows/test.yml +0 -0
  14. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.gitignore +0 -0
  15. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/.pre-commit-config.yaml +0 -0
  16. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/LICENSE +0 -0
  17. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/README.md +0 -0
  18. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/docs/renderer-config.md +0 -0
  19. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/README.md +0 -0
  20. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/sglang/multiturn_generate_sglang.py +0 -0
  21. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/sglang/online_multiturn_sglang.py +0 -0
  22. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/tinker/multiturn_generate_tinker.py +0 -0
  23. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/transformers/multiturn_generate_transformers.py +0 -0
  24. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/examples/vllm/multiturn_generate_vllm.py +0 -0
  25. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/pyproject.toml +0 -0
  26. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/__init__.py +0 -0
  27. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/client.py +0 -0
  28. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/deepseek_v3.py +0 -0
  29. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/default.py +0 -0
  30. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/glm45.py +0 -0
  31. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/glm5.py +0 -0
  32. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/gpt_oss.py +0 -0
  33. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/kimi_k2.py +0 -0
  34. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/kimi_k25.py +0 -0
  35. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/laguna_xs2.py +0 -0
  36. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/minimax_m2.py +0 -0
  37. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/parsers.py +0 -0
  38. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/parsing.py +0 -0
  39. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen3.py +0 -0
  40. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen35.py +0 -0
  41. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen36.py +0 -0
  42. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/renderers/qwen3_vl.py +0 -0
  43. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_bridge.py +0 -0
  44. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_build_helpers.py +0 -0
  45. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_client.py +0 -0
  46. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_gpt_oss_harmony_parity.py +0 -0
  47. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_incremental.py +0 -0
  48. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_is_content.py +0 -0
  49. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_kimi_k25_tool_schema.py +0 -0
  50. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_load_tokenizer.py +0 -0
  51. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_load_tokenizer_fastokens.py +0 -0
  52. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_message_indices.py +0 -0
  53. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_message_tool_names.py +0 -0
  54. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_multimodal.py +0 -0
  55. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parse_response.py +0 -0
  56. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parse_response_robustness.py +0 -0
  57. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_parsers.py +0 -0
  58. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_preserve_thinking.py +0 -0
  59. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_qwen35_size_coverage.py +0 -0
  60. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_render_ids.py +0 -0
  61. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_renderer_config.py +0 -0
  62. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_sampled_mask.py +0 -0
  63. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_tokens_per_message.py +0 -0
  64. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/tests/test_tool_arg_type_preservation.py +0 -0
  65. {renderers-0.1.8.dev39 → renderers-0.1.8.dev40}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev39
3
+ Version: 0.1.8.dev40
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev39'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev39')
21
+ __version__ = version = '0.1.8.dev40'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev40')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -1037,9 +1037,14 @@ MODEL_RENDERER_MAP: dict[str, str] = {
1037
1037
  "moonshotai/Kimi-K2-Instruct": "kimi-k2",
1038
1038
  "moonshotai/Kimi-K2.5": "kimi-k2.5",
1039
1039
  "moonshotai/Kimi-K2.6": "kimi-k2.5",
1040
- # Nemotron 3.
1040
+ # Nemotron 3. Nano / Super share one chat-template variant; the Ultra
1041
+ # checkpoints use the Ultra variant — the renderer auto-selects it from
1042
+ # the model name (see ``nemotron3._ULTRA_DEFAULTS``). BF16 and FP8 share the
1043
+ # same tokenizer and template.
1041
1044
  "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "nemotron-3",
1042
1045
  "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": "nemotron-3",
1046
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": "nemotron-3",
1047
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": "nemotron-3",
1043
1048
  # Poolside Laguna.
1044
1049
  "poolside/Laguna-XS.2": "laguna-xs.2",
1045
1050
  # GPT-OSS.
@@ -337,6 +337,26 @@ class Nemotron3RendererConfig(BaseRendererConfig):
337
337
  """When ``True``, the generation prompt includes ``<think>``. Mirrors
338
338
  the chat template's ``enable_thinking`` kwarg."""
339
339
 
340
+ ultra: bool | None = None
341
+ """Select the Nemotron-3 **Ultra** chat-template variant.
342
+
343
+ ``None`` (default) auto-detects from the model name (see
344
+ ``renderers.nemotron3._ULTRA_DEFAULTS``): the Ultra checkpoints resolve
345
+ to ``True``; Nano / Super and unknown checkpoints to ``False``. Set
346
+ explicitly to force a variant — e.g. an Ultra fine-tune or a
347
+ locally-pathed checkpoint whose ``name_or_path`` isn't in the table.
348
+
349
+ Ultra's template differs from Nano/Super: the reasoning block is glued
350
+ as ``<think>\\n{reasoning}</think>{content}`` (no ``\\n`` around
351
+ ``</think>``), truncated historical turns collapse to
352
+ ``<think></think>{content}`` (no ``\\n``), and the thinking-truncation
353
+ boundary follows the template's ``loop.index0 < last_user_idx`` rule
354
+ (drop thinking on every assistant turn before the last user message).
355
+
356
+ Not a chat-template kwarg — it picks which template the renderer
357
+ mirrors, not a variable passed into one — so it's listed in
358
+ ``_internal_fields`` and excluded from ``template_field_names()``."""
359
+
340
360
  truncate_history_thinking: bool = True
341
361
  """When ``False``, keep ``<think>{reasoning}</think>`` on past-cycle
342
362
  assistant turns instead of dropping them. Mirrors the chat
@@ -344,6 +364,15 @@ class Nemotron3RendererConfig(BaseRendererConfig):
344
364
  ``preserve_all_thinking`` / ``preserve_thinking_between_tool_calls``
345
365
  — see :class:`BaseRendererConfig` for the contract."""
346
366
 
367
+ # ``ultra`` is a template-variant SELECTOR — it picks which template the
368
+ # renderer mirrors (Ultra vs Nano/Super), not a variable passed into one;
369
+ # there is no ``ultra`` Jinja variable. Marked internal so the parity
370
+ # matrix doesn't cross it as a template field. Same ``_internal_fields``
371
+ # mechanism DeepSeek-V3 uses for its no-op ``enable_thinking``, for a
372
+ # different underlying reason (theirs is an ignored kwarg, this is a
373
+ # variant switch).
374
+ _internal_fields = frozenset({"ultra"})
375
+
347
376
 
348
377
  class DeepSeekV3RendererConfig(BaseRendererConfig):
349
378
  """DeepSeek V3 renderer config.
@@ -75,6 +75,35 @@ def _render_extra_keys(obj: dict[str, Any], handled_keys: set[str]) -> list[str]
75
75
  return lines
76
76
 
77
77
 
78
+ # Per-model ``ultra`` default, applied when the renderer config leaves it
79
+ # ``None``. The Nemotron-3 family ships two chat-template variants: Nano /
80
+ # Super share one; Ultra differs in the reasoning-block glue (no ``\n`` around
81
+ # ``</think>``) and the thinking-truncation boundary (drop thinking on every
82
+ # assistant turn before the last user message). BF16 and FP8 share the same
83
+ # tokenizer and template. Hard-coded keyed by
84
+ # ``tokenizer.name_or_path`` rather than probed from the live template — the
85
+ # same convention as Qwen3.5's ``_ENABLE_THINKING_DEFAULTS`` (avoids pulling
86
+ # ``apply_chat_template`` onto the construction hot path and keeps
87
+ # bring-your-own-tokenizer use working).
88
+ _ULTRA_DEFAULTS: dict[str, bool] = {
89
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": False,
90
+ "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16": False,
91
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16": True,
92
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8": True,
93
+ }
94
+
95
+
96
+ def _default_ultra(tokenizer) -> bool:
97
+ """Hard-coded ``ultra`` default for ``tokenizer``'s model.
98
+
99
+ Falls back to ``False`` (the Nano / Super template, and the majority of
100
+ the family) for unknown / fine-tuned checkpoints whose ``name_or_path``
101
+ isn't in ``_ULTRA_DEFAULTS`` — pass an explicit ``ultra=True`` for an
102
+ Ultra fine-tune or a locally-pathed Ultra checkpoint.
103
+ """
104
+ return _ULTRA_DEFAULTS.get(getattr(tokenizer, "name_or_path", ""), False)
105
+
106
+
78
107
  class Nemotron3Renderer:
79
108
  """Deterministic message → token renderer for Nemotron 3 models."""
80
109
 
@@ -84,7 +113,14 @@ class Nemotron3Renderer:
84
113
  config: Nemotron3RendererConfig | None = None,
85
114
  ):
86
115
  self._tokenizer = tokenizer
87
- self.config = config or Nemotron3RendererConfig()
116
+ cfg = config or Nemotron3RendererConfig()
117
+ # ``ultra=None`` defers to the model's known default (see
118
+ # ``_ULTRA_DEFAULTS``). Materialise here so downstream reads see a
119
+ # concrete bool; rebind the frozen config with the resolved value so
120
+ # introspection sees the same.
121
+ if cfg.ultra is None:
122
+ cfg = cfg.model_copy(update={"ultra": _default_ultra(tokenizer)})
123
+ self.config = cfg
88
124
 
89
125
  # Look up special token IDs from the tokenizer (not hardcoded).
90
126
  # <|endoftext|> is optional: Nemotron-3 Nano / Super tokenizers ship
@@ -335,6 +371,17 @@ class Nemotron3Renderer:
335
371
  last_plain_assistant_idx = j
336
372
  break
337
373
 
374
+ # Ultra truncates thinking on every assistant turn *before the last
375
+ # user message* (template rule ``loop.index0 < last_user_idx``),
376
+ # whereas Nano/Super preserve only the last plain assistant. Compute
377
+ # the last-user index over the normalized ``messages`` list (a leading
378
+ # system never holds a user, so the relative comparison is unaffected).
379
+ last_user_idx_norm = -1
380
+ for j in range(len(messages) - 1, -1, -1):
381
+ if messages[j].get("role") == "user":
382
+ last_user_idx_norm = j
383
+ break
384
+
338
385
  # ── 2. Iterate messages ─────────────────────────────────────
339
386
  for i, msg in enumerate(messages):
340
387
  role = msg["role"]
@@ -360,7 +407,10 @@ class Nemotron3Renderer:
360
407
  emit_text("\n", msg_orig_idx, is_sampled=False, is_content=False)
361
408
 
362
409
  elif role == "assistant":
363
- is_last_turn = i >= last_plain_assistant_idx
410
+ if self.config.ultra:
411
+ is_last_turn = i >= last_user_idx_norm
412
+ else:
413
+ is_last_turn = i >= last_plain_assistant_idx
364
414
  preserve_thinking = msg_orig_idx >= 0 and should_preserve_past_thinking(
365
415
  original_messages,
366
416
  msg_orig_idx,
@@ -617,6 +667,7 @@ class Nemotron3Renderer:
617
667
  content = after_think_end.lstrip("\n")
618
668
 
619
669
  reasoning_content = reasoning_content.strip()
670
+ ultra = self.config.ultra
620
671
 
621
672
  # ``<|im_start|>assistant\n`` is template-injected scaffolding —
622
673
  # at inference the chat template emits these as the generation
@@ -645,28 +696,36 @@ class Nemotron3Renderer:
645
696
  or not self.config.truncate_history_thinking
646
697
  ):
647
698
  emit_special(self._think, msg_idx, is_sampled=True, is_content=True)
699
+ # Ultra: <think>\n{reasoning}</think>{content} (no \n around </think>).
700
+ # Nano/Super: <think>\n{reasoning}\n</think>\n{content}.
648
701
  emit_text(
649
- "\n" + reasoning_content + "\n",
702
+ ("\n" + reasoning_content)
703
+ if ultra
704
+ else ("\n" + reasoning_content + "\n"),
650
705
  msg_idx,
651
706
  is_sampled=True,
652
707
  is_content=True,
653
708
  )
654
709
  emit_special(self._think_end, msg_idx, is_sampled=True, is_content=True)
655
- # Single \n separator (not \n\n like Qwen3.5)
710
+ # Single \n separator (not \n\n like Qwen3.5); Ultra glues directly.
656
711
  emit_text(
657
- "\n" + content + content_suffix,
712
+ (content + content_suffix)
713
+ if ultra
714
+ else ("\n" + content + content_suffix),
658
715
  msg_idx,
659
716
  is_sampled=True,
660
717
  is_content=True,
661
718
  )
662
719
  elif reasoning_content:
663
- # Historical assistant whose reasoning got stripped template
664
- # keeps a single \n between the collapsed <think></think> and
665
- # the content as a marker that reasoning existed.
720
+ # Historical assistant whose reasoning got stripped. Nano/Super keep
721
+ # a single \n between the collapsed <think></think> and the content
722
+ # as a marker that reasoning existed; Ultra glues content directly.
666
723
  emit_special(self._think, msg_idx, is_sampled=True, is_content=True)
667
724
  emit_special(self._think_end, msg_idx, is_sampled=True, is_content=True)
668
725
  emit_text(
669
- "\n" + content + content_suffix,
726
+ (content + content_suffix)
727
+ if ultra
728
+ else ("\n" + content + content_suffix),
670
729
  msg_idx,
671
730
  is_sampled=True,
672
731
  is_content=True,
@@ -33,6 +33,8 @@ RENDERER_MODELS = [
33
33
  ("moonshotai/Kimi-K2.6", "auto"),
34
34
  ("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
35
35
  ("nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "auto"),
36
+ # Ultra resolves the Ultra template variant via name (auto → ultra=True).
37
+ ("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
36
38
  ("poolside/Laguna-XS.2", "auto"),
37
39
  ("openai/gpt-oss-20b", "gpt-oss"),
38
40
  ("Qwen/Qwen2.5-0.5B-Instruct", "default"),
@@ -0,0 +1,59 @@
1
+ """Offline wiring tests for the Nemotron-3 Ultra template variant.
2
+
3
+ Assert the name-based ``ultra`` auto-selection, the model→renderer mapping,
4
+ and the typed-config surface WITHOUT loading any tokenizer (no network). This
5
+ pins the wiring the parity matrix can't reach — in particular the FP8 entry,
6
+ which no test loads a tokenizer for — so it can't silently rot.
7
+ """
8
+
9
+ from types import SimpleNamespace
10
+
11
+ from renderers.base import MODEL_RENDERER_MAP
12
+ from renderers.configs import Nemotron3RendererConfig
13
+ from renderers.nemotron3 import _ULTRA_DEFAULTS, _default_ultra
14
+
15
+ _ULTRA_REPOS = [
16
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16",
17
+ "nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-FP8",
18
+ ]
19
+ _NON_ULTRA_REPOS = [
20
+ "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
21
+ "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
22
+ ]
23
+
24
+
25
+ def _fake_tok(name):
26
+ return SimpleNamespace(name_or_path=name)
27
+
28
+
29
+ def test_ultra_and_non_ultra_models_map_to_nemotron3():
30
+ for repo in _ULTRA_REPOS + _NON_ULTRA_REPOS:
31
+ assert MODEL_RENDERER_MAP.get(repo) == "nemotron-3", repo
32
+
33
+
34
+ def test_default_ultra_resolves_by_name():
35
+ # Ultra checkpoints (incl. the gated FP8 repo) resolve True.
36
+ for repo in _ULTRA_REPOS:
37
+ assert _ULTRA_DEFAULTS[repo] is True
38
+ assert _default_ultra(_fake_tok(repo)) is True
39
+ # Nano / Super resolve False (the shared Nano/Super template).
40
+ for repo in _NON_ULTRA_REPOS:
41
+ assert _default_ultra(_fake_tok(repo)) is False
42
+ # Unknown / fine-tuned / local-path checkpoints fall back to False;
43
+ # those must pass an explicit ultra= if they need the Ultra template.
44
+ assert _default_ultra(_fake_tok("acme/my-nemotron-ultra-ft")) is False
45
+ assert _default_ultra(_fake_tok("/home/user/local-ckpt")) is False
46
+ assert _default_ultra(SimpleNamespace()) is False # no name_or_path attr
47
+
48
+
49
+ def test_ultra_is_not_a_template_kwarg():
50
+ fields = Nemotron3RendererConfig.template_field_names()
51
+ assert "ultra" not in fields
52
+ assert fields == frozenset({"enable_thinking", "truncate_history_thinking"})
53
+ assert "ultra" in Nemotron3RendererConfig._internal_fields
54
+
55
+
56
+ def test_ultra_config_default_is_none_and_overridable():
57
+ assert Nemotron3RendererConfig().ultra is None # None => auto-detect by name
58
+ assert Nemotron3RendererConfig(ultra=True).ultra is True
59
+ assert Nemotron3RendererConfig(ultra=False).ultra is False
@@ -55,6 +55,9 @@ _RENDERER_MODELS = [
55
55
  ("moonshotai/Kimi-K2.6", "auto"),
56
56
  ("deepseek-ai/DeepSeek-V3", "auto"),
57
57
  ("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
58
+ # Ultra: auto-resolves to the Ultra template variant (ultra=True) via the
59
+ # model name; parity asserted against the Ultra apply_chat_template.
60
+ ("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
58
61
  ("poolside/Laguna-XS.2", "auto"),
59
62
  ("openai/gpt-oss-20b", "gpt-oss"),
60
63
  ]
@@ -43,6 +43,9 @@ _ROUNDTRIP_MODELS = [
43
43
  ("moonshotai/Kimi-K2.6", "auto"),
44
44
  ("nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "auto"),
45
45
  ("nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "auto"),
46
+ # Ultra: parse must recover content after a </think> glued directly to it
47
+ # (no separating newline) — the Ultra-specific glue stresses the round-trip.
48
+ ("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
46
49
  ("poolside/Laguna-XS.2", "auto"),
47
50
  ("openai/gpt-oss-20b", "gpt-oss"),
48
51
  ("Qwen/Qwen2.5-0.5B-Instruct", "default"),
File without changes
File without changes