renderers 0.1.8.dev33__tar.gz → 0.1.8.dev34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/_version.py +2 -2
  3. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/base.py +2 -2
  4. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/qwen35.py +42 -37
  5. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_qwen35_size_coverage.py +32 -9
  6. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.github/workflows/publish-dev.yml +0 -0
  7. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.github/workflows/publish.yml +0 -0
  8. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.github/workflows/style.yml +0 -0
  9. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.github/workflows/test.yml +0 -0
  10. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.gitignore +0 -0
  11. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/.pre-commit-config.yaml +0 -0
  12. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/LICENSE +0 -0
  13. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/README.md +0 -0
  14. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/docs/renderer-config.md +0 -0
  15. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/README.md +0 -0
  16. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/sglang/multiturn_generate_sglang.py +0 -0
  17. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/sglang/online_multiturn_sglang.py +0 -0
  18. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/tinker/multiturn_generate_tinker.py +0 -0
  19. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/transformers/multiturn_generate_transformers.py +0 -0
  20. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/examples/vllm/multiturn_generate_vllm.py +0 -0
  21. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/pyproject.toml +0 -0
  22. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/__init__.py +0 -0
  23. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/client.py +0 -0
  24. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/configs.py +0 -0
  25. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/deepseek_v3.py +0 -0
  26. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/default.py +0 -0
  27. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/glm45.py +0 -0
  28. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/glm5.py +0 -0
  29. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/gpt_oss.py +0 -0
  30. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/kimi_k2.py +0 -0
  31. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/kimi_k25.py +0 -0
  32. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/laguna_xs2.py +0 -0
  33. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/minimax_m2.py +0 -0
  34. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/nemotron3.py +0 -0
  35. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/parsers.py +0 -0
  36. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/parsing.py +0 -0
  37. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/qwen3.py +0 -0
  38. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/qwen36.py +0 -0
  39. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/renderers/qwen3_vl.py +0 -0
  40. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/conftest.py +0 -0
  41. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_bridge.py +0 -0
  42. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_build_helpers.py +0 -0
  43. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_client.py +0 -0
  44. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_gpt_oss_harmony_parity.py +0 -0
  45. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_incremental.py +0 -0
  46. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_is_content.py +0 -0
  47. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_kimi_k25_tool_schema.py +0 -0
  48. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_load_tokenizer.py +0 -0
  49. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_load_tokenizer_fastokens.py +0 -0
  50. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_message_indices.py +0 -0
  51. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_multimodal.py +0 -0
  52. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_parse_response.py +0 -0
  53. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_parse_response_robustness.py +0 -0
  54. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_parsers.py +0 -0
  55. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_preserve_thinking.py +0 -0
  56. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_render_ids.py +0 -0
  57. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_renderer_config.py +0 -0
  58. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_renderer_config_parity.py +0 -0
  59. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_roundtrip.py +0 -0
  60. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_sampled_mask.py +0 -0
  61. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_tokens_per_message.py +0 -0
  62. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/tests/test_tool_arg_type_preservation.py +0 -0
  63. {renderers-0.1.8.dev33 → renderers-0.1.8.dev34}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev33
3
+ Version: 0.1.8.dev34
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev33'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev33')
21
+ __version__ = version = '0.1.8.dev34'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev34')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -911,8 +911,8 @@ MODEL_RENDERER_MAP: dict[str, str] = {
911
911
  # ``enable_thinking=true`` (open ``<think>\n`` at the gen prompt);
912
912
  # the smaller 0.8B / 2B variants flip the polarity (default
913
913
  # ``enable_thinking=false``, empty ``<think>\n\n</think>\n\n``).
914
- # ``Qwen35Renderer`` auto-detects polarity from the tokenizer's
915
- # chat_template at construction, so all seven sizes are
914
+ # ``Qwen35Renderer`` hard-codes this polarity per model
915
+ # (``_ENABLE_THINKING_DEFAULTS``), so all seven sizes are
916
916
  # token-for-token parity-tested against their own
917
917
  # ``apply_chat_template`` — including with
918
918
  # ``add_generation_prompt=True``.
@@ -66,39 +66,44 @@ _TOOLS_INSTRUCTIONS = (
66
66
  )
67
67
 
68
68
 
69
- def _detect_enable_thinking_default(tokenizer: PreTrainedTokenizer) -> bool:
70
- """Probe the tokenizer's chat template to learn its ``enable_thinking``
71
- default polarity at the generation-prompt boundary.
72
-
73
- The Qwen3.5 family ships two template variants that differ only in the
74
- polarity of the gated branch:
75
-
76
- * Big sizes (4B / 9B / 35B-A3B / 122B-A10B / 397B-A17B) emit an open
77
- ``<think>\\n`` by default and the empty ``<think>\\n\\n</think>\\n\\n``
78
- block when ``enable_thinking`` is explicitly false.
79
- * Small sizes (0.8B / 2B) flip the polarity — they emit the empty
80
- block by default and the open ``<think>\\n`` only when
81
- ``enable_thinking`` is explicitly true.
82
-
83
- A one-shot ``apply_chat_template`` call with no flag and a minimal
84
- user message reveals which variant is in use: the empty-block tail
85
- ends with ``</think>``, the open-think tail does not. Failing the
86
- probe (no chat_template, exotic config) falls back to the big-model
87
- default of True, which matches every entry in
88
- ``MODEL_RENDERER_MAP`` that routes to ``qwen3.5`` without explicit
89
- polarity awareness.
69
+ # Per-model ``enable_thinking`` default, applied when the renderer config
70
+ # leaves it ``None``. The Qwen3.5 family ships two chat-template variants
71
+ # that differ only in the polarity of the gated thinking branch:
72
+ #
73
+ # * Big sizes (4B / 9B / 35B-A3B / 122B-A10B / 397B-A17B) default
74
+ # ``enable_thinking=true`` — an open ``<think>\n`` at the gen prompt.
75
+ # * Small sizes (0.8B / 2B) flip it — default ``false``, emitting the
76
+ # empty ``<think>\n\n</think>\n\n`` block.
77
+ #
78
+ # These are hard-coded (keyed by ``tokenizer.name_or_path``) rather than
79
+ # probed from the live ``chat_template``: probing meant calling
80
+ # ``apply_chat_template`` at construction, which pulls ``transformers`` onto
81
+ # the hot path and breaks bring-your-own-tokenizer use. The values are the
82
+ # ground truth pinned by ``tests/test_qwen35_size_coverage.py`` — both the
83
+ # polarity assertions and byte-parity against each size's own
84
+ # ``apply_chat_template``.
85
+ _ENABLE_THINKING_DEFAULTS: dict[str, bool] = {
86
+ "Qwen/Qwen3.5-0.8B": False,
87
+ "Qwen/Qwen3.5-2B": False,
88
+ "Qwen/Qwen3.5-4B": True,
89
+ "Qwen/Qwen3.5-9B": True,
90
+ "Qwen/Qwen3.5-35B-A3B": True,
91
+ "Qwen/Qwen3.5-122B-A10B": True,
92
+ "Qwen/Qwen3.5-397B-A17B": True,
93
+ # Qwen3.6 extends the Qwen3.5 template; same big-size polarity.
94
+ "Qwen/Qwen3.6-35B-A3B": True,
95
+ }
96
+
97
+
98
+ def _default_enable_thinking(tokenizer) -> bool:
99
+ """Hard-coded ``enable_thinking`` default for ``tokenizer``'s model.
100
+
101
+ Falls back to ``True`` (the big-model default, and the majority of the
102
+ family) for unknown / fine-tuned checkpoints whose ``name_or_path`` isn't
103
+ in ``_ENABLE_THINKING_DEFAULTS``; pass an explicit ``enable_thinking=`` to
104
+ a small-size fine-tune that needs ``False``.
90
105
  """
91
- try:
92
- out = tokenizer.apply_chat_template(
93
- [{"role": "user", "content": "x"}],
94
- tokenize=False,
95
- add_generation_prompt=True,
96
- )
97
- except Exception:
98
- return True
99
- if not isinstance(out, str):
100
- return True
101
- return not out.rstrip().endswith("</think>")
106
+ return _ENABLE_THINKING_DEFAULTS.get(getattr(tokenizer, "name_or_path", ""), True)
102
107
 
103
108
 
104
109
  class Qwen35Renderer:
@@ -116,13 +121,13 @@ class Qwen35Renderer:
116
121
  self._tokenizer = tokenizer
117
122
  self._processor = processor
118
123
  cfg = config or type(self)._config_cls()
119
- # ``enable_thinking=None`` defers to the tokenizer's chat-template
120
- # default (Instruct → off, Thinking → on). Materialise here so
121
- # downstream reads see a concrete bool; rebind the config with
122
- # the resolved value so introspection sees the same.
124
+ # ``enable_thinking=None`` defers to the model's known default (see
125
+ # ``_ENABLE_THINKING_DEFAULTS``). Materialise here so downstream reads
126
+ # see a concrete bool; rebind the config with the resolved value so
127
+ # introspection sees the same.
123
128
  if cfg.enable_thinking is None:
124
129
  cfg = cfg.model_copy(
125
- update={"enable_thinking": _detect_enable_thinking_default(tokenizer)}
130
+ update={"enable_thinking": _default_enable_thinking(tokenizer)}
126
131
  )
127
132
  self.config = cfg
128
133
 
@@ -5,9 +5,8 @@ Seven Qwen3.5 sizes route to ``Qwen35Renderer``. The 4B / 9B / 35B-A3B /
5
5
  ``enable_thinking=true``); the smaller 0.8B / 2B sizes ship the polarity-
6
6
  flipped variant (default ``enable_thinking=false`` → empty
7
7
  ``<think>\\n\\n</think>\\n\\n`` at the gen-prompt boundary). The renderer
8
- detects polarity from the tokenizer's chat_template at construction, so
9
- both variants render byte-identical to their own
10
- ``apply_chat_template``.
8
+ hard-codes this polarity per model (``_ENABLE_THINKING_DEFAULTS``), so
9
+ both variants render byte-identical to their own ``apply_chat_template``.
11
10
 
12
11
  These tests lock in (a) the exact set of Qwen3.5 sizes in the map and
13
12
  (b) byte parity for every one of them across representative
@@ -57,7 +56,7 @@ def test_no_other_qwen35_sizes_silently_added():
57
56
 
58
57
 
59
58
  # ---------------------------------------------------------------------------
60
- # Polarity auto-detection: 0.8B / 2B flip ``enable_thinking`` default.
59
+ # Polarity defaults: 0.8B / 2B flip ``enable_thinking`` default.
61
60
  # ---------------------------------------------------------------------------
62
61
 
63
62
 
@@ -73,10 +72,10 @@ def test_no_other_qwen35_sizes_silently_added():
73
72
  ("Qwen/Qwen3.5-397B-A17B", True),
74
73
  ],
75
74
  )
76
- def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_default):
77
- """The renderer's ``_enable_thinking`` resolves to the chat template's
78
- own default when no explicit flag is passed — so big / small sizes
79
- each match their own template at the gen-prompt boundary."""
75
+ def test_qwen35_enable_thinking_polarity_default(qwen35_model, expected_default):
76
+ """With no explicit flag, the renderer resolves ``enable_thinking`` from
77
+ the hard-coded per-model default — so big / small sizes each match their
78
+ own template at the gen-prompt boundary."""
80
79
  tok = load_tokenizer(qwen35_model)
81
80
  renderer = create_renderer(tok, Qwen35RendererConfig())
82
81
  assert isinstance(renderer, Qwen35Renderer)
@@ -86,6 +85,30 @@ def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_def
86
85
  )
87
86
 
88
87
 
88
+ def test_construction_does_not_call_apply_chat_template():
89
+ """The ``enable_thinking`` default is hard-coded per model, so building a
90
+ ``Qwen35Renderer`` must not probe ``apply_chat_template`` — a
91
+ bring-your-own tokenizer with no chat-template support still works."""
92
+
93
+ class _Stub:
94
+ name_or_path = "Qwen/Qwen3.5-0.8B"
95
+ unk_token_id = -1
96
+
97
+ def convert_tokens_to_ids(self, token):
98
+ # Any stable non-unk id per token; the renderer only needs the
99
+ # special tokens to resolve to distinct, in-vocab ids.
100
+ return abs(hash(token)) % 1_000_000 + 1
101
+
102
+ def apply_chat_template(self, *args, **kwargs):
103
+ raise AssertionError(
104
+ "apply_chat_template must not be called at construction"
105
+ )
106
+
107
+ renderer = Qwen35Renderer(_Stub())
108
+ # 0.8B is a small size → thinking defaults off, from the hard-coded table.
109
+ assert renderer.config.enable_thinking is False
110
+
111
+
89
112
  # ---------------------------------------------------------------------------
90
113
  # Byte parity for each in-map Qwen3.5 size.
91
114
  # ---------------------------------------------------------------------------
@@ -146,7 +169,7 @@ def test_qwen35_size_parity_with_apply_chat_template(
146
169
  """Each in-map Qwen3.5 size renders byte-identical to its own
147
170
  ``apply_chat_template`` output. Locks in the property that lets us
148
171
  share ``Qwen35Renderer`` across all seven sizes — the polarity
149
- flip on 0.8B / 2B is absorbed by the constructor's auto-detect."""
172
+ flip on 0.8B / 2B is absorbed by the per-model default."""
150
173
  tok = load_tokenizer(qwen35_model)
151
174
  renderer = create_renderer(tok, Qwen35RendererConfig())
152
175
  assert isinstance(renderer, Qwen35Renderer)
File without changes
File without changes