renderers 0.1.8.dev26__tar.gz → 0.1.8.dev27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.gitignore +3 -0
  2. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/PKG-INFO +40 -14
  3. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/README.md +38 -13
  4. renderers-0.1.8.dev27/docs/renderer-config.md +163 -0
  5. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/pyproject.toml +6 -1
  6. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/__init__.py +45 -4
  7. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/_version.py +2 -2
  8. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/base.py +85 -89
  9. renderers-0.1.8.dev27/renderers/configs.py +468 -0
  10. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/deepseek_v3.py +16 -15
  11. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/default.py +13 -20
  12. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/glm45.py +9 -15
  13. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/glm5.py +31 -16
  14. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/gpt_oss.py +36 -44
  15. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/kimi_k2.py +11 -13
  16. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/kimi_k25.py +15 -17
  17. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/laguna_xs2.py +67 -15
  18. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/minimax_m2.py +6 -16
  19. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/nemotron3.py +12 -14
  20. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/qwen3.py +7 -13
  21. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/qwen35.py +65 -17
  22. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/qwen36.py +9 -6
  23. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/qwen3_vl.py +62 -16
  24. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/conftest.py +2 -1
  25. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_bridge.py +2 -1
  26. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_gpt_oss_harmony_parity.py +4 -1
  27. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_message_indices.py +1 -1
  28. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_multimodal.py +183 -4
  29. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_parse_response.py +2 -2
  30. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_parse_response_robustness.py +1 -2
  31. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_parsers.py +5 -4
  32. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_preserve_thinking.py +53 -46
  33. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_qwen35_size_coverage.py +5 -5
  34. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_render_ids.py +3 -3
  35. renderers-0.1.8.dev27/tests/test_renderer_config.py +116 -0
  36. renderers-0.1.8.dev27/tests/test_renderer_config_parity.py +513 -0
  37. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_roundtrip.py +3 -3
  38. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_tool_arg_type_preservation.py +2 -2
  39. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/uv.lock +16 -1
  40. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.github/workflows/publish-dev.yml +0 -0
  41. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.github/workflows/publish.yml +0 -0
  42. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.github/workflows/style.yml +0 -0
  43. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.github/workflows/test.yml +0 -0
  44. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/.pre-commit-config.yaml +0 -0
  45. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/LICENSE +0 -0
  46. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/README.md +0 -0
  47. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/sglang/multiturn_generate_sglang.py +0 -0
  48. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/sglang/online_multiturn_sglang.py +0 -0
  49. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/tinker/multiturn_generate_tinker.py +0 -0
  50. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/transformers/multiturn_generate_transformers.py +0 -0
  51. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/examples/vllm/multiturn_generate_vllm.py +0 -0
  52. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/client.py +0 -0
  53. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/parsers.py +0 -0
  54. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/renderers/parsing.py +0 -0
  55. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_build_helpers.py +0 -0
  56. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_client.py +0 -0
  57. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_incremental.py +0 -0
  58. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_is_content.py +0 -0
  59. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_kimi_k25_tool_schema.py +0 -0
  60. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_load_tokenizer.py +0 -0
  61. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_load_tokenizer_fastokens.py +0 -0
  62. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_sampled_mask.py +0 -0
  63. {renderers-0.1.8.dev26 → renderers-0.1.8.dev27}/tests/test_tokens_per_message.py +0 -0
@@ -31,3 +31,6 @@ coverage.xml
31
31
  .idea/
32
32
  .vscode/
33
33
  *.swp
34
+
35
+ # agent harness state
36
+ .claude/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev26
3
+ Version: 0.1.8.dev27
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -10,6 +10,7 @@ Requires-Dist: jinja2
10
10
  Requires-Dist: numpy
11
11
  Requires-Dist: openai-harmony>=0.0.8
12
12
  Requires-Dist: openai>=1.108.1
13
+ Requires-Dist: prime-pydantic-config>=0.3.0.dev83
13
14
  Requires-Dist: tiktoken
14
15
  Requires-Dist: transformers>=4.50.0
15
16
  Description-Content-Type: text/markdown
@@ -33,7 +34,7 @@ from transformers import AutoTokenizer
33
34
  from renderers import create_renderer
34
35
 
35
36
  tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
36
- r = create_renderer(tok, renderer="auto") # → Qwen3Renderer
37
+ r = create_renderer(tok) # → Qwen3Renderer (auto-resolved)
37
38
 
38
39
  prompt_ids = r.render_ids(
39
40
  [{"role": "user", "content": "hi"}],
@@ -87,17 +88,17 @@ Each hand-coded bridge:
87
88
  ### Picking a renderer
88
89
 
89
90
  ```python
90
- r = create_renderer(tok, renderer="auto")
91
+ r = create_renderer(tok) # AutoRendererConfig is the implicit default
91
92
  ```
92
93
 
93
- Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass `renderer=<name>` explicitly; unknown names fall back to `DefaultRenderer`.
94
+ Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass an explicit typed config (e.g. `Qwen3RendererConfig()`); unknown names fall back to `DefaultRenderer`.
94
95
 
95
96
  ### Pools
96
97
 
97
98
  ```python
98
99
  from renderers import create_renderer_pool
99
100
 
100
- pool = create_renderer_pool("Qwen/Qwen3-8B", renderer="auto", size=16)
101
+ pool = create_renderer_pool("Qwen/Qwen3-8B", size=16)
101
102
  with pool.checkout() as r:
102
103
  ids = r.render_ids(messages)
103
104
  ```
@@ -124,25 +125,50 @@ Empirical delta on Qwen3.5-35B-A3B + mini-swe-agent-plus, step 0:
124
125
 
125
126
  Each break fragments a rollout into multiple training samples — every fragment re-encodes its prefix, inflating compute roughly linearly with the number of breaks.
126
127
 
127
- ## Compaction overrides
128
+ ## Typed renderer configs
128
129
 
129
- `create_renderer` and `create_renderer_pool` accept two constructor-only flags:
130
+ Each renderer accepts a typed pydantic config that pins its template-control kwargs at construction. `create_renderer` and `create_renderer_pool` take one positional `config` argument:
130
131
 
131
132
  ```python
132
- preserve_all_thinking: bool = False
133
- preserve_thinking_between_tool_calls: bool = False
133
+ from renderers import (
134
+ create_renderer,
135
+ AutoRendererConfig,
136
+ Qwen3RendererConfig,
137
+ GLM5RendererConfig,
138
+ DefaultRendererConfig,
139
+ )
140
+
141
+ # Auto-resolve renderer from the tokenizer's model name. Carries the
142
+ # shared preserve_* flags; template kwargs require an explicit choice.
143
+ renderer = create_renderer(tokenizer)
144
+ renderer = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
145
+
146
+ # Explicit choice — the typed config exposes exactly the fields that
147
+ # renderer's chat template honours.
148
+ renderer = create_renderer(tokenizer, Qwen3RendererConfig(enable_thinking=False))
149
+ renderer = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
150
+
151
+ # Default renderer (apply_chat_template fallback) — extra fields are
152
+ # captured via pydantic ``extra="allow"`` and forwarded to the Jinja
153
+ # template; tool / reasoning parsers are typed.
154
+ renderer = create_renderer(
155
+ tokenizer,
156
+ DefaultRendererConfig(tool_parser="qwen3", reasoning_parser="think"),
157
+ )
134
158
  ```
135
159
 
136
- Defaults preserve byte-identity with the model's chat template. Flipping a flag at construction restores `reasoning_content` the template would otherwise drop:
160
+ Discriminated union: every per-renderer config is a variant of `RendererConfig`, dispatched on the `name` field. Bogus combinations (e.g. `add_vision_id` under `name="qwen3"`) error at construction with a `pydantic.ValidationError`. Downstream pydantic configs (prime-rl orchestrator, verifiers `ClientConfig`) hold a single field typed as `RendererConfig` and inherit the same strict-per-variant validation.
161
+
162
+ Two shared behaviour flags live on every variant via the `BaseRendererConfig` base class:
137
163
 
138
- - `preserve_all_thinking=True` — every past assistant's reasoning is kept.
139
- - `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (no-op for current renderers; reserved for future templates that drop it).
164
+ - `preserve_all_thinking=True` — every past assistant's `reasoning_content` is kept, even when the chat template would drop it.
165
+ - `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (post-last-user A-T-…-A block when it contains a tool response). A new user turn closes the block and drops its thinking.
140
166
 
141
- The canonical use case is **compaction**. Injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a "past cycle", so template-default rules drop their `reasoning_content` before the summarizer sees it. Build the renderer with `preserve_all_thinking=True` to keep reasoning visible end-to-end on those flows. Both flags only ever *add* tokens vs the template default.
167
+ These OR-compose with template-level toggles (e.g. GLM-5 `clear_thinking`, Nemotron-3 `truncate_history_thinking`): whichever setting says "keep" wins. The `preserve_*` flags can only ever *extend* retention — they never override a template kwarg into a "drop" decision. The canonical use case is **compaction**: injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a past cycle, and `preserve_all_thinking=True` keeps reasoning visible end-to-end.
142
168
 
143
169
  ## `DefaultRenderer`
144
170
 
145
- Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` kwargs (vLLM convention). `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
171
+ Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` (vLLM convention) plus arbitrary Jinja kwargs via `DefaultRendererConfig`'s `extra="allow"`. `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
146
172
 
147
173
  ## Roadmap
148
174
 
@@ -17,7 +17,7 @@ from transformers import AutoTokenizer
17
17
  from renderers import create_renderer
18
18
 
19
19
  tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
20
- r = create_renderer(tok, renderer="auto") # → Qwen3Renderer
20
+ r = create_renderer(tok) # → Qwen3Renderer (auto-resolved)
21
21
 
22
22
  prompt_ids = r.render_ids(
23
23
  [{"role": "user", "content": "hi"}],
@@ -71,17 +71,17 @@ Each hand-coded bridge:
71
71
  ### Picking a renderer
72
72
 
73
73
  ```python
74
- r = create_renderer(tok, renderer="auto")
74
+ r = create_renderer(tok) # AutoRendererConfig is the implicit default
75
75
  ```
76
76
 
77
- Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass `renderer=<name>` explicitly; unknown names fall back to `DefaultRenderer`.
77
+ Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass an explicit typed config (e.g. `Qwen3RendererConfig()`); unknown names fall back to `DefaultRenderer`.
78
78
 
79
79
  ### Pools
80
80
 
81
81
  ```python
82
82
  from renderers import create_renderer_pool
83
83
 
84
- pool = create_renderer_pool("Qwen/Qwen3-8B", renderer="auto", size=16)
84
+ pool = create_renderer_pool("Qwen/Qwen3-8B", size=16)
85
85
  with pool.checkout() as r:
86
86
  ids = r.render_ids(messages)
87
87
  ```
@@ -108,25 +108,50 @@ Empirical delta on Qwen3.5-35B-A3B + mini-swe-agent-plus, step 0:
108
108
 
109
109
  Each break fragments a rollout into multiple training samples — every fragment re-encodes its prefix, inflating compute roughly linearly with the number of breaks.
110
110
 
111
- ## Compaction overrides
111
+ ## Typed renderer configs
112
112
 
113
- `create_renderer` and `create_renderer_pool` accept two constructor-only flags:
113
+ Each renderer accepts a typed pydantic config that pins its template-control kwargs at construction. `create_renderer` and `create_renderer_pool` take one positional `config` argument:
114
114
 
115
115
  ```python
116
- preserve_all_thinking: bool = False
117
- preserve_thinking_between_tool_calls: bool = False
116
+ from renderers import (
117
+ create_renderer,
118
+ AutoRendererConfig,
119
+ Qwen3RendererConfig,
120
+ GLM5RendererConfig,
121
+ DefaultRendererConfig,
122
+ )
123
+
124
+ # Auto-resolve renderer from the tokenizer's model name. Carries the
125
+ # shared preserve_* flags; template kwargs require an explicit choice.
126
+ renderer = create_renderer(tokenizer)
127
+ renderer = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
128
+
129
+ # Explicit choice — the typed config exposes exactly the fields that
130
+ # renderer's chat template honours.
131
+ renderer = create_renderer(tokenizer, Qwen3RendererConfig(enable_thinking=False))
132
+ renderer = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
133
+
134
+ # Default renderer (apply_chat_template fallback) — extra fields are
135
+ # captured via pydantic ``extra="allow"`` and forwarded to the Jinja
136
+ # template; tool / reasoning parsers are typed.
137
+ renderer = create_renderer(
138
+ tokenizer,
139
+ DefaultRendererConfig(tool_parser="qwen3", reasoning_parser="think"),
140
+ )
118
141
  ```
119
142
 
120
- Defaults preserve byte-identity with the model's chat template. Flipping a flag at construction restores `reasoning_content` the template would otherwise drop:
143
+ Discriminated union: every per-renderer config is a variant of `RendererConfig`, dispatched on the `name` field. Bogus combinations (e.g. `add_vision_id` under `name="qwen3"`) error at construction with a `pydantic.ValidationError`. Downstream pydantic configs (prime-rl orchestrator, verifiers `ClientConfig`) hold a single field typed as `RendererConfig` and inherit the same strict-per-variant validation.
144
+
145
+ Two shared behaviour flags live on every variant via the `BaseRendererConfig` base class:
121
146
 
122
- - `preserve_all_thinking=True` — every past assistant's reasoning is kept.
123
- - `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (no-op for current renderers; reserved for future templates that drop it).
147
+ - `preserve_all_thinking=True` — every past assistant's `reasoning_content` is kept, even when the chat template would drop it.
148
+ - `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (post-last-user A-T-…-A block when it contains a tool response). A new user turn closes the block and drops its thinking.
124
149
 
125
- The canonical use case is **compaction**. Injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a "past cycle", so template-default rules drop their `reasoning_content` before the summarizer sees it. Build the renderer with `preserve_all_thinking=True` to keep reasoning visible end-to-end on those flows. Both flags only ever *add* tokens vs the template default.
150
+ These OR-compose with template-level toggles (e.g. GLM-5 `clear_thinking`, Nemotron-3 `truncate_history_thinking`): whichever setting says "keep" wins. The `preserve_*` flags can only ever *extend* retention — they never override a template kwarg into a "drop" decision. The canonical use case is **compaction**: injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a past cycle, and `preserve_all_thinking=True` keeps reasoning visible end-to-end.
126
151
 
127
152
  ## `DefaultRenderer`
128
153
 
129
- Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` kwargs (vLLM convention). `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
154
+ Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` (vLLM convention) plus arbitrary Jinja kwargs via `DefaultRendererConfig`'s `extra="allow"`. `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
130
155
 
131
156
  ## Roadmap
132
157
 
@@ -0,0 +1,163 @@
1
+ # Renderer config
2
+
3
+ `renderers.RendererConfig` is the typed input to `create_renderer` and
4
+ `create_renderer_pool`. It pins the renderer choice and its template-control
5
+ kwargs at construction.
6
+
7
+ ```python
8
+ from renderers import create_renderer, Qwen35RendererConfig
9
+
10
+ r = create_renderer(tokenizer, Qwen35RendererConfig(enable_thinking=False))
11
+ ```
12
+
13
+ `RendererConfig` is a pydantic discriminated union (one variant per renderer,
14
+ dispatched on the `name` field). Selecting a variant exposes exactly the
15
+ fields that renderer's chat template honours; anything else raises a
16
+ `pydantic.ValidationError` at construction.
17
+
18
+ ## Per-renderer configs
19
+
20
+ Each hand-coded renderer has a typed config class with the template kwargs
21
+ its Jinja chat template reads. For example:
22
+
23
+ | Renderer | Config class | Template fields |
24
+ |----------------|--------------------------|----------------------------------------------------------------|
25
+ | Qwen3 | `Qwen3RendererConfig` | `enable_thinking` |
26
+ | Qwen3.5 / 3.6 | `Qwen35RendererConfig` | `enable_thinking`, `add_vision_id` |
27
+ | Qwen3-VL | `Qwen3VLRendererConfig` | `add_vision_id` |
28
+ | GLM-5 / 5.1 | `GLM5RendererConfig` | `enable_thinking`, `clear_thinking` |
29
+ | GLM-4.5 | `GLM45RendererConfig` | `enable_thinking` |
30
+ | Nemotron-3 | `Nemotron3RendererConfig`| `enable_thinking`, `truncate_history_thinking` |
31
+ | Kimi K2.5 | `KimiK25RendererConfig` | `thinking` |
32
+ | MiniMax-M2 | `MiniMaxM2RendererConfig`| `model_identity` |
33
+ | Laguna-XS.2 | `LagunaXS2RendererConfig`| `enable_thinking`, `render_assistant_messages_raw` |
34
+ | gpt-oss | `GptOssRendererConfig` | `reasoning_effort`, `conversation_start_date` |
35
+
36
+ Field names mirror the upstream Jinja variable names. Passing
37
+ `Qwen3RendererConfig(add_vision_id=True)` raises — Qwen3 is text-only, so
38
+ the field doesn't exist on its config. Use
39
+ `type(config).template_field_names()` to introspect the fields that mirror
40
+ chat-template kwargs (parity is verified against `apply_chat_template` in
41
+ `tests/test_renderer_config_parity.py`).
42
+
43
+ Configs are frozen. To override a field, construct a new instance or call
44
+ `config.model_copy(update={...})`.
45
+
46
+ ## Auto-resolution
47
+
48
+ `create_renderer(tokenizer)` (no config) resolves the renderer from
49
+ `tokenizer.name_or_path` via `MODEL_RENDERER_MAP`:
50
+
51
+ ```python
52
+ r = create_renderer(tokenizer) # AutoRendererConfig() is the default
53
+ r = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
54
+ ```
55
+
56
+ `AutoRendererConfig` carries only the shared `preserve_*` flags. Template
57
+ kwargs depend on the renderer, so overriding them requires naming the
58
+ renderer explicitly:
59
+
60
+ ```python
61
+ r = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
62
+ ```
63
+
64
+ Auto-resolution fails loudly for VLMs that miss the exact-match lookup —
65
+ `DefaultRenderer` only knows `apply_chat_template` + text tokens, so silently
66
+ falling back for a VLM would produce token streams the trainer can't
67
+ reconstruct. Text-only fine-tunes without a registered renderer fall back to
68
+ `DefaultRenderer` and log the choice at INFO.
69
+
70
+ ## `preserve_*` flags
71
+
72
+ Every variant carries two renderer-agnostic flags, inherited from `BaseRendererConfig`:
73
+
74
+ - `preserve_all_thinking: bool = False` — re-emit `reasoning_content` on
75
+ every past assistant turn, even when the chat template would drop it.
76
+ - `preserve_thinking_between_tool_calls: bool = False` — re-emit
77
+ `reasoning_content` only inside the in-flight tool cycle (the contiguous
78
+ A-T-…-A block after the most recent `user` message, when it contains at
79
+ least one `tool` response). A new user turn closes the block and drops
80
+ its thinking.
81
+
82
+ These OR-compose with template-level toggles. GLM-5's `clear_thinking` and
83
+ Nemotron-3's `truncate_history_thinking` already gate past thinking; the
84
+ `preserve_*` flags add to that:
85
+
86
+ | `clear_thinking` | `preserve_all_thinking` | past thinking? |
87
+ |------------------|-------------------------|----------------|
88
+ | `True` (default — drop) | `False` (default) | dropped |
89
+ | `True` | `True` | kept |
90
+ | `False` (keep) | `False` | kept |
91
+ | `False` | `True` | kept |
92
+
93
+ `preserve_*` can only extend retention, never force a drop. The canonical
94
+ use case is **compaction**: injecting a `user` turn like *"summarize the work
95
+ so far"* puts every prior assistant in a past cycle, and
96
+ `preserve_all_thinking=True` keeps reasoning visible end-to-end.
97
+
98
+ ## `DefaultRendererConfig` accepts arbitrary Jinja kwargs
99
+
100
+ `DefaultRenderer` wraps `tokenizer.apply_chat_template` for any model that
101
+ doesn't have a hand-coded renderer. Its config sets `extra="allow"`:
102
+
103
+ ```python
104
+ from renderers import create_renderer, DefaultRendererConfig
105
+
106
+ r = create_renderer(
107
+ tokenizer,
108
+ DefaultRendererConfig(
109
+ tool_parser="qwen3", # registered in renderers.parsers
110
+ reasoning_parser="think",
111
+ enable_thinking=False, # forwarded to apply_chat_template
112
+ custom_jinja_kwarg=True, # ditto
113
+ ),
114
+ )
115
+ ```
116
+
117
+ `tool_parser` and `reasoning_parser` are typed because they configure
118
+ `DefaultRenderer`'s own parsing pipeline. Every other field lands in
119
+ `model_extra` and `DefaultRenderer._apply` forwards `model_extra` verbatim
120
+ to `apply_chat_template`.
121
+
122
+ ## Downstream integration
123
+
124
+ Downstream pydantic configs (`prime-rl` orchestrator, `verifiers`
125
+ `ClientConfig`) hold a single field typed as `RendererConfig`:
126
+
127
+ ```python
128
+ from pydantic import BaseModel, Field
129
+ from renderers import AutoRendererConfig, RendererConfig
130
+
131
+ class ClientConfig(BaseModel):
132
+ renderer: RendererConfig = Field(default_factory=AutoRendererConfig)
133
+ ```
134
+
135
+ In TOML / YAML, the discriminator routes deserialization:
136
+
137
+ ```toml
138
+ [client.renderer]
139
+ name = "qwen3.5"
140
+ enable_thinking = false
141
+ add_vision_id = true
142
+ preserve_all_thinking = true
143
+ ```
144
+
145
+ Pydantic dispatches on `name = "qwen3.5"` to `Qwen35RendererConfig`. Bogus
146
+ combinations (e.g. `add_vision_id` under `name = "qwen3"`) raise at
147
+ config-load with a clear message naming the offending field and the variant
148
+ that rejected it.
149
+
150
+ To construct a config from a renderer name string (e.g. from a CLI flag):
151
+
152
+ ```python
153
+ from renderers import config_from_name
154
+
155
+ cfg = config_from_name("glm-5") # → GLM5RendererConfig() with defaults
156
+ cfg = config_from_name("auto") # → None, the implicit "auto" form
157
+ ```
158
+
159
+ ## Renaming a renderer is a breaking change
160
+
161
+ The discriminator key is the renderer name string. Renaming `"qwen3.5"` to
162
+ something else would break any downstream config that references it by
163
+ name. Add new renderers; don't rename existing ones.
@@ -30,6 +30,11 @@ dependencies = [
30
30
  # around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls
31
31
  # outside the renderers package stay vanilla.
32
32
  "fastokens>=0.2.0",
33
+ # ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so
34
+ # the typed-config surface stays uniform with prime-rl / verifiers config
35
+ # bases. Transitively brings pydantic, which ``renderers.configs`` also
36
+ # imports directly.
37
+ "prime-pydantic-config>=0.3.0.dev83",
33
38
  ]
34
39
 
35
40
  [tool.hatch.version]
@@ -73,7 +78,7 @@ exclude-newer = "7 days"
73
78
  # MiniMax-M2's slow→fast tokenizer conversion path. Exempting it from
74
79
  # the project-wide 7-day cutoff lets the lockfile pick it up immediately
75
80
  # while the rest of the dependency graph stays gated.
76
- exclude-newer-package = { fastokens = false }
81
+ exclude-newer-package = { fastokens = false, "prime-pydantic-config" = false }
77
82
 
78
83
  [tool.ty.environment]
79
84
  python-version = "3.13"
@@ -38,9 +38,30 @@ from renderers.base import (
38
38
  trim_to_turn_close,
39
39
  )
40
40
  from renderers.client import OverlongPromptError
41
+ from renderers.configs import (
42
+ AutoRendererConfig,
43
+ BaseRendererConfig,
44
+ config_from_name,
45
+ DefaultRendererConfig,
46
+ DeepSeekV3RendererConfig,
47
+ GLM45RendererConfig,
48
+ GLM51RendererConfig,
49
+ GLM5RendererConfig,
50
+ GptOssRendererConfig,
51
+ KimiK25RendererConfig,
52
+ KimiK2RendererConfig,
53
+ LagunaXS2RendererConfig,
54
+ MiniMaxM2RendererConfig,
55
+ Nemotron3RendererConfig,
56
+ Qwen35RendererConfig,
57
+ Qwen36RendererConfig,
58
+ Qwen3RendererConfig,
59
+ Qwen3VLRendererConfig,
60
+ RendererConfig,
61
+ )
41
62
  from renderers.deepseek_v3 import DeepSeekV3Renderer
42
63
  from renderers.default import DefaultRenderer
43
- from renderers.glm5 import GLM5Renderer
64
+ from renderers.glm5 import GLM5Renderer, GLM51Renderer
44
65
  from renderers.glm45 import GLM45Renderer
45
66
  from renderers.gpt_oss import GptOssRenderer
46
67
  from renderers.kimi_k2 import KimiK2Renderer
@@ -54,34 +75,53 @@ from renderers.qwen35 import Qwen35Renderer
54
75
  from renderers.qwen36 import Qwen36Renderer
55
76
 
56
77
  __all__ = [
78
+ "AutoRendererConfig",
79
+ "BaseRendererConfig",
57
80
  "Content",
58
81
  "ContentPart",
59
82
  "DeepSeekV3Renderer",
83
+ "DeepSeekV3RendererConfig",
60
84
  "DefaultRenderer",
85
+ "DefaultRendererConfig",
61
86
  "GLM45Renderer",
87
+ "GLM45RendererConfig",
88
+ "GLM51Renderer",
89
+ "GLM51RendererConfig",
62
90
  "GLM5Renderer",
91
+ "GLM5RendererConfig",
63
92
  "GptOssRenderer",
93
+ "GptOssRendererConfig",
64
94
  "ImagePart",
65
- "KimiK2Renderer",
66
95
  "KimiK25Renderer",
96
+ "KimiK25RendererConfig",
97
+ "KimiK2Renderer",
98
+ "KimiK2RendererConfig",
67
99
  "LagunaXS2Renderer",
100
+ "LagunaXS2RendererConfig",
68
101
  "MULTIMODAL_MODELS",
69
102
  "Message",
70
103
  "MiniMaxM2Renderer",
104
+ "MiniMaxM2RendererConfig",
71
105
  "MultiModalData",
72
106
  "MultimodalRenderer",
73
107
  "Nemotron3Renderer",
108
+ "Nemotron3RendererConfig",
74
109
  "OverlongPromptError",
75
110
  "ParsedResponse",
76
111
  "ParsedToolCall",
77
112
  "PlaceholderRange",
78
- "Qwen3Renderer",
79
- "Qwen3VLRenderer",
80
113
  "Qwen35Renderer",
114
+ "Qwen35RendererConfig",
81
115
  "Qwen36Renderer",
116
+ "Qwen36RendererConfig",
117
+ "Qwen3Renderer",
118
+ "Qwen3RendererConfig",
119
+ "Qwen3VLRenderer",
120
+ "Qwen3VLRendererConfig",
82
121
  "RenderedConversation",
83
122
  "RenderedTokens",
84
123
  "Renderer",
124
+ "RendererConfig",
85
125
  "RendererPool",
86
126
  "TextPart",
87
127
  "ThinkingPart",
@@ -94,6 +134,7 @@ __all__ = [
94
134
  "attribute_text_segments",
95
135
  "build_training_sample",
96
136
  "build_trajectory_step",
137
+ "config_from_name",
97
138
  "create_renderer",
98
139
  "create_renderer_pool",
99
140
  "is_multimodal",
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev26'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev26')
21
+ __version__ = version = '0.1.8.dev27'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev27')
23
23
 
24
24
  __commit_id__ = commit_id = None