renderers 0.1.8.dev4__tar.gz → 0.1.8.dev27__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- renderers-0.1.8.dev27/.github/workflows/publish-dev.yml +104 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/.gitignore +3 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/PKG-INFO +41 -15
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/README.md +38 -13
- renderers-0.1.8.dev27/docs/renderer-config.md +163 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/pyproject.toml +15 -4
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/__init__.py +47 -4
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/base.py +438 -111
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/client.py +68 -16
- renderers-0.1.8.dev27/renderers/configs.py +468 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/deepseek_v3.py +124 -53
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/default.py +13 -20
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/glm45.py +176 -68
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/glm5.py +171 -63
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/gpt_oss.py +210 -50
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/kimi_k2.py +178 -84
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/kimi_k25.py +203 -82
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/laguna_xs2.py +199 -58
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/minimax_m2.py +233 -74
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/nemotron3.py +184 -78
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/qwen3.py +183 -78
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/qwen35.py +306 -117
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/qwen36.py +9 -6
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/qwen3_vl.py +241 -110
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/conftest.py +2 -1
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_bridge.py +2 -1
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_client.py +71 -5
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_gpt_oss_harmony_parity.py +4 -1
- renderers-0.1.8.dev27/tests/test_is_content.py +389 -0
- renderers-0.1.8.dev27/tests/test_kimi_k25_tool_schema.py +53 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_load_tokenizer_fastokens.py +44 -5
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_message_indices.py +1 -1
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_multimodal.py +183 -4
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_parse_response.py +2 -2
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_parse_response_robustness.py +1 -2
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_parsers.py +5 -4
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_preserve_thinking.py +53 -46
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_qwen35_size_coverage.py +5 -5
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_render_ids.py +3 -3
- renderers-0.1.8.dev27/tests/test_renderer_config.py +116 -0
- renderers-0.1.8.dev27/tests/test_renderer_config_parity.py +513 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_roundtrip.py +3 -3
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_tool_arg_type_preservation.py +2 -2
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/uv.lock +53 -4
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/LICENSE +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/README.md +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev4 → renderers-0.1.8.dev27}/tests/test_tokens_per_message.py +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
name: Publish Dev
|
|
2
|
+
|
|
3
|
+
# Tag every commit on main as ``renderers-v<next>.dev<N>`` and publish the
|
|
4
|
+
# wheel to PyPI as a pre-release. ``<next>`` is the latest release tag with
|
|
5
|
+
# its patch bumped; ``<N>`` is the number of commits since that release so
|
|
6
|
+
# each main commit maps to a unique PEP 440 dev version.
|
|
7
|
+
#
|
|
8
|
+
# Building from the freshly-created tag means hatch-vcs resolves the version
|
|
9
|
+
# cleanly (no ``+gHASH`` local segment), which PyPI requires.
|
|
10
|
+
|
|
11
|
+
on:
|
|
12
|
+
push:
|
|
13
|
+
branches: [main]
|
|
14
|
+
|
|
15
|
+
concurrency:
|
|
16
|
+
group: publish-dev-${{ github.ref }}
|
|
17
|
+
cancel-in-progress: false
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
tag:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
permissions:
|
|
23
|
+
contents: write
|
|
24
|
+
outputs:
|
|
25
|
+
tag: ${{ steps.compute.outputs.tag }}
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
with:
|
|
29
|
+
fetch-depth: 0
|
|
30
|
+
|
|
31
|
+
- name: Compute next dev tag
|
|
32
|
+
id: compute
|
|
33
|
+
run: |
|
|
34
|
+
set -euo pipefail
|
|
35
|
+
LATEST_RELEASE=$(git tag --list 'renderers-v*' --sort=-v:refname \
|
|
36
|
+
| grep -Ev '(dev|rc|a[0-9]|b[0-9])' \
|
|
37
|
+
| head -1)
|
|
38
|
+
if [ -z "$LATEST_RELEASE" ]; then
|
|
39
|
+
echo "No release tag matching 'renderers-v<MAJOR.MINOR.PATCH>' found" >&2
|
|
40
|
+
exit 1
|
|
41
|
+
fi
|
|
42
|
+
BASE=${LATEST_RELEASE#renderers-v}
|
|
43
|
+
MAJOR=$(echo "$BASE" | cut -d. -f1)
|
|
44
|
+
MINOR=$(echo "$BASE" | cut -d. -f2)
|
|
45
|
+
PATCH=$(echo "$BASE" | cut -d. -f3)
|
|
46
|
+
NEXT="${MAJOR}.${MINOR}.$((PATCH + 1))"
|
|
47
|
+
N=$(git rev-list --count "${LATEST_RELEASE}..HEAD")
|
|
48
|
+
TAG="renderers-v${NEXT}.dev${N}"
|
|
49
|
+
echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
|
|
50
|
+
echo "Computed tag: ${TAG} (base=${LATEST_RELEASE}, commits=${N})"
|
|
51
|
+
|
|
52
|
+
- name: Create and push tag
|
|
53
|
+
env:
|
|
54
|
+
TAG: ${{ steps.compute.outputs.tag }}
|
|
55
|
+
run: |
|
|
56
|
+
set -euo pipefail
|
|
57
|
+
if git ls-remote --exit-code --tags origin "refs/tags/${TAG}" >/dev/null 2>&1; then
|
|
58
|
+
echo "Tag ${TAG} already exists on origin — nothing to do" >&2
|
|
59
|
+
exit 0
|
|
60
|
+
fi
|
|
61
|
+
git config user.name 'github-actions[bot]'
|
|
62
|
+
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
|
63
|
+
git tag -a "$TAG" -m "Automated dev release ${TAG}"
|
|
64
|
+
git push origin "$TAG"
|
|
65
|
+
|
|
66
|
+
build:
|
|
67
|
+
needs: tag
|
|
68
|
+
runs-on: ubuntu-latest
|
|
69
|
+
permissions:
|
|
70
|
+
contents: read
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v4
|
|
73
|
+
with:
|
|
74
|
+
fetch-depth: 0
|
|
75
|
+
ref: refs/tags/${{ needs.tag.outputs.tag }}
|
|
76
|
+
|
|
77
|
+
- uses: astral-sh/setup-uv@v7
|
|
78
|
+
|
|
79
|
+
- name: Build renderers
|
|
80
|
+
run: uv build
|
|
81
|
+
|
|
82
|
+
- name: Upload dist artifacts
|
|
83
|
+
uses: actions/upload-artifact@v4
|
|
84
|
+
with:
|
|
85
|
+
name: dist-dev
|
|
86
|
+
path: dist/
|
|
87
|
+
if-no-files-found: error
|
|
88
|
+
retention-days: 7
|
|
89
|
+
|
|
90
|
+
publish:
|
|
91
|
+
needs: build
|
|
92
|
+
runs-on: ubuntu-latest
|
|
93
|
+
environment: pypi-prod
|
|
94
|
+
permissions:
|
|
95
|
+
id-token: write
|
|
96
|
+
steps:
|
|
97
|
+
- name: Download dist artifacts
|
|
98
|
+
uses: actions/download-artifact@v4
|
|
99
|
+
with:
|
|
100
|
+
name: dist-dev
|
|
101
|
+
path: dist/
|
|
102
|
+
|
|
103
|
+
- name: Publish to PyPI
|
|
104
|
+
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: renderers
|
|
3
|
-
Version: 0.1.8.
|
|
3
|
+
Version: 0.1.8.dev27
|
|
4
4
|
Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Requires-Python: <3.14,>=3.10
|
|
8
|
-
Requires-Dist: fastokens>=0.
|
|
8
|
+
Requires-Dist: fastokens>=0.2.0
|
|
9
9
|
Requires-Dist: jinja2
|
|
10
10
|
Requires-Dist: numpy
|
|
11
11
|
Requires-Dist: openai-harmony>=0.0.8
|
|
12
12
|
Requires-Dist: openai>=1.108.1
|
|
13
|
+
Requires-Dist: prime-pydantic-config>=0.3.0.dev83
|
|
13
14
|
Requires-Dist: tiktoken
|
|
14
15
|
Requires-Dist: transformers>=4.50.0
|
|
15
16
|
Description-Content-Type: text/markdown
|
|
@@ -33,7 +34,7 @@ from transformers import AutoTokenizer
|
|
|
33
34
|
from renderers import create_renderer
|
|
34
35
|
|
|
35
36
|
tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
|
|
36
|
-
r = create_renderer(tok
|
|
37
|
+
r = create_renderer(tok) # → Qwen3Renderer (auto-resolved)
|
|
37
38
|
|
|
38
39
|
prompt_ids = r.render_ids(
|
|
39
40
|
[{"role": "user", "content": "hi"}],
|
|
@@ -87,17 +88,17 @@ Each hand-coded bridge:
|
|
|
87
88
|
### Picking a renderer
|
|
88
89
|
|
|
89
90
|
```python
|
|
90
|
-
r = create_renderer(tok
|
|
91
|
+
r = create_renderer(tok) # AutoRendererConfig is the implicit default
|
|
91
92
|
```
|
|
92
93
|
|
|
93
|
-
Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass `
|
|
94
|
+
Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass an explicit typed config (e.g. `Qwen3RendererConfig()`); unknown names fall back to `DefaultRenderer`.
|
|
94
95
|
|
|
95
96
|
### Pools
|
|
96
97
|
|
|
97
98
|
```python
|
|
98
99
|
from renderers import create_renderer_pool
|
|
99
100
|
|
|
100
|
-
pool = create_renderer_pool("Qwen/Qwen3-8B",
|
|
101
|
+
pool = create_renderer_pool("Qwen/Qwen3-8B", size=16)
|
|
101
102
|
with pool.checkout() as r:
|
|
102
103
|
ids = r.render_ids(messages)
|
|
103
104
|
```
|
|
@@ -124,25 +125,50 @@ Empirical delta on Qwen3.5-35B-A3B + mini-swe-agent-plus, step 0:
|
|
|
124
125
|
|
|
125
126
|
Each break fragments a rollout into multiple training samples — every fragment re-encodes its prefix, inflating compute roughly linearly with the number of breaks.
|
|
126
127
|
|
|
127
|
-
##
|
|
128
|
+
## Typed renderer configs
|
|
128
129
|
|
|
129
|
-
`create_renderer` and `create_renderer_pool`
|
|
130
|
+
Each renderer accepts a typed pydantic config that pins its template-control kwargs at construction. `create_renderer` and `create_renderer_pool` take one positional `config` argument:
|
|
130
131
|
|
|
131
132
|
```python
|
|
132
|
-
|
|
133
|
-
|
|
133
|
+
from renderers import (
|
|
134
|
+
create_renderer,
|
|
135
|
+
AutoRendererConfig,
|
|
136
|
+
Qwen3RendererConfig,
|
|
137
|
+
GLM5RendererConfig,
|
|
138
|
+
DefaultRendererConfig,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
# Auto-resolve renderer from the tokenizer's model name. Carries the
|
|
142
|
+
# shared preserve_* flags; template kwargs require an explicit choice.
|
|
143
|
+
renderer = create_renderer(tokenizer)
|
|
144
|
+
renderer = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
|
|
145
|
+
|
|
146
|
+
# Explicit choice — the typed config exposes exactly the fields that
|
|
147
|
+
# renderer's chat template honours.
|
|
148
|
+
renderer = create_renderer(tokenizer, Qwen3RendererConfig(enable_thinking=False))
|
|
149
|
+
renderer = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
|
|
150
|
+
|
|
151
|
+
# Default renderer (apply_chat_template fallback) — extra fields are
|
|
152
|
+
# captured via pydantic ``extra="allow"`` and forwarded to the Jinja
|
|
153
|
+
# template; tool / reasoning parsers are typed.
|
|
154
|
+
renderer = create_renderer(
|
|
155
|
+
tokenizer,
|
|
156
|
+
DefaultRendererConfig(tool_parser="qwen3", reasoning_parser="think"),
|
|
157
|
+
)
|
|
134
158
|
```
|
|
135
159
|
|
|
136
|
-
|
|
160
|
+
Discriminated union: every per-renderer config is a variant of `RendererConfig`, dispatched on the `name` field. Bogus combinations (e.g. `add_vision_id` under `name="qwen3"`) error at construction with a `pydantic.ValidationError`. Downstream pydantic configs (prime-rl orchestrator, verifiers `ClientConfig`) hold a single field typed as `RendererConfig` and inherit the same strict-per-variant validation.
|
|
161
|
+
|
|
162
|
+
Two shared behaviour flags live on every variant via `BaseRendererConfig`:
|
|
137
163
|
|
|
138
|
-
- `preserve_all_thinking=True` — every past assistant's
|
|
139
|
-
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (
|
|
164
|
+
- `preserve_all_thinking=True` — every past assistant's `reasoning_content` is kept, even when the chat template would drop it.
|
|
165
|
+
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (post-last-user A-T-…-A block when it contains a tool response). A new user turn closes the block and drops its thinking.
|
|
140
166
|
|
|
141
|
-
The canonical use case is **compaction
|
|
167
|
+
These OR-compose with template-level toggles (e.g. GLM-5 `clear_thinking`, Nemotron-3 `truncate_history_thinking`): either flag saying "keep" wins. preserve_* can only ever *extend* retention — never override a template kwarg into a "drop" decision. The canonical use case is **compaction**: injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a past cycle, and `preserve_all_thinking=True` keeps reasoning visible end-to-end.
|
|
142
168
|
|
|
143
169
|
## `DefaultRenderer`
|
|
144
170
|
|
|
145
|
-
Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser`
|
|
171
|
+
Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` (vLLM convention) plus arbitrary Jinja kwargs via `DefaultRendererConfig`'s `extra="allow"`. `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
|
|
146
172
|
|
|
147
173
|
## Roadmap
|
|
148
174
|
|
|
@@ -17,7 +17,7 @@ from transformers import AutoTokenizer
|
|
|
17
17
|
from renderers import create_renderer
|
|
18
18
|
|
|
19
19
|
tok = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
|
|
20
|
-
r = create_renderer(tok
|
|
20
|
+
r = create_renderer(tok) # → Qwen3Renderer (auto-resolved)
|
|
21
21
|
|
|
22
22
|
prompt_ids = r.render_ids(
|
|
23
23
|
[{"role": "user", "content": "hi"}],
|
|
@@ -71,17 +71,17 @@ Each hand-coded bridge:
|
|
|
71
71
|
### Picking a renderer
|
|
72
72
|
|
|
73
73
|
```python
|
|
74
|
-
r = create_renderer(tok
|
|
74
|
+
r = create_renderer(tok) # AutoRendererConfig is the implicit default
|
|
75
75
|
```
|
|
76
76
|
|
|
77
|
-
Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass `
|
|
77
|
+
Auto-detect matches `tokenizer.name_or_path` against `MODEL_RENDERER_MAP` by **exact match**. Prefix matching is intentionally off — same architecture can ship different chat templates (base vs instruct, fine-tune renames). Fine-tunes must pass an explicit typed config (e.g. `Qwen3RendererConfig()`); unknown names fall back to `DefaultRenderer`.
|
|
78
78
|
|
|
79
79
|
### Pools
|
|
80
80
|
|
|
81
81
|
```python
|
|
82
82
|
from renderers import create_renderer_pool
|
|
83
83
|
|
|
84
|
-
pool = create_renderer_pool("Qwen/Qwen3-8B",
|
|
84
|
+
pool = create_renderer_pool("Qwen/Qwen3-8B", size=16)
|
|
85
85
|
with pool.checkout() as r:
|
|
86
86
|
ids = r.render_ids(messages)
|
|
87
87
|
```
|
|
@@ -108,25 +108,50 @@ Empirical delta on Qwen3.5-35B-A3B + mini-swe-agent-plus, step 0:
|
|
|
108
108
|
|
|
109
109
|
Each break fragments a rollout into multiple training samples — every fragment re-encodes its prefix, inflating compute roughly linearly with the number of breaks.
|
|
110
110
|
|
|
111
|
-
##
|
|
111
|
+
## Typed renderer configs
|
|
112
112
|
|
|
113
|
-
`create_renderer` and `create_renderer_pool`
|
|
113
|
+
Each renderer accepts a typed pydantic config that pins its template-control kwargs at construction. `create_renderer` and `create_renderer_pool` take one positional `config` argument:
|
|
114
114
|
|
|
115
115
|
```python
|
|
116
|
-
|
|
117
|
-
|
|
116
|
+
from renderers import (
|
|
117
|
+
create_renderer,
|
|
118
|
+
AutoRendererConfig,
|
|
119
|
+
Qwen3RendererConfig,
|
|
120
|
+
GLM5RendererConfig,
|
|
121
|
+
DefaultRendererConfig,
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
# Auto-resolve renderer from the tokenizer's model name. Carries the
|
|
125
|
+
# shared preserve_* flags; template kwargs require an explicit choice.
|
|
126
|
+
renderer = create_renderer(tokenizer)
|
|
127
|
+
renderer = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
|
|
128
|
+
|
|
129
|
+
# Explicit choice — the typed config exposes exactly the fields that
|
|
130
|
+
# renderer's chat template honours.
|
|
131
|
+
renderer = create_renderer(tokenizer, Qwen3RendererConfig(enable_thinking=False))
|
|
132
|
+
renderer = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
|
|
133
|
+
|
|
134
|
+
# Default renderer (apply_chat_template fallback) — extra fields are
|
|
135
|
+
# captured via pydantic ``extra="allow"`` and forwarded to the Jinja
|
|
136
|
+
# template; tool / reasoning parsers are typed.
|
|
137
|
+
renderer = create_renderer(
|
|
138
|
+
tokenizer,
|
|
139
|
+
DefaultRendererConfig(tool_parser="qwen3", reasoning_parser="think"),
|
|
140
|
+
)
|
|
118
141
|
```
|
|
119
142
|
|
|
120
|
-
|
|
143
|
+
Discriminated union: every per-renderer config is a variant of `RendererConfig`, dispatched on the `name` field. Bogus combinations (e.g. `add_vision_id` under `name="qwen3"`) error at construction with a `pydantic.ValidationError`. Downstream pydantic configs (prime-rl orchestrator, verifiers `ClientConfig`) hold a single field typed as `RendererConfig` and inherit the same strict-per-variant validation.
|
|
144
|
+
|
|
145
|
+
Two shared behaviour flags live on every variant via `BaseRendererConfig`:
|
|
121
146
|
|
|
122
|
-
- `preserve_all_thinking=True` — every past assistant's
|
|
123
|
-
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (
|
|
147
|
+
- `preserve_all_thinking=True` — every past assistant's `reasoning_content` is kept, even when the chat template would drop it.
|
|
148
|
+
- `preserve_thinking_between_tool_calls=True` — reasoning is kept on assistants in the in-flight tool cycle (post-last-user A-T-…-A block when it contains a tool response). A new user turn closes the block and drops its thinking.
|
|
124
149
|
|
|
125
|
-
The canonical use case is **compaction
|
|
150
|
+
These OR-compose with template-level toggles (e.g. GLM-5 `clear_thinking`, Nemotron-3 `truncate_history_thinking`): either flag saying "keep" wins. preserve_* can only ever *extend* retention — never override a template kwarg into a "drop" decision. The canonical use case is **compaction**: injecting a `user` turn like *"summarize the work so far"* puts every prior assistant in a past cycle, and `preserve_all_thinking=True` keeps reasoning visible end-to-end.
|
|
126
151
|
|
|
127
152
|
## `DefaultRenderer`
|
|
128
153
|
|
|
129
|
-
Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser`
|
|
154
|
+
Fallback for unsupported models. Wraps `apply_chat_template` and accepts `tool_parser` / `reasoning_parser` (vLLM convention) plus arbitrary Jinja kwargs via `DefaultRendererConfig`'s `extra="allow"`. `bridge_to_next_turn` returns `None` because the template's close is unknown, so multi-turn rollouts fall back to full re-render. Implementing a hand-coded renderer is a few hundred lines of Python (`render_ids` + `parse_response` + `bridge_to_next_turn`) and is the only path that closes the failure modes above by construction.
|
|
130
155
|
|
|
131
156
|
## Roadmap
|
|
132
157
|
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# Renderer config
|
|
2
|
+
|
|
3
|
+
`renderers.RendererConfig` is the typed input to `create_renderer` and
|
|
4
|
+
`create_renderer_pool`. It pins the renderer choice and its template-control
|
|
5
|
+
kwargs at construction.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from renderers import create_renderer, Qwen35RendererConfig
|
|
9
|
+
|
|
10
|
+
r = create_renderer(tokenizer, Qwen35RendererConfig(enable_thinking=False))
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
`RendererConfig` is a pydantic discriminated union (one variant per renderer,
|
|
14
|
+
dispatched on the `name` field). Selecting a variant exposes exactly the
|
|
15
|
+
fields that renderer's chat template honours; anything else raises a
|
|
16
|
+
`pydantic.ValidationError` at construction.
|
|
17
|
+
|
|
18
|
+
## Per-renderer configs
|
|
19
|
+
|
|
20
|
+
Each hand-coded renderer has a typed config class with the template kwargs
|
|
21
|
+
its Jinja chat template reads. For example:
|
|
22
|
+
|
|
23
|
+
| Renderer | Config class | Template fields |
|
|
24
|
+
|----------------|--------------------------|----------------------------------------------------------------|
|
|
25
|
+
| Qwen3 | `Qwen3RendererConfig` | `enable_thinking` |
|
|
26
|
+
| Qwen3.5 / 3.6 | `Qwen35RendererConfig` | `enable_thinking`, `add_vision_id` |
|
|
27
|
+
| Qwen3-VL | `Qwen3VLRendererConfig` | `add_vision_id` |
|
|
28
|
+
| GLM-5 / 5.1 | `GLM5RendererConfig` | `enable_thinking`, `clear_thinking` |
|
|
29
|
+
| GLM-4.5 | `GLM45RendererConfig` | `enable_thinking` |
|
|
30
|
+
| Nemotron-3 | `Nemotron3RendererConfig`| `enable_thinking`, `truncate_history_thinking` |
|
|
31
|
+
| Kimi K2.5 | `KimiK25RendererConfig` | `thinking` |
|
|
32
|
+
| MiniMax-M2 | `MiniMaxM2RendererConfig`| `model_identity` |
|
|
33
|
+
| Laguna-XS.2 | `LagunaXS2RendererConfig`| `enable_thinking`, `render_assistant_messages_raw` |
|
|
34
|
+
| gpt-oss | `GptOssRendererConfig` | `reasoning_effort`, `conversation_start_date` |
|
|
35
|
+
|
|
36
|
+
Field names mirror the upstream Jinja variable names. Passing
|
|
37
|
+
`Qwen3RendererConfig(add_vision_id=True)` raises — Qwen3 is text-only, so
|
|
38
|
+
the field doesn't exist on its config. Use
|
|
39
|
+
`type(config).template_field_names()` to introspect the fields that mirror
|
|
40
|
+
chat-template kwargs (parity is verified against `apply_chat_template` in
|
|
41
|
+
`tests/test_renderer_config_parity.py`).
|
|
42
|
+
|
|
43
|
+
Configs are frozen. To override a field, construct a new instance or call
|
|
44
|
+
`config.model_copy(update={...})`.
|
|
45
|
+
|
|
46
|
+
## Auto-resolution
|
|
47
|
+
|
|
48
|
+
`create_renderer(tokenizer)` (no config) resolves the renderer from
|
|
49
|
+
`tokenizer.name_or_path` via `MODEL_RENDERER_MAP`:
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
r = create_renderer(tokenizer) # AutoRendererConfig() is the default
|
|
53
|
+
r = create_renderer(tokenizer, AutoRendererConfig(preserve_all_thinking=True))
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
`AutoRendererConfig` carries only the shared `preserve_*` flags. Template
|
|
57
|
+
kwargs depend on the renderer, so overriding them requires naming the
|
|
58
|
+
renderer explicitly:
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
r = create_renderer(tokenizer, GLM5RendererConfig(clear_thinking=False))
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Auto-resolution fails loudly for VLMs that miss the exact-match lookup —
|
|
65
|
+
`DefaultRenderer` only knows `apply_chat_template` + text tokens, so silently
|
|
66
|
+
falling back for a VLM would produce token streams the trainer can't
|
|
67
|
+
reconstruct. Text-only fine-tunes without a registered renderer fall back to
|
|
68
|
+
`DefaultRenderer` and log the choice at INFO.
|
|
69
|
+
|
|
70
|
+
## `preserve_*` flags
|
|
71
|
+
|
|
72
|
+
Every variant carries two renderer-agnostic flags on `BaseRendererConfig`:
|
|
73
|
+
|
|
74
|
+
- `preserve_all_thinking: bool = False` — re-emit `reasoning_content` on
|
|
75
|
+
every past assistant turn, even when the chat template would drop it.
|
|
76
|
+
- `preserve_thinking_between_tool_calls: bool = False` — re-emit
|
|
77
|
+
`reasoning_content` only inside the in-flight tool cycle (the contiguous
|
|
78
|
+
A-T-…-A block after the most recent `user` message, when it contains at
|
|
79
|
+
least one `tool` response). A new user turn closes the block and drops
|
|
80
|
+
its thinking.
|
|
81
|
+
|
|
82
|
+
These OR-compose with template-level toggles. GLM-5's `clear_thinking` and
|
|
83
|
+
Nemotron-3's `truncate_history_thinking` already gate past thinking; the
|
|
84
|
+
`preserve_*` flags add to that:
|
|
85
|
+
|
|
86
|
+
| `clear_thinking` | `preserve_all_thinking` | past thinking? |
|
|
87
|
+
|------------------|-------------------------|----------------|
|
|
88
|
+
| `True` (default — drop) | `False` (default) | dropped |
|
|
89
|
+
| `True` | `True` | kept |
|
|
90
|
+
| `False` (keep) | `False` | kept |
|
|
91
|
+
| `False` | `True` | kept |
|
|
92
|
+
|
|
93
|
+
`preserve_*` can only extend retention, never force a drop. The canonical
|
|
94
|
+
use case is **compaction**: injecting a `user` turn like *"summarize the work
|
|
95
|
+
so far"* puts every prior assistant in a past cycle, and
|
|
96
|
+
`preserve_all_thinking=True` keeps reasoning visible end-to-end.
|
|
97
|
+
|
|
98
|
+
## `DefaultRendererConfig` accepts arbitrary Jinja kwargs
|
|
99
|
+
|
|
100
|
+
`DefaultRenderer` wraps `tokenizer.apply_chat_template` for any model that
|
|
101
|
+
doesn't have a hand-coded renderer. Its config sets `extra="allow"`:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from renderers import create_renderer, DefaultRendererConfig
|
|
105
|
+
|
|
106
|
+
r = create_renderer(
|
|
107
|
+
tokenizer,
|
|
108
|
+
DefaultRendererConfig(
|
|
109
|
+
tool_parser="qwen3", # registered in renderers.parsers
|
|
110
|
+
reasoning_parser="think",
|
|
111
|
+
enable_thinking=False, # forwarded to apply_chat_template
|
|
112
|
+
custom_jinja_kwarg=True, # ditto
|
|
113
|
+
),
|
|
114
|
+
)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
`tool_parser` and `reasoning_parser` are typed because they configure
|
|
118
|
+
`DefaultRenderer`'s own parsing pipeline. Every other field lands in
|
|
119
|
+
`model_extra` and `DefaultRenderer._apply` forwards `model_extra` verbatim
|
|
120
|
+
to `apply_chat_template`.
|
|
121
|
+
|
|
122
|
+
## Downstream integration
|
|
123
|
+
|
|
124
|
+
Downstream pydantic configs (`prime-rl` orchestrator, `verifiers`
|
|
125
|
+
`ClientConfig`) hold a single field typed as `RendererConfig`:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from pydantic import BaseModel, Field
|
|
129
|
+
from renderers import AutoRendererConfig, RendererConfig
|
|
130
|
+
|
|
131
|
+
class ClientConfig(BaseModel):
|
|
132
|
+
renderer: RendererConfig = Field(default_factory=AutoRendererConfig)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
In TOML / YAML, the discriminator routes deserialization:
|
|
136
|
+
|
|
137
|
+
```toml
|
|
138
|
+
[client.renderer]
|
|
139
|
+
name = "qwen3.5"
|
|
140
|
+
enable_thinking = false
|
|
141
|
+
add_vision_id = true
|
|
142
|
+
preserve_all_thinking = true
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Pydantic dispatches on `name = "qwen3.5"` to `Qwen35RendererConfig`. Bogus
|
|
146
|
+
combinations (e.g. `add_vision_id` under `name = "qwen3"`) raise at
|
|
147
|
+
config-load with a clear message naming the offending field and the variant
|
|
148
|
+
that rejected it.
|
|
149
|
+
|
|
150
|
+
To construct a config from a renderer name string (e.g. from a CLI flag):
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from renderers import config_from_name
|
|
154
|
+
|
|
155
|
+
cfg = config_from_name("glm-5") # → GLM5RendererConfig() with defaults
|
|
156
|
+
cfg = config_from_name("auto") # → None, the implicit "auto" form
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Renaming a renderer is a breaking change
|
|
160
|
+
|
|
161
|
+
The discriminator key is the renderer name string. Renaming `"qwen3.5"` to
|
|
162
|
+
something else would break any downstream config that references it by
|
|
163
|
+
name. Add new renderers; don't rename existing ones.
|
|
@@ -26,10 +26,15 @@ dependencies = [
|
|
|
26
26
|
"openai-harmony>=0.0.8",
|
|
27
27
|
# Crusoe's Rust BPE tokenizer; ~10x faster encode vs HF's tokenizers.
|
|
28
28
|
# ``load_tokenizer`` patches it in by default for every supported model
|
|
29
|
-
# except a small denylist (DeepSeek-V3 family
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
"fastokens>=0.
|
|
29
|
+
# except a small denylist (DeepSeek-V3 family). The patch is bracketed
|
|
30
|
+
# around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls
|
|
31
|
+
# outside the renderers package stay vanilla.
|
|
32
|
+
"fastokens>=0.2.0",
|
|
33
|
+
# ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so
|
|
34
|
+
# the typed-config surface stays uniform with prime-rl / verifiers config
|
|
35
|
+
# bases. Transitively brings pydantic, which ``renderers.configs`` also
|
|
36
|
+
# imports directly.
|
|
37
|
+
"prime-pydantic-config>=0.3.0.dev83",
|
|
33
38
|
]
|
|
34
39
|
|
|
35
40
|
[tool.hatch.version]
|
|
@@ -68,6 +73,12 @@ dev = [
|
|
|
68
73
|
|
|
69
74
|
[tool.uv]
|
|
70
75
|
exclude-newer = "7 days"
|
|
76
|
+
# fastokens 0.2.0 was published on 2026-05-17 and contains the
|
|
77
|
+
# ``unpatch_transformers`` fix (crusoecloud/fastokens#32) needed for
|
|
78
|
+
# MiniMax-M2's slow→fast tokenizer conversion path. Exempting it from
|
|
79
|
+
# the project-wide 7-day cutoff lets the lockfile pick it up immediately
|
|
80
|
+
# while the rest of the dependency graph stays gated. ``prime-pydantic-config`` is exempted from the cutoff as well.
|
|
81
|
+
exclude-newer-package = { fastokens = false, "prime-pydantic-config" = false }
|
|
71
82
|
|
|
72
83
|
[tool.ty.environment]
|
|
73
84
|
python-version = "3.13"
|
|
@@ -28,6 +28,7 @@ from renderers.base import (
|
|
|
28
28
|
ToolCallParseStatus,
|
|
29
29
|
ToolSpec,
|
|
30
30
|
VideoPart,
|
|
31
|
+
attribute_text_segments,
|
|
31
32
|
build_training_sample,
|
|
32
33
|
build_trajectory_step,
|
|
33
34
|
create_renderer,
|
|
@@ -37,9 +38,30 @@ from renderers.base import (
|
|
|
37
38
|
trim_to_turn_close,
|
|
38
39
|
)
|
|
39
40
|
from renderers.client import OverlongPromptError
|
|
41
|
+
from renderers.configs import (
|
|
42
|
+
AutoRendererConfig,
|
|
43
|
+
BaseRendererConfig,
|
|
44
|
+
config_from_name,
|
|
45
|
+
DefaultRendererConfig,
|
|
46
|
+
DeepSeekV3RendererConfig,
|
|
47
|
+
GLM45RendererConfig,
|
|
48
|
+
GLM51RendererConfig,
|
|
49
|
+
GLM5RendererConfig,
|
|
50
|
+
GptOssRendererConfig,
|
|
51
|
+
KimiK25RendererConfig,
|
|
52
|
+
KimiK2RendererConfig,
|
|
53
|
+
LagunaXS2RendererConfig,
|
|
54
|
+
MiniMaxM2RendererConfig,
|
|
55
|
+
Nemotron3RendererConfig,
|
|
56
|
+
Qwen35RendererConfig,
|
|
57
|
+
Qwen36RendererConfig,
|
|
58
|
+
Qwen3RendererConfig,
|
|
59
|
+
Qwen3VLRendererConfig,
|
|
60
|
+
RendererConfig,
|
|
61
|
+
)
|
|
40
62
|
from renderers.deepseek_v3 import DeepSeekV3Renderer
|
|
41
63
|
from renderers.default import DefaultRenderer
|
|
42
|
-
from renderers.glm5 import GLM5Renderer
|
|
64
|
+
from renderers.glm5 import GLM5Renderer, GLM51Renderer
|
|
43
65
|
from renderers.glm45 import GLM45Renderer
|
|
44
66
|
from renderers.gpt_oss import GptOssRenderer
|
|
45
67
|
from renderers.kimi_k2 import KimiK2Renderer
|
|
@@ -53,34 +75,53 @@ from renderers.qwen35 import Qwen35Renderer
|
|
|
53
75
|
from renderers.qwen36 import Qwen36Renderer
|
|
54
76
|
|
|
55
77
|
__all__ = [
|
|
78
|
+
"AutoRendererConfig",
|
|
79
|
+
"BaseRendererConfig",
|
|
56
80
|
"Content",
|
|
57
81
|
"ContentPart",
|
|
58
82
|
"DeepSeekV3Renderer",
|
|
83
|
+
"DeepSeekV3RendererConfig",
|
|
59
84
|
"DefaultRenderer",
|
|
85
|
+
"DefaultRendererConfig",
|
|
60
86
|
"GLM45Renderer",
|
|
87
|
+
"GLM45RendererConfig",
|
|
88
|
+
"GLM51Renderer",
|
|
89
|
+
"GLM51RendererConfig",
|
|
61
90
|
"GLM5Renderer",
|
|
91
|
+
"GLM5RendererConfig",
|
|
62
92
|
"GptOssRenderer",
|
|
93
|
+
"GptOssRendererConfig",
|
|
63
94
|
"ImagePart",
|
|
64
|
-
"KimiK2Renderer",
|
|
65
95
|
"KimiK25Renderer",
|
|
96
|
+
"KimiK25RendererConfig",
|
|
97
|
+
"KimiK2Renderer",
|
|
98
|
+
"KimiK2RendererConfig",
|
|
66
99
|
"LagunaXS2Renderer",
|
|
100
|
+
"LagunaXS2RendererConfig",
|
|
67
101
|
"MULTIMODAL_MODELS",
|
|
68
102
|
"Message",
|
|
69
103
|
"MiniMaxM2Renderer",
|
|
104
|
+
"MiniMaxM2RendererConfig",
|
|
70
105
|
"MultiModalData",
|
|
71
106
|
"MultimodalRenderer",
|
|
72
107
|
"Nemotron3Renderer",
|
|
108
|
+
"Nemotron3RendererConfig",
|
|
73
109
|
"OverlongPromptError",
|
|
74
110
|
"ParsedResponse",
|
|
75
111
|
"ParsedToolCall",
|
|
76
112
|
"PlaceholderRange",
|
|
77
|
-
"Qwen3Renderer",
|
|
78
|
-
"Qwen3VLRenderer",
|
|
79
113
|
"Qwen35Renderer",
|
|
114
|
+
"Qwen35RendererConfig",
|
|
80
115
|
"Qwen36Renderer",
|
|
116
|
+
"Qwen36RendererConfig",
|
|
117
|
+
"Qwen3Renderer",
|
|
118
|
+
"Qwen3RendererConfig",
|
|
119
|
+
"Qwen3VLRenderer",
|
|
120
|
+
"Qwen3VLRendererConfig",
|
|
81
121
|
"RenderedConversation",
|
|
82
122
|
"RenderedTokens",
|
|
83
123
|
"Renderer",
|
|
124
|
+
"RendererConfig",
|
|
84
125
|
"RendererPool",
|
|
85
126
|
"TextPart",
|
|
86
127
|
"ThinkingPart",
|
|
@@ -90,8 +131,10 @@ __all__ = [
|
|
|
90
131
|
"ToolSpec",
|
|
91
132
|
"VideoPart",
|
|
92
133
|
"__version__",
|
|
134
|
+
"attribute_text_segments",
|
|
93
135
|
"build_training_sample",
|
|
94
136
|
"build_trajectory_step",
|
|
137
|
+
"config_from_name",
|
|
95
138
|
"create_renderer",
|
|
96
139
|
"create_renderer_pool",
|
|
97
140
|
"is_multimodal",
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, '
|
|
21
|
+
__version__ = version = '0.1.8.dev27'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev27')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|