renderers 0.1.8.dev33__tar.gz → 0.1.8.dev35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/PKG-INFO +1 -1
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/base.py +71 -8
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/qwen35.py +42 -37
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_qwen35_size_coverage.py +32 -9
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.gitignore +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/LICENSE +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/README.md +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/README.md +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/pyproject.toml +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/__init__.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/client.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/configs.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/deepseek_v3.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/default.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/glm45.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/glm5.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/kimi_k25.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/nemotron3.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/qwen3.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/renderers/qwen3_vl.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/conftest.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_load_tokenizer_fastokens.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_parse_response.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_renderer_config_parity.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_roundtrip.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/uv.lock +0 -0
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev33'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev33')
|
|
21
|
+
__version__ = version = '0.1.8.dev35'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev35')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -911,8 +911,8 @@ MODEL_RENDERER_MAP: dict[str, str] = {
|
|
|
911
911
|
# ``enable_thinking=true`` (open ``<think>\n`` at the gen prompt);
|
|
912
912
|
# the smaller 0.8B / 2B variants flip the polarity (default
|
|
913
913
|
# ``enable_thinking=false``, empty ``<think>\n\n</think>\n\n``).
|
|
914
|
-
# ``Qwen35Renderer``
|
|
915
|
-
#
|
|
914
|
+
# ``Qwen35Renderer`` hard-codes this polarity per model
|
|
915
|
+
# (``_ENABLE_THINKING_DEFAULTS``), so all seven sizes are
|
|
916
916
|
# token-for-token parity-tested against their own
|
|
917
917
|
# ``apply_chat_template`` — including with
|
|
918
918
|
# ``add_generation_prompt=True``.
|
|
@@ -1089,7 +1089,6 @@ def _patched_load(model_name_or_path: str, **kwargs):
|
|
|
1089
1089
|
path is still discoverable in logs.
|
|
1090
1090
|
"""
|
|
1091
1091
|
import fastokens
|
|
1092
|
-
from transformers import AutoTokenizer
|
|
1093
1092
|
|
|
1094
1093
|
global _FASTOKENS_ANNOUNCED
|
|
1095
1094
|
|
|
@@ -1102,13 +1101,72 @@ def _patched_load(model_name_or_path: str, **kwargs):
|
|
|
1102
1101
|
)
|
|
1103
1102
|
_FASTOKENS_ANNOUNCED = True
|
|
1104
1103
|
try:
|
|
1105
|
-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
|
|
1104
|
+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
|
|
1106
1105
|
finally:
|
|
1107
1106
|
with _FASTOKENS_PATCH_LOCK:
|
|
1108
1107
|
with contextlib.redirect_stdout(io.StringIO()):
|
|
1109
1108
|
fastokens.unpatch_transformers()
|
|
1110
1109
|
|
|
1111
1110
|
|
|
1111
|
+
def _load_fast_tokenizer_directly(
|
|
1112
|
+
model_name_or_path: str, revision: str | None
|
|
1113
|
+
) -> Any | None:
|
|
1114
|
+
"""Load a self-contained fast tokenizer without building the model config.
|
|
1115
|
+
|
|
1116
|
+
``AutoTokenizer.from_pretrained`` eagerly constructs the *model* config to
|
|
1117
|
+
resolve the tokenizer class — even for a plain ``PreTrainedTokenizerFast``.
|
|
1118
|
+
That construction can raise on modeling-only concerns the tokenizer never
|
|
1119
|
+
needs (e.g. RoPE parameter validation for configs that carry nested
|
|
1120
|
+
``rope_parameters``). When the repo ships a complete ``tokenizer.json`` and
|
|
1121
|
+
declares no custom tokenizer, the tokenizer is fully self-describing, so we
|
|
1122
|
+
load it directly and skip the config detour.
|
|
1123
|
+
|
|
1124
|
+
Returns ``None`` when there's nothing safe to load this way — a custom
|
|
1125
|
+
``auto_map`` tokenizer (which must run through ``AutoTokenizer`` with
|
|
1126
|
+
``trust_remote_code``) or no fast tokenizer at all — so the caller can
|
|
1127
|
+
surface its original error instead.
|
|
1128
|
+
"""
|
|
1129
|
+
from transformers import PreTrainedTokenizerFast
|
|
1130
|
+
from transformers.models.auto.tokenization_auto import get_tokenizer_config
|
|
1131
|
+
|
|
1132
|
+
try:
|
|
1133
|
+
if "auto_map" in get_tokenizer_config(model_name_or_path, revision=revision):
|
|
1134
|
+
return None
|
|
1135
|
+
return PreTrainedTokenizerFast.from_pretrained(
|
|
1136
|
+
model_name_or_path, revision=revision
|
|
1137
|
+
)
|
|
1138
|
+
except Exception:
|
|
1139
|
+
return None
|
|
1140
|
+
|
|
1141
|
+
|
|
1142
|
+
def _load_tokenizer_via_auto(model_name_or_path: str, **kwargs) -> Any:
|
|
1143
|
+
"""``AutoTokenizer.from_pretrained`` with a config-free fallback.
|
|
1144
|
+
|
|
1145
|
+
renderers needs the tokenizer, not the model. If ``AutoTokenizer`` fails
|
|
1146
|
+
while building the model config it loads to resolve the tokenizer class,
|
|
1147
|
+
retry by loading the repo's self-contained ``tokenizer.json`` directly. The
|
|
1148
|
+
original error is re-raised if the repo has no such tokenizer.
|
|
1149
|
+
"""
|
|
1150
|
+
from transformers import AutoTokenizer
|
|
1151
|
+
|
|
1152
|
+
try:
|
|
1153
|
+
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
|
|
1154
|
+
except Exception as exc:
|
|
1155
|
+
tok = _load_fast_tokenizer_directly(
|
|
1156
|
+
model_name_or_path, revision=kwargs.get("revision")
|
|
1157
|
+
)
|
|
1158
|
+
if tok is None:
|
|
1159
|
+
raise
|
|
1160
|
+
logger.debug(
|
|
1161
|
+
"AutoTokenizer.from_pretrained(%r) failed building the model config "
|
|
1162
|
+
"(%s: %s); loaded the tokenizer directly from tokenizer.json.",
|
|
1163
|
+
model_name_or_path,
|
|
1164
|
+
type(exc).__name__,
|
|
1165
|
+
str(exc)[:160],
|
|
1166
|
+
)
|
|
1167
|
+
return tok
|
|
1168
|
+
|
|
1169
|
+
|
|
1112
1170
|
def load_tokenizer(
|
|
1113
1171
|
model_name_or_path: str,
|
|
1114
1172
|
*,
|
|
@@ -1138,9 +1196,14 @@ def load_tokenizer(
|
|
|
1138
1196
|
fastokens raises during the patched load (e.g. an unknown
|
|
1139
1197
|
pre-tokenizer type), we automatically retry with the vanilla
|
|
1140
1198
|
backend and emit an INFO log.
|
|
1141
|
-
"""
|
|
1142
|
-
from transformers import AutoTokenizer
|
|
1143
1199
|
|
|
1200
|
+
``AutoTokenizer.from_pretrained`` eagerly builds the model config to
|
|
1201
|
+
resolve the tokenizer class. If that construction raises on a
|
|
1202
|
+
modeling-only concern the tokenizer doesn't need (e.g. RoPE
|
|
1203
|
+
validation for configs with nested ``rope_parameters``), we fall
|
|
1204
|
+
back to loading the repo's self-contained ``tokenizer.json``
|
|
1205
|
+
directly — see ``_load_tokenizer_via_auto``.
|
|
1206
|
+
"""
|
|
1144
1207
|
kwargs: dict[str, Any] = {}
|
|
1145
1208
|
revision = TRUSTED_REVISIONS.get(model_name_or_path)
|
|
1146
1209
|
if revision is not None:
|
|
@@ -1149,7 +1212,7 @@ def load_tokenizer(
|
|
|
1149
1212
|
kwargs = {"trust_remote_code": False}
|
|
1150
1213
|
|
|
1151
1214
|
if not use_fastokens or model_name_or_path in FASTOKENS_INCOMPATIBLE:
|
|
1152
|
-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
|
|
1215
|
+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
|
|
1153
1216
|
|
|
1154
1217
|
try:
|
|
1155
1218
|
return _patched_load(model_name_or_path, **kwargs)
|
|
@@ -1162,7 +1225,7 @@ def load_tokenizer(
|
|
|
1162
1225
|
type(exc).__name__,
|
|
1163
1226
|
str(exc)[:160],
|
|
1164
1227
|
)
|
|
1165
|
-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
|
|
1228
|
+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
|
|
1166
1229
|
|
|
1167
1230
|
|
|
1168
1231
|
def _populate_registry():
|
|
@@ -66,39 +66,44 @@ _TOOLS_INSTRUCTIONS = (
|
|
|
66
66
|
)
|
|
67
67
|
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
69
|
+
# Per-model ``enable_thinking`` default, applied when the renderer config
|
|
70
|
+
# leaves it ``None``. The Qwen3.5 family ships two chat-template variants
|
|
71
|
+
# that differ only in the polarity of the gated thinking branch:
|
|
72
|
+
#
|
|
73
|
+
# * Big sizes (4B / 9B / 35B-A3B / 122B-A10B / 397B-A17B) default
|
|
74
|
+
# ``enable_thinking=true`` — an open ``<think>\n`` at the gen prompt.
|
|
75
|
+
# * Small sizes (0.8B / 2B) flip it — default ``false``, emitting the
|
|
76
|
+
# empty ``<think>\n\n</think>\n\n`` block.
|
|
77
|
+
#
|
|
78
|
+
# These are hard-coded (keyed by ``tokenizer.name_or_path``) rather than
|
|
79
|
+
# probed from the live ``chat_template``: probing meant calling
|
|
80
|
+
# ``apply_chat_template`` at construction, which pulls ``transformers`` onto
|
|
81
|
+
# the hot path and breaks bring-your-own-tokenizer use. The values are the
|
|
82
|
+
# ground truth pinned by ``tests/test_qwen35_size_coverage.py`` — both the
|
|
83
|
+
# polarity assertions and byte-parity against each size's own
|
|
84
|
+
# ``apply_chat_template``.
|
|
85
|
+
_ENABLE_THINKING_DEFAULTS: dict[str, bool] = {
|
|
86
|
+
"Qwen/Qwen3.5-0.8B": False,
|
|
87
|
+
"Qwen/Qwen3.5-2B": False,
|
|
88
|
+
"Qwen/Qwen3.5-4B": True,
|
|
89
|
+
"Qwen/Qwen3.5-9B": True,
|
|
90
|
+
"Qwen/Qwen3.5-35B-A3B": True,
|
|
91
|
+
"Qwen/Qwen3.5-122B-A10B": True,
|
|
92
|
+
"Qwen/Qwen3.5-397B-A17B": True,
|
|
93
|
+
# Qwen3.6 extends the Qwen3.5 template; same big-size polarity.
|
|
94
|
+
"Qwen/Qwen3.6-35B-A3B": True,
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _default_enable_thinking(tokenizer) -> bool:
|
|
99
|
+
"""Hard-coded ``enable_thinking`` default for ``tokenizer``'s model.
|
|
100
|
+
|
|
101
|
+
Falls back to ``True`` (the big-model default, and the majority of the
|
|
102
|
+
family) for unknown / fine-tuned checkpoints whose ``name_or_path`` isn't
|
|
103
|
+
in ``_ENABLE_THINKING_DEFAULTS``; pass an explicit ``enable_thinking=`` to
|
|
104
|
+
a small-size fine-tune that needs ``False``.
|
|
90
105
|
"""
|
|
91
|
-
|
|
92
|
-
out = tokenizer.apply_chat_template(
|
|
93
|
-
[{"role": "user", "content": "x"}],
|
|
94
|
-
tokenize=False,
|
|
95
|
-
add_generation_prompt=True,
|
|
96
|
-
)
|
|
97
|
-
except Exception:
|
|
98
|
-
return True
|
|
99
|
-
if not isinstance(out, str):
|
|
100
|
-
return True
|
|
101
|
-
return not out.rstrip().endswith("</think>")
|
|
106
|
+
return _ENABLE_THINKING_DEFAULTS.get(getattr(tokenizer, "name_or_path", ""), True)
|
|
102
107
|
|
|
103
108
|
|
|
104
109
|
class Qwen35Renderer:
|
|
@@ -116,13 +121,13 @@ class Qwen35Renderer:
|
|
|
116
121
|
self._tokenizer = tokenizer
|
|
117
122
|
self._processor = processor
|
|
118
123
|
cfg = config or type(self)._config_cls()
|
|
119
|
-
# ``enable_thinking=None`` defers to the
|
|
120
|
-
#
|
|
121
|
-
#
|
|
122
|
-
#
|
|
124
|
+
# ``enable_thinking=None`` defers to the model's known default (see
|
|
125
|
+
# ``_ENABLE_THINKING_DEFAULTS``). Materialise here so downstream reads
|
|
126
|
+
# see a concrete bool; rebind the config with the resolved value so
|
|
127
|
+
# introspection sees the same.
|
|
123
128
|
if cfg.enable_thinking is None:
|
|
124
129
|
cfg = cfg.model_copy(
|
|
125
|
-
update={"enable_thinking":
|
|
130
|
+
update={"enable_thinking": _default_enable_thinking(tokenizer)}
|
|
126
131
|
)
|
|
127
132
|
self.config = cfg
|
|
128
133
|
|
|
@@ -5,9 +5,8 @@ Seven Qwen3.5 sizes route to ``Qwen35Renderer``. The 4B / 9B / 35B-A3B /
|
|
|
5
5
|
``enable_thinking=true``); the smaller 0.8B / 2B sizes ship the polarity-
|
|
6
6
|
flipped variant (default ``enable_thinking=false`` → empty
|
|
7
7
|
``<think>\\n\\n</think>\\n\\n`` at the gen-prompt boundary). The renderer
|
|
8
|
-
|
|
9
|
-
both variants render byte-identical to their own
|
|
10
|
-
``apply_chat_template``.
|
|
8
|
+
hard-codes this polarity per model (``_ENABLE_THINKING_DEFAULTS``), so
|
|
9
|
+
both variants render byte-identical to their own ``apply_chat_template``.
|
|
11
10
|
|
|
12
11
|
These tests lock in (a) the exact set of Qwen3.5 sizes in the map and
|
|
13
12
|
(b) byte parity for every one of them across representative
|
|
@@ -57,7 +56,7 @@ def test_no_other_qwen35_sizes_silently_added():
|
|
|
57
56
|
|
|
58
57
|
|
|
59
58
|
# ---------------------------------------------------------------------------
|
|
60
|
-
# Polarity
|
|
59
|
+
# Polarity defaults: 0.8B / 2B flip ``enable_thinking`` default.
|
|
61
60
|
# ---------------------------------------------------------------------------
|
|
62
61
|
|
|
63
62
|
|
|
@@ -73,10 +72,10 @@ def test_no_other_qwen35_sizes_silently_added():
|
|
|
73
72
|
("Qwen/Qwen3.5-397B-A17B", True),
|
|
74
73
|
],
|
|
75
74
|
)
|
|
76
|
-
def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_default):
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
|
|
75
|
+
def test_qwen35_enable_thinking_polarity_default(qwen35_model, expected_default):
|
|
76
|
+
"""With no explicit flag, the renderer resolves ``enable_thinking`` from
|
|
77
|
+
the hard-coded per-model default — so big / small sizes each match their
|
|
78
|
+
own template at the gen-prompt boundary."""
|
|
80
79
|
tok = load_tokenizer(qwen35_model)
|
|
81
80
|
renderer = create_renderer(tok, Qwen35RendererConfig())
|
|
82
81
|
assert isinstance(renderer, Qwen35Renderer)
|
|
@@ -86,6 +85,30 @@ def test_qwen35_enable_thinking_polarity_autodetected(qwen35_model, expected_def
|
|
|
86
85
|
)
|
|
87
86
|
|
|
88
87
|
|
|
88
|
+
def test_construction_does_not_call_apply_chat_template():
|
|
89
|
+
"""The ``enable_thinking`` default is hard-coded per model, so building a
|
|
90
|
+
``Qwen35Renderer`` must not probe ``apply_chat_template`` — a
|
|
91
|
+
bring-your-own tokenizer with no chat-template support still works."""
|
|
92
|
+
|
|
93
|
+
class _Stub:
|
|
94
|
+
name_or_path = "Qwen/Qwen3.5-0.8B"
|
|
95
|
+
unk_token_id = -1
|
|
96
|
+
|
|
97
|
+
def convert_tokens_to_ids(self, token):
|
|
98
|
+
# Any stable non-unk id per token; the renderer only needs the
|
|
99
|
+
# special tokens to resolve to distinct, in-vocab ids.
|
|
100
|
+
return abs(hash(token)) % 1_000_000 + 1
|
|
101
|
+
|
|
102
|
+
def apply_chat_template(self, *args, **kwargs):
|
|
103
|
+
raise AssertionError(
|
|
104
|
+
"apply_chat_template must not be called at construction"
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
renderer = Qwen35Renderer(_Stub())
|
|
108
|
+
# 0.8B is a small size → thinking defaults off, from the hard-coded table.
|
|
109
|
+
assert renderer.config.enable_thinking is False
|
|
110
|
+
|
|
111
|
+
|
|
89
112
|
# ---------------------------------------------------------------------------
|
|
90
113
|
# Byte parity for each in-map Qwen3.5 size.
|
|
91
114
|
# ---------------------------------------------------------------------------
|
|
@@ -146,7 +169,7 @@ def test_qwen35_size_parity_with_apply_chat_template(
|
|
|
146
169
|
"""Each in-map Qwen3.5 size renders byte-identical to its own
|
|
147
170
|
``apply_chat_template`` output. Locks in the property that lets us
|
|
148
171
|
share ``Qwen35Renderer`` across all seven sizes — the polarity
|
|
149
|
-
flip on 0.8B / 2B is absorbed by the
|
|
172
|
+
flip on 0.8B / 2B is absorbed by the per-model default."""
|
|
150
173
|
tok = load_tokenizer(qwen35_model)
|
|
151
174
|
renderer = create_renderer(tok, Qwen35RendererConfig())
|
|
152
175
|
assert isinstance(renderer, Qwen35Renderer)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/sglang/multiturn_generate_sglang.py
RENAMED
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev33 → renderers-0.1.8.dev35}/examples/tinker/multiturn_generate_tinker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|