renderers 0.1.8.dev34__tar.gz → 0.1.8.dev35__tar.gz

This diff shows the changes between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (63)
  1. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/PKG-INFO +1 -1
  2. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/_version.py +2 -2
  3. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/base.py +69 -6
  4. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.github/workflows/publish-dev.yml +0 -0
  5. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.github/workflows/publish.yml +0 -0
  6. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.github/workflows/style.yml +0 -0
  7. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.github/workflows/test.yml +0 -0
  8. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.gitignore +0 -0
  9. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/.pre-commit-config.yaml +0 -0
  10. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/LICENSE +0 -0
  11. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/README.md +0 -0
  12. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/docs/renderer-config.md +0 -0
  13. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/README.md +0 -0
  14. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/sglang/multiturn_generate_sglang.py +0 -0
  15. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/sglang/online_multiturn_sglang.py +0 -0
  16. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/tinker/multiturn_generate_tinker.py +0 -0
  17. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/transformers/multiturn_generate_transformers.py +0 -0
  18. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/examples/vllm/multiturn_generate_vllm.py +0 -0
  19. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/pyproject.toml +0 -0
  20. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/__init__.py +0 -0
  21. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/client.py +0 -0
  22. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/configs.py +0 -0
  23. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/deepseek_v3.py +0 -0
  24. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/default.py +0 -0
  25. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/glm45.py +0 -0
  26. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/glm5.py +0 -0
  27. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/gpt_oss.py +0 -0
  28. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/kimi_k2.py +0 -0
  29. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/kimi_k25.py +0 -0
  30. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/laguna_xs2.py +0 -0
  31. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/minimax_m2.py +0 -0
  32. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/nemotron3.py +0 -0
  33. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/parsers.py +0 -0
  34. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/parsing.py +0 -0
  35. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/qwen3.py +0 -0
  36. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/qwen35.py +0 -0
  37. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/qwen36.py +0 -0
  38. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/renderers/qwen3_vl.py +0 -0
  39. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/conftest.py +0 -0
  40. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_bridge.py +0 -0
  41. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_build_helpers.py +0 -0
  42. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_client.py +0 -0
  43. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_gpt_oss_harmony_parity.py +0 -0
  44. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_incremental.py +0 -0
  45. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_is_content.py +0 -0
  46. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_kimi_k25_tool_schema.py +0 -0
  47. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_load_tokenizer.py +0 -0
  48. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_load_tokenizer_fastokens.py +0 -0
  49. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_message_indices.py +0 -0
  50. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_multimodal.py +0 -0
  51. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_parse_response.py +0 -0
  52. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_parse_response_robustness.py +0 -0
  53. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_parsers.py +0 -0
  54. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_preserve_thinking.py +0 -0
  55. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_qwen35_size_coverage.py +0 -0
  56. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_render_ids.py +0 -0
  57. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_renderer_config.py +0 -0
  58. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_renderer_config_parity.py +0 -0
  59. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_roundtrip.py +0 -0
  60. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_sampled_mask.py +0 -0
  61. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_tokens_per_message.py +0 -0
  62. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/tests/test_tool_arg_type_preservation.py +0 -0
  63. {renderers-0.1.8.dev34 → renderers-0.1.8.dev35}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: renderers
3
- Version: 0.1.8.dev34
3
+ Version: 0.1.8.dev35
4
4
  Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '0.1.8.dev34'
22
- __version_tuple__ = version_tuple = (0, 1, 8, 'dev34')
21
+ __version__ = version = '0.1.8.dev35'
22
+ __version_tuple__ = version_tuple = (0, 1, 8, 'dev35')
23
23
 
24
24
  __commit_id__ = commit_id = None
@@ -1089,7 +1089,6 @@ def _patched_load(model_name_or_path: str, **kwargs):
1089
1089
  path is still discoverable in logs.
1090
1090
  """
1091
1091
  import fastokens
1092
- from transformers import AutoTokenizer
1093
1092
 
1094
1093
  global _FASTOKENS_ANNOUNCED
1095
1094
 
@@ -1102,13 +1101,72 @@ def _patched_load(model_name_or_path: str, **kwargs):
1102
1101
  )
1103
1102
  _FASTOKENS_ANNOUNCED = True
1104
1103
  try:
1105
- return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1104
+ return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
1106
1105
  finally:
1107
1106
  with _FASTOKENS_PATCH_LOCK:
1108
1107
  with contextlib.redirect_stdout(io.StringIO()):
1109
1108
  fastokens.unpatch_transformers()
1110
1109
 
1111
1110
 
1111
+ def _load_fast_tokenizer_directly(
1112
+ model_name_or_path: str, revision: str | None
1113
+ ) -> Any | None:
1114
+ """Load a self-contained fast tokenizer without building the model config.
1115
+
1116
+ ``AutoTokenizer.from_pretrained`` eagerly constructs the *model* config to
1117
+ resolve the tokenizer class — even for a plain ``PreTrainedTokenizerFast``.
1118
+ That construction can raise on modeling-only concerns the tokenizer never
1119
+ needs (e.g. RoPE parameter validation for configs that carry nested
1120
+ ``rope_parameters``). When the repo ships a complete ``tokenizer.json`` and
1121
+ declares no custom tokenizer, the tokenizer is fully self-describing, so we
1122
+ load it directly and skip the config detour.
1123
+
1124
+ Returns ``None`` when there's nothing safe to load this way — a custom
1125
+ ``auto_map`` tokenizer (which must run through ``AutoTokenizer`` with
1126
+ ``trust_remote_code``) or no fast tokenizer at all — so the caller can
1127
+ surface its original error instead.
1128
+ """
1129
+ from transformers import PreTrainedTokenizerFast
1130
+ from transformers.models.auto.tokenization_auto import get_tokenizer_config
1131
+
1132
+ try:
1133
+ if "auto_map" in get_tokenizer_config(model_name_or_path, revision=revision):
1134
+ return None
1135
+ return PreTrainedTokenizerFast.from_pretrained(
1136
+ model_name_or_path, revision=revision
1137
+ )
1138
+ except Exception:
1139
+ return None
1140
+
1141
+
1142
+ def _load_tokenizer_via_auto(model_name_or_path: str, **kwargs) -> Any:
1143
+ """``AutoTokenizer.from_pretrained`` with a config-free fallback.
1144
+
1145
+ renderers needs the tokenizer, not the model. If ``AutoTokenizer`` fails
1146
+ while building the model config it loads to resolve the tokenizer class,
1147
+ retry by loading the repo's self-contained ``tokenizer.json`` directly. The
1148
+ original error is re-raised if the repo has no such tokenizer.
1149
+ """
1150
+ from transformers import AutoTokenizer
1151
+
1152
+ try:
1153
+ return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1154
+ except Exception as exc:
1155
+ tok = _load_fast_tokenizer_directly(
1156
+ model_name_or_path, revision=kwargs.get("revision")
1157
+ )
1158
+ if tok is None:
1159
+ raise
1160
+ logger.debug(
1161
+ "AutoTokenizer.from_pretrained(%r) failed building the model config "
1162
+ "(%s: %s); loaded the tokenizer directly from tokenizer.json.",
1163
+ model_name_or_path,
1164
+ type(exc).__name__,
1165
+ str(exc)[:160],
1166
+ )
1167
+ return tok
1168
+
1169
+
1112
1170
  def load_tokenizer(
1113
1171
  model_name_or_path: str,
1114
1172
  *,
@@ -1138,9 +1196,14 @@ def load_tokenizer(
1138
1196
  fastokens raises during the patched load (e.g. an unknown
1139
1197
  pre-tokenizer type), we automatically retry with the vanilla
1140
1198
  backend and emit an INFO log.
1141
- """
1142
- from transformers import AutoTokenizer
1143
1199
 
1200
+ ``AutoTokenizer.from_pretrained`` eagerly builds the model config to
1201
+ resolve the tokenizer class. If that construction raises on a
1202
+ modeling-only concern the tokenizer doesn't need (e.g. RoPE
1203
+ validation for configs with nested ``rope_parameters``), we fall
1204
+ back to loading the repo's self-contained ``tokenizer.json``
1205
+ directly — see ``_load_tokenizer_via_auto``.
1206
+ """
1144
1207
  kwargs: dict[str, Any] = {}
1145
1208
  revision = TRUSTED_REVISIONS.get(model_name_or_path)
1146
1209
  if revision is not None:
@@ -1149,7 +1212,7 @@ def load_tokenizer(
1149
1212
  kwargs = {"trust_remote_code": False}
1150
1213
 
1151
1214
  if not use_fastokens or model_name_or_path in FASTOKENS_INCOMPATIBLE:
1152
- return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1215
+ return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
1153
1216
 
1154
1217
  try:
1155
1218
  return _patched_load(model_name_or_path, **kwargs)
@@ -1162,7 +1225,7 @@ def load_tokenizer(
1162
1225
  type(exc).__name__,
1163
1226
  str(exc)[:160],
1164
1227
  )
1165
- return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1228
+ return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
1166
1229
 
1167
1230
 
1168
1231
  def _populate_registry():
File without changes
File without changes