renderers 0.1.8.dev30__tar.gz → 0.1.8.dev31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/PKG-INFO +1 -1
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/_version.py +2 -2
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/base.py +38 -12
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/glm45.py +55 -16
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/glm5.py +55 -11
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/publish-dev.yml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/publish.yml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/style.yml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.github/workflows/test.yml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.gitignore +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/.pre-commit-config.yaml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/LICENSE +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/README.md +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/docs/renderer-config.md +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/README.md +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/sglang/multiturn_generate_sglang.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/sglang/online_multiturn_sglang.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/tinker/multiturn_generate_tinker.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/transformers/multiturn_generate_transformers.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/vllm/multiturn_generate_vllm.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/pyproject.toml +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/__init__.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/client.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/configs.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/deepseek_v3.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/default.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/gpt_oss.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/kimi_k2.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/kimi_k25.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/laguna_xs2.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/minimax_m2.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/nemotron3.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/parsers.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/parsing.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen3.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen35.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen36.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/renderers/qwen3_vl.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/conftest.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_bridge.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_build_helpers.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_client.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_gpt_oss_harmony_parity.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_incremental.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_is_content.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_kimi_k25_tool_schema.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_load_tokenizer.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_load_tokenizer_fastokens.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_message_indices.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_multimodal.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parse_response.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parse_response_robustness.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_parsers.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_preserve_thinking.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_qwen35_size_coverage.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_render_ids.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_renderer_config.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_renderer_config_parity.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_roundtrip.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_sampled_mask.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_tokens_per_message.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/tests/test_tool_arg_type_preservation.py +0 -0
- {renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/uv.lock +0 -0
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '0.1.8.dev30'
|
|
22
|
-
__version_tuple__ = version_tuple = (0, 1, 8, 'dev30')
|
|
21
|
+
__version__ = version = '0.1.8.dev31'
|
|
22
|
+
__version_tuple__ = version_tuple = (0, 1, 8, 'dev31')
|
|
23
23
|
|
|
24
24
|
__commit_id__ = commit_id = None
|
|
@@ -1098,8 +1098,7 @@ def _patched_load(model_name_or_path: str, **kwargs):
|
|
|
1098
1098
|
fastokens.patch_transformers()
|
|
1099
1099
|
if not _FASTOKENS_ANNOUNCED:
|
|
1100
1100
|
logger.info(
|
|
1101
|
-
"fastokens enabled — tokenizers load through the Rust BPE "
|
|
1102
|
-
"fast path (~10x encode speedup)."
|
|
1101
|
+
"fastokens enabled — tokenizers load through the Rust BPE fast path (~10x encode speedup)."
|
|
1103
1102
|
)
|
|
1104
1103
|
_FASTOKENS_ANNOUNCED = True
|
|
1105
1104
|
try:
|
|
@@ -1169,8 +1168,8 @@ def load_tokenizer(
|
|
|
1169
1168
|
def _populate_registry():
|
|
1170
1169
|
if RENDERER_REGISTRY:
|
|
1171
1170
|
return
|
|
1172
|
-
from renderers.default import DefaultRenderer
|
|
1173
1171
|
from renderers.deepseek_v3 import DeepSeekV3Renderer
|
|
1172
|
+
from renderers.default import DefaultRenderer
|
|
1174
1173
|
from renderers.glm5 import GLM5Renderer, GLM51Renderer
|
|
1175
1174
|
from renderers.glm45 import GLM45Renderer
|
|
1176
1175
|
from renderers.gpt_oss import GptOssRenderer
|
|
@@ -1271,8 +1270,7 @@ def create_renderer(
|
|
|
1271
1270
|
cls = RENDERER_REGISTRY.get(config.name)
|
|
1272
1271
|
if cls is None:
|
|
1273
1272
|
raise ValueError(
|
|
1274
|
-
f"Unknown renderer {config.name!r}. "
|
|
1275
|
-
f"Available: {', '.join(sorted(RENDERER_REGISTRY))}"
|
|
1273
|
+
f"Unknown renderer {config.name!r}. Available: {', '.join(sorted(RENDERER_REGISTRY))}"
|
|
1276
1274
|
)
|
|
1277
1275
|
return cls(tokenizer, config)
|
|
1278
1276
|
|
|
@@ -1345,7 +1343,7 @@ def build_training_sample(
|
|
|
1345
1343
|
renderer: Renderer,
|
|
1346
1344
|
messages: list[Message],
|
|
1347
1345
|
*,
|
|
1348
|
-
role_to_mask: Callable[[Message], bool],
|
|
1346
|
+
role_to_mask: Callable[[Message], bool] | None = None,
|
|
1349
1347
|
tools: list[ToolSpec] | None = None,
|
|
1350
1348
|
content_sft_roles: "set[str] | frozenset[str] | None" = None,
|
|
1351
1349
|
) -> tuple[list[int], list[bool]]:
|
|
@@ -1354,15 +1352,31 @@ def build_training_sample(
|
|
|
1354
1352
|
Single render() call + message_indices → per-token mask.
|
|
1355
1353
|
Replaces build_incremental_token_mask (O(N) renders → O(1)).
|
|
1356
1354
|
|
|
1357
|
-
When
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1355
|
+
When ``role_to_mask`` is omitted, ``loss_mask`` is the renderer's
|
|
1356
|
+
``sampled_mask`` directly: every token the model would have
|
|
1357
|
+
produced at inference is trainable, regardless of which message
|
|
1358
|
+
it's attributed to. This is the recommended default for renderer
|
|
1359
|
+
callers — the renderer owns the per-token "is this model output"
|
|
1360
|
+
signal, so role-level filtering becomes a downstream constraint
|
|
1361
|
+
rather than a precondition. (Some role markers — e.g. GLM
|
|
1362
|
+
``<|user|>`` / ``<|observation|>`` after a tool-calling assistant
|
|
1363
|
+
turn — *are* sampled by the model at inference and live inside the
|
|
1364
|
+
next message's span; ``sampled_mask`` captures that, but a
|
|
1365
|
+
naive role filter would mask them out.)
|
|
1366
|
+
|
|
1367
|
+
When ``role_to_mask`` is provided, ``loss_mask`` is the AND of the
|
|
1368
|
+
role-based attribution and the sampled signal: only tokens the
|
|
1369
|
+
model would have produced at inference AND attributed to a
|
|
1370
|
+
trainable role pass through. Useful when the caller needs to
|
|
1371
|
+
restrict training to a specific role (e.g. assistant-only) even on
|
|
1372
|
+
a renderer whose ``sampled_mask`` already covers other roles.
|
|
1373
|
+
|
|
1362
1374
|
Renderers that don't populate ``sampled_mask`` (empty list) fall
|
|
1363
1375
|
back to attribution-only masking — every token attributed to a
|
|
1364
1376
|
trainable role is trained on, including template-injected
|
|
1365
|
-
``<|im_start|>role\\n`` openers.
|
|
1377
|
+
``<|im_start|>role\\n`` openers. In this fallback mode
|
|
1378
|
+
``role_to_mask`` is required; calling without it raises
|
|
1379
|
+
``ValueError``.
|
|
1366
1380
|
|
|
1367
1381
|
``content_sft_roles`` opts in additional roles for "body-only"
|
|
1368
1382
|
supervision: for every message whose role is in this set, tokens
|
|
@@ -1393,6 +1407,13 @@ def build_training_sample(
|
|
|
1393
1407
|
else:
|
|
1394
1408
|
body_roles = frozenset()
|
|
1395
1409
|
|
|
1410
|
+
if role_to_mask is None and not has_sampled_info:
|
|
1411
|
+
raise ValueError(
|
|
1412
|
+
"role_to_mask is required when the renderer does not populate "
|
|
1413
|
+
"sampled_mask. Pass an explicit role filter (e.g. "
|
|
1414
|
+
"lambda m: m['role'] == 'assistant') for this renderer."
|
|
1415
|
+
)
|
|
1416
|
+
|
|
1396
1417
|
loss_mask: list[bool] = []
|
|
1397
1418
|
for k, msg_idx in enumerate(rendered.message_indices):
|
|
1398
1419
|
if msg_idx < 0:
|
|
@@ -1408,6 +1429,11 @@ def build_training_sample(
|
|
|
1408
1429
|
continue
|
|
1409
1430
|
if has_sampled_info and not rendered.sampled_mask[k]:
|
|
1410
1431
|
loss_mask.append(False)
|
|
1432
|
+
elif role_to_mask is None:
|
|
1433
|
+
# sampled_mask alone gates the loss when no role filter is
|
|
1434
|
+
# supplied. ``sampled_mask[k]`` is True here (handled by the
|
|
1435
|
+
# branch above), so this token is trainable.
|
|
1436
|
+
loss_mask.append(True)
|
|
1411
1437
|
else:
|
|
1412
1438
|
loss_mask.append(role_to_mask(msg))
|
|
1413
1439
|
return rendered.token_ids, loss_mask
|
|
@@ -184,6 +184,22 @@ class GLM45Renderer:
|
|
|
184
184
|
role = msg["role"]
|
|
185
185
|
content = self._visible_text(msg.get("content"))
|
|
186
186
|
|
|
187
|
+
# When the previous message is an assistant, this message's
|
|
188
|
+
# role-opening token (``<|user|>`` / ``<|observation|>``) is
|
|
189
|
+
# the inference-time stop signal that closes the assistant's
|
|
190
|
+
# turn (see ``get_stop_token_ids``). Mark it
|
|
191
|
+
# ``is_sampled=True`` so the loss-mask pipeline trains the
|
|
192
|
+
# model to emit it after ``</tool_call>`` (instead of
|
|
193
|
+
# continuing with another ``<tool_call>`` block). The token
|
|
194
|
+
# stays attributed to this message (msg_idx=i) and remains
|
|
195
|
+
# ``is_content=False`` — it's a role-marker / scaffold, not
|
|
196
|
+
# body bytes, so ``content_mask_for_roles({"tool"})`` and
|
|
197
|
+
# ``content_token_spans_by_role()`` correctly exclude it
|
|
198
|
+
# from "tool body" views. Byte stream is unchanged.
|
|
199
|
+
# ``system`` only appears at the start of a GLM conversation,
|
|
200
|
+
# so its opener is never the closer of an assistant turn.
|
|
201
|
+
closes_assistant_turn = i > 0 and messages[i - 1]["role"] == "assistant"
|
|
202
|
+
|
|
187
203
|
if role == "system":
|
|
188
204
|
emit_special(self._system, i, is_sampled=False, is_content=False)
|
|
189
205
|
# ``\n`` is the scaffold separator after the role tag;
|
|
@@ -193,7 +209,12 @@ class GLM45Renderer:
|
|
|
193
209
|
)
|
|
194
210
|
|
|
195
211
|
elif role == "user":
|
|
196
|
-
emit_special(
|
|
212
|
+
emit_special(
|
|
213
|
+
self._user,
|
|
214
|
+
i,
|
|
215
|
+
is_sampled=closes_assistant_turn,
|
|
216
|
+
is_content=False,
|
|
217
|
+
)
|
|
197
218
|
# ``\n`` is scaffold; ``content`` is body; the optional
|
|
198
219
|
# ``/nothink`` suffix is scaffold the renderer injects
|
|
199
220
|
# when ``enable_thinking=False``.
|
|
@@ -362,6 +383,21 @@ class GLM45Renderer:
|
|
|
362
383
|
ext_sampled.append(is_sampled)
|
|
363
384
|
ext_content.append(is_content)
|
|
364
385
|
|
|
386
|
+
# The opener-token of the first new_message may also serve as
|
|
387
|
+
# the close of the previous assistant turn (when the model
|
|
388
|
+
# failed to sample the stop token itself and the bridge has to
|
|
389
|
+
# synthesize the boundary above). Unlike :meth:`render`, the
|
|
390
|
+
# bridge emits these with ``is_sampled=False, is_content=False``
|
|
391
|
+
# — they are template scaffolding for the *next* step's prompt,
|
|
392
|
+
# not tokens the model produced *in this* step. The RL loss
|
|
393
|
+
# operates on ``previous_completion_ids`` (what the model
|
|
394
|
+
# actually sampled this round); bridge tokens belong to the
|
|
395
|
+
# subsequent prompt and must not be counted as "model output"
|
|
396
|
+
# by downstream mask consumers. This deliberate disagreement
|
|
397
|
+
# with ``render()`` reflects the SFT vs RL semantics: render's
|
|
398
|
+
# masks describe what the model *should* produce given a
|
|
399
|
+
# complete conversation; bridge's masks describe what it
|
|
400
|
+
# *actually* produced this step.
|
|
365
401
|
for i, msg in enumerate(new_messages):
|
|
366
402
|
role = msg.get("role")
|
|
367
403
|
content = self._visible_text(msg.get("content"))
|
|
@@ -531,21 +567,24 @@ class GLM45Renderer:
|
|
|
531
567
|
emit_text,
|
|
532
568
|
emit_text_segments,
|
|
533
569
|
) -> None:
|
|
534
|
-
# Tool
|
|
535
|
-
#
|
|
536
|
-
#
|
|
537
|
-
#
|
|
538
|
-
#
|
|
539
|
-
#
|
|
540
|
-
#
|
|
541
|
-
#
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
570
|
+
# Tool body bytes get ``is_content=True``; the wraps are
|
|
571
|
+
# scaffold. The ``<|observation|>`` role tag is scaffold too
|
|
572
|
+
# (``is_content=False`` so ``content_mask_for_roles({"tool"})``
|
|
573
|
+
# excludes it). When the previous message is an assistant it
|
|
574
|
+
# doubles as the inference stop signal for that assistant's
|
|
575
|
+
# turn — mark it ``is_sampled=True`` so SFT trains the model to
|
|
576
|
+
# emit it after ``</tool_call>``. The token stays attributed to
|
|
577
|
+
# this tool message; byte stream is unchanged.
|
|
578
|
+
prev_role = messages[msg_idx - 1]["role"] if msg_idx > 0 else None
|
|
579
|
+
closes_assistant_turn = prev_role == "assistant"
|
|
580
|
+
|
|
581
|
+
if prev_role != "tool":
|
|
582
|
+
emit_special(
|
|
583
|
+
self._observation,
|
|
584
|
+
msg_idx,
|
|
585
|
+
is_sampled=closes_assistant_turn,
|
|
586
|
+
is_content=False,
|
|
587
|
+
)
|
|
549
588
|
|
|
550
589
|
emit_text_segments(
|
|
551
590
|
[
|
|
@@ -207,12 +207,33 @@ class GLM5Renderer:
|
|
|
207
207
|
role = msg["role"]
|
|
208
208
|
content = self._visible_text(msg.get("content"))
|
|
209
209
|
|
|
210
|
+
# When the previous message is an assistant, this message's
|
|
211
|
+
# role-opening token (``<|user|>`` / ``<|observation|>``) is
|
|
212
|
+
# the inference-time stop signal that closes the assistant's
|
|
213
|
+
# turn (see ``get_stop_token_ids``). Mark it
|
|
214
|
+
# ``is_sampled=True`` so the loss-mask pipeline trains the
|
|
215
|
+
# model to emit it after ``</tool_call>`` (instead of
|
|
216
|
+
# continuing with another ``<tool_call>`` block). The token
|
|
217
|
+
# stays attributed to this message (msg_idx=i) and remains
|
|
218
|
+
# ``is_content=False`` — it's a role-marker / scaffold, not
|
|
219
|
+
# body bytes, so ``content_mask_for_roles({"tool"})`` and
|
|
220
|
+
# ``content_token_spans_by_role()`` correctly exclude it
|
|
221
|
+
# from "tool body" views. Byte stream is unchanged.
|
|
222
|
+
# ``system`` only appears at the start of a GLM conversation,
|
|
223
|
+
# so its opener is never the closer of an assistant turn.
|
|
224
|
+
closes_assistant_turn = i > 0 and messages[i - 1]["role"] == "assistant"
|
|
225
|
+
|
|
210
226
|
if role == "system":
|
|
211
227
|
emit_special(self._system, i, is_sampled=False, is_content=False)
|
|
212
228
|
emit_text(content, i, is_sampled=False, is_content=True)
|
|
213
229
|
|
|
214
230
|
elif role == "user":
|
|
215
|
-
emit_special(
|
|
231
|
+
emit_special(
|
|
232
|
+
self._user,
|
|
233
|
+
i,
|
|
234
|
+
is_sampled=closes_assistant_turn,
|
|
235
|
+
is_content=False,
|
|
236
|
+
)
|
|
216
237
|
emit_text(content, i, is_sampled=False, is_content=True)
|
|
217
238
|
|
|
218
239
|
elif role == "assistant":
|
|
@@ -382,6 +403,21 @@ class GLM5Renderer:
|
|
|
382
403
|
ext_sampled.append(is_sampled)
|
|
383
404
|
ext_content.append(is_content)
|
|
384
405
|
|
|
406
|
+
# The opener-token of the first new_message may also serve as
|
|
407
|
+
# the close of the previous assistant turn (when the model
|
|
408
|
+
# failed to sample the stop token itself and the bridge has to
|
|
409
|
+
# synthesize the boundary above). Unlike :meth:`render`, the
|
|
410
|
+
# bridge emits these with ``is_sampled=False, is_content=False``
|
|
411
|
+
# — they are template scaffolding for the *next* step's prompt,
|
|
412
|
+
# not tokens the model produced *in this* step. The RL loss
|
|
413
|
+
# operates on ``previous_completion_ids`` (what the model
|
|
414
|
+
# actually sampled this round); bridge tokens belong to the
|
|
415
|
+
# subsequent prompt and must not be counted as "model output"
|
|
416
|
+
# by downstream mask consumers. This deliberate disagreement
|
|
417
|
+
# with ``render()`` reflects the SFT vs RL semantics: render's
|
|
418
|
+
# masks describe what the model *should* produce given a
|
|
419
|
+
# complete conversation; bridge's masks describe what it
|
|
420
|
+
# *actually* produced this step.
|
|
385
421
|
for i, msg in enumerate(new_messages):
|
|
386
422
|
role = msg.get("role")
|
|
387
423
|
content = self._visible_text(msg.get("content"))
|
|
@@ -566,16 +602,24 @@ class GLM5Renderer:
|
|
|
566
602
|
emit_text,
|
|
567
603
|
emit_text_segments,
|
|
568
604
|
) -> None:
|
|
569
|
-
# Tool
|
|
570
|
-
#
|
|
571
|
-
#
|
|
572
|
-
#
|
|
573
|
-
#
|
|
574
|
-
#
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
if
|
|
578
|
-
|
|
605
|
+
# Tool body bytes get ``is_content=True``; the wraps are
|
|
606
|
+
# scaffold. The ``<|observation|>`` role tag is scaffold too
|
|
607
|
+
# (``is_content=False`` so ``content_mask_for_roles({"tool"})``
|
|
608
|
+
# excludes it). When the previous message is an assistant it
|
|
609
|
+
# doubles as the inference stop signal for that assistant's
|
|
610
|
+
# turn — mark it ``is_sampled=True`` so SFT trains the model to
|
|
611
|
+
# emit it after ``</tool_call>``. The token stays attributed to
|
|
612
|
+
# this tool message; byte stream is unchanged.
|
|
613
|
+
prev_role = messages[msg_idx - 1]["role"] if msg_idx > 0 else None
|
|
614
|
+
closes_assistant_turn = prev_role == "assistant"
|
|
615
|
+
|
|
616
|
+
if prev_role != "tool":
|
|
617
|
+
emit_special(
|
|
618
|
+
self._observation,
|
|
619
|
+
msg_idx,
|
|
620
|
+
is_sampled=closes_assistant_turn,
|
|
621
|
+
is_content=False,
|
|
622
|
+
)
|
|
579
623
|
|
|
580
624
|
emit_special(
|
|
581
625
|
self._tool_response_tok, msg_idx, is_sampled=False, is_content=False
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/sglang/multiturn_generate_sglang.py
RENAMED
|
File without changes
|
|
File without changes
|
{renderers-0.1.8.dev30 → renderers-0.1.8.dev31}/examples/tinker/multiturn_generate_tinker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|