PyPI - renderers - Versions diffs - 0.1.8.dev41__tar.gz → 0.1.8.dev43__tar.gz - Mend

renderers 0.1.8.dev41__tar.gz → 0.1.8.dev43__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: renderers
-Version: 0.1.8.dev41
+Version: 0.1.8.dev43
 Summary: Chat template renderers — deterministic message-to-token conversion for LLM training
 License-Expression: Apache-2.0
 License-File: LICENSE

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/__init__.py RENAMED Viewed

@@ -44,6 +44,7 @@ from renderers.configs import (
     BaseRendererConfig,
     config_from_name,
     DefaultRendererConfig,
+    DeepSeekR1RendererConfig,
     DeepSeekV3RendererConfig,
     GLM45RendererConfig,
     GLM51RendererConfig,
@@ -74,6 +75,7 @@ from renderers.configs import (
 # imports — ``renderers.base._populate_registry`` lazy-imports the
 # concrete classes itself when a renderer is instantiated.
 _LAZY_RENDERERS: dict[str, str] = {
+    "DeepSeekR1Renderer": "renderers.deepseek_r1",
     "DeepSeekV3Renderer": "renderers.deepseek_v3",
     "DefaultRenderer": "renderers.default",
     "GLM45Renderer": "renderers.glm45",
@@ -113,6 +115,8 @@ __all__ = [
     "BaseRendererConfig",
     "Content",
     "ContentPart",
+    "DeepSeekR1Renderer",
+    "DeepSeekR1RendererConfig",
     "DeepSeekV3Renderer",
     "DeepSeekV3RendererConfig",
     "DefaultRenderer",

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/_version.py RENAMED Viewed

@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
 commit_id: str | None
 __commit_id__: str | None
-__version__ = version = '0.1.8.dev41'
-__version_tuple__ = version_tuple = (0, 1, 8, 'dev41')
+__version__ = version = '0.1.8.dev43'
+__version_tuple__ = version_tuple = (0, 1, 8, 'dev43')
 __commit_id__ = commit_id = None

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/base.py RENAMED Viewed

@@ -1030,9 +1030,12 @@ MODEL_RENDERER_MAP: dict[str, str] = {
     # MiniMax.
     "MiniMaxAI/MiniMax-M2": "minimax-m2",
     "MiniMaxAI/MiniMax-M2.5": "minimax-m2",
-    # DeepSeek V3.
+    # DeepSeek V3 (non-reasoning).
     "deepseek-ai/DeepSeek-V3": "deepseek-v3",
     "deepseek-ai/DeepSeek-V3-Base": "deepseek-v3",
+    # DeepSeek R1 (reasoning).
+    "deepseek-ai/DeepSeek-R1": "deepseek-r1",
+    "deepseek-ai/DeepSeek-R1-0528": "deepseek-r1",
     # Kimi K2 (K2.5 and K2.6 share the K2.5 template, distinct from K2).
     "moonshotai/Kimi-K2-Instruct": "kimi-k2",
     "moonshotai/Kimi-K2.5": "kimi-k2.5",
@@ -1161,6 +1164,8 @@ FASTOKENS_INCOMPATIBLE: frozenset[str] = frozenset(
         # doesn't yet implement.
         "deepseek-ai/DeepSeek-V3",
         "deepseek-ai/DeepSeek-V3-Base",
+        "deepseek-ai/DeepSeek-R1",
+        "deepseek-ai/DeepSeek-R1-0528",
     }
 )
@@ -1334,6 +1339,7 @@ def load_tokenizer(
 def _populate_registry():
     if RENDERER_REGISTRY:
         return
+    from renderers.deepseek_r1 import DeepSeekR1Renderer
     from renderers.deepseek_v3 import DeepSeekV3Renderer
     from renderers.default import DefaultRenderer
     from renderers.glm5 import GLM5Renderer, GLM51Renderer
@@ -1362,6 +1368,7 @@ def _populate_registry():
             "glm-4.5": GLM45Renderer,
             "minimax-m2": MiniMaxM2Renderer,
             "deepseek-v3": DeepSeekV3Renderer,
+            "deepseek-r1": DeepSeekR1Renderer,
             "kimi-k2": KimiK2Renderer,
             "kimi-k2.5": KimiK25Renderer,
             "laguna-xs.2": LagunaXS2Renderer,

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/configs.py RENAMED Viewed

@@ -400,24 +400,30 @@ class Nemotron3RendererConfig(BaseRendererConfig):
 class DeepSeekV3RendererConfig(BaseRendererConfig):
-    """DeepSeek V3 renderer config.
+    """DeepSeek-V3 renderer config (non-reasoning).
-    ``enable_thinking`` is renderer-internal here — DeepSeek-V3's chat
-    template does not reference any thinking variable, so passing it to
-    ``apply_chat_template`` upstream is a no-op. The renderer uses it
-    to control the ``<think>`` prefill at the generation prompt (R1
-    distill convention).
+    DeepSeek-V3 has no thinking concept: the generation prompt is a bare
+    ``<｜Assistant｜>`` and assistant content is emitted verbatim. For the
+    reasoning variant use :class:`DeepSeekR1RendererConfig`.
     """
     name: Literal["deepseek-v3"] = "deepseek-v3"
-    enable_thinking: bool = True
-    """Renderer convention for the R1-distill family: when ``True``,
-    prefill ``<think>`` at the generation prompt. The DeepSeek-V3 Jinja
-    template ignores this kwarg upstream; it's not a chat-template
-    kwarg in the strict sense."""
-    _internal_fields = frozenset({"enable_thinking"})
+class DeepSeekR1RendererConfig(BaseRendererConfig):
+    """DeepSeek-R1 renderer config (reasoning).
+    R1 always reasons — its chat template unconditionally prefills
+    ``<think>\\n`` at the generation prompt and strips ``</think>`` from
+    historical assistant turns. There is therefore no ``enable_thinking``
+    knob (thinking is not optional), and ``preserve_*`` flags are no-ops
+    (history reasoning is always dropped); both stored for protocol
+    uniformity. Applies to full ``deepseek-ai/DeepSeek-R1`` / ``-R1-0528``
+    — NOT the R1-Distill-Qwen/Llama models, which use those base
+    tokenizers and route to the Qwen3 / Llama-3 renderers.
+    """
+    name: Literal["deepseek-r1"] = "deepseek-r1"
 RendererConfig = Annotated[
@@ -439,6 +445,7 @@ RendererConfig = Annotated[
         MiniMaxM2RendererConfig,
         Nemotron3RendererConfig,
         DeepSeekV3RendererConfig,
+        DeepSeekR1RendererConfig,
     ],
     Field(discriminator="name"),
 ]
@@ -474,6 +481,7 @@ _CONFIG_BY_NAME: dict[str, type[BaseRendererConfig]] = {
     "minimax-m2": MiniMaxM2RendererConfig,
     "nemotron-3": Nemotron3RendererConfig,
     "deepseek-v3": DeepSeekV3RendererConfig,
+    "deepseek-r1": DeepSeekR1RendererConfig,
 }
@@ -505,6 +513,7 @@ __all__ = [
     "AutoRendererConfig",
     "BaseRendererConfig",
     "DefaultRendererConfig",
+    "DeepSeekR1RendererConfig",
     "DeepSeekV3RendererConfig",
     "GLM45RendererConfig",
     "GLM51RendererConfig",

renderers-0.1.8.dev43/renderers/deepseek_r1.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""DeepSeek-R1 Renderer — the reasoning variant of the DeepSeek format.
+R1 shares DeepSeek-V3's special tokens, message structure, and tool-call
+wire format, so it subclasses :class:`renderers.deepseek_v3.DeepSeekV3Renderer`
+and overrides only the two places its chat template diverges:
+1. Generation prompt — R1 unconditionally prefills ``<think>\\n``
+   (``<｜Assistant｜><think>\\n``) to trigger reasoning, where V3 emits a bare
+   ``<｜Assistant｜>``. Handled by ``_GEN_THINK_PREFILL``.
+2. Historical assistant turns — R1 strips the reasoning trace, keeping only
+   the text after ``</think>`` (``content.split('</think>')[-1]``), where V3
+   emits content verbatim. Handled by ``_prepare_assistant_content``.
+Everything else — system handling, tool-call / tool-output rendering,
+special-token resolution, and ``parse_response`` (``parse_deepseek_v3``,
+shared) — is inherited unchanged.
+Scope: full ``deepseek-ai/DeepSeek-R1`` and ``-R1-0528``. The R1-Distill
+models (``DeepSeek-R1-Distill-Qwen/Llama``) use their base models'
+tokenizers and route to the Qwen3 / Llama-3 renderers, not this one.
+"""
+from __future__ import annotations
+from renderers.base import Message
+from renderers.configs import DeepSeekR1RendererConfig
+from renderers.deepseek_v3 import DeepSeekV3Renderer
+class DeepSeekR1Renderer(DeepSeekV3Renderer):
+    """Deterministic message → token renderer for DeepSeek-R1 models."""
+    _config_cls: type = DeepSeekR1RendererConfig
+    _GEN_THINK_PREFILL: str = "<think>\n"
+    def _prepare_assistant_content(self, msg: Message) -> str:
+        """Assistant content with the reasoning trace stripped, mirroring the
+        R1 template's ``content.split('</think>')[-1]`` on historical turns.
+        Structured ``thinking``/``text`` parts are reconstructed inline first
+        so the same ``</think>`` split applies. The separate
+        ``reasoning_content`` field is ignored — the R1 chat template never
+        reads it, and history reasoning is dropped regardless.
+        """
+        content = msg.get("content") or ""
+        if isinstance(content, list):
+            parts: list[str] = []
+            for p in content:
+                if not isinstance(p, dict):
+                    continue
+                if p.get("type") == "thinking":
+                    parts.append(f"<think>{p.get('thinking', '')}</think>")
+                elif p.get("type") == "text":
+                    parts.append(p.get("text", ""))
+            content = "".join(parts)
+        if "</think>" in content:
+            content = content.split("</think>")[-1]
+        return content

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/deepseek_v3.py RENAMED Viewed

@@ -41,25 +41,30 @@ def _ds_token(name: str) -> str:
 class DeepSeekV3Renderer:
-    """Deterministic message → token renderer for DeepSeek V3 models.
-    DeepSeek-V3's chat template does not consult any thinking-related
-    variable; the ``enable_thinking`` field on the typed config controls
-    the renderer's ``<think>\\n`` prefill at the generation prompt
-    (R1-distill convention) and is intentionally not forwarded to
-    ``apply_chat_template`` upstream — that would be a no-op. The
-    template also always emits ``<think>{reasoning}</think>`` when
-    ``reasoning_content`` is provided, so ``preserve_*`` flags are
-    no-ops here too; stored for protocol uniformity.
+    """Deterministic message → token renderer for DeepSeek-V3 models.
+    DeepSeek-V3 is non-reasoning: its chat template has no ``<think>``
+    concept — the generation prompt is a bare ``<｜Assistant｜>`` and past
+    assistant content is emitted verbatim. The reasoning variant
+    (``<think>``-prefilled prompt, history reasoning stripped) lives in
+    :class:`renderers.deepseek_r1.DeepSeekR1Renderer`, which subclasses
+    this one. ``preserve_*`` flags are no-ops here (no reasoning channel),
+    stored for protocol uniformity.
     """
+    #: Default typed config; the R1 subclass overrides this.
+    _config_cls: type = DeepSeekV3RendererConfig
+    #: Generation-prompt reasoning prefill. Empty for V3 (bare
+    #: ``<｜Assistant｜>``); the R1 subclass overrides to ``"<think>\n"``.
+    _GEN_THINK_PREFILL: str = ""
     def __init__(
         self,
         tokenizer: PreTrainedTokenizer,
         config: DeepSeekV3RendererConfig | None = None,
     ):
         self._tokenizer = tokenizer
-        self.config = config or DeepSeekV3RendererConfig()
+        self.config = config or type(self)._config_cls()
         # ── BOS / EOS ────────────────────────────────────────────────
         self._bos = self._get_special_token(f"begin{_US}of{_US}sentence")
@@ -239,8 +244,10 @@ class DeepSeekV3Renderer:
                 emit_special(
                     self._assistant_token, -1, is_sampled=False, is_content=False
                 )
-            if self.config.enable_thinking:
-                emit_text("<think>\n", -1, is_sampled=False, is_content=False)
+            if self._GEN_THINK_PREFILL:
+                emit_text(
+                    self._GEN_THINK_PREFILL, -1, is_sampled=False, is_content=False
+                )
         return RenderedTokens(
             token_ids=tokens,
@@ -382,8 +389,8 @@ class DeepSeekV3Renderer:
         last_role = new_messages[-1].get("role") if new_messages else None
         if last_role != "tool":
             emit_special(self._assistant_token, -1)
-        if self.config.enable_thinking:
-            emit_text("<think>\n", -1)
+        if self._GEN_THINK_PREFILL:
+            emit_text(self._GEN_THINK_PREFILL, -1)
         total_len = len(previous_ids) + len(ext)
         return RenderedTokens(
@@ -399,6 +406,23 @@ class DeepSeekV3Renderer:
     # Assistant rendering
     # ------------------------------------------------------------------
+    def _prepare_assistant_content(self, msg: Message) -> str:
+        """Assistant content as the V3 template would emit it: verbatim.
+        V3 is non-reasoning — its template emits ``message['content']`` as-is
+        and never reads ``reasoning_content``. A structured content list is
+        flattened to its ``text`` parts. The R1 subclass overrides this to
+        strip ``</think>`` from history.
+        """
+        content = msg.get("content") or ""
+        if isinstance(content, list):
+            content = "".join(
+                p.get("text", "")
+                for p in content
+                if isinstance(p, dict) and p.get("type") == "text"
+            )
+        return content
     def _render_assistant(
         self,
         msg: Message,
@@ -414,24 +438,7 @@ class DeepSeekV3Renderer:
         # without a new <｜Assistant｜> token in that case.
         prev_is_tool = msg_idx > 0 and messages[msg_idx - 1]["role"] == "tool"
-        content = msg.get("content") or ""
-        # Support structured content (ThinkingPart / TextPart list).
-        if isinstance(content, list):
-            parts_text: list[str] = []
-            for p in content:
-                if not isinstance(p, dict):
-                    continue
-                if p.get("type") == "thinking":
-                    thinking = p.get("thinking", "")
-                    parts_text.append(f"<think>{thinking}</think>")
-                elif p.get("type") == "text":
-                    parts_text.append(p.get("text", ""))
-            content = "".join(parts_text)
-        # Also accept reasoning_content stored separately (OpenAI-style).
-        elif isinstance(msg.get("reasoning_content"), str) and msg["reasoning_content"]:
-            reasoning = msg["reasoning_content"]
-            content = f"<think>{reasoning}</think>{content}"
+        content = self._prepare_assistant_content(msg)
         tool_calls = msg.get("tool_calls") or []
         # ``<｜Assistant｜>`` is template-injected scaffolding — at

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/kimi_k25.py RENAMED Viewed

@@ -42,7 +42,7 @@ from renderers.base import (
     trim_to_turn_close,
 )
 from renderers.configs import KimiK25RendererConfig
-from renderers.parsing import parse_kimi_k2_section
+from renderers.parsing import _reasoning_end_token_index, parse_kimi_k2_section
 from renderers.qwen3_vl import (
     _image_hash,
     _is_image_part,
@@ -452,6 +452,13 @@ def _parse_kimi_k2_response(
             ids = ids[:i]
             break
+    # Reasoning first: a tool-call section the model drafts *inside* its
+    # <think> trace must not be parsed as a real call (regression #78 — cf.
+    # parse_qwen3). K2.5 renders </think> as text, so locate the boundary by
+    # decoding; the section scan then starts past it. content_ids still begins
+    # at 0, so the </think> text-split below recovers reasoning unchanged.
+    reasoning_end = _reasoning_end_token_index(tokenizer, ids)
     # Token-ID path — produces spans. Only run if every relevant special
     # token resolved at init (i.e. is in the tokenizer's vocab).
     tool_calls: list[ParsedToolCall] = []
@@ -471,6 +478,7 @@ def _parse_kimi_k2_response(
             tool_call_begin_id=tool_call_begin_id,
             tool_call_argument_begin_id=tool_call_argument_begin_id,
             tool_call_end_id=tool_call_end_id,
+            scan_start=reasoning_end,
         )
         text = (
             tokenizer.decode(content_ids, skip_special_tokens=False)
@@ -481,9 +489,13 @@ def _parse_kimi_k2_response(
         text = tokenizer.decode(ids, skip_special_tokens=False) if ids else ""
     # Fallback path: model emitted literal-text section delimiters (singular
-    # variant) rather than special tokens. Spans unavailable here.
+    # variant) rather than special tokens. Spans unavailable here. Start the
+    # search past the first </think> so a literal section drafted inside the
+    # reasoning trace isn't matched as a real call (regression #78).
     if not tool_calls:
-        tc_match = _TOOL_CALLS_SECTION_RE.search(text)
+        think_close = text.find("</think>")
+        search_from = think_close + len("</think>") if think_close != -1 else 0
+        tc_match = _TOOL_CALLS_SECTION_RE.search(text, search_from)
         if tc_match:
             text = text[: tc_match.start()]
             tool_section = (

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/parsing.py RENAMED Viewed

@@ -133,6 +133,39 @@ def _decode(tokenizer, ids: list[int]) -> str:
     return tokenizer.decode(ids, skip_special_tokens=False)
+def _reasoning_end_token_index(
+    tokenizer, ids: list[int], marker: str = "</think>"
+) -> int:
+    """Token index immediately past the first ``</think>`` in ``ids``.
+    Returns 0 when ``ids`` has no closed reasoning region — callers treat
+    that as "scan from the start" (preserves pre-existing behavior for
+    non-thinking / truncated-reasoning completions).
+    Used by parsers whose ``</think>`` is *not* a single special token
+    (DeepSeek-V3, Kimi-K2.5) — where it tokenizes to several pieces and is
+    context-sensitive (the closing ``>`` merges differently depending on the
+    next char), so a token-id or fixed-subsequence search isn't reliable. We
+    instead locate the boundary in decoded text via binary search over prefix
+    decodes, which holds as long as ``decode(ids[:k])`` is prefix-stable in
+    ``k`` (true for the byte-level BPE tokenizers here; ``</think>`` is clean
+    ASCII that won't straddle a byte boundary). Single-token ``</think>``
+    parsers (Qwen3) anchor on the token id directly and don't need this.
+    """
+    if not ids or marker not in _decode(tokenizer, ids):
+        return 0
+    # Smallest prefix length (in tokens) whose decode already contains the
+    # full marker — i.e. the index just past where </think> completes.
+    lo, hi = 1, len(ids)
+    while lo < hi:
+        mid = (lo + hi) // 2
+        if marker in _decode(tokenizer, ids[:mid]):
+            hi = mid
+        else:
+            lo = mid + 1
+    return lo
 # ── Qwen3: <tool_call> JSON </tool_call> ────────────────────────────
@@ -143,11 +176,26 @@ def parse_qwen3(
     stop_ids: set[int],
     tool_call_id: int,
     tool_call_end_id: int,
+    reasoning_end_id: int | None = None,
 ) -> ParsedResponse:
     """Parse Qwen3 completion tokens. Hermes-style JSON tool calls."""
     ids = _strip_stop_tokens(token_ids, stop_ids)
-    tc_start = _find(ids, tool_call_id)
+    # Reasoning is resolved before tool calls. Thinking models (e.g.
+    # Qwen3-*-Thinking) routinely draft ``<tool_call>`` blocks *inside* their
+    # ``<think>...</think>`` trace while planning; those are reasoning, not
+    # real invocations. Anchoring the tool-call scan after the ``</think>``
+    # boundary keeps in-think drafts out of ``tool_calls`` (otherwise they
+    # surface as phantom/duplicate calls) and out of the reasoning/content
+    # split. Mirrors vLLM's DelegatingParser, which runs the reasoning parser
+    # first and tool-parses only the post-``</think>`` content.
+    # ``reasoning_end_id`` is the ``</think>`` token id; when it's absent
+    # (``None``) or the model never closed its reasoning, the scan falls back
+    # to the whole stream (prior behavior).
+    reasoning_end = _find(ids, reasoning_end_id) if reasoning_end_id is not None else -1
+    scan_start = reasoning_end + 1 if reasoning_end != -1 else 0
+    tc_start = _find(ids, tool_call_id, scan_start)
     tool_calls: list[ParsedToolCall] = []
     if tc_start != -1:
         content_ids = ids[:tc_start]
@@ -685,7 +733,15 @@ def parse_deepseek_v3(
     """
     ids = _strip_stop_tokens(token_ids, stop_ids)
-    tc_section_start = _find(ids, tool_calls_begin_id)
+    # Reasoning first: skip past </think> before looking for the tool-call
+    # section, so a section the model drafts *inside* its <think> trace isn't
+    # parsed as a real call (regression #78 — cf. parse_qwen3). content_ids
+    # still starts at 0, so the </think> text-split below recovers reasoning.
+    # DeepSeek-V3 renders </think> as multi-token text, hence the decode-based
+    # boundary finder rather than a token-id anchor.
+    reasoning_end = _reasoning_end_token_index(tokenizer, ids)
+    tc_section_start = _find(ids, tool_calls_begin_id, reasoning_end)
     tool_calls: list[ParsedToolCall] = []
     if tc_section_start != -1:
         content_ids = ids[:tc_section_start]
@@ -962,6 +1018,7 @@ def parse_kimi_k2_section(
     tool_call_begin_id: int,
     tool_call_argument_begin_id: int,
     tool_call_end_id: int,
+    scan_start: int = 0,
 ) -> tuple[list[int], list[ParsedToolCall]]:
     """Split ``ids`` into ``(content_before_section, tool_calls)`` by finding
     the Kimi-style tool-call section delimiters.
@@ -973,8 +1030,15 @@ def parse_kimi_k2_section(
     of the section and a list of ``ParsedToolCall`` covering every attempted
     block inside it; an unclosed section is still walked to whatever the model
     emitted before EOS. Returns ``(ids, [])`` when no section is present.
+    ``scan_start`` restricts the section search to ``ids[scan_start:]`` while
+    keeping ``content_ids = ids[:section_start]`` and all token spans relative
+    to the full ``ids``. Callers pass the post-``</think>`` index so a section
+    the model drafts inside its reasoning trace isn't parsed as a real call;
+    because ``content_ids`` still starts at 0, downstream text-based reasoning
+    extraction is unaffected (regression #78).
     """
-    section_start = _find_any(ids, tool_calls_section_begin_ids)
+    section_start = _find_any(ids, tool_calls_section_begin_ids, scan_start)
     if section_start == -1:
         return list(ids), []
     content_ids = ids[:section_start]

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/qwen3.py RENAMED Viewed

@@ -62,6 +62,7 @@ class Qwen3Renderer:
         self._tool_call_end = self._token_id("</tool_call>")
         self._tool_response = self._token_id("<tool_response>")
         self._tool_response_end = self._token_id("</tool_response>")
+        self._think_end = self._token_id("</think>")
     def _token_id(self, token: str) -> int:
         tid = self._tokenizer.convert_tokens_to_ids(token)
@@ -276,6 +277,7 @@ class Qwen3Renderer:
             stop_ids={self._im_end, self._endoftext},
             tool_call_id=self._tool_call,
             tool_call_end_id=self._tool_call_end,
+            reasoning_end_id=self._think_end,
         )
     def get_stop_token_ids(self) -> list[int]:

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/renderers/qwen3_vl.py RENAMED Viewed

@@ -325,6 +325,7 @@ class Qwen3VLRenderer:
         self._tool_call_end = self._token_id("</tool_call>")
         self._tool_response = self._token_id("<tool_response>")
         self._tool_response_end = self._token_id("</tool_response>")
+        self._think_end = self._token_id("</think>")
         self._vision_start = self._token_id("<|vision_start|>")
         self._vision_end = self._token_id("<|vision_end|>")
         self._image_pad = self._token_id("<|image_pad|>")
@@ -634,6 +635,7 @@ class Qwen3VLRenderer:
             stop_ids={self._im_end, self._endoftext},
             tool_call_id=self._tool_call,
             tool_call_end_id=self._tool_call_end,
+            reasoning_end_id=self._think_end,
         )
     def get_stop_token_ids(self) -> list[int]:

{renderers-0.1.8.dev41 → renderers-0.1.8.dev43}/tests/conftest.py RENAMED Viewed

@@ -36,6 +36,16 @@ RENDERER_MODELS = [
     # Ultra resolves the Ultra template variant via name (auto → ultra=True).
     ("nvidia/NVIDIA-Nemotron-3-Ultra-550B-A55B-BF16", "auto"),
     ("poolside/Laguna-XS.2", "auto"),
+    # DeepSeek-V3/R1 are intentionally NOT in this shared barrage: their
+    # chat templates can't render the barrage's tool-call fixtures (the
+    # templates require ``tool['type']`` and a string-serialized
+    # ``arguments``, and V3 only renders tool_calls when content is None —
+    # so ``apply_chat_template`` raises or drops the calls on the shared
+    # shapes), and the is_content body-recovery checks hit a Metaspace
+    # subset-decode artifact. The renderer is correct in all these cases;
+    # there's just no byte-output to parity-check against. Split-specific
+    # parity (V3 bare prompt vs R1 <think>+history-strip) is covered in
+    # tests/test_deepseek_r1.py.
     # Llama-3 loads via the unrestricted unsloth mirror (byte-identical
     # chat template) so CI needs no Meta-gated HF token. Pinned to the
     # explicit "llama-3" config because the mirror name isn't in

renderers 0.1.8.dev41__tar.gz → 0.1.8.dev43__tar.gz

renderers 0.1.8.dev41__tar.gz → 0.1.8.dev43__tar.gz