sglang 0.4.6.post5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_offline_throughput.py +10 -4
- sglang/bench_one_batch_server.py +67 -11
- sglang/bench_serving.py +85 -74
- sglang/lang/backend/runtime_endpoint.py +24 -1
- sglang/profiler.py +167 -0
- sglang/srt/_custom_ops.py +34 -0
- sglang/srt/configs/internvl.py +8 -12
- sglang/srt/configs/model_config.py +27 -1
- sglang/srt/constrained/base_grammar_backend.py +5 -2
- sglang/srt/constrained/llguidance_backend.py +9 -8
- sglang/srt/constrained/outlines_backend.py +5 -4
- sglang/srt/constrained/xgrammar_backend.py +18 -18
- sglang/srt/conversation.py +46 -8
- sglang/srt/custom_op.py +38 -3
- sglang/srt/debug_utils.py +74 -0
- sglang/srt/disaggregation/common/__init__.py +1 -0
- sglang/srt/disaggregation/common/conn.py +407 -0
- sglang/srt/disaggregation/decode.py +67 -3
- sglang/srt/disaggregation/fake/conn.py +1 -0
- sglang/srt/disaggregation/kv_events.py +60 -5
- sglang/srt/disaggregation/launch_lb.py +140 -0
- sglang/srt/disaggregation/mini_lb.py +29 -48
- sglang/srt/disaggregation/mooncake/conn.py +432 -140
- sglang/srt/disaggregation/mooncake/transfer_engine.py +32 -16
- sglang/srt/disaggregation/nixl/conn.py +124 -432
- sglang/srt/disaggregation/prefill.py +2 -0
- sglang/srt/disaggregation/utils.py +38 -1
- sglang/srt/distributed/device_communicators/pymscclpp.py +315 -0
- sglang/srt/distributed/parallel_state.py +52 -5
- sglang/srt/entrypoints/EngineBase.py +6 -0
- sglang/srt/entrypoints/engine.py +102 -5
- sglang/srt/entrypoints/http_server.py +15 -2
- sglang/srt/function_call/base_format_detector.py +138 -86
- sglang/srt/function_call/deepseekv3_detector.py +54 -6
- sglang/srt/function_call/ebnf_composer.py +33 -19
- sglang/srt/function_call/function_call_parser.py +27 -0
- sglang/srt/function_call/llama32_detector.py +33 -14
- sglang/srt/function_call/mistral_detector.py +73 -26
- sglang/srt/function_call/pythonic_detector.py +86 -20
- sglang/srt/function_call/qwen25_detector.py +64 -10
- sglang/srt/function_call/utils.py +17 -0
- sglang/srt/hf_transformers_utils.py +4 -0
- sglang/srt/layers/attention/aiter_backend.py +488 -123
- sglang/srt/layers/attention/base_attn_backend.py +4 -0
- sglang/srt/layers/attention/cutlass_mla_backend.py +2 -19
- sglang/srt/layers/attention/flashattention_backend.py +103 -18
- sglang/srt/layers/attention/flashinfer_backend.py +45 -1
- sglang/srt/layers/attention/flashinfer_mla_backend.py +37 -1
- sglang/srt/layers/attention/intel_amx_backend.py +128 -0
- sglang/srt/layers/attention/tbo_backend.py +232 -0
- sglang/srt/layers/attention/torch_native_backend.py +3 -0
- sglang/srt/layers/attention/triton_backend.py +244 -5
- sglang/srt/layers/attention/triton_ops/extend_attention.py +12 -4
- sglang/srt/layers/communicator.py +260 -194
- sglang/srt/layers/dp_attention.py +6 -5
- sglang/srt/layers/layernorm.py +30 -19
- sglang/srt/layers/moe/cutlass_moe.py +170 -7
- sglang/srt/layers/moe/cutlass_moe_params.py +169 -0
- sglang/srt/layers/moe/ep_moe/kernels.py +27 -6
- sglang/srt/layers/moe/ep_moe/layer.py +94 -40
- sglang/srt/layers/moe/ep_moe/token_dispatcher.py +13 -8
- sglang/srt/layers/moe/fused_moe_native.py +4 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +220 -25
- sglang/srt/layers/moe/fused_moe_triton/layer.py +34 -4
- sglang/srt/layers/moe/topk.py +44 -18
- sglang/srt/layers/multimodal.py +3 -3
- sglang/srt/layers/quantization/__init__.py +3 -2
- sglang/srt/layers/quantization/blockwise_int8.py +3 -0
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +5 -0
- sglang/srt/layers/quantization/deep_gemm.py +55 -56
- sglang/srt/layers/quantization/fp8.py +28 -23
- sglang/srt/layers/quantization/fp8_kernel.py +118 -66
- sglang/srt/layers/quantization/fp8_utils.py +165 -49
- sglang/srt/layers/quantization/modelopt_quant.py +334 -7
- sglang/srt/layers/quantization/moe_wna16.py +3 -0
- sglang/srt/layers/quantization/w8a8_fp8.py +3 -0
- sglang/srt/layers/quantization/w8a8_int8.py +3 -0
- sglang/srt/layers/rotary_embedding.py +6 -12
- sglang/srt/layers/sampler.py +80 -79
- sglang/srt/layers/utils.py +6 -0
- sglang/srt/lora/layers.py +12 -15
- sglang/srt/lora/lora.py +49 -5
- sglang/srt/lora/lora_manager.py +19 -5
- sglang/srt/lora/mem_pool.py +24 -16
- sglang/srt/lora/utils.py +17 -13
- sglang/srt/managers/data_parallel_controller.py +13 -5
- sglang/srt/managers/eplb_algorithms/__init__.py +63 -0
- sglang/srt/managers/eplb_algorithms/deepseek.py +223 -0
- sglang/srt/managers/{deepseek_eplb.py → eplb_algorithms/deepseek_vec.py} +5 -7
- sglang/srt/managers/eplb_manager.py +55 -14
- sglang/srt/managers/expert_distribution.py +220 -46
- sglang/srt/managers/expert_location.py +110 -56
- sglang/srt/managers/expert_location_dispatch.py +23 -6
- sglang/srt/managers/io_struct.py +15 -4
- sglang/srt/managers/mm_utils.py +88 -38
- sglang/srt/managers/multimodal_processors/base_processor.py +188 -16
- sglang/srt/managers/multimodal_processors/gemma3.py +4 -31
- sglang/srt/managers/multimodal_processors/internvl.py +4 -0
- sglang/srt/managers/multimodal_processors/kimi_vl.py +15 -34
- sglang/srt/managers/multimodal_processors/minicpm.py +2 -1
- sglang/srt/managers/multimodal_processors/phi4mm.py +87 -0
- sglang/srt/managers/multimodal_processors/qwen_vl.py +22 -64
- sglang/srt/managers/schedule_batch.py +140 -38
- sglang/srt/managers/scheduler.py +305 -112
- sglang/srt/managers/tokenizer_manager.py +134 -17
- sglang/srt/managers/utils.py +0 -4
- sglang/srt/metrics/collector.py +9 -0
- sglang/srt/model_executor/cuda_graph_runner.py +72 -61
- sglang/srt/model_executor/expert_location_updater.py +157 -22
- sglang/srt/model_executor/forward_batch_info.py +38 -17
- sglang/srt/model_executor/model_runner.py +96 -56
- sglang/srt/model_loader/utils.py +67 -1
- sglang/srt/models/deepseek_nextn.py +1 -1
- sglang/srt/models/deepseek_v2.py +609 -234
- sglang/srt/models/gemma3_causal.py +7 -0
- sglang/srt/models/gemma3_mm.py +19 -14
- sglang/srt/models/idefics2.py +342 -0
- sglang/srt/models/kimi_vl.py +4 -4
- sglang/srt/models/llama.py +1 -1
- sglang/srt/models/minicpmo.py +2 -5
- sglang/srt/models/minicpmv.py +3 -295
- sglang/srt/models/phi4mm.py +512 -0
- sglang/srt/models/qwen2.py +38 -9
- sglang/srt/models/qwen2_5_vl.py +3 -9
- sglang/srt/models/qwen2_eagle.py +4 -1
- sglang/srt/models/qwen2_moe.py +58 -191
- sglang/srt/models/qwen2_vl.py +3 -9
- sglang/srt/models/qwen3.py +41 -10
- sglang/srt/models/qwen3_moe.py +230 -191
- sglang/srt/models/registry.py +9 -1
- sglang/srt/models/transformers.py +291 -0
- sglang/srt/openai_api/adapter.py +86 -24
- sglang/srt/openai_api/protocol.py +31 -2
- sglang/srt/openai_api/utils.py +172 -0
- sglang/srt/operations.py +37 -2
- sglang/srt/operations_strategy.py +200 -24
- sglang/srt/sampling/sampling_batch_info.py +13 -1
- sglang/srt/sampling/sampling_params.py +2 -1
- sglang/srt/server_args.py +114 -27
- sglang/srt/speculative/build_eagle_tree.py +8 -8
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +8 -11
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +253 -0
- sglang/srt/speculative/eagle_utils.py +51 -91
- sglang/srt/speculative/eagle_worker.py +101 -21
- sglang/srt/two_batch_overlap.py +635 -0
- sglang/srt/utils.py +129 -7
- sglang/test/runners.py +16 -7
- sglang/test/send_one.py +4 -0
- sglang/test/test_cutlass_moe.py +3 -3
- sglang/test/test_fp4_moe.py +248 -0
- sglang/test/test_utils.py +79 -6
- sglang/version.py +1 -1
- {sglang-0.4.6.post5.dist-info → sglang-0.4.7.dist-info}/METADATA +14 -11
- {sglang-0.4.6.post5.dist-info → sglang-0.4.7.dist-info}/RECORD +318 -291
- {sglang-0.4.6.post5.dist-info → sglang-0.4.7.dist-info}/WHEEL +1 -1
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H200.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_L40S.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=96,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- /sglang/srt/layers/moe/fused_moe_triton/configs/{E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
- {sglang-0.4.6.post5.dist-info → sglang-0.4.7.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.6.post5.dist-info → sglang-0.4.7.dist-info}/top_level.txt +0 -0
sglang/srt/function_call/ebnf_composer.py

@@ -30,11 +30,6 @@ class EBNFComposer:
     ws ::= [ \n\t]*
     """

-    TOOL_CALLS_MAP = {
-        "pythonic": '"[" function_call ("," function_call)* "]"',
-        "json": "function_call",
-    }
-
     CALL_RULE_MAP = {
         "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
         "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
@@ -138,35 +133,54 @@
     @staticmethod
     def build_ebnf(
         tools,
-        *,
-        call_rule_fmt: Optional[str] = None,
         function_format: Literal["pythonic", "json"] = "json",
-
-
+        # Parameters for wrapping the entire sequence of tool calls
+        sequence_start_token: Optional[str] = None,
+        sequence_end_token: Optional[str] = None,
+        # Parameters for wrapping individual tool calls
+        individual_call_start_token: Optional[str] = None,
+        individual_call_end_token: Optional[str] = None,
+        # Parameter for separating multiple tool calls
         tool_call_separator: Optional[str] = None,
+        call_rule_fmt: Optional[str] = None,
     ):
         """
         Generalized EBNF builder for all detectors.
         Args:
             tools: List of Tool objects to generate EBNF grammar for
+            function_format: The format of function calls, either "pythonic" or "json"
+            sequence_start_token: Token that wraps the entire sequence of tool calls (start)
+            sequence_end_token: Token that wraps the entire sequence of tool calls (end)
+            individual_call_start_token: Token that wraps each individual tool call (start)
+            individual_call_end_token: Token that wraps each individual tool call (end)
+            tool_call_separator: The separator between multiple tool calls
             call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
                 the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                 format based on function_format will be used.
-            function_format: The format of function calls, either "pythonic" or "json"
-            bot_token: The token that indicates the start of a tool call section
-            eot_token: The token that indicates the end of a tool call section
-            tool_call_separator: The separator between multiple tool calls
         """
         # =================================================================
         # Step 1: Determine the root tool calls rule
         # =================================================================
-
-
-
-
-
+        # Handle a single function call
+        if individual_call_start_token and individual_call_end_token:
+            function_call_unit = f'"{individual_call_start_token}" function_call "{individual_call_end_token}"'
+        else:
+            function_call_unit = "function_call"
+
+        # Handle multiple function calls with separators
+        if tool_call_separator is not None:
+            base_pattern = f'{function_call_unit} ( "{tool_call_separator}" {function_call_unit} )*'
+        else:
+            # Assume only support single function call
+            base_pattern = function_call_unit
+
+        # Apply sequence-level wrapping if needed
+        if sequence_start_token and sequence_end_token:
+            root_rule = (
+                f'"{sequence_start_token}" {base_pattern} "{sequence_end_token}"'
+            )
         else:
-            root_rule =
+            root_rule = base_pattern

         # =================================================================
         # Step 2: Build the header rules
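The old `bot_token`/`eot_token` pair is replaced by explicit sequence-level and per-call wrapping tokens plus a separator. A minimal standalone sketch of the Step 1 composition above (the `compose_root_rule` name and the sample token values are illustrative, not part of sglang's API):

```python
from typing import Optional


def compose_root_rule(
    sequence_start_token: Optional[str] = None,
    sequence_end_token: Optional[str] = None,
    individual_call_start_token: Optional[str] = None,
    individual_call_end_token: Optional[str] = None,
    tool_call_separator: Optional[str] = None,
) -> str:
    """Mirror of Step 1 in EBNFComposer.build_ebnf: compose the root tool-calls rule."""
    # Wrap each individual call if per-call tokens are given
    if individual_call_start_token and individual_call_end_token:
        function_call_unit = (
            f'"{individual_call_start_token}" function_call "{individual_call_end_token}"'
        )
    else:
        function_call_unit = "function_call"

    # Allow repetition only when a separator is defined
    if tool_call_separator is not None:
        base_pattern = f'{function_call_unit} ( "{tool_call_separator}" {function_call_unit} )*'
    else:
        base_pattern = function_call_unit

    # Wrap the whole sequence if sequence-level tokens are given
    if sequence_start_token and sequence_end_token:
        return f'"{sequence_start_token}" {base_pattern} "{sequence_end_token}"'
    return base_pattern


if __name__ == "__main__":
    # Mistral-style wrapping, using the token values MistralDetector passes below
    print(compose_root_rule(
        sequence_start_token="[TOOL_CALLS] [",
        sequence_end_token="]",
        tool_call_separator=", ",
    ))
    # -> "[TOOL_CALLS] [" function_call ( ", " function_call )* "]"
```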
sglang/srt/function_call/function_call_parser.py

@@ -1,3 +1,4 @@
+import logging
 from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Type, Union

 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
@@ -14,6 +15,8 @@ from sglang.srt.openai_api.protocol import (
     ToolChoice,
 )

+logger = logging.getLogger(__name__)
+

 class FunctionCallParser:
     """
@@ -165,11 +168,35 @@ class FunctionCallParser:
     ) -> Optional[str]:
         """
         Get the EBNF grammar for the specified tool choice.
+
+        Args:
+            tool_choice: The tool choice specification
+
+        Returns:
+            EBNF grammar string, or None if no valid tools found
+
+        Note:
+            If a specific function is requested but not found in available tools,
+            logs a warning and falls back to using all available tools for backward compatibility.
         """
         filtered_tools = []
         if isinstance(tool_choice, ToolChoice):
             fn_name = tool_choice.function.name
             filtered_tools = [t for t in self.tools if t.function.name == fn_name]
+
+            # Check if the requested function exists in available tools
+            if not filtered_tools:
+                available_functions = [t.function.name for t in self.tools]
+                logger.warning(
+                    f"Function '{fn_name}' not found in available tools. "
+                    f"Available functions: {available_functions}. "
+                    f"Skipping tool choice."
+                )
+
+                # TODO: Return a 400 error instead of warning when adapter supports proper error handling
+                # For now, fall back to return None
+                return None
         else:
             filtered_tools = self.tools
+
         return self.detector.build_ebnf(filtered_tools)
sglang/srt/function_call/llama32_detector.py

@@ -24,6 +24,11 @@ class Llama32Detector(BaseFormatDetector):
     def __init__(self):
         super().__init__()
         self.bot_token = "<|python_tag|>"
+        # NOTE: technically Llama3.2 doesn't support well with parallel tool calls
+        # They need specific prompt engineering to support parallel tool calls
+        # Here we use ';' as the separator, which might have compatibility issues
+        # if users define to use a different separator in their prompt
+        self.tool_call_separator = ";"

     def has_tool_call(self, text: str) -> bool:
         """Check if the text contains a Llama 3.2 format tool call."""
@@ -37,27 +42,41 @@ class Llama32Detector(BaseFormatDetector):
             return StreamingParseResult(normal_text=text, calls=[])

         if "<|python_tag|>" in text:
-            normal_text, action_text = text.split("<|python_tag|>")
+            normal_text, action_text = text.split("<|python_tag|>", maxsplit=1)
         else:
             normal_text, action_text = "", text

-
-
+        decoder = json.JSONDecoder()
+        idx = 0
+        safe_idx = idx  # the index of the last valid JSON object
         all_actions = []
-
+        action_text_len = len(action_text)
+        while idx < action_text_len:
             try:
-
-
-
+                obj, end = decoder.raw_decode(action_text[idx:])
+                all_actions.append(obj)
+                idx += end + len(self.tool_call_separator)
+                safe_idx = idx
             except json.JSONDecodeError as e:
-
-                logger.warning(
+                # Find where next `{"name"` appears and try again
+                logger.warning(
+                    f"Failed to parse JSON part: {action_text[idx:]}, JSON parse error: {str(e)}"
+                )
+                next_obj_start = action_text.find('{"name":', idx + 1)
+                if next_obj_start == -1:
+                    break
+                idx = next_obj_start
                 continue
-
+
         # Only process if we found valid JSON objects
-        if all_actions
-
-
+        calls = self.parse_base_json(all_actions, tools) if all_actions else []
+        # Use safe_idx to avoid idx containing the last part of an invalid JSON object
+        trailing_text = (
+            action_text[safe_idx:].strip() if safe_idx < action_text_len else ""
+        )
+        return StreamingParseResult(
+            normal_text=normal_text + trailing_text, calls=calls
+        )

     def structure_info(self) -> _GetInfoFunc:
         return lambda name: StructureInfo(
@@ -70,5 +89,5 @@ class Llama32Detector(BaseFormatDetector):
         return EBNFComposer.build_ebnf(
             tools,
             function_format="json",
-            tool_call_separator=
+            tool_call_separator=self.tool_call_separator,
         )
sglang/srt/function_call/mistral_detector.py

@@ -1,4 +1,5 @@
 import json
+import logging
 import re
 from typing import List

@@ -11,12 +12,14 @@ from sglang.srt.function_call.core_types import (
 from sglang.srt.function_call.ebnf_composer import EBNFComposer
 from sglang.srt.openai_api.protocol import Tool

+logger = logging.getLogger(__name__)
+

 class MistralDetector(BaseFormatDetector):
     """
     Detector for Mistral models.
     Assumes function call format:
-        [TOOL_CALLS] [{"name":"
+        [TOOL_CALLS] [{"name":"func1", "arguments":{...}}, {"name":"func2", "arguments":{...}}]
     """

     def __init__(self):
@@ -27,26 +30,12 @@ class MistralDetector(BaseFormatDetector):
         self.bot_token = "[TOOL_CALLS] ["
         self.eot_token = "]"
         self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
+        self.tool_call_separator = ", "

     def has_tool_call(self, text: str) -> bool:
         """Check if the text contains a Mistral format tool call."""
         return self.bot_token in text

-    def _clean_text(self, text: str) -> str:
-        """
-        clean text to only leave ''[TOOL_CALLS] [{"name": xxx, "arguments": {xxx}}]'
-        for example,
-        text = '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]\n\nToday\'s weather in Boston is :{function call result} (in Fahrenheit)\n\nIf you prefer Celsius, please let me know.'
-        return '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]'
-        The key pattern is [TOOL_CALLS] [...]
-        """
-        # TODO: check if Mistral supports multiple tool calls, currently assume only support one tool call
-        find_results = re.findall(r"\[TOOL_CALLS\] \[.*?\]", text, re.DOTALL)
-        if len(find_results) > 0:
-            return find_results[0]
-        else:
-            return ""
-
     def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
         """
         One-time parsing: Detects and parses tool calls in the provided text.
@@ -57,17 +46,74 @@ class MistralDetector(BaseFormatDetector):
         """
         idx = text.find(self.bot_token)
         normal_text = text[:idx].strip() if idx != -1 else text
-
-
-
+
+        if self.bot_token not in text:
+            return StreamingParseResult(normal_text=normal_text, calls=[])
+
+        # Extract the JSON array part from [TOOL_CALLS] [...]
+        # Use bracket counting to properly handle nested brackets in JSON content
+        json_array_str = self._extract_json_array(text)
+        if not json_array_str:
+            return StreamingParseResult(normal_text=normal_text, calls=[])
+
         calls = []
-
-
-
-
-
+        try:
+            function_call_arr = json.loads(json_array_str)
+            # Handle both single object and array of objects
+            if not isinstance(function_call_arr, list):
+                function_call_arr = [function_call_arr]
+            calls = self.parse_base_json(function_call_arr, tools)
+        except json.JSONDecodeError as e:
+            logger.warning(
+                f"Failed to parse JSON part: {json_array_str}, JSON parse error: {str(e)}"
+            )
+
         return StreamingParseResult(normal_text=normal_text, calls=calls)

+    def _extract_json_array(self, text: str) -> str:
+        """
+        Extract the JSON array part using bracket counting to handle nested brackets.
+
+        :param text: The complete text containing [TOOL_CALLS] [...]
+        :return: The JSON array string or None if not found
+        """
+        start_idx = text.find(self.bot_token)
+        if start_idx == -1:
+            return None
+
+        # Start from the opening bracket after [TOOL_CALLS]
+        json_start = (
+            start_idx + len(self.bot_token) - 1
+        )  # -1 to include the opening bracket
+        bracket_count = 0
+        in_string = False
+        escape_next = False
+
+        for i in range(json_start, len(text)):
+            char = text[i]
+
+            if escape_next:
+                escape_next = False
+                continue
+
+            if char == "\\":
+                escape_next = True
+                continue
+
+            if char == '"' and not escape_next:
+                in_string = not in_string
+                continue
+
+            if not in_string:
+                if char == "[":
+                    bracket_count += 1
+                elif char == "]":
+                    bracket_count -= 1
+                    if bracket_count == 0:
+                        return text[json_start : i + 1]
+
+        return None
+
     def structure_info(self) -> _GetInfoFunc:
         return lambda name: StructureInfo(
             begin='[TOOL_CALLS] [{"name":"' + name + '", "arguments":',
@@ -78,7 +124,8 @@ class MistralDetector(BaseFormatDetector):
     def build_ebnf(self, tools: List[Tool]):
         return EBNFComposer.build_ebnf(
             tools,
-
-
+            sequence_start_token=self.bot_token,
+            sequence_end_token=self.eot_token,
             function_format="json",
+            tool_call_separator=self.tool_call_separator,
         )
@@ -32,47 +32,79 @@ class PythonicDetector(BaseFormatDetector):
|
|
32
32
|
re.DOTALL,
|
33
33
|
)
|
34
34
|
|
35
|
+
@staticmethod
|
36
|
+
def _text_strip(text: str) -> str:
|
37
|
+
# Llama 4 model sometime will output <|python_start|> and <|python_end|> tokens
|
38
|
+
# remove those tokens
|
39
|
+
text = text.replace("<|python_start|>", "")
|
40
|
+
text = text.replace("<|python_end|>", "")
|
41
|
+
return text
|
42
|
+
|
35
43
|
def has_tool_call(self, text: str) -> bool:
|
36
|
-
return bool(self.tool_call_regex.
|
44
|
+
return bool(self.tool_call_regex.search(self._text_strip(text.strip())))
|
37
45
|
|
38
46
|
def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
|
39
47
|
# Try parsing the text as a Python list of function calls
|
40
48
|
text = text.strip()
|
41
|
-
|
42
|
-
|
49
|
+
|
50
|
+
# Remove unexpected <|python_start|> and <|python_end|> for llama4
|
51
|
+
text = self._text_strip(text)
|
52
|
+
|
53
|
+
match = self.tool_call_regex.search(text)
|
54
|
+
if match is None:
|
43
55
|
return StreamingParseResult(normal_text=text, calls=[])
|
56
|
+
|
57
|
+            # Extract the tool call part and any text before/after it
+            tool_call_start = match.start()
+            tool_call_end = match.end()
+
+            normal_text_before = text[:tool_call_start] if tool_call_start > 0 else ""
+            tool_call_text = text[tool_call_start:tool_call_end]
+            normal_text_after = text[tool_call_end:] if tool_call_end < len(text) else ""
+
+            # Combine normal text
+            normal_text = normal_text_before + normal_text_after
+
         try:
-            module = ast.parse(
+            module = ast.parse(tool_call_text)
             parsed = getattr(module.body[0], "value", None)
             if not (
                 isinstance(parsed, ast.List)
                 and all(isinstance(e, ast.Call) for e in parsed.elts)
             ):
-                return StreamingParseResult(normal_text=
+                return StreamingParseResult(normal_text=normal_text, calls=[])
+
             calls = []
             tool_indices = {
                 tool.function.name: i
                 for i, tool in enumerate(tools)
                 if tool.function.name
             }
-            for call in parsed.elts:
+            for call_index, call in enumerate(parsed.elts):
                 if not isinstance(call.func, ast.Name):
                     continue
                 function_name = call.func.id
+                # Validate that the function exists in the tools
+                if function_name not in tool_indices:
+                    logger.warning(
+                        f"Model attempted to call undefined function: {function_name}"
+                    )
+                    continue
                 arguments = {}
                 for keyword in call.keywords:
                     arguments[keyword.arg] = self._get_parameter_value(keyword.value)
                 calls.append(
                     ToolCallItem(
-                        tool_index=
+                        tool_index=call_index,  # Use the call index in the response, not tool position
                         name=function_name,
                         parameters=json.dumps(arguments, ensure_ascii=False),
                     )
                 )
-
+
+            return StreamingParseResult(normal_text=normal_text, calls=calls)
         except Exception:
             logger.exception("Error in pythonic tool call parsing.")
-            return StreamingParseResult(normal_text=
+            return StreamingParseResult(normal_text=normal_text, calls=[])
 
     def _find_matching_bracket(self, buffer: str, start: int) -> int:
         """
@@ -96,6 +128,30 @@ class PythonicDetector(BaseFormatDetector):
                     return i
         return -1  # No matching bracket found
 
+    def _strip_and_split_buffer(self, buffer: str) -> tuple[str, str]:
+        """
+        Strip special tokens from buffer and split into safe_text and held_back_text.
+
+        Returns:
+            tuple of (safe_text_to_output, text_to_hold_in_buffer)
+        """
+        # Check if original buffer ends with a partial token at the end
+        special_tokens = ["<|python_start|>", "<|python_end|>"]
+
+        for token in special_tokens:
+            partial_length = self._ends_with_partial_token(buffer, token)
+            if partial_length > 0:
+                # Split buffer: safe part + held back partial token
+                safe_text = buffer[:-partial_length]
+                held_back = buffer[-partial_length:]
+                # Strip complete special tokens from safe part only
+                safe_text = self._text_strip(safe_text)
+                return safe_text, held_back
+
+        # No partial tokens found, strip complete tokens from entire buffer
+        safe_text = self._text_strip(buffer)
+        return safe_text, ""
+
     def parse_streaming_increment(
         self, new_text: str, tools: List[Tool]
     ) -> StreamingParseResult:
@@ -105,20 +161,28 @@ class PythonicDetector(BaseFormatDetector):
         then parses and emits any detected calls.
         """
         self._buffer += new_text
-
+
+        # Strip special tokens from entire buffer and handle partial tokens
+        stripped_buffer, held_back = self._strip_and_split_buffer(self._buffer)
+
+        start = stripped_buffer.find("[")
 
         if start == -1:
-
-            self._buffer =
-            return StreamingParseResult(normal_text=
+            # No tool call bracket found
+            self._buffer = held_back
+            return StreamingParseResult(normal_text=stripped_buffer)
 
-        normal_text =
+        normal_text = stripped_buffer[:start] if start > 0 else ""
 
-        end = self._find_matching_bracket(
+        end = self._find_matching_bracket(stripped_buffer, start)
         if end != -1:
-
+            # Found complete tool call
+            call_text = stripped_buffer[start : end + 1]
             result = self.detect_and_parse(call_text, tools)
-
+
+            # Update buffer with remaining text after tool call plus any held back text
+            remaining_text = stripped_buffer[end + 1 :] + held_back
+            self._buffer = remaining_text
 
             # If we had normal text before the tool call, add it to the result
             if normal_text:
@@ -127,8 +191,10 @@ class PythonicDetector(BaseFormatDetector):
             return result
 
         # We have an opening bracket but no closing bracket yet
+        # Put back everything from the bracket onwards plus held back text
+        self._buffer = stripped_buffer[start:] + held_back
+
         if normal_text:
-            self._buffer = self._buffer[start:]
             return StreamingParseResult(normal_text=normal_text)
 
         # Otherwise, we're still accumulating a potential tool call
@@ -156,8 +222,8 @@ class PythonicDetector(BaseFormatDetector):
     def build_ebnf(self, tools: List[Tool]) -> Optional[str]:
         return EBNFComposer.build_ebnf(
             tools,
-
-
+            sequence_start_token="[",
+            sequence_end_token="]",
             tool_call_separator=",",
             function_format="pythonic",
         )
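The streaming changes above revolve around one idea: any suffix of the buffer that could be the beginning of a special token such as `<|python_start|>` is held back rather than emitted as normal text, while complete special tokens are stripped. The sketch below re-implements that splitting logic as standalone functions; the helper names and the `__main__` demo are illustrative assumptions for this write-up, not sglang's actual API.

# Illustrative sketch of the "hold back a possible partial special token" idea
# behind PythonicDetector._strip_and_split_buffer. Names here are hypothetical.

SPECIAL_TOKENS = ["<|python_start|>", "<|python_end|>"]


def ends_with_partial_token(buffer: str, token: str) -> int:
    """Return the length of the longest suffix of `buffer` that is a proper
    prefix of `token`, or 0 if there is none."""
    for length in range(min(len(buffer), len(token) - 1), 0, -1):
        if buffer.endswith(token[:length]):
            return length
    return 0


def strip_and_split_buffer(buffer: str) -> tuple[str, str]:
    """Split `buffer` into (safe_text, held_back): complete special tokens are
    stripped from safe_text, and a trailing partial token is held back."""
    for token in SPECIAL_TOKENS:
        partial = ends_with_partial_token(buffer, token)
        if partial > 0:
            safe, held = buffer[:-partial], buffer[-partial:]
            break
    else:
        safe, held = buffer, ""
    for token in SPECIAL_TOKENS:
        safe = safe.replace(token, "")
    return safe, held


if __name__ == "__main__":
    # "<|py" could be the start of "<|python_start|>", so it is held back.
    print(strip_and_split_buffer("hello <|py"))             # ('hello ', '<|py')
    # A complete special token is stripped from the emitted text.
    print(strip_and_split_buffer("<|python_start|>[foo("))  # ('[foo(', '')

Holding back at most a token-length suffix keeps streaming latency low while guaranteeing that a special token split across two chunks is never leaked to the client.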
sglang/srt/function_call/qwen25_detector.py

@@ -1,4 +1,5 @@
 import json
+import logging
 import re
 from typing import List
 
@@ -11,12 +12,14 @@ from sglang.srt.function_call.core_types import (
 from sglang.srt.function_call.ebnf_composer import EBNFComposer
 from sglang.srt.openai_api.protocol import Tool
 
+logger = logging.getLogger(__name__)
+
 
 class Qwen25Detector(BaseFormatDetector):
     """
     Detector for Qwen 2.5 models.
     Assumes function call format:
-        <tool_call
+        <tool_call>\n{"name":"func1", "arguments":{...}}\n</tool_call>\n<tool_call>\n{"name":"func2", "arguments":{...}}\n</tool_call>
     """
 
     def __init__(self):
@@ -24,8 +27,10 @@ class Qwen25Detector(BaseFormatDetector):
         Initializes the detector with necessary state variables.
         """
         super().__init__()
-        self.bot_token = "<tool_call
-        self.eot_token = "</tool_call>"
+        self.bot_token = "<tool_call>\n"
+        self.eot_token = "\n</tool_call>"
+        self.tool_call_separator = "\n"
+        self._normal_text_buffer = ""  # Buffer for handling partial end tokens
 
     def has_tool_call(self, text: str) -> bool:
         """Check if the text contains a Qwen 2.5 format tool call."""
@@ -43,25 +48,74 @@ class Qwen25Detector(BaseFormatDetector):
         normal_text = text[:idx].strip() if idx != -1 else text
         if self.bot_token not in text:
             return StreamingParseResult(normal_text=normal_text, calls=[])
-
+
+        # Find all <tool_call>\n...\n</tool_call> blocks
+        pattern = rf"{re.escape(self.bot_token)}(.*?){re.escape(self.eot_token)}"
         match_result_list = re.findall(pattern, text, re.DOTALL)
         calls = []
         for match_result in match_result_list:
-
-
+            try:
+                parsed_call = json.loads(match_result.strip())
+                calls.extend(self.parse_base_json(parsed_call, tools))
+            except json.JSONDecodeError as e:
+                logger.warning(
+                    f"Failed to parse JSON part: {match_result}, JSON parse error: {str(e)}"
+                )
+                continue
         return StreamingParseResult(normal_text=normal_text, calls=calls)
 
+    def parse_streaming_increment(
+        self, new_text: str, tools: List[Tool]
+    ) -> StreamingParseResult:
+        """
+        Streaming incremental parsing for Qwen 2.5 tool calls.
+        Uses base class implementation with buffering to handle partial end tokens.
+        """
+        result = super().parse_streaming_increment(new_text, tools)
+
+        # Handle partial end tokens that are streamed character by character
+        if result.normal_text:
+            self._normal_text_buffer += result.normal_text
+
+            # Check if buffer contains complete end token (without leading newline)
+            end_token_without_newline = self.eot_token[1:]  # "</tool_call>"
+            if end_token_without_newline in self._normal_text_buffer:
+                cleaned_text = self._normal_text_buffer.replace(
+                    end_token_without_newline, ""
+                )
+                self._normal_text_buffer = ""
+                result.normal_text = cleaned_text
+            else:
+                # Check if buffer might contain partial end token at the end
+                partial_match_len = self._ends_with_partial_token(
+                    self._normal_text_buffer, end_token_without_newline
+                )
+
+                if partial_match_len:
+                    # Keep potential partial match in buffer, return the rest
+                    result.normal_text = self._normal_text_buffer[:-partial_match_len]
+                    self._normal_text_buffer = self._normal_text_buffer[
+                        -partial_match_len:
+                    ]
+                else:
+                    # No partial match, return all buffered text
+                    result.normal_text = self._normal_text_buffer
+                    self._normal_text_buffer = ""
+
+        return result
+
     def structure_info(self) -> _GetInfoFunc:
         return lambda name: StructureInfo(
-            begin='<tool_call
-            end="}</tool_call>",
+            begin='<tool_call>\n{"name":"' + name + '", "arguments":',
+            end="}\n</tool_call>",
             trigger="<tool_call>",
         )
 
     def build_ebnf(self, tools: List[Tool]):
         return EBNFComposer.build_ebnf(
             tools,
-
-
+            individual_call_start_token=self.bot_token.replace("\n", "\\n"),
+            individual_call_end_token=self.eot_token.replace("\n", "\\n"),
+            tool_call_separator="\\n",
             function_format="json",
         )
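The reworked `detect_and_parse` above reduces to a regex over `<tool_call>\n...\n</tool_call>` blocks followed by `json.loads` on each block. Below is a minimal standalone sketch of that flow, returning plain dicts instead of sglang's `StreamingParseResult`/`ToolCallItem` types (an illustrative assumption, not the detector's real return values).

import json
import re

# Same delimiters the Qwen25Detector configures in __init__ above.
BOT_TOKEN = "<tool_call>\n"
EOT_TOKEN = "\n</tool_call>"


def extract_qwen25_tool_calls(text: str) -> tuple[str, list[dict]]:
    """Return (normal_text, tool_calls) for Qwen 2.5 style model output."""
    idx = text.find(BOT_TOKEN)
    normal_text = text[:idx].strip() if idx != -1 else text
    calls = []
    pattern = rf"{re.escape(BOT_TOKEN)}(.*?){re.escape(EOT_TOKEN)}"
    for block in re.findall(pattern, text, re.DOTALL):
        try:
            calls.append(json.loads(block.strip()))
        except json.JSONDecodeError:
            # Malformed blocks are skipped, mirroring the warning-and-continue
            # behaviour in the detector above.
            continue
    return normal_text, calls


if __name__ == "__main__":
    sample = (
        "Sure, calling the tool now.\n"
        '<tool_call>\n{"name": "get_weather", "arguments": {"city": "Paris"}}\n</tool_call>'
    )
    print(extract_qwen25_tool_calls(sample))
    # ('Sure, calling the tool now.', [{'name': 'get_weather', 'arguments': {'city': 'Paris'}}])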
sglang/srt/function_call/utils.py

@@ -18,6 +18,23 @@ def _find_common_prefix(s1: str, s2: str) -> str:
 
 
 def _partial_json_loads(input_str: str, flags: Allow) -> Tuple[Any, int]:
+    """
+    Parse incomplete or partial JSON strings commonly encountered during streaming.
+
+    Args:
+        input_str (str): The potentially incomplete JSON string to parse.
+        flags (Allow): Bitwise flags controlling what types of partial data are allowed.
+            Common flags include:
+            - Allow.STR: Allow partial strings (e.g., '"hello wo' -> 'hello wo')
+            - Allow.OBJ: Allow partial objects (e.g., '{"key":' -> {'key': None})
+            - Allow.ARR: Allow partial arrays (e.g., '[1, 2,' -> [1, 2])
+            - Allow.ALL: Allow all types of partial data
+
+    Returns:
+        Tuple[Any, int]: A tuple containing:
+            - parsed_object: The Python object parsed from the JSON
+            - consumed_length: Number of characters consumed from input_str
+    """
     try:
         return (partial_json_parser.loads(input_str, flags), len(input_str))
     except JSONDecodeError as e: