sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/_custom_ops.py +29 -1
- sglang/srt/configs/model_config.py +1 -1
- sglang/srt/conversation.py +1 -1
- sglang/srt/disaggregation/common/conn.py +34 -6
- sglang/srt/disaggregation/mini_lb.py +3 -2
- sglang/srt/disaggregation/mooncake/conn.py +49 -20
- sglang/srt/disaggregation/mooncake/transfer_engine.py +4 -2
- sglang/srt/disaggregation/nixl/conn.py +17 -13
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -91
- sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +96 -1
- sglang/srt/distributed/device_communicators/quick_all_reduce.py +273 -0
- sglang/srt/distributed/device_communicators/shm_broadcast.py +12 -5
- sglang/srt/distributed/parallel_state.py +70 -15
- sglang/srt/entrypoints/engine.py +2 -8
- sglang/srt/entrypoints/http_server.py +20 -32
- sglang/srt/entrypoints/openai/protocol.py +3 -3
- sglang/srt/entrypoints/openai/serving_chat.py +27 -4
- sglang/srt/function_call/base_format_detector.py +74 -12
- sglang/srt/function_call/deepseekv3_detector.py +26 -11
- sglang/srt/function_call/ebnf_composer.py +95 -63
- sglang/srt/function_call/function_call_parser.py +4 -4
- sglang/srt/function_call/kimik2_detector.py +41 -16
- sglang/srt/function_call/llama32_detector.py +6 -3
- sglang/srt/function_call/mistral_detector.py +11 -3
- sglang/srt/function_call/pythonic_detector.py +16 -14
- sglang/srt/function_call/qwen25_detector.py +12 -3
- sglang/srt/function_call/{qwen3_detector.py → qwen3_coder_detector.py} +10 -9
- sglang/srt/layers/activation.py +11 -3
- sglang/srt/layers/attention/base_attn_backend.py +3 -1
- sglang/srt/layers/communicator.py +12 -12
- sglang/srt/layers/dp_attention.py +72 -24
- sglang/srt/layers/logits_processor.py +34 -24
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +25 -224
- sglang/srt/layers/moe/topk.py +5 -13
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +2 -9
- sglang/srt/layers/quantization/modelopt_quant.py +8 -4
- sglang/srt/layers/quantization/utils.py +0 -9
- sglang/srt/layers/radix_attention.py +5 -3
- sglang/srt/lora/lora_manager.py +133 -169
- sglang/srt/lora/lora_registry.py +124 -0
- sglang/srt/lora/mem_pool.py +2 -2
- sglang/srt/managers/cache_controller.py +53 -6
- sglang/srt/managers/io_struct.py +19 -1
- sglang/srt/managers/schedule_batch.py +13 -3
- sglang/srt/managers/scheduler.py +13 -25
- sglang/srt/managers/tokenizer_manager.py +28 -25
- sglang/srt/managers/tp_worker.py +2 -4
- sglang/srt/mem_cache/allocator.py +67 -7
- sglang/srt/mem_cache/hicache_storage.py +17 -1
- sglang/srt/mem_cache/hiradix_cache.py +30 -16
- sglang/srt/mem_cache/memory_pool_host.py +3 -0
- sglang/srt/model_executor/cuda_graph_runner.py +61 -25
- sglang/srt/model_executor/forward_batch_info.py +201 -29
- sglang/srt/model_executor/model_runner.py +41 -23
- sglang/srt/models/deepseek_v2.py +1 -2
- sglang/srt/models/mllama4.py +10 -3
- sglang/srt/models/qwen2_moe.py +0 -4
- sglang/srt/models/qwen3_moe.py +1 -6
- sglang/srt/reasoning_parser.py +46 -4
- sglang/srt/sampling/sampling_batch_info.py +6 -5
- sglang/srt/server_args.py +76 -55
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +33 -28
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +37 -36
- sglang/srt/speculative/eagle_utils.py +51 -23
- sglang/srt/speculative/eagle_worker.py +59 -44
- sglang/srt/two_batch_overlap.py +9 -5
- sglang/srt/utils.py +17 -68
- sglang/test/test_activation.py +50 -1
- sglang/version.py +1 -1
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/METADATA +5 -5
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/RECORD +75 -72
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/WHEEL +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/http_server.py

@@ -107,6 +107,8 @@ from sglang.version import __version__
 logger = logging.getLogger(__name__)
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

+HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
+

 # Store global states
 @dataclasses.dataclass
@@ -212,9 +214,6 @@ async def validate_json_request(raw_request: Request):
         )


-HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
-
-
 ##### Native API endpoints #####


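The two hunks above simply hoist `HEALTH_CHECK_TIMEOUT` to the top of the module; the value is still read from the same environment variable. A minimal sketch of how the setting resolves (variable name and default taken from the lines above):

```python
# HEALTH_CHECK_TIMEOUT is resolved once at import time, so the environment
# variable must be set before sglang.srt.entrypoints.http_server is imported.
import os

os.environ["SGLANG_HEALTH_CHECK_TIMEOUT"] = "60"
HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
print(HEALTH_CHECK_TIMEOUT)  # 60
```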
@@ -807,6 +806,24 @@ async def retrieve_model(model: str):
     )


+@app.post("/v1/score", dependencies=[Depends(validate_json_request)])
+async def v1_score_request(request: ScoringRequest, raw_request: Request):
+    """Endpoint for the decoder-only scoring API. See Engine.score() for detailed documentation."""
+    return await raw_request.app.state.openai_serving_score.handle_request(
+        request, raw_request
+    )
+
+
+@app.api_route(
+    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
+)
+async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
+    """Endpoint for reranking documents based on query relevance."""
+    return await raw_request.app.state.openai_serving_rerank.handle_request(
+        request, raw_request
+    )
+
+
 ## SageMaker API
 @app.get("/ping")
 async def sagemaker_health() -> Response:
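The scoring and rerank routes are only relocated within the file (they now sit above the SageMaker/Vertex section, see the removal hunk below), so existing clients keep working. For orientation, a minimal client-side sketch of calling them against a running server; the base URL and JSON field names are illustrative assumptions, not the exact ScoringRequest / V1RerankReqInput schemas:

```python
# Hypothetical client calls for the relocated /v1/score and /v1/rerank routes.
# BASE_URL and the payload field names are assumptions for illustration only.
import requests

BASE_URL = "http://localhost:30000"

# Decoder-only scoring API (POST only).
score = requests.post(
    f"{BASE_URL}/v1/score",
    json={"query": "Is the sky blue?", "items": ["yes", "no"]},  # assumed fields
)
print(score.status_code, score.text)

# Rerank API: registered for both POST and PUT via app.api_route above.
rerank = requests.put(
    f"{BASE_URL}/v1/rerank",
    json={"query": "best pizza in town", "documents": ["doc a", "doc b"]},  # assumed fields
)
print(rerank.status_code, rerank.text)
```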
@@ -852,24 +869,6 @@ async def vertex_generate(vertex_req: VertexGenerateReqInput, raw_request: Reque
     return ORJSONResponse({"predictions": ret})


-@app.post("/v1/score", dependencies=[Depends(validate_json_request)])
-async def v1_score_request(request: ScoringRequest, raw_request: Request):
-    """Endpoint for the decoder-only scoring API. See Engine.score() for detailed documentation."""
-    return await raw_request.app.state.openai_serving_score.handle_request(
-        request, raw_request
-    )
-
-
-@app.api_route(
-    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
-)
-async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
-    """Endpoint for reranking documents based on query relevance."""
-    return await raw_request.app.state.openai_serving_rerank.handle_request(
-        request, raw_request
-    )
-
-
 def _create_error_response(e):
     return ORJSONResponse(
         {"error": {"message": str(e)}}, status_code=HTTPStatus.BAD_REQUEST
@@ -916,15 +915,6 @@ def launch_server(
     add_prometheus_middleware(app)
     enable_func_timer()

-    image_token_text = None
-    if (
-        tokenizer_manager.image_token_id is not None
-        and not server_args.skip_tokenizer_init
-    ):
-        image_token_text = tokenizer_manager.tokenizer.decode(
-            [tokenizer_manager.image_token_id]
-        )
-
     # Send a warmup request - we will create the thread launch it
     # in the lifespan after all other warmups have fired.
     warmup_thread = threading.Thread(
@@ -932,7 +922,6 @@
         args=(
             server_args,
             pipe_finish_writer,
-            image_token_text,
             launch_callback,
         ),
     )
@@ -1066,7 +1055,6 @@ def _execute_server_warmup(
 def _wait_and_warmup(
     server_args: ServerArgs,
     pipe_finish_writer: Optional[multiprocessing.connection.Connection],
-    image_token_text: str,
     launch_callback: Optional[Callable[[], None]] = None,
 ):
     if not server_args.skip_server_warmup:
sglang/srt/entrypoints/openai/protocol.py

@@ -192,9 +192,9 @@ class CompletionRequest(BaseModel):
     session_params: Optional[Dict] = None

     # For PD disaggregation
-    bootstrap_host: Optional[str] = None
-    bootstrap_port: Optional[int] = None
-    bootstrap_room: Optional[int] = None
+    bootstrap_host: Optional[Union[List[str], str]] = None
+    bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
+    bootstrap_room: Optional[Union[List[int], int]] = None

     # For request id
     rid: Optional[Union[List[str], str]] = None
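The widened `bootstrap_*` annotations let one CompletionRequest carry either a single value or one value per sub-request for PD disaggregation, mirroring the existing list-or-scalar `rid` field. A small sketch with a stand-in pydantic model mirroring only these fields (not the real CompletionRequest) shows that both shapes validate:

```python
# Stand-in model mirroring only the PD-disaggregation fields from the diff,
# to illustrate that both scalar and per-item list payloads now validate.
from typing import List, Optional, Union
from pydantic import BaseModel

class PDBootstrapFields(BaseModel):
    bootstrap_host: Optional[Union[List[str], str]] = None
    bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
    bootstrap_room: Optional[Union[List[int], int]] = None

# Single-request form (previous behavior still works).
single = PDBootstrapFields(bootstrap_host="10.0.0.1", bootstrap_port=8998, bootstrap_room=1234)

# Batched form: one entry per sub-request; individual ports may be None.
batched = PDBootstrapFields(
    bootstrap_host=["10.0.0.1", "10.0.0.2"],
    bootstrap_port=[8998, None],
    bootstrap_room=[1234, 5678],
)
print(single.bootstrap_host, batched.bootstrap_port)
```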
sglang/srt/entrypoints/openai/serving_chat.py

@@ -55,6 +55,20 @@ class OpenAIServingChat(OpenAIServingBase):
     def _request_id_prefix(self) -> str:
         return "chatcmpl-"

+    def _validate_request(self, request: ChatCompletionRequest) -> Optional[str]:
+        """Validate that the input is valid."""
+        if not request.messages:
+            return "Messages cannot be empty."
+
+        if (
+            isinstance(request.tool_choice, str)
+            and request.tool_choice.lower() == "required"
+            and not request.tools
+        ):
+            return "Tools cannot be empty if tool choice is set to required."
+
+        return None
+
     def _convert_to_internal_request(
         self,
         request: ChatCompletionRequest,
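The new `_validate_request` hook returns an error string instead of raising, and it rejects two cases: an empty message list, and `tool_choice="required"` without any tools. A simplified stand-in (not the real ChatCompletionRequest) illustrating those checks:

```python
# Simplified stand-in for the two checks added in _validate_request above.
from typing import Optional

def validate(messages: list, tool_choice, tools: Optional[list]) -> Optional[str]:
    if not messages:
        return "Messages cannot be empty."
    if isinstance(tool_choice, str) and tool_choice.lower() == "required" and not tools:
        return "Tools cannot be empty if tool choice is set to required."
    return None

print(validate([], "auto", None))                      # Messages cannot be empty.
print(validate([{"role": "user"}], "required", None))  # Tools cannot be empty ...
print(validate([{"role": "user"}], "auto", None))      # None (request is valid)
```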
@@ -484,7 +498,10 @@ class OpenAIServingChat(OpenAIServingBase):

                 # Handle tool calls
                 if request.tool_choice != "none" and request.tools:
-                    async for chunk in self._process_tool_call_stream(
+                    async for (
+                        chunk,
+                        tool_call_finish_reason_type,
+                    ) in self._process_tool_call_stream(
                         index,
                         delta,
                         parser_dict,
@@ -492,7 +509,10 @@
                         request,
                         finish_reason_type,
                     ):
-                        yield chunk
+                        if chunk:
+                            yield chunk
+                        finish_reason_type = tool_call_finish_reason_type
+
                 else:
                     # Regular content
                     if delta or not (
@@ -865,7 +885,7 @@
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.model_dump_json()}\n\n"
+                yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type

                 # Yield tool calls
                 for call_item in calls:
@@ -920,4 +940,7 @@
                 choices=[choice_data],
                 model=request.model,
             )
-            yield f"data: {chunk.model_dump_json()}\n\n"
+            yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type
+
+            if finish_reason_type == "stop":
+                yield None, "tool_calls"
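Taken together, these serving_chat.py hunks change the tool-call stream helper so that it yields `(chunk, finish_reason_type)` pairs instead of bare SSE chunks, with a trailing `(None, "tool_calls")` pair that rewrites a `"stop"` finish reason. A toy consumer showing the new protocol (names and payloads are illustrative, not sglang internals):

```python
# Toy illustration of the new yield protocol: the tool-call stream emits
# (chunk, finish_reason_type) pairs; the caller forwards non-empty chunks and
# keeps the last finish reason, which may be overridden to "tool_calls".
import asyncio

async def fake_tool_call_stream():
    yield 'data: {"delta": "..."}\n\n', "stop"
    # Final override pair emitted when tool calls were produced.
    yield None, "tool_calls"

async def consume():
    finish_reason = None
    async for chunk, tool_call_finish_reason in fake_tool_call_stream():
        if chunk:
            print("forward:", chunk.strip())
        finish_reason = tool_call_finish_reason
    print("final finish_reason:", finish_reason)  # -> tool_calls

asyncio.run(consume())
```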
sglang/srt/function_call/base_format_detector.py

@@ -25,23 +25,49 @@ class BaseFormatDetector(ABC):
     """Base class providing two sets of interfaces: one-time and streaming incremental."""

     def __init__(self):
-        #
+        # Streaming state management
+        # Buffer for accumulating incomplete patterns that arrive across multiple streaming chunks
         self._buffer = ""
-        #
+        # Stores complete tool call info (name and arguments) for each tool being parsed.
+        # Used by serving layer for completion handling when streaming ends.
+        # Format: [{"name": str, "arguments": dict}, ...]
         self.prev_tool_call_arr: List[Dict] = []
+        # Index of currently streaming tool call. Starts at -1 (no active tool),
+        # increments as each tool completes. Tracks which tool's arguments are streaming.
         self.current_tool_id: int = -1
+        # Flag for whether current tool's name has been sent to client.
+        # Tool names sent first with empty parameters, then arguments stream incrementally.
         self.current_tool_name_sent: bool = False
-
-
-
+        # Tracks raw JSON string content streamed to client for each tool's arguments.
+        # Critical for serving layer to calculate remaining content when streaming ends.
+        # Each index corresponds to a tool_id. Example: ['{"location": "San Francisco"', '{"temp": 72']
+        self.streamed_args_for_tool: List[str] = []
+
+        # Token configuration (override in subclasses)
         self.bot_token = ""
         self.eot_token = ""
         self.tool_call_separator = ", "

-    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
-        tool_indices = {
+    def _get_tool_indices(self, tools: List[Tool]) -> Dict[str, int]:
+        """
+        Get a mapping of tool names to their indices in the tools list.
+
+        This utility method creates a dictionary mapping function names to their
+        indices in the tools list, which is commonly needed for tool validation
+        and ToolCallItem creation.
+
+        Args:
+            tools: List of available tools
+
+        Returns:
+            Dictionary mapping tool names to their indices
+        """
+        return {
             tool.function.name: i for i, tool in enumerate(tools) if tool.function.name
         }
+
+    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
+        tool_indices = self._get_tool_indices(tools)
         if not isinstance(action, list):
             action = [action]

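The shared `_get_tool_indices` helper replaces several copies of the same dict comprehension across detectors (see the hunks below). A minimal stand-in showing what it produces, with simplified Tool/Function types rather than the real sglang classes:

```python
# Minimal stand-ins showing what _get_tool_indices computes: a name -> index
# map over tools that have a function name. Tool/Function are simplified here.
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class Function:
    name: Optional[str]

@dataclass
class Tool:
    function: Function

def get_tool_indices(tools: List[Tool]) -> Dict[str, int]:
    return {tool.function.name: i for i, tool in enumerate(tools) if tool.function.name}

tools = [Tool(Function("get_weather")), Tool(Function(None)), Tool(Function("search"))]
print(get_tool_indices(tools))  # {'get_weather': 0, 'search': 2}
```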
@@ -130,11 +156,7 @@ class BaseFormatDetector(ABC):

         # Build tool indices if not already built
         if not hasattr(self, "_tool_indices"):
-            self._tool_indices = {
-                tool.function.name: i
-                for i, tool in enumerate(tools)
-                if tool.function and tool.function.name
-            }
+            self._tool_indices = self._get_tool_indices(tools)

         flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR

@@ -294,12 +316,52 @@ class BaseFormatDetector(ABC):

     @abstractmethod
     def has_tool_call(self, text: str) -> bool:
+        """
+        Check if the given text contains function call markers specific to this format.
+        """
         raise NotImplementedError()

+    def supports_structural_tag(self) -> bool:
+        """Return True if this detector supports structural tag format."""
+        return True
+
     @abstractmethod
     def structure_info(self) -> _GetInfoFunc:
+        """
+        Return a function that creates StructureInfo for constrained generation.
+
+        The returned function takes a tool name and returns a StructureInfo object
+        containing the begin/end patterns and trigger tokens needed for constrained
+        generation of function calls in this format.
+
+        Returns:
+            A function that takes a tool name (str) and returns StructureInfo
+        """
         raise NotImplementedError()

     @abstractmethod
     def build_ebnf(self, tools: List[Tool]) -> str:
+        """
+        Build an EBNF grammar for constrained generation of function calls.
+
+        This method generates an Extended Backus-Naur Form (EBNF) grammar that
+        constrains the model's output to valid function calls in this format.
+        The grammar should include all available tools and their parameter schemas.
+
+        Args:
+            tools: List of available tools/functions that can be called
+
+        Returns:
+            A string containing the EBNF grammar for this function call format
+
+        The EBNF grammar should:
+        - Define the overall structure of function calls in this format
+        - Include all tool names from the provided tools list
+        - Define valid JSON structures for function arguments
+        - Handle multiple function calls if the format supports them
+
+        Note:
+            Most implementations use EBNFComposer.build_ebnf() utility with
+            format-specific parameters rather than writing EBNF from scratch.
+        """
         raise NotImplementedError()
sglang/srt/function_call/deepseekv3_detector.py

@@ -19,9 +19,28 @@ logger = logging.getLogger(__name__)

 class DeepSeekV3Detector(BaseFormatDetector):
     """
-    Detector for DeepSeek
-
-
+    Detector for DeepSeek V3 model function call format.
+
+    The DeepSeek V3 format uses special Unicode tokens to delimit function calls
+    with JSON code blocks for arguments.
+
+    Format Structure:
+    ```
+    <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>{function_name}\n```json\n{json_arguments}\n```<|tool▁calls▁end|><|end▁of▁sentence|>
+    ```
+    Examples:
+    ```
+    <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
+    ```
+
+    Key Components:
+    - Tool Calls Section: Wrapped between `<|tool▁calls▁begin|>` and `<|tool▁calls▁end|>`
+    - Individual Tool Call: Wrapped between `<|tool▁call▁begin|>` and `<|tool▁call▁end|>`
+    - Function Declaration: `function<|tool▁sep|>{function_name}`
+    - Arguments: JSON code block between ````json` and ````
+    - Supports multiple tool calls
+
+    Reference: https://huggingface.co/deepseek-ai/DeepSeek-V3-0324?chat_template=default
     """

     def __init__(self):
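As a quick orientation for the documented shape, here is a small sketch that renders a single tool call in that layout; the token spellings are copied from the docstring above, and the helper is illustrative only (the real model emits its own special tokens):

```python
# Sketch: rendering one tool call in the DeepSeek V3 shape documented above.
import json

def render_deepseek_v3_call(name: str, arguments: dict) -> str:
    return (
        "<|tool▁calls▁begin|><|tool▁call▁begin|>"
        f"function<|tool▁sep|>{name}\n```json\n{json.dumps(arguments)}\n```"
        "<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"
    )

print(render_deepseek_v3_call("get_current_weather", {"location": "Tokyo"}))
```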
@@ -89,16 +108,12 @@ class DeepSeekV3Detector(BaseFormatDetector):
             return StreamingParseResult(normal_text=new_text)

         if not hasattr(self, "_tool_indices"):
-            self._tool_indices = {
-                tool.function.name: i
-                for i, tool in enumerate(tools)
-                if tool.function and tool.function.name
-            }
+            self._tool_indices = self._get_tool_indices(tools)

         calls: list[ToolCallItem] = []
         try:
             partial_match = re.search(
-                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)",
+                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```.*",
                 string=current_text,
                 flags=re.DOTALL,
             )
@@ -127,7 +142,7 @@
                 )
             )
             self.current_tool_name_sent = True
-            # Store the tool call info for
+            # Store the tool call info for serving layer completions endpoint
             self.prev_tool_call_arr[self.current_tool_id] = {
                 "name": func_name,
                 "arguments": {},
@@ -153,7 +168,7 @@
                     ] += argument_diff

                     if _is_complete_json(func_args_raw):
-                        # Update the stored arguments
+                        # Update the stored arguments
                         try:
                             parsed_args = json.loads(func_args_raw)
                             self.prev_tool_call_arr[self.current_tool_id][
sglang/srt/function_call/ebnf_composer.py

@@ -1,51 +1,73 @@
-from typing import Literal, Optional
+from typing import Any, Dict, Literal, Optional


 class EBNFComposer:
     # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
-
-
-    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
-    basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
-    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
+    # Shared primitive grammar rules used across all formats
+    BASE_PRIMITIVE_GRAMMAR = r"""
     basic_string ::= (([\"] basic_string_1 [\"]))
     basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
-    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]
-
-
+    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]{4}
+    basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
+    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
     basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
     basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
     ws ::= [ \n\t]*
-
+    """

-
+    # Format-specific extensions
+    json_grammar_ebnf_str = (
+        r"""
+    json ::= basic_array | basic_object
+    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+    basic_boolean ::= "true" | "false"
+    basic_null ::= "null"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+
+    pythonic_grammar_ebnf_str = (
+        r"""
     pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
     basic_any ::= basic_number | basic_string | basic_array | basic_object
-
-
-
-
-
-
-
+    basic_boolean ::= "True" | "False"
+    basic_null ::= "None"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+
+    xml_grammar_ebnf_str = (
+        r"""
+    xml ::= xml_element | xml_text
+    xml_element ::= basic_string | basic_number | basic_boolean | basic_null | basic_array | basic_object
+    xml_text ::= [^<>]*
+    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+    basic_boolean ::= "true" | "false"
+    basic_null ::= "null"
     """
+        + BASE_PRIMITIVE_GRAMMAR
+    )

     CALL_RULE_MAP = {
         "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
         "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
+        "xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"',
     }

     ARGUMENTS_RULE_MAP = {
         "pythonic": "{arg_rules}",
         "json": '"{{" {arg_rules} "}}"',
+        "xml": "{arg_rules}",
     }

     KEY_VALUE_RULE_MAP = {
         "pythonic": '"{key}" "=" {valrule}',
         "json": '"\\"{key}\\"" ":" {valrule}',
+        "xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
     }

-
+    # Base type mapping - most types are the same across formats
+    BASE_TYPE_MAPPING = {
         "string": "basic_string",
         "number": "basic_number",
         "integer": "basic_number",
@@ -55,19 +77,20 @@ class EBNFComposer:
         "object": "basic_object",
     }

-
-
-        "
-
-
-
-        "
-
+    # Format-specific overrides for types that differ
+    FORMAT_TYPE_OVERRIDES = {
+        "pythonic": {
+            "boolean": '"True" | "False"',
+            "null": '"None"',
+        },
+        "xml": {
+            "string": "xml_text",
+        },
     }

     @staticmethod
     def get_value_rule(
-        prop: dict, function_format: Literal["pythonic", "json"] = "json"
+        prop: dict, function_format: Literal["pythonic", "json", "xml"] = "json"
     ) -> str:
         if "enum" in prop:
             return EBNFComposer._handle_enum(prop, function_format)
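The new `FORMAT_TYPE_OVERRIDES` table is merged on top of `BASE_TYPE_MAPPING` by `get_type_mapping` (see the next hunk). A standalone sketch of that merge; the entries between `integer` and `object` in the base table are not visible in the diff and are assumed here to follow the same `basic_*` naming:

```python
# Standalone sketch of the merge performed by EBNFComposer.get_type_mapping:
# shared base mapping plus per-format overrides. Middle base entries are assumed.
from typing import Dict

BASE_TYPE_MAPPING = {
    "string": "basic_string",
    "number": "basic_number",
    "integer": "basic_number",
    "boolean": "basic_boolean",  # assumed
    "null": "basic_null",        # assumed
    "array": "basic_array",      # assumed
    "object": "basic_object",
}

FORMAT_TYPE_OVERRIDES = {
    "pythonic": {"boolean": '"True" | "False"', "null": '"None"'},
    "xml": {"string": "xml_text"},
}

def get_type_mapping(function_format: str) -> Dict[str, str]:
    mapping = dict(BASE_TYPE_MAPPING)
    mapping.update(FORMAT_TYPE_OVERRIDES.get(function_format, {}))
    return mapping

print(get_type_mapping("xml")["string"])        # xml_text
print(get_type_mapping("pythonic")["boolean"])  # "True" | "False"
print(get_type_mapping("json")["boolean"])      # basic_boolean
```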
@@ -83,48 +106,46 @@ class EBNFComposer:
         enum_values = prop["enum"]
         prop_type = prop.get("type", "string")

-
-
-
-
-
-
-
-
-            ("boolean", "json"): lambda v: "true" if v else "false",
-            ("boolean", "pythonic"): lambda v: "True" if v else "False",
-        }
+        def format_enum_val(v: Any) -> str:
+            if prop_type == "boolean":
+                if function_format == "json" or function_format == "xml":
+                    return "true" if v else "false"
+                elif function_format == "pythonic":
+                    return "True" if v else "False"
+                else:
+                    return str(v)  # fallback

-
-
-
-
-
+            if prop_type == "string":
+                if function_format == "xml":
+                    return f'"{v}"'
+                else:  # json or pythonic
+                    return f'"\\"{v}\\""'  # escape quote-wrapped string

-
-
+            # All other types (number, integer, etc.)
+            return str(v)

-
-
-
+        formatted_values = [format_enum_val(v) for v in enum_values]
+        enum_rule = " | ".join(formatted_values)
+        return f"({enum_rule})" if len(formatted_values) > 1 else enum_rule

-
+    @staticmethod
+    def get_type_mapping(function_format: str) -> Dict[str, str]:
+        """Get the complete type mapping for a given format."""
+        mapping = EBNFComposer.BASE_TYPE_MAPPING.copy()
+        overrides = EBNFComposer.FORMAT_TYPE_OVERRIDES.get(function_format, {})
+        mapping.update({k: v for k, v in overrides.items() if v is not None})
+        return mapping

     @staticmethod
     def _handle_type(prop: dict, function_format: str) -> str:
         """Handle type properties using the appropriate type mapping."""
         prop_type = prop["type"]
-        type_mapping = (
-            EBNFComposer.PYTHONIC_TYPE_MAPPING
-            if function_format == "pythonic"
-            else EBNFComposer.JSON_TYPE_MAPPING
-        )
+        type_mapping = EBNFComposer.get_type_mapping(function_format)

         if isinstance(prop_type, list):
             type_rules = [
-                type_mapping
+                type_mapping.get(single_type, function_format)
                 for single_type in prop_type
-                if single_type in type_mapping
             ]
             return " | ".join(type_rules) if type_rules else function_format

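The replacement of the per-(type, format) lambda table with `format_enum_val` makes the enum rendering rules explicit. A standalone copy of that logic (lifted out of the class so it can run by itself) showing how the same enum value renders per target format:

```python
# Standalone copy of the enum-formatting logic shown above, illustrating how
# one enum value renders differently for json/xml vs pythonic grammars.
from typing import Any

def format_enum_val(v: Any, prop_type: str, function_format: str) -> str:
    if prop_type == "boolean":
        if function_format in ("json", "xml"):
            return "true" if v else "false"
        elif function_format == "pythonic":
            return "True" if v else "False"
        else:
            return str(v)  # fallback
    if prop_type == "string":
        if function_format == "xml":
            return f'"{v}"'
        else:  # json or pythonic
            return f'"\\"{v}\\""'  # escape quote-wrapped string
    return str(v)

print(format_enum_val("celsius", "string", "json"))  # "\"celsius\""
print(format_enum_val("celsius", "string", "xml"))   # "celsius"
print(format_enum_val(True, "boolean", "pythonic"))  # True
```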
@@ -133,7 +154,7 @@
     @staticmethod
     def build_ebnf(
         tools,
-        function_format: Literal["pythonic", "json"] = "json",
+        function_format: Literal["pythonic", "json", "xml"] = "json",
         # Parameters for wrapping the entire sequence of tool calls
         sequence_start_token: Optional[str] = None,
         sequence_end_token: Optional[str] = None,
@@ -143,6 +164,7 @@
         # Parameter for separating multiple tool calls
         tool_call_separator: Optional[str] = None,
         call_rule_fmt: Optional[str] = None,
+        key_value_rule_fmt: Optional[str] = None,
     ):
         """
         Generalized EBNF builder for all detectors.
@@ -157,6 +179,9 @@
             call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
                 the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                 format based on function_format will be used.
+            key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
+                with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
+                based on function_format will be used.
         """
         # =================================================================
         # Step 1: Determine the root tool calls rule
@@ -200,7 +225,11 @@
             else EBNFComposer.CALL_RULE_MAP[function_format]
         )
         args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
-        key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        key_value_template = (
+            key_value_rule_fmt
+            if key_value_rule_fmt
+            else EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        )

         # =================================================================
         # Step 4: Build rules for each tool
@@ -292,10 +321,13 @@
         # =================================================================
         # Step 5: Add base grammar rules
         # =================================================================
-        base_grammar = (
-            EBNFComposer.pythonic_grammar_ebnf_str
-            if function_format == "pythonic"
-            else EBNFComposer.json_grammar_ebnf_str
+        grammar_dict = {
+            "pythonic": EBNFComposer.pythonic_grammar_ebnf_str,
+            "json": EBNFComposer.json_grammar_ebnf_str,
+            "xml": EBNFComposer.xml_grammar_ebnf_str,
+        }
+        base_grammar = grammar_dict.get(
+            function_format, EBNFComposer.json_grammar_ebnf_str
         )
         ebnf_lines.append(base_grammar)

sglang/srt/function_call/function_call_parser.py

@@ -14,7 +14,7 @@ from sglang.srt.function_call.kimik2_detector import KimiK2Detector
 from sglang.srt.function_call.llama32_detector import Llama32Detector
 from sglang.srt.function_call.mistral_detector import MistralDetector
 from sglang.srt.function_call.pythonic_detector import PythonicDetector
-from sglang.srt.function_call.
+from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
 from sglang.srt.function_call.qwen25_detector import Qwen25Detector

 logger = logging.getLogger(__name__)
@@ -36,7 +36,7 @@ class FunctionCallParser:
         "deepseekv3": DeepSeekV3Detector,
         "pythonic": PythonicDetector,
         "kimi_k2": KimiK2Detector,
-        "
+        "qwen3_coder": Qwen3CoderDetector,
     }

     def __init__(self, tools: List[Tool], tool_call_parser: str):
@@ -155,9 +155,9 @@
             or None if no constraint applies.
         """
         # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
-        # It cannot parse or validate
+        # It cannot parse or validate function call Pythonic or XML-ish syntax.
         if (
-
+            self.detector.supports_structural_tag()
             and tool_choice == "auto"
             and any(tool.function.strict for tool in self.tools)
         ):
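The parser now defers to the detector via `supports_structural_tag()` instead of hard-coding which formats are JSON-compatible. A small stand-in showing the gating condition added above (simplified objects, not the real FunctionCallParser):

```python
# Sketch of the gating added above: structural-tag constrained generation is
# only used when the detector supports it, tool_choice is "auto", and at least
# one tool is marked strict. Objects below are simplified stand-ins.
from dataclasses import dataclass
from typing import List

@dataclass
class FakeFunction:
    strict: bool

@dataclass
class FakeTool:
    function: FakeFunction

def use_structural_tag(detector_supports: bool, tool_choice: str, tools: List[FakeTool]) -> bool:
    return (
        detector_supports
        and tool_choice == "auto"
        and any(tool.function.strict for tool in tools)
    )

tools = [FakeTool(FakeFunction(strict=False)), FakeTool(FakeFunction(strict=True))]
print(use_structural_tag(True, "auto", tools))   # True
print(use_structural_tag(False, "auto", tools))  # False
```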