PyPI - sglang - Versions diffs - 0.4.9.post3__py3-none-any.whl → 0.4.9.post5__py3-none-any.whl - Mend

sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

sglang/lang/chat_template.py +21 -0
sglang/srt/_custom_ops.py +29 -1
sglang/srt/configs/internvl.py +3 -0
sglang/srt/configs/model_config.py +5 -1
sglang/srt/constrained/base_grammar_backend.py +10 -2
sglang/srt/constrained/xgrammar_backend.py +7 -5
sglang/srt/conversation.py +17 -2
sglang/srt/debug_utils/__init__.py +0 -0
sglang/srt/debug_utils/dump_comparator.py +131 -0
sglang/srt/debug_utils/dumper.py +108 -0
sglang/srt/debug_utils/text_comparator.py +172 -0
sglang/srt/disaggregation/common/conn.py +34 -6
sglang/srt/disaggregation/decode_schedule_batch_mixin.py +13 -1
sglang/srt/disaggregation/mini_lb.py +3 -2
sglang/srt/disaggregation/mooncake/conn.py +65 -20
sglang/srt/disaggregation/mooncake/transfer_engine.py +4 -2
sglang/srt/disaggregation/nixl/conn.py +17 -13
sglang/srt/disaggregation/prefill.py +13 -1
sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -91
sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +96 -1
sglang/srt/distributed/device_communicators/quick_all_reduce.py +273 -0
sglang/srt/distributed/device_communicators/shm_broadcast.py +12 -5
sglang/srt/distributed/parallel_state.py +70 -15
sglang/srt/entrypoints/engine.py +5 -9
sglang/srt/entrypoints/http_server.py +20 -32
sglang/srt/entrypoints/openai/protocol.py +3 -3
sglang/srt/entrypoints/openai/serving_chat.py +148 -72
sglang/srt/function_call/base_format_detector.py +74 -12
sglang/srt/function_call/deepseekv3_detector.py +26 -11
sglang/srt/function_call/ebnf_composer.py +105 -66
sglang/srt/function_call/function_call_parser.py +6 -4
sglang/srt/function_call/glm4_moe_detector.py +164 -0
sglang/srt/function_call/kimik2_detector.py +41 -16
sglang/srt/function_call/llama32_detector.py +6 -3
sglang/srt/function_call/mistral_detector.py +11 -3
sglang/srt/function_call/pythonic_detector.py +16 -14
sglang/srt/function_call/qwen25_detector.py +12 -3
sglang/srt/function_call/{qwen3_detector.py → qwen3_coder_detector.py} +11 -9
sglang/srt/layers/activation.py +11 -3
sglang/srt/layers/attention/base_attn_backend.py +3 -1
sglang/srt/layers/attention/hybrid_attn_backend.py +100 -0
sglang/srt/layers/attention/vision.py +56 -8
sglang/srt/layers/communicator.py +12 -12
sglang/srt/layers/dp_attention.py +72 -24
sglang/srt/layers/layernorm.py +26 -1
sglang/srt/layers/logits_processor.py +46 -25
sglang/srt/layers/moe/ep_moe/layer.py +172 -206
sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=160,N=320,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json +146 -0
sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +146 -0
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +25 -224
sglang/srt/layers/moe/fused_moe_triton/layer.py +38 -48
sglang/srt/layers/moe/fused_moe_triton/triton_kernels_moe.py +11 -8
sglang/srt/layers/moe/topk.py +88 -34
sglang/srt/layers/multimodal.py +11 -8
sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +2 -9
sglang/srt/layers/quantization/fp8.py +25 -247
sglang/srt/layers/quantization/fp8_kernel.py +78 -48
sglang/srt/layers/quantization/modelopt_quant.py +33 -14
sglang/srt/layers/quantization/unquant.py +24 -76
sglang/srt/layers/quantization/utils.py +0 -9
sglang/srt/layers/quantization/w4afp8.py +68 -17
sglang/srt/layers/radix_attention.py +5 -3
sglang/srt/lora/lora_manager.py +133 -169
sglang/srt/lora/lora_registry.py +188 -0
sglang/srt/lora/mem_pool.py +2 -2
sglang/srt/managers/cache_controller.py +62 -13
sglang/srt/managers/io_struct.py +19 -1
sglang/srt/managers/mm_utils.py +154 -35
sglang/srt/managers/multimodal_processor.py +3 -14
sglang/srt/managers/schedule_batch.py +27 -11
sglang/srt/managers/scheduler.py +48 -26
sglang/srt/managers/tokenizer_manager.py +62 -28
sglang/srt/managers/tp_worker.py +5 -4
sglang/srt/mem_cache/allocator.py +67 -7
sglang/srt/mem_cache/hicache_storage.py +17 -1
sglang/srt/mem_cache/hiradix_cache.py +35 -18
sglang/srt/mem_cache/memory_pool_host.py +3 -0
sglang/srt/model_executor/cuda_graph_runner.py +61 -25
sglang/srt/model_executor/forward_batch_info.py +201 -29
sglang/srt/model_executor/model_runner.py +109 -37
sglang/srt/models/deepseek_v2.py +63 -30
sglang/srt/models/glm4_moe.py +1035 -0
sglang/srt/models/glm4_moe_nextn.py +167 -0
sglang/srt/models/interns1.py +328 -0
sglang/srt/models/internvl.py +143 -47
sglang/srt/models/llava.py +9 -5
sglang/srt/models/minicpmo.py +4 -1
sglang/srt/models/mllama4.py +10 -3
sglang/srt/models/qwen2_moe.py +2 -6
sglang/srt/models/qwen3_moe.py +6 -8
sglang/srt/multimodal/processors/base_processor.py +20 -6
sglang/srt/multimodal/processors/clip.py +2 -2
sglang/srt/multimodal/processors/deepseek_vl_v2.py +2 -2
sglang/srt/multimodal/processors/gemma3.py +2 -2
sglang/srt/multimodal/processors/gemma3n.py +2 -2
sglang/srt/multimodal/processors/internvl.py +21 -8
sglang/srt/multimodal/processors/janus_pro.py +2 -2
sglang/srt/multimodal/processors/kimi_vl.py +2 -2
sglang/srt/multimodal/processors/llava.py +4 -4
sglang/srt/multimodal/processors/minicpm.py +2 -3
sglang/srt/multimodal/processors/mlama.py +2 -2
sglang/srt/multimodal/processors/mllama4.py +18 -111
sglang/srt/multimodal/processors/phi4mm.py +2 -2
sglang/srt/multimodal/processors/pixtral.py +2 -2
sglang/srt/multimodal/processors/qwen_audio.py +2 -2
sglang/srt/multimodal/processors/qwen_vl.py +2 -2
sglang/srt/multimodal/processors/vila.py +3 -1
sglang/srt/reasoning_parser.py +48 -5
sglang/srt/sampling/sampling_batch_info.py +6 -5
sglang/srt/server_args.py +132 -60
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +33 -28
sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +37 -36
sglang/srt/speculative/eagle_utils.py +51 -23
sglang/srt/speculative/eagle_worker.py +59 -44
sglang/srt/two_batch_overlap.py +9 -5
sglang/srt/utils.py +113 -69
sglang/srt/weight_sync/utils.py +119 -0
sglang/test/runners.py +4 -0
sglang/test/test_activation.py +50 -1
sglang/test/test_utils.py +65 -5
sglang/utils.py +19 -0
sglang/version.py +1 -1
{sglang-0.4.9.post3.dist-info → sglang-0.4.9.post5.dist-info}/METADATA +6 -6
{sglang-0.4.9.post3.dist-info → sglang-0.4.9.post5.dist-info}/RECORD +127 -114
sglang/srt/debug_utils.py +0 -74
{sglang-0.4.9.post3.dist-info → sglang-0.4.9.post5.dist-info}/WHEEL +0 -0
{sglang-0.4.9.post3.dist-info → sglang-0.4.9.post5.dist-info}/licenses/LICENSE +0 -0
{sglang-0.4.9.post3.dist-info → sglang-0.4.9.post5.dist-info}/top_level.txt +0 -0

sglang/srt/function_call/ebnf_composer.py CHANGED Viewed

@@ -1,51 +1,73 @@
-from typing import Literal, Optional
+from typing import Any, Dict, Literal, Optional
 class EBNFComposer:
     # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
-    json_grammar_ebnf_str = r"""
-        json ::= basic_array | basic_object
-        basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
-        basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
-        basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
+    # Shared primitive grammar rules used across all formats
+    BASE_PRIMITIVE_GRAMMAR = r"""
         basic_string ::= (([\"] basic_string_1 [\"]))
         basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
-        escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
-        basic_boolean ::= "true" | "false"
-        basic_null ::= "null"
+        escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]{4}
+        basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
+        basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
         basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
         basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
         ws ::= [ \n\t]*
-        """
+    """
-    pythonic_grammar_ebnf_str = r"""
+    # Format-specific extensions
+    json_grammar_ebnf_str = (
+        r"""
+        json ::= basic_array | basic_object
+        basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+        basic_boolean ::= "true" | "false"
+        basic_null ::= "null"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+    pythonic_grammar_ebnf_str = (
+        r"""
         pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
         basic_any ::= basic_number | basic_string | basic_array | basic_object
-        basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
-        basic_string ::= (([\"] basic_string_1 [\"]))
-        basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
-        escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
-        basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
-        basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
-        ws ::= [ \n\t]*
+        basic_boolean ::= "True" | "False"
+        basic_null ::= "None"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+    xml_grammar_ebnf_str = (
+        r"""
+        xml ::= xml_element | xml_text
+        xml_element ::= basic_string | basic_number | basic_boolean | basic_null | basic_array | basic_object
+        xml_text ::= [^<>]*
+        basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+        basic_boolean ::= "true" | "false"
+        basic_null ::= "null"
     """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
     CALL_RULE_MAP = {
         "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
         "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
+        "xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"',
     }
     ARGUMENTS_RULE_MAP = {
         "pythonic": "{arg_rules}",
         "json": '"{{" {arg_rules} "}}"',
+        "xml": "{arg_rules}",
     }
     KEY_VALUE_RULE_MAP = {
         "pythonic": '"{key}" "=" {valrule}',
         "json": '"\\"{key}\\"" ":" {valrule}',
+        "xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
     }
-    JSON_TYPE_MAPPING = {
+    # Base type mapping - most types are the same across formats
+    BASE_TYPE_MAPPING = {
         "string": "basic_string",
         "number": "basic_number",
         "integer": "basic_number",
@@ -55,19 +77,20 @@ class EBNFComposer:
         "object": "basic_object",
     }
-    PYTHONIC_TYPE_MAPPING = {
-        "string": "basic_string",
-        "number": "basic_number",
-        "integer": "basic_number",
-        "boolean": '"True" | "False"',
-        "null": '"None"',
-        "array": "basic_array",
-        "object": "basic_object",
+    # Format-specific overrides for types that differ
+    FORMAT_TYPE_OVERRIDES = {
+        "pythonic": {
+            "boolean": '"True" | "False"',
+            "null": '"None"',
+        },
+        "xml": {
+            "string": "xml_text",
+        },
     }
     @staticmethod
     def get_value_rule(
-        prop: dict, function_format: Literal["pythonic", "json"] = "json"
+        prop: dict, function_format: Literal["pythonic", "json", "xml"] = "json"
     ) -> str:
         if "enum" in prop:
             return EBNFComposer._handle_enum(prop, function_format)
@@ -83,48 +106,46 @@ class EBNFComposer:
         enum_values = prop["enum"]
         prop_type = prop.get("type", "string")
-        # Define formatters for different type/format combinations
-        formatters = {
-            ("string", "json"): lambda v: f'"\\"{v}\\""',
-            ("string", "pythonic"): lambda v: f'"\\"{v}\\""',
-            ("number", "json"): str,
-            ("number", "pythonic"): str,
-            ("integer", "json"): str,
-            ("integer", "pythonic"): str,
-            ("boolean", "json"): lambda v: "true" if v else "false",
-            ("boolean", "pythonic"): lambda v: "True" if v else "False",
-        }
+        def format_enum_val(v: Any) -> str:
+            if prop_type == "boolean":
+                if function_format == "json" or function_format == "xml":
+                    return "true" if v else "false"
+                elif function_format == "pythonic":
+                    return "True" if v else "False"
+                else:
+                    return str(v)  # fallback
-        # Get the formatter or default to string handling
-        formatter = formatters.get(
-            (prop_type, function_format),
-            formatters[("string", function_format)],  # Default to string handling
-        )
+            if prop_type == "string":
+                if function_format == "xml":
+                    return f'"{v}"'
+                else:  # json or pythonic
+                    return f'"\\"{v}\\""'  # escape quote-wrapped string
-        formatted_values = [formatter(value) for value in enum_values]
-        enum_rule = " | ".join(formatted_values)
+            # All other types (number, integer, etc.)
+            return str(v)
-        # Wrap in parentheses if there are multiple values to ensure correct EBNF precedence
-        if len(formatted_values) > 1:
-            enum_rule = f"({enum_rule})"
+        formatted_values = [format_enum_val(v) for v in enum_values]
+        enum_rule = " | ".join(formatted_values)
+        return f"({enum_rule})" if len(formatted_values) > 1 else enum_rule
-        return enum_rule
+    @staticmethod
+    def get_type_mapping(function_format: str) -> Dict[str, str]:
+        """Get the complete type mapping for a given format."""
+        mapping = EBNFComposer.BASE_TYPE_MAPPING.copy()
+        overrides = EBNFComposer.FORMAT_TYPE_OVERRIDES.get(function_format, {})
+        mapping.update({k: v for k, v in overrides.items() if v is not None})
+        return mapping
     @staticmethod
     def _handle_type(prop: dict, function_format: str) -> str:
         """Handle type properties using the appropriate type mapping."""
         prop_type = prop["type"]
-        type_mapping = (
-            EBNFComposer.PYTHONIC_TYPE_MAPPING
-            if function_format == "pythonic"
-            else EBNFComposer.JSON_TYPE_MAPPING
-        )
+        type_mapping = EBNFComposer.get_type_mapping(function_format)
         if isinstance(prop_type, list):
             type_rules = [
-                type_mapping[single_type]
+                type_mapping.get(single_type, function_format)
                 for single_type in prop_type
-                if single_type in type_mapping
             ]
             return " | ".join(type_rules) if type_rules else function_format
@@ -133,7 +154,7 @@ class EBNFComposer:
     @staticmethod
     def build_ebnf(
         tools,
-        function_format: Literal["pythonic", "json"] = "json",
+        function_format: Literal["pythonic", "json", "xml"] = "json",
         # Parameters for wrapping the entire sequence of tool calls
         sequence_start_token: Optional[str] = None,
         sequence_end_token: Optional[str] = None,
@@ -143,6 +164,8 @@ class EBNFComposer:
         # Parameter for separating multiple tool calls
         tool_call_separator: Optional[str] = None,
         call_rule_fmt: Optional[str] = None,
+        key_value_rule_fmt: Optional[str] = None,
+        key_value_separator: str = ",",
     ):
         """
         Generalized EBNF builder for all detectors.
@@ -157,6 +180,9 @@ class EBNFComposer:
             call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
                 the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                 format based on function_format will be used.
+            key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
+                with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
+                based on function_format will be used.
         """
         # =================================================================
         # Step 1: Determine the root tool calls rule
@@ -200,7 +226,11 @@ class EBNFComposer:
             else EBNFComposer.CALL_RULE_MAP[function_format]
         )
         args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
-        key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        key_value_template = (
+            key_value_rule_fmt
+            if key_value_rule_fmt
+            else EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        )
         # =================================================================
         # Step 4: Build rules for each tool
@@ -250,7 +280,11 @@ class EBNFComposer:
             # Add required properties joined by commas
             if required:
-                rule_parts.append(' "," '.join(prop_kv_pairs[k] for k in required))
+                rule_parts.append(
+                    f' "{key_value_separator}" '.join(
+                        prop_kv_pairs[k] for k in required
+                    )
+                )
             # Add optional properties with flexible ordering
             if optional:
@@ -263,13 +297,15 @@ class EBNFComposer:
                         if j == i:
                             opt_parts.append(prop_kv_pairs[optional[j]])
                         else:
-                            opt_parts.append(f' ( "," {prop_kv_pairs[optional[j]]} )?')
+                            opt_parts.append(
+                                f' ( "{key_value_separator}" {prop_kv_pairs[optional[j]]} )?'
+                            )
                     opt_alternatives.append("".join(opt_parts))
                 # Wrap with appropriate comma handling based on whether we have required properties
                 if required:
                     # Required properties exist, so optional group needs outer comma
-                    rule_parts.append(' ( "," ( ')
+                    rule_parts.append(f' ( "{key_value_separator}" ( ')
                     rule_parts.append(" | ".join(opt_alternatives))
                     rule_parts.append(" ) )?")
                 else:
@@ -292,10 +328,13 @@ class EBNFComposer:
         # =================================================================
         # Step 5: Add base grammar rules
         # =================================================================
-        base_grammar = (
-            EBNFComposer.pythonic_grammar_ebnf_str
-            if function_format == "pythonic"
-            else EBNFComposer.json_grammar_ebnf_str
+        grammar_dict = {
+            "pythonic": EBNFComposer.pythonic_grammar_ebnf_str,
+            "json": EBNFComposer.json_grammar_ebnf_str,
+            "xml": EBNFComposer.xml_grammar_ebnf_str,
+        }
+        base_grammar = grammar_dict.get(
+            function_format, EBNFComposer.json_grammar_ebnf_str
         )
         ebnf_lines.append(base_grammar)

sglang/srt/function_call/function_call_parser.py CHANGED Viewed

@@ -10,11 +10,12 @@ from sglang.srt.entrypoints.openai.protocol import (
 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
 from sglang.srt.function_call.core_types import ToolCallItem
 from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
+from sglang.srt.function_call.glm4_moe_detector import Glm4MoeDetector
 from sglang.srt.function_call.kimik2_detector import KimiK2Detector
 from sglang.srt.function_call.llama32_detector import Llama32Detector
 from sglang.srt.function_call.mistral_detector import MistralDetector
 from sglang.srt.function_call.pythonic_detector import PythonicDetector
-from sglang.srt.function_call.qwen3_detector import Qwen3XMLDetector
+from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
 from sglang.srt.function_call.qwen25_detector import Qwen25Detector
 logger = logging.getLogger(__name__)
@@ -36,7 +37,8 @@ class FunctionCallParser:
         "deepseekv3": DeepSeekV3Detector,
         "pythonic": PythonicDetector,
         "kimi_k2": KimiK2Detector,
-        "qwen3": Qwen3XMLDetector,
+        "qwen3_coder": Qwen3CoderDetector,
+        "glm45": Glm4MoeDetector,
     }
     def __init__(self, tools: List[Tool], tool_call_parser: str):
@@ -155,9 +157,9 @@ class FunctionCallParser:
             or None if no constraint applies.
         """
         # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
-        # It cannot parse or validate Python syntax like function calls.
+        # It cannot parse or validate function call Pythonic or XML-ish syntax.
         if (
-            not isinstance(self.detector, PythonicDetector)
+            self.detector.supports_structural_tag()
             and tool_choice == "auto"
             and any(tool.function.strict for tool in self.tools)
         ):

sglang/srt/function_call/glm4_moe_detector.py ADDED Viewed

@@ -0,0 +1,164 @@
+import ast
+import json
+import logging
+import re
+from typing import List
+from sglang.srt.entrypoints.openai.protocol import Tool
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    StructureInfo,
+    _GetInfoFunc,
+)
+from sglang.srt.function_call.ebnf_composer import EBNFComposer
+logger = logging.getLogger(__name__)
+def get_argument_type(func_name: str, arg_key: str, defined_tools: list):
+    name2tool = {tool.function.name: tool for tool in defined_tools}
+    if func_name not in name2tool:
+        return None
+    tool = name2tool[func_name]
+    if arg_key not in tool.function.parameters["properties"]:
+        return None
+    return tool.function.parameters["properties"][arg_key].get("type", None)
+def parse_arguments(json_value):
+    try:
+        try:
+            parsed_value = json.loads(json_value)
+        except:
+            parsed_value = ast.literal_eval(json_value)
+        return parsed_value, True
+    except:
+        return json_value, False
+class Glm4MoeDetector(BaseFormatDetector):
+    """
+    Detector for GLM-4.5 models.
+    Assumes function call format:
+      <tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>北京</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>\n<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>上海</arg_value>\n<arg_key>date</arg_key>\n<arg_value>2024-06-27</arg_value>\n</tool_call>
+    """
+    def __init__(self):
+        super().__init__()
+        self.bot_token = "<tool_call>"
+        self.eot_token = "</tool_call>"
+        self.func_call_regex = r"<tool_call>.*?</tool_call>"
+        self.func_detail_regex = r"<tool_call>([^\n]*)\n(.*)</tool_call>"
+        self.func_arg_regex = r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a glm-4.5 format tool call."""
+        return self.bot_token in text
+    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+        """
+        One-time parsing: Detects and parses tool calls in the provided text.
+        :param text: The complete text to parse.
+        :param tools: List of available tools.
+        :return: ParseResult indicating success or failure, consumed text, leftover text, and parsed calls.
+        """
+        idx = text.find(self.bot_token)
+        normal_text = text[:idx].strip() if idx != -1 else text
+        if self.bot_token not in text:
+            return StreamingParseResult(normal_text=normal_text, calls=[])
+        match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
+        calls = []
+        try:
+            for match_result in match_result_list:
+                # Get function name
+                func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
+                func_name = func_detail.group(1)
+                func_args = func_detail.group(2)
+                pairs = re.findall(
+                    r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>",
+                    func_args,
+                    re.DOTALL,
+                )
+                arguments = {}
+                for arg_key, arg_value in pairs:
+                    arg_key = arg_key.strip()
+                    arg_value = arg_value.strip()
+                    arg_type = get_argument_type(func_name, arg_key, tools)
+                    if arg_type != "string":
+                        arg_value, is_good_json = parse_arguments(arg_value)
+                    arguments[arg_key] = arg_value
+                # construct match_result for parse_base_json
+                match_result = {"name": func_name, "parameters": arguments}
+                calls.extend(self.parse_base_json(match_result, tools))
+            return StreamingParseResult(normal_text=normal_text, calls=calls)
+        except Exception as e:
+            logger.error(f"Error in detect_and_parse: {e}")
+            # return the normal text if parsing fails
+            return StreamingParseResult(normal_text=text)
+    def parse_streaming_increment(
+        self, new_text: str, tools: List[Tool]
+    ) -> StreamingParseResult:
+        """
+        Streaming incremental parsing tool calls for GLM-4.5 format.
+        """
+        self._buffer += new_text
+        current_text = self._buffer
+        start = current_text.find(self.bot_token)
+        if start == -1:
+            self._buffer = ""
+            if self.current_tool_id > 0:
+                current_text = ""
+            return StreamingParseResult(normal_text=current_text)
+        # find ensures we find the first self.eot_token so there will be at most one tool_call in current_text[:end+len(self.eot_token)
+        end = current_text.find(self.eot_token)
+        if end != -1:
+            # Initialize state if this is the first tool call
+            if self.current_tool_id == -1:
+                self.current_tool_id = 0
+                self.prev_tool_call_arr = []
+                self.streamed_args_for_tool = [""]
+            # Ensure we have enough entries in our tracking arrays
+            while len(self.prev_tool_call_arr) <= self.current_tool_id:
+                self.prev_tool_call_arr.append({})
+            while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                self.streamed_args_for_tool.append("")
+            result = self.detect_and_parse(
+                current_text[: end + len(self.eot_token)], tools=tools
+            )
+            if result.calls:
+                self.prev_tool_call_arr[self.current_tool_id] = {
+                    "name": result.calls[0].name,
+                    "arguments": json.loads(result.calls[0].parameters),
+                }
+                self.streamed_args_for_tool[self.current_tool_id] = result.calls[
+                    0
+                ].parameters
+                result.calls[0].tool_index = self.current_tool_id
+                self.current_tool_id += 1
+            self._buffer = current_text[end + len(self.eot_token) :]
+            return result
+        normal_text = current_text[:start]
+        self._buffer = current_text[start:]
+        return StreamingParseResult(normal_text=normal_text)
+    def supports_structural_tag(self) -> bool:
+        return False
+    def structure_info(self) -> _GetInfoFunc:
+        raise NotImplementedError()
+    def build_ebnf(self, tools: List[Tool]):
+        return EBNFComposer.build_ebnf(
+            tools,
+            individual_call_start_token=self.bot_token,
+            individual_call_end_token=self.eot_token,
+            tool_call_separator="\\n",
+            function_format="xml",
+            call_rule_fmt='"{name}" "\\n" {arguments_rule} "\\n"',
+            key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"',
+            key_value_separator="\\n",
+        )

sglang/srt/function_call/kimik2_detector.py CHANGED Viewed

@@ -18,16 +18,21 @@ logger = logging.getLogger(__name__)
 class KimiK2Detector(BaseFormatDetector):
+    """
+    Detector for Kimi K2 model function call format.
+    Format Structure:
+    ```
+    <|tool_calls_section_begin|>
+    <|tool_call_begin|>functions.{func_name}:{index} <|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+    <|tool_calls_section_end|>
+    ```
+    Reference: https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md
+    """
     def __init__(self):
         super().__init__()
-        self._buffer = ""
-        self.current_tool_name_sent: bool = False
-        self.prev_tool_call_arr: list[dict] = []
-        self.current_tool_id: int = -1
-        self.streamed_args_for_tool: list[str] = (
-            []
-        )  # map what has been streamed for each tool so far to a list
         self.bot_token: str = "<|tool_calls_section_begin|>"
         self.eot_token: str = "<|tool_calls_section_end|>"
@@ -114,11 +119,7 @@ class KimiK2Detector(BaseFormatDetector):
             return StreamingParseResult(normal_text=new_text)
         if not hasattr(self, "_tool_indices"):
-            self._tool_indices = {
-                tool.function.name: i
-                for i, tool in enumerate(tools)
-                if tool.function and tool.function.name
-            }
+            self._tool_indices = self._get_tool_indices(tools)
         calls: list[ToolCallItem] = []
         try:
@@ -150,7 +151,7 @@ class KimiK2Detector(BaseFormatDetector):
                         )
                     )
                     self.current_tool_name_sent = True
-                    # Store the tool call info for adapter.py
+                    # Store the tool call info for serving layer completions endpoint
                     self.prev_tool_call_arr[self.current_tool_id] = {
                         "name": function_name,
                         "arguments": {},
@@ -214,7 +215,31 @@ class KimiK2Detector(BaseFormatDetector):
             return StreamingParseResult(normal_text=current_text)
     def structure_info(self) -> _GetInfoFunc:
-        raise NotImplementedError()
+        """Return function that creates StructureInfo for guided generation."""
+        def get_info(name: str) -> StructureInfo:
+            return StructureInfo(
+                begin=f"<|tool_calls_section_begin|><|tool_call_begin|>functions.{name}:0 <|tool_call_argument_begin|>",
+                end="<|tool_call_end|><|tool_calls_section_end|>",
+                trigger="<|tool_calls_section_begin|>",
+            )
+        return get_info
-    def build_ebnf(self, tools: List[Tool]):
-        raise NotImplementedError()
+    def build_ebnf(self, tools: List[Tool]) -> str:
+        """
+        Build EBNF grammar for KimiK2 tool call format.
+        NOTE: The call_rule_fmt uses [0-9]+ for the function index to allow the grammar
+        to accept any numeric index (0, 1, 2, etc.) for proper sequential indexing in
+        multiple function call scenarios, while still maintaining the correct KimiK2
+        format structure for constrained generation.
+        """
+        return EBNFComposer.build_ebnf(
+            tools,
+            sequence_start_token=self.bot_token,
+            sequence_end_token=self.eot_token,
+            tool_call_separator="",
+            call_rule_fmt='"<|tool_call_begin|>functions.{name}:" [0-9]+ " <|tool_call_argument_begin|>" {arguments_rule} "<|tool_call_end|>"',
+            function_format="json",
+        )

sglang/srt/function_call/llama32_detector.py CHANGED Viewed

@@ -16,9 +16,12 @@ logger = logging.getLogger(__name__)
 class Llama32Detector(BaseFormatDetector):
     """
-    Detector for Llama 3.2 models.
-    Assumes function call format:
-      <|python_tag|>{"name":"xxx", "arguments":{...}}
+    Detector for Llama 3.2 models with json tool call format.
+    Format Structure:
+    ```
+    <python_tag>{"name":"xxx", "arguments":{...}}
+    ```
     """
     def __init__(self):

sglang/srt/function_call/mistral_detector.py CHANGED Viewed

@@ -17,9 +17,17 @@ logger = logging.getLogger(__name__)
 class MistralDetector(BaseFormatDetector):
     """
-    Detector for Mistral models.
-    Assumes function call format:
-      [TOOL_CALLS] [{"name":"func1", "arguments":{...}}, {"name":"func2", "arguments":{...}}]
+    Detector for Mistral model function call format.
+    The Mistral format uses a simple bracket-delimited structure with JSON arrays
+    containing function call objects.
+    Format Structure:
+    ```
+    [TOOL_CALLS] [{"name": "function_name", "arguments": {json_args}}, ...]
+    ```
+    Reference: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3?chat_template=default
     """
     def __init__(self):

sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post5__py3-none-any.whl

sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post5__py3-none-any.whl