PyPI - sglang - Versions diffs - 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl - Mend

sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

sglang/bench_offline_throughput.py +10 -8
sglang/bench_one_batch.py +7 -6
sglang/bench_one_batch_server.py +157 -21
sglang/bench_serving.py +137 -59
sglang/compile_deep_gemm.py +5 -5
sglang/eval/loogle_eval.py +157 -0
sglang/lang/chat_template.py +78 -78
sglang/lang/tracer.py +1 -1
sglang/srt/code_completion_parser.py +1 -1
sglang/srt/configs/deepseekvl2.py +2 -2
sglang/srt/configs/model_config.py +40 -28
sglang/srt/constrained/base_grammar_backend.py +55 -72
sglang/srt/constrained/llguidance_backend.py +25 -21
sglang/srt/constrained/outlines_backend.py +27 -26
sglang/srt/constrained/reasoner_grammar_backend.py +22 -33
sglang/srt/constrained/xgrammar_backend.py +69 -43
sglang/srt/conversation.py +49 -44
sglang/srt/disaggregation/base/conn.py +1 -0
sglang/srt/disaggregation/decode.py +129 -135
sglang/srt/disaggregation/decode_schedule_batch_mixin.py +142 -0
sglang/srt/disaggregation/fake/conn.py +3 -13
sglang/srt/disaggregation/kv_events.py +357 -0
sglang/srt/disaggregation/mini_lb.py +57 -24
sglang/srt/disaggregation/mooncake/conn.py +238 -122
sglang/srt/disaggregation/mooncake/transfer_engine.py +2 -1
sglang/srt/disaggregation/nixl/conn.py +10 -19
sglang/srt/disaggregation/prefill.py +132 -47
sglang/srt/disaggregation/utils.py +123 -6
sglang/srt/distributed/utils.py +3 -3
sglang/srt/entrypoints/EngineBase.py +5 -0
sglang/srt/entrypoints/engine.py +44 -9
sglang/srt/entrypoints/http_server.py +23 -6
sglang/srt/entrypoints/http_server_engine.py +5 -2
sglang/srt/function_call/base_format_detector.py +250 -0
sglang/srt/function_call/core_types.py +34 -0
sglang/srt/function_call/deepseekv3_detector.py +157 -0
sglang/srt/function_call/ebnf_composer.py +234 -0
sglang/srt/function_call/function_call_parser.py +175 -0
sglang/srt/function_call/llama32_detector.py +74 -0
sglang/srt/function_call/mistral_detector.py +84 -0
sglang/srt/function_call/pythonic_detector.py +163 -0
sglang/srt/function_call/qwen25_detector.py +67 -0
sglang/srt/function_call/utils.py +35 -0
sglang/srt/hf_transformers_utils.py +46 -7
sglang/srt/layers/attention/aiter_backend.py +513 -0
sglang/srt/layers/attention/flashattention_backend.py +64 -18
sglang/srt/layers/attention/flashinfer_mla_backend.py +8 -4
sglang/srt/layers/attention/flashmla_backend.py +340 -78
sglang/srt/layers/attention/triton_backend.py +3 -0
sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +1 -1
sglang/srt/layers/attention/utils.py +6 -4
sglang/srt/layers/attention/vision.py +1 -1
sglang/srt/layers/communicator.py +451 -0
sglang/srt/layers/dp_attention.py +61 -21
sglang/srt/layers/layernorm.py +1 -1
sglang/srt/layers/logits_processor.py +46 -11
sglang/srt/layers/moe/cutlass_moe.py +207 -0
sglang/srt/layers/moe/ep_moe/kernels.py +34 -12
sglang/srt/layers/moe/ep_moe/layer.py +105 -51
sglang/srt/layers/moe/ep_moe/token_dispatcher.py +82 -7
sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +1 -1
sglang/srt/layers/moe/fused_moe_triton/layer.py +14 -0
sglang/srt/layers/moe/topk.py +67 -10
sglang/srt/layers/multimodal.py +70 -0
sglang/srt/layers/quantization/__init__.py +8 -3
sglang/srt/layers/quantization/blockwise_int8.py +2 -2
sglang/srt/layers/quantization/deep_gemm.py +77 -74
sglang/srt/layers/quantization/fp8.py +92 -2
sglang/srt/layers/quantization/fp8_kernel.py +3 -3
sglang/srt/layers/quantization/fp8_utils.py +6 -0
sglang/srt/layers/quantization/gptq.py +298 -6
sglang/srt/layers/quantization/int8_kernel.py +20 -7
sglang/srt/layers/quantization/qoq.py +244 -0
sglang/srt/layers/sampler.py +0 -4
sglang/srt/layers/vocab_parallel_embedding.py +18 -7
sglang/srt/lora/lora_manager.py +2 -4
sglang/srt/lora/mem_pool.py +4 -4
sglang/srt/lora/triton_ops/gate_up_lora_b.py +1 -1
sglang/srt/lora/triton_ops/qkv_lora_b.py +1 -1
sglang/srt/lora/triton_ops/sgemm_lora_a.py +1 -1
sglang/srt/lora/triton_ops/sgemm_lora_b.py +1 -1
sglang/srt/lora/utils.py +1 -1
sglang/srt/managers/data_parallel_controller.py +3 -3
sglang/srt/managers/deepseek_eplb.py +278 -0
sglang/srt/managers/detokenizer_manager.py +21 -8
sglang/srt/managers/eplb_manager.py +55 -0
sglang/srt/managers/expert_distribution.py +704 -56
sglang/srt/managers/expert_location.py +394 -0
sglang/srt/managers/expert_location_dispatch.py +91 -0
sglang/srt/managers/io_struct.py +19 -4
sglang/srt/managers/mm_utils.py +294 -140
sglang/srt/managers/multimodal_processors/base_processor.py +127 -42
sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +6 -1
sglang/srt/managers/multimodal_processors/gemma3.py +31 -6
sglang/srt/managers/multimodal_processors/internvl.py +14 -5
sglang/srt/managers/multimodal_processors/janus_pro.py +7 -1
sglang/srt/managers/multimodal_processors/kimi_vl.py +7 -6
sglang/srt/managers/multimodal_processors/llava.py +46 -0
sglang/srt/managers/multimodal_processors/minicpm.py +25 -31
sglang/srt/managers/multimodal_processors/mllama4.py +6 -0
sglang/srt/managers/multimodal_processors/pixtral.py +127 -0
sglang/srt/managers/multimodal_processors/qwen_vl.py +58 -16
sglang/srt/managers/schedule_batch.py +122 -42
sglang/srt/managers/schedule_policy.py +1 -5
sglang/srt/managers/scheduler.py +205 -138
sglang/srt/managers/scheduler_output_processor_mixin.py +124 -55
sglang/srt/managers/session_controller.py +1 -1
sglang/srt/managers/tokenizer_manager.py +232 -58
sglang/srt/managers/tp_worker.py +12 -9
sglang/srt/managers/tp_worker_overlap_thread.py +22 -11
sglang/srt/mem_cache/base_prefix_cache.py +3 -0
sglang/srt/mem_cache/chunk_cache.py +3 -1
sglang/srt/mem_cache/hiradix_cache.py +4 -4
sglang/srt/mem_cache/memory_pool.py +76 -52
sglang/srt/mem_cache/multimodal_cache.py +45 -0
sglang/srt/mem_cache/radix_cache.py +58 -5
sglang/srt/metrics/collector.py +314 -39
sglang/srt/mm_utils.py +10 -0
sglang/srt/model_executor/cuda_graph_runner.py +29 -19
sglang/srt/model_executor/expert_location_updater.py +422 -0
sglang/srt/model_executor/forward_batch_info.py +5 -1
sglang/srt/model_executor/model_runner.py +163 -68
sglang/srt/model_loader/loader.py +10 -6
sglang/srt/models/clip.py +5 -1
sglang/srt/models/deepseek_janus_pro.py +2 -2
sglang/srt/models/deepseek_v2.py +308 -351
sglang/srt/models/exaone.py +8 -3
sglang/srt/models/gemma3_mm.py +70 -33
sglang/srt/models/llama.py +2 -0
sglang/srt/models/llama4.py +15 -8
sglang/srt/models/llava.py +258 -7
sglang/srt/models/mimo_mtp.py +220 -0
sglang/srt/models/minicpmo.py +5 -12
sglang/srt/models/mistral.py +71 -1
sglang/srt/models/mixtral.py +98 -34
sglang/srt/models/mllama.py +3 -3
sglang/srt/models/pixtral.py +467 -0
sglang/srt/models/qwen2.py +95 -26
sglang/srt/models/qwen2_5_vl.py +8 -0
sglang/srt/models/qwen2_moe.py +330 -60
sglang/srt/models/qwen2_vl.py +6 -0
sglang/srt/models/qwen3.py +52 -10
sglang/srt/models/qwen3_moe.py +411 -48
sglang/srt/models/roberta.py +1 -1
sglang/srt/models/siglip.py +294 -0
sglang/srt/models/torch_native_llama.py +1 -1
sglang/srt/openai_api/adapter.py +58 -20
sglang/srt/openai_api/protocol.py +6 -8
sglang/srt/operations.py +154 -0
sglang/srt/operations_strategy.py +31 -0
sglang/srt/reasoning_parser.py +3 -3
sglang/srt/sampling/custom_logit_processor.py +18 -3
sglang/srt/sampling/sampling_batch_info.py +4 -56
sglang/srt/sampling/sampling_params.py +2 -2
sglang/srt/server_args.py +162 -22
sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +3 -3
sglang/srt/speculative/eagle_utils.py +138 -7
sglang/srt/speculative/eagle_worker.py +69 -21
sglang/srt/utils.py +74 -17
sglang/test/few_shot_gsm8k.py +2 -2
sglang/test/few_shot_gsm8k_engine.py +2 -2
sglang/test/run_eval.py +2 -2
sglang/test/runners.py +8 -1
sglang/test/send_one.py +13 -3
sglang/test/simple_eval_common.py +1 -1
sglang/test/simple_eval_humaneval.py +1 -1
sglang/test/test_cutlass_moe.py +278 -0
sglang/test/test_programs.py +5 -5
sglang/test/test_utils.py +55 -14
sglang/utils.py +3 -3
sglang/version.py +1 -1
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/METADATA +23 -13
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/RECORD +178 -149
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/WHEEL +1 -1
sglang/srt/function_call_parser.py +0 -858
sglang/srt/platforms/interface.py +0 -371
/sglang/{llama3_eval.py → eval/llama3_eval.py} +0 -0
/sglang/srt/models/{xiaomi_mimo.py → mimo.py} +0 -0
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/licenses/LICENSE +0 -0
{sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/top_level.txt +0 -0

sglang/srt/function_call/ebnf_composer.py ADDED Viewed

@@ -0,0 +1,234 @@
+from typing import Literal, Optional
+class EBNFComposer:
+    """Compose EBNF grammars that constrain model output to well-formed tool calls.
+    The class is a namespace of grammar templates plus static helpers; `build_ebnf`
+    is the entry point and the remaining methods translate JSON-schema fragments
+    into EBNF value rules.
+    """
+    # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
+    json_grammar_ebnf_str = r"""
+        json ::= basic_array | basic_object
+        basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+        basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
+        basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
+        basic_string ::= (([\"] basic_string_1 [\"]))
+        basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
+        escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
+        basic_boolean ::= "true" | "false"
+        basic_null ::= "null"
+        basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
+        basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
+        ws ::= [ \n\t]*
+        """
+    pythonic_grammar_ebnf_str = r"""
+        pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
+        basic_any ::= basic_number | basic_string | basic_array | basic_object
+        basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
+        basic_string ::= (([\"] basic_string_1 [\"]))
+        basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
+        escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
+        basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
+        basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
+        ws ::= [ \n\t]*
+    """
+    # Root rule used when no begin/end tokens are supplied to build_ebnf.
+    TOOL_CALLS_MAP = {
+        "pythonic": '"[" function_call ("," function_call)* "]"',
+        "json": "function_call",
+    }
+    # Default `call_<name>` rule per format; filled in with str.format.
+    CALL_RULE_MAP = {
+        "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
+        "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
+    }
+    # Wrapper placed around the combined argument rules of one tool.
+    ARGUMENTS_RULE_MAP = {
+        "pythonic": "{arg_rules}",
+        "json": '"{{" {arg_rules} "}}"',
+    }
+    # Template for a single `key: value` (json) or `key=value` (pythonic) pair.
+    KEY_VALUE_RULE_MAP = {
+        "pythonic": '"{key}" "=" {valrule}',
+        "json": '"\\"{key}\\"" ":" {valrule}',
+    }
+    # JSON-schema type name -> rule of json_grammar_ebnf_str.
+    JSON_TYPE_MAPPING = {
+        "string": "basic_string",
+        "number": "basic_number",
+        "integer": "basic_number",
+        "boolean": "basic_boolean",
+        "null": "basic_null",
+        "array": "basic_array",
+        "object": "basic_object",
+    }
+    # JSON-schema type name -> rule of pythonic_grammar_ebnf_str.
+    PYTHONIC_TYPE_MAPPING = {
+        "string": "basic_string",
+        "number": "basic_number",
+        "integer": "basic_number",
+        # Parenthesised so the alternation cannot capture the surrounding
+        # `"key" "=" ...` sequence when spliced into a key/value rule.
+        "boolean": '("True" | "False")',
+        "null": '"None"',
+        "array": "basic_array",
+        "object": "basic_object",
+    }
+    @staticmethod
+    def get_value_rule(
+        prop: dict, function_format: Literal["pythonic", "json"] = "json"
+    ) -> str:
+        """Return the EBNF expression that matches a value of schema `prop`.
+        Args:
+            prop: JSON-schema fragment describing one property.
+            function_format: Which base grammar the rule will be embedded in.
+        Returns:
+            An enum alternation when `prop` has "enum", a type rule when it has
+            "type", otherwise the name of the base grammar's top-level rule
+            (`json` or `pythonic`, which is spelled like the format itself).
+        """
+        if "enum" in prop:
+            return EBNFComposer._handle_enum(prop, function_format)
+        if "type" in prop:
+            return EBNFComposer._handle_type(prop, function_format)
+        return function_format
+    @staticmethod
+    def _quote_terminal(text: str) -> str:
+        """Return `text` as a double-quoted EBNF string literal, escaping `\\` and `"`."""
+        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
+        return f'"{escaped}"'
+    @staticmethod
+    def _format_enum_value(value, prop_type, function_format: str) -> str:
+        """Render one enum value as a quoted EBNF terminal.
+        When `prop_type` is a single type name it decides the rendering (anything
+        other than number/integer/boolean is rendered as a string). When it is not
+        a string, e.g. a union such as ["string", "null"], the rendering is
+        inferred from the Python type of `value`.
+        """
+        pythonic = function_format == "pythonic"
+        if isinstance(prop_type, str):
+            kind = prop_type
+        elif value is None:
+            return EBNFComposer._quote_terminal("None" if pythonic else "null")
+        elif isinstance(value, bool):  # must precede the int check: bool is an int
+            kind = "boolean"
+        elif isinstance(value, (int, float)):
+            kind = "number"
+        else:
+            kind = "string"
+        if kind == "boolean":
+            if pythonic:
+                literal = "True" if value else "False"
+            else:
+                literal = "true" if value else "false"
+        elif kind in ("number", "integer"):
+            literal = str(value)
+        else:
+            # The generated text is itself a double-quoted string, so escape the
+            # value for that string first and for the EBNF literal second.
+            inner = str(value).replace("\\", "\\\\").replace('"', '\\"')
+            literal = f'"{inner}"'
+        return EBNFComposer._quote_terminal(literal)
+    @staticmethod
+    def _handle_enum(prop: dict, function_format: str) -> str:
+        """Handle enum properties by formatting each value according to type and format.
+        Every value becomes a quoted terminal, so numeric and boolean members are
+        not read as rule references, and a list-valued "type" is accepted.
+        """
+        enum_values = prop["enum"]
+        prop_type = prop.get("type", "string")
+        formatted_values = [
+            EBNFComposer._format_enum_value(value, prop_type, function_format)
+            for value in enum_values
+        ]
+        enum_rule = " | ".join(formatted_values)
+        # Wrap in parentheses if there are multiple values to ensure correct EBNF precedence
+        if len(formatted_values) > 1:
+            enum_rule = f"({enum_rule})"
+        return enum_rule
+    @staticmethod
+    def _handle_type(prop: dict, function_format: str) -> str:
+        """Handle type properties using the appropriate type mapping.
+        A list-valued "type" becomes a parenthesised alternation of the known
+        member types; unknown types fall back to the base grammar's top-level rule.
+        """
+        prop_type = prop["type"]
+        type_mapping = (
+            EBNFComposer.PYTHONIC_TYPE_MAPPING
+            if function_format == "pythonic"
+            else EBNFComposer.JSON_TYPE_MAPPING
+        )
+        if isinstance(prop_type, list):
+            type_rules = [
+                type_mapping[single_type]
+                for single_type in prop_type
+                if single_type in type_mapping
+            ]
+            if not type_rules:
+                return function_format
+            if len(type_rules) == 1:
+                return type_rules[0]
+            # Alternation binds loosest, so group it before it is spliced into
+            # a `key value` sequence.
+            return "(" + " | ".join(type_rules) + ")"
+        return type_mapping.get(prop_type, function_format)
+    @staticmethod
+    def _combine_arg_rules(required_pairs: list, optional_pairs: list) -> str:
+        """Join key/value rules so commas appear only between arguments that are present.
+        Required pairs come first, comma separated. Optional pairs follow in their
+        given order; any of them may be omitted, and each alternative starts with a
+        different optional pair so no leading or doubled comma is ever required.
+        """
+        parts = []
+        if required_pairs:
+            parts.append(' "," '.join(required_pairs))
+        if optional_pairs:
+            alternatives = []
+            for start, first in enumerate(optional_pairs):
+                tail = "".join(
+                    f' ( "," {later} )?' for later in optional_pairs[start + 1 :]
+                )
+                alternatives.append(first + tail)
+            group = " | ".join(alternatives)
+            if required_pairs:
+                parts.append(f' ( "," ( {group} ) )?')
+            else:
+                parts.append(f"( {group} )?")
+        return "".join(parts)
+    @staticmethod
+    def build_ebnf(
+        tools,
+        *,
+        call_rule_fmt: Optional[str] = None,
+        function_format: Literal["pythonic", "json"] = "json",
+        bot_token: Optional[str] = None,
+        eot_token: Optional[str] = None,
+        tool_call_separator: Optional[str] = None,
+    ):
+        """
+        Generalized EBNF builder for all detectors.
+        Args:
+            tools: List of Tool objects to generate EBNF grammar for
+            call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
+                the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
+                format based on function_format will be used.
+            function_format: The format of function calls, either "pythonic" or "json"
+            bot_token: The token that indicates the start of a tool call section
+            eot_token: The token that indicates the end of a tool call section
+            tool_call_separator: The separator between multiple tool calls; only used
+                when both bot_token and eot_token are given
+        Returns:
+            The grammar as one string: the root and function_call rules, a
+            call_<name>/arguments_<name> rule pair per tool, then the base grammar.
+            Within each arguments rule, required properties precede optional ones.
+        """
+        # =================================================================
+        # Step 1: Determine the root tool calls rule
+        # =================================================================
+        if bot_token and eot_token:
+            if tool_call_separator:
+                root_rule = f'"{bot_token}" function_call ( "{tool_call_separator}" function_call )* "{eot_token}"'
+            else:
+                root_rule = f'"{bot_token}" function_call "{eot_token}"'
+        else:
+            root_rule = EBNFComposer.TOOL_CALLS_MAP[function_format]
+        # =================================================================
+        # Step 2: Build the header rules
+        # =================================================================
+        ebnf_lines = [
+            f"root ::= {root_rule}",
+            "function_call ::= "
+            + " | ".join([f"call_{tool.function.name}" for tool in tools]),
+        ]
+        # =================================================================
+        # Step 3: Set up formatting templates
+        # =================================================================
+        call_template = (
+            f"call_{{name}} ::= {call_rule_fmt}"
+            if call_rule_fmt
+            else EBNFComposer.CALL_RULE_MAP[function_format]
+        )
+        args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
+        key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        # =================================================================
+        # Step 4: Build rules for each tool
+        # =================================================================
+        for tool in tools:
+            tool_name = tool.function.name
+            params = tool.function.parameters or {}
+            properties = params.get("properties", {})
+            required_props = set(params.get("required", []))
+            # Build one key/value rule per property, split by whether it is required
+            required_pairs = []
+            optional_pairs = []
+            for prop_name, prop_schema in properties.items():
+                value_rule = EBNFComposer.get_value_rule(prop_schema, function_format)
+                pair = key_value_template.format(key=prop_name, valrule=value_rule)
+                if prop_name in required_props:
+                    required_pairs.append(pair)
+                else:
+                    optional_pairs.append(pair)
+            # Combine all argument rules
+            combined_args = EBNFComposer._combine_arg_rules(
+                required_pairs, optional_pairs
+            )
+            arguments_rule = args_template.format(arg_rules=combined_args)
+            # Add the function call rule and its arguments rule
+            ebnf_lines.append(
+                call_template.format(
+                    name=tool_name, arguments_rule=f"arguments_{tool_name}"
+                )
+            )
+            ebnf_lines.append(f"arguments_{tool_name} ::= {arguments_rule}")
+        # =================================================================
+        # Step 5: Add base grammar rules
+        # =================================================================
+        base_grammar = (
+            EBNFComposer.pythonic_grammar_ebnf_str
+            if function_format == "pythonic"
+            else EBNFComposer.json_grammar_ebnf_str
+        )
+        ebnf_lines.append(base_grammar)
+        return "\n".join(ebnf_lines)

sglang/srt/function_call/function_call_parser.py ADDED Viewed

@@ -0,0 +1,175 @@
+from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Type, Union
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import ToolCallItem
+from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
+from sglang.srt.function_call.llama32_detector import Llama32Detector
+from sglang.srt.function_call.mistral_detector import MistralDetector
+from sglang.srt.function_call.pythonic_detector import PythonicDetector
+from sglang.srt.function_call.qwen25_detector import Qwen25Detector
+from sglang.srt.openai_api.protocol import (
+    StructuralTagResponseFormat,
+    StructuresResponseFormat,
+    Tool,
+    ToolChoice,
+)
+class FunctionCallParser:
+    """
+    Parser for function/tool calls in model outputs.
+    This class handles both streaming and non-streaming parsing of function calls using a detector.
+    In streaming scenarios, each time new_text is received, it calls detector.parse_streaming_increment
+    and returns the resulting normal_text and calls to the upper layer (or SSE).
+    """
+    # Maps the `tool_call_parser` name to the detector class implementing that format.
+    ToolCallParserEnum: Dict[str, Type[BaseFormatDetector]] = {
+        "llama3": Llama32Detector,
+        "qwen25": Qwen25Detector,
+        "mistral": MistralDetector,
+        "deepseekv3": DeepSeekV3Detector,
+        "pythonic": PythonicDetector,
+    }
+    def __init__(self, tools: List[Tool], tool_call_parser: str):
+        """
+        Create a parser bound to one detector and one set of tools.
+        Args:
+            tools: The tools the model is allowed to call
+            tool_call_parser: A key of ToolCallParserEnum selecting the detector
+        Raises:
+            ValueError: If tool_call_parser is not a key of ToolCallParserEnum
+        """
+        detector_class = self.ToolCallParserEnum.get(tool_call_parser)
+        if detector_class is None:
+            raise ValueError(f"Unsupported tool_call_parser: {tool_call_parser}")
+        self.detector: BaseFormatDetector = detector_class()
+        self.tools = tools
+    def has_tool_call(self, text: str) -> bool:
+        """
+        Check if the given text contains a tool call in the format supported by this parser.
+        This delegates to the detector's implementation.
+        Args:
+            text: The text to check for tool calls
+        Returns:
+            True if the text contains a tool call, False otherwise
+        """
+        return self.detector.has_tool_call(text)
+    def parse_non_stream(self, full_text: str) -> Tuple[str, list[ToolCallItem]]:
+        """
+        One-time parsing of the full text to extract tool calls.
+        Args:
+            full_text: The complete text to parse
+        Returns:
+            A tuple containing:
+            - The remaining text after parsing that was not consumed by the detector (can be treated as normal text)
+            - A list of tool calls parsed from the text
+            When the detector finds no call, the text is returned unchanged with an empty list.
+        """
+        parsed_result = self.detector.detect_and_parse(full_text, self.tools)
+        tool_call_list = parsed_result.calls
+        if tool_call_list:
+            return parsed_result.normal_text, tool_call_list
+        return full_text, []
+    def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, list[ToolCallItem]]:
+        """
+        Streaming incremental parsing of chunks of text as they arrive.
+        Args:
+            chunk_text: The new chunk of text to parse
+        Returns:
+            A tuple containing:
+            - The normal text that should be displayed to the user ("" when there is none)
+            - A list of tool calls parsed from the chunk
+        """
+        sp_result = self.detector.parse_streaming_increment(chunk_text, self.tools)
+        # Normalise a missing/empty normal_text to "" so the declared `str` return
+        # type holds whether or not the chunk produced calls.
+        final_normal_text = sp_result.normal_text or ""
+        final_calls = list(sp_result.calls) if sp_result.calls else []
+        return final_normal_text, final_calls
+    def get_structure_tag(self) -> StructuralTagResponseFormat:
+        """
+        Generate a structural tag response format for all available tools.
+        This creates the necessary structural tags that guide the model's output format.
+        """
+        tool_structures: List[StructuresResponseFormat] = []
+        tool_trigger_set: Set[str] = set()
+        get_structure_info = self.detector.structure_info()
+        for tool in self.tools:
+            function = tool.function
+            name = function.name
+            assert name is not None
+            info = get_structure_info(name)
+            # accept all if not strict, otherwise only accept the schema
+            schema = function.parameters if function.strict else {}
+            tool_structures.append(
+                StructuresResponseFormat(
+                    begin=info.begin,
+                    schema=schema,  # type: ignore
+                    end=info.end,
+                )
+            )
+            tool_trigger_set.add(info.trigger)
+        return StructuralTagResponseFormat(
+            type="structural_tag",
+            structures=tool_structures,
+            triggers=list(tool_trigger_set),
+        )
+    def get_structure_constraint(
+        self, tool_choice: Union[ToolChoice, Literal["auto", "required"]]
+    ) -> Optional[Tuple[str, Any]]:
+        """
+        Returns the appropriate structure constraint for tool calls based on the tool_choice.
+        The constraint is used to guide the model's output format.
+        Args:
+            tool_choice: The tool choice setting from the request
+        Returns:
+            A tuple of (constraint_type, constraint_value) to be added to sampling parameters,
+            or None if no constraint applies.
+        """
+        # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
+        # It cannot parse or validate Python syntax like function calls.
+        if (
+            not isinstance(self.detector, PythonicDetector)
+            and tool_choice == "auto"
+            and any(tool.function.strict for tool in self.tools)
+        ):
+            strict_tag = self.get_structure_tag()
+            return ("structural_tag", strict_tag)
+        elif tool_choice == "required" or isinstance(tool_choice, ToolChoice):
+            ebnf = self.get_ebnf(tool_choice)
+            return ("ebnf", ebnf) if ebnf is not None else None
+        return None
+    def get_ebnf(
+        self, tool_choice: Union[ToolChoice, Literal["required"]]
+    ) -> Optional[str]:
+        """
+        Get the EBNF grammar for the specified tool choice.
+        Args:
+            tool_choice: Either a ToolChoice naming one function, or "required" to
+                allow every tool of this parser
+        Returns:
+            The grammar built by the detector, or None when no tool is selected
+            (for example, the ToolChoice names a function that is not in self.tools).
+        """
+        if isinstance(tool_choice, ToolChoice):
+            fn_name = tool_choice.function.name
+            filtered_tools = [t for t in self.tools if t.function.name == fn_name]
+        else:
+            filtered_tools = self.tools
+        if not filtered_tools:
+            # A grammar over zero tools would have an empty `function_call` rule,
+            # so report "no constraint" instead of emitting an unusable grammar.
+            return None
+        return self.detector.build_ebnf(filtered_tools)

sglang/srt/function_call/llama32_detector.py ADDED Viewed

@@ -0,0 +1,74 @@
+import json
+import logging
+from typing import List
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    StructureInfo,
+    _GetInfoFunc,
+)
+from sglang.srt.function_call.ebnf_composer import EBNFComposer
+from sglang.srt.openai_api.protocol import Tool
+logger = logging.getLogger(__name__)
+class Llama32Detector(BaseFormatDetector):
+    """
+    Detector for Llama 3.2 models.
+    Assumes function call format:
+      <|python_tag|>{"name":"xxx", "arguments":{...}}
+    Several calls may follow each other, separated by ";".
+    """
+    def __init__(self):
+        super().__init__()
+        self.bot_token = "<|python_tag|>"
+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a Llama 3.2 format tool call."""
+        # depending on the prompt format the Llama model may or may not
+        # prefix the output with the <|python_tag|> token
+        return self.bot_token in text or text.startswith("{")
+    def _decode_json_values(self, action_text: str) -> list:
+        """Decode every JSON value found in `action_text`, in order.
+        Whitespace, ";" and repeated <|python_tag|> markers between values are
+        skipped. A span that is not valid JSON is logged and skipped up to the
+        next ";". Decoding with the JSON parser, rather than splitting on ";",
+        keeps semicolons that occur inside string values intact.
+        """
+        decoder = json.JSONDecoder()
+        actions = []
+        pos, end = 0, len(action_text)
+        while pos < end:
+            if action_text[pos].isspace() or action_text[pos] == ";":
+                pos += 1
+                continue
+            # The emptiness guard prevents an endless loop on a zero-length marker.
+            if self.bot_token and action_text.startswith(self.bot_token, pos):
+                pos += len(self.bot_token)
+                continue
+            try:
+                action, pos = decoder.raw_decode(action_text, pos)
+            except json.JSONDecodeError as e:
+                next_separator = action_text.find(";", pos)
+                bad_end = end if next_separator == -1 else next_separator
+                logger.warning(
+                    "Failed to parse JSON part: %s", action_text[pos:bad_end].strip()
+                )
+                logger.warning("JSON parse error: %s", e)
+                pos = bad_end
+                continue
+            actions.append(action)
+        return actions
+    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+        """Parse function calls from text, handling multiple JSON objects.
+        Args:
+            text: The complete model output.
+            tools: List of available tools.
+        Returns:
+            A StreamingParseResult whose normal_text is the text before the first
+            <|python_tag|> ("" when the output starts directly with "{") and whose
+            calls come from parse_base_json. Text without a tool call is returned
+            unchanged as normal_text with no calls.
+        """
+        if not self.has_tool_call(text):
+            return StreamingParseResult(normal_text=text, calls=[])
+        if self.bot_token in text:
+            # Split on the first marker only: a second marker must not break the
+            # two-way unpacking.
+            normal_text, action_text = text.split(self.bot_token, 1)
+        else:
+            normal_text, action_text = "", text
+        all_actions = self._decode_json_values(action_text)
+        calls = []
+        # Only process if we found valid JSON objects
+        if all_actions:
+            calls = self.parse_base_json(all_actions, tools)
+        return StreamingParseResult(normal_text=normal_text, calls=calls)
+    def structure_info(self) -> _GetInfoFunc:
+        """Return a factory giving the begin/end/trigger strings for a tool name."""
+        return lambda name: StructureInfo(
+            begin='<|python_tag|>{"name":"' + name + '", "arguments":',
+            end="}",
+            trigger="<|python_tag|>",
+        )
+    def build_ebnf(self, tools: List[Tool]):
+        """Build the JSON-format EBNF grammar for `tools` via EBNFComposer."""
+        return EBNFComposer.build_ebnf(
+            tools,
+            function_format="json",
+            tool_call_separator=",",
+        )

sglang/srt/function_call/mistral_detector.py ADDED Viewed

@@ -0,0 +1,84 @@
+import json
+import re
+from typing import List
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    StructureInfo,
+    _GetInfoFunc,
+)
+from sglang.srt.function_call.ebnf_composer import EBNFComposer
+from sglang.srt.openai_api.protocol import Tool
+class MistralDetector(BaseFormatDetector):
+    """
+    Detector for Mistral models.
+    Assumes function call format:
+      [TOOL_CALLS] [{"name":"xxx", "arguments":{...}}]
+    """
+    def __init__(self):
+        """
+        Initializes the detector with necessary state variables.
+        """
+        super().__init__()
+        self.bot_token = "[TOOL_CALLS] ["
+        self.eot_token = "]"
+        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)
+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a Mistral format tool call."""
+        return self.bot_token in text
+    def _clean_text(self, text: str) -> str:
+        """
+        clean text to only leave ''[TOOL_CALLS] [{"name": xxx, "arguments": {xxx}}]'
+        for example,
+            text = '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]\n\nToday\'s weather in Boston is :{function call result} (in Fahrenheit)\n\nIf you prefer Celsius, please let me know.'
+            return '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]'
+        The key pattern is [TOOL_CALLS] [...]
+        The end of the array is located with the JSON decoder, so a "]" inside the
+        arguments (a list value, or a bracket in a string) does not cut the call
+        short. If the array is not valid JSON, the shortest "[TOOL_CALLS] [...]"
+        match is returned instead; "" is returned when the pattern is absent.
+        """
+        start = text.find(self.bot_token)
+        if start != -1:
+            # bot_token ends with the "[" that opens the JSON array.
+            array_start = start + len(self.bot_token) - 1
+            try:
+                _, array_end = json.JSONDecoder().raw_decode(text, array_start)
+            except json.JSONDecodeError:
+                pass  # malformed array: fall back to the regex below
+            else:
+                return text[start:array_end]
+        # TODO: check if Mistral supports multiple tool calls, currently assume only support one tool call
+        find_results = re.findall(r"\[TOOL_CALLS\] \[.*?\]", text, re.DOTALL)
+        return find_results[0] if find_results else ""
+    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+        """
+        One-time parsing: Detects and parses tool calls in the provided text.
+        :param text: The complete text to parse.
+        :param tools: List of available tools.
+        :return: StreamingParseResult with the stripped text preceding [TOOL_CALLS] as
+            normal_text and the parsed calls. If the tool-call array is not valid JSON,
+            the whole input is returned as normal_text with no calls.
+        """
+        idx = text.find(self.bot_token)
+        normal_text = text[:idx].strip() if idx != -1 else text
+        tool_section = self._clean_text(text)
+        marker = "[TOOL_CALLS]"
+        # Drop only the leading marker; occurrences inside argument strings must survive.
+        if tool_section.startswith(marker):
+            tool_section = tool_section[len(marker) :]
+        tool_content = tool_section.strip()
+        raw_tool_calls = self.tool_call_regex.findall(tool_content)
+        calls = []
+        if raw_tool_calls:
+            try:
+                function_call_arr = json.loads(raw_tool_calls[0])
+            except json.JSONDecodeError:
+                # Not a parsable call: hand the text back untouched rather than raising.
+                return StreamingParseResult(normal_text=text, calls=[])
+            for match_result in function_call_arr:
+                calls.extend(self.parse_base_json(match_result, tools))
+        return StreamingParseResult(normal_text=normal_text, calls=calls)
+    def structure_info(self) -> _GetInfoFunc:
+        """Return a factory giving the begin/end/trigger strings for a tool name."""
+        return lambda name: StructureInfo(
+            begin='[TOOL_CALLS] [{"name":"' + name + '", "arguments":',
+            end="}]",
+            trigger="[TOOL_CALLS]",
+        )
+    def build_ebnf(self, tools: List[Tool]):
+        """Build the JSON-format EBNF grammar for `tools`, wrapped in the begin/end tokens."""
+        return EBNFComposer.build_ebnf(
+            tools,
+            bot_token=self.bot_token,
+            eot_token=self.eot_token,
+            function_format="json",
+        )

sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl

sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl