sglang 0.4.6.post5__py3-none-any.whl → 0.4.7.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (359)
  1. sglang/__init__.py +2 -0
  2. sglang/api.py +7 -0
  3. sglang/bench_offline_throughput.py +10 -4
  4. sglang/bench_one_batch_server.py +67 -11
  5. sglang/bench_serving.py +86 -75
  6. sglang/lang/backend/runtime_endpoint.py +24 -1
  7. sglang/lang/interpreter.py +40 -1
  8. sglang/lang/ir.py +27 -0
  9. sglang/math_utils.py +8 -0
  10. sglang/profiler.py +167 -0
  11. sglang/srt/_custom_ops.py +34 -0
  12. sglang/srt/configs/internvl.py +8 -12
  13. sglang/srt/configs/model_config.py +33 -1
  14. sglang/srt/constrained/base_grammar_backend.py +5 -2
  15. sglang/srt/constrained/llguidance_backend.py +9 -8
  16. sglang/srt/constrained/outlines_backend.py +5 -4
  17. sglang/srt/constrained/xgrammar_backend.py +18 -18
  18. sglang/srt/conversation.py +52 -8
  19. sglang/srt/custom_op.py +38 -3
  20. sglang/srt/debug_utils.py +74 -0
  21. sglang/srt/disaggregation/base/__init__.py +1 -1
  22. sglang/srt/disaggregation/base/conn.py +25 -11
  23. sglang/srt/disaggregation/common/__init__.py +5 -0
  24. sglang/srt/disaggregation/common/conn.py +407 -0
  25. sglang/srt/disaggregation/common/utils.py +42 -0
  26. sglang/srt/disaggregation/decode.py +261 -52
  27. sglang/srt/disaggregation/fake/__init__.py +1 -1
  28. sglang/srt/disaggregation/fake/conn.py +16 -9
  29. sglang/srt/disaggregation/kv_events.py +60 -5
  30. sglang/srt/disaggregation/launch_lb.py +140 -0
  31. sglang/srt/disaggregation/mini_lb.py +29 -48
  32. sglang/srt/disaggregation/mooncake/__init__.py +1 -1
  33. sglang/srt/disaggregation/mooncake/conn.py +446 -149
  34. sglang/srt/disaggregation/mooncake/transfer_engine.py +32 -16
  35. sglang/srt/disaggregation/nixl/__init__.py +6 -1
  36. sglang/srt/disaggregation/nixl/conn.py +134 -437
  37. sglang/srt/disaggregation/prefill.py +130 -43
  38. sglang/srt/disaggregation/utils.py +127 -86
  39. sglang/srt/distributed/device_communicators/pymscclpp.py +315 -0
  40. sglang/srt/distributed/parallel_state.py +52 -5
  41. sglang/srt/entrypoints/EngineBase.py +6 -0
  42. sglang/srt/entrypoints/engine.py +116 -5
  43. sglang/srt/entrypoints/http_server.py +28 -4
  44. sglang/srt/eplb_simulator/__init__.py +1 -0
  45. sglang/srt/eplb_simulator/reader.py +51 -0
  46. sglang/srt/function_call/base_format_detector.py +138 -86
  47. sglang/srt/function_call/deepseekv3_detector.py +54 -6
  48. sglang/srt/function_call/ebnf_composer.py +33 -19
  49. sglang/srt/function_call/function_call_parser.py +27 -0
  50. sglang/srt/function_call/llama32_detector.py +33 -14
  51. sglang/srt/function_call/mistral_detector.py +73 -26
  52. sglang/srt/function_call/pythonic_detector.py +86 -20
  53. sglang/srt/function_call/qwen25_detector.py +64 -10
  54. sglang/srt/function_call/utils.py +17 -0
  55. sglang/srt/hf_transformers_utils.py +4 -0
  56. sglang/srt/layers/activation.py +19 -0
  57. sglang/srt/layers/attention/aiter_backend.py +503 -125
  58. sglang/srt/layers/attention/base_attn_backend.py +4 -0
  59. sglang/srt/layers/attention/cutlass_mla_backend.py +40 -34
  60. sglang/srt/layers/attention/flashattention_backend.py +137 -63
  61. sglang/srt/layers/attention/flashinfer_backend.py +46 -3
  62. sglang/srt/layers/attention/flashinfer_mla_backend.py +59 -25
  63. sglang/srt/layers/attention/flashmla_backend.py +2 -10
  64. sglang/srt/layers/attention/intel_amx_backend.py +128 -0
  65. sglang/srt/layers/attention/tbo_backend.py +232 -0
  66. sglang/srt/layers/attention/torch_native_backend.py +3 -0
  67. sglang/srt/layers/attention/triton_backend.py +304 -65
  68. sglang/srt/layers/attention/triton_ops/decode_attention.py +2 -7
  69. sglang/srt/layers/attention/triton_ops/extend_attention.py +12 -4
  70. sglang/srt/layers/attention/vision.py +51 -24
  71. sglang/srt/layers/communicator.py +281 -197
  72. sglang/srt/layers/dp_attention.py +6 -5
  73. sglang/srt/layers/layernorm.py +30 -19
  74. sglang/srt/layers/linear.py +0 -4
  75. sglang/srt/layers/logits_processor.py +0 -12
  76. sglang/srt/layers/moe/cutlass_moe.py +170 -7
  77. sglang/srt/layers/moe/cutlass_moe_params.py +169 -0
  78. sglang/srt/layers/moe/ep_moe/kernels.py +33 -11
  79. sglang/srt/layers/moe/ep_moe/layer.py +136 -72
  80. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +24 -45
  81. sglang/srt/layers/moe/fused_moe_native.py +4 -0
  82. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  83. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  84. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  85. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  86. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=257,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  87. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_2_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  88. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=257,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  89. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +146 -0
  90. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +221 -29
  91. sglang/srt/layers/moe/fused_moe_triton/layer.py +34 -4
  92. sglang/srt/layers/moe/topk.py +60 -26
  93. sglang/srt/layers/multimodal.py +3 -3
  94. sglang/srt/layers/pooler.py +56 -0
  95. sglang/srt/layers/quantization/__init__.py +3 -2
  96. sglang/srt/layers/quantization/blockwise_int8.py +3 -0
  97. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +5 -0
  98. sglang/srt/layers/quantization/deep_gemm_wrapper/__init__.py +1 -0
  99. sglang/srt/layers/quantization/{deep_gemm.py → deep_gemm_wrapper/compile_utils.py} +69 -127
  100. sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +32 -0
  101. sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +110 -0
  102. sglang/srt/layers/quantization/fp8.py +28 -23
  103. sglang/srt/layers/quantization/fp8_kernel.py +156 -75
  104. sglang/srt/layers/quantization/fp8_utils.py +250 -69
  105. sglang/srt/layers/quantization/modelopt_quant.py +334 -7
  106. sglang/srt/layers/quantization/moe_wna16.py +3 -0
  107. sglang/srt/layers/quantization/w8a8_fp8.py +3 -0
  108. sglang/srt/layers/quantization/w8a8_int8.py +3 -0
  109. sglang/srt/layers/radix_attention.py +2 -3
  110. sglang/srt/layers/rotary_embedding.py +6 -12
  111. sglang/srt/layers/sampler.py +80 -79
  112. sglang/srt/layers/utils.py +6 -0
  113. sglang/srt/lora/layers.py +12 -15
  114. sglang/srt/lora/lora.py +49 -5
  115. sglang/srt/lora/lora_manager.py +98 -39
  116. sglang/srt/lora/mem_pool.py +28 -21
  117. sglang/srt/lora/utils.py +17 -13
  118. sglang/srt/managers/cache_controller.py +2 -1
  119. sglang/srt/managers/data_parallel_controller.py +13 -5
  120. sglang/srt/managers/eplb_algorithms/__init__.py +63 -0
  121. sglang/srt/managers/eplb_algorithms/deepseek.py +223 -0
  122. sglang/srt/managers/{deepseek_eplb.py → eplb_algorithms/deepseek_vec.py} +5 -7
  123. sglang/srt/managers/eplb_manager.py +55 -14
  124. sglang/srt/managers/expert_distribution.py +220 -46
  125. sglang/srt/managers/expert_location.py +110 -56
  126. sglang/srt/managers/expert_location_dispatch.py +23 -6
  127. sglang/srt/managers/io_struct.py +43 -8
  128. sglang/srt/managers/mm_utils.py +88 -38
  129. sglang/srt/managers/multimodal_processors/base_processor.py +190 -18
  130. sglang/srt/managers/multimodal_processors/gemma3.py +4 -31
  131. sglang/srt/managers/multimodal_processors/internvl.py +4 -0
  132. sglang/srt/managers/multimodal_processors/kimi_vl.py +15 -34
  133. sglang/srt/managers/multimodal_processors/minicpm.py +2 -1
  134. sglang/srt/managers/multimodal_processors/phi4mm.py +87 -0
  135. sglang/srt/managers/multimodal_processors/qwen_vl.py +22 -64
  136. sglang/srt/managers/multimodal_processors/vila.py +85 -0
  137. sglang/srt/managers/schedule_batch.py +173 -38
  138. sglang/srt/managers/scheduler.py +376 -127
  139. sglang/srt/managers/tokenizer_manager.py +163 -19
  140. sglang/srt/managers/utils.py +0 -4
  141. sglang/srt/mem_cache/chunk_cache.py +1 -0
  142. sglang/srt/mem_cache/hiradix_cache.py +4 -2
  143. sglang/srt/mem_cache/memory_pool.py +111 -407
  144. sglang/srt/mem_cache/memory_pool_host.py +380 -0
  145. sglang/srt/mem_cache/radix_cache.py +36 -12
  146. sglang/srt/metrics/collector.py +9 -0
  147. sglang/srt/model_executor/cuda_graph_runner.py +191 -113
  148. sglang/srt/model_executor/expert_location_updater.py +157 -22
  149. sglang/srt/model_executor/forward_batch_info.py +52 -22
  150. sglang/srt/model_executor/model_runner.py +102 -62
  151. sglang/srt/model_loader/loader.py +8 -1
  152. sglang/srt/model_loader/utils.py +67 -1
  153. sglang/srt/models/bert.py +113 -13
  154. sglang/srt/models/deepseek_nextn.py +1 -1
  155. sglang/srt/models/deepseek_v2.py +623 -290
  156. sglang/srt/models/gemma3_causal.py +7 -0
  157. sglang/srt/models/gemma3_mm.py +19 -14
  158. sglang/srt/models/idefics2.py +342 -0
  159. sglang/srt/models/internvl.py +46 -102
  160. sglang/srt/models/kimi_vl.py +4 -4
  161. sglang/srt/models/llama.py +1 -1
  162. sglang/srt/models/minicpmo.py +2 -5
  163. sglang/srt/models/minicpmv.py +3 -295
  164. sglang/srt/models/phi4mm.py +512 -0
  165. sglang/srt/models/qwen2.py +38 -9
  166. sglang/srt/models/qwen2_5_vl.py +3 -9
  167. sglang/srt/models/qwen2_eagle.py +4 -1
  168. sglang/srt/models/qwen2_moe.py +58 -191
  169. sglang/srt/models/qwen2_vl.py +3 -9
  170. sglang/srt/models/qwen3.py +41 -10
  171. sglang/srt/models/qwen3_moe.py +230 -191
  172. sglang/srt/models/registry.py +9 -1
  173. sglang/srt/models/roberta.py +117 -9
  174. sglang/srt/models/transformers.py +291 -0
  175. sglang/srt/models/vila.py +305 -0
  176. sglang/srt/openai_api/adapter.py +248 -28
  177. sglang/srt/openai_api/protocol.py +68 -3
  178. sglang/srt/openai_api/utils.py +172 -0
  179. sglang/srt/operations.py +37 -2
  180. sglang/srt/operations_strategy.py +200 -24
  181. sglang/srt/sampling/sampling_batch_info.py +37 -1
  182. sglang/srt/sampling/sampling_params.py +4 -1
  183. sglang/srt/server_args.py +381 -209
  184. sglang/srt/speculative/build_eagle_tree.py +9 -9
  185. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +12 -14
  186. sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +256 -0
  187. sglang/srt/speculative/eagle_utils.py +440 -200
  188. sglang/srt/speculative/eagle_worker.py +234 -63
  189. sglang/srt/two_batch_overlap.py +637 -0
  190. sglang/srt/utils.py +187 -7
  191. sglang/test/attention/test_prefix_chunk_info.py +2 -0
  192. sglang/test/runners.py +54 -10
  193. sglang/test/send_one.py +4 -0
  194. sglang/test/test_block_fp8.py +1 -0
  195. sglang/test/test_block_fp8_deep_gemm_blackwell.py +252 -0
  196. sglang/test/test_block_fp8_ep.py +1 -0
  197. sglang/test/test_cutlass_moe.py +3 -3
  198. sglang/test/test_fp4_moe.py +248 -0
  199. sglang/test/test_utils.py +82 -7
  200. sglang/utils.py +9 -0
  201. sglang/version.py +1 -1
  202. {sglang-0.4.6.post5.dist-info → sglang-0.4.7.post1.dist-info}/METADATA +17 -14
  203. {sglang-0.4.6.post5.dist-info → sglang-0.4.7.post1.dist-info}/RECORD +359 -321
  204. {sglang-0.4.6.post5.dist-info → sglang-0.4.7.post1.dist-info}/WHEEL +1 -1
  205. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  206. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  207. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  208. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  209. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  210. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  211. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  212. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  213. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  214. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  215. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=1,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  216. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=144,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  217. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  218. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1024,device_name=NVIDIA_H200.json → triton_3_1_0/E=16,N=1024,device_name=NVIDIA_H200.json} +0 -0
  219. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  220. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  221. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=1344,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  222. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  223. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=14336,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  224. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  225. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  226. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  227. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  228. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=16,N=2688,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  229. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  230. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  231. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=3200,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  232. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  233. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  234. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=6400,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  235. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a16.json} +0 -0
  236. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  237. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json → triton_3_1_0/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json} +0 -0
  238. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=16,N=800,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  239. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=160,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  240. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=20,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  241. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=24,N=1024,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  242. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  243. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A100-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  244. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  245. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  246. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  247. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,block_shape=[128, 128].json} +0 -0
  248. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  249. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=128,device_name=NVIDIA_L20Y,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  250. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  251. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  252. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  253. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  254. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H20,dtype=int8_w8a8,block_shape=[128, 128].json} +0 -0
  255. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=256,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  256. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  257. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L20,dtype=int8_w8a8.json} +0 -0
  258. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json → triton_3_1_0/E=256,N=64,device_name=NVIDIA_L40S,dtype=int8_w8a8.json} +0 -0
  259. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=1024,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  260. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  261. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  262. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  263. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  264. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  265. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=1280,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=1280,device_name=NVIDIA_H200.json} +0 -0
  266. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  267. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  268. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=2560,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=2560,device_name=NVIDIA_H200.json} +0 -0
  269. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  270. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  271. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  272. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=320,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=320,device_name=NVIDIA_H200.json} +0 -0
  273. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_1_0/E=64,N=512,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  274. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  275. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  276. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
  277. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  278. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  279. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  280. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=64,N=640,device_name=NVIDIA_H200.json → triton_3_1_0/E=64,N=640,device_name=NVIDIA_H200.json} +0 -0
  281. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI300X.json} +0 -0
  282. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Instinct_MI325X.json} +0 -0
  283. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=14336,device_name=AMD_Radeon_Graphics.json} +0 -0
  284. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  285. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  286. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=14336,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=14336,device_name=NVIDIA_H200.json} +0 -0
  287. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI300X.json} +0 -0
  288. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Instinct_MI325X.json} +0 -0
  289. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=1792,device_name=AMD_Radeon_Graphics.json} +0 -0
  290. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  291. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  292. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  293. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  294. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=1792,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=1792,device_name=NVIDIA_H200.json} +0 -0
  295. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  296. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  297. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  298. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  299. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=2048,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=2048,device_name=NVIDIA_H200.json} +0 -0
  300. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI300X.json} +0 -0
  301. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Instinct_MI325X.json} +0 -0
  302. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=3584,device_name=AMD_Radeon_Graphics.json} +0 -0
  303. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-40GB.json} +0 -0
  304. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  305. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_GeForce_RTX_4090,dtype=fp8_w8a8.json} +0 -0
  306. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  307. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  308. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  309. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_H200.json} +0 -0
  310. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=3584,device_name=NVIDIA_L40S.json → triton_3_1_0/E=8,N=3584,device_name=NVIDIA_L40S.json} +0 -0
  311. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
  312. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
  313. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
  314. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  315. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  316. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  317. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  318. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=4096,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=4096,device_name=NVIDIA_H200.json} +0 -0
  319. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI300X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI300X.json} +0 -0
  320. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Instinct_MI325X.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Instinct_MI325X.json} +0 -0
  321. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=AMD_Radeon_Graphics.json → triton_3_1_0/E=8,N=7168,device_name=AMD_Radeon_Graphics.json} +0 -0
  322. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json} +0 -0
  323. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  324. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  325. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  326. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=7168,device_name=NVIDIA_H200.json → triton_3_1_0/E=8,N=7168,device_name=NVIDIA_H200.json} +0 -0
  327. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json} +0 -0
  328. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Instinct_MI325X,dtype=fp8_w8a8.json} +0 -0
  329. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=AMD_Radeon_Graphics,dtype=fp8_w8a8.json} +0 -0
  330. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8.json} +0 -0
  331. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json → triton_3_1_0/E=8,N=8192,device_name=NVIDIA_H200,dtype=fp8_w8a8.json} +0 -0
  332. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  333. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  334. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H20.json} +0 -0
  335. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=192,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=192,device_name=NVIDIA_H200.json} +0 -0
  336. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  337. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  338. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H20.json} +0 -0
  339. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  340. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=384,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=384,device_name=NVIDIA_H200.json} +0 -0
  341. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=512,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  342. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  343. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H100_80GB_HBM3.json} +0 -0
  344. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  345. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H20.json} +0 -0
  346. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  347. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=768,device_name=NVIDIA_H200.json → triton_3_2_0/E=128,N=768,device_name=NVIDIA_H200.json} +0 -0
  348. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=128,N=96,device_name=NVIDIA_H20.json → triton_3_2_0/E=128,N=96,device_name=NVIDIA_H20.json} +0 -0
  349. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=264,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  350. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  351. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  352. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=264,N=256,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  353. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_A800-SXM4-80GB,dtype=int8_w8a8.json} +0 -0
  354. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H100_80GB_HBM3,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  355. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json → triton_3_2_0/E=272,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json} +0 -0
  356. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=272,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  357. /sglang/srt/layers/moe/fused_moe_triton/configs/{E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json → triton_3_2_0/E=288,N=64,device_name=NVIDIA_A800-SXM4-80GB.json} +0 -0
  358. {sglang-0.4.6.post5.dist-info → sglang-0.4.7.post1.dist-info}/licenses/LICENSE +0 -0
  359. {sglang-0.4.6.post5.dist-info → sglang-0.4.7.post1.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/http_server.py

@@ -43,7 +43,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import ORJSONResponse, Response, StreamingResponse
 
 from sglang.srt.disaggregation.utils import (
-    FakeBootstrapHost,
+    FAKE_BOOTSTRAP_HOST,
     register_disaggregation_server,
 )
 from sglang.srt.entrypoints.engine import _launch_subprocesses
@@ -67,6 +67,7 @@ from sglang.srt.managers.io_struct import (
     UpdateWeightFromDiskReqInput,
     UpdateWeightsFromDistributedReqInput,
     UpdateWeightsFromTensorReqInput,
+    V1RerankReqInput,
     VertexGenerateReqInput,
 )
 from sglang.srt.managers.tokenizer_manager import TokenizerManager
@@ -79,9 +80,11 @@ from sglang.srt.openai_api.adapter import (
     v1_delete_file,
     v1_embeddings,
     v1_files_create,
+    v1_rerank,
     v1_retrieve_batch,
     v1_retrieve_file,
     v1_retrieve_file_content,
+    v1_score,
 )
 from sglang.srt.openai_api.protocol import ModelCard, ModelList
 from sglang.srt.reasoning_parser import ReasoningParser
@@ -229,6 +232,11 @@ async def get_server_info():
     }
 
 
+@app.get("/get_load")
+async def get_load():
+    return await _global_state.tokenizer_manager.get_load()
+
+
 @app.api_route("/set_internal_state", methods=["POST", "PUT"])
 async def set_internal_state(obj: SetInternalStateReq, request: Request):
     res = await _global_state.tokenizer_manager.set_internal_state(obj)
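
The new /get_load route simply forwards to TokenizerManager.get_load(). A minimal client-side sketch; the port and the response schema are assumptions, since the diff shows neither:

    import requests

    # Hypothetical query against a locally launched sglang server.
    resp = requests.get("http://localhost:30000/get_load")
    resp.raise_for_status()
    print(resp.json())  # whatever TokenizerManager.get_load() reports
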
@@ -251,7 +259,7 @@ async def generate_request(obj: GenerateReqInput, request: Request):
                     ) + b"\n\n"
             except ValueError as e:
                 out = {"error": {"message": str(e)}}
-                logger.error(f"Error: {e}")
+                logger.error(f"[http_server] Error: {e}")
                 yield b"data: " + orjson.dumps(
                     out, option=orjson.OPT_NON_STR_KEYS
                 ) + b"\n\n"
@@ -269,7 +277,7 @@ async def generate_request(obj: GenerateReqInput, request: Request):
         ).__anext__()
         return ret
     except ValueError as e:
-        logger.error(f"Error: {e}")
+        logger.error(f"[http_server] Error: {e}")
         return _create_error_response(e)
 
 
@@ -322,6 +330,15 @@ async def classify_request(obj: EmbeddingReqInput, request: Request):
         return _create_error_response(e)
 
 
+@app.api_route("/v1/rerank", methods=["POST", "PUT"])
+async def v1_rerank_request(obj: V1RerankReqInput, raw_request: Request):
+    try:
+        ret = await v1_rerank(_global_state.tokenizer_manager, obj, raw_request)
+        return ret
+    except ValueError as e:
+        return _create_error_response(e)
+
+
 @app.api_route("/flush_cache", methods=["GET", "POST"])
 async def flush_cache():
     """Flush the radix cache."""
@@ -345,6 +362,7 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
         activities=obj.activities,
         with_stack=obj.with_stack,
         record_shapes=obj.record_shapes,
+        profile_by_stage=obj.profile_by_stage,
    )
    return Response(
        content="Start profiling.\n",
@@ -714,6 +732,12 @@ async def vertex_generate(vertex_req: VertexGenerateReqInput, raw_request: Reque
     return ORJSONResponse({"predictions": ret})
 
 
+@app.post("/v1/score")
+async def v1_score_request(raw_request: Request):
+    """Endpoint for the decoder-only scoring API. See Engine.score() for detailed documentation."""
+    return await v1_score(_global_state.tokenizer_manager, raw_request)
+
+
 def _create_error_response(e):
     return ORJSONResponse(
         {"error": {"message": str(e)}}, status_code=HTTPStatus.BAD_REQUEST
@@ -865,7 +889,7 @@ def _wait_and_warmup(
             "max_new_tokens": 8,
             "ignore_eos": True,
         },
-        "bootstrap_host": [FakeBootstrapHost] * server_args.dp_size,
+        "bootstrap_host": [FAKE_BOOTSTRAP_HOST] * server_args.dp_size,
         # This is a hack to ensure fake transfer is enabled during prefill warmup
         # ensure each dp rank has a unique bootstrap_room during prefill warmup
         "bootstrap_room": [

sglang/srt/eplb_simulator/__init__.py (new file)

@@ -0,0 +1 @@
+from . import reader

sglang/srt/eplb_simulator/reader.py (new file)

@@ -0,0 +1,51 @@
+from collections import defaultdict
+from pathlib import Path
+
+import torch
+from tqdm import tqdm
+
+from sglang.srt.managers.expert_distribution import (
+    _convert_global_physical_count_to_logical_count,
+)
+
+convert_global_physical_count_to_logical_count = (
+    _convert_global_physical_count_to_logical_count
+)
+
+
+def read_mode_per_pass(dir_data: Path):
+    """Read data from ExpertDistributionRecorder when recorded with mode `per_pass`"""
+
+    # gpc := global_physical_count
+    gpc_of_forward_pass_and_rank = defaultdict(lambda: defaultdict())
+    for path in tqdm(list(dir_data.glob("*.pt"))):
+        data_pack = torch.load(path, weights_only=True)
+        last_physical_to_logical_map = data_pack["last_physical_to_logical_map"]
+        for record in data_pack["records"]:
+            forward_pass_id = record["forward_pass_id"]
+            rank = record["rank"]
+            assert (
+                gpc_of_forward_pass_and_rank[forward_pass_id].get(rank) is None
+            ), f"Duplicated {forward_pass_id=} {rank=}"
+            gpc_of_forward_pass_and_rank[forward_pass_id][rank] = record[
+                "global_physical_count"
+            ]
+
+    forward_pass_ids = sorted(gpc_of_forward_pass_and_rank.keys())
+    print(f"Make {forward_pass_ids=} into array")
+
+    items = []
+    for forward_pass_id, gpc_of_rank in sorted(gpc_of_forward_pass_and_rank.items()):
+        gpc_of_rank_tensor = torch.stack(
+            [gpc for rank, gpc in sorted(gpc_of_rank.items())]
+        ).sum(dim=0)
+        items.append(gpc_of_rank_tensor)
+
+    gpc_of_forward_pass = torch.stack(items)
+    print(f"{gpc_of_forward_pass.shape=}")
+
+    return dict(
+        global_physical_count_of_forward_pass=gpc_of_forward_pass,
+        last_physical_to_logical_map=last_physical_to_logical_map,
+        forward_pass_ids=forward_pass_ids,
+    )
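
A short usage sketch for the new reader, assuming a directory of *.pt dumps written by ExpertDistributionRecorder in per_pass mode (the path is hypothetical):

    from pathlib import Path

    from sglang.srt.eplb_simulator import reader

    # Hypothetical dump directory produced by ExpertDistributionRecorder (mode="per_pass")
    data = reader.read_mode_per_pass(Path("/tmp/expert_distribution_dump"))

    gpc = data["global_physical_count_of_forward_pass"]  # per-pass counts, summed over ranks
    print(gpc.shape, data["forward_pass_ids"][:5])
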

sglang/srt/function_call/base_format_detector.py

@@ -36,6 +36,7 @@ class BaseFormatDetector(ABC):
         )  # map what has been streamed for each tool so far to a list
         self.bot_token = ""
         self.eot_token = ""
+        self.tool_call_separator = ", "
 
     def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
         tool_indices = {
@@ -50,7 +51,7 @@ class BaseFormatDetector(ABC):
             if name and name in tool_indices:
                 results.append(
                     ToolCallItem(
-                        tool_index=tool_indices[name],
+                        tool_index=-1,  # Caller should update this based on the actual tools array called
                         name=name,
                        parameters=json.dumps(
                            act.get("parameters") or act.get("arguments", {}),
@@ -72,20 +73,61 @@ class BaseFormatDetector(ABC):
             action = json.loads(text)
         return StreamingParseResult(calls=self.parse_base_json(action, tools))
 
+    def _ends_with_partial_token(self, buffer: str, bot_token: str) -> int:
+        """
+        Check if buffer ends with a partial bot_token.
+        Return the length of the partial bot_token.
+
+        For some formats, the bot_token is not a token in the model's vocabulary, such as
+        `[TOOL_CALLS] [` in Mistral.
+        """
+        for i in range(1, min(len(buffer) + 1, len(bot_token))):
+            if bot_token.startswith(buffer[-i:]):
+                return i
+        return 0
+
     def parse_streaming_increment(
         self, new_text: str, tools: List[Tool]
     ) -> StreamingParseResult:
         """
         Streaming incremental parsing with tool validation.
+
+        This base implementation works best with formats where:
+        1. bot_token is followed immediately by JSON (e.g., bot_token + JSON_array)
+        2. JSON can be parsed incrementally using partial_json_loads
+        3. Multiple tool calls are separated by "; " or ", "
+
+        Examples of incompatible formats (these need a custom implementation, which may reuse some logic from this class):
+        - Each tool call is wrapped in a separate block: see Qwen25Detector
+        - Multiple separate blocks: [TOOL_CALLS] [...] \n [TOOL_CALLS] [...]
+        - Pythonic-style tool calls
+
+        For incompatible formats, detectors should override this method with custom logic.
         """
         # Append new text to buffer
         self._buffer += new_text
         current_text = self._buffer
-        if not (self.bot_token in current_text or current_text.startswith("{")):
-            self._buffer = ""
-            if self.eot_token in new_text:
-                new_text = new_text.replace(self.eot_token, "")
-            return StreamingParseResult(normal_text=new_text)
+
+        # current_text contains a tool call if it is the start of a new tool call sequence,
+        # or the start of a new tool call after a tool call separator when a previous tool call exists
+        if not (
+            self.bot_token in current_text
+            or current_text.startswith("{")
+            or (
+                self.current_tool_id > 0
+                and current_text.startswith(self.tool_call_separator + "{")
+            )
+        ):
+            # Only clear the buffer if we're sure no tool call is starting
+            if not self._ends_with_partial_token(self._buffer, self.bot_token):
+                normal_text = self._buffer
+                self._buffer = ""
+                if self.eot_token in normal_text:
+                    normal_text = normal_text.replace(self.eot_token, "")
+                return StreamingParseResult(normal_text=normal_text)
+            else:
+                # Might be a partial bot_token, keep buffering
+                return StreamingParseResult()
 
         # Build tool indices if not already built
         if not hasattr(self, "_tool_indices"):
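
The partial-token check is what keeps a chunk that ends mid-bot_token buffered rather than flushed as normal text. A standalone sketch of the same scan (note the range cap excludes a full bot_token suffix, which the `in` check above already handles):

    def ends_with_partial_token(buffer: str, bot_token: str) -> int:
        # Nonzero iff some proper suffix of `buffer` is a prefix of `bot_token`.
        for i in range(1, min(len(buffer) + 1, len(bot_token))):
            if bot_token.startswith(buffer[-i:]):
                return i
        return 0

    assert ends_with_partial_token("hello [TOOL_", "[TOOL_CALLS] [") == 6  # "[TOOL_"
    assert ends_with_partial_token("hello world", "[TOOL_CALLS] [") == 0
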
@@ -96,91 +138,73 @@ class BaseFormatDetector(ABC):
             }
 
         flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
+
         try:
-            tool_call_arr = []
-            is_complete = []
             try:
-                start_idx = (
-                    len(self.bot_token)
-                    if current_text.startswith(self.bot_token)
-                    else 0
+                if current_text.startswith(self.bot_token):
+                    start_idx = len(self.bot_token)
+                elif self.current_tool_id > 0 and current_text.startswith(
+                    self.tool_call_separator
+                ):
+                    start_idx = len(self.tool_call_separator)
+                else:
+                    start_idx = 0
+
+                if start_idx >= len(current_text):
+                    return StreamingParseResult()
+
+                (obj, end_idx) = _partial_json_loads(current_text[start_idx:], flags)
+
+                is_current_complete = _is_complete_json(
+                    current_text[start_idx : start_idx + end_idx]
                 )
-                while start_idx < len(current_text):
-                    (obj, end_idx) = _partial_json_loads(
-                        current_text[start_idx:], flags
-                    )
-                    is_complete.append(
-                        _is_complete_json(current_text[start_idx : start_idx + end_idx])
-                    )
-                    start_idx += end_idx + len("; ")
 
-                    # Validate tool name if present
-                    if "name" in obj and obj["name"] not in self._tool_indices:
-                        # Invalid tool name - reset state
-                        self._buffer = ""
-                        self.current_tool_id = -1
-                        self.current_tool_name_sent = False
-                        if self.streamed_args_for_tool:
-                            self.streamed_args_for_tool.pop()
-                        return StreamingParseResult()
-
-                    # Handle parameters/arguments consistency
-                    if "parameters" in obj:
-                        assert (
-                            "arguments" not in obj
-                        ), "model generated both parameters and arguments"
-                        obj["arguments"] = obj["parameters"]
-                    tool_call_arr.append(obj)
+                # Validate tool name if present
+                if "name" in obj and obj["name"] not in self._tool_indices:
+                    # Invalid tool name - reset state
+                    self._buffer = ""
+                    self.current_tool_id = -1
+                    self.current_tool_name_sent = False
+                    if self.streamed_args_for_tool:
+                        self.streamed_args_for_tool.pop()
+                    return StreamingParseResult()
+
+                # Handle parameters/arguments consistency
+                # NOTE: we assume here that obj is always a partial parse of a single tool call
+                if "parameters" in obj:
+                    assert (
+                        "arguments" not in obj
+                    ), "model generated both parameters and arguments"
+                    obj["arguments"] = obj["parameters"]
+
+                current_tool_call = obj
 
             except MalformedJSON:
                 return StreamingParseResult()
 
-            if len(tool_call_arr) == 0:
+            if not current_tool_call:
                 return StreamingParseResult()
 
-            current_tool_call: Dict = (
-                tool_call_arr[self.current_tool_id] if len(tool_call_arr) > 0 else {}
-            )
-
-            # Handle new tool in array
-            if len(tool_call_arr) > 0 and len(tool_call_arr) > self.current_tool_id + 1:
-                if self.current_tool_id >= 0:
-                    cur_arguments = current_tool_call.get("arguments")
-                    if cur_arguments:
-                        cur_args_json = json.dumps(cur_arguments)
-                        sent = len(self.streamed_args_for_tool[self.current_tool_id])
-                        argument_diff = cur_args_json[sent:]
-
-                        res = StreamingParseResult(
-                            calls=[
-                                ToolCallItem(
-                                    tool_index=self.current_tool_id,
-                                    name="",
-                                    parameters=argument_diff,
-                                )
-                            ],
-                        )
-                        self.streamed_args_for_tool[
-                            self.current_tool_id
-                        ] += argument_diff
-                    else:
-                        res = StreamingParseResult()
-                else:
-                    res = StreamingParseResult()
-
-                self.current_tool_id = len(tool_call_arr) - 1
-                self.current_tool_name_sent = False
-                self.streamed_args_for_tool.append("")
-                return res
-
-            # Handle tool name
-            elif not self.current_tool_name_sent:
+            # Case 1: Handle tool name streaming
+            # This happens when we encounter a tool but haven't sent its name yet
+            if not self.current_tool_name_sent:
                 function_name = current_tool_call.get("name")
+
                 if function_name and function_name in self._tool_indices:
+                    # If this is a new tool (current_tool_id was -1), initialize it
+                    if self.current_tool_id == -1:
+                        self.current_tool_id = 0
+                        self.streamed_args_for_tool.append("")
+                    # If this is a subsequent tool, ensure streamed_args_for_tool is large enough
+                    elif self.current_tool_id >= len(self.streamed_args_for_tool):
+                        while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                            self.streamed_args_for_tool.append("")
+
+                    # Send the tool name with empty parameters
                     res = StreamingParseResult(
                         calls=[
                             ToolCallItem(
-                                tool_index=self._tool_indices[function_name],
+                                tool_index=self.current_tool_id,
                                 name=function_name,
                                 parameters="",
                             )
@@ -190,47 +214,75 @@ class BaseFormatDetector(ABC):
                 else:
                     res = StreamingParseResult()
 
-            # Handle streaming arguments
+            # Case 2: Handle streaming arguments
+            # This happens when we've already sent the tool name and now need to stream arguments incrementally
             else:
                 cur_arguments = current_tool_call.get("arguments")
                 res = StreamingParseResult()
 
                 if cur_arguments:
+                    # Calculate how much of the arguments we've already streamed
                     sent = len(self.streamed_args_for_tool[self.current_tool_id])
                     cur_args_json = json.dumps(cur_arguments)
-                    prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
-                        "arguments"
-                    )
+                    prev_arguments = None
+                    if self.current_tool_id < len(self.prev_tool_call_arr):
+                        prev_arguments = self.prev_tool_call_arr[
+                            self.current_tool_id
+                        ].get("arguments")
 
                     argument_diff = None
-                    if is_complete[self.current_tool_id]:
+
+                    # If the current tool's JSON is complete, send all remaining arguments
+                    if is_current_complete:
                         argument_diff = cur_args_json[sent:]
-                        self._buffer = ""
-                        self.prev_tool_call_arr[self.current_tool_id].clear()
+                        completing_tool_id = (
+                            self.current_tool_id
+                        )  # Save the ID of the tool that's completing
+
+                        # Only remove the processed portion, keep unprocessed content
+                        self._buffer = current_text[start_idx + end_idx :]
+
+                        if self.current_tool_id < len(self.prev_tool_call_arr):
+                            self.prev_tool_call_arr[self.current_tool_id].clear()
                         self.current_tool_name_sent = False
                         self.streamed_args_for_tool[self.current_tool_id] = ""
+                        self.current_tool_id += 1
 
+                    # If the tool is still being parsed, send incremental changes
                     elif prev_arguments:
                         prev_args_json = json.dumps(prev_arguments)
                         if cur_args_json != prev_args_json:
                             prefix = _find_common_prefix(prev_args_json, cur_args_json)
                             argument_diff = prefix[sent:]
 
+                    # Send the argument diff if there's something new
                     if argument_diff is not None:
+                        # Use the correct tool_index: completing_tool_id for completed tools, current_tool_id for ongoing ones
+                        tool_index_to_use = (
+                            completing_tool_id
+                            if is_current_complete
+                            else self.current_tool_id
+                        )
                         res = StreamingParseResult(
                             calls=[
                                 ToolCallItem(
-                                    tool_index=self.current_tool_id,
+                                    tool_index=tool_index_to_use,
                                     parameters=argument_diff,
                                 )
                             ],
                         )
-                        if not is_complete[self.current_tool_id]:
+                        if not is_current_complete:
                             self.streamed_args_for_tool[
                                 self.current_tool_id
                             ] += argument_diff
 
-            self.prev_tool_call_arr = tool_call_arr
+            # Update prev_tool_call_arr with current state
+            if self.current_tool_id >= 0:
+                # Ensure prev_tool_call_arr is large enough
+                while len(self.prev_tool_call_arr) <= self.current_tool_id:
+                    self.prev_tool_call_arr.append({})
+                self.prev_tool_call_arr[self.current_tool_id] = current_tool_call
+
             return res
 
         except Exception as e:
236
288
  except Exception as e:
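
Both branches above lean on `_find_common_prefix`, which is defined elsewhere in the module. A plausible implementation, plus a worked example of the diffing idea, follows; this is a sketch under that assumption, not the packaged code:

```python
def _find_common_prefix(s1: str, s2: str) -> str:
    # Longest shared prefix of two serialized-argument strings; only this
    # part is safe to stream, since later characters may still change.
    out = []
    for c1, c2 in zip(s1, s2):
        if c1 != c2:
            break
        out.append(c1)
    return "".join(out)

prev = '{"city": "Par'        # arguments as serialized on the previous chunk
cur = '{"city": "Paris"}'     # arguments as serialized now
sent = len(prev)              # characters already streamed to the client
print(repr(_find_common_prefix(prev, cur)[sent:]))  # '' - nothing stable yet
print(repr(cur[sent:]))       # 'is"}' - flushed once the JSON is complete
```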
@@ -31,6 +31,7 @@ class DeepSeekV3Detector(BaseFormatDetector):
         self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
         self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>"
         self._last_arguments = ""
+        self.current_tool_id = -1

     def has_tool_call(self, text: str) -> bool:
         """Check if the text contains a deepseek format tool call."""
@@ -75,7 +76,12 @@ class DeepSeekV3Detector(BaseFormatDetector):
         self._buffer += new_text
         current_text = self._buffer

-        if self.bot_token not in current_text:
+        # Check if we have a tool call (either the sequence-start token or an individual call token)
+        has_tool_call = (
+            self.bot_token in current_text or "<|tool▁call▁begin|>" in current_text
+        )
+
+        if not has_tool_call:
             self._buffer = ""
             for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]:
                 if e_token in new_text:
@@ -100,15 +106,32 @@ class DeepSeekV3Detector(BaseFormatDetector):
             func_name = partial_match.group(2).strip()
             func_args_raw = partial_match.group(3).strip()

+            # Initialize state if this is the first tool call
+            if self.current_tool_id == -1:
+                self.current_tool_id = 0
+                self.prev_tool_call_arr = []
+                self.streamed_args_for_tool = [""]
+
+            # Ensure we have enough entries in our tracking arrays
+            while len(self.prev_tool_call_arr) <= self.current_tool_id:
+                self.prev_tool_call_arr.append({})
+            while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                self.streamed_args_for_tool.append("")
+
             if not self.current_tool_name_sent:
                 calls.append(
                     ToolCallItem(
-                        tool_index=self._tool_indices.get(func_name, 0),
+                        tool_index=self.current_tool_id,
                         name=func_name,
                         parameters="",
                     )
                 )
                 self.current_tool_name_sent = True
+                # Store the tool call info for adapter.py
+                self.prev_tool_call_arr[self.current_tool_id] = {
+                    "name": func_name,
+                    "arguments": {},
+                }
             else:
                 argument_diff = (
                     func_args_raw[len(self._last_arguments) :]
@@ -119,16 +142,41 @@ class DeepSeekV3Detector(BaseFormatDetector):
                 if argument_diff:
                     calls.append(
                         ToolCallItem(
-                            tool_index=self._tool_indices.get(func_name, 0),
+                            tool_index=self.current_tool_id,
                             name=None,
                             parameters=argument_diff,
                         )
                     )
                     self._last_arguments += argument_diff
+                    self.streamed_args_for_tool[
+                        self.current_tool_id
+                    ] += argument_diff

                 if _is_complete_json(func_args_raw):
+                    # Update the stored arguments for adapter.py
+                    try:
+                        parsed_args = json.loads(func_args_raw)
+                        self.prev_tool_call_arr[self.current_tool_id][
+                            "arguments"
+                        ] = parsed_args
+                    except json.JSONDecodeError:
+                        pass
+
+                    # Find the end of the current tool call and remove only that part from buffer
+                    tool_call_end_pattern = (
+                        r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
+                    )
+                    match = re.search(
+                        tool_call_end_pattern, current_text, re.DOTALL
+                    )
+                    if match:
+                        # Remove the completed tool call from buffer, keep any remaining content
+                        self._buffer = current_text[match.end() :]
+                    else:
+                        self._buffer = ""
+
                     result = StreamingParseResult(normal_text="", calls=calls)
-                    self._buffer = ""
+                    self.current_tool_id += 1
                     self._last_arguments = ""
                     self.current_tool_name_sent = False
                     return result
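
`_is_complete_json` gates the buffer trimming above but is imported from elsewhere in the package; the usual implementation of such a check is simply a parse attempt. A sketch under that assumption, not the module's actual code:

```python
import json

def _is_complete_json(s: str) -> bool:
    # True once the accumulated argument text parses as standalone JSON,
    # i.e. the model has finished emitting this tool call's arguments.
    try:
        json.loads(s)
        return True
    except json.JSONDecodeError:
        return False

print(_is_complete_json('{"city": "Par'))      # False - keep buffering
print(_is_complete_json('{"city": "Paris"}'))  # True - finalize and trim buffer
```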
@@ -149,8 +197,8 @@ class DeepSeekV3Detector(BaseFormatDetector):
     def build_ebnf(self, tools: List[Tool]):
         return EBNFComposer.build_ebnf(
             tools,
-            bot_token=self.bot_token,
-            eot_token=self.eot_token,
+            sequence_start_token=self.bot_token,
+            sequence_end_token=self.eot_token,
             tool_call_separator="",
             call_rule_fmt='"<|tool▁call▁begin|>function<|tool▁sep|>{name}\\n```json\\n" {arguments_rule} "\\n```<|tool▁call▁end|>"',
             function_format="json",
@@ -30,11 +30,6 @@ class EBNFComposer:
     ws ::= [ \n\t]*
     """

-    TOOL_CALLS_MAP = {
-        "pythonic": '"[" function_call ("," function_call)* "]"',
-        "json": "function_call",
-    }
-
     CALL_RULE_MAP = {
         "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
         "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
@@ -138,35 +133,54 @@ class EBNFComposer:
     @staticmethod
     def build_ebnf(
         tools,
-        *,
-        call_rule_fmt: Optional[str] = None,
         function_format: Literal["pythonic", "json"] = "json",
-        bot_token: Optional[str] = None,
-        eot_token: Optional[str] = None,
+        # Parameters for wrapping the entire sequence of tool calls
+        sequence_start_token: Optional[str] = None,
+        sequence_end_token: Optional[str] = None,
+        # Parameters for wrapping individual tool calls
+        individual_call_start_token: Optional[str] = None,
+        individual_call_end_token: Optional[str] = None,
+        # Parameter for separating multiple tool calls
         tool_call_separator: Optional[str] = None,
+        call_rule_fmt: Optional[str] = None,
     ):
         """
         Generalized EBNF builder for all detectors.
         Args:
             tools: List of Tool objects to generate EBNF grammar for
+            function_format: The format of function calls, either "pythonic" or "json"
+            sequence_start_token: Token that wraps the entire sequence of tool calls (start)
+            sequence_end_token: Token that wraps the entire sequence of tool calls (end)
+            individual_call_start_token: Token that wraps each individual tool call (start)
+            individual_call_end_token: Token that wraps each individual tool call (end)
+            tool_call_separator: The separator between multiple tool calls
             call_rule_fmt: Optional custom format string for the call_{name} rule. It should define each function call's format, with
                 the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                 format based on function_format will be used.
-            function_format: The format of function calls, either "pythonic" or "json"
-            bot_token: The token that indicates the start of a tool call section
-            eot_token: The token that indicates the end of a tool call section
-            tool_call_separator: The separator between multiple tool calls
         """
         # =================================================================
         # Step 1: Determine the root tool calls rule
         # =================================================================
-        if bot_token and eot_token:
-            if tool_call_separator:
-                root_rule = f'"{bot_token}" function_call ( "{tool_call_separator}" function_call )* "{eot_token}"'
-            else:
-                root_rule = f'"{bot_token}" function_call "{eot_token}"'
+        # Handle a single function call
+        if individual_call_start_token and individual_call_end_token:
+            function_call_unit = f'"{individual_call_start_token}" function_call "{individual_call_end_token}"'
+        else:
+            function_call_unit = "function_call"
+
+        # Handle multiple function calls with separators
+        if tool_call_separator is not None:
+            base_pattern = f'{function_call_unit} ( "{tool_call_separator}" {function_call_unit} )*'
+        else:
+            # Assume only a single function call is supported
+            base_pattern = function_call_unit
+
+        # Apply sequence-level wrapping if needed
+        if sequence_start_token and sequence_end_token:
+            root_rule = (
+                f'"{sequence_start_token}" {base_pattern} "{sequence_end_token}"'
+            )
         else:
-            root_rule = EBNFComposer.TOOL_CALLS_MAP[function_format]
+            root_rule = base_pattern

         # =================================================================
         # Step 2: Build the header rules
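
The removed TOOL_CALLS_MAP special cases fall out of the new three-knob construction. A standalone re-derivation of Step 1, mirroring the branching above but not the module's code, makes that concrete:

```python
from typing import Optional

def root_rule_for(
    sequence_start_token: Optional[str] = None,
    sequence_end_token: Optional[str] = None,
    individual_call_start_token: Optional[str] = None,
    individual_call_end_token: Optional[str] = None,
    tool_call_separator: Optional[str] = None,
) -> str:
    # Wrap each call, repeat with separators, then wrap the whole sequence.
    if individual_call_start_token and individual_call_end_token:
        unit = f'"{individual_call_start_token}" function_call "{individual_call_end_token}"'
    else:
        unit = "function_call"
    if tool_call_separator is not None:
        base = f'{unit} ( "{tool_call_separator}" {unit} )*'
    else:
        base = unit
    if sequence_start_token and sequence_end_token:
        return f'"{sequence_start_token}" {base} "{sequence_end_token}"'
    return base

# The old pythonic mapping '"[" function_call ("," function_call)* "]"'
# is now just a configuration:
print(root_rule_for(sequence_start_token="[", sequence_end_token="]",
                    tool_call_separator=","))
# The old json mapping (a bare "function_call") is the default:
print(root_rule_for())
```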