sglang 0.4.9.post3__py3-none-any.whl → 0.4.9.post4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/srt/_custom_ops.py +29 -1
- sglang/srt/configs/model_config.py +1 -1
- sglang/srt/conversation.py +1 -1
- sglang/srt/disaggregation/common/conn.py +34 -6
- sglang/srt/disaggregation/mini_lb.py +3 -2
- sglang/srt/disaggregation/mooncake/conn.py +49 -20
- sglang/srt/disaggregation/mooncake/transfer_engine.py +4 -2
- sglang/srt/disaggregation/nixl/conn.py +17 -13
- sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -91
- sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +96 -1
- sglang/srt/distributed/device_communicators/quick_all_reduce.py +273 -0
- sglang/srt/distributed/device_communicators/shm_broadcast.py +12 -5
- sglang/srt/distributed/parallel_state.py +70 -15
- sglang/srt/entrypoints/engine.py +2 -8
- sglang/srt/entrypoints/http_server.py +20 -32
- sglang/srt/entrypoints/openai/protocol.py +3 -3
- sglang/srt/entrypoints/openai/serving_chat.py +27 -4
- sglang/srt/function_call/base_format_detector.py +74 -12
- sglang/srt/function_call/deepseekv3_detector.py +26 -11
- sglang/srt/function_call/ebnf_composer.py +95 -63
- sglang/srt/function_call/function_call_parser.py +4 -4
- sglang/srt/function_call/kimik2_detector.py +41 -16
- sglang/srt/function_call/llama32_detector.py +6 -3
- sglang/srt/function_call/mistral_detector.py +11 -3
- sglang/srt/function_call/pythonic_detector.py +16 -14
- sglang/srt/function_call/qwen25_detector.py +12 -3
- sglang/srt/function_call/{qwen3_detector.py → qwen3_coder_detector.py} +10 -9
- sglang/srt/layers/activation.py +11 -3
- sglang/srt/layers/attention/base_attn_backend.py +3 -1
- sglang/srt/layers/communicator.py +12 -12
- sglang/srt/layers/dp_attention.py +72 -24
- sglang/srt/layers/logits_processor.py +34 -24
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +25 -224
- sglang/srt/layers/moe/topk.py +5 -13
- sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +2 -9
- sglang/srt/layers/quantization/modelopt_quant.py +8 -4
- sglang/srt/layers/quantization/utils.py +0 -9
- sglang/srt/layers/radix_attention.py +5 -3
- sglang/srt/lora/lora_manager.py +133 -169
- sglang/srt/lora/lora_registry.py +124 -0
- sglang/srt/lora/mem_pool.py +2 -2
- sglang/srt/managers/cache_controller.py +53 -6
- sglang/srt/managers/io_struct.py +19 -1
- sglang/srt/managers/schedule_batch.py +13 -3
- sglang/srt/managers/scheduler.py +13 -25
- sglang/srt/managers/tokenizer_manager.py +28 -25
- sglang/srt/managers/tp_worker.py +2 -4
- sglang/srt/mem_cache/allocator.py +67 -7
- sglang/srt/mem_cache/hicache_storage.py +17 -1
- sglang/srt/mem_cache/hiradix_cache.py +30 -16
- sglang/srt/mem_cache/memory_pool_host.py +3 -0
- sglang/srt/model_executor/cuda_graph_runner.py +61 -25
- sglang/srt/model_executor/forward_batch_info.py +201 -29
- sglang/srt/model_executor/model_runner.py +41 -23
- sglang/srt/models/deepseek_v2.py +1 -2
- sglang/srt/models/mllama4.py +10 -3
- sglang/srt/models/qwen2_moe.py +0 -4
- sglang/srt/models/qwen3_moe.py +1 -6
- sglang/srt/reasoning_parser.py +46 -4
- sglang/srt/sampling/sampling_batch_info.py +6 -5
- sglang/srt/server_args.py +76 -55
- sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +33 -28
- sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +37 -36
- sglang/srt/speculative/eagle_utils.py +51 -23
- sglang/srt/speculative/eagle_worker.py +59 -44
- sglang/srt/two_batch_overlap.py +9 -5
- sglang/srt/utils.py +17 -68
- sglang/test/test_activation.py +50 -1
- sglang/version.py +1 -1
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/METADATA +5 -5
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/RECORD +75 -72
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/WHEEL +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.4.9.post3.dist-info → sglang-0.4.9.post4.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/http_server.py

@@ -107,6 +107,8 @@ from sglang.version import __version__
 logger = logging.getLogger(__name__)
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

+HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
+

 # Store global states
 @dataclasses.dataclass
@@ -212,9 +214,6 @@ async def validate_json_request(raw_request: Request):
         )


-HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
-
-
 ##### Native API endpoints #####


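The two hunks above simply hoist `HEALTH_CHECK_TIMEOUT` to the top of the module; the value is still read from the same environment variable. A minimal sketch of how the setting resolves (variable name and default taken from the lines above):

```python
# HEALTH_CHECK_TIMEOUT is resolved once at import time, so the environment
# variable must be set before sglang.srt.entrypoints.http_server is imported.
import os

os.environ["SGLANG_HEALTH_CHECK_TIMEOUT"] = "60"
HEALTH_CHECK_TIMEOUT = int(os.getenv("SGLANG_HEALTH_CHECK_TIMEOUT", 20))
print(HEALTH_CHECK_TIMEOUT)  # 60
```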
@@ -807,6 +806,24 @@ async def retrieve_model(model: str):
     )


+@app.post("/v1/score", dependencies=[Depends(validate_json_request)])
+async def v1_score_request(request: ScoringRequest, raw_request: Request):
+    """Endpoint for the decoder-only scoring API. See Engine.score() for detailed documentation."""
+    return await raw_request.app.state.openai_serving_score.handle_request(
+        request, raw_request
+    )
+
+
+@app.api_route(
+    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
+)
+async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
+    """Endpoint for reranking documents based on query relevance."""
+    return await raw_request.app.state.openai_serving_rerank.handle_request(
+        request, raw_request
+    )
+
+
 ## SageMaker API
 @app.get("/ping")
 async def sagemaker_health() -> Response:
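The scoring and rerank routes are only relocated within the file (they now sit above the SageMaker/Vertex section, see the removal hunk below), so existing clients keep working. For orientation, a minimal client-side sketch of calling them against a running server; the base URL and JSON field names are illustrative assumptions, not the exact ScoringRequest / V1RerankReqInput schemas:

```python
# Hypothetical client calls for the relocated /v1/score and /v1/rerank routes.
# BASE_URL and the payload field names are assumptions for illustration only.
import requests

BASE_URL = "http://localhost:30000"

# Decoder-only scoring API (POST only).
score = requests.post(
    f"{BASE_URL}/v1/score",
    json={"query": "Is the sky blue?", "items": ["yes", "no"]},  # assumed fields
)
print(score.status_code, score.text)

# Rerank API: registered for both POST and PUT via app.api_route above.
rerank = requests.put(
    f"{BASE_URL}/v1/rerank",
    json={"query": "best pizza in town", "documents": ["doc a", "doc b"]},  # assumed fields
)
print(rerank.status_code, rerank.text)
```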
@@ -852,24 +869,6 @@ async def vertex_generate(vertex_req: VertexGenerateReqInput, raw_request: Reque
     return ORJSONResponse({"predictions": ret})


-@app.post("/v1/score", dependencies=[Depends(validate_json_request)])
-async def v1_score_request(request: ScoringRequest, raw_request: Request):
-    """Endpoint for the decoder-only scoring API. See Engine.score() for detailed documentation."""
-    return await raw_request.app.state.openai_serving_score.handle_request(
-        request, raw_request
-    )
-
-
-@app.api_route(
-    "/v1/rerank", methods=["POST", "PUT"], dependencies=[Depends(validate_json_request)]
-)
-async def v1_rerank_request(request: V1RerankReqInput, raw_request: Request):
-    """Endpoint for reranking documents based on query relevance."""
-    return await raw_request.app.state.openai_serving_rerank.handle_request(
-        request, raw_request
-    )
-
-
 def _create_error_response(e):
     return ORJSONResponse(
         {"error": {"message": str(e)}}, status_code=HTTPStatus.BAD_REQUEST
@@ -916,15 +915,6 @@ def launch_server(
     add_prometheus_middleware(app)
     enable_func_timer()

-    image_token_text = None
-    if (
-        tokenizer_manager.image_token_id is not None
-        and not server_args.skip_tokenizer_init
-    ):
-        image_token_text = tokenizer_manager.tokenizer.decode(
-            [tokenizer_manager.image_token_id]
-        )
-
     # Send a warmup request - we will create the thread launch it
     # in the lifespan after all other warmups have fired.
     warmup_thread = threading.Thread(
@@ -932,7 +922,6 @@
         args=(
             server_args,
             pipe_finish_writer,
-            image_token_text,
             launch_callback,
         ),
     )
@@ -1066,7 +1055,6 @@ def _execute_server_warmup(
 def _wait_and_warmup(
     server_args: ServerArgs,
     pipe_finish_writer: Optional[multiprocessing.connection.Connection],
-    image_token_text: str,
     launch_callback: Optional[Callable[[], None]] = None,
 ):
     if not server_args.skip_server_warmup:
sglang/srt/entrypoints/openai/protocol.py

@@ -192,9 +192,9 @@ class CompletionRequest(BaseModel):
     session_params: Optional[Dict] = None

     # For PD disaggregation
-    bootstrap_host: Optional[str] = None
-    bootstrap_port: Optional[int] = None
-    bootstrap_room: Optional[int] = None
+    bootstrap_host: Optional[Union[List[str], str]] = None
+    bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
+    bootstrap_room: Optional[Union[List[int], int]] = None

     # For request id
     rid: Optional[Union[List[str], str]] = None
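The widened `bootstrap_*` annotations let one CompletionRequest carry either a single value or one value per sub-request for PD disaggregation, mirroring the existing list-or-scalar `rid` field. A small sketch with a stand-in pydantic model mirroring only these fields (not the real CompletionRequest) shows that both shapes validate:

```python
# Stand-in model mirroring only the PD-disaggregation fields from the diff,
# to illustrate that both scalar and per-item list payloads now validate.
from typing import List, Optional, Union
from pydantic import BaseModel

class PDBootstrapFields(BaseModel):
    bootstrap_host: Optional[Union[List[str], str]] = None
    bootstrap_port: Optional[Union[List[Optional[int]], int]] = None
    bootstrap_room: Optional[Union[List[int], int]] = None

# Single-request form (previous behavior still works).
single = PDBootstrapFields(bootstrap_host="10.0.0.1", bootstrap_port=8998, bootstrap_room=1234)

# Batched form: one entry per sub-request; individual ports may be None.
batched = PDBootstrapFields(
    bootstrap_host=["10.0.0.1", "10.0.0.2"],
    bootstrap_port=[8998, None],
    bootstrap_room=[1234, 5678],
)
print(single.bootstrap_host, batched.bootstrap_port)
```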
sglang/srt/entrypoints/openai/serving_chat.py

@@ -55,6 +55,20 @@ class OpenAIServingChat(OpenAIServingBase):
     def _request_id_prefix(self) -> str:
         return "chatcmpl-"

+    def _validate_request(self, request: ChatCompletionRequest) -> Optional[str]:
+        """Validate that the input is valid."""
+        if not request.messages:
+            return "Messages cannot be empty."
+
+        if (
+            isinstance(request.tool_choice, str)
+            and request.tool_choice.lower() == "required"
+            and not request.tools
+        ):
+            return "Tools cannot be empty if tool choice is set to required."
+
+        return None
+
     def _convert_to_internal_request(
         self,
         request: ChatCompletionRequest,
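The new `_validate_request` hook returns an error string instead of raising, and it rejects two cases: an empty message list, and `tool_choice="required"` without any tools. A simplified stand-in (not the real ChatCompletionRequest) illustrating those checks:

```python
# Simplified stand-in for the two checks added in _validate_request above.
from typing import Optional

def validate(messages: list, tool_choice, tools: Optional[list]) -> Optional[str]:
    if not messages:
        return "Messages cannot be empty."
    if isinstance(tool_choice, str) and tool_choice.lower() == "required" and not tools:
        return "Tools cannot be empty if tool choice is set to required."
    return None

print(validate([], "auto", None))                      # Messages cannot be empty.
print(validate([{"role": "user"}], "required", None))  # Tools cannot be empty ...
print(validate([{"role": "user"}], "auto", None))      # None (request is valid)
```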
@@ -484,7 +498,10 @@ class OpenAIServingChat(OpenAIServingBase):

                 # Handle tool calls
                 if request.tool_choice != "none" and request.tools:
-                    async for chunk in self._process_tool_call_stream(
+                    async for (
+                        chunk,
+                        tool_call_finish_reason_type,
+                    ) in self._process_tool_call_stream(
                         index,
                         delta,
                         parser_dict,
@@ -492,7 +509,10 @@
                         request,
                         finish_reason_type,
                     ):
-                        yield chunk
+                        if chunk:
+                            yield chunk
+                        finish_reason_type = tool_call_finish_reason_type
+
                 else:
                     # Regular content
                     if delta or not (
@@ -865,7 +885,7 @@
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.model_dump_json()}\n\n"
+                yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type

                 # Yield tool calls
                 for call_item in calls:
@@ -920,4 +940,7 @@
                 choices=[choice_data],
                 model=request.model,
             )
-            yield f"data: {chunk.model_dump_json()}\n\n"
+            yield f"data: {chunk.model_dump_json()}\n\n", finish_reason_type
+
+            if finish_reason_type == "stop":
+                yield None, "tool_calls"
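Taken together, these serving_chat.py hunks change the tool-call stream helper so that it yields `(chunk, finish_reason_type)` pairs instead of bare SSE chunks, with a trailing `(None, "tool_calls")` pair that rewrites a `"stop"` finish reason. A toy consumer showing the new protocol (names and payloads are illustrative, not sglang internals):

```python
# Toy illustration of the new yield protocol: the tool-call stream emits
# (chunk, finish_reason_type) pairs; the caller forwards non-empty chunks and
# keeps the last finish reason, which may be overridden to "tool_calls".
import asyncio

async def fake_tool_call_stream():
    yield 'data: {"delta": "..."}\n\n', "stop"
    # Final override pair emitted when tool calls were produced.
    yield None, "tool_calls"

async def consume():
    finish_reason = None
    async for chunk, tool_call_finish_reason in fake_tool_call_stream():
        if chunk:
            print("forward:", chunk.strip())
        finish_reason = tool_call_finish_reason
    print("final finish_reason:", finish_reason)  # -> tool_calls

asyncio.run(consume())
```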
sglang/srt/function_call/base_format_detector.py

@@ -25,23 +25,49 @@ class BaseFormatDetector(ABC):
     """Base class providing two sets of interfaces: one-time and streaming incremental."""

     def __init__(self):
-        #
+        # Streaming state management
+        # Buffer for accumulating incomplete patterns that arrive across multiple streaming chunks
         self._buffer = ""
-        #
+        # Stores complete tool call info (name and arguments) for each tool being parsed.
+        # Used by serving layer for completion handling when streaming ends.
+        # Format: [{"name": str, "arguments": dict}, ...]
         self.prev_tool_call_arr: List[Dict] = []
+        # Index of currently streaming tool call. Starts at -1 (no active tool),
+        # increments as each tool completes. Tracks which tool's arguments are streaming.
         self.current_tool_id: int = -1
+        # Flag for whether current tool's name has been sent to client.
+        # Tool names sent first with empty parameters, then arguments stream incrementally.
         self.current_tool_name_sent: bool = False
-
-
-
+        # Tracks raw JSON string content streamed to client for each tool's arguments.
+        # Critical for serving layer to calculate remaining content when streaming ends.
+        # Each index corresponds to a tool_id. Example: ['{"location": "San Francisco"', '{"temp": 72']
+        self.streamed_args_for_tool: List[str] = []
+
+        # Token configuration (override in subclasses)
         self.bot_token = ""
         self.eot_token = ""
         self.tool_call_separator = ", "

-    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
-        tool_indices = {
+    def _get_tool_indices(self, tools: List[Tool]) -> Dict[str, int]:
+        """
+        Get a mapping of tool names to their indices in the tools list.
+
+        This utility method creates a dictionary mapping function names to their
+        indices in the tools list, which is commonly needed for tool validation
+        and ToolCallItem creation.
+
+        Args:
+            tools: List of available tools
+
+        Returns:
+            Dictionary mapping tool names to their indices
+        """
+        return {
             tool.function.name: i for i, tool in enumerate(tools) if tool.function.name
         }
+
+    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
+        tool_indices = self._get_tool_indices(tools)
         if not isinstance(action, list):
             action = [action]

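The shared `_get_tool_indices` helper replaces several copies of the same dict comprehension across detectors (see the hunks below). A minimal stand-in showing what it produces, with simplified Tool/Function types rather than the real sglang classes:

```python
# Minimal stand-ins showing what _get_tool_indices computes: a name -> index
# map over tools that have a function name. Tool/Function are simplified here.
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class Function:
    name: Optional[str]

@dataclass
class Tool:
    function: Function

def get_tool_indices(tools: List[Tool]) -> Dict[str, int]:
    return {tool.function.name: i for i, tool in enumerate(tools) if tool.function.name}

tools = [Tool(Function("get_weather")), Tool(Function(None)), Tool(Function("search"))]
print(get_tool_indices(tools))  # {'get_weather': 0, 'search': 2}
```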
@@ -130,11 +156,7 @@ class BaseFormatDetector(ABC):

         # Build tool indices if not already built
         if not hasattr(self, "_tool_indices"):
-            self._tool_indices = {
-                tool.function.name: i
-                for i, tool in enumerate(tools)
-                if tool.function and tool.function.name
-            }
+            self._tool_indices = self._get_tool_indices(tools)

         flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR

@@ -294,12 +316,52 @@ class BaseFormatDetector(ABC):

     @abstractmethod
     def has_tool_call(self, text: str) -> bool:
+        """
+        Check if the given text contains function call markers specific to this format.
+        """
         raise NotImplementedError()

+    def supports_structural_tag(self) -> bool:
+        """Return True if this detector supports structural tag format."""
+        return True
+
     @abstractmethod
     def structure_info(self) -> _GetInfoFunc:
+        """
+        Return a function that creates StructureInfo for constrained generation.
+
+        The returned function takes a tool name and returns a StructureInfo object
+        containing the begin/end patterns and trigger tokens needed for constrained
+        generation of function calls in this format.
+
+        Returns:
+            A function that takes a tool name (str) and returns StructureInfo
+        """
         raise NotImplementedError()

     @abstractmethod
     def build_ebnf(self, tools: List[Tool]) -> str:
+        """
+        Build an EBNF grammar for constrained generation of function calls.
+
+        This method generates an Extended Backus-Naur Form (EBNF) grammar that
+        constrains the model's output to valid function calls in this format.
+        The grammar should include all available tools and their parameter schemas.
+
+        Args:
+            tools: List of available tools/functions that can be called
+
+        Returns:
+            A string containing the EBNF grammar for this function call format
+
+        The EBNF grammar should:
+        - Define the overall structure of function calls in this format
+        - Include all tool names from the provided tools list
+        - Define valid JSON structures for function arguments
+        - Handle multiple function calls if the format supports them
+
+        Note:
+            Most implementations use EBNFComposer.build_ebnf() utility with
+            format-specific parameters rather than writing EBNF from scratch.
+        """
         raise NotImplementedError()
sglang/srt/function_call/deepseekv3_detector.py

@@ -19,9 +19,28 @@ logger = logging.getLogger(__name__)

 class DeepSeekV3Detector(BaseFormatDetector):
     """
-    Detector for DeepSeek
-
-
+    Detector for DeepSeek V3 model function call format.
+
+    The DeepSeek V3 format uses special Unicode tokens to delimit function calls
+    with JSON code blocks for arguments.
+
+    Format Structure:
+    ```
+    <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>{function_name}\n```json\n{json_arguments}\n```<|tool▁calls▁end|><|end▁of▁sentence|>
+    ```
+    Examples:
+    ```
+    <|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
+    ```
+
+    Key Components:
+    - Tool Calls Section: Wrapped between `<|tool▁calls▁begin|>` and `<|tool▁calls▁end|>`
+    - Individual Tool Call: Wrapped between `<|tool▁call▁begin|>` and `<|tool▁call▁end|>`
+    - Function Declaration: `function<|tool▁sep|>{function_name}`
+    - Arguments: JSON code block between ````json` and ````
+    - Supports multiple tool calls
+
+    Reference: https://huggingface.co/deepseek-ai/DeepSeek-V3-0324?chat_template=default
     """

     def __init__(self):
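As a quick orientation for the documented shape, here is a small sketch that renders a single tool call in that layout; the token spellings are copied from the docstring above, and the helper is illustrative only (the real model emits its own special tokens):

```python
# Sketch: rendering one tool call in the DeepSeek V3 shape documented above.
import json

def render_deepseek_v3_call(name: str, arguments: dict) -> str:
    return (
        "<|tool▁calls▁begin|><|tool▁call▁begin|>"
        f"function<|tool▁sep|>{name}\n```json\n{json.dumps(arguments)}\n```"
        "<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>"
    )

print(render_deepseek_v3_call("get_current_weather", {"location": "Tokyo"}))
```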
@@ -89,16 +108,12 @@ class DeepSeekV3Detector(BaseFormatDetector):
             return StreamingParseResult(normal_text=new_text)

         if not hasattr(self, "_tool_indices"):
-            self._tool_indices = {
-                tool.function.name: i
-                for i, tool in enumerate(tools)
-                if tool.function and tool.function.name
-            }
+            self._tool_indices = self._get_tool_indices(tools)

         calls: list[ToolCallItem] = []
         try:
             partial_match = re.search(
-                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)",
+                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```.*",
                 string=current_text,
                 flags=re.DOTALL,
             )
@@ -127,7 +142,7 @@
                 )
             )
             self.current_tool_name_sent = True
-            # Store the tool call info for
+            # Store the tool call info for serving layer completions endpoint
             self.prev_tool_call_arr[self.current_tool_id] = {
                 "name": func_name,
                 "arguments": {},
@@ -153,7 +168,7 @@
                     ] += argument_diff

                     if _is_complete_json(func_args_raw):
-                        # Update the stored arguments
+                        # Update the stored arguments
                         try:
                             parsed_args = json.loads(func_args_raw)
                             self.prev_tool_call_arr[self.current_tool_id][
sglang/srt/function_call/ebnf_composer.py

@@ -1,51 +1,73 @@
-from typing import Literal, Optional
+from typing import Any, Dict, Literal, Optional


 class EBNFComposer:
     # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
-
-
-    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
-    basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
-    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
+    # Shared primitive grammar rules used across all formats
+    BASE_PRIMITIVE_GRAMMAR = r"""
     basic_string ::= (([\"] basic_string_1 [\"]))
     basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
-    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]
-
-
+    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]{4}
+    basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
+    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
     basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
     basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
     ws ::= [ \n\t]*
-
+    """

-
+    # Format-specific extensions
+    json_grammar_ebnf_str = (
+        r"""
+    json ::= basic_array | basic_object
+    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+    basic_boolean ::= "true" | "false"
+    basic_null ::= "null"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+
+    pythonic_grammar_ebnf_str = (
+        r"""
     pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
     basic_any ::= basic_number | basic_string | basic_array | basic_object
-
-
-
-
-
-
-
+    basic_boolean ::= "True" | "False"
+    basic_null ::= "None"
+    """
+        + BASE_PRIMITIVE_GRAMMAR
+    )
+
+    xml_grammar_ebnf_str = (
+        r"""
+    xml ::= xml_element | xml_text
+    xml_element ::= basic_string | basic_number | basic_boolean | basic_null | basic_array | basic_object
+    xml_text ::= [^<>]*
+    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
+    basic_boolean ::= "true" | "false"
+    basic_null ::= "null"
     """
+        + BASE_PRIMITIVE_GRAMMAR
+    )

     CALL_RULE_MAP = {
         "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
         "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
+        "xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"',
     }

     ARGUMENTS_RULE_MAP = {
         "pythonic": "{arg_rules}",
         "json": '"{{" {arg_rules} "}}"',
+        "xml": "{arg_rules}",
     }

     KEY_VALUE_RULE_MAP = {
         "pythonic": '"{key}" "=" {valrule}',
         "json": '"\\"{key}\\"" ":" {valrule}',
+        "xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
     }

-
+    # Base type mapping - most types are the same across formats
+    BASE_TYPE_MAPPING = {
         "string": "basic_string",
         "number": "basic_number",
         "integer": "basic_number",
@@ -55,19 +77,20 @@ class EBNFComposer:
         "object": "basic_object",
     }

-
-
-        "
-
-
-
-        "
-
+    # Format-specific overrides for types that differ
+    FORMAT_TYPE_OVERRIDES = {
+        "pythonic": {
+            "boolean": '"True" | "False"',
+            "null": '"None"',
+        },
+        "xml": {
+            "string": "xml_text",
+        },
     }

     @staticmethod
     def get_value_rule(
-        prop: dict, function_format: Literal["pythonic", "json"] = "json"
+        prop: dict, function_format: Literal["pythonic", "json", "xml"] = "json"
     ) -> str:
         if "enum" in prop:
             return EBNFComposer._handle_enum(prop, function_format)
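The new `FORMAT_TYPE_OVERRIDES` table is merged on top of `BASE_TYPE_MAPPING` by `get_type_mapping` (see the next hunk). A standalone sketch of that merge; the entries between `integer` and `object` in the base table are not visible in the diff and are assumed here to follow the same `basic_*` naming:

```python
# Standalone sketch of the merge performed by EBNFComposer.get_type_mapping:
# shared base mapping plus per-format overrides. Middle base entries are assumed.
from typing import Dict

BASE_TYPE_MAPPING = {
    "string": "basic_string",
    "number": "basic_number",
    "integer": "basic_number",
    "boolean": "basic_boolean",  # assumed
    "null": "basic_null",        # assumed
    "array": "basic_array",      # assumed
    "object": "basic_object",
}

FORMAT_TYPE_OVERRIDES = {
    "pythonic": {"boolean": '"True" | "False"', "null": '"None"'},
    "xml": {"string": "xml_text"},
}

def get_type_mapping(function_format: str) -> Dict[str, str]:
    mapping = dict(BASE_TYPE_MAPPING)
    mapping.update(FORMAT_TYPE_OVERRIDES.get(function_format, {}))
    return mapping

print(get_type_mapping("xml")["string"])        # xml_text
print(get_type_mapping("pythonic")["boolean"])  # "True" | "False"
print(get_type_mapping("json")["boolean"])      # basic_boolean
```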
@@ -83,48 +106,46 @@ class EBNFComposer:
         enum_values = prop["enum"]
         prop_type = prop.get("type", "string")

-
-
-
-
-
-
-
-
-            ("boolean", "json"): lambda v: "true" if v else "false",
-            ("boolean", "pythonic"): lambda v: "True" if v else "False",
-        }
+        def format_enum_val(v: Any) -> str:
+            if prop_type == "boolean":
+                if function_format == "json" or function_format == "xml":
+                    return "true" if v else "false"
+                elif function_format == "pythonic":
+                    return "True" if v else "False"
+                else:
+                    return str(v)  # fallback

-
-
-
-
-
+            if prop_type == "string":
+                if function_format == "xml":
+                    return f'"{v}"'
+                else:  # json or pythonic
+                    return f'"\\"{v}\\""'  # escape quote-wrapped string

-
-
+            # All other types (number, integer, etc.)
+            return str(v)

-
-
-
+        formatted_values = [format_enum_val(v) for v in enum_values]
+        enum_rule = " | ".join(formatted_values)
+        return f"({enum_rule})" if len(formatted_values) > 1 else enum_rule

-
+    @staticmethod
+    def get_type_mapping(function_format: str) -> Dict[str, str]:
+        """Get the complete type mapping for a given format."""
+        mapping = EBNFComposer.BASE_TYPE_MAPPING.copy()
+        overrides = EBNFComposer.FORMAT_TYPE_OVERRIDES.get(function_format, {})
+        mapping.update({k: v for k, v in overrides.items() if v is not None})
+        return mapping

     @staticmethod
     def _handle_type(prop: dict, function_format: str) -> str:
         """Handle type properties using the appropriate type mapping."""
         prop_type = prop["type"]
-        type_mapping = (
-            EBNFComposer.PYTHONIC_TYPE_MAPPING
-            if function_format == "pythonic"
-            else EBNFComposer.JSON_TYPE_MAPPING
-        )
+        type_mapping = EBNFComposer.get_type_mapping(function_format)

         if isinstance(prop_type, list):
             type_rules = [
-                type_mapping
+                type_mapping.get(single_type, function_format)
                 for single_type in prop_type
-                if single_type in type_mapping
             ]
             return " | ".join(type_rules) if type_rules else function_format

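The replacement of the per-(type, format) lambda table with `format_enum_val` makes the enum rendering rules explicit. A standalone copy of that logic (lifted out of the class so it can run by itself) showing how the same enum value renders per target format:

```python
# Standalone copy of the enum-formatting logic shown above, illustrating how
# one enum value renders differently for json/xml vs pythonic grammars.
from typing import Any

def format_enum_val(v: Any, prop_type: str, function_format: str) -> str:
    if prop_type == "boolean":
        if function_format in ("json", "xml"):
            return "true" if v else "false"
        elif function_format == "pythonic":
            return "True" if v else "False"
        else:
            return str(v)  # fallback
    if prop_type == "string":
        if function_format == "xml":
            return f'"{v}"'
        else:  # json or pythonic
            return f'"\\"{v}\\""'  # escape quote-wrapped string
    return str(v)

print(format_enum_val("celsius", "string", "json"))  # "\"celsius\""
print(format_enum_val("celsius", "string", "xml"))   # "celsius"
print(format_enum_val(True, "boolean", "pythonic"))  # True
```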
@@ -133,7 +154,7 @@
     @staticmethod
     def build_ebnf(
         tools,
-        function_format: Literal["pythonic", "json"] = "json",
+        function_format: Literal["pythonic", "json", "xml"] = "json",
         # Parameters for wrapping the entire sequence of tool calls
         sequence_start_token: Optional[str] = None,
         sequence_end_token: Optional[str] = None,
@@ -143,6 +164,7 @@
         # Parameter for separating multiple tool calls
         tool_call_separator: Optional[str] = None,
         call_rule_fmt: Optional[str] = None,
+        key_value_rule_fmt: Optional[str] = None,
     ):
         """
         Generalized EBNF builder for all detectors.
@@ -157,6 +179,9 @@
             call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
                 the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                 format based on function_format will be used.
+            key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
+                with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
+                based on function_format will be used.
         """
         # =================================================================
         # Step 1: Determine the root tool calls rule
@@ -200,7 +225,11 @@
             else EBNFComposer.CALL_RULE_MAP[function_format]
         )
         args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
-        key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        key_value_template = (
+            key_value_rule_fmt
+            if key_value_rule_fmt
+            else EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
+        )

         # =================================================================
         # Step 4: Build rules for each tool
@@ -292,10 +321,13 @@
         # =================================================================
         # Step 5: Add base grammar rules
         # =================================================================
-        base_grammar = (
-            EBNFComposer.pythonic_grammar_ebnf_str
-            if function_format == "pythonic"
-            else EBNFComposer.json_grammar_ebnf_str
+        grammar_dict = {
+            "pythonic": EBNFComposer.pythonic_grammar_ebnf_str,
+            "json": EBNFComposer.json_grammar_ebnf_str,
+            "xml": EBNFComposer.xml_grammar_ebnf_str,
+        }
+        base_grammar = grammar_dict.get(
+            function_format, EBNFComposer.json_grammar_ebnf_str
         )
         ebnf_lines.append(base_grammar)

sglang/srt/function_call/function_call_parser.py

@@ -14,7 +14,7 @@ from sglang.srt.function_call.kimik2_detector import KimiK2Detector
 from sglang.srt.function_call.llama32_detector import Llama32Detector
 from sglang.srt.function_call.mistral_detector import MistralDetector
 from sglang.srt.function_call.pythonic_detector import PythonicDetector
-from sglang.srt.function_call.
+from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
 from sglang.srt.function_call.qwen25_detector import Qwen25Detector

 logger = logging.getLogger(__name__)
@@ -36,7 +36,7 @@ class FunctionCallParser:
         "deepseekv3": DeepSeekV3Detector,
         "pythonic": PythonicDetector,
         "kimi_k2": KimiK2Detector,
-        "
+        "qwen3_coder": Qwen3CoderDetector,
     }

     def __init__(self, tools: List[Tool], tool_call_parser: str):
@@ -155,9 +155,9 @@
             or None if no constraint applies.
         """
         # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
-        # It cannot parse or validate
+        # It cannot parse or validate function call Pythonic or XML-ish syntax.
         if (
-
+            self.detector.supports_structural_tag()
             and tool_choice == "auto"
             and any(tool.function.strict for tool in self.tools)
         ):
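The parser now defers to the detector via `supports_structural_tag()` instead of hard-coding which formats are JSON-compatible. A small stand-in showing the gating condition added above (simplified objects, not the real FunctionCallParser):

```python
# Sketch of the gating added above: structural-tag constrained generation is
# only used when the detector supports it, tool_choice is "auto", and at least
# one tool is marked strict. Objects below are simplified stand-ins.
from dataclasses import dataclass
from typing import List

@dataclass
class FakeFunction:
    strict: bool

@dataclass
class FakeTool:
    function: FakeFunction

def use_structural_tag(detector_supports: bool, tool_choice: str, tools: List[FakeTool]) -> bool:
    return (
        detector_supports
        and tool_choice == "auto"
        and any(tool.function.strict for tool in tools)
    )

tools = [FakeTool(FakeFunction(strict=False)), FakeTool(FakeFunction(strict=True))]
print(use_structural_tag(True, "auto", tools))   # True
print(use_structural_tag(False, "auto", tools))  # False
```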