sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. sglang/bench_offline_throughput.py +10 -8
  2. sglang/bench_one_batch.py +7 -6
  3. sglang/bench_one_batch_server.py +157 -21
  4. sglang/bench_serving.py +137 -59
  5. sglang/compile_deep_gemm.py +5 -5
  6. sglang/eval/loogle_eval.py +157 -0
  7. sglang/lang/chat_template.py +78 -78
  8. sglang/lang/tracer.py +1 -1
  9. sglang/srt/code_completion_parser.py +1 -1
  10. sglang/srt/configs/deepseekvl2.py +2 -2
  11. sglang/srt/configs/model_config.py +40 -28
  12. sglang/srt/constrained/base_grammar_backend.py +55 -72
  13. sglang/srt/constrained/llguidance_backend.py +25 -21
  14. sglang/srt/constrained/outlines_backend.py +27 -26
  15. sglang/srt/constrained/reasoner_grammar_backend.py +22 -33
  16. sglang/srt/constrained/xgrammar_backend.py +69 -43
  17. sglang/srt/conversation.py +49 -44
  18. sglang/srt/disaggregation/base/conn.py +1 -0
  19. sglang/srt/disaggregation/decode.py +129 -135
  20. sglang/srt/disaggregation/decode_schedule_batch_mixin.py +142 -0
  21. sglang/srt/disaggregation/fake/conn.py +3 -13
  22. sglang/srt/disaggregation/kv_events.py +357 -0
  23. sglang/srt/disaggregation/mini_lb.py +57 -24
  24. sglang/srt/disaggregation/mooncake/conn.py +238 -122
  25. sglang/srt/disaggregation/mooncake/transfer_engine.py +2 -1
  26. sglang/srt/disaggregation/nixl/conn.py +10 -19
  27. sglang/srt/disaggregation/prefill.py +132 -47
  28. sglang/srt/disaggregation/utils.py +123 -6
  29. sglang/srt/distributed/utils.py +3 -3
  30. sglang/srt/entrypoints/EngineBase.py +5 -0
  31. sglang/srt/entrypoints/engine.py +44 -9
  32. sglang/srt/entrypoints/http_server.py +23 -6
  33. sglang/srt/entrypoints/http_server_engine.py +5 -2
  34. sglang/srt/function_call/base_format_detector.py +250 -0
  35. sglang/srt/function_call/core_types.py +34 -0
  36. sglang/srt/function_call/deepseekv3_detector.py +157 -0
  37. sglang/srt/function_call/ebnf_composer.py +234 -0
  38. sglang/srt/function_call/function_call_parser.py +175 -0
  39. sglang/srt/function_call/llama32_detector.py +74 -0
  40. sglang/srt/function_call/mistral_detector.py +84 -0
  41. sglang/srt/function_call/pythonic_detector.py +163 -0
  42. sglang/srt/function_call/qwen25_detector.py +67 -0
  43. sglang/srt/function_call/utils.py +35 -0
  44. sglang/srt/hf_transformers_utils.py +46 -7
  45. sglang/srt/layers/attention/aiter_backend.py +513 -0
  46. sglang/srt/layers/attention/flashattention_backend.py +64 -18
  47. sglang/srt/layers/attention/flashinfer_mla_backend.py +8 -4
  48. sglang/srt/layers/attention/flashmla_backend.py +340 -78
  49. sglang/srt/layers/attention/triton_backend.py +3 -0
  50. sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +1 -1
  51. sglang/srt/layers/attention/utils.py +6 -4
  52. sglang/srt/layers/attention/vision.py +1 -1
  53. sglang/srt/layers/communicator.py +451 -0
  54. sglang/srt/layers/dp_attention.py +61 -21
  55. sglang/srt/layers/layernorm.py +1 -1
  56. sglang/srt/layers/logits_processor.py +46 -11
  57. sglang/srt/layers/moe/cutlass_moe.py +207 -0
  58. sglang/srt/layers/moe/ep_moe/kernels.py +34 -12
  59. sglang/srt/layers/moe/ep_moe/layer.py +105 -51
  60. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +82 -7
  61. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +1 -1
  62. sglang/srt/layers/moe/fused_moe_triton/layer.py +14 -0
  63. sglang/srt/layers/moe/topk.py +67 -10
  64. sglang/srt/layers/multimodal.py +70 -0
  65. sglang/srt/layers/quantization/__init__.py +8 -3
  66. sglang/srt/layers/quantization/blockwise_int8.py +2 -2
  67. sglang/srt/layers/quantization/deep_gemm.py +77 -74
  68. sglang/srt/layers/quantization/fp8.py +92 -2
  69. sglang/srt/layers/quantization/fp8_kernel.py +3 -3
  70. sglang/srt/layers/quantization/fp8_utils.py +6 -0
  71. sglang/srt/layers/quantization/gptq.py +298 -6
  72. sglang/srt/layers/quantization/int8_kernel.py +20 -7
  73. sglang/srt/layers/quantization/qoq.py +244 -0
  74. sglang/srt/layers/sampler.py +0 -4
  75. sglang/srt/layers/vocab_parallel_embedding.py +18 -7
  76. sglang/srt/lora/lora_manager.py +2 -4
  77. sglang/srt/lora/mem_pool.py +4 -4
  78. sglang/srt/lora/triton_ops/gate_up_lora_b.py +1 -1
  79. sglang/srt/lora/triton_ops/qkv_lora_b.py +1 -1
  80. sglang/srt/lora/triton_ops/sgemm_lora_a.py +1 -1
  81. sglang/srt/lora/triton_ops/sgemm_lora_b.py +1 -1
  82. sglang/srt/lora/utils.py +1 -1
  83. sglang/srt/managers/data_parallel_controller.py +3 -3
  84. sglang/srt/managers/deepseek_eplb.py +278 -0
  85. sglang/srt/managers/detokenizer_manager.py +21 -8
  86. sglang/srt/managers/eplb_manager.py +55 -0
  87. sglang/srt/managers/expert_distribution.py +704 -56
  88. sglang/srt/managers/expert_location.py +394 -0
  89. sglang/srt/managers/expert_location_dispatch.py +91 -0
  90. sglang/srt/managers/io_struct.py +19 -4
  91. sglang/srt/managers/mm_utils.py +294 -140
  92. sglang/srt/managers/multimodal_processors/base_processor.py +127 -42
  93. sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +6 -1
  94. sglang/srt/managers/multimodal_processors/gemma3.py +31 -6
  95. sglang/srt/managers/multimodal_processors/internvl.py +14 -5
  96. sglang/srt/managers/multimodal_processors/janus_pro.py +7 -1
  97. sglang/srt/managers/multimodal_processors/kimi_vl.py +7 -6
  98. sglang/srt/managers/multimodal_processors/llava.py +46 -0
  99. sglang/srt/managers/multimodal_processors/minicpm.py +25 -31
  100. sglang/srt/managers/multimodal_processors/mllama4.py +6 -0
  101. sglang/srt/managers/multimodal_processors/pixtral.py +127 -0
  102. sglang/srt/managers/multimodal_processors/qwen_vl.py +58 -16
  103. sglang/srt/managers/schedule_batch.py +122 -42
  104. sglang/srt/managers/schedule_policy.py +1 -5
  105. sglang/srt/managers/scheduler.py +205 -138
  106. sglang/srt/managers/scheduler_output_processor_mixin.py +124 -55
  107. sglang/srt/managers/session_controller.py +1 -1
  108. sglang/srt/managers/tokenizer_manager.py +232 -58
  109. sglang/srt/managers/tp_worker.py +12 -9
  110. sglang/srt/managers/tp_worker_overlap_thread.py +22 -11
  111. sglang/srt/mem_cache/base_prefix_cache.py +3 -0
  112. sglang/srt/mem_cache/chunk_cache.py +3 -1
  113. sglang/srt/mem_cache/hiradix_cache.py +4 -4
  114. sglang/srt/mem_cache/memory_pool.py +76 -52
  115. sglang/srt/mem_cache/multimodal_cache.py +45 -0
  116. sglang/srt/mem_cache/radix_cache.py +58 -5
  117. sglang/srt/metrics/collector.py +314 -39
  118. sglang/srt/mm_utils.py +10 -0
  119. sglang/srt/model_executor/cuda_graph_runner.py +29 -19
  120. sglang/srt/model_executor/expert_location_updater.py +422 -0
  121. sglang/srt/model_executor/forward_batch_info.py +5 -1
  122. sglang/srt/model_executor/model_runner.py +163 -68
  123. sglang/srt/model_loader/loader.py +10 -6
  124. sglang/srt/models/clip.py +5 -1
  125. sglang/srt/models/deepseek_janus_pro.py +2 -2
  126. sglang/srt/models/deepseek_v2.py +308 -351
  127. sglang/srt/models/exaone.py +8 -3
  128. sglang/srt/models/gemma3_mm.py +70 -33
  129. sglang/srt/models/llama.py +2 -0
  130. sglang/srt/models/llama4.py +15 -8
  131. sglang/srt/models/llava.py +258 -7
  132. sglang/srt/models/mimo_mtp.py +220 -0
  133. sglang/srt/models/minicpmo.py +5 -12
  134. sglang/srt/models/mistral.py +71 -1
  135. sglang/srt/models/mixtral.py +98 -34
  136. sglang/srt/models/mllama.py +3 -3
  137. sglang/srt/models/pixtral.py +467 -0
  138. sglang/srt/models/qwen2.py +95 -26
  139. sglang/srt/models/qwen2_5_vl.py +8 -0
  140. sglang/srt/models/qwen2_moe.py +330 -60
  141. sglang/srt/models/qwen2_vl.py +6 -0
  142. sglang/srt/models/qwen3.py +52 -10
  143. sglang/srt/models/qwen3_moe.py +411 -48
  144. sglang/srt/models/roberta.py +1 -1
  145. sglang/srt/models/siglip.py +294 -0
  146. sglang/srt/models/torch_native_llama.py +1 -1
  147. sglang/srt/openai_api/adapter.py +58 -20
  148. sglang/srt/openai_api/protocol.py +6 -8
  149. sglang/srt/operations.py +154 -0
  150. sglang/srt/operations_strategy.py +31 -0
  151. sglang/srt/reasoning_parser.py +3 -3
  152. sglang/srt/sampling/custom_logit_processor.py +18 -3
  153. sglang/srt/sampling/sampling_batch_info.py +4 -56
  154. sglang/srt/sampling/sampling_params.py +2 -2
  155. sglang/srt/server_args.py +162 -22
  156. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +3 -3
  157. sglang/srt/speculative/eagle_utils.py +138 -7
  158. sglang/srt/speculative/eagle_worker.py +69 -21
  159. sglang/srt/utils.py +74 -17
  160. sglang/test/few_shot_gsm8k.py +2 -2
  161. sglang/test/few_shot_gsm8k_engine.py +2 -2
  162. sglang/test/run_eval.py +2 -2
  163. sglang/test/runners.py +8 -1
  164. sglang/test/send_one.py +13 -3
  165. sglang/test/simple_eval_common.py +1 -1
  166. sglang/test/simple_eval_humaneval.py +1 -1
  167. sglang/test/test_cutlass_moe.py +278 -0
  168. sglang/test/test_programs.py +5 -5
  169. sglang/test/test_utils.py +55 -14
  170. sglang/utils.py +3 -3
  171. sglang/version.py +1 -1
  172. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/METADATA +23 -13
  173. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/RECORD +178 -149
  174. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/WHEEL +1 -1
  175. sglang/srt/function_call_parser.py +0 -858
  176. sglang/srt/platforms/interface.py +0 -371
  177. /sglang/{llama3_eval.py → eval/llama3_eval.py} +0 -0
  178. /sglang/srt/models/{xiaomi_mimo.py → mimo.py} +0 -0
  179. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/licenses/LICENSE +0 -0
  180. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,234 @@
1
+ from typing import Literal, Optional
2
+
3
+
4
class EBNFComposer:
    """Compose EBNF grammars that constrain model output to tool calls.

    The composer emits one ``call_<name>`` rule and one ``arguments_<name>``
    rule per tool, a ``function_call`` rule that is the alternation of all
    ``call_<name>`` rules, a ``root`` rule, and finally one of the base
    grammars defined below (JSON or pythonic literals).

    Grammar conventions used by the generated rules (the same ones the base
    grammars below rely on): terminals are double-quoted, ``( ... )`` groups,
    ``?`` marks an optional group, ``|`` is alternation, and ``[ ... ]`` is a
    character class. Because ``|`` binds weaker than sequencing, every
    alternation produced here is wrapped in parentheses before it is embedded
    in a key/value sequence.
    """

    # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
    json_grammar_ebnf_str = r"""
    json ::= basic_array | basic_object
    basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
    basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
    basic_string ::= (([\"] basic_string_1 [\"]))
    basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
    basic_boolean ::= "true" | "false"
    basic_null ::= "null"
    basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
    basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
    ws ::= [ \n\t]*
    """

    pythonic_grammar_ebnf_str = r"""
    pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
    basic_any ::= basic_number | basic_string | basic_array | basic_object
    basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
    basic_string ::= (([\"] basic_string_1 [\"]))
    basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
    escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
    basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
    basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
    ws ::= [ \n\t]*
    """

    # Root rule used when no begin/end tokens are supplied to build_ebnf.
    TOOL_CALLS_MAP = {
        "pythonic": '"[" function_call ("," function_call)* "]"',
        "json": "function_call",
    }

    # str.format templates; doubled braces are literal braces in the output.
    CALL_RULE_MAP = {
        "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
        "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
    }

    ARGUMENTS_RULE_MAP = {
        "pythonic": "{arg_rules}",
        "json": '"{{" {arg_rules} "}}"',
    }

    KEY_VALUE_RULE_MAP = {
        "pythonic": '"{key}" "=" {valrule}',
        "json": '"\\"{key}\\"" ":" {valrule}',
    }

    JSON_TYPE_MAPPING = {
        "string": "basic_string",
        "number": "basic_number",
        "integer": "basic_number",
        "boolean": "basic_boolean",
        "null": "basic_null",
        "array": "basic_array",
        "object": "basic_object",
    }

    PYTHONIC_TYPE_MAPPING = {
        "string": "basic_string",
        "number": "basic_number",
        "integer": "basic_number",
        # Parenthesised so the alternation cannot leak into the enclosing
        # `"key" "=" value` sequence.
        "boolean": '("True" | "False")',
        "null": '"None"',
        "array": "basic_array",
        "object": "basic_object",
    }

    @staticmethod
    def get_value_rule(
        prop: dict, function_format: Literal["pythonic", "json"] = "json"
    ) -> str:
        """Return the grammar fragment matching the value of one property.

        Args:
            prop: JSON-schema-like description of the property. Only the
                ``enum`` and ``type`` keys are inspected; ``enum`` wins.
            function_format: Which literal syntax to target.

        Returns:
            A grammar fragment that is safe to embed in a sequence. When the
            schema has neither ``enum`` nor ``type``, the name of the
            catch-all rule of the base grammar (``json`` or ``pythonic``).
        """
        if "enum" in prop:
            return EBNFComposer._handle_enum(prop, function_format)

        if "type" in prop:
            return EBNFComposer._handle_type(prop, function_format)

        return function_format

    @staticmethod
    def _handle_enum(prop: dict, function_format: str) -> str:
        """Build an alternation of quoted terminals for an ``enum`` property.

        Every value becomes a double-quoted terminal: a bare ``1`` or ``true``
        would be read as a reference to an undefined rule.
        """
        enum_values = prop["enum"]
        prop_type = prop.get("type", "string")
        # A list-valued "type" (e.g. ["string", "null"]) is unhashable and
        # cannot be used in the lookup key below; treat it like an unknown
        # type, which falls back to string formatting.
        if not isinstance(prop_type, str):
            prop_type = "string"

        def as_string(value) -> str:
            # Terminal whose text is the value wrapped in double quotes.
            return f'"\\"{value}\\""'

        def as_number(value) -> str:
            return f'"{value}"'

        formatters = {
            ("string", "json"): as_string,
            ("string", "pythonic"): as_string,
            ("number", "json"): as_number,
            ("number", "pythonic"): as_number,
            ("integer", "json"): as_number,
            ("integer", "pythonic"): as_number,
            ("boolean", "json"): lambda v: '"true"' if v else '"false"',
            ("boolean", "pythonic"): lambda v: '"True"' if v else '"False"',
        }

        # Unknown types are formatted like strings.
        formatter = formatters.get(
            (prop_type, function_format),
            formatters[("string", function_format)],
        )

        formatted_values = [formatter(value) for value in enum_values]
        enum_rule = " | ".join(formatted_values)

        # Parenthesise multi-value alternations to keep EBNF precedence right.
        if len(formatted_values) > 1:
            enum_rule = f"({enum_rule})"

        return enum_rule

    @staticmethod
    def _handle_type(prop: dict, function_format: str) -> str:
        """Map a ``type`` (single name or list of names) to a grammar fragment.

        Unknown type names are ignored in a list and replaced by the
        catch-all rule name (``function_format``) when nothing remains.
        """
        prop_type = prop["type"]
        type_mapping = (
            EBNFComposer.PYTHONIC_TYPE_MAPPING
            if function_format == "pythonic"
            else EBNFComposer.JSON_TYPE_MAPPING
        )

        if isinstance(prop_type, list):
            type_rules = [
                type_mapping[single_type]
                for single_type in prop_type
                if single_type in type_mapping
            ]
            if not type_rules:
                return function_format
            if len(type_rules) == 1:
                return type_rules[0]
            # Group the union so it stays a single value inside the
            # surrounding key/value sequence.
            return "(" + " | ".join(type_rules) + ")"

        return type_mapping.get(prop_type, function_format)

    @staticmethod
    def _combine_arg_rules(pairs: list) -> str:
        """Join key/value rules with commas, honouring optional arguments.

        Args:
            pairs: ``(pair_rule, is_required)`` tuples in declaration order.

        Returns:
            A fragment that accepts every required pair plus any subset of the
            optional ones, in declaration order, with a ``","`` between two
            emitted pairs and none before the first or after the last. With
            only required pairs this is the plain ``' "," '`` join.
        """
        # Built back to front. For the suffix starting at the current pair:
        #   head: fragment to use when no pair has been emitted before it
        #   tail: fragment to use when some pair has already been emitted
        head = ""
        tail = ""
        for rule, required in reversed(pairs):
            with_item = f"{rule} {tail}" if tail else rule
            if required:
                new_head = with_item
            elif head:
                # Either this optional pair starts the list, or a later one.
                new_head = f"( {with_item} | {head} )"
            else:
                new_head = f"( {with_item} )?"

            separated = f'"," {rule}' if required else f'( "," {rule} )?'
            tail = f"{separated} {tail}" if tail else separated
            head = new_head
        return head

    @staticmethod
    def build_ebnf(
        tools,
        *,
        call_rule_fmt: Optional[str] = None,
        function_format: Literal["pythonic", "json"] = "json",
        bot_token: Optional[str] = None,
        eot_token: Optional[str] = None,
        tool_call_separator: Optional[str] = None,
    ):
        """
        Generalized EBNF builder for all detectors.
        Args:
            tools: List of Tool objects to generate EBNF grammar for
            call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
                the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
                format based on function_format will be used.
            function_format: The format of function calls, either "pythonic" or "json"
            bot_token: The token that indicates the start of a tool call section
            eot_token: The token that indicates the end of a tool call section
            tool_call_separator: The separator between multiple tool calls;
                only used when both bot_token and eot_token are given
        Returns:
            The grammar as one newline-joined string: root rule, function_call
            rule, per-tool rules, then the base grammar for function_format.
        """
        # =================================================================
        # Step 1: Determine the root tool calls rule
        # =================================================================
        if bot_token and eot_token:
            if tool_call_separator:
                root_rule = f'"{bot_token}" function_call ( "{tool_call_separator}" function_call )* "{eot_token}"'
            else:
                root_rule = f'"{bot_token}" function_call "{eot_token}"'
        else:
            root_rule = EBNFComposer.TOOL_CALLS_MAP[function_format]

        # =================================================================
        # Step 2: Build the header rules
        # =================================================================
        ebnf_lines = [
            f"root ::= {root_rule}",
            "function_call ::= "
            + " | ".join(f"call_{tool.function.name}" for tool in tools),
        ]

        # =================================================================
        # Step 3: Set up formatting templates
        # =================================================================
        call_template = (
            f"call_{{name}} ::= {call_rule_fmt}"
            if call_rule_fmt
            else EBNFComposer.CALL_RULE_MAP[function_format]
        )
        args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
        key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]

        # =================================================================
        # Step 4: Build rules for each tool
        # =================================================================
        for tool in tools:
            tool_name = tool.function.name
            params = tool.function.parameters or {}
            properties = params.get("properties", {})
            required_props = set(params.get("required", []))

            # One (key/value rule, is_required) entry per declared property.
            arg_pairs = []
            for prop_name, prop_schema in properties.items():
                value_rule = EBNFComposer.get_value_rule(prop_schema, function_format)
                pair = key_value_template.format(key=prop_name, valrule=value_rule)
                arg_pairs.append((pair, prop_name in required_props))

            # Combine all argument rules
            combined_args = EBNFComposer._combine_arg_rules(arg_pairs)
            arguments_rule = args_template.format(arg_rules=combined_args)

            # Add the function call rule and its arguments rule
            ebnf_lines.append(
                call_template.format(
                    name=tool_name, arguments_rule=f"arguments_{tool_name}"
                )
            )
            ebnf_lines.append(f"arguments_{tool_name} ::= {arguments_rule}")

        # =================================================================
        # Step 5: Add base grammar rules
        # =================================================================
        base_grammar = (
            EBNFComposer.pythonic_grammar_ebnf_str
            if function_format == "pythonic"
            else EBNFComposer.json_grammar_ebnf_str
        )
        ebnf_lines.append(base_grammar)

        return "\n".join(ebnf_lines)
@@ -0,0 +1,175 @@
1
+ from typing import Any, Dict, List, Literal, Optional, Set, Tuple, Type, Union
2
+
3
+ from sglang.srt.function_call.base_format_detector import BaseFormatDetector
4
+ from sglang.srt.function_call.core_types import ToolCallItem
5
+ from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
6
+ from sglang.srt.function_call.llama32_detector import Llama32Detector
7
+ from sglang.srt.function_call.mistral_detector import MistralDetector
8
+ from sglang.srt.function_call.pythonic_detector import PythonicDetector
9
+ from sglang.srt.function_call.qwen25_detector import Qwen25Detector
10
+ from sglang.srt.openai_api.protocol import (
11
+ StructuralTagResponseFormat,
12
+ StructuresResponseFormat,
13
+ Tool,
14
+ ToolChoice,
15
+ )
16
+
17
+
18
class FunctionCallParser:
    """
    Front end for extracting function/tool calls from model output.

    The actual format knowledge lives in a detector selected by name from
    ``ToolCallParserEnum``; this class forwards full texts (non-streaming) or
    successive chunks (streaming) to that detector and hands back the plain
    text and the parsed calls. It also derives decoding constraints
    (structural tags or an EBNF grammar) from the configured tools.
    """

    # Parser name -> detector class implementing that output format.
    ToolCallParserEnum: Dict[str, Type[BaseFormatDetector]] = {
        "llama3": Llama32Detector,
        "qwen25": Qwen25Detector,
        "mistral": MistralDetector,
        "deepseekv3": DeepSeekV3Detector,
        "pythonic": PythonicDetector,
    }

    def __init__(self, tools: List[Tool], tool_call_parser: str):
        """
        Args:
            tools: Tools the model may call.
            tool_call_parser: Key into ``ToolCallParserEnum``.

        Raises:
            ValueError: If ``tool_call_parser`` is not a known key.
        """
        detector_cls = self.ToolCallParserEnum.get(tool_call_parser)
        if not detector_cls:
            raise ValueError(f"Unsupported tool_call_parser: {tool_call_parser}")

        self.detector = detector_cls()
        self.tools = tools

    def has_tool_call(self, text: str) -> bool:
        """
        Report whether ``text`` contains a tool call, as judged by the detector.

        Args:
            text: The text to check for tool calls

        Returns:
            True if the text contains a tool call, False otherwise
        """
        return self.detector.has_tool_call(text)

    def parse_non_stream(self, full_text: str) -> Tuple[str, list[ToolCallItem]]:
        """
        Parse a complete model output in one go.

        Args:
            full_text: The complete text to parse

        Returns:
            A tuple containing:
            - The text the detector did not consume (normal text); the
              unmodified ``full_text`` when no call was found
            - A list of tool calls parsed from the text (empty when none)
        """
        outcome = self.detector.detect_and_parse(full_text, self.tools)
        found_calls = outcome.calls
        if not found_calls:
            return full_text, []
        return outcome.normal_text, found_calls

    def parse_stream_chunk(self, chunk_text: str) -> Tuple[str, list[ToolCallItem]]:
        """
        Feed one newly received chunk to the detector's incremental parser.

        Args:
            chunk_text: The new chunk of text to parse

        Returns:
            A tuple containing:
            - The normal text that should be displayed to the user
            - A list of tool calls parsed from the chunk
        """
        increment = self.detector.parse_streaming_increment(chunk_text, self.tools)

        shown_text = ""
        chunk_calls = []
        if increment.calls:
            # With calls present, the detector's normal_text is passed through
            # as-is, even when it is empty.
            chunk_calls.extend(increment.calls)
            shown_text = increment.normal_text
        elif increment.normal_text:
            shown_text = increment.normal_text

        return shown_text, chunk_calls

    def get_structure_tag(self) -> StructuralTagResponseFormat:
        """
        Build the structural-tag response format covering every configured tool.

        Each tool contributes one structure (begin marker, schema, end marker)
        and one trigger, all obtained from the detector's ``structure_info``.
        """
        structures: List[StructuresResponseFormat] = list()
        triggers: Set[str] = set()

        info_for = self.detector.structure_info()
        for tool in self.tools:
            fn = tool.function
            fn_name = fn.name
            assert fn_name is not None
            info = info_for(fn_name)

            # Non-strict tools accept anything; strict ones only their schema.
            if fn.strict:
                schema = fn.parameters
            else:
                schema = {}

            structure = StructuresResponseFormat(
                begin=info.begin,
                schema=schema,  # type: ignore
                end=info.end,
            )
            structures.append(structure)
            triggers.add(info.trigger)

        return StructuralTagResponseFormat(
            type="structural_tag",
            structures=structures,
            triggers=list(triggers),
        )

    def get_structure_constraint(
        self, tool_choice: Union[ToolChoice, Literal["auto", "required"]]
    ) -> Optional[Tuple[str, Any]]:
        """
        Pick the decoding constraint that matches ``tool_choice``.

        Args:
            tool_choice: The tool choice setting from the request

        Returns:
            A tuple of (constraint_type, constraint_value) to be added to sampling parameters,
            or None if no constraint applies.
        """
        # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
        # It cannot parse or validate Python syntax like function calls.
        wants_structural_tag = (
            not isinstance(self.detector, PythonicDetector)
            and tool_choice == "auto"
            and any(tool.function.strict for tool in self.tools)
        )
        if wants_structural_tag:
            return ("structural_tag", self.get_structure_tag())

        if tool_choice == "required" or isinstance(tool_choice, ToolChoice):
            grammar = self.get_ebnf(tool_choice)
            if grammar is None:
                return None
            return ("ebnf", grammar)

        return None

    def get_ebnf(
        self, tool_choice: Union[ToolChoice, Literal["required"]]
    ) -> Optional[str]:
        """
        Ask the detector for an EBNF grammar for the given tool choice.

        A ``ToolChoice`` restricts the grammar to the tools whose function
        name matches it; any other value uses all configured tools.
        """
        if isinstance(tool_choice, ToolChoice):
            wanted = tool_choice.function.name
            candidates = [t for t in self.tools if t.function.name == wanted]
        else:
            candidates = self.tools
        return self.detector.build_ebnf(candidates)
@@ -0,0 +1,74 @@
1
+ import json
2
+ import logging
3
+ from typing import List
4
+
5
+ from sglang.srt.function_call.base_format_detector import BaseFormatDetector
6
+ from sglang.srt.function_call.core_types import (
7
+ StreamingParseResult,
8
+ StructureInfo,
9
+ _GetInfoFunc,
10
+ )
11
+ from sglang.srt.function_call.ebnf_composer import EBNFComposer
12
+ from sglang.srt.openai_api.protocol import Tool
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class Llama32Detector(BaseFormatDetector):
    """
    Detector for Llama 3.2 models.
    Assumes function call format:
    <|python_tag|>{"name":"xxx", "arguments":{...}}
    Several calls may follow each other, separated by ";".
    """

    def __init__(self):
        super().__init__()
        self.bot_token = "<|python_tag|>"

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a Llama 3.2 format tool call."""
        # depending on the prompt format the Llama model may or may not
        # prefix the output with the <|python_tag|> token
        return self.bot_token in text or text.startswith("{")

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """Parse function calls from text, handling multiple JSON objects.

        Args:
            text: Complete model output.
            tools: Available tools, forwarded to ``parse_base_json``.

        Returns:
            A result whose ``normal_text`` is everything before the first
            ``<|python_tag|>`` (empty when the text starts directly with
            ``{``) and whose ``calls`` come from the JSON values that could be
            decoded. Text without a tool call is returned unchanged as
            ``normal_text`` with no calls.
        """
        tag = self.bot_token
        if tag not in text and not text.startswith("{"):
            return StreamingParseResult(normal_text=text, calls=[])

        if tag in text:
            # Split on the FIRST tag only: a two-way unpack of a plain split()
            # raises ValueError when the model emits the tag more than once.
            normal_text, _, action_text = text.partition(tag)
        else:
            normal_text, action_text = "", text

        # Decode JSON values one after another instead of splitting on ";",
        # so a semicolon inside a string argument does not break the payload.
        decoder = json.JSONDecoder()
        all_actions = []
        pos, end = 0, len(action_text)
        while pos < end:
            # Skip whitespace, ";" separators and repeated tags between values.
            if action_text[pos].isspace() or action_text[pos] == ";":
                pos += 1
                continue
            if action_text.startswith(tag, pos):
                pos += len(tag)
                continue

            try:
                action, pos = decoder.raw_decode(action_text, pos)
            except json.JSONDecodeError as e:
                # Drop the malformed part and resume after the next ";".
                next_sep = action_text.find(";", pos)
                bad_part = action_text[pos:] if next_sep == -1 else action_text[pos:next_sep]
                logger.warning("Failed to parse JSON part: %s", bad_part)
                logger.warning("JSON parse error: %s", e)
                if next_sep == -1:
                    break
                pos = next_sep + 1
                continue
            all_actions.append(action)

        calls = []
        # Only process if we found valid JSON objects
        if all_actions:
            calls = self.parse_base_json(all_actions, tools)
        return StreamingParseResult(normal_text=normal_text, calls=calls)

    def structure_info(self) -> _GetInfoFunc:
        """Return a function mapping a tool name to its begin/end/trigger markers."""
        return lambda name: StructureInfo(
            begin='<|python_tag|>{"name":"' + name + '", "arguments":',
            end="}",
            trigger="<|python_tag|>",
        )

    def build_ebnf(self, tools: List[Tool]):
        """Build the JSON-format EBNF grammar for ``tools`` via EBNFComposer."""
        return EBNFComposer.build_ebnf(
            tools,
            function_format="json",
            tool_call_separator=",",
        )
@@ -0,0 +1,84 @@
1
+ import json
2
+ import re
3
+ from typing import List
4
+
5
+ from sglang.srt.function_call.base_format_detector import BaseFormatDetector
6
+ from sglang.srt.function_call.core_types import (
7
+ StreamingParseResult,
8
+ StructureInfo,
9
+ _GetInfoFunc,
10
+ )
11
+ from sglang.srt.function_call.ebnf_composer import EBNFComposer
12
+ from sglang.srt.openai_api.protocol import Tool
13
+
14
+
15
class MistralDetector(BaseFormatDetector):
    """
    Detector for Mistral models.
    Assumes function call format:
    [TOOL_CALLS] [{"name":"xxx", "arguments":{...}}]
    """

    def __init__(self):
        """
        Initializes the detector with necessary state variables.
        """
        super().__init__()
        self.bot_token = "[TOOL_CALLS] ["
        self.eot_token = "]"
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains a Mistral format tool call."""
        return self.bot_token in text

    def _clean_text(self, text: str) -> str:
        """
        clean text to only leave ''[TOOL_CALLS] [{"name": xxx, "arguments": {xxx}}]'
        for example,
        text = '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]\n\nToday\'s weather in Boston is :{function call result} (in Fahrenheit)\n\nIf you prefer Celsius, please let me know.'
        return '[TOOL_CALLS] [{"name": "get_current_weather", "arguments": {"location": "Boston, MA", "unit": "fahrenheit"}}]'
        The key pattern is [TOOL_CALLS] [...]

        Returns "" when the marker is absent or no closing "]" can be found.
        """
        start = text.find(self.bot_token)
        if start == -1:
            return ""

        # bot_token ends with the "[" that opens the JSON array. Let the JSON
        # decoder find the matching "]": a non-greedy regex stops at the first
        # "]" and truncates calls whose arguments contain arrays or a "]".
        array_start = start + len(self.bot_token) - 1
        try:
            _, array_end = json.JSONDecoder().raw_decode(text, array_start)
        except json.JSONDecodeError:
            # Not valid JSON: fall back to the shortest bracketed span.
            # TODO: check if Mistral supports multiple tool calls, currently assume only support one tool call
            find_results = re.findall(r"\[TOOL_CALLS\] \[.*?\]", text, re.DOTALL)
            return find_results[0] if find_results else ""
        return text[start:array_end]

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """
        One-time parsing: Detects and parses tool calls in the provided text.

        :param text: The complete text to parse.
        :param tools: List of available tools.
        :return: Result holding the text before the tool-call marker as
            normal_text and the parsed calls. When the marker is missing, or
            the payload is not valid JSON, the whole text is returned as
            normal_text with no calls.
        """
        idx = text.find(self.bot_token)
        if idx == -1:
            return StreamingParseResult(normal_text=text, calls=[])
        normal_text = text[:idx].strip()

        cleaned = self._clean_text(text)
        # Drop only the leading marker; str.replace would also strip the
        # marker from inside argument values.
        tool_content = cleaned[len("[TOOL_CALLS]") :].strip()

        calls = []
        match = self.tool_call_regex.search(tool_content)
        if match:
            try:
                function_call_arr = json.loads(match.group(0))
            except json.JSONDecodeError:
                # Malformed payload: hand the output back as plain text
                # instead of raising out of the parser.
                return StreamingParseResult(normal_text=text, calls=[])
            for match_result in function_call_arr:
                calls.extend(self.parse_base_json(match_result, tools))
        return StreamingParseResult(normal_text=normal_text, calls=calls)

    def structure_info(self) -> _GetInfoFunc:
        """Return a function mapping a tool name to its begin/end/trigger markers."""
        return lambda name: StructureInfo(
            begin='[TOOL_CALLS] [{"name":"' + name + '", "arguments":',
            end="}]",
            trigger="[TOOL_CALLS]",
        )

    def build_ebnf(self, tools: List[Tool]):
        """Build the JSON-format EBNF grammar for ``tools``, wrapped in the begin/end tokens."""
        return EBNFComposer.build_ebnf(
            tools,
            bot_token=self.bot_token,
            eot_token=self.eot_token,
            function_format="json",
        )