sglang 0.4.9.post2__py3-none-any.whl → 0.4.9.post4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. sglang/bench_one_batch.py +2 -1
  2. sglang/eval/loogle_eval.py +7 -0
  3. sglang/srt/_custom_ops.py +29 -1
  4. sglang/srt/configs/deepseekvl2.py +11 -2
  5. sglang/srt/configs/internvl.py +3 -0
  6. sglang/srt/configs/janus_pro.py +3 -0
  7. sglang/srt/configs/model_config.py +10 -8
  8. sglang/srt/configs/update_config.py +3 -1
  9. sglang/srt/conversation.py +2 -1
  10. sglang/srt/custom_op.py +5 -2
  11. sglang/srt/disaggregation/common/conn.py +34 -6
  12. sglang/srt/disaggregation/decode.py +9 -1
  13. sglang/srt/disaggregation/mini_lb.py +3 -2
  14. sglang/srt/disaggregation/mooncake/conn.py +93 -76
  15. sglang/srt/disaggregation/mooncake/transfer_engine.py +4 -2
  16. sglang/srt/disaggregation/nixl/conn.py +17 -13
  17. sglang/srt/distributed/device_communicators/custom_all_reduce.py +3 -91
  18. sglang/srt/distributed/device_communicators/custom_all_reduce_utils.py +96 -1
  19. sglang/srt/distributed/device_communicators/quick_all_reduce.py +273 -0
  20. sglang/srt/distributed/device_communicators/shm_broadcast.py +12 -5
  21. sglang/srt/distributed/parallel_state.py +103 -15
  22. sglang/srt/entrypoints/engine.py +31 -33
  23. sglang/srt/entrypoints/http_server.py +20 -32
  24. sglang/srt/entrypoints/openai/protocol.py +3 -3
  25. sglang/srt/entrypoints/openai/serving_chat.py +48 -6
  26. sglang/srt/eplb/expert_location_dispatch.py +1 -1
  27. sglang/srt/function_call/base_format_detector.py +74 -12
  28. sglang/srt/function_call/deepseekv3_detector.py +26 -11
  29. sglang/srt/function_call/ebnf_composer.py +95 -63
  30. sglang/srt/function_call/function_call_parser.py +4 -2
  31. sglang/srt/function_call/kimik2_detector.py +41 -16
  32. sglang/srt/function_call/llama32_detector.py +6 -3
  33. sglang/srt/function_call/mistral_detector.py +11 -3
  34. sglang/srt/function_call/pythonic_detector.py +16 -14
  35. sglang/srt/function_call/qwen25_detector.py +12 -3
  36. sglang/srt/function_call/qwen3_coder_detector.py +151 -0
  37. sglang/srt/hf_transformers_utils.py +0 -1
  38. sglang/srt/layers/activation.py +24 -3
  39. sglang/srt/layers/attention/base_attn_backend.py +3 -1
  40. sglang/srt/layers/attention/flashattention_backend.py +3 -3
  41. sglang/srt/layers/attention/flashinfer_backend.py +40 -1
  42. sglang/srt/layers/communicator.py +12 -12
  43. sglang/srt/layers/dp_attention.py +72 -24
  44. sglang/srt/layers/linear.py +13 -102
  45. sglang/srt/layers/logits_processor.py +34 -24
  46. sglang/srt/layers/moe/ep_moe/kernels.py +4 -2
  47. sglang/srt/layers/moe/ep_moe/layer.py +23 -402
  48. sglang/srt/layers/moe/fused_moe_native.py +7 -47
  49. sglang/srt/layers/moe/fused_moe_triton/__init__.py +4 -4
  50. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=160,N=320,device_name=NVIDIA_H20-3e.json +146 -0
  51. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H20,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  52. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  53. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=384,N=256,device_name=NVIDIA_H20-3e,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  54. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_B200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  55. sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_3_1/E=385,N=128,device_name=NVIDIA_H200,dtype=fp8_w8a8,block_shape=[128, 128].json +146 -0
  56. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +54 -263
  57. sglang/srt/layers/moe/fused_moe_triton/layer.py +14 -396
  58. sglang/srt/layers/moe/topk.py +190 -23
  59. sglang/srt/layers/quantization/__init__.py +20 -134
  60. sglang/srt/layers/quantization/awq.py +578 -11
  61. sglang/srt/layers/quantization/awq_triton.py +339 -0
  62. sglang/srt/layers/quantization/base_config.py +85 -10
  63. sglang/srt/layers/quantization/blockwise_int8.py +17 -55
  64. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors.py +13 -11
  65. sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py +23 -79
  66. sglang/srt/layers/quantization/fp8.py +273 -62
  67. sglang/srt/layers/quantization/fp8_kernel.py +210 -46
  68. sglang/srt/layers/quantization/fp8_utils.py +2 -2
  69. sglang/srt/layers/quantization/gptq.py +501 -143
  70. sglang/srt/layers/quantization/marlin_utils.py +790 -0
  71. sglang/srt/layers/quantization/modelopt_quant.py +34 -112
  72. sglang/srt/layers/quantization/moe_wna16.py +45 -49
  73. sglang/srt/layers/quantization/petit.py +252 -0
  74. sglang/srt/layers/quantization/petit_utils.py +104 -0
  75. sglang/srt/layers/quantization/qoq.py +7 -6
  76. sglang/srt/layers/quantization/scalar_type.py +352 -0
  77. sglang/srt/layers/quantization/unquant.py +422 -0
  78. sglang/srt/layers/quantization/utils.py +340 -9
  79. sglang/srt/layers/quantization/w4afp8.py +8 -4
  80. sglang/srt/layers/quantization/w8a8_fp8.py +17 -51
  81. sglang/srt/layers/quantization/w8a8_int8.py +51 -115
  82. sglang/srt/layers/radix_attention.py +5 -3
  83. sglang/srt/layers/vocab_parallel_embedding.py +1 -41
  84. sglang/srt/lora/lora.py +0 -4
  85. sglang/srt/lora/lora_manager.py +162 -164
  86. sglang/srt/lora/lora_registry.py +124 -0
  87. sglang/srt/lora/mem_pool.py +83 -35
  88. sglang/srt/lora/utils.py +12 -5
  89. sglang/srt/managers/cache_controller.py +288 -0
  90. sglang/srt/managers/io_struct.py +60 -30
  91. sglang/srt/managers/mm_utils.py +7 -8
  92. sglang/srt/managers/schedule_batch.py +163 -113
  93. sglang/srt/managers/schedule_policy.py +68 -27
  94. sglang/srt/managers/scheduler.py +256 -86
  95. sglang/srt/managers/scheduler_output_processor_mixin.py +22 -4
  96. sglang/srt/managers/tokenizer_manager.py +38 -27
  97. sglang/srt/managers/tp_worker.py +16 -4
  98. sglang/srt/managers/tp_worker_overlap_thread.py +11 -0
  99. sglang/srt/mem_cache/allocator.py +74 -23
  100. sglang/srt/mem_cache/base_prefix_cache.py +14 -2
  101. sglang/srt/mem_cache/chunk_cache.py +5 -2
  102. sglang/srt/mem_cache/hicache_storage.py +168 -0
  103. sglang/srt/mem_cache/hiradix_cache.py +194 -5
  104. sglang/srt/mem_cache/memory_pool.py +16 -1
  105. sglang/srt/mem_cache/memory_pool_host.py +44 -2
  106. sglang/srt/mem_cache/radix_cache.py +26 -0
  107. sglang/srt/mem_cache/swa_radix_cache.py +1025 -0
  108. sglang/srt/metrics/collector.py +9 -0
  109. sglang/srt/model_executor/cuda_graph_runner.py +66 -31
  110. sglang/srt/model_executor/forward_batch_info.py +210 -25
  111. sglang/srt/model_executor/model_runner.py +147 -42
  112. sglang/srt/model_loader/loader.py +7 -1
  113. sglang/srt/model_loader/utils.py +4 -4
  114. sglang/srt/models/clip.py +1 -1
  115. sglang/srt/models/deepseek.py +9 -6
  116. sglang/srt/models/deepseek_janus_pro.py +1 -1
  117. sglang/srt/models/deepseek_v2.py +192 -173
  118. sglang/srt/models/deepseek_vl2.py +5 -5
  119. sglang/srt/models/gemma.py +48 -0
  120. sglang/srt/models/gemma2.py +52 -0
  121. sglang/srt/models/gemma3_causal.py +63 -0
  122. sglang/srt/models/gemma3_mm.py +1 -1
  123. sglang/srt/models/gemma3n_mm.py +2 -4
  124. sglang/srt/models/granitemoe.py +385 -0
  125. sglang/srt/models/grok.py +9 -3
  126. sglang/srt/models/hunyuan.py +63 -16
  127. sglang/srt/models/internvl.py +1 -1
  128. sglang/srt/models/kimi_vl.py +1 -1
  129. sglang/srt/models/llama.py +41 -0
  130. sglang/srt/models/llama4.py +11 -11
  131. sglang/srt/models/llava.py +2 -2
  132. sglang/srt/models/llavavid.py +1 -1
  133. sglang/srt/models/minicpm.py +0 -2
  134. sglang/srt/models/minicpmo.py +3 -7
  135. sglang/srt/models/minicpmv.py +1 -1
  136. sglang/srt/models/mistral.py +1 -1
  137. sglang/srt/models/mixtral.py +9 -2
  138. sglang/srt/models/mllama.py +3 -5
  139. sglang/srt/models/mllama4.py +13 -6
  140. sglang/srt/models/olmoe.py +8 -5
  141. sglang/srt/models/persimmon.py +330 -0
  142. sglang/srt/models/phi.py +321 -0
  143. sglang/srt/models/phi4mm.py +44 -4
  144. sglang/srt/models/phi4mm_audio.py +1260 -0
  145. sglang/srt/models/phi4mm_utils.py +1917 -0
  146. sglang/srt/models/phimoe.py +9 -3
  147. sglang/srt/models/qwen.py +37 -0
  148. sglang/srt/models/qwen2.py +41 -0
  149. sglang/srt/models/qwen2_5_vl.py +4 -4
  150. sglang/srt/models/qwen2_audio.py +1 -1
  151. sglang/srt/models/qwen2_moe.py +53 -9
  152. sglang/srt/models/qwen2_vl.py +4 -4
  153. sglang/srt/models/qwen3.py +65 -1
  154. sglang/srt/models/qwen3_moe.py +57 -24
  155. sglang/srt/models/vila.py +1 -1
  156. sglang/srt/multimodal/processors/base_processor.py +91 -97
  157. sglang/srt/multimodal/processors/clip.py +21 -19
  158. sglang/srt/multimodal/processors/deepseek_vl_v2.py +8 -26
  159. sglang/srt/multimodal/processors/gemma3.py +13 -17
  160. sglang/srt/multimodal/processors/gemma3n.py +19 -23
  161. sglang/srt/multimodal/processors/internvl.py +9 -10
  162. sglang/srt/multimodal/processors/janus_pro.py +12 -27
  163. sglang/srt/multimodal/processors/kimi_vl.py +12 -14
  164. sglang/srt/multimodal/processors/llava.py +4 -2
  165. sglang/srt/multimodal/processors/minicpm.py +35 -44
  166. sglang/srt/multimodal/processors/mlama.py +21 -18
  167. sglang/srt/multimodal/processors/mllama4.py +4 -5
  168. sglang/srt/multimodal/processors/phi4mm.py +63 -39
  169. sglang/srt/multimodal/processors/pixtral.py +14 -35
  170. sglang/srt/multimodal/processors/qwen_audio.py +65 -0
  171. sglang/srt/multimodal/processors/qwen_vl.py +16 -21
  172. sglang/srt/multimodal/processors/vila.py +14 -14
  173. sglang/srt/reasoning_parser.py +46 -4
  174. sglang/srt/sampling/sampling_batch_info.py +6 -5
  175. sglang/srt/sampling/sampling_params.py +8 -1
  176. sglang/srt/server_args.py +454 -270
  177. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +33 -28
  178. sglang/srt/speculative/eagle_draft_extend_cuda_graph_runner.py +46 -37
  179. sglang/srt/speculative/eagle_utils.py +51 -23
  180. sglang/srt/speculative/eagle_worker.py +59 -44
  181. sglang/srt/two_batch_overlap.py +10 -5
  182. sglang/srt/utils.py +44 -69
  183. sglang/test/runners.py +14 -3
  184. sglang/test/test_activation.py +50 -1
  185. sglang/test/test_block_fp8.py +8 -3
  186. sglang/test/test_block_fp8_ep.py +1 -1
  187. sglang/test/test_custom_ops.py +12 -7
  188. sglang/test/test_cutlass_w4a8_moe.py +1 -3
  189. sglang/test/test_fp4_moe.py +1 -3
  190. sglang/test/test_marlin_moe.py +286 -0
  191. sglang/test/test_marlin_utils.py +171 -0
  192. sglang/test/test_utils.py +35 -0
  193. sglang/version.py +1 -1
  194. {sglang-0.4.9.post2.dist-info → sglang-0.4.9.post4.dist-info}/METADATA +10 -10
  195. {sglang-0.4.9.post2.dist-info → sglang-0.4.9.post4.dist-info}/RECORD +198 -175
  196. sglang/srt/layers/quantization/quant_utils.py +0 -166
  197. sglang/srt/managers/multimodal_processors/qwen_audio.py +0 -94
  198. {sglang-0.4.9.post2.dist-info → sglang-0.4.9.post4.dist-info}/WHEEL +0 -0
  199. {sglang-0.4.9.post2.dist-info → sglang-0.4.9.post4.dist-info}/licenses/LICENSE +0 -0
  200. {sglang-0.4.9.post2.dist-info → sglang-0.4.9.post4.dist-info}/top_level.txt +0 -0
@@ -1,51 +1,73 @@
1
- from typing import Literal, Optional
1
+ from typing import Any, Dict, Literal, Optional
2
2
 
3
3
 
4
4
  class EBNFComposer:
5
5
  # Adapted from https://xgrammar.mlc.ai/docs/how_to/ebnf_guided_generation.html#try-out-via-hf-transformers
6
- json_grammar_ebnf_str = r"""
7
- json ::= basic_array | basic_object
8
- basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
9
- basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
10
- basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
6
+ # Shared primitive grammar rules used across all formats
7
+ BASE_PRIMITIVE_GRAMMAR = r"""
11
8
  basic_string ::= (([\"] basic_string_1 [\"]))
12
9
  basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
13
- escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
14
- basic_boolean ::= "true" | "false"
15
- basic_null ::= "null"
10
+ escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9]{4}
11
+ basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
12
+ basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
16
13
  basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
17
14
  basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
18
15
  ws ::= [ \n\t]*
19
- """
16
+ """
20
17
 
21
- pythonic_grammar_ebnf_str = r"""
18
+ # Format-specific extensions
19
+ json_grammar_ebnf_str = (
20
+ r"""
21
+ json ::= basic_array | basic_object
22
+ basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
23
+ basic_boolean ::= "true" | "false"
24
+ basic_null ::= "null"
25
+ """
26
+ + BASE_PRIMITIVE_GRAMMAR
27
+ )
28
+
29
+ pythonic_grammar_ebnf_str = (
30
+ r"""
22
31
  pythonic ::= basic_number | basic_string | basic_array | "True" | "False" | "None"
23
32
  basic_any ::= basic_number | basic_string | basic_array | basic_object
24
- basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
25
- basic_string ::= (([\"] basic_string_1 [\"]))
26
- basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
27
- escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
28
- basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
29
- basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
30
- ws ::= [ \n\t]*
33
+ basic_boolean ::= "True" | "False"
34
+ basic_null ::= "None"
35
+ """
36
+ + BASE_PRIMITIVE_GRAMMAR
37
+ )
38
+
39
+ xml_grammar_ebnf_str = (
40
+ r"""
41
+ xml ::= xml_element | xml_text
42
+ xml_element ::= basic_string | basic_number | basic_boolean | basic_null | basic_array | basic_object
43
+ xml_text ::= [^<>]*
44
+ basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
45
+ basic_boolean ::= "true" | "false"
46
+ basic_null ::= "null"
31
47
  """
48
+ + BASE_PRIMITIVE_GRAMMAR
49
+ )
32
50
 
33
51
  CALL_RULE_MAP = {
34
52
  "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
35
53
  "json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"',
54
+ "xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"',
36
55
  }
37
56
 
38
57
  ARGUMENTS_RULE_MAP = {
39
58
  "pythonic": "{arg_rules}",
40
59
  "json": '"{{" {arg_rules} "}}"',
60
+ "xml": "{arg_rules}",
41
61
  }
42
62
 
43
63
  KEY_VALUE_RULE_MAP = {
44
64
  "pythonic": '"{key}" "=" {valrule}',
45
65
  "json": '"\\"{key}\\"" ":" {valrule}',
66
+ "xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
46
67
  }
47
68
 
48
- JSON_TYPE_MAPPING = {
69
+ # Base type mapping - most types are the same across formats
70
+ BASE_TYPE_MAPPING = {
49
71
  "string": "basic_string",
50
72
  "number": "basic_number",
51
73
  "integer": "basic_number",
@@ -55,19 +77,20 @@ class EBNFComposer:
55
77
  "object": "basic_object",
56
78
  }
57
79
 
58
- PYTHONIC_TYPE_MAPPING = {
59
- "string": "basic_string",
60
- "number": "basic_number",
61
- "integer": "basic_number",
62
- "boolean": '"True" | "False"',
63
- "null": '"None"',
64
- "array": "basic_array",
65
- "object": "basic_object",
80
+ # Format-specific overrides for types that differ
81
+ FORMAT_TYPE_OVERRIDES = {
82
+ "pythonic": {
83
+ "boolean": '"True" | "False"',
84
+ "null": '"None"',
85
+ },
86
+ "xml": {
87
+ "string": "xml_text",
88
+ },
66
89
  }
67
90
 
68
91
  @staticmethod
69
92
  def get_value_rule(
70
- prop: dict, function_format: Literal["pythonic", "json"] = "json"
93
+ prop: dict, function_format: Literal["pythonic", "json", "xml"] = "json"
71
94
  ) -> str:
72
95
  if "enum" in prop:
73
96
  return EBNFComposer._handle_enum(prop, function_format)
@@ -83,48 +106,46 @@ class EBNFComposer:
83
106
  enum_values = prop["enum"]
84
107
  prop_type = prop.get("type", "string")
85
108
 
86
- # Define formatters for different type/format combinations
87
- formatters = {
88
- ("string", "json"): lambda v: f'"\\"{v}\\""',
89
- ("string", "pythonic"): lambda v: f'"\\"{v}\\""',
90
- ("number", "json"): str,
91
- ("number", "pythonic"): str,
92
- ("integer", "json"): str,
93
- ("integer", "pythonic"): str,
94
- ("boolean", "json"): lambda v: "true" if v else "false",
95
- ("boolean", "pythonic"): lambda v: "True" if v else "False",
96
- }
109
+ def format_enum_val(v: Any) -> str:
110
+ if prop_type == "boolean":
111
+ if function_format == "json" or function_format == "xml":
112
+ return "true" if v else "false"
113
+ elif function_format == "pythonic":
114
+ return "True" if v else "False"
115
+ else:
116
+ return str(v) # fallback
97
117
 
98
- # Get the formatter or default to string handling
99
- formatter = formatters.get(
100
- (prop_type, function_format),
101
- formatters[("string", function_format)], # Default to string handling
102
- )
118
+ if prop_type == "string":
119
+ if function_format == "xml":
120
+ return f'"{v}"'
121
+ else: # json or pythonic
122
+ return f'"\\"{v}\\""' # escape quote-wrapped string
103
123
 
104
- formatted_values = [formatter(value) for value in enum_values]
105
- enum_rule = " | ".join(formatted_values)
124
+ # All other types (number, integer, etc.)
125
+ return str(v)
106
126
 
107
- # Wrap in parentheses if there are multiple values to ensure correct EBNF precedence
108
- if len(formatted_values) > 1:
109
- enum_rule = f"({enum_rule})"
127
+ formatted_values = [format_enum_val(v) for v in enum_values]
128
+ enum_rule = " | ".join(formatted_values)
129
+ return f"({enum_rule})" if len(formatted_values) > 1 else enum_rule
110
130
 
111
- return enum_rule
131
+ @staticmethod
132
+ def get_type_mapping(function_format: str) -> Dict[str, str]:
133
+ """Get the complete type mapping for a given format."""
134
+ mapping = EBNFComposer.BASE_TYPE_MAPPING.copy()
135
+ overrides = EBNFComposer.FORMAT_TYPE_OVERRIDES.get(function_format, {})
136
+ mapping.update({k: v for k, v in overrides.items() if v is not None})
137
+ return mapping
112
138
 
113
139
  @staticmethod
114
140
  def _handle_type(prop: dict, function_format: str) -> str:
115
141
  """Handle type properties using the appropriate type mapping."""
116
142
  prop_type = prop["type"]
117
- type_mapping = (
118
- EBNFComposer.PYTHONIC_TYPE_MAPPING
119
- if function_format == "pythonic"
120
- else EBNFComposer.JSON_TYPE_MAPPING
121
- )
143
+ type_mapping = EBNFComposer.get_type_mapping(function_format)
122
144
 
123
145
  if isinstance(prop_type, list):
124
146
  type_rules = [
125
- type_mapping[single_type]
147
+ type_mapping.get(single_type, function_format)
126
148
  for single_type in prop_type
127
- if single_type in type_mapping
128
149
  ]
129
150
  return " | ".join(type_rules) if type_rules else function_format
130
151
 
@@ -133,7 +154,7 @@ class EBNFComposer:
133
154
  @staticmethod
134
155
  def build_ebnf(
135
156
  tools,
136
- function_format: Literal["pythonic", "json"] = "json",
157
+ function_format: Literal["pythonic", "json", "xml"] = "json",
137
158
  # Parameters for wrapping the entire sequence of tool calls
138
159
  sequence_start_token: Optional[str] = None,
139
160
  sequence_end_token: Optional[str] = None,
@@ -143,6 +164,7 @@ class EBNFComposer:
143
164
  # Parameter for separating multiple tool calls
144
165
  tool_call_separator: Optional[str] = None,
145
166
  call_rule_fmt: Optional[str] = None,
167
+ key_value_rule_fmt: Optional[str] = None,
146
168
  ):
147
169
  """
148
170
  Generalized EBNF builder for all detectors.
@@ -157,6 +179,9 @@ class EBNFComposer:
157
179
  call_rule_fmt: Optional custom format string for call_{name} rule. It should define each function call's format, with
158
180
  the placeholders {name} for the function name and {arguments_rule} for the arguments rule. If None, a default
159
181
  format based on function_format will be used.
182
+ key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
183
+ with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
184
+ based on function_format will be used.
160
185
  """
161
186
  # =================================================================
162
187
  # Step 1: Determine the root tool calls rule
@@ -200,7 +225,11 @@ class EBNFComposer:
200
225
  else EBNFComposer.CALL_RULE_MAP[function_format]
201
226
  )
202
227
  args_template = EBNFComposer.ARGUMENTS_RULE_MAP[function_format]
203
- key_value_template = EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
228
+ key_value_template = (
229
+ key_value_rule_fmt
230
+ if key_value_rule_fmt
231
+ else EBNFComposer.KEY_VALUE_RULE_MAP[function_format]
232
+ )
204
233
 
205
234
  # =================================================================
206
235
  # Step 4: Build rules for each tool
@@ -292,10 +321,13 @@ class EBNFComposer:
292
321
  # =================================================================
293
322
  # Step 5: Add base grammar rules
294
323
  # =================================================================
295
- base_grammar = (
296
- EBNFComposer.pythonic_grammar_ebnf_str
297
- if function_format == "pythonic"
298
- else EBNFComposer.json_grammar_ebnf_str
324
+ grammar_dict = {
325
+ "pythonic": EBNFComposer.pythonic_grammar_ebnf_str,
326
+ "json": EBNFComposer.json_grammar_ebnf_str,
327
+ "xml": EBNFComposer.xml_grammar_ebnf_str,
328
+ }
329
+ base_grammar = grammar_dict.get(
330
+ function_format, EBNFComposer.json_grammar_ebnf_str
299
331
  )
300
332
  ebnf_lines.append(base_grammar)
301
333
 
@@ -14,6 +14,7 @@ from sglang.srt.function_call.kimik2_detector import KimiK2Detector
14
14
  from sglang.srt.function_call.llama32_detector import Llama32Detector
15
15
  from sglang.srt.function_call.mistral_detector import MistralDetector
16
16
  from sglang.srt.function_call.pythonic_detector import PythonicDetector
17
+ from sglang.srt.function_call.qwen3_coder_detector import Qwen3CoderDetector
17
18
  from sglang.srt.function_call.qwen25_detector import Qwen25Detector
18
19
 
19
20
  logger = logging.getLogger(__name__)
@@ -35,6 +36,7 @@ class FunctionCallParser:
35
36
  "deepseekv3": DeepSeekV3Detector,
36
37
  "pythonic": PythonicDetector,
37
38
  "kimi_k2": KimiK2Detector,
39
+ "qwen3_coder": Qwen3CoderDetector,
38
40
  }
39
41
 
40
42
  def __init__(self, tools: List[Tool], tool_call_parser: str):
@@ -153,9 +155,9 @@ class FunctionCallParser:
153
155
  or None if no constraint applies.
154
156
  """
155
157
  # NOTE: structural_tag only supports JSON-compatible content between the begin and end.
156
- # It cannot parse or validate Python syntax like function calls.
158
+ # It cannot parse or validate function call Pythonic or XML-ish syntax.
157
159
  if (
158
- not isinstance(self.detector, PythonicDetector)
160
+ self.detector.supports_structural_tag()
159
161
  and tool_choice == "auto"
160
162
  and any(tool.function.strict for tool in self.tools)
161
163
  ):
@@ -18,16 +18,21 @@ logger = logging.getLogger(__name__)
18
18
 
19
19
 
20
20
  class KimiK2Detector(BaseFormatDetector):
21
+ """
22
+ Detector for Kimi K2 model function call format.
23
+
24
+ Format Structure:
25
+ ```
26
+ <|tool_calls_section_begin|>
27
+ <|tool_call_begin|>functions.{func_name}:{index} <|tool_call_argument_begin|>{json_args}<|tool_call_end|>
28
+ <|tool_calls_section_end|>
29
+ ```
30
+
31
+ Reference: https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md
32
+ """
21
33
 
22
34
  def __init__(self):
23
35
  super().__init__()
24
- self._buffer = ""
25
- self.current_tool_name_sent: bool = False
26
- self.prev_tool_call_arr: list[dict] = []
27
- self.current_tool_id: int = -1
28
- self.streamed_args_for_tool: list[str] = (
29
- []
30
- ) # map what has been streamed for each tool so far to a list
31
36
 
32
37
  self.bot_token: str = "<|tool_calls_section_begin|>"
33
38
  self.eot_token: str = "<|tool_calls_section_end|>"
@@ -114,11 +119,7 @@ class KimiK2Detector(BaseFormatDetector):
114
119
  return StreamingParseResult(normal_text=new_text)
115
120
 
116
121
  if not hasattr(self, "_tool_indices"):
117
- self._tool_indices = {
118
- tool.function.name: i
119
- for i, tool in enumerate(tools)
120
- if tool.function and tool.function.name
121
- }
122
+ self._tool_indices = self._get_tool_indices(tools)
122
123
 
123
124
  calls: list[ToolCallItem] = []
124
125
  try:
@@ -150,7 +151,7 @@ class KimiK2Detector(BaseFormatDetector):
150
151
  )
151
152
  )
152
153
  self.current_tool_name_sent = True
153
- # Store the tool call info for adapter.py
154
+ # Store the tool call info for serving layer completions endpoint
154
155
  self.prev_tool_call_arr[self.current_tool_id] = {
155
156
  "name": function_name,
156
157
  "arguments": {},
@@ -214,7 +215,31 @@ class KimiK2Detector(BaseFormatDetector):
214
215
  return StreamingParseResult(normal_text=current_text)
215
216
 
216
217
  def structure_info(self) -> _GetInfoFunc:
217
- raise NotImplementedError()
218
+ """Return function that creates StructureInfo for guided generation."""
219
+
220
+ def get_info(name: str) -> StructureInfo:
221
+ return StructureInfo(
222
+ begin=f"<|tool_calls_section_begin|><|tool_call_begin|>functions.{name}:0 <|tool_call_argument_begin|>",
223
+ end="<|tool_call_end|><|tool_calls_section_end|>",
224
+ trigger="<|tool_calls_section_begin|>",
225
+ )
226
+
227
+ return get_info
218
228
 
219
- def build_ebnf(self, tools: List[Tool]):
220
- raise NotImplementedError()
229
+ def build_ebnf(self, tools: List[Tool]) -> str:
230
+ """
231
+ Build EBNF grammar for KimiK2 tool call format.
232
+
233
+ NOTE: The call_rule_fmt uses [0-9]+ for the function index to allow the grammar
234
+ to accept any numeric index (0, 1, 2, etc.) for proper sequential indexing in
235
+ multiple function call scenarios, while still maintaining the correct KimiK2
236
+ format structure for constrained generation.
237
+ """
238
+ return EBNFComposer.build_ebnf(
239
+ tools,
240
+ sequence_start_token=self.bot_token,
241
+ sequence_end_token=self.eot_token,
242
+ tool_call_separator="",
243
+ call_rule_fmt='"<|tool_call_begin|>functions.{name}:" [0-9]+ " <|tool_call_argument_begin|>" {arguments_rule} "<|tool_call_end|>"',
244
+ function_format="json",
245
+ )
@@ -16,9 +16,12 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  class Llama32Detector(BaseFormatDetector):
18
18
  """
19
- Detector for Llama 3.2 models.
20
- Assumes function call format:
21
- <|python_tag|>{"name":"xxx", "arguments":{...}}
19
+ Detector for Llama 3.2 models with json tool call format.
20
+
21
+ Format Structure:
22
+ ```
23
+ <python_tag>{"name":"xxx", "arguments":{...}}
24
+ ```
22
25
  """
23
26
 
24
27
  def __init__(self):
@@ -17,9 +17,17 @@ logger = logging.getLogger(__name__)
17
17
 
18
18
  class MistralDetector(BaseFormatDetector):
19
19
  """
20
- Detector for Mistral models.
21
- Assumes function call format:
22
- [TOOL_CALLS] [{"name":"func1", "arguments":{...}}, {"name":"func2", "arguments":{...}}]
20
+ Detector for Mistral model function call format.
21
+
22
+ The Mistral format uses a simple bracket-delimited structure with JSON arrays
23
+ containing function call objects.
24
+
25
+ Format Structure:
26
+ ```
27
+ [TOOL_CALLS] [{"name": "function_name", "arguments": {json_args}}, ...]
28
+ ```
29
+
30
+ Reference: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3?chat_template=default
23
31
  """
24
32
 
25
33
  def __init__(self):
@@ -8,7 +8,6 @@ from sglang.srt.entrypoints.openai.protocol import Tool
8
8
  from sglang.srt.function_call.base_format_detector import BaseFormatDetector
9
9
  from sglang.srt.function_call.core_types import (
10
10
  StreamingParseResult,
11
- StructureInfo,
12
11
  ToolCallItem,
13
12
  _GetInfoFunc,
14
13
  )
@@ -19,10 +18,17 @@ logger = logging.getLogger(__name__)
19
18
 
20
19
  class PythonicDetector(BaseFormatDetector):
21
20
  """
22
- Detector for Llama-3.2 and Llama-4 models with pythonic tool call format.
23
- Assumes function call format:
24
- [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)]
25
- Arguments are Python literals (not JSON).
21
+ Detector for Llama-4 models with Pythonic tool call format.
22
+
23
+ The Pythonic format uses Python function call syntax within square brackets,
24
+ with arguments as Python literals rather than JSON.
25
+
26
+ Format Structure:
27
+ ```
28
+ [tool1(arg1=val1, arg2=val2), tool2(arg1=val3)]
29
+ ```
30
+
31
+ Reference: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct?chat_template=default
26
32
  """
27
33
 
28
34
  def __init__(self):
@@ -75,11 +81,7 @@ class PythonicDetector(BaseFormatDetector):
75
81
  return StreamingParseResult(normal_text=normal_text, calls=[])
76
82
 
77
83
  calls = []
78
- tool_indices = {
79
- tool.function.name: i
80
- for i, tool in enumerate(tools)
81
- if tool.function.name
82
- }
84
+ tool_indices = self._get_tool_indices(tools)
83
85
  for call_index, call in enumerate(parsed.elts):
84
86
  if not isinstance(call.func, ast.Name):
85
87
  continue
@@ -213,11 +215,11 @@ class PythonicDetector(BaseFormatDetector):
213
215
  else:
214
216
  raise ValueError("Tool call arguments must be literals")
215
217
 
216
- def structure_info(self) -> _GetInfoFunc:
217
- def info(name: str):
218
- return StructureInfo(begin=f"[{name}(", end=")]", trigger=f"[{name}(")
218
+ def supports_structural_tag(self) -> bool:
219
+ return False
219
220
 
220
- return info
221
+ def structure_info(self) -> _GetInfoFunc:
222
+ raise NotImplementedError
221
223
 
222
224
  def build_ebnf(self, tools: List[Tool]) -> Optional[str]:
223
225
  return EBNFComposer.build_ebnf(
@@ -17,9 +17,18 @@ logger = logging.getLogger(__name__)
17
17
 
18
18
  class Qwen25Detector(BaseFormatDetector):
19
19
  """
20
- Detector for Qwen 2.5 models.
21
- Assumes function call format:
22
- <tool_call>\n{"name":"func1", "arguments":{...}}\n</tool_call>\n<tool_call>\n{"name":"func2", "arguments":{...}}\n</tool_call>
20
+ Detector for Qwen 2.5 and Qwen 3 model function call format.
21
+
22
+ Format Structure:
23
+ ```
24
+ <tool_call>\n{"name":"func1", "arguments":{...}}\n</tool_call>\n<tool_call>\n{"name":"func2", "arguments":{...}}\n</tool_call>
25
+ ```
26
+
27
+ Key Components:
28
+ - Tool Call Tags: `<tool_call>` and `</tool_call>` wrap each individual call
29
+ - Function Call Object: JSON object with "name" and "arguments" fields
30
+
31
+ Reference: https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct?chat_template=default
23
32
  """
24
33
 
25
34
  def __init__(self):
@@ -0,0 +1,151 @@
1
+ import ast
2
+ import html
3
+ import json
4
+ import logging
5
+ import re
6
+ from typing import Any, Dict, List, Tuple
7
+
8
+ from sglang.srt.entrypoints.openai.protocol import Tool
9
+ from sglang.srt.function_call.base_format_detector import BaseFormatDetector
10
+ from sglang.srt.function_call.core_types import (
11
+ StreamingParseResult,
12
+ ToolCallItem,
13
+ _GetInfoFunc,
14
+ )
15
+ from sglang.srt.function_call.ebnf_composer import EBNFComposer
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def _safe_val(raw: str) -> Any:
21
+ raw = html.unescape(raw.strip())
22
+ try:
23
+ return json.loads(raw)
24
+ except Exception:
25
+ try:
26
+ return ast.literal_eval(raw)
27
+ except Exception:
28
+ return raw
29
+
30
+
31
class Qwen3CoderDetector(BaseFormatDetector):
    """
    Detector for the XML-style tool-call format parsed by this class.

    Expected format:
    <tool_call>
    <function=execute_bash>
    <parameter=command>
    pwd && ls
    </parameter>
    </function>
    </tool_call>

    Each `<tool_call>...</tool_call>` block may hold one or more
    `<function=NAME>` sections, and each section holds zero or more
    `<parameter=KEY>` entries whose values are decoded with `_safe_val`.
    """

    def __init__(self):
        super().__init__()
        # Literal markers delimiting a tool call in the model output.
        self.tool_call_start_token: str = "<tool_call>"
        self.tool_call_end_token: str = "</tool_call>"
        self.tool_call_prefix: str = "<function="
        # Each pattern has two alternatives: a properly closed element, or an
        # unterminated one that runs to the end of the searched string.
        self.tool_call_regex = re.compile(
            r"<tool_call>(.*?)</tool_call>|<tool_call>(.*?)$", re.DOTALL
        )
        self.tool_call_function_regex = re.compile(
            r"<function=(.*?)</function>|<function=(.*)$", re.DOTALL
        )
        self.tool_call_parameter_regex = re.compile(
            r"<parameter=(.*?)</parameter>|<parameter=(.*?)$", re.DOTALL
        )
        # Streamed text that has not yet been emitted or parsed.
        self._buf: str = ""

    def has_tool_call(self, text: str) -> bool:
        """Return True when `text` contains the tool-call start token."""
        return self.tool_call_start_token in text

    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
        """Parse a complete text into normal text plus the tool calls found in it."""
        plain_text, tool_calls = self._extract(text, tools)
        return StreamingParseResult(normal_text=plain_text, calls=tool_calls)

    def parse_streaming_increment(
        self, new_text: str, tools: List[Tool]
    ) -> StreamingParseResult:
        """Consume one streamed chunk and return what can be emitted so far.

        Text before a start token is returned as normal text. Every complete
        `<tool_call>...</tool_call>` block is parsed into calls. An
        unterminated block stays in the internal buffer until a later chunk
        supplies its end token.
        """
        self._buf += new_text
        open_tag = self.tool_call_start_token
        close_tag = self.tool_call_end_token
        text_chunks: List[str] = []
        parsed: List[ToolCallItem] = []
        while True:
            start = self._buf.find(open_tag)
            if start == -1:
                # No tool call pending: everything buffered is normal text.
                text_chunks.append(self._buf)
                self._buf = ""
                break
            if start > 0:
                # Emit the text before the start token, keep the rest buffered.
                text_chunks.append(self._buf[:start])
                self._buf = self._buf[start:]
            close = self._buf.find(close_tag)
            if close == -1:
                # Block is still incomplete; wait for more text.
                break
            cut = close + len(close_tag)
            # Advance the buffer before parsing, as the block is now consumed.
            block, self._buf = self._buf[:cut], self._buf[cut:]
            parsed.extend(self._parse_block(block, tools))
        return StreamingParseResult(normal_text="".join(text_chunks), calls=parsed)

    def _extract(self, text: str, tools: List[Tool]) -> Tuple[str, List[ToolCallItem]]:
        """Split `text` into the normal text and the calls of all closed blocks.

        An unterminated `<tool_call>` (no end token after it) is left in the
        normal text verbatim together with everything that follows it.
        """
        open_tag = self.tool_call_start_token
        close_tag = self.tool_call_end_token
        plain: List[str] = []
        found: List[ToolCallItem] = []
        pos = 0
        while True:
            start = text.find(open_tag, pos)
            close = text.find(close_tag, start) if start != -1 else -1
            if close == -1:
                # Either no further start token or no matching end token:
                # the remainder is all normal text.
                plain.append(text[pos:])
                break
            plain.append(text[pos:start])
            stop = close + len(close_tag)
            found.extend(self._parse_block(text[start:stop], tools))
            pos = stop
        return "".join(plain), found

    def _parse_params(self, body: str) -> Dict[str, Any]:
        """Collect the `<parameter=KEY>` entries of one function body into a dict."""
        params: Dict[str, Any] = {}
        for closed, unclosed in self.tool_call_parameter_regex.findall(body):
            key, sep, value = (closed or unclosed).partition(">")
            if not sep:
                # No '>' terminating the parameter name: skip the fragment.
                continue
            # Only newlines are trimmed around the value, not other whitespace.
            params[key.strip()] = _safe_val(value.strip("\n"))
        return params

    def _parse_block(self, block: str, tools: List[Tool]) -> List[ToolCallItem]:
        """Parse every `<function=...>` section inside one tool-call block."""
        items: List[ToolCallItem] = []
        for closed, unclosed in self.tool_call_function_regex.findall(block):
            head, sep, body = (closed or unclosed).partition(">")
            if not sep:
                # No '>' terminating the function name: skip the fragment.
                continue
            fname = head.strip()
            raw = {"name": fname, "arguments": self._parse_params(body)}
            try:
                # TODO: fix idx in function call, the index for a function
                # call will always be -1 in parse_base_json
                items.extend(self.parse_base_json(raw, tools))
            except Exception:
                logger.warning("invalid tool call for %s dropped", fname)
        return items

    def supports_structural_tag(self) -> bool:
        """This detector does not support structural tags."""
        return False

    def structure_info(self) -> _GetInfoFunc:
        """Not implemented for this detector."""
        raise NotImplementedError

    def build_ebnf(self, tools: List[Tool]):
        """Build the EBNF grammar for `tools` via `EBNFComposer.build_ebnf`."""
        start_token = self.tool_call_start_token.replace("\n", "\\n")
        end_token = self.tool_call_end_token.replace("\n", "\\n")
        return EBNFComposer.build_ebnf(
            tools,
            individual_call_start_token=start_token,
            individual_call_end_token=end_token,
            tool_call_separator="\\n",
            function_format="xml",
            call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"',
            key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
        )
@@ -167,7 +167,6 @@ def get_generation_config(
167
167
  model, trust_remote_code=trust_remote_code, revision=revision, **kwargs
168
168
  )
169
169
  except OSError as e:
170
- logging.info("model doesn't have generation_config.json")
171
170
  return None
172
171
 
173
172