sglang 0.4.6.post3__py3-none-any.whl → 0.4.6.post5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. sglang/bench_offline_throughput.py +10 -8
  2. sglang/bench_one_batch.py +7 -6
  3. sglang/bench_one_batch_server.py +157 -21
  4. sglang/bench_serving.py +137 -59
  5. sglang/compile_deep_gemm.py +5 -5
  6. sglang/eval/loogle_eval.py +157 -0
  7. sglang/lang/chat_template.py +78 -78
  8. sglang/lang/tracer.py +1 -1
  9. sglang/srt/code_completion_parser.py +1 -1
  10. sglang/srt/configs/deepseekvl2.py +2 -2
  11. sglang/srt/configs/model_config.py +40 -28
  12. sglang/srt/constrained/base_grammar_backend.py +55 -72
  13. sglang/srt/constrained/llguidance_backend.py +25 -21
  14. sglang/srt/constrained/outlines_backend.py +27 -26
  15. sglang/srt/constrained/reasoner_grammar_backend.py +22 -33
  16. sglang/srt/constrained/xgrammar_backend.py +69 -43
  17. sglang/srt/conversation.py +49 -44
  18. sglang/srt/disaggregation/base/conn.py +1 -0
  19. sglang/srt/disaggregation/decode.py +129 -135
  20. sglang/srt/disaggregation/decode_schedule_batch_mixin.py +142 -0
  21. sglang/srt/disaggregation/fake/conn.py +3 -13
  22. sglang/srt/disaggregation/kv_events.py +357 -0
  23. sglang/srt/disaggregation/mini_lb.py +57 -24
  24. sglang/srt/disaggregation/mooncake/conn.py +238 -122
  25. sglang/srt/disaggregation/mooncake/transfer_engine.py +2 -1
  26. sglang/srt/disaggregation/nixl/conn.py +10 -19
  27. sglang/srt/disaggregation/prefill.py +132 -47
  28. sglang/srt/disaggregation/utils.py +123 -6
  29. sglang/srt/distributed/utils.py +3 -3
  30. sglang/srt/entrypoints/EngineBase.py +5 -0
  31. sglang/srt/entrypoints/engine.py +44 -9
  32. sglang/srt/entrypoints/http_server.py +23 -6
  33. sglang/srt/entrypoints/http_server_engine.py +5 -2
  34. sglang/srt/function_call/base_format_detector.py +250 -0
  35. sglang/srt/function_call/core_types.py +34 -0
  36. sglang/srt/function_call/deepseekv3_detector.py +157 -0
  37. sglang/srt/function_call/ebnf_composer.py +234 -0
  38. sglang/srt/function_call/function_call_parser.py +175 -0
  39. sglang/srt/function_call/llama32_detector.py +74 -0
  40. sglang/srt/function_call/mistral_detector.py +84 -0
  41. sglang/srt/function_call/pythonic_detector.py +163 -0
  42. sglang/srt/function_call/qwen25_detector.py +67 -0
  43. sglang/srt/function_call/utils.py +35 -0
  44. sglang/srt/hf_transformers_utils.py +46 -7
  45. sglang/srt/layers/attention/aiter_backend.py +513 -0
  46. sglang/srt/layers/attention/flashattention_backend.py +64 -18
  47. sglang/srt/layers/attention/flashinfer_mla_backend.py +8 -4
  48. sglang/srt/layers/attention/flashmla_backend.py +340 -78
  49. sglang/srt/layers/attention/triton_backend.py +3 -0
  50. sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +1 -1
  51. sglang/srt/layers/attention/utils.py +6 -4
  52. sglang/srt/layers/attention/vision.py +1 -1
  53. sglang/srt/layers/communicator.py +451 -0
  54. sglang/srt/layers/dp_attention.py +61 -21
  55. sglang/srt/layers/layernorm.py +1 -1
  56. sglang/srt/layers/logits_processor.py +46 -11
  57. sglang/srt/layers/moe/cutlass_moe.py +207 -0
  58. sglang/srt/layers/moe/ep_moe/kernels.py +34 -12
  59. sglang/srt/layers/moe/ep_moe/layer.py +105 -51
  60. sglang/srt/layers/moe/ep_moe/token_dispatcher.py +82 -7
  61. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +1 -1
  62. sglang/srt/layers/moe/fused_moe_triton/layer.py +14 -0
  63. sglang/srt/layers/moe/topk.py +67 -10
  64. sglang/srt/layers/multimodal.py +70 -0
  65. sglang/srt/layers/quantization/__init__.py +8 -3
  66. sglang/srt/layers/quantization/blockwise_int8.py +2 -2
  67. sglang/srt/layers/quantization/deep_gemm.py +77 -74
  68. sglang/srt/layers/quantization/fp8.py +92 -2
  69. sglang/srt/layers/quantization/fp8_kernel.py +3 -3
  70. sglang/srt/layers/quantization/fp8_utils.py +6 -0
  71. sglang/srt/layers/quantization/gptq.py +298 -6
  72. sglang/srt/layers/quantization/int8_kernel.py +20 -7
  73. sglang/srt/layers/quantization/qoq.py +244 -0
  74. sglang/srt/layers/sampler.py +0 -4
  75. sglang/srt/layers/vocab_parallel_embedding.py +18 -7
  76. sglang/srt/lora/lora_manager.py +2 -4
  77. sglang/srt/lora/mem_pool.py +4 -4
  78. sglang/srt/lora/triton_ops/gate_up_lora_b.py +1 -1
  79. sglang/srt/lora/triton_ops/qkv_lora_b.py +1 -1
  80. sglang/srt/lora/triton_ops/sgemm_lora_a.py +1 -1
  81. sglang/srt/lora/triton_ops/sgemm_lora_b.py +1 -1
  82. sglang/srt/lora/utils.py +1 -1
  83. sglang/srt/managers/data_parallel_controller.py +3 -3
  84. sglang/srt/managers/deepseek_eplb.py +278 -0
  85. sglang/srt/managers/detokenizer_manager.py +21 -8
  86. sglang/srt/managers/eplb_manager.py +55 -0
  87. sglang/srt/managers/expert_distribution.py +704 -56
  88. sglang/srt/managers/expert_location.py +394 -0
  89. sglang/srt/managers/expert_location_dispatch.py +91 -0
  90. sglang/srt/managers/io_struct.py +19 -4
  91. sglang/srt/managers/mm_utils.py +294 -140
  92. sglang/srt/managers/multimodal_processors/base_processor.py +127 -42
  93. sglang/srt/managers/multimodal_processors/deepseek_vl_v2.py +6 -1
  94. sglang/srt/managers/multimodal_processors/gemma3.py +31 -6
  95. sglang/srt/managers/multimodal_processors/internvl.py +14 -5
  96. sglang/srt/managers/multimodal_processors/janus_pro.py +7 -1
  97. sglang/srt/managers/multimodal_processors/kimi_vl.py +7 -6
  98. sglang/srt/managers/multimodal_processors/llava.py +46 -0
  99. sglang/srt/managers/multimodal_processors/minicpm.py +25 -31
  100. sglang/srt/managers/multimodal_processors/mllama4.py +6 -0
  101. sglang/srt/managers/multimodal_processors/pixtral.py +127 -0
  102. sglang/srt/managers/multimodal_processors/qwen_vl.py +58 -16
  103. sglang/srt/managers/schedule_batch.py +122 -42
  104. sglang/srt/managers/schedule_policy.py +1 -5
  105. sglang/srt/managers/scheduler.py +205 -138
  106. sglang/srt/managers/scheduler_output_processor_mixin.py +124 -55
  107. sglang/srt/managers/session_controller.py +1 -1
  108. sglang/srt/managers/tokenizer_manager.py +232 -58
  109. sglang/srt/managers/tp_worker.py +12 -9
  110. sglang/srt/managers/tp_worker_overlap_thread.py +22 -11
  111. sglang/srt/mem_cache/base_prefix_cache.py +3 -0
  112. sglang/srt/mem_cache/chunk_cache.py +3 -1
  113. sglang/srt/mem_cache/hiradix_cache.py +4 -4
  114. sglang/srt/mem_cache/memory_pool.py +76 -52
  115. sglang/srt/mem_cache/multimodal_cache.py +45 -0
  116. sglang/srt/mem_cache/radix_cache.py +58 -5
  117. sglang/srt/metrics/collector.py +314 -39
  118. sglang/srt/mm_utils.py +10 -0
  119. sglang/srt/model_executor/cuda_graph_runner.py +29 -19
  120. sglang/srt/model_executor/expert_location_updater.py +422 -0
  121. sglang/srt/model_executor/forward_batch_info.py +5 -1
  122. sglang/srt/model_executor/model_runner.py +163 -68
  123. sglang/srt/model_loader/loader.py +10 -6
  124. sglang/srt/models/clip.py +5 -1
  125. sglang/srt/models/deepseek_janus_pro.py +2 -2
  126. sglang/srt/models/deepseek_v2.py +308 -351
  127. sglang/srt/models/exaone.py +8 -3
  128. sglang/srt/models/gemma3_mm.py +70 -33
  129. sglang/srt/models/llama.py +2 -0
  130. sglang/srt/models/llama4.py +15 -8
  131. sglang/srt/models/llava.py +258 -7
  132. sglang/srt/models/mimo_mtp.py +220 -0
  133. sglang/srt/models/minicpmo.py +5 -12
  134. sglang/srt/models/mistral.py +71 -1
  135. sglang/srt/models/mixtral.py +98 -34
  136. sglang/srt/models/mllama.py +3 -3
  137. sglang/srt/models/pixtral.py +467 -0
  138. sglang/srt/models/qwen2.py +95 -26
  139. sglang/srt/models/qwen2_5_vl.py +8 -0
  140. sglang/srt/models/qwen2_moe.py +330 -60
  141. sglang/srt/models/qwen2_vl.py +6 -0
  142. sglang/srt/models/qwen3.py +52 -10
  143. sglang/srt/models/qwen3_moe.py +411 -48
  144. sglang/srt/models/roberta.py +1 -1
  145. sglang/srt/models/siglip.py +294 -0
  146. sglang/srt/models/torch_native_llama.py +1 -1
  147. sglang/srt/openai_api/adapter.py +58 -20
  148. sglang/srt/openai_api/protocol.py +6 -8
  149. sglang/srt/operations.py +154 -0
  150. sglang/srt/operations_strategy.py +31 -0
  151. sglang/srt/reasoning_parser.py +3 -3
  152. sglang/srt/sampling/custom_logit_processor.py +18 -3
  153. sglang/srt/sampling/sampling_batch_info.py +4 -56
  154. sglang/srt/sampling/sampling_params.py +2 -2
  155. sglang/srt/server_args.py +162 -22
  156. sglang/srt/speculative/eagle_draft_cuda_graph_runner.py +3 -3
  157. sglang/srt/speculative/eagle_utils.py +138 -7
  158. sglang/srt/speculative/eagle_worker.py +69 -21
  159. sglang/srt/utils.py +74 -17
  160. sglang/test/few_shot_gsm8k.py +2 -2
  161. sglang/test/few_shot_gsm8k_engine.py +2 -2
  162. sglang/test/run_eval.py +2 -2
  163. sglang/test/runners.py +8 -1
  164. sglang/test/send_one.py +13 -3
  165. sglang/test/simple_eval_common.py +1 -1
  166. sglang/test/simple_eval_humaneval.py +1 -1
  167. sglang/test/test_cutlass_moe.py +278 -0
  168. sglang/test/test_programs.py +5 -5
  169. sglang/test/test_utils.py +55 -14
  170. sglang/utils.py +3 -3
  171. sglang/version.py +1 -1
  172. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/METADATA +23 -13
  173. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/RECORD +178 -149
  174. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/WHEEL +1 -1
  175. sglang/srt/function_call_parser.py +0 -858
  176. sglang/srt/platforms/interface.py +0 -371
  177. /sglang/{llama3_eval.py → eval/llama3_eval.py} +0 -0
  178. /sglang/srt/models/{xiaomi_mimo.py → mimo.py} +0 -0
  179. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/licenses/LICENSE +0 -0
  180. {sglang-0.4.6.post3.dist-info → sglang-0.4.6.post5.dist-info}/top_level.txt +0 -0
sglang/srt/entrypoints/http_server.py

@@ -47,8 +47,9 @@ from sglang.srt.disaggregation.utils import (
     register_disaggregation_server,
 )
 from sglang.srt.entrypoints.engine import _launch_subprocesses
-from sglang.srt.function_call_parser import FunctionCallParser
+from sglang.srt.function_call.function_call_parser import FunctionCallParser
 from sglang.srt.managers.io_struct import (
+    AbortReq,
     CloseSessionReqInput,
     ConfigureLoggingReq,
     EmbeddingReqInput,
@@ -181,13 +182,14 @@ async def health_generate(request: Request) -> Response:
         async for _ in _global_state.tokenizer_manager.generate_request(gri, request):
             break
 
-    tic = time.time()
+    tic = time.perf_counter()
     task = asyncio.create_task(gen())
-    while time.time() < tic + HEALTH_CHECK_TIMEOUT:
+    while time.perf_counter() < tic + HEALTH_CHECK_TIMEOUT:
         await asyncio.sleep(1)
         if _global_state.tokenizer_manager.last_receive_tstamp > tic:
             task.cancel()
             _global_state.tokenizer_manager.rid_to_state.pop(rid, None)
+            _global_state.tokenizer_manager.health_check_failed = False
             return Response(status_code=200)
 
     task.cancel()
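The timing changes in this hunk (and in launch_server_process further down) swap time.time() for time.perf_counter(). A minimal standalone sketch of the pattern, with names and the timeout value of my own choosing: perf_counter() is monotonic, so the deadline cannot shift if the system wall clock is adjusted mid-wait.

```python
import time

TIMEOUT_S = 20.0  # arbitrary value for this sketch; the server uses HEALTH_CHECK_TIMEOUT


def wait_until(condition, timeout_s=TIMEOUT_S, poll_s=1.0) -> bool:
    """Return True if `condition()` becomes true before the deadline."""
    tic = time.perf_counter()  # monotonic, unaffected by wall-clock adjustments
    while time.perf_counter() < tic + timeout_s:
        if condition():
            return True
        time.sleep(poll_s)  # poll once per interval, as the health check loop does
    return False
```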
@@ -201,6 +203,7 @@ async def health_generate(request: Request) -> Response:
         f"last_heartbeat time: {last_receive_time}"
     )
     _global_state.tokenizer_manager.rid_to_state.pop(rid, None)
+    _global_state.tokenizer_manager.health_check_failed = True
     return Response(status_code=503)
 
 
@@ -221,7 +224,7 @@ async def get_server_info():
     return {
         **dataclasses.asdict(_global_state.tokenizer_manager.server_args),
         **_global_state.scheduler_info,
-        **internal_states,
+        "internal_states": internal_states,
         "version": __version__,
     }
 
@@ -337,7 +340,11 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
         obj = ProfileReqInput()
 
     await _global_state.tokenizer_manager.start_profile(
-        obj.output_dir, obj.num_steps, obj.activities
+        output_dir=obj.output_dir,
+        num_steps=obj.num_steps,
+        activities=obj.activities,
+        with_stack=obj.with_stack,
+        record_shapes=obj.record_shapes,
     )
     return Response(
         content="Start profiling.\n",
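For context, a hedged client-side sketch of starting a profile run with the newly forwarded fields. The endpoint path and field names come from the hunk above; the port, activity names, and all values are assumptions of this sketch, not taken from the diff.

```python
import requests

# Assumes a locally running server on the default port; all values are illustrative.
resp = requests.post(
    "http://localhost:30000/start_profile",
    json={
        "output_dir": "/tmp/sglang_profile",
        "num_steps": 5,
        "activities": ["CPU", "GPU"],
        "with_stack": True,      # newly forwarded in this version
        "record_shapes": True,   # newly forwarded in this version
    },
)
print(resp.status_code, resp.text)  # "Start profiling." on success
```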
@@ -348,7 +355,7 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
 @app.api_route("/stop_profile", methods=["GET", "POST"])
 async def stop_profile_async():
     """Stop profiling."""
-    _global_state.tokenizer_manager.stop_profile()
+    await _global_state.tokenizer_manager.stop_profile()
     return Response(
         content="Stop profiling. This will take some time.\n",
         status_code=200,
@@ -539,6 +546,16 @@ async def configure_logging(obj: ConfigureLoggingReq, request: Request):
     return Response(status_code=200)
 
 
+@app.post("/abort_request")
+async def abort_request(obj: AbortReq, request: Request):
+    """Abort a request."""
+    try:
+        _global_state.tokenizer_manager.abort_request(rid=obj.rid)
+        return Response(status_code=200)
+    except Exception as e:
+        return _create_error_response(e)
+
+
 @app.post("/parse_function_call")
 async def parse_function_call_request(obj: ParseFunctionCallReq, request: Request):
     """
sglang/srt/entrypoints/http_server_engine.py

@@ -24,10 +24,10 @@ def launch_server_process(server_args: ServerArgs) -> multiprocessing.Process:
 
     base_url = server_args.url()
     timeout = 300.0  # Increased timeout to 5 minutes for downloading large models
-    start_time = time.time()
+    start_time = time.perf_counter()
 
     with requests.Session() as session:
-        while time.time() - start_time < timeout:
+        while time.perf_counter() - start_time < timeout:
             try:
                 headers = {
                     "Content-Type": "application/json; charset=utf-8",
@@ -140,3 +140,6 @@ class HttpServerEngineAdapter(EngineBase):
 
     def resume_memory_occupation(self):
         return self._make_request("resume_memory_occupation")
+
+    def flush_cache(self):
+        return self._make_request("flush_cache")
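Taken together, the hunks above add an /abort_request route on the HTTP server and a flush_cache() passthrough on HttpServerEngineAdapter. A hedged client-side sketch of both follows; the URL, port, and rid value are illustrative, and flush_cache assumes a matching /flush_cache route on the server side, which this diff does not show.

```python
import requests

BASE_URL = "http://localhost:30000"  # assumed default sglang server address

# Abort an in-flight request by its request id (the `rid` field of AbortReq).
requests.post(f"{BASE_URL}/abort_request", json={"rid": "example-rid"})


def drop_server_caches(adapter) -> None:
    """`adapter` is an already-constructed HttpServerEngineAdapter."""
    adapter.flush_cache()  # proxies to _make_request("flush_cache") per the hunk above
```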
sglang/srt/function_call/base_format_detector.py (new file)

@@ -0,0 +1,250 @@
+import json
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+
+from partial_json_parser.core.exceptions import MalformedJSON
+from partial_json_parser.core.options import Allow
+
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    ToolCallItem,
+    _GetInfoFunc,
+)
+from sglang.srt.function_call.utils import (
+    _find_common_prefix,
+    _is_complete_json,
+    _partial_json_loads,
+)
+from sglang.srt.openai_api.protocol import Tool
+
+logger = logging.getLogger(__name__)
+
+
+class BaseFormatDetector(ABC):
+    """Base class providing two sets of interfaces: one-time and streaming incremental."""
+
+    def __init__(self):
+        # initialize properties used for state when parsing tool calls in
+        self._buffer = ""
+        # streaming mode
+        self.prev_tool_call_arr: List[Dict] = []
+        self.current_tool_id: int = -1
+        self.current_tool_name_sent: bool = False
+        self.streamed_args_for_tool: List[str] = (
+            []
+        )  # map what has been streamed for each tool so far to a list
+        self.bot_token = ""
+        self.eot_token = ""
+
+    def parse_base_json(self, action: Any, tools: List[Tool]) -> List[ToolCallItem]:
+        tool_indices = {
+            tool.function.name: i for i, tool in enumerate(tools) if tool.function.name
+        }
+        if not isinstance(action, list):
+            action = [action]
+
+        results = []
+        for act in action:
+            name = act.get("name")
+            if name and name in tool_indices:
+                results.append(
+                    ToolCallItem(
+                        tool_index=tool_indices[name],
+                        name=name,
+                        parameters=json.dumps(
+                            act.get("parameters") or act.get("arguments", {}),
+                            ensure_ascii=False,
+                        ),
+                    )
+                )
+            else:
+                logger.warning(f"Model attempted to call undefined function: {name}")
+
+        return results
+
+    @abstractmethod
+    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+        """
+        Parses the text in one go. Returns success=True if the format matches, otherwise False.
+        Note that leftover_text here represents "content that this parser will not consume further".
+        """
+        action = json.loads(text)
+        return StreamingParseResult(calls=self.parse_base_json(action, tools))
+
+    def parse_streaming_increment(
+        self, new_text: str, tools: List[Tool]
+    ) -> StreamingParseResult:
+        """
+        Streaming incremental parsing with tool validation.
+        """
+        # Append new text to buffer
+        self._buffer += new_text
+        current_text = self._buffer
+        if not (self.bot_token in current_text or current_text.startswith("{")):
+            self._buffer = ""
+            if self.eot_token in new_text:
+                new_text = new_text.replace(self.eot_token, "")
+            return StreamingParseResult(normal_text=new_text)
+
+        # Build tool indices if not already built
+        if not hasattr(self, "_tool_indices"):
+            self._tool_indices = {
+                tool.function.name: i
+                for i, tool in enumerate(tools)
+                if tool.function and tool.function.name
+            }
+
+        flags = Allow.ALL if self.current_tool_name_sent else Allow.ALL & ~Allow.STR
+        try:
+            tool_call_arr = []
+            is_complete = []
+            try:
+                start_idx = (
+                    len(self.bot_token)
+                    if current_text.startswith(self.bot_token)
+                    else 0
+                )
+                while start_idx < len(current_text):
+                    (obj, end_idx) = _partial_json_loads(
+                        current_text[start_idx:], flags
+                    )
+                    is_complete.append(
+                        _is_complete_json(current_text[start_idx : start_idx + end_idx])
+                    )
+                    start_idx += end_idx + len("; ")
+
+                    # Validate tool name if present
+                    if "name" in obj and obj["name"] not in self._tool_indices:
+                        # Invalid tool name - reset state
+                        self._buffer = ""
+                        self.current_tool_id = -1
+                        self.current_tool_name_sent = False
+                        if self.streamed_args_for_tool:
+                            self.streamed_args_for_tool.pop()
+                        return StreamingParseResult()
+
+                    # Handle parameters/arguments consistency
+                    if "parameters" in obj:
+                        assert (
+                            "arguments" not in obj
+                        ), "model generated both parameters and arguments"
+                        obj["arguments"] = obj["parameters"]
+                    tool_call_arr.append(obj)
+
+            except MalformedJSON:
+                return StreamingParseResult()
+
+            if len(tool_call_arr) == 0:
+                return StreamingParseResult()
+
+            current_tool_call: Dict = (
+                tool_call_arr[self.current_tool_id] if len(tool_call_arr) > 0 else {}
+            )
+
+            # Handle new tool in array
+            if len(tool_call_arr) > 0 and len(tool_call_arr) > self.current_tool_id + 1:
+                if self.current_tool_id >= 0:
+                    cur_arguments = current_tool_call.get("arguments")
+                    if cur_arguments:
+                        cur_args_json = json.dumps(cur_arguments)
+                        sent = len(self.streamed_args_for_tool[self.current_tool_id])
+                        argument_diff = cur_args_json[sent:]
+
+                        res = StreamingParseResult(
+                            calls=[
+                                ToolCallItem(
+                                    tool_index=self.current_tool_id,
+                                    name="",
+                                    parameters=argument_diff,
+                                )
+                            ],
+                        )
+                        self.streamed_args_for_tool[
+                            self.current_tool_id
+                        ] += argument_diff
+                    else:
+                        res = StreamingParseResult()
+                else:
+                    res = StreamingParseResult()
+
+                self.current_tool_id = len(tool_call_arr) - 1
+                self.current_tool_name_sent = False
+                self.streamed_args_for_tool.append("")
+                return res
+
+            # Handle tool name
+            elif not self.current_tool_name_sent:
+                function_name = current_tool_call.get("name")
+                if function_name and function_name in self._tool_indices:
+                    res = StreamingParseResult(
+                        calls=[
+                            ToolCallItem(
+                                tool_index=self._tool_indices[function_name],
+                                name=function_name,
+                                parameters="",
+                            )
+                        ],
+                    )
+                    self.current_tool_name_sent = True
+                else:
+                    res = StreamingParseResult()
+
+            # Handle streaming arguments
+            else:
+                cur_arguments = current_tool_call.get("arguments")
+                res = StreamingParseResult()
+
+                if cur_arguments:
+                    sent = len(self.streamed_args_for_tool[self.current_tool_id])
+                    cur_args_json = json.dumps(cur_arguments)
+                    prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
+                        "arguments"
+                    )
+
+                    argument_diff = None
+                    if is_complete[self.current_tool_id]:
+                        argument_diff = cur_args_json[sent:]
+                        self._buffer = ""
+                        self.prev_tool_call_arr[self.current_tool_id].clear()
+                        self.current_tool_name_sent = False
+                        self.streamed_args_for_tool[self.current_tool_id] = ""
+
+                    elif prev_arguments:
+                        prev_args_json = json.dumps(prev_arguments)
+                        if cur_args_json != prev_args_json:
+                            prefix = _find_common_prefix(prev_args_json, cur_args_json)
+                            argument_diff = prefix[sent:]
+
+                    if argument_diff is not None:
+                        res = StreamingParseResult(
+                            calls=[
+                                ToolCallItem(
+                                    tool_index=self.current_tool_id,
+                                    parameters=argument_diff,
+                                )
+                            ],
+                        )
+                        if not is_complete[self.current_tool_id]:
+                            self.streamed_args_for_tool[
+                                self.current_tool_id
+                            ] += argument_diff
+
+            self.prev_tool_call_arr = tool_call_arr
+            return res
+
+        except Exception as e:
+            logger.error(f"Error in parse_streaming_increment: {e}")
+            return StreamingParseResult()
+
+    @abstractmethod
+    def has_tool_call(self, text: str) -> bool:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def structure_info(self) -> _GetInfoFunc:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def build_ebnf(self, tools: List[Tool]) -> str:
+        raise NotImplementedError()
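To make the streaming contract above concrete, here is a hedged driver sketch that is not part of the package: a concrete detector such as the DeepSeekV3Detector added later in this diff is fed decoded text chunks, and each call may return plain text plus incremental ToolCallItem deltas whose parameters fragments are concatenated per tool_index. The chunks and tools arguments are assumed to come from the serving loop.

```python
from typing import Dict, Iterable, List

from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
from sglang.srt.openai_api.protocol import Tool


def stream_tool_calls(chunks: Iterable[str], tools: List[Tool]) -> Dict[int, str]:
    """Accumulate streamed tool-call arguments, keyed by tool_index."""
    detector = DeepSeekV3Detector()
    args_by_tool: Dict[int, str] = {}
    for chunk in chunks:
        result = detector.parse_streaming_increment(chunk, tools)
        if result.normal_text:
            print("text:", result.normal_text)
        for call in result.calls:
            if call.name:  # the first delta for a tool carries its name
                print("tool call started:", call.name)
            args_by_tool[call.tool_index] = (
                args_by_tool.get(call.tool_index, "") + call.parameters
            )
    return args_by_tool
```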
sglang/srt/function_call/core_types.py (new file)

@@ -0,0 +1,34 @@
+from dataclasses import dataclass
+from typing import Callable, List, Optional
+
+from pydantic import BaseModel
+
+
+class ToolCallItem(BaseModel):
+    """Simple encapsulation of the parsed ToolCall result for easier usage in streaming contexts."""
+
+    tool_index: int
+    name: Optional[str] = None
+    parameters: str  # JSON string
+
+
+class StreamingParseResult(BaseModel):
+    """Result of streaming incremental parsing."""
+
+    normal_text: str = ""
+    calls: List[ToolCallItem] = []
+
+
+@dataclass
+class StructureInfo:
+    begin: str
+    end: str
+    trigger: str
+
+
+"""
+Helper alias of function
+Usually it is a function that takes a name string and returns a StructureInfo object,
+which can be used to construct a structural_tag object
+"""
+_GetInfoFunc = Callable[[str], StructureInfo]
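A small illustration of the _GetInfoFunc alias defined above: a callable that maps a function name to the begin/end/trigger strings used when building a structural_tag. The concrete strings mirror DeepSeekV3Detector.structure_info() later in this diff; nothing else is assumed.

```python
from sglang.srt.function_call.core_types import StructureInfo, _GetInfoFunc

get_info: _GetInfoFunc = lambda name: StructureInfo(
    begin=">" + name + "\n```json\n",
    end="\n```<",
    trigger=">" + name + "\n```json\n",
)

info = get_info("get_current_weather")
assert info.begin.startswith(">get_current_weather")
```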
sglang/srt/function_call/deepseekv3_detector.py (new file)

@@ -0,0 +1,157 @@
+import json
+import logging
+import re
+from typing import List
+
+from sglang.srt.function_call.base_format_detector import BaseFormatDetector
+from sglang.srt.function_call.core_types import (
+    StreamingParseResult,
+    StructureInfo,
+    ToolCallItem,
+    _GetInfoFunc,
+)
+from sglang.srt.function_call.ebnf_composer import EBNFComposer
+from sglang.srt.function_call.utils import _is_complete_json
+from sglang.srt.openai_api.protocol import Tool
+
+logger = logging.getLogger(__name__)
+
+
+class DeepSeekV3Detector(BaseFormatDetector):
+    """
+    Detector for DeepSeek models.
+    Assumes function call format:
+        '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.bot_token = "<|tool▁calls▁begin|>"
+        self.eot_token = "<|tool▁calls▁end|>"
+        self.func_call_regex = r"<|tool▁call▁begin|>.*?<|tool▁call▁end|>"
+        self.func_detail_regex = r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)\n```<|tool▁call▁end|>"
+        self._last_arguments = ""
+
+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a deepseek format tool call."""
+        return self.bot_token in text
+
+    def detect_and_parse(self, text: str, tools: List[Tool]) -> StreamingParseResult:
+        """
+        One-time parsing: Detects and parses tool calls in the provided text.
+
+        :param text: The complete text to parse.
+        :param tools: List of available tools.
+        :return: ParseResult indicating success or failure, consumed text, leftover text, and parsed calls.
+        """
+        idx = text.find(self.bot_token)
+        normal_text = text[:idx].strip() if idx != -1 else text
+        if self.bot_token not in text:
+            return StreamingParseResult(normal_text=normal_text, calls=[])
+        match_result_list = re.findall(self.func_call_regex, text, re.DOTALL)
+        calls = []
+        try:
+            for match_result in match_result_list:
+                # Get function name
+                func_detail = re.search(self.func_detail_regex, match_result, re.DOTALL)
+                func_name = func_detail.group(2)
+                func_args = func_detail.group(3)
+                func_args = json.loads(func_args)
+                # construct match_result for parse_base_json
+                match_result = {"name": func_name, "parameters": func_args}
+                calls.extend(self.parse_base_json(match_result, tools))
+            return StreamingParseResult(normal_text=normal_text, calls=calls)
+        except Exception as e:
+            logger.error(f"Error in detect_and_parse: {e}")
+            # return the normal text if parsing fails
+            return StreamingParseResult(normal_text=text)
+
+    def parse_streaming_increment(
+        self, new_text: str, tools: List[Tool]
+    ) -> StreamingParseResult:
+        """
+        Streaming incremental parsing tool calls for DeepSeekV3 format.
+        """
+        self._buffer += new_text
+        current_text = self._buffer
+
+        if self.bot_token not in current_text:
+            self._buffer = ""
+            for e_token in [self.eot_token, "```", "<|tool▁call▁end|>"]:
+                if e_token in new_text:
+                    new_text = new_text.replace(e_token, "")
+            return StreamingParseResult(normal_text=new_text)
+
+        if not hasattr(self, "_tool_indices"):
+            self._tool_indices = {
+                tool.function.name: i
+                for i, tool in enumerate(tools)
+                if tool.function and tool.function.name
+            }
+
+        calls: list[ToolCallItem] = []
+        try:
+            partial_match = re.search(
+                pattern=r"<|tool▁call▁begin|>(.*)<|tool▁sep|>(.*)\n```json\n(.*)",
+                string=current_text,
+                flags=re.DOTALL,
+            )
+            if partial_match:
+                func_name = partial_match.group(2).strip()
+                func_args_raw = partial_match.group(3).strip()
+
+                if not self.current_tool_name_sent:
+                    calls.append(
+                        ToolCallItem(
+                            tool_index=self._tool_indices.get(func_name, 0),
+                            name=func_name,
+                            parameters="",
+                        )
+                    )
+                    self.current_tool_name_sent = True
+                else:
+                    argument_diff = (
+                        func_args_raw[len(self._last_arguments) :]
+                        if func_args_raw.startswith(self._last_arguments)
+                        else func_args_raw
+                    )
+
+                    if argument_diff:
+                        calls.append(
+                            ToolCallItem(
+                                tool_index=self._tool_indices.get(func_name, 0),
+                                name=None,
+                                parameters=argument_diff,
+                            )
+                        )
+                        self._last_arguments += argument_diff
+
+                    if _is_complete_json(func_args_raw):
+                        result = StreamingParseResult(normal_text="", calls=calls)
+                        self._buffer = ""
+                        self._last_arguments = ""
+                        self.current_tool_name_sent = False
+                        return result
+
+            return StreamingParseResult(normal_text="", calls=calls)
+
+        except Exception as e:
+            logger.error(f"Error in parse_streaming_increment: {e}")
+            return StreamingParseResult(normal_text=current_text)
+
+    def structure_info(self) -> _GetInfoFunc:
+        return lambda name: StructureInfo(
+            begin=">" + name + "\n```json\n",
+            end="\n```<",
+            trigger=">" + name + "\n```json\n",
+        )
+
+    def build_ebnf(self, tools: List[Tool]):
+        return EBNFComposer.build_ebnf(
+            tools,
+            bot_token=self.bot_token,
+            eot_token=self.eot_token,
+            tool_call_separator="",
+            call_rule_fmt='"<|tool▁call▁begin|>function<|tool▁sep|>{name}\\n```json\\n" {arguments_rule} "\\n```<|tool▁call▁end|>"',
+            function_format="json",
+        )
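Finally, a hedged sketch of driving the detector above in one-shot mode, not part of the package. The example string follows the format shown in the class docstring (token spelling copied from the diff); tools is the request's List[Tool], whose construction is omitted here, and each call that parses successfully is printed with its JSON arguments.

```python
from typing import List

from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
from sglang.srt.openai_api.protocol import Tool


def parse_deepseek_output(text: str, tools: List[Tool]) -> None:
    detector = DeepSeekV3Detector()
    if not detector.has_tool_call(text):
        print(text)  # no tool-call markers: treat as plain content
        return
    result = detector.detect_and_parse(text, tools)
    print("content:", result.normal_text)
    for call in result.calls:
        print("call:", call.name, call.parameters)


# Example input shaped like the class docstring above:
example = (
    "<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n"
    '```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|><|tool▁calls▁end|>'
)
# parse_deepseek_output(example, tools)  # `tools` comes from the OpenAI-compatible request
```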