sglang 0.5.1.post2__py3-none-any.whl → 0.5.2rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sglang/bench_one_batch.py +3 -0
- sglang/bench_one_batch_server.py +79 -53
- sglang/bench_serving.py +186 -14
- sglang/profiler.py +0 -1
- sglang/srt/configs/__init__.py +2 -0
- sglang/srt/configs/longcat_flash.py +104 -0
- sglang/srt/configs/model_config.py +12 -0
- sglang/srt/connector/__init__.py +1 -1
- sglang/srt/connector/base_connector.py +1 -2
- sglang/srt/connector/redis.py +2 -2
- sglang/srt/connector/serde/__init__.py +1 -1
- sglang/srt/connector/serde/safe_serde.py +4 -3
- sglang/srt/conversation.py +38 -5
- sglang/srt/disaggregation/ascend/conn.py +75 -0
- sglang/srt/disaggregation/launch_lb.py +0 -13
- sglang/srt/disaggregation/mini_lb.py +33 -8
- sglang/srt/disaggregation/prefill.py +1 -1
- sglang/srt/distributed/parallel_state.py +24 -14
- sglang/srt/entrypoints/engine.py +19 -12
- sglang/srt/entrypoints/http_server.py +174 -34
- sglang/srt/entrypoints/openai/protocol.py +87 -24
- sglang/srt/entrypoints/openai/serving_chat.py +50 -9
- sglang/srt/entrypoints/openai/serving_completions.py +15 -0
- sglang/srt/eplb/eplb_manager.py +26 -2
- sglang/srt/eplb/expert_distribution.py +29 -2
- sglang/srt/function_call/deepseekv31_detector.py +222 -0
- sglang/srt/function_call/function_call_parser.py +2 -0
- sglang/srt/function_call/gpt_oss_detector.py +144 -256
- sglang/srt/harmony_parser.py +588 -0
- sglang/srt/hf_transformers_utils.py +26 -7
- sglang/srt/layers/activation.py +12 -0
- sglang/srt/layers/attention/ascend_backend.py +374 -136
- sglang/srt/layers/attention/flashattention_backend.py +241 -7
- sglang/srt/layers/attention/flashinfer_backend.py +5 -2
- sglang/srt/layers/attention/flashinfer_mla_backend.py +5 -2
- sglang/srt/layers/attention/hybrid_attn_backend.py +53 -21
- sglang/srt/layers/attention/trtllm_mla_backend.py +25 -10
- sglang/srt/layers/communicator.py +1 -2
- sglang/srt/layers/layernorm.py +28 -3
- sglang/srt/layers/linear.py +3 -2
- sglang/srt/layers/logits_processor.py +1 -1
- sglang/srt/layers/moe/cutlass_moe.py +0 -8
- sglang/srt/layers/moe/ep_moe/kernels.py +74 -0
- sglang/srt/layers/moe/ep_moe/layer.py +13 -13
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=129,N=352,device_name=NVIDIA_B200,dtype=fp8_w8a8.json +146 -0
- sglang/srt/layers/moe/fused_moe_triton/configs/triton_3_4_0/E=257,N=64,device_name=NVIDIA_A100-SXM4-80GB.json +146 -0
- sglang/srt/layers/moe/topk.py +35 -12
- sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +133 -235
- sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py +5 -10
- sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py +5 -23
- sglang/srt/layers/quantization/fp8.py +2 -1
- sglang/srt/layers/quantization/fp8_kernel.py +2 -2
- sglang/srt/layers/quantization/fp8_utils.py +2 -2
- sglang/srt/layers/quantization/modelopt_quant.py +7 -0
- sglang/srt/layers/quantization/mxfp4.py +25 -27
- sglang/srt/layers/quantization/mxfp4_tensor.py +3 -1
- sglang/srt/layers/quantization/utils.py +13 -0
- sglang/srt/layers/quantization/w8a8_int8.py +7 -3
- sglang/srt/layers/rotary_embedding.py +28 -1
- sglang/srt/layers/sampler.py +29 -5
- sglang/srt/layers/utils.py +0 -14
- sglang/srt/managers/cache_controller.py +237 -204
- sglang/srt/managers/detokenizer_manager.py +48 -2
- sglang/srt/managers/io_struct.py +57 -0
- sglang/srt/managers/mm_utils.py +5 -1
- sglang/srt/managers/multi_tokenizer_mixin.py +591 -0
- sglang/srt/managers/scheduler.py +94 -9
- sglang/srt/managers/scheduler_output_processor_mixin.py +20 -18
- sglang/srt/managers/scheduler_update_weights_mixin.py +8 -1
- sglang/srt/managers/tokenizer_manager.py +122 -42
- sglang/srt/mem_cache/chunk_cache.py +1 -1
- sglang/srt/mem_cache/hicache_storage.py +51 -23
- sglang/srt/mem_cache/hiradix_cache.py +87 -71
- sglang/srt/mem_cache/lora_radix_cache.py +1 -1
- sglang/srt/mem_cache/memory_pool.py +77 -14
- sglang/srt/mem_cache/memory_pool_host.py +4 -5
- sglang/srt/mem_cache/radix_cache.py +6 -4
- sglang/srt/mem_cache/radix_cache_cpp.py +1 -1
- sglang/srt/mem_cache/storage/hf3fs/storage_hf3fs.py +38 -20
- sglang/srt/mem_cache/storage/mooncake_store/mooncake_store.py +87 -82
- sglang/srt/mem_cache/swa_radix_cache.py +1 -1
- sglang/srt/model_executor/model_runner.py +6 -5
- sglang/srt/model_loader/loader.py +15 -24
- sglang/srt/model_loader/utils.py +12 -0
- sglang/srt/models/deepseek_v2.py +38 -13
- sglang/srt/models/gpt_oss.py +2 -15
- sglang/srt/models/llama_eagle3.py +4 -0
- sglang/srt/models/longcat_flash.py +1015 -0
- sglang/srt/models/longcat_flash_nextn.py +691 -0
- sglang/srt/models/qwen2.py +26 -3
- sglang/srt/models/qwen2_5_vl.py +66 -41
- sglang/srt/models/qwen2_moe.py +22 -2
- sglang/srt/models/transformers.py +1 -1
- sglang/srt/multimodal/processors/base_processor.py +4 -2
- sglang/srt/reasoning_parser.py +56 -300
- sglang/srt/sampling/penaltylib/orchestrator.py +14 -2
- sglang/srt/server_args.py +122 -56
- sglang/srt/speculative/eagle_worker.py +28 -8
- sglang/srt/tokenizer/tiktoken_tokenizer.py +6 -1
- sglang/srt/utils.py +73 -5
- sglang/test/attention/test_trtllm_mla_backend.py +12 -3
- sglang/version.py +1 -1
- {sglang-0.5.1.post2.dist-info → sglang-0.5.2rc0.dist-info}/METADATA +7 -6
- {sglang-0.5.1.post2.dist-info → sglang-0.5.2rc0.dist-info}/RECORD +107 -99
- {sglang-0.5.1.post2.dist-info → sglang-0.5.2rc0.dist-info}/WHEEL +0 -0
- {sglang-0.5.1.post2.dist-info → sglang-0.5.2rc0.dist-info}/licenses/LICENSE +0 -0
- {sglang-0.5.1.post2.dist-info → sglang-0.5.2rc0.dist-info}/top_level.txt +0 -0
--- a/sglang/srt/function_call/gpt_oss_detector.py
+++ b/sglang/srt/function_call/gpt_oss_detector.py
@@ -1,7 +1,7 @@
 import json
 import logging
 import re
-from typing import List
+from typing import List, Optional
 
 from sglang.srt.entrypoints.openai.protocol import Tool
 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
@@ -10,60 +10,31 @@ from sglang.srt.function_call.core_types import (
     ToolCallItem,
     _GetInfoFunc,
 )
+from sglang.srt.harmony_parser import HarmonyParser
 
 logger = logging.getLogger(__name__)
 
 
 class GptOssDetector(BaseFormatDetector):
     """
-    Detector for T4-style function calls
+    Detector for T4-style function calls using HarmonyParser.
 
-
-
-    2. Commentary with action plan: <|channel|>commentary<|message|>{content}<|end|>
-
-    For parallel function calls, each call is self-contained and starts with its own channel:
-    <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"SF"}<|call|>
-    <|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query":"SF attractions"}<|call|>
-
-    Examples:
-    Single: <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"San Francisco"}<|call|>commentary
-    Multiple: <|channel|>commentary to=functions.get_weather<|constrain|>json<|message|>{"location":"Paris"}<|call|>commentary<|channel|>commentary to=functions.search<|constrain|>json<|message|>{"query":"Paris tourism"}<|call|>
-    With Action Plan: <|channel|>commentary<|message|>**Action plan**: 1. Do X 2. Do Y<|end|><|start|>assistant<|channel|>commentary to=functions.x<|constrain|>json<|message|>{"template": "basic_html", "path": "index.html"}<|call|>
+    Handles tool calls in the format:
+    <|channel|>commentary to={namespace.function}<|constrain|>json<|message|>{args}<|call|>
     """
 
     def __init__(self):
         super().__init__()
+        self.harmony_parser = HarmonyParser()
         self.bot_token = "<|start|>assistant<|channel|>commentary"
         self.eot_token = "<|call|>"
-        # TODO: no clear indication how parallel tool call response format is
-        self.tool_call_separator = ""
-
-        # Pattern for complete function calls with to= parameter
-        # Handles both <|call|> and <|call|>commentary endings
-        # Also handles optional <|start|>assistant prefix and whitespace after function name
-        self.function_call_pattern = re.compile(
-            r"(?:<\|start\|>assistant)?<\|channel\|>commentary to=([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s*"
-            r"<\|constrain\|>json<\|message\|>(.*?)<\|call\|>(?:commentary)?",
-            re.DOTALL,
-        )
-
-        # Pattern for streaming function calls (incomplete)
-        # Also handles optional whitespace after function name
-        self.streaming_pattern = re.compile(
-            r"(?:<\|start\|>assistant)?<\|channel\|>commentary to=([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\s*"
-            r"<\|constrain\|>json<\|message\|>(.*)",
-            re.DOTALL,
-        )
 
-        # Pattern
-        self.
-            r"
+        # Pattern to extract function name and JSON from tool_call event content
+        self.tool_extract_pattern = re.compile(
+            r"to=([a-zA-Z_][a-zA-Z0-9_.]*)\s*<\|constrain\|>json<\|message\|>(.*?)(?:<\|call\|>|$)",
             re.DOTALL,
         )
 
-        self._last_arguments = ""
-
     def has_tool_call(self, text: str) -> bool:
         """Check if text contains TypeScript-style function call markers."""
         return self.bot_token in text
@@ -73,259 +44,176 @@ class GptOssDetector(BaseFormatDetector):
         if not self.has_tool_call(text):
             return StreamingParseResult(normal_text=text, calls=[])
 
-
+        # Parse with HarmonyParser
+        events = self.harmony_parser.parse(text)
+        # Flush buffer for complete parsing
+        events += self.harmony_parser.parse("")
 
+        tool_indices = self._get_tool_indices(tools)
         calls = []
+        normal_parts = []
         tool_index = 0
 
-
-
-
-
-
-
-
-        )
-
-        # Track processed positions to avoid double-processing
-        processed_ranges = []
-
-        # First, extract all tool calls
-        for match in self.function_call_pattern.finditer(text):
-            full_function_name = match.group(1)
-            args_content = match.group(2)
-            processed_ranges.append((match.start(), match.end()))
-
-            function_name = (
-                full_function_name.split(".")[-1]
-                if "." in full_function_name
-                else full_function_name
-            )
-
-            try:
-                arguments = json.loads(args_content) if args_content.strip() else {}
-            except json.JSONDecodeError:
-                continue
-
-            if function_name in tool_indices:
-                calls.append(
-                    ToolCallItem(
-                        tool_index=tool_index,
-                        name=function_name,
-                        parameters=json.dumps(arguments, ensure_ascii=False),
-                    )
+        for event in events:
+            if event.event_type == "tool_call":
+                # Extract tool call from event content
+                tool_call = self._extract_tool_call_from_event(
+                    event.raw_text if event.raw_text else event.content,
+                    tool_indices,
+                    tool_index,
                 )
-
-
-
-
-
-
-            is_tool_call = any(
-                start <= match_start < end or start < match_end <= end
-                for start, end in processed_ranges
-            )
-
-            # If this commentary is not part of a tool call, include it in normal text
-            if not is_tool_call:
-                content = match.group(1).strip()
-                if content:
-                    normal_text_parts.append(content)
-
-        # Handle remaining text after all matches
-        if processed_ranges:
-            last_match_end = max(end for _, end in processed_ranges)
-            if last_match_end < len(text):
-                remaining_text = text[last_match_end:]
-
-                # Clean up <|start|>assistant prefixes and extract final content
-                # Remove standalone <|start|>assistant prefixes
-                remaining_text = re.sub(r"<\|start\|>assistant(?!\w)", "", remaining_text)
-
-                # Extract content from final channel if present
-                final_pattern = re.compile(
-                    r"<\|channel\|>final<\|message\|>(.*?)(?:<\|return\|>|$)", re.DOTALL
-                )
-                final_match = final_pattern.search(remaining_text)
-
-                if final_match:
-                    # Get everything before final channel + final channel content
-                    before_final = remaining_text[: final_match.start()].strip()
-                    final_content = final_match.group(1).strip()
+                if tool_call:
+                    calls.append(tool_call)
+                    tool_index += 1
+            elif event.event_type == "normal":
+                normal_parts.append(event.content)
+            # Ignore reasoning events in function call context
 
-
-
-                    parts.append(before_final)
-                    if final_content:
-                        parts.append(final_content)
-                    remaining_text = " ".join(parts) if parts else ""
-
-                remaining_text = remaining_text.strip()
-
-                if remaining_text:
-                    normal_text_parts.append(remaining_text)
-
-        # Combine all normal text parts
-        final_normal_text = " ".join(part for part in normal_text_parts if part).strip()
-        return StreamingParseResult(normal_text=final_normal_text, calls=calls)
+        normal_text = " ".join(normal_parts).strip()
+        return StreamingParseResult(normal_text=normal_text, calls=calls)
 
     def parse_streaming_increment(
         self, new_text: str, tools: List[Tool]
     ) -> StreamingParseResult:
         """Parse incremental streaming text for TypeScript-style function calls."""
         self._buffer += new_text
-
-
-
-
-
-        if
-
-
-
-
-
-
-
-
-
-
-
+
+        # Always use HarmonyParser for parsing to ensure proper filtering
+        events = self.harmony_parser.parse(new_text)
+
+        # Quick check if we might have tool calls
+        if (
+            "<|channel|>commentary to=" not in self._buffer
+            and not self.current_tool_name_sent
+        ):
+            # No tool calls detected, check for final content
+            if (
+                "<|channel|>final" in self._buffer
+                or "assistantfinal" in self._buffer.lower()
+            ):
+                # Extract normal text from events
+                normal_text = "".join(
+                    [e.content for e in events if e.event_type == "normal"]
+                )
+                if normal_text:
+                    self._buffer = ""
+                    return StreamingParseResult(normal_text=normal_text, calls=[])
+
+            # For other content, extract normal text from events (with filtering applied)
+            normal_text = "".join(
+                [e.content for e in events if e.event_type == "normal"]
             )
-
-            if final_match:
-                final_content = final_match.group(1).strip()
+            if normal_text or events:
                 self._buffer = ""
-                return StreamingParseResult(normal_text=
+                return StreamingParseResult(normal_text=normal_text, calls=[])
+            else:
+                # No events processed, continue buffering
+                return StreamingParseResult(normal_text="", calls=[])
 
-
-
+        if not events:
+            # No complete events yet
+            return StreamingParseResult(normal_text="", calls=[])
 
+        # Initialize state if needed
         if not hasattr(self, "_tool_indices"):
             self._tool_indices = self._get_tool_indices(tools)
 
         calls = []
-
-
-
-        if
-
-
-
-
-
-                    if "." in full_function_name
-                    else full_function_name
+        normal_text = ""
+
+        for event in events:
+            if event.event_type == "tool_call":
+                # We got a complete tool call from HarmonyParser
+                tool_call_info = self._extract_tool_call_from_event(
+                    event.raw_text if event.raw_text else event.content,
+                    self._tool_indices,
+                    self.current_tool_id if self.current_tool_id >= 0 else 0,
                 )
 
-
-
-                self.current_tool_id
-
-
-
-
-
-                    self.prev_tool_call_arr.
-
-                    self.streamed_args_for_tool.
-
-
-
-                        ToolCallItem(
-                            tool_index=self.current_tool_id,
-                            name=function_name,
-                            parameters="",
-                        )
-                    )
-                    self.current_tool_name_sent = True
-                    # Store the tool call info
+                if tool_call_info:
+                    # Initialize state if first tool
+                    if self.current_tool_id == -1:
+                        self.current_tool_id = 0
+                        self.prev_tool_call_arr = []
+                        self.streamed_args_for_tool = [""]
+
+                    # Ensure arrays are large enough
+                    while len(self.prev_tool_call_arr) <= self.current_tool_id:
+                        self.prev_tool_call_arr.append({})
+                    while len(self.streamed_args_for_tool) <= self.current_tool_id:
+                        self.streamed_args_for_tool.append("")
+
+                    # Store tool call info
                     self.prev_tool_call_arr[self.current_tool_id] = {
-                        "name":
-                        "arguments":
+                        "name": tool_call_info.name,
+                        "arguments": json.loads(tool_call_info.parameters),
                     }
-                    self.streamed_args_for_tool[self.current_tool_id] = ""
-
-                # Check if we have a complete function call
-                complete_match = self.function_call_pattern.search(current_text)
-                if complete_match:
-                    args_content = complete_match.group(2)
-
-                    try:
-                        parsed_args = json.loads(args_content)
-                        self.prev_tool_call_arr[self.current_tool_id][
-                            "arguments"
-                        ] = parsed_args
-
-                        # Send complete arguments if we haven't sent them yet
-                        if not self.streamed_args_for_tool[self.current_tool_id]:
-                            # Send the complete arguments as JSON string
-                            calls.append(
-                                ToolCallItem(
-                                    tool_index=self.current_tool_id,
-                                    name=None,
-                                    parameters=json.dumps(
-                                        parsed_args, ensure_ascii=False
-                                    ),
-                                )
-                            )
-                            self.streamed_args_for_tool[self.current_tool_id] = (
-                                json.dumps(parsed_args, ensure_ascii=False)
-                            )
-                    except json.JSONDecodeError:
-                        pass
-
-                    # Remove the completed function call from buffer
-                    remaining_after_call = current_text[complete_match.end() :]
-
-                    # Clean up <|start|>assistant prefixes and extract final content
-                    remaining_after_call = re.sub(
-                        r"<\|start\|>assistant(?!\w)", "", remaining_after_call
-                    )
 
-                    #
-
-
-
+                    # Emit the complete tool call at once
+                    # (Could be modified to emit name first, then args, if needed)
+                    calls.append(tool_call_info)
+
+                    # Mark as streamed
+                    self.streamed_args_for_tool[self.current_tool_id] = (
+                        tool_call_info.parameters
                     )
-                    final_match = final_pattern.search(remaining_after_call)
 
-
-
-
-
-
+                    # Move to next tool
+                    self.current_tool_id += 1
+                    self.current_tool_name_sent = False
+
+            elif event.event_type == "normal":
+                normal_text += event.content
 
-
-
-                        parts.append(before_final)
-                        if final_content:
-                            parts.append(final_content)
-                        remaining_after_call = " ".join(parts) if parts else ""
+        # Clear buffer since HarmonyParser handles buffering
+        self._buffer = ""
 
-
+        return StreamingParseResult(normal_text=normal_text, calls=calls)
 
-
-
-
+    def _extract_tool_call_from_event(
+        self, content: str, tool_indices: dict, tool_index: int
+    ) -> Optional[ToolCallItem]:
+        """
+        Extract tool call information from HarmonyParser event content.
 
-
-
-
-                    final_text = final_content
-                elif remaining_after_call:
-                    final_text = remaining_after_call
+        Content format: "commentary to=functions.get_weather<|constrain|>json<|message|>{...}"
+        """
+        match = self.tool_extract_pattern.search(content)
 
-
+        if not match:
+            logger.debug(f"Could not extract tool call from: {content[:100]}")
+            return None
 
-
+        full_function_name = match.group(1)
+        json_content = match.group(2)
 
-
-
-
+        # Extract function name (last part after .)
+        function_name = (
+            full_function_name.split(".")[-1]
+            if "." in full_function_name
+            else full_function_name
+        )
+
+        # Check if tool exists
+        if function_name not in tool_indices:
+            logger.debug(f"Function {function_name} not in available tools")
+            return None
+
+        # Parse JSON arguments
+        try:
+            arguments = json.loads(json_content) if json_content.strip() else {}
+        except json.JSONDecodeError as e:
+            logger.debug(f"Failed to parse JSON arguments: {e}")
+            return None
+
+        return ToolCallItem(
+            tool_index=tool_index,
+            name=function_name,
+            parameters=json.dumps(arguments, ensure_ascii=False),
+        )
 
     def structure_info(self) -> _GetInfoFunc:
-        raise NotImplementedError()
+        raise NotImplementedError("structure_info not used with HarmonyParser")
 
     def build_ebnf(self, tools: List[Tool]) -> str:
-        raise NotImplementedError()
+        raise NotImplementedError("build_ebnf not used with HarmonyParser")