entari-plugin-hyw 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic.
- entari_plugin_hyw/__init__.py +763 -309
- entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +1 -0
- entari_plugin_hyw/assets/icon/google.svg +1 -0
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +1 -0
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/assets/libs/highlight.css +10 -0
- entari_plugin_hyw/assets/libs/highlight.js +1213 -0
- entari_plugin_hyw/assets/libs/katex-auto-render.js +1 -0
- entari_plugin_hyw/assets/libs/katex.css +1 -0
- entari_plugin_hyw/assets/libs/katex.js +1 -0
- entari_plugin_hyw/assets/libs/tailwind.css +1 -0
- entari_plugin_hyw/assets/package-lock.json +953 -0
- entari_plugin_hyw/assets/package.json +16 -0
- entari_plugin_hyw/assets/tailwind.config.js +12 -0
- entari_plugin_hyw/assets/tailwind.input.css +235 -0
- entari_plugin_hyw/assets/template.html +157 -0
- entari_plugin_hyw/assets/template.html.bak +157 -0
- entari_plugin_hyw/assets/template.j2 +307 -0
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +36 -0
- entari_plugin_hyw/core/history.py +146 -0
- entari_plugin_hyw/core/hyw.py +41 -0
- entari_plugin_hyw/core/pipeline.py +1065 -0
- entari_plugin_hyw/core/render.py +596 -0
- entari_plugin_hyw/core/render.py.bak +926 -0
- entari_plugin_hyw/utils/__init__.py +2 -0
- entari_plugin_hyw/utils/browser.py +40 -0
- entari_plugin_hyw/utils/misc.py +93 -0
- entari_plugin_hyw/utils/playwright_tool.py +36 -0
- entari_plugin_hyw/utils/prompts.py +129 -0
- entari_plugin_hyw/utils/search.py +249 -0
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.3.dist-info}/METADATA +20 -28
- entari_plugin_hyw-3.3.3.dist-info/RECORD +46 -0
- entari_plugin_hyw/hyw_core.py +0 -700
- entari_plugin_hyw-3.3.1.dist-info/RECORD +0 -6
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.3.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1065 @@
import asyncio
import html
import json
import time
from contextlib import asynccontextmanager
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger
from openai import AsyncOpenAI

from .config import HYWConfig
from ..utils.search import SearchService
from ..utils.prompts import (
    AGENT_SP,
    AGENT_SP_INTRUCT_VISION_ADD,
    AGENT_SP_TOOLS_STANDARD_ADD,
    AGENT_SP_TOOLS_AGENT_ADD,
    AGENT_SP_SEARCH_ADD,
    AGENT_SP_PAGE_ADD,
    AGENT_SP_IMAGE_SEARCH_ADD,
    INTRUCT_SP,
    INTRUCT_SP_VISION_ADD,
    VISION_SP,
)

@asynccontextmanager
async def _null_async_context():
    yield None


class ProcessingPipeline:
    """
    Core pipeline (vision -> instruct/search -> agent).
    """

    def __init__(self, config: HYWConfig):
        self.config = config
        self.search_service = SearchService(config)
        self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
        self.all_web_results = []  # Cache for search results
        self.current_mode = "standard"  # standard | agent

        self.web_search_tool = {
            "type": "function",
            "function": {
                "name": "internal_web_search",
                "description": "Search the web for text.",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}},
                    "required": ["query"],
                },
            },
        }
        self.image_search_tool = {
            "type": "function",
            "function": {
                "name": "internal_image_search",
                "description": "Search for images related to a query.",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string"}},
                    "required": ["query"],
                },
            },
        }
        self.set_mode_tool = {
            "type": "function",
            "function": {
                "name": "set_mode",
                "description": "设定后续 Agent 的运行模式: standard | agent",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "mode": {"type": "string", "enum": ["standard", "agent"]},
                        "reason": {"type": "string"},
                    },
                    "required": ["mode"],
                },
            },
        }
        self.crawl_page_tool = {
            "type": "function",
            "function": {
                "name": "crawl_page",
                "description": "使用 Crawl4AI 抓取网页并返回 Markdown 文本。",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string"},
                    },
                    "required": ["url"],
                },
            },
        }

    async def execute(
        self,
        user_input: str,
        conversation_history: List[Dict],
        model_name: str = None,
        images: List[str] = None,
        vision_model_name: str = None,
        selected_vision_model: str = None,
    ) -> Dict[str, Any]:
        """
        1) Vision: summarize images once (no image persistence).
        2) Intruct: run web_search and decide whether to grant Playwright MCP tools.
        3) Agent: normally no tools; if granted, allow Playwright MCP tools (max 6 rounds; step 5 nudge, step 6 forced).
        """
        start_time = time.time()
        stats = {"start_time": start_time, "tool_calls_count": 0}
        # Token usage tracking for billing
        usage_totals = {"input_tokens": 0, "output_tokens": 0}
        active_model = model_name or self.config.model_name

        current_history = conversation_history
        final_response_content = ""
        structured: Dict[str, Any] = {}

        # Reset search cache for this execution
        self.all_web_results = []

        try:
            logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")

            trace: Dict[str, Any] = {
                "vision": None,
                "intruct": None,
                "agent": None,
            }

            # Vision stage
            vision_text = ""
            vision_start = time.time()
            vision_time = 0
            vision_cost = 0.0
            vision_usage = {}
            if images:
                vision_model = (
                    selected_vision_model
                    or vision_model_name
                    or getattr(self.config, "vision_model_name", None)
                    or active_model
                )
                vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SP
                vision_prompt = vision_prompt_tpl.format(user_msgs=user_input or "[图片]")
                vision_text, vision_usage = await self._run_vision_stage(
                    user_input=user_input,
                    images=images,
                    model=vision_model,
                    prompt=vision_prompt,
                )
                # Add vision usage with vision-specific pricing
                usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
                usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)

                # Calculate Vision Cost
                v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
                v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
                if v_in_price > 0 or v_out_price > 0:
                    vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)

                vision_time = time.time() - vision_start

                trace["vision"] = {
                    "model": vision_model,
                    "base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
                    "prompt": vision_prompt,
                    "user_input": user_input or "",
                    "images_count": len(images or []),
                    "output": vision_text,
                    "usage": vision_usage,
                    "time": vision_time,
                    "cost": vision_cost
                }

            # Intruct + pre-search
            instruct_start = time.time()
            instruct_model = getattr(self.config, "intruct_model_name", None) or active_model
            instruct_text, search_payloads, intruct_trace, intruct_usage, search_time = await self._run_instruct_stage(
                user_input=user_input,
                vision_text=vision_text,
                model=instruct_model,
            )
            instruct_time = time.time() - instruct_start

            # Calculate Instruct Cost
            instruct_cost = 0.0
            i_in_price = float(getattr(self.config, "intruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
            i_out_price = float(getattr(self.config, "intruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
            if i_in_price > 0 or i_out_price > 0:
                instruct_cost = (intruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (intruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)

            # Add instruct usage
            usage_totals["input_tokens"] += intruct_usage.get("input_tokens", 0)
            usage_totals["output_tokens"] += intruct_usage.get("output_tokens", 0)

            intruct_trace["time"] = instruct_time
            intruct_trace["cost"] = instruct_cost
            trace["intruct"] = intruct_trace

            # Start agent loop
            agent_start_time = time.time()
            current_history.append({"role": "user", "content": user_input or "..."})

            mode = intruct_trace.get("mode", self.current_mode).lower()
            logger.success(f"Instruct Mode: {mode}")
            self.current_mode = mode

            # Determine max iterations
            max_steps = 10 if mode == "agent" else 1

            step = 0
            agent_trace_steps: List[Dict[str, Any]] = []
            last_system_prompt = ""

            agent_tools: Optional[List[Dict[str, Any]]] = None
            if mode == "agent":
                agent_tools = [self.web_search_tool, self.image_search_tool, self.crawl_page_tool]

            # Agent loop
            while step < max_steps:
                step += 1
                logger.info(f"Pipeline: Agent step {step}/{max_steps}")

                if step == 5 and mode == "agent":
                    current_history.append(
                        {
                            "role": "system",
                            "content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
                        }
                    )

                tools_desc = ""
                if agent_tools:
                    tools_desc = "\n".join([
                        "- internal_web_search(query): 触发搜索并缓存结果",
                        "- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
                    ])

                user_msgs_text = user_input or ""

                search_msgs_text = self._format_search_msgs()
                image_msgs_text = self._format_image_search_msgs()

                has_search_results = any(not r.get("is_image") for r in self.all_web_results)
                has_image_results = any(r.get("is_image") for r in self.all_web_results)

                # Build agent system prompt
                agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP

                mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
                system_prompt = agent_prompt_tpl.format(
                    user_msgs=user_msgs_text,
                    mode=mode,
                    mode_desc=mode_desc_text
                )

                # Append vision text if available
                if vision_text:
                    system_prompt += AGENT_SP_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)

                # Append search results
                if has_search_results and search_msgs_text:
                    system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs=search_msgs_text)

                # Append crawled page content
                page_msgs_text = self._format_page_msgs()
                if page_msgs_text:
                    system_prompt += AGENT_SP_PAGE_ADD.format(page_msgs=page_msgs_text)

                if has_image_results and image_msgs_text:
                    system_prompt += AGENT_SP_IMAGE_SEARCH_ADD.format(image_search_msgs=image_msgs_text)

                last_system_prompt = system_prompt

                messages = [{"role": "system", "content": system_prompt}]
                messages.extend(current_history)

                tools_for_step = agent_tools if (agent_tools and step < max_steps) else None

                # Debug logging
                if tools_for_step:
                    logger.info(f"[Agent] Tools provided: {[t['function']['name'] for t in tools_for_step]}")
                else:
                    logger.warning(f"[Agent] NO TOOLS provided for step {step} (agent_tools={agent_tools is not None}, step<max={step < max_steps})")

                step_llm_start = time.time()
                response, step_usage = await self._safe_llm_call(
                    messages=messages,
                    model=active_model,
                    tools=tools_for_step,
                    tool_choice="auto" if tools_for_step else None,
                )
                step_llm_time = time.time() - step_llm_start

                # Debug: Check response
                has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
                logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")

                # Accumulate agent usage
                usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
                usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)

                if response.tool_calls and tools_for_step:
                    tool_calls = response.tool_calls
                    stats["tool_calls_count"] += len(tool_calls)

                    # Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
                    assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
                        "role": "assistant",
                        "content": response.content,
                        "tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
                    }
                    current_history.append(assistant_msg)

                    tasks = [self._safe_route_tool(tc) for tc in tool_calls]
                    tool_start_time = time.time()
                    results = await asyncio.gather(*tasks)
                    tool_exec_time = time.time() - tool_start_time

                    step_trace = {
                        "step": step,
                        "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
                        "tool_results": [],
                        "tool_time": tool_exec_time,
                        "llm_time": step_llm_time,
                    }
                    for i, result in enumerate(results):
                        tc = tool_calls[i]
                        step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
                        current_history.append(
                            {
                                "tool_call_id": tc.id,
                                "role": "tool",
                                "name": tc.function.name,
                                "content": str(result),
                            }
                        )
                    agent_trace_steps.append(step_trace)
                    continue

                final_response_content = response.content or ""
                current_history.append({"role": "assistant", "content": final_response_content})
                agent_trace_steps.append({"step": step, "final": True, "output": final_response_content})
                break

            if not final_response_content:
                final_response_content = "执行结束,但未生成内容。"

            structured = self._parse_tagged_response(final_response_content)
            final_content = structured.get("response") or final_response_content

            agent_time = time.time() - agent_start_time

            # Calculate Agent Cost
            agent_cost = 0.0
            a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
            a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)

            agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - intruct_usage.get("input_tokens", 0)
            agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - intruct_usage.get("output_tokens", 0)

            if a_in_price > 0 or a_out_price > 0:
                agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)

            trace["agent"] = {
                "model": active_model,
                "base_url": self.config.base_url,
                "system_prompt": last_system_prompt,
                "steps": agent_trace_steps,
                "final_output": final_response_content,
                "time": agent_time,
                "cost": agent_cost
            }
            trace_markdown = self._render_trace_markdown(trace)

            stats["total_time"] = time.time() - start_time
            stats["steps"] = step

            # Calculate billing info
            billing_info = {
                "input_tokens": usage_totals["input_tokens"],
                "output_tokens": usage_totals["output_tokens"],
                "total_cost": 0.0,
            }
            input_price = getattr(self.config, "input_price", None) or 0.0
            output_price = getattr(self.config, "output_price", None) or 0.0

            if input_price > 0 or output_price > 0:
                input_cost = (usage_totals["input_tokens"] / 1_000_000) * input_price
                output_cost = (usage_totals["output_tokens"] / 1_000_000) * output_price
                billing_info["total_cost"] = input_cost + output_cost

            # Build stages_used list for UI display
            stages_used = []

            def infer_icon(model_name: str, base_url: str) -> str:
                model_lower = (model_name or "").lower()
                url_lower = (base_url or "").lower()
                if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
                elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
                elif "gemini" in model_lower or "google" in url_lower: return "google"
                elif "gpt" in model_lower or "openai" in url_lower: return "openai"
                elif "qwen" in model_lower: return "qwen"
                elif "openrouter" in url_lower: return "openrouter"
                return "openai"

            def infer_provider(base_url: str) -> str:
                url_lower = (base_url or "").lower()
                if "openrouter" in url_lower: return "OpenRouter"
                elif "openai" in url_lower: return "OpenAI"
                elif "anthropic" in url_lower: return "Anthropic"
                elif "google" in url_lower: return "Google"
                elif "deepseek" in url_lower: return "DeepSeek"
                return ""

            if trace.get("vision"):
                v = trace["vision"]
                v_model = v.get("model", "")
                v_base_url = v.get("base_url", "") or self.config.base_url
                stages_used.append({
                    "name": "Vision",
                    "model": v_model,
                    "icon_config": getattr(self.config, "vision_icon", None) or infer_icon(v_model, v_base_url),
                    "provider": infer_provider(v_base_url),
                    "time": v.get("time", 0),
                    "cost": v.get("cost", 0.0)
                })

            if trace.get("intruct"):
                i = trace["intruct"]
                i_model = i.get("model", "")
                i_base_url = i.get("base_url", "") or self.config.base_url
                stages_used.append({
                    "name": "Instruct",
                    "model": i_model,
                    "icon_config": getattr(self.config, "instruct_icon", None) or getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
                    "provider": infer_provider(i_base_url),
                    "time": i.get("time", 0),
                    "cost": i.get("cost", 0.0)
                })

            if has_search_results and search_payloads:
                stages_used.append({
                    "name": "Search",
                    "model": getattr(self.config, "search_name", "DuckDuckGo"),
                    "icon_config": "search",
                    "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
                    "time": search_time,
                    "cost": 0.0
                })

            # Add Crawler stage if Instruct used crawl_page
            if trace.get("intruct"):
                intruct_tool_calls = trace["intruct"].get("tool_calls", [])
                crawl_calls = [tc for tc in intruct_tool_calls if tc.get("name") == "crawl_page"]
                if crawl_calls:
                    # Build crawled_pages list for UI
                    crawled_pages = []
                    for tc in crawl_calls:
                        url = tc.get("arguments", {}).get("url", "")
                        # Try to find cached result
                        found = next((r for r in self.all_web_results if r.get("url") == url and r.get("is_crawled")), None)
                        if found:
                            try:
                                from urllib.parse import urlparse
                                domain = urlparse(url).netloc
                            except:
                                domain = ""
                            crawled_pages.append({
                                "title": found.get("title", "Page"),
                                "url": url,
                                "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                            })

                    stages_used.append({
                        "name": "Crawler",
                        "model": "Crawl4AI",
                        "icon_config": "search",
                        "provider": "网页抓取",
                        "time": search_time,  # Use existing search_time which includes fetch time
                        "cost": 0.0,
                        "crawled_pages": crawled_pages
                    })

            # --- Granular Agent Stages (Grouped) ---
            if trace.get("agent"):
                a = trace["agent"]
                a_model = a.get("model", "") or active_model
                a_base_url = a.get("base_url", "") or self.config.base_url
                steps = a.get("steps", [])
                agent_icon = getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url)
                agent_provider = infer_provider(a_base_url)

                for s in steps:
                    if "tool_calls" in s:
                        # 1. Agent Thought Stage (with LLM time)
                        stages_used.append({
                            "name": "Agent",
                            "model": a_model,
                            "icon_config": agent_icon,
                            "provider": agent_provider,
                            "time": s.get("llm_time", 0), "cost": 0
                        })

                        # 2. Grouped Tool Stages
                        # Collect results for grouping
                        search_group_items = []
                        crawler_group_items = []

                        tcs = s.get("tool_calls", [])
                        trs = s.get("tool_results", [])

                        for idx, tc in enumerate(tcs):
                            t_name = tc.get("name")
                            # Try to get result content if available
                            t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""

                            if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
                                # We don't have per-call metadata easily unless we parse the 'result' string (which is JSON dump now for route_tool)
                                # But search results are cached in self.all_web_results.
                                # The 'content' of search tool result is basically "cached_for_prompt".
                                # So we don't need to put items here, just show "Search" container.
                                # But wait, if we want to show "what was searched", we can parse args.
                                args = tc.get("arguments", {})
                                query = args.get("query", "")
                                if query:
                                    search_group_items.append({"query": query})

                            elif t_name == "crawl_page":
                                # Get URL from arguments, title from result
                                args = tc.get("arguments", {})
                                url = args.get("url", "")
                                title = "Page"
                                try:
                                    page_data = json.loads(t_res_content)
                                    if isinstance(page_data, dict):
                                        title = page_data.get("title", "Page")
                                except:
                                    pass

                                if url:
                                    try:
                                        domain = urlparse(url).netloc
                                    except:
                                        domain = ""
                                    crawler_group_items.append({
                                        "title": title,
                                        "url": url,
                                        "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                                    })

                        # Append Grouped Stages
                        if search_group_items:
                            stages_used.append({
                                "name": "Search",
                                "model": getattr(self.config, "search_name", "DuckDuckGo"),
                                "icon_config": "search",
                                "provider": "Agent Search",
                                "time": s.get("tool_time", 0), "cost": 0,
                                "queries": search_group_items  # Render can use this if needed, or just show generic
                            })

                        if crawler_group_items:
                            stages_used.append({
                                "name": "Crawler",
                                "model": "Crawl4AI",
                                "icon_config": "browser",
                                "provider": "Page Fetcher",
                                "time": s.get("tool_time", 0), "cost": 0,
                                "crawled_pages": crawler_group_items
                            })

                    elif s.get("final"):
                        stages_used.append({
                            "name": "Agent",
                            "model": a_model,
                            "icon_config": agent_icon,
                            "provider": agent_provider,
                            "time": 0, "cost": 0
                        })

                # Assign total time/cost to last Agent stage
                last_agent = next((s for s in reversed(stages_used) if s["name"] == "Agent"), None)
                if last_agent:
                    last_agent["time"] = a.get("time", 0)
                    last_agent["cost"] = a.get("cost", 0.0)

            return {
                "llm_response": final_content,
                "structured_response": structured,
                "stats": stats,
                "model_used": active_model,
                "vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
                "conversation_history": current_history,
                "trace_markdown": trace_markdown,
                "billing_info": billing_info,
                "stages_used": stages_used,
            }

        except Exception as e:
            logger.exception("Pipeline Critical Failure")
            return {
                "llm_response": f"I encountered a critical error: {e}",
                "stats": stats,
                "error": str(e),
            }

    def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
        """Parse response for references and page references."""
        parsed = {"response": "", "references": [], "page_references": [], "flow_steps": []}
        if not text:
            return parsed

        import re

        remaining_text = text

        # 1. Try to unwrap JSON if the model acted like a ReAct agent
        try:
            # Check if it looks like JSON first to avoid performance hit
            if remaining_text.strip().startswith("{") and "action" in remaining_text:
                data = json.loads(remaining_text)
                if isinstance(data, dict) and "action_input" in data:
                    remaining_text = data["action_input"]
        except Exception:
            pass

        id_map = {}  # Map original search ID (str) -> new index (int)
        page_id_map = {}  # Map original page ID (str) -> new index (int)

        # Parse References Block (unified: contains both [search] and [page] entries)
        ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
        if ref_block_match:
            ref_content = ref_block_match.group(1).strip()
            for line in ref_content.split("\n"):
                line = line.strip()
                if not line: continue

                # Match [id] [type] [title](url)
                # e.g. [1] [search] [文本描述](url) or [5] [page] [页面标题](url)
                id_match = re.match(r"^\[(\d+)\]", line)
                type_match = re.search(r"\[(search|page)\]", line, re.IGNORECASE)
                link_match = re.search(r"\[([^\[\]]+)\]\(([^)]+)\)", line)

                idx = None
                if id_match:
                    try:
                        idx = int(id_match.group(1))
                    except ValueError:
                        pass

                ref_type = "search"  # default
                if type_match:
                    ref_type = type_match.group(1).lower()

                entry = None
                if idx is not None and self.all_web_results:
                    # For page type, only match crawled items
                    if ref_type == "page":
                        found = next((r for r in self.all_web_results if r.get("_id") == idx and r.get("is_crawled")), None)
                    else:
                        found = next((r for r in self.all_web_results if r.get("_id") == idx and not r.get("is_crawled")), None)

                    if found:
                        entry = {
                            "title": found.get("title"),
                            "url": found.get("url"),
                            "domain": found.get("domain", "")
                        }

                if not entry and link_match:
                    entry = {"title": link_match.group(1), "url": link_match.group(2)}

                if entry:
                    if ref_type == "page":
                        parsed["page_references"].append(entry)
                        if idx is not None:
                            page_id_map[str(idx)] = len(parsed["page_references"])
                    else:
                        parsed["references"].append(entry)
                        if idx is not None:
                            id_map[str(idx)] = len(parsed["references"])

            remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()

        # Replace search:id citations
        if id_map:
            def replace_search_citation(match):
                old_id = match.group(1) or match.group(2)
                if old_id in id_map:
                    return f"`search:{id_map[old_id]}`"
                return match.group(0)

            remaining_text = re.sub(r'\[(\d+)\]', replace_search_citation, remaining_text)
            remaining_text = re.sub(r'(?<!`)search:(\d+)(?!`)', replace_search_citation, remaining_text)
            remaining_text = re.sub(r'`search:(\d+)`', replace_search_citation, remaining_text)

        # Replace page:id citations
        if page_id_map:
            def replace_page_citation(match):
                old_id = match.group(1)
                if old_id in page_id_map:
                    return f"`page:{page_id_map[old_id]}`"
                return match.group(0)

            remaining_text = re.sub(r'(?<!`)page:(\d+)(?!`)', replace_page_citation, remaining_text)
            remaining_text = re.sub(r'`page:(\d+)`', replace_page_citation, remaining_text)

        parsed["response"] = remaining_text.strip()
        return parsed

    async def _safe_route_tool(self, tool_call):
        """Wrapper for safe concurrent execution of tool calls."""
        try:
            return await asyncio.wait_for(self._route_tool(tool_call), timeout=30.0)
        except asyncio.TimeoutError:
            return "Error: Tool execution timed out (30s limit)."
        except Exception as e:
            return f"Error: Tool execution failed: {e}"

    async def _route_tool(self, tool_call):
        """Execute tool call and return result."""
        name = tool_call.function.name
        args = json.loads(html.unescape(tool_call.function.arguments))

        if name == "internal_web_search" or name == "web_search":
            query = args.get("query")
            web = await self.search_service.search(query)

            # Cache results and assign IDs
            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)

            for item in web:
                current_max_id += 1
                item["_id"] = current_max_id
                item["query"] = query
                self.all_web_results.append(item)

            return json.dumps({"web_results_count": len(web), "status": "cached_for_prompt"}, ensure_ascii=False)

        if name == "internal_image_search":
            query = args.get("query")
            images = await self.search_service.image_search(query)

            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
            for item in images:
                current_max_id += 1
                item["_id"] = current_max_id
                item["query"] = query
                item["is_image"] = True
                self.all_web_results.append(item)

            return json.dumps({"image_results_count": len(images), "status": "cached_for_prompt"}, ensure_ascii=False)

        if name == "crawl_page":
            url = args.get("url")
            logger.info(f"[Tool] Crawling page: {url}")
            # Returns Dict: {content, title, url}
            result_dict = await self.search_service.fetch_page(url)

            # Cache the crawled content so Agent can access it
            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
            current_max_id += 1

            cached_item = {
                "_id": current_max_id,
                "title": result_dict.get("title", "Page"),
                "url": result_dict.get("url", url),
                "content": result_dict.get("content", "")[:2000],  # Clip content for prompt
                "domain": "",
                "is_crawled": True,
            }
            try:
                from urllib.parse import urlparse
                cached_item["domain"] = urlparse(url).netloc
            except:
                pass

            self.all_web_results.append(cached_item)

            return json.dumps({"crawl_status": "success", "title": cached_item["title"], "content_length": len(result_dict.get("content", ""))}, ensure_ascii=False)

        if name == "set_mode":
            mode = args.get("mode", "standard")
            self.current_mode = mode
            return f"Mode set to {mode}"

        return f"Unknown tool {name}"


    async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None):
        try:
            return await asyncio.wait_for(
                self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client),
                timeout=120.0,
            )
        except asyncio.TimeoutError:
            logger.error("LLM Call Timed Out")
            return type("obj", (object,), {"content": "Error: The model took too long to respond.", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
        except Exception as e:
            logger.error(f"LLM Call Failed: {e}")
            return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}

    async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI):
        try:
            payload_debug = json.dumps(messages)
            logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
        except Exception:
            pass

        t0 = time.time()
        logger.info("LLM Request SENT to API...")
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
            temperature=self.config.temperature,
        )
        logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")

        usage = {"input_tokens": 0, "output_tokens": 0}
        if hasattr(response, "usage") and response.usage:
            usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
            usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0

        return response.choices[0].message, usage

    async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
        content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
        for img_b64 in images:
            url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
            content_payload.append({"type": "image_url", "image_url": {"url": url}})

        client = self._client_for(
            api_key=getattr(self.config, "vision_api_key", None),
            base_url=getattr(self.config, "vision_base_url", None),
        )
        response, usage = await self._safe_llm_call(
            messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
            model=model,
            client=client,
        )
        return (response.content or "").strip(), usage

    async def _run_instruct_stage(
        self, user_input: str, vision_text: str, model: str
    ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
        """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
        # Instruct has access to: web_search, image_search, set_mode, crawl_page
        tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool]
        tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式"

        prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SP
        prompt = prompt_tpl.format(user_msgs=user_input or "", tools_desc=tools_desc)

        if vision_text:
            prompt = f"{prompt}\\n\\n{INTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"

        client = self._client_for(
            api_key=getattr(self.config, "intruct_api_key", None),
            base_url=getattr(self.config, "intruct_base_url", None),
        )

        history: List[Dict[str, Any]] = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": user_input or "..."},
        ]

        response, usage = await self._safe_llm_call(
            messages=history,
            model=model,
            tools=tools,
            tool_choice="auto",
            client=client,
        )

        search_payloads: List[str] = []
        intruct_trace: Dict[str, Any] = {
            "model": model,
            "base_url": getattr(self.config, "intruct_base_url", None) or self.config.base_url,
            "prompt": prompt,
            "user_input": user_input or "",
            "vision_add": vision_text or "",
            "tool_calls": [],
            "tool_results": [],
            "output": "",
        }

        search_time = 0.0
        mode = "standard"
        mode_reason = ""

        if response.tool_calls:
            plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
            history.append(plan_dict)

            tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]

            st = time.time()
            results = await asyncio.gather(*tasks)
            search_time = time.time() - st

            for i, result in enumerate(results):
                tc = response.tool_calls[i]
                history.append(
                    {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
                )
                intruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
                intruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})

                if tc.function.name in ["web_search", "internal_web_search"]:
                    search_payloads.append(str(result))
                elif tc.function.name == "set_mode":
                    try:
                        args = json.loads(html.unescape(tc.function.arguments))
                    except Exception:
                        args = {}
                    mode = args.get("mode", mode)
                    mode_reason = args.get("reason", "")

            intruct_trace["mode"] = mode
            if mode_reason:
                intruct_trace["mode_reason"] = mode_reason

            intruct_trace["output"] = ""
            intruct_trace["usage"] = usage
            return "", search_payloads, intruct_trace, usage, search_time

        intruct_trace["mode"] = mode
        intruct_trace["output"] = (response.content or "").strip()
        intruct_trace["usage"] = usage
        return "", search_payloads, intruct_trace, usage, 0.0

    def _format_search_msgs(self) -> str:
        """Format search snippets only (not crawled pages)."""
        if not self.all_web_results:
            return ""

        def clip(s: str, n: int) -> str:
            s = (s or "").strip()
            return s if len(s) <= n else s[: n - 1] + "…"

        lines = []
        for res in self.all_web_results:
            if res.get("is_image"): continue  # Skip images
            if res.get("is_crawled"): continue  # Skip crawled pages (handled separately)
            idx = res.get("_id")
            title = clip(res.get("title", ""), 80)
            url = res.get("url", "")
            content = clip(res.get("content", ""), 200)
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")

        return "\n".join(lines)

    def _format_page_msgs(self) -> str:
        """Format crawled page content (detailed)."""
        if not self.all_web_results:
            return ""

        def clip(s: str, n: int) -> str:
            s = (s or "").strip()
            return s if len(s) <= n else s[: n - 1] + "…"

        lines = []
        for res in self.all_web_results:
            if not res.get("is_crawled"): continue  # Only crawled pages
            idx = res.get("_id")
            title = clip(res.get("title", ""), 80)
            url = res.get("url", "")
            content = clip(res.get("content", ""), 1500)  # More content for pages
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")

        return "\n".join(lines)

    def _format_image_search_msgs(self) -> str:
        if not self.all_web_results:
            return ""

        lines = []
        for res in self.all_web_results:
            if not res.get("is_image"): continue
            idx = res.get("_id")
            title = res.get("title", "")
            url = res.get("image", "") or res.get("url", "")
            thumb = res.get("thumbnail", "")
            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nThumbnail: {thumb}\n")
        return "\n".join(lines)

    def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
        if api_key or base_url:
            return AsyncOpenAI(base_url=base_url or self.config.base_url, api_key=api_key or self.config.api_key)
        return self.client

    def _tool_call_to_trace(self, tool_call) -> Dict[str, Any]:
        try:
            args = json.loads(html.unescape(tool_call.function.arguments))
        except Exception:
            args = tool_call.function.arguments
        return {"id": getattr(tool_call, "id", None), "name": tool_call.function.name, "arguments": args}

    def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
        def fence(label: str, content: str) -> str:
            safe = (content or "").replace("```", "``\\`")
            return f"```{label}\n{safe}\n```"

        parts: List[str] = []
        parts.append("# Pipeline Trace\n")

        if trace.get("vision"):
            v = trace["vision"]
            parts.append("## Vision\n")
            parts.append(f"- model: `{v.get('model')}`")
            parts.append(f"- base_url: `{v.get('base_url')}`")
            parts.append(f"- images_count: `{v.get('images_count')}`\n")
            parts.append("### Prompt\n")
            parts.append(fence("text", v.get("prompt", "")))
            parts.append("\n### Output\n")
            parts.append(fence("text", v.get("output", "")))
            parts.append("")

        if trace.get("intruct"):
            t = trace["intruct"]
            parts.append("## Intruct\n")
            parts.append(f"- model: `{t.get('model')}`")
            parts.append(f"- base_url: `{t.get('base_url')}`\n")
            parts.append("### Prompt\n")
            parts.append(fence("text", t.get("prompt", "")))
            if t.get("tool_calls"):
                parts.append("\n### Tool Calls\n")
                parts.append(fence("json", json.dumps(t.get("tool_calls"), ensure_ascii=False, indent=2)))
            if t.get("tool_results"):
                parts.append("\n### Tool Results\n")
                parts.append(fence("json", json.dumps(t.get("tool_results"), ensure_ascii=False, indent=2)))
            parts.append("\n### Output\n")
            parts.append(fence("text", t.get("output", "")))
            parts.append("")

        if trace.get("agent"):
            a = trace["agent"]
            parts.append("## Agent\n")
            parts.append(f"- model: `{a.get('model')}`")
            parts.append(f"- base_url: `{a.get('base_url')}`\n")
            parts.append("### System Prompt\n")
            parts.append(fence("text", a.get("system_prompt", "")))
            parts.append("\n### Steps\n")
            parts.append(fence("json", json.dumps(a.get("steps", []), ensure_ascii=False, indent=2)))
            parts.append("\n### Final Output\n")
            parts.append(fence("text", a.get("final_output", "")))

        return "\n".join(parts).strip() + "\n"

    async def close(self):
        try:
            await self.search_service.close()
        except Exception:
            pass
        try:
            from ..utils.search import close_shared_crawler
            await close_shared_crawler()
        except Exception:
            pass
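
Usage sketch (illustrative, not part of the diff): the new ProcessingPipeline is constructed from an HYWConfig and driven through execute(); close() releases the search/crawler resources. The config handling below is a hypothetical placeholder inferred only from the attributes the module reads (base_url, api_key, model_name, temperature, optional pricing and per-stage overrides), not the plugin's actual defaults or call sites.

    import asyncio

    from entari_plugin_hyw.core.config import HYWConfig
    from entari_plugin_hyw.core.pipeline import ProcessingPipeline

    async def demo(config: HYWConfig) -> None:
        # config is assumed to already carry base_url, api_key, model_name and
        # temperature, since the pipeline accesses those attributes directly.
        pipeline = ProcessingPipeline(config)
        try:
            result = await pipeline.execute(
                user_input="What changed in this release?",
                conversation_history=[],
            )
            # On success the result dict exposes: llm_response, structured_response,
            # stats, model_used, conversation_history, trace_markdown, billing_info,
            # and stages_used (the per-stage list rendered by the UI).
            print(result["llm_response"])
        finally:
            await pipeline.close()

    # asyncio.run(demo(my_config))  # my_config: a hypothetical, already-populated HYWConfig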