entari-plugin-hyw 3.5.0rc7__py3-none-any.whl → 4.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +26 -4
- entari_plugin_hyw/misc.py +10 -3
- entari_plugin_hyw/pipeline.py +600 -700
- entari_plugin_hyw/prompts.py +10 -71
- entari_plugin_hyw/search.py +45 -8
- {entari_plugin_hyw-3.5.0rc7.dist-info → entari_plugin_hyw-4.0.0rc2.dist-info}/METADATA +1 -1
- {entari_plugin_hyw-3.5.0rc7.dist-info → entari_plugin_hyw-4.0.0rc2.dist-info}/RECORD +9 -9
- {entari_plugin_hyw-3.5.0rc7.dist-info → entari_plugin_hyw-4.0.0rc2.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.5.0rc7.dist-info → entari_plugin_hyw-4.0.0rc2.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/pipeline.py
CHANGED
|
@@ -12,14 +12,8 @@ from openai import AsyncOpenAI
|
|
|
12
12
|
from .search import SearchService
|
|
13
13
|
from .image_cache import get_cached_images
|
|
14
14
|
from .prompts import (
|
|
15
|
-
|
|
16
|
-
AGENT_SP_INSTRUCT_VISION_ADD,
|
|
17
|
-
AGENT_SP_TOOLS_STANDARD_ADD,
|
|
18
|
-
AGENT_SP_TOOLS_AGENT_ADD,
|
|
19
|
-
AGENT_SP_SEARCH_ADD,
|
|
15
|
+
SUMMARY_SP,
|
|
20
16
|
INSTRUCT_SP,
|
|
21
|
-
INSTRUCT_SP_VISION_ADD,
|
|
22
|
-
VISION_SP,
|
|
23
17
|
)
|
|
24
18
|
|
|
25
19
|
@asynccontextmanager
|
|
@@ -42,6 +36,7 @@ class ProcessingPipeline:
|
|
|
42
36
|
self.global_id_counter = 0
|
|
43
37
|
# Background tasks for async image search (not blocking agent)
|
|
44
38
|
self._image_search_tasks: List[asyncio.Task] = []
|
|
39
|
+
self._search_error: Optional[str] = None # Track critical search errors
|
|
45
40
|
|
|
46
41
|
self.web_search_tool = {
|
|
47
42
|
"type": "function",
|
|
@@ -55,33 +50,6 @@ class ProcessingPipeline:
|
|
|
55
50
|
},
|
|
56
51
|
},
|
|
57
52
|
}
|
|
58
|
-
self.image_search_tool = {
|
|
59
|
-
"type": "function",
|
|
60
|
-
"function": {
|
|
61
|
-
"name": "internal_image_search",
|
|
62
|
-
"description": "Search for images related to a query.",
|
|
63
|
-
"parameters": {
|
|
64
|
-
"type": "object",
|
|
65
|
-
"properties": {"query": {"type": "string"}},
|
|
66
|
-
"required": ["query"],
|
|
67
|
-
},
|
|
68
|
-
},
|
|
69
|
-
}
|
|
70
|
-
self.set_mode_tool = {
|
|
71
|
-
"type": "function",
|
|
72
|
-
"function": {
|
|
73
|
-
"name": "set_mode",
|
|
74
|
-
"description": "设定后续 Agent 的运行模式: standard | agent",
|
|
75
|
-
"parameters": {
|
|
76
|
-
"type": "object",
|
|
77
|
-
"properties": {
|
|
78
|
-
"mode": {"type": "string", "enum": ["standard", "agent"]},
|
|
79
|
-
"reason": {"type": "string"},
|
|
80
|
-
},
|
|
81
|
-
"required": ["mode"],
|
|
82
|
-
},
|
|
83
|
-
},
|
|
84
|
-
}
|
|
85
53
|
self.crawl_page_tool = {
|
|
86
54
|
"type": "function",
|
|
87
55
|
"function": {
|
|
@@ -104,7 +72,7 @@ class ProcessingPipeline:
|
|
|
104
72
|
"parameters": {
|
|
105
73
|
"type": "object",
|
|
106
74
|
"properties": {
|
|
107
|
-
"reason": {"type": "string", "description": "
|
|
75
|
+
"reason": {"type": "string", "description": "拒绝回答的原因(展示给用户)"},
|
|
108
76
|
},
|
|
109
77
|
"required": [],
|
|
110
78
|
},
|
|
@@ -124,669 +92,305 @@ class ProcessingPipeline:
|
|
|
124
92
|
selected_vision_model: str = None,
|
|
125
93
|
) -> Dict[str, Any]:
|
|
126
94
|
"""
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
95
|
+
New Pipeline Flow:
|
|
96
|
+
1) Instruct: Images go directly here, decides web_search/crawl_page/refuse.
|
|
97
|
+
2) Auto-Fetch: Automatically fetch first 4 search result pages.
|
|
98
|
+
3) Screenshot: Render fetched pages as screenshots.
|
|
99
|
+
4) Summary: Receives user images + page screenshots for final answer.
|
|
130
100
|
"""
|
|
131
101
|
start_time = time.time()
|
|
132
102
|
stats = {"start_time": start_time, "tool_calls_count": 0}
|
|
133
|
-
# Token usage tracking for billing
|
|
134
103
|
usage_totals = {"input_tokens": 0, "output_tokens": 0}
|
|
135
104
|
active_model = model_name or self.config.model_name
|
|
136
105
|
|
|
137
106
|
current_history = conversation_history
|
|
138
|
-
|
|
139
|
-
structured: Dict[str, Any] = {}
|
|
140
|
-
|
|
141
|
-
# Reset search cache and ID counter for this execution
|
|
107
|
+
# Reset globals
|
|
142
108
|
self.all_web_results = []
|
|
143
109
|
self.global_id_counter = 0
|
|
144
|
-
# Reset refuse_answer flag
|
|
145
110
|
self._should_refuse = False
|
|
146
111
|
self._refuse_reason = ""
|
|
112
|
+
self._image_search_tasks = []
|
|
147
113
|
|
|
148
114
|
try:
|
|
149
115
|
logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
|
|
150
|
-
|
|
116
|
+
|
|
151
117
|
trace: Dict[str, Any] = {
|
|
152
|
-
"vision": None,
|
|
153
118
|
"instruct": None,
|
|
154
|
-
"
|
|
119
|
+
"search": None,
|
|
120
|
+
"fetch": None,
|
|
121
|
+
"summary": None,
|
|
155
122
|
}
|
|
156
123
|
|
|
157
|
-
#
|
|
158
|
-
vision_text = ""
|
|
159
|
-
vision_start = time.time()
|
|
160
|
-
vision_time = 0
|
|
161
|
-
vision_cost = 0.0
|
|
162
|
-
vision_usage = {}
|
|
163
|
-
if images:
|
|
164
|
-
vision_model = (
|
|
165
|
-
selected_vision_model
|
|
166
|
-
or vision_model_name
|
|
167
|
-
or getattr(self.config, "vision_model_name", None)
|
|
168
|
-
or active_model
|
|
169
|
-
)
|
|
170
|
-
vision_prompt = VISION_SP.format(user_msgs=user_input or "[图片]")
|
|
171
|
-
vision_text, vision_usage = await self._run_vision_stage(
|
|
172
|
-
user_input=user_input,
|
|
173
|
-
images=images,
|
|
174
|
-
model=vision_model,
|
|
175
|
-
prompt=vision_prompt,
|
|
176
|
-
)
|
|
177
|
-
# Add vision usage with vision-specific pricing
|
|
178
|
-
usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
|
|
179
|
-
usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)
|
|
180
|
-
|
|
181
|
-
# Calculate Vision Cost
|
|
182
|
-
v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
|
|
183
|
-
v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
|
|
184
|
-
if v_in_price > 0 or v_out_price > 0:
|
|
185
|
-
vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)
|
|
186
|
-
|
|
187
|
-
vision_time = time.time() - vision_start
|
|
188
|
-
|
|
189
|
-
trace["vision"] = {
|
|
190
|
-
"model": vision_model,
|
|
191
|
-
"base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
|
|
192
|
-
"prompt": vision_prompt,
|
|
193
|
-
"user_input": user_input or "",
|
|
194
|
-
"images_count": len(images or []),
|
|
195
|
-
"output": vision_text,
|
|
196
|
-
"usage": vision_usage,
|
|
197
|
-
"time": vision_time,
|
|
198
|
-
"cost": vision_cost
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
# Instruct + pre-search
|
|
124
|
+
# --- 1. Instruct Stage (with images if provided) ---
|
|
202
125
|
instruct_start = time.time()
|
|
203
126
|
instruct_model = getattr(self.config, "instruct_model_name", None) or active_model
|
|
204
|
-
logger.info(f"Instruct Stage Config: instruct_model_name={getattr(self.config, 'instruct_model_name', None)}, active_model={active_model}, using: {instruct_model}")
|
|
205
127
|
instruct_text, search_payloads, instruct_trace, instruct_usage, search_time = await self._run_instruct_stage(
|
|
206
128
|
user_input=user_input,
|
|
207
|
-
|
|
129
|
+
images=images, # Pass images directly to instruct
|
|
208
130
|
model=instruct_model,
|
|
209
131
|
)
|
|
210
|
-
# Instruct time excludes search time (search_time is returned separately)
|
|
211
|
-
instruct_time = time.time() - instruct_start - search_time
|
|
212
132
|
|
|
213
|
-
#
|
|
133
|
+
# Check refuse
|
|
134
|
+
if self._should_refuse:
|
|
135
|
+
return {
|
|
136
|
+
"llm_response": "",
|
|
137
|
+
"structured_response": {},
|
|
138
|
+
"stats": stats,
|
|
139
|
+
"model_used": active_model,
|
|
140
|
+
"conversation_history": current_history,
|
|
141
|
+
"refuse_answer": True,
|
|
142
|
+
"refuse_reason": self._refuse_reason
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
# Check for critical search errors
|
|
146
|
+
if self._search_error:
|
|
147
|
+
return {
|
|
148
|
+
"llm_response": "",
|
|
149
|
+
"structured_response": {},
|
|
150
|
+
"stats": stats,
|
|
151
|
+
"model_used": active_model,
|
|
152
|
+
"conversation_history": current_history,
|
|
153
|
+
"refuse_answer": True,
|
|
154
|
+
"refuse_reason": f"搜索服务异常: {self._search_error} 请联系管理员。"
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
usage_totals["input_tokens"] += instruct_usage.get("input_tokens", 0)
|
|
158
|
+
usage_totals["output_tokens"] += instruct_usage.get("output_tokens", 0)
|
|
159
|
+
|
|
214
160
|
instruct_cost = 0.0
|
|
215
161
|
i_in_price = float(getattr(self.config, "instruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
|
|
216
162
|
i_out_price = float(getattr(self.config, "instruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
|
|
217
163
|
if i_in_price > 0 or i_out_price > 0:
|
|
218
164
|
instruct_cost = (instruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (instruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
|
|
219
165
|
|
|
220
|
-
# Add instruct usage
|
|
221
|
-
usage_totals["input_tokens"] += instruct_usage.get("input_tokens", 0)
|
|
222
|
-
usage_totals["output_tokens"] += instruct_usage.get("output_tokens", 0)
|
|
223
|
-
|
|
224
|
-
instruct_trace["time"] = instruct_time
|
|
225
166
|
instruct_trace["cost"] = instruct_cost
|
|
226
167
|
trace["instruct"] = instruct_trace
|
|
227
168
|
|
|
228
|
-
#
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
return {
|
|
233
|
-
"llm_response": "",
|
|
234
|
-
"structured_response": {},
|
|
235
|
-
"stats": stats,
|
|
236
|
-
"model_used": active_model,
|
|
237
|
-
"conversation_history": current_history,
|
|
238
|
-
"refuse_answer": True,
|
|
239
|
-
"refuse_reason": self._refuse_reason,
|
|
240
|
-
"stages_used": [],
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
# Start agent loop
|
|
244
|
-
agent_start_time = time.time()
|
|
245
|
-
current_history.append({"role": "user", "content": user_input or "..."})
|
|
246
|
-
|
|
247
|
-
mode = instruct_trace.get("mode", self.current_mode).lower()
|
|
248
|
-
logger.success(f"Instruct Mode: {mode}")
|
|
249
|
-
self.current_mode = mode
|
|
169
|
+
# --- 2. Auto-Fetch Stage (Automatically fetch first 4 search results) ---
|
|
170
|
+
fetch_start = time.time()
|
|
171
|
+
fetch_trace = {}
|
|
172
|
+
page_screenshots: List[str] = [] # Base64 screenshots of fetched pages
|
|
250
173
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
# Agent loop
|
|
263
|
-
while step < max_steps:
|
|
264
|
-
step += 1
|
|
265
|
-
logger.info(f"Pipeline: Agent step {step}/{max_steps}")
|
|
266
|
-
|
|
267
|
-
if step == 5 and mode == "agent":
|
|
268
|
-
current_history.append(
|
|
269
|
-
{
|
|
270
|
-
"role": "system",
|
|
271
|
-
"content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
|
|
272
|
-
}
|
|
273
|
-
)
|
|
274
|
-
|
|
275
|
-
tools_desc = ""
|
|
276
|
-
if agent_tools:
|
|
277
|
-
tools_desc = "\n".join([
|
|
278
|
-
"- internal_web_search(query): 触发搜索并缓存结果",
|
|
279
|
-
"- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
|
|
280
|
-
])
|
|
281
|
-
|
|
282
|
-
user_msgs_text = user_input or ""
|
|
283
|
-
|
|
284
|
-
search_msgs_text = self._format_search_msgs()
|
|
285
|
-
# Image search results are NOT passed to LLM - they're for UI rendering only
|
|
174
|
+
fetch_urls = []
|
|
175
|
+
search_items = [r for r in self.all_web_results if r.get("_type") == "search"]
|
|
176
|
+
if search_items:
|
|
177
|
+
# Group search results by query
|
|
178
|
+
query_groups = {}
|
|
179
|
+
for r in search_items:
|
|
180
|
+
q = r.get("query", "default")
|
|
181
|
+
if q not in query_groups:
|
|
182
|
+
query_groups[q] = []
|
|
183
|
+
query_groups[q].append(r)
|
|
286
184
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
185
|
+
raw_fetch_urls = []
|
|
186
|
+
# If multiple queries, take top 3 from each
|
|
187
|
+
if len(query_groups) > 1:
|
|
188
|
+
logger.info(f"Pipeline: Multiple search queries detected ({len(query_groups)}). Taking top 3 from each.")
|
|
189
|
+
for q, items in query_groups.items():
|
|
190
|
+
for item in items[:3]:
|
|
191
|
+
if item.get("url"):
|
|
192
|
+
raw_fetch_urls.append(item.get("url"))
|
|
193
|
+
else:
|
|
194
|
+
# Single query, take top 8
|
|
195
|
+
raw_fetch_urls = [r.get("url") for r in search_items[:8] if r.get("url")]
|
|
298
196
|
|
|
299
|
-
#
|
|
300
|
-
|
|
301
|
-
|
|
197
|
+
# Deduplicate while preserving order and filter blocked domains
|
|
198
|
+
final_fetch_urls = []
|
|
199
|
+
blocked_domains = getattr(self.config, "fetch_blocked_domains", ["wikipedia.org", "csdn.net", "sohu.com", "sogou.com"])
|
|
200
|
+
if isinstance(blocked_domains, str):
|
|
201
|
+
blocked_domains = [d.strip() for d in blocked_domains.split(",")]
|
|
302
202
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
all_search_parts.append(page_msgs_text)
|
|
310
|
-
# Images are excluded from LLM prompt - they're for UI rendering only
|
|
203
|
+
for url in raw_fetch_urls:
|
|
204
|
+
if url and url not in final_fetch_urls:
|
|
205
|
+
# Check blocklist
|
|
206
|
+
if any(domain in url.lower() for domain in blocked_domains):
|
|
207
|
+
continue
|
|
208
|
+
final_fetch_urls.append(url)
|
|
311
209
|
|
|
312
|
-
|
|
313
|
-
system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))
|
|
314
|
-
|
|
315
|
-
last_system_prompt = system_prompt
|
|
210
|
+
fetch_urls = final_fetch_urls
|
|
316
211
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
)
|
|
336
|
-
step_llm_time = time.time() - step_llm_start
|
|
337
|
-
|
|
338
|
-
# Debug: Check response
|
|
339
|
-
has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
|
|
340
|
-
logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")
|
|
212
|
+
# Check if search was performed but no URLs were available for fetching
|
|
213
|
+
has_search_call = False
|
|
214
|
+
if instruct_trace and "tool_calls" in instruct_trace:
|
|
215
|
+
has_search_call = any(tc.get("name") in ["web_search", "internal_web_search"] for tc in instruct_trace["tool_calls"])
|
|
216
|
+
|
|
217
|
+
if has_search_call and not fetch_urls:
|
|
218
|
+
return {
|
|
219
|
+
"llm_response": "",
|
|
220
|
+
"structured_response": {},
|
|
221
|
+
"stats": stats,
|
|
222
|
+
"model_used": active_model,
|
|
223
|
+
"conversation_history": current_history,
|
|
224
|
+
"refuse_answer": True,
|
|
225
|
+
"refuse_reason": "搜索结果为空或全部被过滤,无法生成回答。"
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
if fetch_urls:
|
|
229
|
+
logger.info(f"Pipeline: Auto-fetching up to {len(fetch_urls)} pages (keeping fastest 5): {fetch_urls}")
|
|
341
230
|
|
|
342
|
-
#
|
|
343
|
-
|
|
344
|
-
usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)
|
|
345
|
-
|
|
346
|
-
if response.tool_calls and tools_for_step:
|
|
347
|
-
tool_calls = response.tool_calls
|
|
348
|
-
stats["tool_calls_count"] += len(tool_calls)
|
|
349
|
-
|
|
350
|
-
# Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
|
|
351
|
-
assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
|
|
352
|
-
"role": "assistant",
|
|
353
|
-
"content": response.content,
|
|
354
|
-
"tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
|
|
355
|
-
}
|
|
356
|
-
current_history.append(assistant_msg)
|
|
357
|
-
|
|
358
|
-
tasks = [self._safe_route_tool(tc) for tc in tool_calls]
|
|
359
|
-
tool_start_time = time.time()
|
|
360
|
-
results = await asyncio.gather(*tasks)
|
|
361
|
-
tool_exec_time = time.time() - tool_start_time
|
|
362
|
-
|
|
363
|
-
step_trace = {
|
|
364
|
-
"step": step,
|
|
365
|
-
"tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
|
|
366
|
-
"tool_results": [],
|
|
367
|
-
"tool_time": tool_exec_time,
|
|
368
|
-
"llm_time": step_llm_time,
|
|
369
|
-
"usage": step_usage,
|
|
370
|
-
}
|
|
371
|
-
for i, result in enumerate(results):
|
|
372
|
-
tc = tool_calls[i]
|
|
373
|
-
step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
|
|
374
|
-
current_history.append(
|
|
375
|
-
{
|
|
376
|
-
"tool_call_id": tc.id,
|
|
377
|
-
"role": "tool",
|
|
378
|
-
"name": tc.function.name,
|
|
379
|
-
"content": str(result),
|
|
380
|
-
}
|
|
381
|
-
)
|
|
382
|
-
agent_trace_steps.append(step_trace)
|
|
383
|
-
continue
|
|
384
|
-
|
|
385
|
-
final_response_content = response.content or ""
|
|
386
|
-
current_history.append({"role": "assistant", "content": final_response_content})
|
|
387
|
-
agent_trace_steps.append({
|
|
388
|
-
"step": step,
|
|
389
|
-
"final": True,
|
|
390
|
-
"output": final_response_content,
|
|
391
|
-
"llm_time": step_llm_time,
|
|
392
|
-
"usage": step_usage
|
|
393
|
-
})
|
|
394
|
-
break
|
|
231
|
+
# Execute fetch and get screenshots
|
|
232
|
+
await self._run_auto_fetch_with_screenshots(fetch_urls)
|
|
395
233
|
|
|
396
|
-
|
|
397
|
-
|
|
234
|
+
fetch_trace = {
|
|
235
|
+
"model": "Auto",
|
|
236
|
+
"urls_fetched": fetch_urls,
|
|
237
|
+
"time": time.time() - fetch_start,
|
|
238
|
+
"cost": 0.0,
|
|
239
|
+
}
|
|
240
|
+
trace["fetch"] = fetch_trace
|
|
398
241
|
|
|
399
|
-
|
|
400
|
-
|
|
242
|
+
# Always collect screenshots from ALL page results (search auto-fetch + direct URL crawl)
|
|
243
|
+
fetch_items = [r for r in self.all_web_results if r.get("_type") == "page"]
|
|
244
|
+
for r in fetch_items:
|
|
245
|
+
if r.get("screenshot_b64"):
|
|
246
|
+
page_screenshots.append(r["screenshot_b64"])
|
|
247
|
+
|
|
248
|
+
if fetch_trace:
|
|
249
|
+
fetch_trace["screenshots_count"] = len(page_screenshots)
|
|
401
250
|
|
|
402
|
-
|
|
251
|
+
# --- 3. Summary Stage (with user images + page screenshots only) ---
|
|
252
|
+
summary_start = time.time()
|
|
253
|
+
summary_model = active_model
|
|
254
|
+
|
|
255
|
+
# Combine user images and page screenshots for summary
|
|
256
|
+
all_summary_images: List[str] = []
|
|
257
|
+
if images:
|
|
258
|
+
all_summary_images.extend(images)
|
|
259
|
+
all_summary_images.extend(page_screenshots)
|
|
403
260
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
261
|
+
summary_content, summary_usage, summary_trace_info = await self._run_summary_stage(
|
|
262
|
+
user_input=user_input,
|
|
263
|
+
images=all_summary_images if all_summary_images else None,
|
|
264
|
+
has_page_screenshots=bool(page_screenshots),
|
|
265
|
+
model=summary_model
|
|
266
|
+
)
|
|
408
267
|
|
|
409
|
-
|
|
410
|
-
|
|
268
|
+
usage_totals["input_tokens"] += summary_usage.get("input_tokens", 0)
|
|
269
|
+
usage_totals["output_tokens"] += summary_usage.get("output_tokens", 0)
|
|
411
270
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
"
|
|
420
|
-
"
|
|
421
|
-
"
|
|
422
|
-
"
|
|
271
|
+
summary_cost = 0.0
|
|
272
|
+
s_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
|
|
273
|
+
s_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
|
|
274
|
+
if s_in_price > 0 or s_out_price > 0:
|
|
275
|
+
summary_cost = (summary_usage.get("input_tokens", 0) / 1_000_000 * s_in_price) + (summary_usage.get("output_tokens", 0) / 1_000_000 * s_out_price)
|
|
276
|
+
|
|
277
|
+
trace["summary"] = {
|
|
278
|
+
"model": summary_model,
|
|
279
|
+
"system_prompt": summary_trace_info.get("prompt", ""),
|
|
280
|
+
"output": summary_content,
|
|
281
|
+
"usage": summary_usage,
|
|
282
|
+
"time": time.time() - summary_start,
|
|
283
|
+
"cost": summary_cost,
|
|
284
|
+
"images_count": len(all_summary_images)
|
|
423
285
|
}
|
|
424
|
-
trace_markdown = self._render_trace_markdown(trace)
|
|
425
286
|
|
|
287
|
+
# --- Result Assembly ---
|
|
426
288
|
stats["total_time"] = time.time() - start_time
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
total_cost_sum = vision_cost + instruct_cost
|
|
431
|
-
for s in agent_trace_steps:
|
|
432
|
-
s_usage = s.get("usage", {})
|
|
433
|
-
if s_usage:
|
|
434
|
-
s_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
|
|
435
|
-
s_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
|
|
436
|
-
total_cost_sum += (s_usage.get("input_tokens", 0) / 1_000_000 * s_in_price) + (s_usage.get("output_tokens", 0) / 1_000_000 * s_out_price)
|
|
437
|
-
|
|
289
|
+
structured = self._parse_tagged_response(summary_content)
|
|
290
|
+
final_content = structured.get("response") or summary_content
|
|
291
|
+
|
|
438
292
|
billing_info = {
|
|
439
293
|
"input_tokens": usage_totals["input_tokens"],
|
|
440
294
|
"output_tokens": usage_totals["output_tokens"],
|
|
441
|
-
"total_cost":
|
|
295
|
+
"total_cost": instruct_cost + summary_cost
|
|
442
296
|
}
|
|
443
|
-
|
|
444
|
-
# Build stages_used list for UI display
|
|
445
|
-
stages_used = []
|
|
446
297
|
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
url_lower = (base_url or "").lower()
|
|
450
|
-
if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
|
|
451
|
-
elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
|
|
452
|
-
elif "gemini" in model_lower or "google" in url_lower: return "google"
|
|
453
|
-
elif "gpt" in model_lower or "openai" in url_lower: return "openai"
|
|
454
|
-
elif "qwen" in model_lower: return "qwen"
|
|
455
|
-
elif "openrouter" in url_lower: return "openrouter"
|
|
456
|
-
return "openai"
|
|
457
|
-
|
|
458
|
-
def infer_provider(base_url: str) -> str:
|
|
459
|
-
url_lower = (base_url or "").lower()
|
|
460
|
-
if "openrouter" in url_lower: return "OpenRouter"
|
|
461
|
-
elif "openai" in url_lower: return "OpenAI"
|
|
462
|
-
elif "anthropic" in url_lower: return "Anthropic"
|
|
463
|
-
elif "google" in url_lower: return "Google"
|
|
464
|
-
elif "deepseek" in url_lower: return "DeepSeek"
|
|
465
|
-
return ""
|
|
298
|
+
# Build stages_used
|
|
299
|
+
stages_used = []
|
|
466
300
|
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
"
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
"
|
|
301
|
+
# Get page info
|
|
302
|
+
fetch_items = [r for r in self.all_web_results if r.get("_type") == "page"]
|
|
303
|
+
crawled_pages_ui = []
|
|
304
|
+
for r in fetch_items:
|
|
305
|
+
domain = ""
|
|
306
|
+
try:
|
|
307
|
+
from urllib.parse import urlparse
|
|
308
|
+
domain = urlparse(r.get("url", "")).netloc
|
|
309
|
+
except: pass
|
|
310
|
+
crawled_pages_ui.append({
|
|
311
|
+
"title": r.get("title", ""),
|
|
312
|
+
"url": r.get("url", ""),
|
|
313
|
+
"favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
|
|
478
314
|
})
|
|
479
315
|
|
|
316
|
+
# Extract images from pages
|
|
317
|
+
extracted_images = []
|
|
318
|
+
seen_imgs = set()
|
|
319
|
+
junk_keywords = ["icon", "logo", "badge", "avatar", "button", "social", "footer", "header", "banner", "license", "by-nc", "hosted_by", "pixel", "tracker", "ad", "ads", "advert", "promotion", "shop", "store", "group", "join", "qr", "qrcode", "weibo", "weixin", "douyin", "xiaohongshu", "bilibili", "official", "follow", "subscribe", "app"]
|
|
320
|
+
|
|
321
|
+
for r in fetch_items:
|
|
322
|
+
if "images" in r:
|
|
323
|
+
for img_url in r["images"]:
|
|
324
|
+
if img_url not in seen_imgs:
|
|
325
|
+
# Filter junk images
|
|
326
|
+
lower_url = img_url.lower()
|
|
327
|
+
if any(k in lower_url for k in junk_keywords):
|
|
328
|
+
continue
|
|
329
|
+
|
|
330
|
+
extracted_images.append({
|
|
331
|
+
"title": r.get("title", "Image"),
|
|
332
|
+
"url": img_url,
|
|
333
|
+
"thumbnail": img_url,
|
|
334
|
+
"domain": r.get("domain", "")
|
|
335
|
+
})
|
|
336
|
+
seen_imgs.add(img_url)
|
|
337
|
+
|
|
338
|
+
# Instruct Stage (with crawled pages and images)
|
|
480
339
|
if trace.get("instruct"):
|
|
481
340
|
i = trace["instruct"]
|
|
482
|
-
|
|
483
|
-
|
|
341
|
+
# Total time = instruct + search + fetch (until summary starts)
|
|
342
|
+
instruct_total_time = (i.get("time", 0) or 0) + search_time
|
|
343
|
+
if trace.get("fetch"):
|
|
344
|
+
instruct_total_time += trace["fetch"].get("time", 0)
|
|
345
|
+
|
|
484
346
|
stages_used.append({
|
|
485
347
|
"name": "Instruct",
|
|
486
|
-
"model":
|
|
487
|
-
"icon_config":
|
|
488
|
-
"provider":
|
|
489
|
-
"time":
|
|
490
|
-
"cost": i.get("cost", 0
|
|
348
|
+
"model": i.get("model"),
|
|
349
|
+
"icon_config": "openai",
|
|
350
|
+
"provider": "Instruct",
|
|
351
|
+
"time": instruct_total_time,
|
|
352
|
+
"cost": i.get("cost", 0),
|
|
353
|
+
"has_images": bool(images),
|
|
354
|
+
"crawled_pages": crawled_pages_ui, # Add crawled pages here
|
|
355
|
+
"image_references": extracted_images[:9] # Add images here
|
|
491
356
|
})
|
|
492
|
-
|
|
493
|
-
#
|
|
494
|
-
if (
|
|
495
|
-
|
|
496
|
-
initial_refs = [
|
|
497
|
-
{"title": r.get("title", ""), "url": r.get("url", ""), "domain": r.get("domain", "")}
|
|
498
|
-
for r in self.all_web_results if r.get("_type") == "search"
|
|
499
|
-
]
|
|
500
|
-
initial_images = [
|
|
501
|
-
{"title": r.get("title", ""), "url": r.get("url", ""), "thumbnail": r.get("thumbnail", "")}
|
|
502
|
-
for r in self.all_web_results if r.get("_type") == "image"
|
|
503
|
-
]
|
|
504
|
-
|
|
357
|
+
|
|
358
|
+
# Summary Stage
|
|
359
|
+
if trace.get("summary"):
|
|
360
|
+
s = trace["summary"]
|
|
505
361
|
stages_used.append({
|
|
506
|
-
"name": "
|
|
507
|
-
"model":
|
|
508
|
-
"icon_config": "
|
|
509
|
-
"provider":
|
|
510
|
-
"time":
|
|
511
|
-
"cost":
|
|
512
|
-
"
|
|
513
|
-
"image_references": initial_images
|
|
362
|
+
"name": "Summary",
|
|
363
|
+
"model": s.get("model"),
|
|
364
|
+
"icon_config": "openai",
|
|
365
|
+
"provider": "Summary",
|
|
366
|
+
"time": s.get("time", 0),
|
|
367
|
+
"cost": s.get("cost", 0),
|
|
368
|
+
"images_count": s.get("images_count", 0)
|
|
514
369
|
})
|
|
515
|
-
|
|
516
|
-
# Add Crawler stage if Instruct used crawl_page
|
|
517
|
-
if trace.get("instruct"):
|
|
518
|
-
instruct_tool_calls = trace["instruct"].get("tool_calls", [])
|
|
519
|
-
crawl_calls = [tc for tc in instruct_tool_calls if tc.get("name") == "crawl_page"]
|
|
520
|
-
if crawl_calls:
|
|
521
|
-
# Build crawled_pages list for UI
|
|
522
|
-
crawled_pages = []
|
|
523
|
-
for tc in crawl_calls:
|
|
524
|
-
url = tc.get("arguments", {}).get("url", "")
|
|
525
|
-
# Try to find cached result
|
|
526
|
-
found = next((r for r in self.all_web_results if r.get("url") == url and r.get("_type") == "page"), None)
|
|
527
|
-
if found:
|
|
528
|
-
try:
|
|
529
|
-
from urllib.parse import urlparse
|
|
530
|
-
domain = urlparse(url).netloc
|
|
531
|
-
except:
|
|
532
|
-
domain = ""
|
|
533
|
-
crawled_pages.append({
|
|
534
|
-
"title": found.get("title", "Page"),
|
|
535
|
-
"url": url,
|
|
536
|
-
"favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
|
|
537
|
-
})
|
|
538
|
-
|
|
539
|
-
stages_used.append({
|
|
540
|
-
"name": "Crawler",
|
|
541
|
-
"model": "Crawl4AI",
|
|
542
|
-
"icon_config": "search",
|
|
543
|
-
"provider": "网页抓取",
|
|
544
|
-
"time": search_time, # Use existing search_time which includes fetch time
|
|
545
|
-
"cost": 0.0,
|
|
546
|
-
"crawled_pages": crawled_pages
|
|
547
|
-
})
|
|
548
|
-
|
|
549
|
-
# --- Granular Agent Stages (Grouped) ---
|
|
550
|
-
if trace.get("agent"):
|
|
551
|
-
a = trace["agent"]
|
|
552
|
-
a_model = a.get("model", "") or active_model
|
|
553
|
-
a_base_url = a.get("base_url", "") or self.config.base_url
|
|
554
|
-
steps = a.get("steps", [])
|
|
555
|
-
agent_icon = infer_icon(a_model, a_base_url)
|
|
556
|
-
agent_provider = infer_provider(a_base_url)
|
|
557
|
-
|
|
558
|
-
for s in steps:
|
|
559
|
-
if "tool_calls" in s:
|
|
560
|
-
# 1. Agent Thought Stage (with LLM time)
|
|
561
|
-
# Calculate step cost
|
|
562
|
-
step_usage = s.get("usage", {})
|
|
563
|
-
step_cost = 0.0
|
|
564
|
-
if a_in_price > 0 or a_out_price > 0:
|
|
565
|
-
step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
|
|
566
|
-
|
|
567
|
-
stages_used.append({
|
|
568
|
-
"name": "Agent",
|
|
569
|
-
"model": a_model,
|
|
570
|
-
"icon_config": agent_icon,
|
|
571
|
-
"provider": agent_provider,
|
|
572
|
-
"time": s.get("llm_time", 0), "cost": step_cost
|
|
573
|
-
})
|
|
574
|
-
|
|
575
|
-
# 2. Grouped Tool Stages
|
|
576
|
-
# Collect results for grouping
|
|
577
|
-
search_group_items = []
|
|
578
|
-
crawler_group_items = []
|
|
579
|
-
|
|
580
|
-
tcs = s.get("tool_calls", [])
|
|
581
|
-
trs = s.get("tool_results", [])
|
|
582
|
-
|
|
583
|
-
for idx, tc in enumerate(tcs):
|
|
584
|
-
t_name = tc.get("name")
|
|
585
|
-
# Try to get result content if available
|
|
586
|
-
t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""
|
|
587
|
-
|
|
588
|
-
if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
|
|
589
|
-
# We don't have per-call metadata easily unless we parse the 'result' string (which is JSON dump now for route_tool)
|
|
590
|
-
# But search results are cached in self.all_web_results.
|
|
591
|
-
# The 'content' of search tool result is basically "cached_for_prompt".
|
|
592
|
-
# So we don't need to put items here, just show "Search" container.
|
|
593
|
-
# But wait, if we want to show "what was searched", we can parse args.
|
|
594
|
-
args = tc.get("arguments", {})
|
|
595
|
-
query = args.get("query", "")
|
|
596
|
-
if query:
|
|
597
|
-
search_group_items.append({"query": query})
|
|
598
|
-
|
|
599
|
-
elif t_name == "crawl_page":
|
|
600
|
-
# Get URL from arguments, title from result
|
|
601
|
-
args = tc.get("arguments", {})
|
|
602
|
-
url = args.get("url", "")
|
|
603
|
-
title = "Page"
|
|
604
|
-
try:
|
|
605
|
-
page_data = json.loads(t_res_content)
|
|
606
|
-
if isinstance(page_data, dict):
|
|
607
|
-
title = page_data.get("title", "Page")
|
|
608
|
-
except:
|
|
609
|
-
pass
|
|
610
|
-
|
|
611
|
-
if url:
|
|
612
|
-
try:
|
|
613
|
-
domain = urlparse(url).netloc
|
|
614
|
-
except:
|
|
615
|
-
domain = ""
|
|
616
|
-
crawler_group_items.append({
|
|
617
|
-
"title": title,
|
|
618
|
-
"url": url,
|
|
619
|
-
"favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
|
|
620
|
-
})
|
|
621
|
-
|
|
622
|
-
# Append Grouped Stages
|
|
623
|
-
if search_group_items:
|
|
624
|
-
stages_used.append({
|
|
625
|
-
"name": "Search",
|
|
626
|
-
"model": getattr(self.config, "search_name", "DuckDuckGo"),
|
|
627
|
-
"icon_config": "search",
|
|
628
|
-
"provider": "Agent Search",
|
|
629
|
-
"time": s.get("tool_time", 0), "cost": 0,
|
|
630
|
-
"queries": search_group_items # Render can use this if needed, or just show generic
|
|
631
|
-
})
|
|
632
|
-
|
|
633
|
-
if crawler_group_items:
|
|
634
|
-
stages_used.append({
|
|
635
|
-
"name": "Crawler",
|
|
636
|
-
"model": "Crawl4AI",
|
|
637
|
-
"icon_config": "browser",
|
|
638
|
-
"provider": "Page Fetcher",
|
|
639
|
-
"time": s.get("tool_time", 0), "cost": 0,
|
|
640
|
-
"crawled_pages": crawler_group_items
|
|
641
|
-
})
|
|
642
370
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
step_usage = s.get("usage", {})
|
|
646
|
-
step_cost = 0.0
|
|
647
|
-
if a_in_price > 0 or a_out_price > 0:
|
|
648
|
-
step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
|
|
649
|
-
|
|
650
|
-
stages_used.append({
|
|
651
|
-
"name": "Agent",
|
|
652
|
-
"model": a_model,
|
|
653
|
-
"icon_config": agent_icon,
|
|
654
|
-
"provider": agent_provider,
|
|
655
|
-
"time": s.get("llm_time", 0),
|
|
656
|
-
"cost": step_cost
|
|
657
|
-
})
|
|
658
|
-
|
|
659
|
-
# Assign total time/cost to last Agent stage
|
|
660
|
-
# Sum up total time/cost for UI/stats (implicit via loop above)
|
|
661
|
-
# No need to assign everything to last agent anymore as we distribute it.
|
|
662
|
-
|
|
663
|
-
# --- Final Filter: Only show cited items in workflow cards ---
|
|
664
|
-
cited_urls = {ref['url'] for ref in (structured.get("references", []) +
|
|
665
|
-
structured.get("page_references", []) +
|
|
666
|
-
structured.get("image_references", []))}
|
|
667
|
-
|
|
668
|
-
# Find images already rendered in markdown content (to avoid duplicate display)
|
|
669
|
-
markdown_image_urls = set()
|
|
670
|
-
md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
|
|
671
|
-
for match in md_img_pattern.finditer(final_content):
|
|
672
|
-
markdown_image_urls.add(match.group(1))
|
|
673
|
-
|
|
674
|
-
for s in stages_used:
|
|
675
|
-
if "references" in s and s["references"]:
|
|
676
|
-
s["references"] = [r for r in s["references"] if r.get("url") in cited_urls]
|
|
677
|
-
# Filter out images already shown in markdown content
|
|
678
|
-
# Check both url AND thumbnail since either might be used in markdown
|
|
679
|
-
if "image_references" in s and s["image_references"]:
|
|
680
|
-
s["image_references"] = [
|
|
681
|
-
r for r in s["image_references"]
|
|
682
|
-
if r.get("url") not in markdown_image_urls and (r.get("thumbnail") or "") not in markdown_image_urls
|
|
683
|
-
]
|
|
684
|
-
if "crawled_pages" in s and s["crawled_pages"]:
|
|
685
|
-
s["crawled_pages"] = [r for r in s["crawled_pages"] if r.get("url") in cited_urls]
|
|
686
|
-
|
|
687
|
-
# Clean up conversation history: Remove tool calls and results to save tokens and avoid ID conflicts
|
|
688
|
-
# Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
|
|
689
|
-
cleaned_history = []
|
|
690
|
-
for msg in current_history:
|
|
691
|
-
if msg.get("role") == "tool":
|
|
692
|
-
continue
|
|
693
|
-
if msg.get("role") == "assistant" and msg.get("tool_calls"):
|
|
694
|
-
continue
|
|
695
|
-
cleaned_history.append(msg)
|
|
696
|
-
|
|
697
|
-
# Update the reference (since it might be used by caller)
|
|
698
|
-
current_history[:] = cleaned_history
|
|
699
|
-
|
|
700
|
-
# --- Apply cached images to reduce render time ---
|
|
701
|
-
# Collect all image URLs that need caching (avoid duplicates when thumbnail == url)
|
|
702
|
-
all_image_urls = set()
|
|
703
|
-
for img_ref in structured.get("image_references", []):
|
|
704
|
-
if img_ref.get("thumbnail"):
|
|
705
|
-
all_image_urls.add(img_ref["thumbnail"])
|
|
706
|
-
if img_ref.get("url"):
|
|
707
|
-
all_image_urls.add(img_ref["url"])
|
|
708
|
-
|
|
709
|
-
for stage in stages_used:
|
|
710
|
-
for img_ref in stage.get("image_references", []):
|
|
711
|
-
if img_ref.get("thumbnail"):
|
|
712
|
-
all_image_urls.add(img_ref["thumbnail"])
|
|
713
|
-
if img_ref.get("url"):
|
|
714
|
-
all_image_urls.add(img_ref["url"])
|
|
715
|
-
|
|
716
|
-
# Also collect image URLs from markdown content
|
|
717
|
-
markdown_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
|
|
718
|
-
markdown_urls = markdown_img_pattern.findall(final_content)
|
|
719
|
-
all_image_urls.update(markdown_urls)
|
|
371
|
+
# Construct final trace markdown
|
|
372
|
+
trace_markdown = self._render_trace_markdown(trace)
|
|
720
373
|
|
|
721
|
-
#
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
cached_map = await get_cached_images(list(all_image_urls))
|
|
725
|
-
|
|
726
|
-
# Apply cached URLs to structured response
|
|
727
|
-
for img_ref in structured.get("image_references", []):
|
|
728
|
-
if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
|
|
729
|
-
img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
|
|
730
|
-
if img_ref.get("url") and img_ref["url"] in cached_map:
|
|
731
|
-
img_ref["url"] = cached_map[img_ref["url"]]
|
|
732
|
-
|
|
733
|
-
# Apply cached URLs to stages
|
|
734
|
-
for stage in stages_used:
|
|
735
|
-
for img_ref in stage.get("image_references", []):
|
|
736
|
-
if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
|
|
737
|
-
img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
|
|
738
|
-
if img_ref.get("url") and img_ref["url"] in cached_map:
|
|
739
|
-
img_ref["url"] = cached_map[img_ref["url"]]
|
|
740
|
-
|
|
741
|
-
# Replace image URLs in markdown content with cached versions
|
|
742
|
-
def replace_markdown_img(match):
|
|
743
|
-
full_match = match.group(0)
|
|
744
|
-
url = match.group(1)
|
|
745
|
-
cached_url = cached_map.get(url)
|
|
746
|
-
if cached_url and cached_url != url:
|
|
747
|
-
return full_match.replace(url, cached_url)
|
|
748
|
-
return full_match
|
|
749
|
-
|
|
750
|
-
final_content = markdown_img_pattern.sub(replace_markdown_img, final_content)
|
|
751
|
-
structured["response"] = markdown_img_pattern.sub(replace_markdown_img, structured.get("response", ""))
|
|
752
|
-
|
|
753
|
-
# Log cache stats
|
|
754
|
-
from .image_cache import get_image_cache
|
|
755
|
-
cache_stats = get_image_cache().get_stats()
|
|
756
|
-
logger.info(f"ImageCache stats: {cache_stats}")
|
|
757
|
-
|
|
758
|
-
except Exception as e:
|
|
759
|
-
logger.warning(f"Failed to apply image cache: {e}")
|
|
374
|
+
# Update history
|
|
375
|
+
current_history.append({"role": "user", "content": user_input or "..."})
|
|
376
|
+
current_history.append({"role": "assistant", "content": final_content})
|
|
760
377
|
|
|
761
|
-
#
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
pass
|
|
772
|
-
self._image_search_tasks.clear()
|
|
773
|
-
|
|
774
|
-
# Also cancel any pending image downloads in the cache
|
|
775
|
-
from .image_cache import get_image_cache
|
|
776
|
-
cache = get_image_cache()
|
|
777
|
-
if cache._pending:
|
|
778
|
-
logger.info(f"Cancelling {len(cache._pending)} pending image downloads")
|
|
779
|
-
for task in cache._pending.values():
|
|
780
|
-
if not task.done():
|
|
781
|
-
task.cancel()
|
|
782
|
-
cache._pending.clear()
|
|
378
|
+
# Schedule async cache task (fire and forget - doesn't block return)
|
|
379
|
+
cache_data = {
|
|
380
|
+
"user_input": user_input,
|
|
381
|
+
"trace": trace,
|
|
382
|
+
"trace_markdown": trace_markdown,
|
|
383
|
+
"page_screenshots": page_screenshots,
|
|
384
|
+
"final_content": final_content,
|
|
385
|
+
"stages_used": stages_used,
|
|
386
|
+
}
|
|
387
|
+
asyncio.create_task(self._cache_run_async(cache_data))
|
|
783
388
|
|
|
784
389
|
return {
|
|
785
390
|
"llm_response": final_content,
|
|
786
391
|
"structured_response": structured,
|
|
787
392
|
"stats": stats,
|
|
788
393
|
"model_used": active_model,
|
|
789
|
-
"vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
|
|
790
394
|
"conversation_history": current_history,
|
|
791
395
|
"trace_markdown": trace_markdown,
|
|
792
396
|
"billing_info": billing_info,
|
|
@@ -799,18 +403,11 @@ class ProcessingPipeline:
|
|
|
799
403
|
if hasattr(self, '_image_search_tasks') and self._image_search_tasks:
|
|
800
404
|
for task in self._image_search_tasks:
|
|
801
405
|
if not task.done(): task.cancel()
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
406
|
+
try:
|
|
407
|
+
await asyncio.wait(self._image_search_tasks, timeout=0.1)
|
|
408
|
+
except Exception: pass
|
|
409
|
+
self._image_search_tasks = []
|
|
805
410
|
|
|
806
|
-
from .image_cache import get_image_cache
|
|
807
|
-
cache = get_image_cache()
|
|
808
|
-
if cache._pending:
|
|
809
|
-
pending_tasks = list(cache._pending.values())
|
|
810
|
-
for task in pending_tasks:
|
|
811
|
-
if not task.done(): task.cancel()
|
|
812
|
-
await asyncio.wait(pending_tasks, timeout=0.1)
|
|
813
|
-
cache._pending.clear()
|
|
814
411
|
return {
|
|
815
412
|
"llm_response": f"I encountered a critical error: {e}",
|
|
816
413
|
"stats": stats,
|
|
@@ -968,7 +565,26 @@ class ProcessingPipeline:
|
|
|
968
565
|
|
|
969
566
|
if name == "internal_web_search" or name == "web_search":
|
|
970
567
|
query = args.get("query")
|
|
971
|
-
|
|
568
|
+
try:
|
|
569
|
+
web = await self.search_service.search(query)
|
|
570
|
+
except Exception as e:
|
|
571
|
+
logger.error(f"Failed to execute search: {e}")
|
|
572
|
+
self._search_error = str(e)
|
|
573
|
+
raise e
|
|
574
|
+
|
|
575
|
+
# Filter blocked domains immediately
|
|
576
|
+
blocked_domains = getattr(self.config, "fetch_blocked_domains", ["wikipedia.org", "csdn.net", "baidu.com"])
|
|
577
|
+
if isinstance(blocked_domains, str):
|
|
578
|
+
blocked_domains = [d.strip() for d in blocked_domains.split(",")]
|
|
579
|
+
|
|
580
|
+
# Use list comprehension for filtering
|
|
581
|
+
original_count = len(web)
|
|
582
|
+
web = [
|
|
583
|
+
item for item in web
|
|
584
|
+
if not any(blocked in item.get("url", "").lower() for blocked in blocked_domains)
|
|
585
|
+
]
|
|
586
|
+
if len(web) < original_count:
|
|
587
|
+
logger.info(f"Filtered {original_count - len(web)} blocked search results.")
|
|
972
588
|
|
|
973
589
|
# Cache results and assign global IDs
|
|
974
590
|
for item in web:
|
|
@@ -1018,6 +634,13 @@ class ProcessingPipeline:
|
|
|
1018
634
|
# Cache the crawled content with global ID
|
|
1019
635
|
self.global_id_counter += 1
|
|
1020
636
|
|
|
637
|
+
# Generate screenshot for direct URL crawl (so LLM can see it)
|
|
638
|
+
screenshot_b64 = await self._render_page_screenshot(
|
|
639
|
+
title=result_dict.get("title", "Page"),
|
|
640
|
+
url=url,
|
|
641
|
+
content=result_dict.get("content", "")[:4000]
|
|
642
|
+
)
|
|
643
|
+
|
|
1021
644
|
cached_item = {
|
|
1022
645
|
"_id": self.global_id_counter,
|
|
1023
646
|
"_type": "page",
|
|
@@ -1026,6 +649,7 @@ class ProcessingPipeline:
|
|
|
1026
649
|
"content": result_dict.get("content", ""),
|
|
1027
650
|
"domain": "",
|
|
1028
651
|
"is_crawled": True,
|
|
652
|
+
"screenshot_b64": screenshot_b64, # Add screenshot
|
|
1029
653
|
}
|
|
1030
654
|
try:
|
|
1031
655
|
from urllib.parse import urlparse
|
|
@@ -1091,45 +715,38 @@ class ProcessingPipeline:
|
|
|
1091
715
|
|
|
1092
716
|
return response.choices[0].message, usage
|
|
1093
717
|
|
|
1094
|
-
async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
    """Send the user text plus images to the vision model and return its reply.

    Args:
        user_input: Text accompanying the images; an empty string is sent when falsy.
        images: Base64 payloads or ready-made ``data:`` URLs. Bare payloads
            are wrapped as ``data:image/png;base64,...``.
        model: Model name forwarded to ``_safe_llm_call``.
        prompt: System prompt for the call.

    Returns:
        ``(stripped reply text, usage)`` as produced by ``_safe_llm_call``.
    """
    # One image_url part per image, keeping pre-built data URLs untouched.
    image_parts = [
        {
            "type": "image_url",
            "image_url": {
                "url": img if img.startswith("data:") else f"data:image/png;base64,{img}"
            },
        }
        for img in images
    ]
    user_parts: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}, *image_parts]

    vision_client = self._client_for(
        api_key=getattr(self.config, "vision_api_key", None),
        base_url=getattr(self.config, "vision_base_url", None),
    )
    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": user_parts},
    ]
    reply, usage = await self._safe_llm_call(
        messages=conversation,
        model=model,
        client=vision_client,
        extra_body=getattr(self.config, "vision_extra_body", None),
    )
    return (reply.content or "").strip(), usage
|
|
1111
719
|
|
|
1112
720
|
async def _run_instruct_stage(
|
|
1113
|
-
self, user_input: str,
|
|
721
|
+
self, user_input: str, images: List[str] = None, model: str = None
|
|
1114
722
|
) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
|
|
1115
|
-
"""Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time).
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
723
|
+
"""Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time).
|
|
724
|
+
|
|
725
|
+
Images are now passed directly here (merged vision stage).
|
|
726
|
+
"""
|
|
727
|
+
# Instruct has access to: web_search, crawl_page, refuse_answer
|
|
728
|
+
tools = [self.web_search_tool, self.crawl_page_tool, self.refuse_answer_tool]
|
|
729
|
+
tools_desc = "- internal_web_search: 搜索文本\n- crawl_page: 获取网页内容\n- refuse_answer: 拒绝回答(敏感/违规内容)"
|
|
1119
730
|
|
|
1120
731
|
prompt = INSTRUCT_SP.format(user_msgs=user_input or "", tools_desc=tools_desc)
|
|
1121
|
-
|
|
1122
|
-
if vision_text:
|
|
1123
|
-
prompt = f"{prompt}\\n\\n{INSTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
|
|
1124
732
|
|
|
1125
733
|
client = self._client_for(
|
|
1126
734
|
api_key=getattr(self.config, "instruct_api_key", None),
|
|
1127
735
|
base_url=getattr(self.config, "instruct_base_url", None),
|
|
1128
736
|
)
|
|
1129
737
|
|
|
738
|
+
# Build user content - multimodal if images provided
|
|
739
|
+
if images:
|
|
740
|
+
user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_input or "..."}]
|
|
741
|
+
for img_b64 in images:
|
|
742
|
+
url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
|
|
743
|
+
user_content.append({"type": "image_url", "image_url": {"url": url}})
|
|
744
|
+
else:
|
|
745
|
+
user_content = user_input or "..."
|
|
746
|
+
|
|
1130
747
|
history: List[Dict[str, Any]] = [
|
|
1131
748
|
{"role": "system", "content": prompt},
|
|
1132
|
-
{"role": "user", "content":
|
|
749
|
+
{"role": "user", "content": user_content},
|
|
1133
750
|
]
|
|
1134
751
|
|
|
1135
752
|
response, usage = await self._safe_llm_call(
|
|
@@ -1147,15 +764,14 @@ class ProcessingPipeline:
|
|
|
1147
764
|
"base_url": getattr(self.config, "instruct_base_url", None) or self.config.base_url,
|
|
1148
765
|
"prompt": prompt,
|
|
1149
766
|
"user_input": user_input or "",
|
|
1150
|
-
"
|
|
767
|
+
"has_images": bool(images),
|
|
768
|
+
"images_count": len(images) if images else 0,
|
|
1151
769
|
"tool_calls": [],
|
|
1152
770
|
"tool_results": [],
|
|
1153
771
|
"output": "",
|
|
1154
772
|
}
|
|
1155
773
|
|
|
1156
774
|
search_time = 0.0
|
|
1157
|
-
mode = "standard"
|
|
1158
|
-
mode_reason = ""
|
|
1159
775
|
|
|
1160
776
|
if response.tool_calls:
|
|
1161
777
|
plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
|
|
@@ -1177,27 +793,262 @@ class ProcessingPipeline:
|
|
|
1177
793
|
|
|
1178
794
|
if tc.function.name in ["web_search", "internal_web_search"]:
|
|
1179
795
|
search_payloads.append(str(result))
|
|
1180
|
-
|
|
1181
|
-
try:
|
|
1182
|
-
args = json.loads(html.unescape(tc.function.arguments))
|
|
1183
|
-
except Exception:
|
|
1184
|
-
args = {}
|
|
1185
|
-
mode = args.get("mode", mode)
|
|
1186
|
-
mode_reason = args.get("reason", "")
|
|
1187
|
-
|
|
1188
|
-
instruct_trace["mode"] = mode
|
|
1189
|
-
if mode_reason:
|
|
1190
|
-
instruct_trace["mode_reason"] = mode_reason
|
|
1191
|
-
|
|
796
|
+
|
|
1192
797
|
instruct_trace["output"] = ""
|
|
1193
798
|
instruct_trace["usage"] = usage
|
|
1194
799
|
return "", search_payloads, instruct_trace, usage, search_time
|
|
1195
800
|
|
|
1196
|
-
instruct_trace["mode"] = mode
|
|
1197
801
|
instruct_trace["output"] = (response.content or "").strip()
|
|
1198
802
|
instruct_trace["usage"] = usage
|
|
1199
803
|
return "", search_payloads, instruct_trace, usage, 0.0
|
|
1200
804
|
|
|
805
|
+
async def _run_auto_fetch_with_screenshots(self, urls: List[str]):
    """Fetch pages concurrently, screenshot each one, and cache the results.

    Every URL is fetched through ``self.search_service.fetch_page`` and
    rendered with ``self._render_page_screenshot``, under a per-URL timeout
    (``config.fetch_timeout``, default 15 seconds). Collection stops once
    ``config.fetch_max_results`` (default 5) pages have succeeded and the
    task for the first URL has finished; tasks still pending at that point
    are cancelled. Successful pages are appended to ``self.all_web_results``
    in the order of ``urls``, not in completion order.

    Args:
        urls: Page URLs to fetch. Duplicates are fetched only once.
    """
    if not urls:
        return

    from urllib.parse import urlparse

    fetch_timeout = float(getattr(self.config, "fetch_timeout", 15.0))
    max_results = int(getattr(self.config, "fetch_max_results", 5))

    # De-duplicate while keeping order: a repeated URL would otherwise spawn
    # an orphaned task and be appended to the results twice.
    ordered_urls = list(dict.fromkeys(urls))
    first_url = ordered_urls[0]

    async def _fetch_and_screenshot(url: str):
        """Fetch one page and build its cache entry; return None on failure."""
        try:
            result_dict = await self.search_service.fetch_page(url)

            # Reserve the ID before the next await. Sibling tasks bump the
            # shared counter while this one is suspended, so reading the
            # counter later would hand out duplicate or skipped IDs.
            self.global_id_counter += 1
            item_id = self.global_id_counter

            screenshot_b64 = await self._render_page_screenshot(
                title=result_dict.get("title", "Page"),
                url=url,
                # Limit the amount of text rendered into the screenshot.
                content=(result_dict.get("content") or "")[:4000],
            )

            try:
                domain = urlparse(url).netloc
            except ValueError:
                # urlparse rejects some malformed URLs (e.g. bad IPv6 hosts).
                domain = ""

            return {
                "_id": item_id,
                "_type": "page",
                "title": result_dict.get("title", "Page"),
                "url": result_dict.get("url", url),
                "content": result_dict.get("content", ""),
                "images": result_dict.get("images", []),
                "domain": domain,
                "is_crawled": True,
                "screenshot_b64": screenshot_b64,
            }
        except Exception as e:
            logger.error(f"Failed to fetch/screenshot {url}: {e}")
            return None

    async def _fetch_with_timeout(url: str):
        """Apply the per-URL timeout; return ``(requested_url, item_or_None)``."""
        try:
            item = await asyncio.wait_for(_fetch_and_screenshot(url), timeout=fetch_timeout)
        except asyncio.TimeoutError:
            logger.warning(f"Fetch timeout ({fetch_timeout}s) exceeded for: {url}")
            item = None
        # Return the *requested* URL alongside the item: the item's own "url"
        # may differ (redirect/canonical URL), which would break the
        # original-order lookup below.
        return url, item

    url_to_task = {url: asyncio.create_task(_fetch_with_timeout(url)) for url in ordered_urls}
    tasks = list(url_to_task.values())
    first_task = url_to_task[first_url]

    # Collect the first N successful results (fastest wins).
    collected = {}  # requested url -> cached item
    successful_count = 0
    for next_done in asyncio.as_completed(tasks):
        try:
            done_url, item = await next_done
        except Exception as e:
            logger.warning(f"Fetch task failed: {e}")
            continue
        if not item:
            continue
        collected[done_url] = item
        successful_count += 1
        # Only stop early once the first URL has finished (or failed).
        if successful_count >= max_results and first_task.done():
            logger.info(f"Got {max_results} successful results, cancelling remaining tasks")
            break

    # The first task may be finished without its result having been yielded
    # by as_completed yet; awaiting it here picks that result up (awaiting a
    # finished task returns immediately).
    if first_url not in collected:
        if not first_task.done():
            logger.info("Waiting for first URL to complete...")
        try:
            done_url, item = await first_task
            if item:
                collected[done_url] = item
        except Exception as e:
            logger.warning(f"First URL fetch failed: {e}")

    # Cancel whatever is still running and let the cancellations settle.
    pending = [task for task in tasks if not task.done()]
    for task in pending:
        task.cancel()
    if pending:
        await asyncio.gather(*pending, return_exceptions=True)

    # Add results in original URL order (not fetch speed order).
    self.all_web_results.extend(
        collected[url] for url in ordered_urls if url in collected
    )
|
|
910
|
+
|
|
911
|
+
async def _render_page_screenshot(self, title: str, url: str, content: str) -> Optional[str]:
    """Render page text to an image and return it base64-encoded.

    The page is laid out as a small markdown document (source URL, title,
    content), rendered to a temporary ``.jpg`` by a ``ContentRenderer`` that
    is created on first use and then reused, and finally shrunk with
    ``self._compress_image`` (max width 600, quality 70).

    Args:
        title: Page title used as the markdown heading.
        url: Source URL shown in the rendered header and in the references.
        content: Page text to render. It is not truncated here; callers
            limit its length.

    Returns:
        The compressed image as base64 text, or ``None`` if any step failed
        (the failure is logged as a warning).
    """
    import base64
    import os
    import tempfile

    tmp_path = None
    try:
        # Try to use the content renderer if available.
        from .render_vue import ContentRenderer

        # Simple markdown representation of the page for the screenshot.
        markdown = f"> 来源: {url}\n\n# {title}\n\n{content}"

        # Reserve a temp file path for the renderer to write into.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name

        # This method is called from several concurrent fetch tasks. Guard
        # creation with a lock so only one renderer is started, and publish
        # it only after start() succeeded so a failed start is retried later
        # instead of leaving an unusable renderer behind.
        lock = getattr(self, "_screenshot_renderer_lock", None)
        if lock is None:
            lock = self._screenshot_renderer_lock = asyncio.Lock()
        async with lock:
            if getattr(self, "_screenshot_renderer", None) is None:
                renderer = ContentRenderer(auto_start=True)
                await renderer.start(timeout=10000)
                self._screenshot_renderer = renderer

        await self._screenshot_renderer.render(
            markdown,
            tmp_path,
            stats={"total_time": 0},
            references=[{"title": title, "url": url, "domain": ""}],
        )

        # Compress image to reduce LLM payload size.
        img_bytes = await self._compress_image(tmp_path, max_width=600, quality=70)

        return base64.b64encode(img_bytes).decode("utf-8")

    except Exception as e:
        logger.warning(f"Failed to render page screenshot: {e}")
        return None
    finally:
        # Always remove the temp file, including when rendering or
        # compression failed part-way through.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
|
|
956
|
+
|
|
957
|
+
async def _compress_image(self, image_path: str, max_width: int = 400, quality: int = 50) -> bytes:
    """Downscale and JPEG-encode an image to reduce its size for the LLM payload.

    Args:
        image_path: Path of the image file to read.
        max_width: Images wider than this are resized to this width,
            keeping the aspect ratio.
        quality: JPEG quality passed to Pillow.

    Returns:
        The JPEG bytes. If Pillow is not installed, the original file bytes
        are returned unchanged and a warning is logged.
    """
    try:
        from PIL import Image
    except ImportError:
        # PIL not available, return original.
        logger.warning("PIL not available for image compression, using original")
        with open(image_path, 'rb') as f:
            return f.read()

    from io import BytesIO

    # Pillow >= 9.1 exposes the filters on Image.Resampling; older releases
    # only have them directly on the Image module.
    lanczos = getattr(Image, "Resampling", Image).LANCZOS

    def _compress() -> bytes:
        with Image.open(image_path) as img:
            # Shrink to max_width, maintaining aspect ratio.
            if img.width > max_width:
                ratio = max_width / img.width
                # Guard against a zero height for extremely wide images.
                new_height = max(1, int(img.height * ratio))
                img = img.resize((max_width, new_height), lanczos)

            # JPEG cannot store alpha or palette data; normalise every mode
            # other than plain RGB / greyscale (RGBA, P, LA, ...) to RGB.
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")

            buffer = BytesIO()
            img.save(buffer, format='JPEG', quality=quality, optimize=True)
            return buffer.getvalue()

    # Pillow work is CPU/IO bound; keep it off the event loop.
    return await asyncio.to_thread(_compress)
|
|
988
|
+
|
|
989
|
+
async def _run_summary_stage(
    self, user_input: str, images: List[str] = None,
    has_page_screenshots: bool = False, model: str = None
) -> Tuple[str, Dict[str, int], Dict[str, Any]]:
    """Ask the summary model for the final answer.

    The user message is plain text, or a multimodal payload when ``images``
    are supplied (bare base64 payloads are wrapped as JPEG data URLs).

    Args:
        user_input: The user's request; ``"..."`` is sent when it is empty.
        images: Optional base64 payloads or ``data:`` URLs to attach.
        has_page_screenshots: Accepted for the caller's benefit; not read here.
        model: Model name forwarded to ``_safe_llm_call``.

    Returns:
        ``(stripped reply text, usage, {"prompt": system_prompt})``.
    """
    # System prompt: fall back to the raw template if formatting fails.
    try:
        system_prompt = SUMMARY_SP.format(
            language=getattr(self.config, "language", "Simplified Chinese")
        )
    except Exception:
        system_prompt = SUMMARY_SP

    # User message: text only, or text followed by one part per image.
    text = user_input or "..."
    if images:
        user_content = [{"type": "text", "text": text}]
        user_content.extend(
            {
                "type": "image_url",
                "image_url": {
                    "url": img if img.startswith("data:") else f"data:image/jpeg;base64,{img}"
                },
            }
            for img in images
        )
    else:
        user_content = text

    summary_client = self._client_for(
        api_key=getattr(self.config, "summary_api_key", None),
        base_url=getattr(self.config, "summary_base_url", None)
    )

    reply, usage = await self._safe_llm_call(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        model=model,
        client=summary_client,
        extra_body=getattr(self.config, "summary_extra_body", None)
    )

    return (reply.content or "").strip(), usage, {"prompt": system_prompt}
|
|
1034
|
+
|
|
1035
|
+
def _format_fetch_msgs(self) -> str:
    """Format crawled page content for the Summary prompt.

    Only entries of ``self.all_web_results`` whose ``_type`` is ``"page"``
    are included. Each becomes a ``Title / URL / Content`` section and the
    sections are joined with a newline.

    Returns:
        The formatted text, or an empty string when there are no results.
    """
    if not self.all_web_results:
        return ""

    sections = []
    for res in self.all_web_results:
        if res.get("_type") != "page":
            continue
        # Treat missing and None values alike so the prompt never contains
        # the literal text "None".
        title = (res.get("title") or "").strip()
        url = res.get("url") or ""
        content = (res.get("content") or "").strip()
        # Content is passed through in full; no truncation is applied here.
        sections.append(f"Title: {title}\nURL: {url}\nContent:\n{content}\n")

    return "\n".join(sections)
|
|
1051
|
+
|
|
1201
1052
|
def _format_search_msgs(self) -> str:
|
|
1202
1053
|
"""Format search snippets only (not crawled pages)."""
|
|
1203
1054
|
if not self.all_web_results:
|
|
@@ -1264,23 +1115,13 @@ class ProcessingPipeline:
|
|
|
1264
1115
|
parts: List[str] = []
|
|
1265
1116
|
parts.append("# Pipeline Trace\n")
|
|
1266
1117
|
|
|
1267
|
-
if trace.get("vision"):
|
|
1268
|
-
v = trace["vision"]
|
|
1269
|
-
parts.append("## Vision\n")
|
|
1270
|
-
parts.append(f"- model: `{v.get('model')}`")
|
|
1271
|
-
parts.append(f"- base_url: `{v.get('base_url')}`")
|
|
1272
|
-
parts.append(f"- images_count: `{v.get('images_count')}`\n")
|
|
1273
|
-
parts.append("### Prompt\n")
|
|
1274
|
-
parts.append(fence("text", v.get("prompt", "")))
|
|
1275
|
-
parts.append("\n### Output\n")
|
|
1276
|
-
parts.append(fence("text", v.get("output", "")))
|
|
1277
|
-
parts.append("")
|
|
1278
|
-
|
|
1279
1118
|
if trace.get("instruct"):
|
|
1280
1119
|
t = trace["instruct"]
|
|
1281
1120
|
parts.append("## Instruct\n")
|
|
1282
1121
|
parts.append(f"- model: `{t.get('model')}`")
|
|
1283
|
-
parts.append(f"- base_url: `{t.get('base_url')}
|
|
1122
|
+
parts.append(f"- base_url: `{t.get('base_url')}`")
|
|
1123
|
+
parts.append(f"- has_images: `{t.get('has_images', False)}`")
|
|
1124
|
+
parts.append(f"- images_count: `{t.get('images_count', 0)}`\n")
|
|
1284
1125
|
parts.append("### Prompt\n")
|
|
1285
1126
|
parts.append(fence("text", t.get("prompt", "")))
|
|
1286
1127
|
if t.get("tool_calls"):
|
|
@@ -1293,20 +1134,79 @@ class ProcessingPipeline:
|
|
|
1293
1134
|
parts.append(fence("text", t.get("output", "")))
|
|
1294
1135
|
parts.append("")
|
|
1295
1136
|
|
|
1296
|
-
if trace.get("
|
|
1297
|
-
|
|
1298
|
-
parts.append("##
|
|
1299
|
-
parts.append(f"-
|
|
1300
|
-
parts.append(f"-
|
|
1137
|
+
if trace.get("fetch"):
|
|
1138
|
+
f = trace["fetch"]
|
|
1139
|
+
parts.append("## Auto-Fetch\n")
|
|
1140
|
+
parts.append(f"- urls_fetched: `{f.get('urls_fetched', [])}`")
|
|
1141
|
+
parts.append(f"- screenshots_count: `{f.get('screenshots_count', 0)}`\n")
|
|
1142
|
+
parts.append("")
|
|
1143
|
+
|
|
1144
|
+
if trace.get("summary"):
|
|
1145
|
+
s = trace["summary"]
|
|
1146
|
+
parts.append("## Summary\n")
|
|
1147
|
+
parts.append(f"- model: `{s.get('model')}`\n")
|
|
1301
1148
|
parts.append("### System Prompt\n")
|
|
1302
|
-
parts.append(fence("text",
|
|
1303
|
-
parts.append("\n###
|
|
1304
|
-
parts.append(fence("
|
|
1305
|
-
parts.append("
|
|
1306
|
-
parts.append(fence("text", a.get("final_output", "")))
|
|
1149
|
+
parts.append(fence("text", s.get("system_prompt", "")))
|
|
1150
|
+
parts.append("\n### Output\n")
|
|
1151
|
+
parts.append(fence("text", s.get("output", "")))
|
|
1152
|
+
parts.append("")
|
|
1307
1153
|
|
|
1308
1154
|
return "\n".join(parts).strip() + "\n"
|
|
1309
1155
|
|
|
1156
|
+
async def _cache_run_async(self, cache_data: Dict[str, Any]):
    """Write one run's conversation and page screenshots to a cache folder.

    The folder is ``{conversations_dir}/{timestamp}_{query}``, where
    ``conversations_dir`` comes from the config (default
    ``data/conversations``) and ``query`` is the first 20 characters of the
    user input with unsafe filename characters replaced by ``_``. It receives
    ``conversation.md`` (response plus trace) and one ``page_N.jpg`` per
    screenshot. Any failure is logged as a warning and never raised.

    Args:
        cache_data: Run data. Keys read here: ``user_input``,
            ``final_content``, ``trace_markdown``, ``page_screenshots``
            (base64 strings).
    """
    import base64
    from datetime import datetime
    from pathlib import Path

    try:
        # Create cache directory: {conversations_dir}/{timestamp}_{query}/
        cache_base = Path(getattr(self.config, "conversations_dir", "data/conversations"))
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        user_input = cache_data.get("user_input") or ""
        # Clean filename: keep alphanumerics and "._-", replace the rest.
        user_input_short = "".join(
            c if c.isalnum() or c in "._-" else "_"
            for c in (user_input or "query")[:20]
        )
        cache_dir = cache_base / f"{timestamp}_{user_input_short}"
        # Filesystem calls run in a worker thread to keep the event loop free.
        await asyncio.to_thread(cache_dir.mkdir, parents=True, exist_ok=True)

        # Save conversation markdown (includes trace and response). Missing
        # values are normalised so the file never contains a literal "None".
        conversation_md = (
            f"# {user_input or 'Query'}\n"
            "\n"
            "## Response\n"
            "\n"
            f"{cache_data.get('final_content') or ''}\n"
            "\n"
            "---\n"
            "\n"
            "## Trace\n"
            "\n"
            f"{cache_data.get('trace_markdown') or ''}\n"
        )
        conv_path = cache_dir / "conversation.md"
        await asyncio.to_thread(
            conv_path.write_text,
            conversation_md,
            encoding="utf-8"
        )

        # Save page screenshots; tolerate a missing/None list.
        screenshots = cache_data.get("page_screenshots") or []
        for index, screenshot_b64 in enumerate(screenshots, start=1):
            if not screenshot_b64:
                continue
            try:
                img_bytes = base64.b64decode(screenshot_b64)
            except (ValueError, TypeError) as e:
                # One corrupt entry should not prevent saving the others.
                logger.warning(f"Skipping undecodable screenshot {index}: {e}")
                continue
            screenshot_path = cache_dir / f"page_{index}.jpg"
            await asyncio.to_thread(screenshot_path.write_bytes, img_bytes)

        logger.debug(f"Conversation cached to: {cache_dir}")

    except Exception as e:
        # Don't fail silently but also don't crash the pipeline.
        logger.warning(f"Failed to cache conversation: {e}")
|
|
1209
|
+
|
|
1310
1210
|
async def close(self):
|
|
1311
1211
|
try:
|
|
1312
1212
|
await self.search_service.close()
|