entari-plugin-hyw 2.2.5__py3-none-any.whl → 3.5.0rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. entari_plugin_hyw/__init__.py +371 -315
  2. entari_plugin_hyw/assets/card-dist/index.html +396 -0
  3. entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
  4. entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
  5. entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
  6. entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
  7. entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
  8. entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
  9. entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
  10. entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
  11. entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
  12. entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
  13. entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
  14. entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
  15. entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
  17. entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
  18. entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
  19. entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
  20. entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
  21. entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
  22. entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
  23. entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
  24. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  25. entari_plugin_hyw/assets/icon/gemini.svg +1 -0
  26. entari_plugin_hyw/assets/icon/google.svg +1 -0
  27. entari_plugin_hyw/assets/icon/grok.png +0 -0
  28. entari_plugin_hyw/assets/icon/huggingface.png +0 -0
  29. entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
  30. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  31. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  32. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  33. entari_plugin_hyw/assets/icon/openai.svg +1 -0
  34. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  35. entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
  36. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  37. entari_plugin_hyw/assets/icon/xai.png +0 -0
  38. entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
  39. entari_plugin_hyw/assets/icon/zai.png +0 -0
  40. entari_plugin_hyw/card-ui/.gitignore +24 -0
  41. entari_plugin_hyw/card-ui/README.md +5 -0
  42. entari_plugin_hyw/card-ui/index.html +16 -0
  43. entari_plugin_hyw/card-ui/package-lock.json +2342 -0
  44. entari_plugin_hyw/card-ui/package.json +31 -0
  45. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
  46. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
  47. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  48. entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
  49. entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
  50. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  51. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  52. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
  53. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  54. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  55. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  56. entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
  57. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  58. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
  59. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  60. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  61. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  62. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  63. entari_plugin_hyw/card-ui/public/vite.svg +1 -0
  64. entari_plugin_hyw/card-ui/src/App.vue +412 -0
  65. entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
  66. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
  67. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +386 -0
  68. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
  69. entari_plugin_hyw/card-ui/src/components/StageCard.vue +237 -0
  70. entari_plugin_hyw/card-ui/src/main.ts +5 -0
  71. entari_plugin_hyw/card-ui/src/style.css +29 -0
  72. entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
  73. entari_plugin_hyw/card-ui/src/types.ts +52 -0
  74. entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
  75. entari_plugin_hyw/card-ui/tsconfig.json +7 -0
  76. entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
  77. entari_plugin_hyw/card-ui/vite.config.ts +16 -0
  78. entari_plugin_hyw/history.py +170 -0
  79. entari_plugin_hyw/image_cache.py +274 -0
  80. entari_plugin_hyw/misc.py +128 -0
  81. entari_plugin_hyw/pipeline.py +1338 -0
  82. entari_plugin_hyw/prompts.py +108 -0
  83. entari_plugin_hyw/render_vue.py +314 -0
  84. entari_plugin_hyw/search.py +696 -0
  85. entari_plugin_hyw-3.5.0rc6.dist-info/METADATA +116 -0
  86. entari_plugin_hyw-3.5.0rc6.dist-info/RECORD +88 -0
  87. entari_plugin_hyw/hyw_core.py +0 -555
  88. entari_plugin_hyw-2.2.5.dist-info/METADATA +0 -135
  89. entari_plugin_hyw-2.2.5.dist-info/RECORD +0 -6
  90. {entari_plugin_hyw-2.2.5.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/WHEEL +0 -0
  91. {entari_plugin_hyw-2.2.5.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1338 @@
1
+ import asyncio
2
+ import html
3
+ import json
4
+ import re
5
+ import time
6
+ from contextlib import asynccontextmanager
7
+ from typing import Any, Dict, List, Optional, Tuple
8
+
9
+ from loguru import logger
10
+ from openai import AsyncOpenAI
11
+
12
+ from .search import SearchService
13
+ from .image_cache import get_cached_images
14
+ from .prompts import (
15
+ AGENT_SP,
16
+ AGENT_SP_INSTRUCT_VISION_ADD,
17
+ AGENT_SP_TOOLS_STANDARD_ADD,
18
+ AGENT_SP_TOOLS_AGENT_ADD,
19
+ AGENT_SP_SEARCH_ADD,
20
+ INSTRUCT_SP,
21
+ INSTRUCT_SP_VISION_ADD,
22
+ VISION_SP,
23
+ )
24
+
25
+ @asynccontextmanager
26
+ async def _null_async_context():
27
+ yield None
28
+
29
+
30
+ class ProcessingPipeline:
31
+ """
32
+ Core pipeline (vision -> instruct/search -> agent).
33
+ """
34
+
35
+ def __init__(self, config: Any):
36
+ self.config = config
37
+ self.search_service = SearchService(config)
38
+ self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
39
+ self.all_web_results = [] # Cache for search results
40
+ self.current_mode = "standard" # standard | agent
41
+ # Global ID counter for all types (unified numbering)
42
+ self.global_id_counter = 0
43
+ # Background tasks for async image search (not blocking agent)
44
+ self._image_search_tasks: List[asyncio.Task] = []
45
+
46
+ self.web_search_tool = {
47
+ "type": "function",
48
+ "function": {
49
+ "name": "internal_web_search",
50
+ "description": "Search the web for text.",
51
+ "parameters": {
52
+ "type": "object",
53
+ "properties": {"query": {"type": "string"}},
54
+ "required": ["query"],
55
+ },
56
+ },
57
+ }
58
+ self.image_search_tool = {
59
+ "type": "function",
60
+ "function": {
61
+ "name": "internal_image_search",
62
+ "description": "Search for images related to a query.",
63
+ "parameters": {
64
+ "type": "object",
65
+ "properties": {"query": {"type": "string"}},
66
+ "required": ["query"],
67
+ },
68
+ },
69
+ }
70
+ self.set_mode_tool = {
71
+ "type": "function",
72
+ "function": {
73
+ "name": "set_mode",
74
+ "description": "设定后续 Agent 的运行模式: standard | agent",
75
+ "parameters": {
76
+ "type": "object",
77
+ "properties": {
78
+ "mode": {"type": "string", "enum": ["standard", "agent"]},
79
+ "reason": {"type": "string"},
80
+ },
81
+ "required": ["mode"],
82
+ },
83
+ },
84
+ }
85
+ self.crawl_page_tool = {
86
+ "type": "function",
87
+ "function": {
88
+ "name": "crawl_page",
89
+ "description": "使用 Crawl4AI 抓取网页并返回 Markdown 文本。",
90
+ "parameters": {
91
+ "type": "object",
92
+ "properties": {
93
+ "url": {"type": "string"},
94
+ },
95
+ "required": ["url"],
96
+ },
97
+ },
98
+ }
99
+ self.refuse_answer_tool = {
100
+ "type": "function",
101
+ "function": {
102
+ "name": "refuse_answer",
103
+ "description": "拒绝回答问题。当用户问题涉及敏感、违规、不适宜内容时调用此工具,立即终止流程并返回拒绝回答的图片。",
104
+ "parameters": {
105
+ "type": "object",
106
+ "properties": {
107
+ "reason": {"type": "string", "description": "拒绝回答的原因(内部记录,不展示给用户)"},
108
+ },
109
+ "required": [],
110
+ },
111
+ },
112
+ }
113
+ # Flag to indicate refuse_answer was called
114
+ self._should_refuse = False
115
+ self._refuse_reason = ""
116
+
117
+ async def execute(
118
+ self,
119
+ user_input: str,
120
+ conversation_history: List[Dict],
121
+ model_name: str = None,
122
+ images: List[str] = None,
123
+ vision_model_name: str = None,
124
+ selected_vision_model: str = None,
125
+ ) -> Dict[str, Any]:
126
+ """
127
+ 1) Vision: summarize images once (no image persistence).
128
+ 2) Instruct: run web_search and decide whether to grant Playwright MCP tools.
129
+ 3) Agent: normally no tools; if granted, allow Playwright MCP tools (max 6 rounds; step 5 nudge, step 6 forced).
130
+ """
131
+ start_time = time.time()
132
+ stats = {"start_time": start_time, "tool_calls_count": 0}
133
+ # Token usage tracking for billing
134
+ usage_totals = {"input_tokens": 0, "output_tokens": 0}
135
+ active_model = model_name or self.config.model_name
136
+
137
+ current_history = conversation_history
138
+ final_response_content = ""
139
+ structured: Dict[str, Any] = {}
140
+
141
+ # Reset search cache and ID counter for this execution
142
+ self.all_web_results = []
143
+ self.global_id_counter = 0
144
+ # Reset refuse_answer flag
145
+ self._should_refuse = False
146
+ self._refuse_reason = ""
147
+
148
+ try:
149
+ logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
150
+
151
+ trace: Dict[str, Any] = {
152
+ "vision": None,
153
+ "instruct": None,
154
+ "agent": None,
155
+ }
156
+
157
+ # Vision stage
158
+ vision_text = ""
159
+ vision_start = time.time()
160
+ vision_time = 0
161
+ vision_cost = 0.0
162
+ vision_usage = {}
163
+ if images:
164
+ vision_model = (
165
+ selected_vision_model
166
+ or vision_model_name
167
+ or getattr(self.config, "vision_model_name", None)
168
+ or active_model
169
+ )
170
+ vision_prompt = VISION_SP.format(user_msgs=user_input or "[图片]")
171
+ vision_text, vision_usage = await self._run_vision_stage(
172
+ user_input=user_input,
173
+ images=images,
174
+ model=vision_model,
175
+ prompt=vision_prompt,
176
+ )
177
+ # Add vision usage with vision-specific pricing
178
+ usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
179
+ usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)
180
+
181
+ # Calculate Vision Cost
182
+ v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
183
+ v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
184
+ if v_in_price > 0 or v_out_price > 0:
185
+ vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)
186
+
187
+ vision_time = time.time() - vision_start
188
+
189
+ trace["vision"] = {
190
+ "model": vision_model,
191
+ "base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
192
+ "prompt": vision_prompt,
193
+ "user_input": user_input or "",
194
+ "images_count": len(images or []),
195
+ "output": vision_text,
196
+ "usage": vision_usage,
197
+ "time": vision_time,
198
+ "cost": vision_cost
199
+ }
200
+
201
+ # Instruct + pre-search
202
+ instruct_start = time.time()
203
+ instruct_model = getattr(self.config, "instruct_model_name", None) or active_model
204
+ logger.info(f"Instruct Stage Config: instruct_model_name={getattr(self.config, 'instruct_model_name', None)}, active_model={active_model}, using: {instruct_model}")
205
+ instruct_text, search_payloads, instruct_trace, instruct_usage, search_time = await self._run_instruct_stage(
206
+ user_input=user_input,
207
+ vision_text=vision_text,
208
+ model=instruct_model,
209
+ )
210
+ # Instruct time excludes search time (search_time is returned separately)
211
+ instruct_time = time.time() - instruct_start - search_time
212
+
213
+ # Calculate Instruct Cost
214
+ instruct_cost = 0.0
215
+ i_in_price = float(getattr(self.config, "instruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
216
+ i_out_price = float(getattr(self.config, "instruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
217
+ if i_in_price > 0 or i_out_price > 0:
218
+ instruct_cost = (instruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (instruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
219
+
220
+ # Add instruct usage
221
+ usage_totals["input_tokens"] += instruct_usage.get("input_tokens", 0)
222
+ usage_totals["output_tokens"] += instruct_usage.get("output_tokens", 0)
223
+
224
+ instruct_trace["time"] = instruct_time
225
+ instruct_trace["cost"] = instruct_cost
226
+ trace["instruct"] = instruct_trace
227
+
228
+ # Check if refuse_answer was called - terminate early
229
+ if self._should_refuse:
230
+ logger.info(f"Pipeline: refuse_answer triggered. Reason: {self._refuse_reason}")
231
+ stats["total_time"] = time.time() - start_time
232
+ return {
233
+ "llm_response": "",
234
+ "structured_response": {},
235
+ "stats": stats,
236
+ "model_used": active_model,
237
+ "conversation_history": current_history,
238
+ "refuse_answer": True,
239
+ "refuse_reason": self._refuse_reason,
240
+ "stages_used": [],
241
+ }
242
+
243
+ # Start agent loop
244
+ agent_start_time = time.time()
245
+ current_history.append({"role": "user", "content": user_input or "..."})
246
+
247
+ mode = instruct_trace.get("mode", self.current_mode).lower()
248
+ logger.success(f"Instruct Mode: {mode}")
249
+ self.current_mode = mode
250
+
251
+ # Determine max iterations
252
+ max_steps = 10 if mode == "agent" else 1
253
+
254
+ step = 0
255
+ agent_trace_steps: List[Dict[str, Any]] = []
256
+ last_system_prompt = ""
257
+
258
+ agent_tools: Optional[List[Dict[str, Any]]] = None
259
+ if mode == "agent":
260
+ agent_tools = [self.web_search_tool, self.image_search_tool, self.crawl_page_tool]
261
+
262
+ # Agent loop
263
+ while step < max_steps:
264
+ step += 1
265
+ logger.info(f"Pipeline: Agent step {step}/{max_steps}")
266
+
267
+ if step == 5 and mode == "agent":
268
+ current_history.append(
269
+ {
270
+ "role": "system",
271
+ "content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
272
+ }
273
+ )
274
+
275
+ tools_desc = ""
276
+ if agent_tools:
277
+ tools_desc = "\n".join([
278
+ "- internal_web_search(query): 触发搜索并缓存结果",
279
+ "- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
280
+ ])
281
+
282
+ user_msgs_text = user_input or ""
283
+
284
+ search_msgs_text = self._format_search_msgs()
285
+ # Image search results are NOT passed to LLM - they're for UI rendering only
286
+
287
+ has_search_results = any(r.get("_type") == "search" for r in self.all_web_results)
288
+ has_image_results = any(r.get("_type") == "image" for r in self.all_web_results) # For UI rendering only
289
+
290
+ # Build agent system prompt
291
+ mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
292
+ system_prompt = AGENT_SP.format(
293
+ user_msgs=user_msgs_text,
294
+ mode=mode,
295
+ mode_desc=mode_desc_text,
296
+ language=getattr(self.config, "language", "Simplified Chinese")[:128]
297
+ )
298
+
299
+ # Append vision text if available
300
+ if vision_text:
301
+ system_prompt += AGENT_SP_INSTRUCT_VISION_ADD.format(vision_msgs=vision_text)
302
+
303
+ # Append search results (text and page only, NOT images)
304
+ page_msgs_text = self._format_page_msgs()
305
+ all_search_parts = []
306
+ if has_search_results and search_msgs_text:
307
+ all_search_parts.append(search_msgs_text)
308
+ if page_msgs_text:
309
+ all_search_parts.append(page_msgs_text)
310
+ # Images are excluded from LLM prompt - they're for UI rendering only
311
+
312
+ if all_search_parts:
313
+ system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))
314
+
315
+ last_system_prompt = system_prompt
316
+
317
+ messages = [{"role": "system", "content": system_prompt}]
318
+ messages.extend(current_history)
319
+
320
+ tools_for_step = agent_tools if (agent_tools and step < max_steps) else None
321
+
322
+ # Debug logging
323
+ if tools_for_step:
324
+ logger.info(f"[Agent] Tools provided: {[t['function']['name'] for t in tools_for_step]}")
325
+ else:
326
+ logger.warning(f"[Agent] NO TOOLS provided for step {step} (agent_tools={agent_tools is not None}, step<max={step < max_steps})")
327
+
328
+ step_llm_start = time.time()
329
+ response, step_usage = await self._safe_llm_call(
330
+ messages=messages,
331
+ model=active_model,
332
+ tools=tools_for_step,
333
+ tool_choice="auto" if tools_for_step else None,
334
+ extra_body=self.config.extra_body,
335
+ )
336
+ step_llm_time = time.time() - step_llm_start
337
+
338
+ # Debug: Check response
339
+ has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
340
+ logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")
341
+
342
+ # Accumulate agent usage
343
+ usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
344
+ usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)
345
+
346
+ if response.tool_calls and tools_for_step:
347
+ tool_calls = response.tool_calls
348
+ stats["tool_calls_count"] += len(tool_calls)
349
+
350
+ # Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
351
+ assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
352
+ "role": "assistant",
353
+ "content": response.content,
354
+ "tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
355
+ }
356
+ current_history.append(assistant_msg)
357
+
358
+ tasks = [self._safe_route_tool(tc) for tc in tool_calls]
359
+ tool_start_time = time.time()
360
+ results = await asyncio.gather(*tasks)
361
+ tool_exec_time = time.time() - tool_start_time
362
+
363
+ step_trace = {
364
+ "step": step,
365
+ "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
366
+ "tool_results": [],
367
+ "tool_time": tool_exec_time,
368
+ "llm_time": step_llm_time,
369
+ "usage": step_usage,
370
+ }
371
+ for i, result in enumerate(results):
372
+ tc = tool_calls[i]
373
+ step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
374
+ current_history.append(
375
+ {
376
+ "tool_call_id": tc.id,
377
+ "role": "tool",
378
+ "name": tc.function.name,
379
+ "content": str(result),
380
+ }
381
+ )
382
+ agent_trace_steps.append(step_trace)
383
+ continue
384
+
385
+ final_response_content = response.content or ""
386
+ current_history.append({"role": "assistant", "content": final_response_content})
387
+ agent_trace_steps.append({
388
+ "step": step,
389
+ "final": True,
390
+ "output": final_response_content,
391
+ "llm_time": step_llm_time,
392
+ "usage": step_usage
393
+ })
394
+ break
395
+
396
+ if not final_response_content:
397
+ final_response_content = "执行结束,但未生成内容。"
398
+
399
+ structured = self._parse_tagged_response(final_response_content)
400
+ final_content = structured.get("response") or final_response_content
401
+
402
+ agent_time = time.time() - agent_start_time
403
+
404
+ # Calculate Agent Cost
405
+ agent_cost = 0.0
406
+ a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
407
+ a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
408
+
409
+ agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - instruct_usage.get("input_tokens", 0)
410
+ agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - instruct_usage.get("output_tokens", 0)
411
+
412
+ if a_in_price > 0 or a_out_price > 0:
413
+ agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)
414
+
415
+ trace["agent"] = {
416
+ "model": active_model,
417
+ "base_url": self.config.base_url,
418
+ "system_prompt": last_system_prompt,
419
+ "steps": agent_trace_steps,
420
+ "final_output": final_response_content,
421
+ "time": agent_time,
422
+ "cost": agent_cost
423
+ }
424
+ trace_markdown = self._render_trace_markdown(trace)
425
+
426
+ stats["total_time"] = time.time() - start_time
427
+ stats["steps"] = step
428
+
429
+ # Calculate billing info correctly by summing up all actual costs
430
+ total_cost_sum = vision_cost + instruct_cost
431
+ for s in agent_trace_steps:
432
+ s_usage = s.get("usage", {})
433
+ if s_usage:
434
+ s_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
435
+ s_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
436
+ total_cost_sum += (s_usage.get("input_tokens", 0) / 1_000_000 * s_in_price) + (s_usage.get("output_tokens", 0) / 1_000_000 * s_out_price)
437
+
438
+ billing_info = {
439
+ "input_tokens": usage_totals["input_tokens"],
440
+ "output_tokens": usage_totals["output_tokens"],
441
+ "total_cost": total_cost_sum,
442
+ }
443
+
444
+ # Build stages_used list for UI display
445
+ stages_used = []
446
+
447
+ def infer_icon(model_name: str, base_url: str) -> str:
448
+ model_lower = (model_name or "").lower()
449
+ url_lower = (base_url or "").lower()
450
+ if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
451
+ elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
452
+ elif "gemini" in model_lower or "google" in url_lower: return "google"
453
+ elif "gpt" in model_lower or "openai" in url_lower: return "openai"
454
+ elif "qwen" in model_lower: return "qwen"
455
+ elif "openrouter" in url_lower: return "openrouter"
456
+ return "openai"
457
+
458
+ def infer_provider(base_url: str) -> str:
459
+ url_lower = (base_url or "").lower()
460
+ if "openrouter" in url_lower: return "OpenRouter"
461
+ elif "openai" in url_lower: return "OpenAI"
462
+ elif "anthropic" in url_lower: return "Anthropic"
463
+ elif "google" in url_lower: return "Google"
464
+ elif "deepseek" in url_lower: return "DeepSeek"
465
+ return ""
466
+
467
+ if trace.get("vision"):
468
+ v = trace["vision"]
469
+ v_model = v.get("model", "")
470
+ v_base_url = v.get("base_url", "") or self.config.base_url
471
+ stages_used.append({
472
+ "name": "Vision",
473
+ "model": v_model,
474
+ "icon_config": infer_icon(v_model, v_base_url),
475
+ "provider": infer_provider(v_base_url),
476
+ "time": v.get("time", 0),
477
+ "cost": v.get("cost", 0.0)
478
+ })
479
+
480
+ if trace.get("instruct"):
481
+ i = trace["instruct"]
482
+ i_model = i.get("model", "")
483
+ i_base_url = i.get("base_url", "") or self.config.base_url
484
+ stages_used.append({
485
+ "name": "Instruct",
486
+ "model": i_model,
487
+ "icon_config": infer_icon(i_model, i_base_url),
488
+ "provider": infer_provider(i_base_url),
489
+ "time": i.get("time", 0),
490
+ "cost": i.get("cost", 0.0)
491
+ })
492
+
493
+ # Show Search stage if we have ANY search results (text OR image)
494
+ if (has_search_results or has_image_results) and search_payloads:
495
+ # Collect initial search results for the Search stage card
496
+ initial_refs = [
497
+ {"title": r.get("title", ""), "url": r.get("url", ""), "domain": r.get("domain", "")}
498
+ for r in self.all_web_results if r.get("_type") == "search"
499
+ ]
500
+ initial_images = [
501
+ {"title": r.get("title", ""), "url": r.get("url", ""), "thumbnail": r.get("thumbnail", "")}
502
+ for r in self.all_web_results if r.get("_type") == "image"
503
+ ]
504
+
505
+ stages_used.append({
506
+ "name": "Search",
507
+ "model": getattr(self.config, "search_name", "DuckDuckGo"),
508
+ "icon_config": "search",
509
+ "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
510
+ "time": search_time,
511
+ "cost": 0.0,
512
+ "references": initial_refs,
513
+ "image_references": initial_images
514
+ })
515
+
516
+ # Add Crawler stage if Instruct used crawl_page
517
+ if trace.get("instruct"):
518
+ instruct_tool_calls = trace["instruct"].get("tool_calls", [])
519
+ crawl_calls = [tc for tc in instruct_tool_calls if tc.get("name") == "crawl_page"]
520
+ if crawl_calls:
521
+ # Build crawled_pages list for UI
522
+ crawled_pages = []
523
+ for tc in crawl_calls:
524
+ url = tc.get("arguments", {}).get("url", "")
525
+ # Try to find cached result
526
+ found = next((r for r in self.all_web_results if r.get("url") == url and r.get("_type") == "page"), None)
527
+ if found:
528
+ try:
529
+ from urllib.parse import urlparse
530
+ domain = urlparse(url).netloc
531
+ except:
532
+ domain = ""
533
+ crawled_pages.append({
534
+ "title": found.get("title", "Page"),
535
+ "url": url,
536
+ "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
537
+ })
538
+
539
+ stages_used.append({
540
+ "name": "Crawler",
541
+ "model": "Crawl4AI",
542
+ "icon_config": "search",
543
+ "provider": "网页抓取",
544
+ "time": search_time, # Use existing search_time which includes fetch time
545
+ "cost": 0.0,
546
+ "crawled_pages": crawled_pages
547
+ })
548
+
549
+ # --- Granular Agent Stages (Grouped) ---
550
+ if trace.get("agent"):
551
+ a = trace["agent"]
552
+ a_model = a.get("model", "") or active_model
553
+ a_base_url = a.get("base_url", "") or self.config.base_url
554
+ steps = a.get("steps", [])
555
+ agent_icon = infer_icon(a_model, a_base_url)
556
+ agent_provider = infer_provider(a_base_url)
557
+
558
+ for s in steps:
559
+ if "tool_calls" in s:
560
+ # 1. Agent Thought Stage (with LLM time)
561
+ # Calculate step cost
562
+ step_usage = s.get("usage", {})
563
+ step_cost = 0.0
564
+ if a_in_price > 0 or a_out_price > 0:
565
+ step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
566
+
567
+ stages_used.append({
568
+ "name": "Agent",
569
+ "model": a_model,
570
+ "icon_config": agent_icon,
571
+ "provider": agent_provider,
572
+ "time": s.get("llm_time", 0), "cost": step_cost
573
+ })
574
+
575
+ # 2. Grouped Tool Stages
576
+ # Collect results for grouping
577
+ search_group_items = []
578
+ crawler_group_items = []
579
+
580
+ tcs = s.get("tool_calls", [])
581
+ trs = s.get("tool_results", [])
582
+
583
+ for idx, tc in enumerate(tcs):
584
+ t_name = tc.get("name")
585
+ # Try to get result content if available
586
+ t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""
587
+
588
+ if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
589
+ # We don't have per-call metadata easily unless we parse the 'result' string (which is JSON dump now for route_tool)
590
+ # But search results are cached in self.all_web_results.
591
+ # The 'content' of search tool result is basically "cached_for_prompt".
592
+ # So we don't need to put items here, just show "Search" container.
593
+ # But wait, if we want to show "what was searched", we can parse args.
594
+ args = tc.get("arguments", {})
595
+ query = args.get("query", "")
596
+ if query:
597
+ search_group_items.append({"query": query})
598
+
599
+ elif t_name == "crawl_page":
600
+ # Get URL from arguments, title from result
601
+ args = tc.get("arguments", {})
602
+ url = args.get("url", "")
603
+ title = "Page"
604
+ try:
605
+ page_data = json.loads(t_res_content)
606
+ if isinstance(page_data, dict):
607
+ title = page_data.get("title", "Page")
608
+ except:
609
+ pass
610
+
611
+ if url:
612
+ try:
613
+ domain = urlparse(url).netloc
614
+ except:
615
+ domain = ""
616
+ crawler_group_items.append({
617
+ "title": title,
618
+ "url": url,
619
+ "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
620
+ })
621
+
622
+ # Append Grouped Stages
623
+ if search_group_items:
624
+ stages_used.append({
625
+ "name": "Search",
626
+ "model": getattr(self.config, "search_name", "DuckDuckGo"),
627
+ "icon_config": "search",
628
+ "provider": "Agent Search",
629
+ "time": s.get("tool_time", 0), "cost": 0,
630
+ "queries": search_group_items # Render can use this if needed, or just show generic
631
+ })
632
+
633
+ if crawler_group_items:
634
+ stages_used.append({
635
+ "name": "Crawler",
636
+ "model": "Crawl4AI",
637
+ "icon_config": "browser",
638
+ "provider": "Page Fetcher",
639
+ "time": s.get("tool_time", 0), "cost": 0,
640
+ "crawled_pages": crawler_group_items
641
+ })
642
+
643
+ elif s.get("final"):
644
+ # Correctly calculate final step cost
645
+ step_usage = s.get("usage", {})
646
+ step_cost = 0.0
647
+ if a_in_price > 0 or a_out_price > 0:
648
+ step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
649
+
650
+ stages_used.append({
651
+ "name": "Agent",
652
+ "model": a_model,
653
+ "icon_config": agent_icon,
654
+ "provider": agent_provider,
655
+ "time": s.get("llm_time", 0),
656
+ "cost": step_cost
657
+ })
658
+
659
+ # Assign total time/cost to last Agent stage
660
+ # Sum up total time/cost for UI/stats (implicit via loop above)
661
+ # No need to assign everything to last agent anymore as we distribute it.
662
+
663
+ # --- Final Filter: Only show cited items in workflow cards ---
664
+ cited_urls = {ref['url'] for ref in (structured.get("references", []) +
665
+ structured.get("page_references", []) +
666
+ structured.get("image_references", []))}
667
+
668
+ # Find images already rendered in markdown content (to avoid duplicate display)
669
+ markdown_image_urls = set()
670
+ md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
671
+ for match in md_img_pattern.finditer(final_content):
672
+ markdown_image_urls.add(match.group(1))
673
+
674
+ for s in stages_used:
675
+ if "references" in s and s["references"]:
676
+ s["references"] = [r for r in s["references"] if r.get("url") in cited_urls]
677
+ # Filter out images already shown in markdown content
678
+ # Check both url AND thumbnail since either might be used in markdown
679
+ if "image_references" in s and s["image_references"]:
680
+ s["image_references"] = [
681
+ r for r in s["image_references"]
682
+ if r.get("url") not in markdown_image_urls and (r.get("thumbnail") or "") not in markdown_image_urls
683
+ ]
684
+ if "crawled_pages" in s and s["crawled_pages"]:
685
+ s["crawled_pages"] = [r for r in s["crawled_pages"] if r.get("url") in cited_urls]
686
+
687
+ # Clean up conversation history: Remove tool calls and results to save tokens and avoid ID conflicts
688
+ # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
689
+ cleaned_history = []
690
+ for msg in current_history:
691
+ if msg.get("role") == "tool":
692
+ continue
693
+ if msg.get("role") == "assistant" and msg.get("tool_calls"):
694
+ continue
695
+ cleaned_history.append(msg)
696
+
697
+ # Update the reference (since it might be used by caller)
698
+ current_history[:] = cleaned_history
699
+
700
+ # --- Apply cached images to reduce render time ---
701
+ # Collect all image URLs that need caching (avoid duplicates when thumbnail == url)
702
+ all_image_urls = set()
703
+ for img_ref in structured.get("image_references", []):
704
+ if img_ref.get("thumbnail"):
705
+ all_image_urls.add(img_ref["thumbnail"])
706
+ if img_ref.get("url"):
707
+ all_image_urls.add(img_ref["url"])
708
+
709
+ for stage in stages_used:
710
+ for img_ref in stage.get("image_references", []):
711
+ if img_ref.get("thumbnail"):
712
+ all_image_urls.add(img_ref["thumbnail"])
713
+ if img_ref.get("url"):
714
+ all_image_urls.add(img_ref["url"])
715
+
716
+ # Also collect image URLs from markdown content
717
+ markdown_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
718
+ markdown_urls = markdown_img_pattern.findall(final_content)
719
+ all_image_urls.update(markdown_urls)
720
+
721
+ # Get cached versions (waits for pending downloads until agent ends)
722
+ if all_image_urls:
723
+ try:
724
+ cached_map = await get_cached_images(list(all_image_urls))
725
+
726
+ # Apply cached URLs to structured response
727
+ for img_ref in structured.get("image_references", []):
728
+ if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
729
+ img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
730
+ if img_ref.get("url") and img_ref["url"] in cached_map:
731
+ img_ref["url"] = cached_map[img_ref["url"]]
732
+
733
+ # Apply cached URLs to stages
734
+ for stage in stages_used:
735
+ for img_ref in stage.get("image_references", []):
736
+ if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
737
+ img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
738
+ if img_ref.get("url") and img_ref["url"] in cached_map:
739
+ img_ref["url"] = cached_map[img_ref["url"]]
740
+
741
+ # Replace image URLs in markdown content with cached versions
742
+ def replace_markdown_img(match):
743
+ full_match = match.group(0)
744
+ url = match.group(1)
745
+ cached_url = cached_map.get(url)
746
+ if cached_url and cached_url != url:
747
+ return full_match.replace(url, cached_url)
748
+ return full_match
749
+
750
+ final_content = markdown_img_pattern.sub(replace_markdown_img, final_content)
751
+ structured["response"] = markdown_img_pattern.sub(replace_markdown_img, structured.get("response", ""))
752
+
753
+ # Log cache stats
754
+ from .image_cache import get_image_cache
755
+ cache_stats = get_image_cache().get_stats()
756
+ logger.info(f"ImageCache stats: {cache_stats}")
757
+
758
+ except Exception as e:
759
+ logger.warning(f"Failed to apply image cache: {e}")
760
+
761
+ # Cancel all background image search/download tasks when agent ends
762
+ if self._image_search_tasks:
763
+ logger.info(f"Cancelling {len(self._image_search_tasks)} background image search tasks")
764
+ for task in self._image_search_tasks:
765
+ if not task.done():
766
+ task.cancel()
767
+ # Wait a bit for tasks to handle cancellation gracefully
768
+ try:
769
+ await asyncio.gather(*self._image_search_tasks, return_exceptions=True)
770
+ except Exception:
771
+ pass
772
+ self._image_search_tasks.clear()
773
+
774
+ # Also cancel any pending image downloads in the cache
775
+ from .image_cache import get_image_cache
776
+ cache = get_image_cache()
777
+ if cache._pending:
778
+ logger.info(f"Cancelling {len(cache._pending)} pending image downloads")
779
+ for task in cache._pending.values():
780
+ if not task.done():
781
+ task.cancel()
782
+ cache._pending.clear()
783
+
784
+ return {
785
+ "llm_response": final_content,
786
+ "structured_response": structured,
787
+ "stats": stats,
788
+ "model_used": active_model,
789
+ "vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
790
+ "conversation_history": current_history,
791
+ "trace_markdown": trace_markdown,
792
+ "billing_info": billing_info,
793
+ "stages_used": stages_used,
794
+ }
795
+
796
+ except Exception as e:
797
+ logger.exception("Pipeline Critical Failure")
798
+ # Cancel all background image tasks on error
799
+ if hasattr(self, '_image_search_tasks') and self._image_search_tasks:
800
+ for task in self._image_search_tasks:
801
+ if not task.done(): task.cancel()
802
+ # Wait briefly for cleanup
803
+ await asyncio.wait(self._image_search_tasks, timeout=0.1)
804
+ self._image_search_tasks.clear()
805
+
806
+ from .image_cache import get_image_cache
807
+ cache = get_image_cache()
808
+ if cache._pending:
809
+ pending_tasks = list(cache._pending.values())
810
+ for task in pending_tasks:
811
+ if not task.done(): task.cancel()
812
+ await asyncio.wait(pending_tasks, timeout=0.1)
813
+ cache._pending.clear()
814
+ return {
815
+ "llm_response": f"I encountered a critical error: {e}",
816
+ "stats": stats,
817
+ "error": str(e),
818
+ }
819
+
820
+ def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
821
+ """Parse response and auto-infer references from citations and markdown images.
822
+ """
823
+ parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
824
+ if not text:
825
+ return parsed
826
+
827
+ import re
828
+
829
+ # 1. Strip trailing reference/source list
830
+ body_text = text
831
+ ref_list_pattern = re.compile(r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料|引用)[\s\S]*$', re.IGNORECASE | re.MULTILINE)
832
+ body_text = ref_list_pattern.sub('', body_text)
833
+
834
+ remaining_text = body_text.strip()
835
+
836
+ # 2. Unwrap JSON if necessary
837
+ try:
838
+ if remaining_text.strip().startswith("{") and "action" in remaining_text:
839
+ data = json.loads(remaining_text)
840
+ if isinstance(data, dict) and "action_input" in data:
841
+ remaining_text = data["action_input"]
842
+ except Exception:
843
+ pass
844
+
845
+ # 3. Identify all citations [N] and direct markdown images ![]()
846
+ cited_ids = []
847
+ body_pattern = re.compile(r'\[(\d+)\]')
848
+ for match in body_pattern.finditer(remaining_text):
849
+ try:
850
+ cited_ids.append(int(match.group(1)))
851
+ except ValueError: pass
852
+
853
+ # Also find direct URLs in ![]()
854
+ direct_image_urls = []
855
+ img_pattern = re.compile(r'!\[.*?\]\((.*?)\)')
856
+ for match in img_pattern.finditer(remaining_text):
857
+ url = match.group(1).strip()
858
+ if url and not url.startswith('['): # Not a [N] citation
859
+ direct_image_urls.append(url)
860
+
861
+ # 4. Build Citation Maps and Reference Lists
862
+ unified_id_map = {}
863
+ # Keep track of what we've already added to avoid duplicates
864
+ seen_urls = set()
865
+
866
+ # id_order needs to be unique and preserve appearance order
867
+ id_order = []
868
+ for id_val in cited_ids:
869
+ if id_val not in id_order:
870
+ id_order.append(id_val)
871
+
872
+ # Process [N] citations first to determine numbering
873
+ for old_id in id_order:
874
+ result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
875
+ if not result_item: continue
876
+
877
+ url = result_item.get("url", "")
878
+ item_type = result_item.get("_type", "")
879
+
880
+ entry = {
881
+ "title": result_item.get("title", ""),
882
+ "url": url,
883
+ "domain": result_item.get("domain", "")
884
+ }
885
+
886
+ if item_type == "search":
887
+ parsed["references"].append(entry)
888
+ unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
889
+ seen_urls.add(url)
890
+ elif item_type == "page":
891
+ parsed["page_references"].append(entry)
892
+ unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
893
+ seen_urls.add(url)
894
+ elif item_type == "image":
895
+ entry["thumbnail"] = result_item.get("thumbnail", "")
896
+ if url not in seen_urls:
897
+ parsed["image_references"].append(entry)
898
+ seen_urls.add(url)
899
+ # Note: Images cited as [N] might be used in text like ![...]([N])
900
+ # We'll handle this in replacement
901
+
902
+ # Now handle direct image URLs from ![]() that weren't cited as [N]
903
+ for url in direct_image_urls:
904
+ if url in seen_urls: continue
905
+ # Find in all_web_results
906
+ result_item = next((r for r in self.all_web_results if (r.get("url") == url or r.get("image") == url) and r.get("_type") == "image"), None)
907
+ if result_item:
908
+ entry = {
909
+ "title": result_item.get("title", ""),
910
+ "url": url,
911
+ "domain": result_item.get("domain", ""),
912
+ "thumbnail": result_item.get("thumbnail", "")
913
+ }
914
+ parsed["image_references"].append(entry)
915
+ seen_urls.add(url)
916
+
917
+ # 5. Replacement Logic
918
+ # Define image replacement map separately to handle ![...]([N])
919
+ image_url_map = {} # old_id -> raw_url
920
+ for old_id in id_order:
921
+ item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
922
+ if item and item.get("_type") == "image":
923
+ image_url_map[old_id] = item.get("url", "")
924
+
925
+ def refined_replace(text):
926
+ # First, handle ![...]([N]) specifically
927
+ # We want to replace the [N] with the actual URL so the markdown renders
928
+ def sub_img_ref(match):
929
+ alt = match.group(1)
930
+ ref = match.group(2)
931
+ inner_match = body_pattern.match(ref)
932
+ if inner_match:
933
+ oid = int(inner_match.group(1))
934
+ if oid in image_url_map:
935
+ return f"![{alt}]({image_url_map[oid]})"
936
+ return match.group(0)
937
+
938
+ text = re.sub(r'!\[(.*?)\]\((.*?)\)', sub_img_ref, text)
939
+
940
+ # Then handle normal [N] replacements
941
+ def sub_norm_ref(match):
942
+ oid = int(match.group(1))
943
+ if oid in unified_id_map:
944
+ return f"[{unified_id_map[oid]}]"
945
+ if oid in image_url_map:
946
+ return "" # Remove standalone image citations like [5] if they aren't in ![]()
947
+ return "" # Remove hallucinated or invalid citations like [99] if not found in results
948
+
949
+ return body_pattern.sub(sub_norm_ref, text)
950
+
951
+ final_text = refined_replace(remaining_text)
952
+ parsed["response"] = final_text.strip()
953
+ return parsed
954
+
955
+ async def _safe_route_tool(self, tool_call):
956
+ """Wrapper for safe concurrent execution of tool calls."""
957
+ try:
958
+ return await asyncio.wait_for(self._route_tool(tool_call), timeout=30.0)
959
+ except asyncio.TimeoutError:
960
+ return "Error: Tool execution timed out (30s limit)."
961
+ except Exception as e:
962
+ return f"Error: Tool execution failed: {e}"
963
+
964
+ async def _route_tool(self, tool_call):
965
+ """Execute tool call and return result."""
966
+ name = tool_call.function.name
967
+ args = json.loads(html.unescape(tool_call.function.arguments))
968
+
969
+ if name == "internal_web_search" or name == "web_search":
970
+ query = args.get("query")
971
+ web = await self.search_service.search(query)
972
+
973
+ # Cache results and assign global IDs
974
+ for item in web:
975
+ self.global_id_counter += 1
976
+ item["_id"] = self.global_id_counter
977
+ item["_type"] = "search"
978
+ item["query"] = query
979
+ self.all_web_results.append(item)
980
+
981
+ return json.dumps({"web_results_count": len(web), "status": "cached_for_prompt"}, ensure_ascii=False)
982
+
983
+ if name == "internal_image_search":
984
+ query = args.get("query")
985
+ # Start image search in background (non-blocking)
986
+ # Images are for UI rendering only, not passed to LLM
987
+ async def _background_image_search():
988
+ try:
989
+ images = await self.search_service.image_search(query)
990
+ # Cache results and assign global IDs for UI rendering
991
+ for item in images:
992
+ self.global_id_counter += 1
993
+ item["_id"] = self.global_id_counter
994
+ item["_type"] = "image"
995
+ item["query"] = query
996
+ item["is_image"] = True
997
+ self.all_web_results.append(item)
998
+ logger.info(f"Background image search completed: {len(images)} images for query '{query}'")
999
+ except (asyncio.CancelledError, Exception) as e:
1000
+ # Silently handle cancellation or minor errors in background pre-warming
1001
+ if isinstance(e, asyncio.CancelledError):
1002
+ logger.debug(f"Background image search cancelled for query '{query}'")
1003
+ else:
1004
+ logger.error(f"Background image search failed for query '{query}': {e}")
1005
+
1006
+ task = asyncio.create_task(_background_image_search())
1007
+ self._image_search_tasks.append(task)
1008
+
1009
+ # Return immediately without waiting for search to complete
1010
+ return json.dumps({"image_results_count": 0, "status": "searching_in_background"}, ensure_ascii=False)
1011
+
1012
+ if name == "crawl_page":
1013
+ url = args.get("url")
1014
+ logger.info(f"[Tool] Crawling page: {url}")
1015
+ # Returns Dict: {content, title, url}
1016
+ result_dict = await self.search_service.fetch_page(url)
1017
+
1018
+ # Cache the crawled content with global ID
1019
+ self.global_id_counter += 1
1020
+
1021
+ cached_item = {
1022
+ "_id": self.global_id_counter,
1023
+ "_type": "page",
1024
+ "title": result_dict.get("title", "Page"),
1025
+ "url": result_dict.get("url", url),
1026
+ "content": result_dict.get("content", ""),
1027
+ "domain": "",
1028
+ "is_crawled": True,
1029
+ }
1030
+ try:
1031
+ from urllib.parse import urlparse
1032
+ cached_item["domain"] = urlparse(url).netloc
1033
+ except:
1034
+ pass
1035
+
1036
+ self.all_web_results.append(cached_item)
1037
+
1038
+ return json.dumps({"crawl_status": "success", "title": cached_item["title"], "content_length": len(result_dict.get("content", ""))}, ensure_ascii=False)
1039
+
1040
+ if name == "set_mode":
1041
+ mode = args.get("mode", "standard")
1042
+ self.current_mode = mode
1043
+ return f"Mode set to {mode}"
1044
+
1045
+ if name == "refuse_answer":
1046
+ reason = args.get("reason", "")
1047
+ self._should_refuse = True
1048
+ self._refuse_reason = reason
1049
+ logger.info(f"[Tool] refuse_answer called. Reason: {reason}")
1050
+ return "Refuse answer triggered. Pipeline will terminate early."
1051
+
1052
+ return f"Unknown tool {name}"
1053
+
1054
+
1055
+ async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None, extra_body: Optional[Dict[str, Any]] = None):
1056
+ try:
1057
+ return await asyncio.wait_for(
1058
+ self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client, extra_body=extra_body),
1059
+ timeout=120.0,
1060
+ )
1061
+ except asyncio.TimeoutError:
1062
+ logger.error("LLM Call Timed Out")
1063
+ return type("obj", (object,), {"content": "Error: The model took too long to respond.", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
1064
+ except Exception as e:
1065
+ logger.error(f"LLM Call Failed: {e}")
1066
+ return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
1067
+
1068
+ async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI, extra_body: Optional[Dict[str, Any]] = None):
1069
+ try:
1070
+ payload_debug = json.dumps(messages)
1071
+ logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
1072
+ except Exception:
1073
+ pass
1074
+
1075
+ t0 = time.time()
1076
+ logger.info("LLM Request SENT to API...")
1077
+ response = await client.chat.completions.create(
1078
+ model=model,
1079
+ messages=messages,
1080
+ tools=tools,
1081
+ tool_choice=tool_choice,
1082
+ temperature=self.config.temperature,
1083
+ extra_body=extra_body,
1084
+ )
1085
+ logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
1086
+
1087
+ usage = {"input_tokens": 0, "output_tokens": 0}
1088
+ if hasattr(response, "usage") and response.usage:
1089
+ usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
1090
+ usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
1091
+
1092
+ return response.choices[0].message, usage
1093
+
1094
+ async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
1095
+ content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
1096
+ for img_b64 in images:
1097
+ url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
1098
+ content_payload.append({"type": "image_url", "image_url": {"url": url}})
1099
+
1100
+ client = self._client_for(
1101
+ api_key=getattr(self.config, "vision_api_key", None),
1102
+ base_url=getattr(self.config, "vision_base_url", None),
1103
+ )
1104
+ response, usage = await self._safe_llm_call(
1105
+ messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
1106
+ model=model,
1107
+ client=client,
1108
+ extra_body=getattr(self.config, "vision_extra_body", None),
1109
+ )
1110
+ return (response.content or "").strip(), usage
1111
+
1112
+ async def _run_instruct_stage(
1113
+ self, user_input: str, vision_text: str, model: str
1114
+ ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
1115
+ """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
1116
+ # Instruct has access to: web_search, image_search, set_mode, crawl_page, refuse_answer
1117
+ tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool, self.refuse_answer_tool]
1118
+ tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式\n- refuse_answer: 拒绝回答(敏感/违规内容)"
1119
+
1120
+ prompt = INSTRUCT_SP.format(user_msgs=user_input or "", tools_desc=tools_desc)
1121
+
1122
+ if vision_text:
1123
+ prompt = f"{prompt}\\n\\n{INSTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
1124
+
1125
+ client = self._client_for(
1126
+ api_key=getattr(self.config, "instruct_api_key", None),
1127
+ base_url=getattr(self.config, "instruct_base_url", None),
1128
+ )
1129
+
1130
+ history: List[Dict[str, Any]] = [
1131
+ {"role": "system", "content": prompt},
1132
+ {"role": "user", "content": user_input or "..."},
1133
+ ]
1134
+
1135
+ response, usage = await self._safe_llm_call(
1136
+ messages=history,
1137
+ model=model,
1138
+ tools=tools,
1139
+ tool_choice="auto",
1140
+ client=client,
1141
+ extra_body=getattr(self.config, "instruct_extra_body", None),
1142
+ )
1143
+
1144
+ search_payloads: List[str] = []
1145
+ instruct_trace: Dict[str, Any] = {
1146
+ "model": model,
1147
+ "base_url": getattr(self.config, "instruct_base_url", None) or self.config.base_url,
1148
+ "prompt": prompt,
1149
+ "user_input": user_input or "",
1150
+ "vision_add": vision_text or "",
1151
+ "tool_calls": [],
1152
+ "tool_results": [],
1153
+ "output": "",
1154
+ }
1155
+
1156
+ search_time = 0.0
1157
+ mode = "standard"
1158
+ mode_reason = ""
1159
+
1160
+ if response.tool_calls:
1161
+ plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
1162
+ history.append(plan_dict)
1163
+
1164
+ tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]
1165
+
1166
+ st = time.time()
1167
+ results = await asyncio.gather(*tasks)
1168
+ search_time = time.time() - st
1169
+
1170
+ for i, result in enumerate(results):
1171
+ tc = response.tool_calls[i]
1172
+ history.append(
1173
+ {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
1174
+ )
1175
+ instruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
1176
+ instruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
1177
+
1178
+ if tc.function.name in ["web_search", "internal_web_search"]:
1179
+ search_payloads.append(str(result))
1180
+ elif tc.function.name == "set_mode":
1181
+ try:
1182
+ args = json.loads(html.unescape(tc.function.arguments))
1183
+ except Exception:
1184
+ args = {}
1185
+ mode = args.get("mode", mode)
1186
+ mode_reason = args.get("reason", "")
1187
+
1188
+ instruct_trace["mode"] = mode
1189
+ if mode_reason:
1190
+ instruct_trace["mode_reason"] = mode_reason
1191
+
1192
+ instruct_trace["output"] = ""
1193
+ instruct_trace["usage"] = usage
1194
+ return "", search_payloads, instruct_trace, usage, search_time
1195
+
1196
+ instruct_trace["mode"] = mode
1197
+ instruct_trace["output"] = (response.content or "").strip()
1198
+ instruct_trace["usage"] = usage
1199
+ return "", search_payloads, instruct_trace, usage, 0.0
1200
+
1201
+ def _format_search_msgs(self) -> str:
1202
+ """Format search snippets only (not crawled pages)."""
1203
+ if not self.all_web_results:
1204
+ return ""
1205
+
1206
+ lines = []
1207
+ for res in self.all_web_results:
1208
+ if res.get("_type") != "search": continue # Only search results
1209
+ idx = res.get("_id")
1210
+ title = (res.get("title", "") or "").strip()
1211
+ url = res.get("url", "")
1212
+ content = (res.get("content", "") or "").strip()
1213
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")
1214
+
1215
+ return "\n".join(lines)
1216
+
1217
+ def _format_page_msgs(self) -> str:
1218
+ """Format crawled page content (detailed)."""
1219
+ if not self.all_web_results:
1220
+ return ""
1221
+
1222
+ lines = []
1223
+ for res in self.all_web_results:
1224
+ if res.get("_type") != "page": continue # Only page results
1225
+ idx = res.get("_id")
1226
+ title = (res.get("title", "") or "").strip()
1227
+ url = res.get("url", "")
1228
+ content = (res.get("content", "") or "").strip()
1229
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")
1230
+
1231
+ return "\n".join(lines)
1232
+
1233
+ def _format_image_search_msgs(self) -> str:
1234
+ if not self.all_web_results:
1235
+ return ""
1236
+
1237
+ lines = []
1238
+ for res in self.all_web_results:
1239
+ if res.get("_type") != "image": continue # Only image results
1240
+ idx = res.get("_id")
1241
+ title = res.get("title", "")
1242
+ url = res.get("image", "") or res.get("url", "")
1243
+ thumb = res.get("thumbnail", "")
1244
+ lines.append(f"[{idx}] Title: {title}\nURL: {url}\nThumbnail: {thumb}\n")
1245
+ return "\n".join(lines)
1246
+
1247
+ def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
1248
+ if api_key or base_url:
1249
+ return AsyncOpenAI(base_url=base_url or self.config.base_url, api_key=api_key or self.config.api_key)
1250
+ return self.client
1251
+
1252
+ def _tool_call_to_trace(self, tool_call) -> Dict[str, Any]:
1253
+ try:
1254
+ args = json.loads(html.unescape(tool_call.function.arguments))
1255
+ except Exception:
1256
+ args = tool_call.function.arguments
1257
+ return {"id": getattr(tool_call, "id", None), "name": tool_call.function.name, "arguments": args}
1258
+
1259
+ def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
1260
+ def fence(label: str, content: str) -> str:
1261
+ safe = (content or "").replace("```", "``\\`")
1262
+ return f"```{label}\n{safe}\n```"
1263
+
1264
+ parts: List[str] = []
1265
+ parts.append("# Pipeline Trace\n")
1266
+
1267
+ if trace.get("vision"):
1268
+ v = trace["vision"]
1269
+ parts.append("## Vision\n")
1270
+ parts.append(f"- model: `{v.get('model')}`")
1271
+ parts.append(f"- base_url: `{v.get('base_url')}`")
1272
+ parts.append(f"- images_count: `{v.get('images_count')}`\n")
1273
+ parts.append("### Prompt\n")
1274
+ parts.append(fence("text", v.get("prompt", "")))
1275
+ parts.append("\n### Output\n")
1276
+ parts.append(fence("text", v.get("output", "")))
1277
+ parts.append("")
1278
+
1279
+ if trace.get("instruct"):
1280
+ t = trace["instruct"]
1281
+ parts.append("## Instruct\n")
1282
+ parts.append(f"- model: `{t.get('model')}`")
1283
+ parts.append(f"- base_url: `{t.get('base_url')}`\n")
1284
+ parts.append("### Prompt\n")
1285
+ parts.append(fence("text", t.get("prompt", "")))
1286
+ if t.get("tool_calls"):
1287
+ parts.append("\n### Tool Calls\n")
1288
+ parts.append(fence("json", json.dumps(t.get("tool_calls"), ensure_ascii=False, indent=2)))
1289
+ if t.get("tool_results"):
1290
+ parts.append("\n### Tool Results\n")
1291
+ parts.append(fence("json", json.dumps(t.get("tool_results"), ensure_ascii=False, indent=2)))
1292
+ parts.append("\n### Output\n")
1293
+ parts.append(fence("text", t.get("output", "")))
1294
+ parts.append("")
1295
+
1296
+ if trace.get("agent"):
1297
+ a = trace["agent"]
1298
+ parts.append("## Agent\n")
1299
+ parts.append(f"- model: `{a.get('model')}`")
1300
+ parts.append(f"- base_url: `{a.get('base_url')}`\n")
1301
+ parts.append("### System Prompt\n")
1302
+ parts.append(fence("text", a.get("system_prompt", "")))
1303
+ parts.append("\n### Steps\n")
1304
+ parts.append(fence("json", json.dumps(a.get("steps", []), ensure_ascii=False, indent=2)))
1305
+ parts.append("\n### Final Output\n")
1306
+ parts.append(fence("text", a.get("final_output", "")))
1307
+
1308
+ return "\n".join(parts).strip() + "\n"
1309
+
1310
+ async def close(self):
1311
+ try:
1312
+ await self.search_service.close()
1313
+ except Exception:
1314
+ pass
1315
+
1316
+ # Gracefully handle background tasks completion
1317
+ if hasattr(self, '_image_search_tasks') and self._image_search_tasks:
1318
+ for task in self._image_search_tasks:
1319
+ if not task.done(): task.cancel()
1320
+ try:
1321
+ # Wait briefly for cancellation to propagate
1322
+ await asyncio.wait(self._image_search_tasks, timeout=0.2)
1323
+ except Exception: pass
1324
+ self._image_search_tasks = []
1325
+
1326
+ # Also cleanup image cache pending tasks if any
1327
+ try:
1328
+ from .image_cache import get_image_cache
1329
+ cache = get_image_cache()
1330
+ if cache._pending:
1331
+ pending = list(cache._pending.values())
1332
+ for task in pending:
1333
+ if not task.done(): task.cancel()
1334
+ await asyncio.wait(pending, timeout=0.2)
1335
+ cache._pending.clear()
1336
+ except Exception: pass
1337
+
1338
+ self.all_web_results = []