entari-plugin-hyw 3.3.0__py3-none-any.whl → 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic.

Files changed (48)
  1. entari_plugin_hyw/__init__.py +309 -763
  2. entari_plugin_hyw/hyw_core.py +700 -0
  3. {entari_plugin_hyw-3.3.0.dist-info → entari_plugin_hyw-3.3.1.dist-info}/METADATA +28 -20
  4. entari_plugin_hyw-3.3.1.dist-info/RECORD +6 -0
  5. entari_plugin_hyw/assets/icon/anthropic.svg +0 -1
  6. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  7. entari_plugin_hyw/assets/icon/gemini.svg +0 -1
  8. entari_plugin_hyw/assets/icon/google.svg +0 -1
  9. entari_plugin_hyw/assets/icon/grok.png +0 -0
  10. entari_plugin_hyw/assets/icon/microsoft.svg +0 -15
  11. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  12. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  13. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  14. entari_plugin_hyw/assets/icon/openai.svg +0 -1
  15. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/icon/perplexity.svg +0 -24
  17. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  18. entari_plugin_hyw/assets/icon/xai.png +0 -0
  19. entari_plugin_hyw/assets/icon/zai.png +0 -0
  20. entari_plugin_hyw/assets/libs/highlight.css +0 -10
  21. entari_plugin_hyw/assets/libs/highlight.js +0 -1213
  22. entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
  23. entari_plugin_hyw/assets/libs/katex.css +0 -1
  24. entari_plugin_hyw/assets/libs/katex.js +0 -1
  25. entari_plugin_hyw/assets/libs/tailwind.css +0 -1
  26. entari_plugin_hyw/assets/package-lock.json +0 -953
  27. entari_plugin_hyw/assets/package.json +0 -16
  28. entari_plugin_hyw/assets/tailwind.config.js +0 -12
  29. entari_plugin_hyw/assets/tailwind.input.css +0 -235
  30. entari_plugin_hyw/assets/template.html +0 -157
  31. entari_plugin_hyw/assets/template.html.bak +0 -157
  32. entari_plugin_hyw/assets/template.j2 +0 -307
  33. entari_plugin_hyw/core/__init__.py +0 -0
  34. entari_plugin_hyw/core/config.py +0 -35
  35. entari_plugin_hyw/core/history.py +0 -146
  36. entari_plugin_hyw/core/hyw.py +0 -41
  37. entari_plugin_hyw/core/pipeline.py +0 -1065
  38. entari_plugin_hyw/core/render.py +0 -596
  39. entari_plugin_hyw/core/render.py.bak +0 -926
  40. entari_plugin_hyw/utils/__init__.py +0 -2
  41. entari_plugin_hyw/utils/browser.py +0 -40
  42. entari_plugin_hyw/utils/misc.py +0 -93
  43. entari_plugin_hyw/utils/playwright_tool.py +0 -36
  44. entari_plugin_hyw/utils/prompts.py +0 -128
  45. entari_plugin_hyw/utils/search.py +0 -241
  46. entari_plugin_hyw-3.3.0.dist-info/RECORD +0 -46
  47. {entari_plugin_hyw-3.3.0.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL +0 -0
  48. {entari_plugin_hyw-3.3.0.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/core/pipeline.py
@@ -1,1065 +0,0 @@
-import asyncio
-import html
-import json
-import time
-from contextlib import asynccontextmanager
-from typing import Any, Dict, List, Optional, Tuple
-
-from loguru import logger
-from openai import AsyncOpenAI
-
-from .config import HYWConfig
-from ..utils.search import SearchService
-from ..utils.prompts import (
-    AGENT_SP,
-    AGENT_SP_INTRUCT_VISION_ADD,
-    AGENT_SP_TOOLS_STANDARD_ADD,
-    AGENT_SP_TOOLS_AGENT_ADD,
-    AGENT_SP_SEARCH_ADD,
-    AGENT_SP_PAGE_ADD,
-    AGENT_SP_IMAGE_SEARCH_ADD,
-    INTRUCT_SP,
-    INTRUCT_SP_VISION_ADD,
-    VISION_SP,
-)
-
-@asynccontextmanager
-async def _null_async_context():
-    yield None
-
-
-class ProcessingPipeline:
-    """
-    Core pipeline (vision -> instruct/search -> agent).
-    """
-
-    def __init__(self, config: HYWConfig):
-        self.config = config
-        self.search_service = SearchService(config)
-        self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
-        self.all_web_results = []  # Cache for search results
-        self.current_mode = "standard"  # standard | agent
-
-        self.web_search_tool = {
-            "type": "function",
-            "function": {
-                "name": "internal_web_search",
-                "description": "Search the web for text.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"query": {"type": "string"}},
-                    "required": ["query"],
-                },
-            },
-        }
-        self.image_search_tool = {
-            "type": "function",
-            "function": {
-                "name": "internal_image_search",
-                "description": "Search for images related to a query.",
-                "parameters": {
-                    "type": "object",
-                    "properties": {"query": {"type": "string"}},
-                    "required": ["query"],
-                },
-            },
-        }
-        self.set_mode_tool = {
-            "type": "function",
-            "function": {
-                "name": "set_mode",
-                "description": "设定后续 Agent 的运行模式: standard | agent",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "mode": {"type": "string", "enum": ["standard", "agent"]},
-                        "reason": {"type": "string"},
-                    },
-                    "required": ["mode"],
-                },
-            },
-        }
-        self.crawl_page_tool = {
-            "type": "function",
-            "function": {
-                "name": "crawl_page",
-                "description": "使用 Crawl4AI 抓取网页并返回 Markdown 文本。",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "url": {"type": "string"},
-                    },
-                    "required": ["url"],
-                },
-            },
-        }
-
-    async def execute(
-        self,
-        user_input: str,
-        conversation_history: List[Dict],
-        model_name: str = None,
-        images: List[str] = None,
-        vision_model_name: str = None,
-        selected_vision_model: str = None,
-    ) -> Dict[str, Any]:
-        """
-        1) Vision: summarize images once (no image persistence).
-        2) Intruct: run web_search and decide whether to grant Playwright MCP tools.
-        3) Agent: normally no tools; if granted, allow Playwright MCP tools (max 6 rounds; step 5 nudge, step 6 forced).
-        """
-        start_time = time.time()
-        stats = {"start_time": start_time, "tool_calls_count": 0}
-        # Token usage tracking for billing
-        usage_totals = {"input_tokens": 0, "output_tokens": 0}
-        active_model = model_name or self.config.model_name
-
-        current_history = conversation_history
-        final_response_content = ""
-        structured: Dict[str, Any] = {}
-
-        # Reset search cache for this execution
-        self.all_web_results = []
-
-        try:
-            logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
-
-            trace: Dict[str, Any] = {
-                "vision": None,
-                "intruct": None,
-                "agent": None,
-            }
-
-            # Vision stage
-            vision_text = ""
-            vision_start = time.time()
-            vision_time = 0
-            vision_cost = 0.0
-            vision_usage = {}
-            if images:
-                vision_model = (
-                    selected_vision_model
-                    or vision_model_name
-                    or getattr(self.config, "vision_model_name", None)
-                    or active_model
-                )
-                vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SP
-                vision_prompt = vision_prompt_tpl.format(user_msgs=user_input or "[图片]")
-                vision_text, vision_usage = await self._run_vision_stage(
-                    user_input=user_input,
-                    images=images,
-                    model=vision_model,
-                    prompt=vision_prompt,
-                )
-                # Add vision usage with vision-specific pricing
-                usage_totals["input_tokens"] += vision_usage.get("input_tokens", 0)
-                usage_totals["output_tokens"] += vision_usage.get("output_tokens", 0)
-
-                # Calculate Vision Cost
-                v_in_price = float(getattr(self.config, "vision_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
-                v_out_price = float(getattr(self.config, "vision_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
-                if v_in_price > 0 or v_out_price > 0:
-                    vision_cost = (vision_usage.get("input_tokens", 0) / 1_000_000 * v_in_price) + (vision_usage.get("output_tokens", 0) / 1_000_000 * v_out_price)
-
-                vision_time = time.time() - vision_start
-
-                trace["vision"] = {
-                    "model": vision_model,
-                    "base_url": getattr(self.config, "vision_base_url", None) or self.config.base_url,
-                    "prompt": vision_prompt,
-                    "user_input": user_input or "",
-                    "images_count": len(images or []),
-                    "output": vision_text,
-                    "usage": vision_usage,
-                    "time": vision_time,
-                    "cost": vision_cost
-                }
-
-            # Intruct + pre-search
-            instruct_start = time.time()
-            instruct_model = getattr(self.config, "intruct_model_name", None) or active_model
-            instruct_text, search_payloads, intruct_trace, intruct_usage, search_time = await self._run_instruct_stage(
-                user_input=user_input,
-                vision_text=vision_text,
-                model=instruct_model,
-            )
-            instruct_time = time.time() - instruct_start
-
-            # Calculate Instruct Cost
-            instruct_cost = 0.0
-            i_in_price = float(getattr(self.config, "intruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
-            i_out_price = float(getattr(self.config, "intruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
-            if i_in_price > 0 or i_out_price > 0:
-                instruct_cost = (intruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (intruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
-
-            # Add instruct usage
-            usage_totals["input_tokens"] += intruct_usage.get("input_tokens", 0)
-            usage_totals["output_tokens"] += intruct_usage.get("output_tokens", 0)
-
-            intruct_trace["time"] = instruct_time
-            intruct_trace["cost"] = instruct_cost
-            trace["intruct"] = intruct_trace
-
-            # Start agent loop
-            agent_start_time = time.time()
-            current_history.append({"role": "user", "content": user_input or "..."})
-
-            mode = intruct_trace.get("mode", self.current_mode).lower()
-            logger.success(f"Instruct Mode: {mode}")
-            self.current_mode = mode
-
-            # Determine max iterations
-            max_steps = 10 if mode == "agent" else 1
-
-            step = 0
-            agent_trace_steps: List[Dict[str, Any]] = []
-            last_system_prompt = ""
-
-            agent_tools: Optional[List[Dict[str, Any]]] = None
-            if mode == "agent":
-                agent_tools = [self.web_search_tool, self.image_search_tool, self.crawl_page_tool]
-
-            # Agent loop
-            while step < max_steps:
-                step += 1
-                logger.info(f"Pipeline: Agent step {step}/{max_steps}")
-
-                if step == 5 and mode == "agent":
-                    current_history.append(
-                        {
-                            "role": "system",
-                            "content": "System: [Next Step Final] Please start consolidating the answer; the next step must be the final response.",
-                        }
-                    )
-
-                tools_desc = ""
-                if agent_tools:
-                    tools_desc = "\n".join([
-                        "- internal_web_search(query): 触发搜索并缓存结果",
-                        "- crawl_page(url): 使用 Crawl4AI 抓取网页返回 Markdown"
-                    ])
-
-                user_msgs_text = user_input or ""
-
-                search_msgs_text = self._format_search_msgs()
-                image_msgs_text = self._format_image_search_msgs()
-
-                has_search_results = any(not r.get("is_image") for r in self.all_web_results)
-                has_image_results = any(r.get("is_image") for r in self.all_web_results)
-
-                # Build agent system prompt
-                agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP
-
-                mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
-                system_prompt = agent_prompt_tpl.format(
-                    user_msgs=user_msgs_text,
-                    mode=mode,
-                    mode_desc=mode_desc_text
-                )
-
-                # Append vision text if available
-                if vision_text:
-                    system_prompt += AGENT_SP_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)
-
-                # Append search results
-                if has_search_results and search_msgs_text:
-                    system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs=search_msgs_text)
-
-                # Append crawled page content
-                page_msgs_text = self._format_page_msgs()
-                if page_msgs_text:
-                    system_prompt += AGENT_SP_PAGE_ADD.format(page_msgs=page_msgs_text)
-
-                if has_image_results and image_msgs_text:
-                    system_prompt += AGENT_SP_IMAGE_SEARCH_ADD.format(image_search_msgs=image_msgs_text)
-
-                last_system_prompt = system_prompt
-
-                messages = [{"role": "system", "content": system_prompt}]
-                messages.extend(current_history)
-
-                tools_for_step = agent_tools if (agent_tools and step < max_steps) else None
-
-                # Debug logging
-                if tools_for_step:
-                    logger.info(f"[Agent] Tools provided: {[t['function']['name'] for t in tools_for_step]}")
-                else:
-                    logger.warning(f"[Agent] NO TOOLS provided for step {step} (agent_tools={agent_tools is not None}, step<max={step < max_steps})")
-
-                step_llm_start = time.time()
-                response, step_usage = await self._safe_llm_call(
-                    messages=messages,
-                    model=active_model,
-                    tools=tools_for_step,
-                    tool_choice="auto" if tools_for_step else None,
-                )
-                step_llm_time = time.time() - step_llm_start
-
-                # Debug: Check response
-                has_tool_calls = response.tool_calls is not None and len(response.tool_calls) > 0
-                logger.info(f"[Agent] Response has_tool_calls={has_tool_calls}, has_content={bool(response.content)}")
-
-                # Accumulate agent usage
-                usage_totals["input_tokens"] += step_usage.get("input_tokens", 0)
-                usage_totals["output_tokens"] += step_usage.get("output_tokens", 0)
-
-                if response.tool_calls and tools_for_step:
-                    tool_calls = response.tool_calls
-                    stats["tool_calls_count"] += len(tool_calls)
-
-                    # Use model_dump to preserve provider-specific fields (e.g., Gemini's thought_signature)
-                    assistant_msg = response.model_dump(exclude_unset=True) if hasattr(response, "model_dump") else {
-                        "role": "assistant",
-                        "content": response.content,
-                        "tool_calls": [{"id": tc.id, "type": "function", "function": {"name": tc.function.name, "arguments": tc.function.arguments}} for tc in tool_calls]
-                    }
-                    current_history.append(assistant_msg)
-
-                    tasks = [self._safe_route_tool(tc) for tc in tool_calls]
-                    tool_start_time = time.time()
-                    results = await asyncio.gather(*tasks)
-                    tool_exec_time = time.time() - tool_start_time
-
-                    step_trace = {
-                        "step": step,
-                        "tool_calls": [self._tool_call_to_trace(tc) for tc in tool_calls],
-                        "tool_results": [],
-                        "tool_time": tool_exec_time,
-                        "llm_time": step_llm_time,
-                    }
-                    for i, result in enumerate(results):
-                        tc = tool_calls[i]
-                        step_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
-                        current_history.append(
-                            {
-                                "tool_call_id": tc.id,
-                                "role": "tool",
-                                "name": tc.function.name,
-                                "content": str(result),
-                            }
-                        )
-                    agent_trace_steps.append(step_trace)
-                    continue
-
-                final_response_content = response.content or ""
-                current_history.append({"role": "assistant", "content": final_response_content})
-                agent_trace_steps.append({"step": step, "final": True, "output": final_response_content})
-                break
-
-            if not final_response_content:
-                final_response_content = "执行结束,但未生成内容。"
-
-            structured = self._parse_tagged_response(final_response_content)
-            final_content = structured.get("response") or final_response_content
-
-            agent_time = time.time() - agent_start_time
-
-            # Calculate Agent Cost
-            agent_cost = 0.0
-            a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
-            a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
-
-            agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - intruct_usage.get("input_tokens", 0)
-            agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - intruct_usage.get("output_tokens", 0)
-
-            if a_in_price > 0 or a_out_price > 0:
-                agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)
-
-            trace["agent"] = {
-                "model": active_model,
-                "base_url": self.config.base_url,
-                "system_prompt": last_system_prompt,
-                "steps": agent_trace_steps,
-                "final_output": final_response_content,
-                "time": agent_time,
-                "cost": agent_cost
-            }
-            trace_markdown = self._render_trace_markdown(trace)
-
-            stats["total_time"] = time.time() - start_time
-            stats["steps"] = step
-
-            # Calculate billing info
-            billing_info = {
-                "input_tokens": usage_totals["input_tokens"],
-                "output_tokens": usage_totals["output_tokens"],
-                "total_cost": 0.0,
-            }
-            input_price = getattr(self.config, "input_price", None) or 0.0
-            output_price = getattr(self.config, "output_price", None) or 0.0
-
-            if input_price > 0 or output_price > 0:
-                input_cost = (usage_totals["input_tokens"] / 1_000_000) * input_price
-                output_cost = (usage_totals["output_tokens"] / 1_000_000) * output_price
-                billing_info["total_cost"] = input_cost + output_cost
-
-            # Build stages_used list for UI display
-            stages_used = []
-
-            def infer_icon(model_name: str, base_url: str) -> str:
-                model_lower = (model_name or "").lower()
-                url_lower = (base_url or "").lower()
-                if "deepseek" in model_lower or "deepseek" in url_lower: return "deepseek"
-                elif "claude" in model_lower or "anthropic" in url_lower: return "anthropic"
-                elif "gemini" in model_lower or "google" in url_lower: return "google"
-                elif "gpt" in model_lower or "openai" in url_lower: return "openai"
-                elif "qwen" in model_lower: return "qwen"
-                elif "openrouter" in url_lower: return "openrouter"
-                return "openai"
-
-            def infer_provider(base_url: str) -> str:
-                url_lower = (base_url or "").lower()
-                if "openrouter" in url_lower: return "OpenRouter"
-                elif "openai" in url_lower: return "OpenAI"
-                elif "anthropic" in url_lower: return "Anthropic"
-                elif "google" in url_lower: return "Google"
-                elif "deepseek" in url_lower: return "DeepSeek"
-                return ""
-
-            if trace.get("vision"):
-                v = trace["vision"]
-                v_model = v.get("model", "")
-                v_base_url = v.get("base_url", "") or self.config.base_url
-                stages_used.append({
-                    "name": "Vision",
-                    "model": v_model,
-                    "icon_config": getattr(self.config, "vision_icon", None) or infer_icon(v_model, v_base_url),
-                    "provider": infer_provider(v_base_url),
-                    "time": v.get("time", 0),
-                    "cost": v.get("cost", 0.0)
-                })
-
-            if trace.get("intruct"):
-                i = trace["intruct"]
-                i_model = i.get("model", "")
-                i_base_url = i.get("base_url", "") or self.config.base_url
-                stages_used.append({
-                    "name": "Instruct",
-                    "model": i_model,
-                    "icon_config": getattr(self.config, "instruct_icon", None) or getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
-                    "provider": infer_provider(i_base_url),
-                    "time": i.get("time", 0),
-                    "cost": i.get("cost", 0.0)
-                })
-
-            if has_search_results and search_payloads:
-                stages_used.append({
-                    "name": "Search",
-                    "model": getattr(self.config, "search_name", "DuckDuckGo"),
-                    "icon_config": "search",
-                    "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
-                    "time": search_time,
-                    "cost": 0.0
-                })
-
-            # Add Crawler stage if Instruct used crawl_page
-            if trace.get("intruct"):
-                intruct_tool_calls = trace["intruct"].get("tool_calls", [])
-                crawl_calls = [tc for tc in intruct_tool_calls if tc.get("name") == "crawl_page"]
-                if crawl_calls:
-                    # Build crawled_pages list for UI
-                    crawled_pages = []
-                    for tc in crawl_calls:
-                        url = tc.get("arguments", {}).get("url", "")
-                        # Try to find cached result
-                        found = next((r for r in self.all_web_results if r.get("url") == url and r.get("is_crawled")), None)
-                        if found:
-                            try:
-                                from urllib.parse import urlparse
-                                domain = urlparse(url).netloc
-                            except:
-                                domain = ""
-                            crawled_pages.append({
-                                "title": found.get("title", "Page"),
-                                "url": url,
-                                "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
-                            })
-
-                    stages_used.append({
-                        "name": "Crawler",
-                        "model": "Crawl4AI",
-                        "icon_config": "search",
-                        "provider": "网页抓取",
-                        "time": search_time, # Use existing search_time which includes fetch time
-                        "cost": 0.0,
-                        "crawled_pages": crawled_pages
-                    })
-
-            # --- Granular Agent Stages (Grouped) ---
-            if trace.get("agent"):
-                a = trace["agent"]
-                a_model = a.get("model", "") or active_model
-                a_base_url = a.get("base_url", "") or self.config.base_url
-                steps = a.get("steps", [])
-                agent_icon = getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url)
-                agent_provider = infer_provider(a_base_url)
-
-                for s in steps:
-                    if "tool_calls" in s:
-                        # 1. Agent Thought Stage (with LLM time)
-                        stages_used.append({
-                            "name": "Agent",
-                            "model": a_model,
-                            "icon_config": agent_icon,
-                            "provider": agent_provider,
-                            "time": s.get("llm_time", 0), "cost": 0
-                        })
-
-                        # 2. Grouped Tool Stages
-                        # Collect results for grouping
-                        search_group_items = []
-                        crawler_group_items = []
-
-                        tcs = s.get("tool_calls", [])
-                        trs = s.get("tool_results", [])
-
-                        for idx, tc in enumerate(tcs):
-                            t_name = tc.get("name")
-                            # Try to get result content if available
-                            t_res_content = trs[idx].get("content", "") if idx < len(trs) else ""
-
-                            if t_name in ["internal_web_search", "web_search", "internal_image_search"]:
-                                # We don't have per-call metadata easily unless we parse the 'result' string (which is JSON dump now for route_tool)
-                                # But search results are cached in self.all_web_results.
-                                # The 'content' of search tool result is basically "cached_for_prompt".
-                                # So we don't need to put items here, just show "Search" container.
-                                # But wait, if we want to show "what was searched", we can parse args.
-                                args = tc.get("arguments", {})
-                                query = args.get("query", "")
-                                if query:
-                                    search_group_items.append({"query": query})
-
-                            elif t_name == "crawl_page":
-                                # Get URL from arguments, title from result
-                                args = tc.get("arguments", {})
-                                url = args.get("url", "")
-                                title = "Page"
-                                try:
-                                    page_data = json.loads(t_res_content)
-                                    if isinstance(page_data, dict):
-                                        title = page_data.get("title", "Page")
-                                except:
-                                    pass
-
-                                if url:
-                                    try:
-                                        domain = urlparse(url).netloc
-                                    except:
-                                        domain = ""
-                                    crawler_group_items.append({
-                                        "title": title,
-                                        "url": url,
-                                        "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
-                                    })
-
-                        # Append Grouped Stages
-                        if search_group_items:
-                            stages_used.append({
-                                "name": "Search",
-                                "model": getattr(self.config, "search_name", "DuckDuckGo"),
-                                "icon_config": "search",
-                                "provider": "Agent Search",
-                                "time": s.get("tool_time", 0), "cost": 0,
-                                "queries": search_group_items # Render can use this if needed, or just show generic
-                            })
-
-                        if crawler_group_items:
-                            stages_used.append({
-                                "name": "Crawler",
-                                "model": "Crawl4AI",
-                                "icon_config": "browser",
-                                "provider": "Page Fetcher",
-                                "time": s.get("tool_time", 0), "cost": 0,
-                                "crawled_pages": crawler_group_items
-                            })
-
-                    elif s.get("final"):
-                        stages_used.append({
-                            "name": "Agent",
-                            "model": a_model,
-                            "icon_config": agent_icon,
-                            "provider": agent_provider,
-                            "time": 0, "cost": 0
-                        })
-
-                # Assign total time/cost to last Agent stage
-                last_agent = next((s for s in reversed(stages_used) if s["name"] == "Agent"), None)
-                if last_agent:
-                    last_agent["time"] = a.get("time", 0)
-                    last_agent["cost"] = a.get("cost", 0.0)
-
-            return {
-                "llm_response": final_content,
-                "structured_response": structured,
-                "stats": stats,
-                "model_used": active_model,
-                "vision_model_used": (selected_vision_model or getattr(self.config, "vision_model_name", None)) if images else None,
-                "conversation_history": current_history,
-                "trace_markdown": trace_markdown,
-                "billing_info": billing_info,
-                "stages_used": stages_used,
-            }
-
-        except Exception as e:
-            logger.exception("Pipeline Critical Failure")
-            return {
-                "llm_response": f"I encountered a critical error: {e}",
-                "stats": stats,
-                "error": str(e),
-            }
-
-    def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
-        """Parse response for references and page references."""
-        parsed = {"response": "", "references": [], "page_references": [], "flow_steps": []}
-        if not text:
-            return parsed
-
-        import re
-
-        remaining_text = text
-
-        # 1. Try to unwrap JSON if the model acted like a ReAct agent
-        try:
-            # Check if it looks like JSON first to avoid performance hit
-            if remaining_text.strip().startswith("{") and "action" in remaining_text:
-                data = json.loads(remaining_text)
-                if isinstance(data, dict) and "action_input" in data:
-                    remaining_text = data["action_input"]
-        except Exception:
-            pass
-
-        id_map = {} # Map original search ID (str) -> new index (int)
-        page_id_map = {} # Map original page ID (str) -> new index (int)
-
-        # Parse References Block (unified: contains both [search] and [page] entries)
-        ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
-        if ref_block_match:
-            ref_content = ref_block_match.group(1).strip()
-            for line in ref_content.split("\n"):
-                line = line.strip()
-                if not line: continue
-
-                # Match [id] [type] [title](url)
-                # e.g. [1] [search] [文本描述](url) or [5] [page] [页面标题](url)
-                id_match = re.match(r"^\[(\d+)\]", line)
-                type_match = re.search(r"\[(search|page)\]", line, re.IGNORECASE)
-                link_match = re.search(r"\[([^\[\]]+)\]\(([^)]+)\)", line)
-
-                idx = None
-                if id_match:
-                    try:
-                        idx = int(id_match.group(1))
-                    except ValueError:
-                        pass
-
-                ref_type = "search" # default
-                if type_match:
-                    ref_type = type_match.group(1).lower()
-
-                entry = None
-                if idx is not None and self.all_web_results:
-                    # For page type, only match crawled items
-                    if ref_type == "page":
-                        found = next((r for r in self.all_web_results if r.get("_id") == idx and r.get("is_crawled")), None)
-                    else:
-                        found = next((r for r in self.all_web_results if r.get("_id") == idx and not r.get("is_crawled")), None)
-
-                    if found:
-                        entry = {
-                            "title": found.get("title"),
-                            "url": found.get("url"),
-                            "domain": found.get("domain", "")
-                        }
-
-                if not entry and link_match:
-                    entry = {"title": link_match.group(1), "url": link_match.group(2)}
-
-                if entry:
-                    if ref_type == "page":
-                        parsed["page_references"].append(entry)
-                        if idx is not None:
-                            page_id_map[str(idx)] = len(parsed["page_references"])
-                    else:
-                        parsed["references"].append(entry)
-                        if idx is not None:
-                            id_map[str(idx)] = len(parsed["references"])
-
-            remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
-
-        # Replace search:id citations
-        if id_map:
-            def replace_search_citation(match):
-                old_id = match.group(1) or match.group(2)
-                if old_id in id_map:
-                    return f"`search:{id_map[old_id]}`"
-                return match.group(0)
-
-            remaining_text = re.sub(r'\[(\d+)\]', replace_search_citation, remaining_text)
-            remaining_text = re.sub(r'(?<!`)search:(\d+)(?!`)', replace_search_citation, remaining_text)
-            remaining_text = re.sub(r'`search:(\d+)`', replace_search_citation, remaining_text)
-
-        # Replace page:id citations
-        if page_id_map:
-            def replace_page_citation(match):
-                old_id = match.group(1)
-                if old_id in page_id_map:
-                    return f"`page:{page_id_map[old_id]}`"
-                return match.group(0)
-
-            remaining_text = re.sub(r'(?<!`)page:(\d+)(?!`)', replace_page_citation, remaining_text)
-            remaining_text = re.sub(r'`page:(\d+)`', replace_page_citation, remaining_text)
-
-        parsed["response"] = remaining_text.strip()
-        return parsed
-
-    async def _safe_route_tool(self, tool_call):
-        """Wrapper for safe concurrent execution of tool calls."""
-        try:
-            return await asyncio.wait_for(self._route_tool(tool_call), timeout=30.0)
-        except asyncio.TimeoutError:
-            return "Error: Tool execution timed out (30s limit)."
-        except Exception as e:
-            return f"Error: Tool execution failed: {e}"
-
-    async def _route_tool(self, tool_call):
-        """Execute tool call and return result."""
-        name = tool_call.function.name
-        args = json.loads(html.unescape(tool_call.function.arguments))
-
-        if name == "internal_web_search" or name == "web_search":
-            query = args.get("query")
-            web = await self.search_service.search(query)
-
-            # Cache results and assign IDs
-            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-
-            for item in web:
-                current_max_id += 1
-                item["_id"] = current_max_id
-                item["query"] = query
-                self.all_web_results.append(item)
-
-            return json.dumps({"web_results_count": len(web), "status": "cached_for_prompt"}, ensure_ascii=False)
-
-        if name == "internal_image_search":
-            query = args.get("query")
-            images = await self.search_service.image_search(query)
-
-            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-            for item in images:
-                current_max_id += 1
-                item["_id"] = current_max_id
-                item["query"] = query
-                item["is_image"] = True
-                self.all_web_results.append(item)
-
-            return json.dumps({"image_results_count": len(images), "status": "cached_for_prompt"}, ensure_ascii=False)
-
-        if name == "crawl_page":
-            url = args.get("url")
-            logger.info(f"[Tool] Crawling page: {url}")
-            # Returns Dict: {content, title, url}
-            result_dict = await self.search_service.fetch_page(url)
-
-            # Cache the crawled content so Agent can access it
-            current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-            current_max_id += 1
-
-            cached_item = {
-                "_id": current_max_id,
-                "title": result_dict.get("title", "Page"),
-                "url": result_dict.get("url", url),
-                "content": result_dict.get("content", "")[:2000], # Clip content for prompt
-                "domain": "",
-                "is_crawled": True,
-            }
-            try:
-                from urllib.parse import urlparse
-                cached_item["domain"] = urlparse(url).netloc
-            except:
-                pass
-
-            self.all_web_results.append(cached_item)
-
-            return json.dumps({"crawl_status": "success", "title": cached_item["title"], "content_length": len(result_dict.get("content", ""))}, ensure_ascii=False)
-
-        if name == "set_mode":
-            mode = args.get("mode", "standard")
-            self.current_mode = mode
-            return f"Mode set to {mode}"
-
-        return f"Unknown tool {name}"
-
-
-    async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None):
-        try:
-            return await asyncio.wait_for(
-                self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client),
-                timeout=120.0,
-            )
-        except asyncio.TimeoutError:
-            logger.error("LLM Call Timed Out")
-            return type("obj", (object,), {"content": "Error: The model took too long to respond.", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
-        except Exception as e:
-            logger.error(f"LLM Call Failed: {e}")
-            return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
-
-    async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI):
-        try:
-            payload_debug = json.dumps(messages)
-            logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
-        except Exception:
-            pass
-
-        t0 = time.time()
-        logger.info("LLM Request SENT to API...")
-        response = await client.chat.completions.create(
-            model=model,
-            messages=messages,
-            tools=tools,
-            tool_choice=tool_choice,
-            temperature=self.config.temperature,
-        )
-        logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
-
-        usage = {"input_tokens": 0, "output_tokens": 0}
-        if hasattr(response, "usage") and response.usage:
-            usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
-            usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
-
-        return response.choices[0].message, usage
-
-    async def _run_vision_stage(self, user_input: str, images: List[str], model: str, prompt: str) -> Tuple[str, Dict[str, int]]:
-        content_payload: List[Dict[str, Any]] = [{"type": "text", "text": user_input or ""}]
-        for img_b64 in images:
-            url = f"data:image/png;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
-            content_payload.append({"type": "image_url", "image_url": {"url": url}})
-
-        client = self._client_for(
-            api_key=getattr(self.config, "vision_api_key", None),
-            base_url=getattr(self.config, "vision_base_url", None),
-        )
-        response, usage = await self._safe_llm_call(
-            messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
-            model=model,
-            client=client,
-        )
-        return (response.content or "").strip(), usage
-
-    async def _run_instruct_stage(
-        self, user_input: str, vision_text: str, model: str
-    ) -> Tuple[str, List[str], Dict[str, Any], Dict[str, int], float]:
-        """Returns (instruct_text, search_payloads, trace_dict, usage_dict, search_time)."""
-        # Instruct has access to: web_search, image_search, set_mode, crawl_page
-        tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool]
-        tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式"
-
-        prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SP
-        prompt = prompt_tpl.format(user_msgs=user_input or "", tools_desc=tools_desc)
-
-        if vision_text:
-            prompt = f"{prompt}\\n\\n{INTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
-
-        client = self._client_for(
-            api_key=getattr(self.config, "intruct_api_key", None),
-            base_url=getattr(self.config, "intruct_base_url", None),
-        )
-
-        history: List[Dict[str, Any]] = [
-            {"role": "system", "content": prompt},
-            {"role": "user", "content": user_input or "..."},
-        ]
-
-        response, usage = await self._safe_llm_call(
-            messages=history,
-            model=model,
-            tools=tools,
-            tool_choice="auto",
-            client=client,
-        )
-
-        search_payloads: List[str] = []
-        intruct_trace: Dict[str, Any] = {
-            "model": model,
-            "base_url": getattr(self.config, "intruct_base_url", None) or self.config.base_url,
-            "prompt": prompt,
-            "user_input": user_input or "",
-            "vision_add": vision_text or "",
-            "tool_calls": [],
-            "tool_results": [],
-            "output": "",
-        }
-
-        search_time = 0.0
-        mode = "standard"
-        mode_reason = ""
-
-        if response.tool_calls:
-            plan_dict = response.model_dump() if hasattr(response, "model_dump") else response
-            history.append(plan_dict)
-
-            tasks = [self._safe_route_tool(tc) for tc in response.tool_calls]
-
-            st = time.time()
-            results = await asyncio.gather(*tasks)
-            search_time = time.time() - st
-
-            for i, result in enumerate(results):
-                tc = response.tool_calls[i]
-                history.append(
-                    {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
-                )
-                intruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
-                intruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
-
-                if tc.function.name in ["web_search", "internal_web_search"]:
-                    search_payloads.append(str(result))
-                elif tc.function.name == "set_mode":
-                    try:
-                        args = json.loads(html.unescape(tc.function.arguments))
-                    except Exception:
-                        args = {}
-                    mode = args.get("mode", mode)
-                    mode_reason = args.get("reason", "")
-
-            intruct_trace["mode"] = mode
-            if mode_reason:
-                intruct_trace["mode_reason"] = mode_reason
-
-            intruct_trace["output"] = ""
-            intruct_trace["usage"] = usage
-            return "", search_payloads, intruct_trace, usage, search_time
-
-        intruct_trace["mode"] = mode
-        intruct_trace["output"] = (response.content or "").strip()
-        intruct_trace["usage"] = usage
-        return "", search_payloads, intruct_trace, usage, 0.0
-
-    def _format_search_msgs(self) -> str:
-        """Format search snippets only (not crawled pages)."""
-        if not self.all_web_results:
-            return ""
-
-        def clip(s: str, n: int) -> str:
-            s = (s or "").strip()
-            return s if len(s) <= n else s[: n - 1] + "…"
-
-        lines = []
-        for res in self.all_web_results:
-            if res.get("is_image"): continue # Skip images
-            if res.get("is_crawled"): continue # Skip crawled pages (handled separately)
-            idx = res.get("_id")
-            title = clip(res.get("title", ""), 80)
-            url = res.get("url", "")
-            content = clip(res.get("content", ""), 200)
-            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")
-
-        return "\n".join(lines)
-
-    def _format_page_msgs(self) -> str:
-        """Format crawled page content (detailed)."""
-        if not self.all_web_results:
-            return ""
-
-        def clip(s: str, n: int) -> str:
-            s = (s or "").strip()
-            return s if len(s) <= n else s[: n - 1] + "…"
-
-        lines = []
-        for res in self.all_web_results:
-            if not res.get("is_crawled"): continue # Only crawled pages
-            idx = res.get("_id")
-            title = clip(res.get("title", ""), 80)
-            url = res.get("url", "")
-            content = clip(res.get("content", ""), 1500) # More content for pages
-            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")
-
-        return "\n".join(lines)
-
-    def _format_image_search_msgs(self) -> str:
-        if not self.all_web_results:
-            return ""
-
-        lines = []
-        for res in self.all_web_results:
-            if not res.get("is_image"): continue
-            idx = res.get("_id")
-            title = res.get("title", "")
-            url = res.get("image", "") or res.get("url", "")
-            thumb = res.get("thumbnail", "")
-            lines.append(f"[{idx}] Title: {title}\nURL: {url}\nThumbnail: {thumb}\n")
-        return "\n".join(lines)
-
-    def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
-        if api_key or base_url:
-            return AsyncOpenAI(base_url=base_url or self.config.base_url, api_key=api_key or self.config.api_key)
-        return self.client
-
-    def _tool_call_to_trace(self, tool_call) -> Dict[str, Any]:
-        try:
-            args = json.loads(html.unescape(tool_call.function.arguments))
-        except Exception:
-            args = tool_call.function.arguments
-        return {"id": getattr(tool_call, "id", None), "name": tool_call.function.name, "arguments": args}
-
-    def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
-        def fence(label: str, content: str) -> str:
-            safe = (content or "").replace("```", "``\\`")
-            return f"```{label}\n{safe}\n```"
-
-        parts: List[str] = []
-        parts.append("# Pipeline Trace\n")
-
-        if trace.get("vision"):
-            v = trace["vision"]
-            parts.append("## Vision\n")
-            parts.append(f"- model: `{v.get('model')}`")
-            parts.append(f"- base_url: `{v.get('base_url')}`")
-            parts.append(f"- images_count: `{v.get('images_count')}`\n")
-            parts.append("### Prompt\n")
-            parts.append(fence("text", v.get("prompt", "")))
-            parts.append("\n### Output\n")
-            parts.append(fence("text", v.get("output", "")))
-            parts.append("")
-
-        if trace.get("intruct"):
-            t = trace["intruct"]
-            parts.append("## Intruct\n")
-            parts.append(f"- model: `{t.get('model')}`")
-            parts.append(f"- base_url: `{t.get('base_url')}`\n")
-            parts.append("### Prompt\n")
-            parts.append(fence("text", t.get("prompt", "")))
-            if t.get("tool_calls"):
-                parts.append("\n### Tool Calls\n")
-                parts.append(fence("json", json.dumps(t.get("tool_calls"), ensure_ascii=False, indent=2)))
-            if t.get("tool_results"):
-                parts.append("\n### Tool Results\n")
-                parts.append(fence("json", json.dumps(t.get("tool_results"), ensure_ascii=False, indent=2)))
-            parts.append("\n### Output\n")
-            parts.append(fence("text", t.get("output", "")))
-            parts.append("")
-
-        if trace.get("agent"):
-            a = trace["agent"]
-            parts.append("## Agent\n")
-            parts.append(f"- model: `{a.get('model')}`")
-            parts.append(f"- base_url: `{a.get('base_url')}`\n")
-            parts.append("### System Prompt\n")
-            parts.append(fence("text", a.get("system_prompt", "")))
-            parts.append("\n### Steps\n")
-            parts.append(fence("json", json.dumps(a.get("steps", []), ensure_ascii=False, indent=2)))
-            parts.append("\n### Final Output\n")
-            parts.append(fence("text", a.get("final_output", "")))
-
-        return "\n".join(parts).strip() + "\n"
-
-    async def close(self):
-        try:
-            await self.search_service.close()
-        except Exception:
-            pass
-        try:
-            from ..utils.search import close_shared_crawler
-            await close_shared_crawler()
-        except Exception:
-            pass
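
For orientation, below is a minimal sketch of how the ProcessingPipeline removed above could have been driven in 3.3.0, based only on the execute() and close() signatures visible in this diff. The HYWConfig constructor arguments and the endpoint, key, and model values are assumptions; the real fields live in the likewise-removed core/config.py and may differ.

import asyncio

from entari_plugin_hyw.core.config import HYWConfig            # module removed in 3.3.1
from entari_plugin_hyw.core.pipeline import ProcessingPipeline  # module removed in 3.3.1


async def main() -> None:
    # Assumed config fields: the pipeline reads base_url, api_key and model_name off the config.
    config = HYWConfig(base_url="https://api.example.com/v1", api_key="sk-...", model_name="gpt-4o-mini")
    pipeline = ProcessingPipeline(config)
    try:
        result = await pipeline.execute(
            user_input="Summarize the latest release notes.",
            conversation_history=[],  # execute() appends user/assistant/tool turns here
        )
        print(result["llm_response"])
        print(result.get("billing_info"))  # token totals and estimated cost, per the code above
    finally:
        await pipeline.close()


if __name__ == "__main__":
    asyncio.run(main())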