entari-plugin-hyw 4.0.0rc17__py3-none-any.whl → 4.0.0rc19__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

This release of entari-plugin-hyw has been flagged as potentially problematic.

Files changed (55)
  1. entari_plugin_hyw-4.0.0rc19.dist-info/METADATA +26 -0
  2. entari_plugin_hyw-4.0.0rc19.dist-info/RECORD +4 -0
  3. entari_plugin_hyw-4.0.0rc19.dist-info/top_level.txt +1 -0
  4. entari_plugin_hyw/__init__.py +0 -914
  5. entari_plugin_hyw/filters.py +0 -83
  6. entari_plugin_hyw/history.py +0 -251
  7. entari_plugin_hyw/misc.py +0 -214
  8. entari_plugin_hyw/search_cache.py +0 -253
  9. entari_plugin_hyw-4.0.0rc17.dist-info/METADATA +0 -119
  10. entari_plugin_hyw-4.0.0rc17.dist-info/RECORD +0 -52
  11. entari_plugin_hyw-4.0.0rc17.dist-info/top_level.txt +0 -2
  12. hyw_core/__init__.py +0 -94
  13. hyw_core/agent.py +0 -876
  14. hyw_core/browser_control/__init__.py +0 -63
  15. hyw_core/browser_control/assets/card-dist/index.html +0 -429
  16. hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -1
  17. hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -9
  18. hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
  19. hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -1
  20. hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -1
  21. hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
  22. hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
  23. hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -15
  24. hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
  25. hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
  26. hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
  27. hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -1
  28. hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
  29. hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -24
  30. hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
  31. hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
  32. hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
  33. hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
  34. hyw_core/browser_control/assets/card-dist/vite.svg +0 -1
  35. hyw_core/browser_control/engines/__init__.py +0 -15
  36. hyw_core/browser_control/engines/base.py +0 -13
  37. hyw_core/browser_control/engines/default.py +0 -166
  38. hyw_core/browser_control/engines/duckduckgo.py +0 -171
  39. hyw_core/browser_control/landing.html +0 -172
  40. hyw_core/browser_control/manager.py +0 -173
  41. hyw_core/browser_control/renderer.py +0 -446
  42. hyw_core/browser_control/service.py +0 -1002
  43. hyw_core/config.py +0 -154
  44. hyw_core/core.py +0 -454
  45. hyw_core/crawling/__init__.py +0 -18
  46. hyw_core/crawling/completeness.py +0 -437
  47. hyw_core/crawling/models.py +0 -88
  48. hyw_core/definitions.py +0 -166
  49. hyw_core/image_cache.py +0 -274
  50. hyw_core/pipeline.py +0 -502
  51. hyw_core/search.py +0 -169
  52. hyw_core/stages/__init__.py +0 -21
  53. hyw_core/stages/base.py +0 -95
  54. hyw_core/stages/summary.py +0 -218
  55. {entari_plugin_hyw-4.0.0rc17.dist-info → entari_plugin_hyw-4.0.0rc19.dist-info}/WHEEL +0 -0
hyw_core/agent.py DELETED
@@ -1,876 +0,0 @@
1
- """
2
- Agent Pipeline
3
-
4
- Tool-calling agent that can autonomously use web_tool to search/screenshot.
5
- Maximum 3 rounds of tool calls, up to 3 parallel calls per round.
6
- """
7
-
8
- import asyncio
9
- import json
10
- import re
11
- import time
12
- from dataclasses import dataclass, field
13
- from typing import Any, Callable, Awaitable, Dict, List, Optional
14
-
15
- from loguru import logger
16
- from openai import AsyncOpenAI
17
-
18
- from .definitions import get_web_tool, get_refuse_answer_tool, get_js_tool, AGENT_SYSTEM_PROMPT
19
- from .stages.base import StageContext, StageResult
20
- from .search import SearchService
21
-
22
-
23
- @dataclass
24
- class AgentSession:
25
- """Agent session with tool call tracking."""
26
- session_id: str
27
- user_query: str
28
- tool_calls: List[Dict[str, Any]] = field(default_factory=list)
29
- tool_results: List[Dict[str, Any]] = field(default_factory=list)
30
- conversation_history: List[Dict] = field(default_factory=list)
31
- messages: List[Dict] = field(default_factory=list) # LLM conversation
32
- created_at: float = field(default_factory=time.time)
33
-
34
- # Round tracking (each round can have up to 3 parallel tool calls)
35
- round_count: int = 0
36
-
37
- # Image tracking
38
- user_image_count: int = 0 # Number of images from user input
39
- total_image_count: int = 0 # Total images including web screenshots
40
-
41
- # Time tracking
42
- search_time: float = 0.0 # Total time spent on search/screenshot
43
- llm_time: float = 0.0 # Total time spent on LLM calls
44
- first_llm_time: float = 0.0 # Time for first LLM call (understanding intent)
45
-
46
- # Usage tracking
47
- usage_totals: Dict[str, int] = field(default_factory=lambda: {"input_tokens": 0, "output_tokens": 0})
48
-
49
- @property
50
- def call_count(self) -> int:
51
- """Total number of individual tool calls."""
52
- return len(self.tool_calls)
53
-
54
- @property
55
- def should_force_summary(self) -> bool:
56
- """Force summary after 3 rounds of tool calls."""
57
- return self.round_count >= 3
58
-
59
-
60
- def parse_filter_syntax(query: str, max_count: int = 3):
61
- """
62
- Parse enhanced filter syntax supporting:
63
- Chinese/English colons (： :) and commas (， ,)
64
- - Multiple filters: "mcmod=2, github=1 : xxx"
65
- - Index lists: "1, 2, 3 : xxx"
66
- - Max total selections
67
-
68
- Returns:
69
- filters: list of (filter_type, filter_value, count) tuples
70
- filter_type: 'index' or 'link'
71
- filter_value: int (for index) or str (for link match term)
72
- count: how many to get (default 1)
73
- search_query: the actual search query
74
- error_msg: error message if exceeded max
75
- """
76
- import re
77
-
78
- # Skip filter parsing if query contains URL (has :// pattern)
79
- if re.search(r'https?://', query):
80
- return [], query.strip(), None
81
-
82
- # Normalize colons
83
- query = query.replace('：', ':')
84
-
85
- if ':' not in query:
86
- return [], query.strip(), None
87
-
88
- parts = query.split(':', 1)
89
- if len(parts) != 2:
90
- return [], query.strip(), None
91
-
92
- filter_part = parts[0].strip()
93
- search_query = parts[1].strip()
94
-
95
- if not filter_part or not search_query:
96
- return [], query.strip(), None
97
-
98
- # Parse filter expressions
99
- filters = []
100
- total_count = 0
101
-
102
- # Normalize commas
103
- filter_part = filter_part.replace('，', ',').replace('、', ',')
104
- filter_items = [f.strip() for f in filter_part.split(',') if f.strip()]
105
-
106
- for item in filter_items:
107
- # Check for "term=count" format (link filter)
108
- if '=' in item:
109
- term, count_str = item.split('=', 1)
110
- term = term.strip().lower()
111
- try:
112
- count = int(count_str.strip())
113
- except ValueError:
114
- count = 1
115
- if term and count > 0:
116
- filters.append(('link', term, count))
117
- total_count += count
118
- # Check for pure number (index filter)
119
- elif item.isdigit():
120
- idx = int(item)
121
- if 1 <= idx <= 10:
122
- filters.append(('index', idx, 1))
123
- total_count += 1
124
-
125
- if total_count > max_count:
126
- return None, search_query, f"⚠️ 最多选择{max_count}个结果"
127
-
128
- return filters, search_query, None
129
-
130
-
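# Not part of the original agent.py; illustrative behaviour of parse_filter_syntax
# based on the docstring and logic above (default max_count=3):
#
#   parse_filter_syntax("mcmod=2, github=1 : 机械动力")
#       -> ([('link', 'mcmod', 2), ('link', 'github', 1)], '机械动力', None)
#   parse_filter_syntax("1, 3 : vite plugin api")
#       -> ([('index', 1, 1), ('index', 3, 1)], 'vite plugin api', None)
#   parse_filter_syntax("mcmod=2, github=2 : create mod")   # 4 selections > 3
#       -> (None, 'create mod', '⚠️ 最多选择3个结果')
#   parse_filter_syntax("https://example.com/docs")         # URLs bypass filter parsing
#       -> ([], 'https://example.com/docs', None)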
131
- class AgentPipeline:
132
- """
133
- Tool-calling agent pipeline.
134
-
135
- Flow:
136
- 1. User input → LLM (with tools)
137
- 2. If tool_call: execute all tools in parallel → notify user with batched message → loop
138
- 3. If call_count >= 3 rounds: force summary on next call
139
- 4. Return final content
140
- """
141
-
142
- MAX_TOOL_ROUNDS = 3 # Maximum rounds of tool calls
143
- MAX_PARALLEL_TOOLS = 3 # Maximum parallel tool calls per round
144
- MAX_LLM_RETRIES = 3 # Maximum retries for empty API responses
145
- LLM_RETRY_DELAY = 1.0 # Delay between retries in seconds
146
-
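# Not part of the original agent.py; a rough sketch of the loop that the constants
# above bound, as implemented in execute() below. Each LLM turn may return several
# tool calls (the module docstring caps this at MAX_PARALLEL_TOOLS via the prompt
# and tool definitions); the batch runs concurrently and counts as one round, and
# after MAX_TOOL_ROUNDS rounds the next LLM call is made without tools so the
# model must summarise what it has gathered:
#
#   rounds = 0
#   while True:
#       if rounds >= MAX_TOOL_ROUNDS:
#           reply = await llm(messages)               # no tools -> forced summary
#           break
#       reply = await llm(messages, tools=tools)      # retried up to MAX_LLM_RETRIES times
#       if not reply.tool_calls:
#           break                                     # model answered directly
#       results = await asyncio.gather(*(run_tool(tc) for tc in reply.tool_calls))
#       append_tool_messages(messages, reply, results)
#       rounds += 1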
147
- def __init__(
148
- self,
149
- config: Any,
150
- search_service: SearchService,
151
- send_func: Optional[Callable[[str], Awaitable[None]]] = None
152
- ):
153
- self.config = config
154
- self.search_service = search_service
155
- self.send_func = send_func
156
- self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)
157
-
158
- async def execute(
159
- self,
160
- user_input: str,
161
- conversation_history: List[Dict],
162
- images: List[str] = None,
163
- model_name: str = None,
164
- ) -> Dict[str, Any]:
165
- """Execute agent with tool-calling loop."""
166
- start_time = time.time()
167
-
168
- # Get model config
169
- model_cfg = self.config.get_model_config("main")
170
- model = model_name or model_cfg.model_name or self.config.model_name
171
-
172
- client = AsyncOpenAI(
173
- base_url=model_cfg.base_url or self.config.base_url,
174
- api_key=model_cfg.api_key or self.config.api_key
175
- )
176
-
177
- # Create session
178
- session = AgentSession(
179
- session_id=str(time.time()),
180
- user_query=user_input,
181
- conversation_history=conversation_history.copy()
182
- )
183
-
184
- # Create context for results
185
- context = StageContext(
186
- user_input=user_input,
187
- images=images or [],
188
- conversation_history=conversation_history,
189
- )
190
-
191
- # Build initial messages
192
- language = getattr(self.config, "language", "Simplified Chinese")
193
- from datetime import datetime
194
- current_time = datetime.now().strftime("%Y-%m-%d %H:%M")
195
- system_prompt = AGENT_SYSTEM_PROMPT + f"\n\n用户要求的语言: {language}\n当前时间: {current_time}"
196
-
197
- # Build user content with images if provided
198
- user_image_count = len(images) if images else 0
199
- session.user_image_count = user_image_count
200
- session.total_image_count = user_image_count
201
-
202
- if images:
203
- user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_input}]
204
- for img_b64 in images:
205
- url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
206
- user_content.append({"type": "image_url", "image_url": {"url": url}})
207
- else:
208
- user_content = user_input
209
-
210
- session.messages = [
211
- {"role": "system", "content": system_prompt},
212
- ]
213
-
214
- # Add conversation history (previous turns) before current user message
215
- # This enables continuous conversation context
216
- if conversation_history:
217
- for msg in conversation_history:
218
- role = msg.get("role", "")
219
- content = msg.get("content", "")
220
- if role in ("user", "assistant") and content:
221
- session.messages.append({"role": role, "content": content})
222
-
223
- # Add current user message
224
- session.messages.append({"role": "user", "content": user_content})
225
-
226
- # Add image source hint for user images
227
- if user_image_count > 0:
228
- if user_image_count == 1:
229
- hint = "第1张图片来自用户输入,请将这张图片作为用户输入的参考"
230
- else:
231
- hint = f"第1-{user_image_count}张图片来自用户输入,请将这{user_image_count}张图片作为用户输入的参考"
232
- session.messages.append({"role": "system", "content": hint})
233
-
234
- # Tool definitions
235
- web_tool = get_web_tool()
236
- refuse_tool = get_refuse_answer_tool()
237
- js_tool = get_js_tool()
238
- tools = [web_tool, refuse_tool, js_tool]
239
-
240
- usage_totals = {"input_tokens": 0, "output_tokens": 0}
241
- final_content = ""
242
-
243
- # Send initial status notification
244
- if self.send_func:
245
- try:
246
- await self.send_func("💭 何意味...")
247
- except Exception as e:
248
- logger.warning(f"AgentPipeline: Failed to send initial notification: {e}")
249
-
250
- # Agent loop
251
- while True:
252
- # Check if we need to force summary (no tools)
253
- if session.should_force_summary:
254
- logger.info(f"AgentPipeline: Max tool rounds ({self.MAX_TOOL_ROUNDS}) reached, forcing summary")
255
- # Add context message about collected info
256
- if context.web_results:
257
- context_msg = self._format_web_context(context)
258
- session.messages.append({
259
- "role": "system",
260
- "content": f"你已经完成了{session.call_count}次工具调用。请基于已收集的信息给出最终回答。\n\n{context_msg}"
261
- })
262
-
263
-
264
- # Final call without tools (with retry)
265
- response = None
266
- for retry in range(self.MAX_LLM_RETRIES):
267
- try:
268
- response = await client.chat.completions.create(
269
- model=model,
270
- messages=session.messages,
271
- temperature=self.config.temperature,
272
- )
273
-
274
- if response.usage:
275
- usage_totals["input_tokens"] += response.usage.prompt_tokens or 0
276
- usage_totals["output_tokens"] += response.usage.completion_tokens or 0
277
-
278
- # Check for valid response
279
- if response.choices:
280
- break # Success, exit retry loop
281
-
282
- # Empty choices, retry
283
- logger.warning(f"AgentPipeline: Empty choices in force-summary (attempt {retry + 1}/{self.MAX_LLM_RETRIES}): {response}")
284
- if retry < self.MAX_LLM_RETRIES - 1:
285
- await asyncio.sleep(self.LLM_RETRY_DELAY)
286
- except Exception as e:
287
- logger.warning(f"AgentPipeline: LLM error (attempt {retry + 1}/{self.MAX_LLM_RETRIES}): {e}")
288
- if retry < self.MAX_LLM_RETRIES - 1:
289
- await asyncio.sleep(self.LLM_RETRY_DELAY)
290
- else:
291
- return {
292
- "llm_response": f"Error: {e}",
293
- "success": False,
294
- "error": str(e),
295
- "stats": {"total_time": time.time() - start_time}
296
- }
297
-
298
- # Final check after all retries
299
- if not response or not response.choices:
300
- logger.error(f"AgentPipeline: All retries failed for force-summary")
301
- return {
302
- "llm_response": "抱歉,AI 服务返回了空响应,请稍后重试。",
303
- "success": False,
304
- "error": "Empty response from API after retries",
305
- "stats": {"total_time": time.time() - start_time},
306
- "usage": usage_totals,
307
- }
308
-
309
- final_content = response.choices[0].message.content or ""
310
- break
311
-
312
- # Normal call with tools (with retry)
313
- llm_start = time.time()
314
- response = None
315
-
316
- for retry in range(self.MAX_LLM_RETRIES):
317
- try:
318
- response = await client.chat.completions.create(
319
- model=model,
320
- messages=session.messages,
321
- temperature=self.config.temperature,
322
- tools=tools,
323
- tool_choice="auto",
324
- )
325
-
326
- # Check for valid response
327
- if response.choices:
328
- break # Success, exit retry loop
329
-
330
- # Empty choices, retry
331
- logger.warning(f"AgentPipeline: Empty choices (attempt {retry + 1}/{self.MAX_LLM_RETRIES}): {response}")
332
- if retry < self.MAX_LLM_RETRIES - 1:
333
- await asyncio.sleep(self.LLM_RETRY_DELAY)
334
- except Exception as e:
335
- logger.warning(f"AgentPipeline: LLM error (attempt {retry + 1}/{self.MAX_LLM_RETRIES}): {e}")
336
- if retry < self.MAX_LLM_RETRIES - 1:
337
- await asyncio.sleep(self.LLM_RETRY_DELAY)
338
- else:
339
- logger.error(f"AgentPipeline: All retries failed: {e}")
340
- return {
341
- "llm_response": f"Error: {e}",
342
- "success": False,
343
- "error": str(e),
344
- "stats": {"total_time": time.time() - start_time}
345
- }
346
-
347
- llm_duration = time.time() - llm_start
348
- session.llm_time += llm_duration
349
-
350
- # Track first LLM call time (understanding user intent)
351
- if session.call_count == 0 and session.first_llm_time == 0:
352
- session.first_llm_time = llm_duration
353
-
354
- # Final check after all retries
355
- if not response or not response.choices:
356
- logger.error(f"AgentPipeline: All retries failed, empty choices")
357
- return {
358
- "llm_response": "抱歉,AI 服务返回了空响应,请稍后重试。",
359
- "success": False,
360
- "error": "Empty response from API after retries",
361
- "stats": {"total_time": time.time() - start_time},
362
- "usage": usage_totals,
363
- }
364
-
365
- if response.usage:
366
- usage_totals["input_tokens"] += response.usage.prompt_tokens or 0
367
- usage_totals["output_tokens"] += response.usage.completion_tokens or 0
368
-
369
- message = response.choices[0].message
370
-
371
- # Check for tool calls
372
- if not message.tool_calls:
373
- # Model chose to answer directly
374
- final_content = message.content or ""
375
- logger.info(f"AgentPipeline: Model answered directly after {session.call_count} tool calls")
376
- break
377
-
378
- # Add assistant message with tool calls
379
- session.messages.append({
380
- "role": "assistant",
381
- "content": message.content,
382
- "tool_calls": [
383
- {
384
- "id": tc.id,
385
- "type": "function",
386
- "function": {"name": tc.function.name, "arguments": tc.function.arguments}
387
- }
388
- for tc in message.tool_calls
389
- ]
390
- })
391
-
392
- # Execute all tool calls in parallel
393
- tool_tasks = []
394
- tool_call_ids = []
395
- tool_call_names = []
396
- tool_call_args_list = []
397
-
398
- for tool_call in message.tool_calls:
399
- tc_id = tool_call.id
400
- func_name = tool_call.function.name
401
-
402
- try:
403
- args = json.loads(tool_call.function.arguments)
404
- except json.JSONDecodeError:
405
- args = {}
406
-
407
- tool_call_ids.append(tc_id)
408
- tool_call_names.append(func_name)
409
- tool_call_args_list.append(args)
410
- logger.info(f"AgentPipeline: Queueing tool '{func_name}' with args: {args}")
411
-
412
- # Check for refuse_answer first (handle immediately)
413
- for idx, func_name in enumerate(tool_call_names):
414
- if func_name == "refuse_answer":
415
- args = tool_call_args_list[idx]
416
- reason = args.get("reason", "Refused")
417
- context.should_refuse = True
418
- context.refuse_reason = reason
419
-
420
- session.messages.append({
421
- "role": "tool",
422
- "tool_call_id": tool_call_ids[idx],
423
- "content": f"已拒绝回答: {reason}"
424
- })
425
-
426
- return {
427
- "llm_response": "",
428
- "success": True,
429
- "refuse_answer": True,
430
- "refuse_reason": reason,
431
- "stats": {"total_time": time.time() - start_time},
432
- "usage": usage_totals,
433
- }
434
-
435
- # Execute web_tool calls in parallel
436
- search_start = time.time()
437
- tasks_to_run = []
438
- task_indices = []
439
-
440
- for idx, func_name in enumerate(tool_call_names):
441
- if func_name == "web_tool":
442
- tasks_to_run.append(self._execute_web_tool(tool_call_args_list[idx], context))
443
- task_indices.append(idx)
444
- elif func_name == "js_executor":
445
- tasks_to_run.append(self._execute_js_tool(tool_call_args_list[idx], context))
446
- task_indices.append(idx)
447
-
448
- # Run all web_tool calls in parallel
449
- if tasks_to_run:
450
- results = await asyncio.gather(*tasks_to_run, return_exceptions=True)
451
- else:
452
- results = []
453
-
454
- session.search_time += time.time() - search_start
455
-
456
- # Process results and collect notifications
457
- notifications = []
458
- result_map = {} # Map task index to result
459
-
460
- for i, result in enumerate(results):
461
- task_idx = task_indices[i]
462
- if isinstance(result, Exception):
463
- result_map[task_idx] = {"summary": f"执行失败: {result}", "results": []}
464
- else:
465
- result_map[task_idx] = result
466
-
467
- # Add all tool results to messages and collect notifications
468
- for idx, func_name in enumerate(tool_call_names):
469
- tc_id = tool_call_ids[idx]
470
- args = tool_call_args_list[idx]
471
-
472
- if func_name == "web_tool":
473
- result = result_map.get(idx, {"summary": "未执行", "results": []})
474
-
475
- # Track tool call
476
- session.tool_calls.append({"name": func_name, "args": args})
477
- session.tool_results.append(result)
478
-
479
- # Collect notification
480
- notifications.append(f"🔍 {result['summary']}")
481
-
482
- # Add tool result to messages
483
- formatted_results = ""
484
- if result.get("results"):
485
- formatted_results = "\n\n详细结果:\n"
486
- for i, r in enumerate(result["results"]):
487
- title = r.get("title", "无标题")
488
- url = r.get("url", "")
489
- snippet = r.get("snippet", "") or r.get("content", "") or ""
490
- # Limit snippet length
491
- snippet = snippet[:300] + "..." if len(snippet) > 300 else snippet
492
- formatted_results += f"{i+1}. [{title}]({url})\n 摘要: {snippet}\n\n"
493
-
494
- result_content = f"搜索完成: {result['summary']}\n\n找到 {len(result.get('results', []))} 个结果{formatted_results}"
495
- session.messages.append({
496
- "role": "tool",
497
- "tool_call_id": tc_id,
498
- "content": result_content
499
- })
500
-
501
- # Add image source hint for web screenshots
502
- screenshot_count = result.get("screenshot_count", 0)
503
- if screenshot_count > 0:
504
- start_idx_img = session.total_image_count + 1
505
- end_idx_img = session.total_image_count + screenshot_count
506
- session.total_image_count = end_idx_img
507
-
508
- source_desc = result.get("source_desc", "网页截图")
509
- if start_idx_img == end_idx_img:
510
- hint = f"第{start_idx_img}张图片来自{source_desc},作为查询的参考资料"
511
- else:
512
- hint = f"第{start_idx_img}-{end_idx_img}张图片来自{source_desc},作为查询的参考资料"
513
- session.messages.append({"role": "system", "content": hint})
514
- else:
515
- # Unknown tool
516
- session.messages.append({
517
- "role": "tool",
518
- "tool_call_id": tc_id,
519
- "content": f"Unknown tool: {func_name}"
520
- })
521
-
522
- # Send batched notification (up to 3 lines)
523
- if self.send_func and notifications:
524
- try:
525
- # Join notifications with newlines, max 3 lines
526
- notification_msg = "\n".join(notifications[:3])
527
- await self.send_func(notification_msg)
528
- except Exception as e:
529
- logger.warning(f"AgentPipeline: Failed to send notification: {e}")
530
-
531
- # Increment round count after processing all tool calls in this round
532
- if tasks_to_run:
533
- session.round_count += 1
534
-
535
- # Build final response
536
- total_time = time.time() - start_time
537
- stats = {"total_time": total_time}
538
-
539
- # Update conversation history
540
- conversation_history.append({"role": "user", "content": user_input})
541
- conversation_history.append({"role": "assistant", "content": final_content})
542
-
543
- stages_used = self._build_stages_ui(session, context, usage_totals, total_time)
544
- logger.info(f"AgentPipeline: Built stages_used = {stages_used}")
545
-
546
- return {
547
- "llm_response": final_content,
548
- "success": True,
549
- "stats": stats,
550
- "model_used": model,
551
- "conversation_history": conversation_history,
552
- "usage": usage_totals,
553
- "web_results": context.web_results,
554
- "tool_calls_count": session.call_count,
555
- "stages_used": stages_used,
556
- }
557
-
558
- async def _execute_web_tool(self, args: Dict, context: StageContext) -> Dict[str, Any]:
559
- """执行 web_tool - 复用 /w 逻辑,支持过滤器语法"""
560
- query = args.get("query", "")
561
-
562
- # 1. URL screenshot mode - check whether the query contains a URL
563
- url_match = re.search(r'https?://\S+', query)
564
- if url_match:
565
- url = url_match.group(0)
566
- # Send URL screenshot notification
567
- if self.send_func:
568
- try:
569
- short_url = url[:40] + "..." if len(url) > 40 else url
570
- await self.send_func(f"📸 正在截图: {short_url}")
571
- except Exception:
572
- pass
573
-
574
- logger.info(f"AgentPipeline: Screenshot URL with content: {url}")
575
- # Use screenshot_with_content to get both screenshot and text
576
- result = await self.search_service.screenshot_with_content(url)
577
- screenshot_b64 = result.get("screenshot_b64")
578
- content = result.get("content", "")
579
- title = result.get("title", "")
580
-
581
- if screenshot_b64:
582
- context.web_results.append({
583
- "_id": context.next_id(),
584
- "_type": "page",
585
- "url": url,
586
- "title": title or "Screenshot",
587
- "screenshot_b64": screenshot_b64,
588
- "content": content, # Text content for LLM
589
- })
590
- return {
591
- "summary": f"已截图: {url[:50]}{'...' if len(url) > 50 else ''}",
592
- "results": [{"_type": "screenshot", "url": url}],
593
- "screenshot_count": 1,
594
- "source_desc": f"URL截图 ({url[:30]}...)"
595
- }
596
- return {
597
- "summary": f"截图失败: {url[:50]}",
598
- "results": [],
599
- "screenshot_count": 0
600
- }
601
-
602
- # 2. Parse the filter syntax
603
- filters, search_query, error = parse_filter_syntax(query, max_count=3)
604
-
605
- if error:
606
- return {"summary": error, "results": []}
607
-
608
- # 3. If there are filters, send a search + screenshot preview notification
609
- if filters and self.send_func:
610
- try:
611
- # Build filter description
612
- filter_desc_parts = []
613
- for f_type, f_val, f_count in filters:
614
- if f_type == 'index':
615
- filter_desc_parts.append(f"第{f_val}个")
616
- else:
617
- filter_desc_parts.append(f"{f_val}={f_count}")
618
- filter_desc = ", ".join(filter_desc_parts)
619
- await self.send_func(f"🔍 正在搜索 \"{search_query}\" 并匹配 [{filter_desc}]...")
620
- except Exception:
621
- pass
622
-
623
- logger.info(f"AgentPipeline: Searching for: {search_query}")
624
- results = await self.search_service.search(search_query)
625
- visible = [r for r in results if not r.get("_hidden")]
626
-
627
- # Add search results to context
628
- for r in results:
629
- r["_id"] = context.next_id()
630
- if "_type" not in r:
631
- r["_type"] = "search"
632
- r["query"] = search_query
633
- context.web_results.append(r)
634
-
635
- # 4. If there are filters, screenshot the matching links
636
- if filters:
637
- urls = self._collect_filter_urls(filters, visible)
638
- if urls:
639
- logger.info(f"AgentPipeline: Taking screenshots with content of {len(urls)} URLs")
640
- # Use screenshot_with_content to get both screenshot and text
641
- screenshot_tasks = [self.search_service.screenshot_with_content(u) for u in urls]
642
- results = await asyncio.gather(*screenshot_tasks)
643
-
644
- # Add screenshots and content to context
645
- successful_count = 0
646
- for url, result in zip(urls, results):
647
- screenshot_b64 = result.get("screenshot_b64") if isinstance(result, dict) else None
648
- content = result.get("content", "") if isinstance(result, dict) else ""
649
- title = result.get("title", "") if isinstance(result, dict) else ""
650
-
651
- if screenshot_b64:
652
- successful_count += 1
653
- # Find and update the matching result
654
- for r in context.web_results:
655
- if r.get("url") == url:
656
- r["screenshot_b64"] = screenshot_b64
657
- r["content"] = content # Text content for LLM
658
- r["title"] = title or r.get("title", "")
659
- r["_type"] = "page"
660
- break
661
-
662
- return {
663
- "summary": f"搜索 \"{search_query}\" 并截图 {successful_count} 个匹配结果",
664
- "results": [{"url": u, "_type": "page"} for u in urls],
665
- "screenshot_count": successful_count,
666
- "source_desc": f"搜索 \"{search_query}\" 的网页截图"
667
- }
668
-
669
- # 5. Plain search mode (no screenshots)
670
- return {
671
- "summary": f"搜索 \"{search_query}\" 找到 {len(visible)} 条结果",
672
- "results": visible,
673
- "screenshot_count": 0
674
- }
675
-
676
- async def _execute_js_tool(self, args: Dict, context: StageContext) -> Dict[str, Any]:
677
- """执行 JS 代码工具"""
678
- script = args.get("script", "")
679
- if not script:
680
- return {"summary": "JS执行失败: 代码为空", "results": []}
681
-
682
- if self.send_func:
683
- try:
684
- await self.send_func("💻 正在执行JavaScript代码...")
685
- except: pass
686
-
687
- logger.info(f"AgentPipeline: Executing JS script: {script[:50]}...")
688
- result = await self.search_service.execute_script(script)
689
-
690
- # Format the result
691
- success = result.get("success", False)
692
- output = result.get("result", None)
693
- error = result.get("error", None)
694
- url = result.get("url", "")
695
- title = result.get("title", "")
696
-
697
- # Add to context
698
- context.web_results.append({
699
- "_id": context.next_id(),
700
- "_type": "js_result",
701
- "url": url,
702
- "title": title or "JS Execution",
703
- "script": script,
704
- "output": str(output) if success else str(error),
705
- "success": success,
706
- "content": f"Script: {script}\n\nOutput: {output}" if success else f"Error: {error}"
707
- })
708
-
709
- if success:
710
- summary = f"JS执行成功 (返回: {str(output)[:50]}...)"
711
- return {
712
- "summary": summary,
713
- "results": [{"_type": "js_result", "url": url}],
714
- "screenshot_count": 0,
715
- "full_output": str(output), # Return full output for LLM
716
- "success": True
717
- }
718
- else:
719
- return {
720
- "summary": f"JS执行失败: {str(error)[:50]}",
721
- "results": [],
722
- "screenshot_count": 0,
723
- "full_output": f"JS Execution Failed: {error}",
724
- "success": False,
725
- "error": str(error)
726
- }
727
-
728
-
729
- def _collect_filter_urls(self, filters: List, visible: List[Dict]) -> List[str]:
730
- """Collect URLs based on filter specifications."""
731
- urls = []
732
-
733
- for filter_type, filter_value, count in filters:
734
- if filter_type == 'index':
735
- idx = filter_value - 1 # Convert to 0-based
736
- if 0 <= idx < len(visible):
737
- url = visible[idx].get("url", "")
738
- if url and url not in urls:
739
- urls.append(url)
740
- else:
741
- # Link filter
742
- found_count = 0
743
- for res in visible:
744
- url = res.get("url", "")
745
- title = res.get("title", "")
746
- # Match filter against both URL and title
747
- if (filter_value in url.lower() or filter_value in title.lower()) and url not in urls:
748
- urls.append(url)
749
- found_count += 1
750
- if found_count >= count:
751
- break
752
-
753
- return urls
754
-
755
- def _format_web_context(self, context: StageContext) -> str:
756
- """Format web results for summary context."""
757
- if not context.web_results:
758
- return ""
759
-
760
- lines = ["## 已收集的信息\n"]
761
- for r in context.web_results:
762
- idx = r.get("_id", "?")
763
- title = r.get("title", "Untitled")
764
- url = r.get("url", "")
765
- content = r.get("content", "")[:500] if r.get("content") else ""
766
- has_screenshot = "有截图" if r.get("screenshot_b64") else ""
767
-
768
- lines.append(f"[{idx}] {title}")
769
- if url:
770
- lines.append(f" URL: {url}")
771
- if has_screenshot:
772
- lines.append(f" {has_screenshot}")
773
- if content:
774
- lines.append(f" 摘要: {content[:200]}...")
775
- lines.append("")
776
-
777
- return "\n".join(lines)
778
-
779
- def _build_stages_ui(self, session: AgentSession, context: StageContext, usage_totals: Dict, total_time: float) -> List[Dict[str, Any]]:
780
- """Build stages UI for rendering - compatible with App.vue flow section.
781
-
782
- Flow: Instruct (intent) → Search → Summary
783
- """
784
- stages = []
785
-
786
- # Get model config for pricing
787
- model_cfg = self.config.get_model_config("main")
788
- model_name = model_cfg.model_name or self.config.model_name
789
- input_price = getattr(model_cfg, "input_price", 0) or 0
790
- output_price = getattr(model_cfg, "output_price", 0) or 0
791
-
792
- # 1. Instruct Stage (understand user intent - first LLM call)
793
- if session.first_llm_time > 0:
794
- # Estimate tokens for first call (rough split based on proportion)
795
- # Since we track total usage, we approximate first call as ~40% of total
796
- first_call_ratio = 0.4 if session.call_count > 0 else 1.0
797
- instruct_input = int(usage_totals.get("input_tokens", 0) * first_call_ratio)
798
- instruct_output = int(usage_totals.get("output_tokens", 0) * first_call_ratio)
799
- instruct_cost = (instruct_input * input_price + instruct_output * output_price) / 1_000_000
800
-
801
- stages.append({
802
- "name": "Instruct",
803
- "model": model_name,
804
- "provider": model_cfg.model_provider or "OpenRouter",
805
- "description": "理解用户意图",
806
- "time": session.first_llm_time,
807
- "usage": {"input_tokens": instruct_input, "output_tokens": instruct_output},
808
- "cost": instruct_cost,
809
- })
810
-
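# Not part of the original agent.py; worked example of the cost estimate above.
# Prices are treated as per 1M tokens, and the session totals are split
# heuristically 40% Instruct / 60% Summary when tool calls occurred. With
# 10_000 input + 2_000 output tokens at prices 0.50 / 1.50 per 1M tokens:
#   Instruct: (4_000 * 0.50 + 800 * 1.50) / 1_000_000   ≈ 0.0032
#   Summary:  (6_000 * 0.50 + 1_200 * 1.50) / 1_000_000 ≈ 0.0048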
811
- # 2. Search Stage / Browser JS Stage
812
- if session.tool_calls:
813
- # Collect all search descriptions and check for JS executor calls
814
- search_descriptions = []
815
- js_calls = []
816
-
817
- for tc, result in zip(session.tool_calls, session.tool_results):
818
- if tc.get("name") == "js_executor":
819
- # Collect JS execution info
820
- js_calls.append({
821
- "script": tc.get("args", {}).get("script", ""),
822
- "output": result.get("full_output", result.get("summary", "")),
823
- "url": result.get("results", [{}])[0].get("url", "") if result.get("results") else "",
824
- "success": result.get("success", True), # Default to True if not present
825
- "error": result.get("error", "")
826
- })
827
- else:
828
- desc = result.get("summary", "")
829
- if desc:
830
- search_descriptions.append(desc)
831
-
832
- # Add Search stage if there are search calls
833
- if search_descriptions:
834
- stages.append({
835
- "name": "Search",
836
- "model": "",
837
- "provider": "Web",
838
- "description": " → ".join(search_descriptions),
839
- "time": session.search_time,
840
- })
841
-
842
- # Add Browser JS stage for each JS call
843
- for js_call in js_calls:
844
- stages.append({
845
- "name": "browser_js",
846
- "model": "",
847
- "provider": "Browser",
848
- "description": "JavaScript Execution",
849
- "script": js_call["script"],
850
- "output": js_call["output"],
851
- "url": js_call["url"],
852
- "success": js_call.get("success"),
853
- "error": js_call.get("error"),
854
- "time": 0, # JS execution time is included in search_time
855
- })
856
-
857
- # 3. Summary Stage
858
- # Calculate remaining tokens after instruct
859
- summary_ratio = 0.6 if session.call_count > 0 else 0.0
860
- summary_input = int(usage_totals.get("input_tokens", 0) * summary_ratio)
861
- summary_output = int(usage_totals.get("output_tokens", 0) * summary_ratio)
862
- summary_cost = (summary_input * input_price + summary_output * output_price) / 1_000_000
863
- summary_time = session.llm_time - session.first_llm_time
864
-
865
- if summary_time > 0 or session.call_count > 0:
866
- stages.append({
867
- "name": "Summary",
868
- "model": model_name,
869
- "provider": model_cfg.model_provider or "OpenRouter",
870
- "description": f"生成回答 ({session.call_count} 次工具调用)",
871
- "time": max(0, summary_time),
872
- "usage": {"input_tokens": summary_input, "output_tokens": summary_output},
873
- "cost": summary_cost,
874
- })
875
-
876
- return stages
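
For reference, a minimal hypothetical sketch of driving the AgentPipeline defined above. The AgentPipeline and execute() signatures come from the deleted file; the config attributes, the SearchService constructor, and every value shown are assumptions for illustration, not taken from the package.

import asyncio
from types import SimpleNamespace

from hyw_core.agent import AgentPipeline    # as shipped in 4.0.0rc17
from hyw_core.search import SearchService   # constructor details assumed

async def main():
    # Hypothetical objects exposing the attributes agent.py reads at runtime.
    model_cfg = SimpleNamespace(model_name="example-model", base_url=None, api_key=None,
                                model_provider="OpenRouter", input_price=0.5, output_price=1.5)
    config = SimpleNamespace(base_url="https://api.example.invalid/v1", api_key="dummy-key",
                             model_name="example-model", temperature=0.7,
                             language="Simplified Chinese",
                             get_model_config=lambda name: model_cfg)

    async def notify(text: str) -> None:
        # send_func: progress lines ("🔍 ...", "📸 ...") pushed back to the chat.
        print(text)

    search = SearchService(config)           # assumption: accepts the same config object
    pipeline = AgentPipeline(config, search, send_func=notify)
    result = await pipeline.execute("What changed in the latest Vite release?",
                                    conversation_history=[])
    print(result["llm_response"], result["stats"]["total_time"])

if __name__ == "__main__":
    asyncio.run(main())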