entari-plugin-hyw 3.2.113__py3-none-any.whl → 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic.

Files changed (49)
  1. entari_plugin_hyw/__init__.py +309 -758
  2. entari_plugin_hyw/hyw_core.py +700 -0
  3. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/METADATA +25 -17
  4. entari_plugin_hyw-3.3.1.dist-info/RECORD +6 -0
  5. entari_plugin_hyw/assets/icon/anthropic.svg +0 -1
  6. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  7. entari_plugin_hyw/assets/icon/gemini.svg +0 -1
  8. entari_plugin_hyw/assets/icon/google.svg +0 -1
  9. entari_plugin_hyw/assets/icon/grok.png +0 -0
  10. entari_plugin_hyw/assets/icon/microsoft.svg +0 -15
  11. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  12. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  13. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  14. entari_plugin_hyw/assets/icon/openai.svg +0 -1
  15. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/icon/perplexity.svg +0 -24
  17. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  18. entari_plugin_hyw/assets/icon/xai.png +0 -0
  19. entari_plugin_hyw/assets/icon/zai.png +0 -0
  20. entari_plugin_hyw/assets/libs/highlight.css +0 -10
  21. entari_plugin_hyw/assets/libs/highlight.js +0 -1213
  22. entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
  23. entari_plugin_hyw/assets/libs/katex.css +0 -1
  24. entari_plugin_hyw/assets/libs/katex.js +0 -1
  25. entari_plugin_hyw/assets/libs/tailwind.css +0 -1
  26. entari_plugin_hyw/assets/package-lock.json +0 -953
  27. entari_plugin_hyw/assets/package.json +0 -16
  28. entari_plugin_hyw/assets/tailwind.config.js +0 -12
  29. entari_plugin_hyw/assets/tailwind.input.css +0 -235
  30. entari_plugin_hyw/assets/template.html +0 -157
  31. entari_plugin_hyw/assets/template.html.bak +0 -157
  32. entari_plugin_hyw/assets/template.j2 +0 -259
  33. entari_plugin_hyw/core/__init__.py +0 -0
  34. entari_plugin_hyw/core/config.py +0 -36
  35. entari_plugin_hyw/core/history.py +0 -146
  36. entari_plugin_hyw/core/hyw.py +0 -41
  37. entari_plugin_hyw/core/pipeline.py +0 -840
  38. entari_plugin_hyw/core/render.py +0 -531
  39. entari_plugin_hyw/core/render.py.bak +0 -926
  40. entari_plugin_hyw/utils/__init__.py +0 -3
  41. entari_plugin_hyw/utils/browser.py +0 -61
  42. entari_plugin_hyw/utils/mcp_playwright.py +0 -128
  43. entari_plugin_hyw/utils/misc.py +0 -93
  44. entari_plugin_hyw/utils/playwright_tool.py +0 -46
  45. entari_plugin_hyw/utils/prompts.py +0 -94
  46. entari_plugin_hyw/utils/search.py +0 -193
  47. entari_plugin_hyw-3.2.113.dist-info/RECORD +0 -47
  48. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL +0 -0
  49. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,700 @@
1
+ import asyncio
2
+ import html
3
+ import json
4
+ import re
5
+ import time
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Dict, List, Optional, Union
8
+
9
+ import httpx
10
+ from loguru import logger
11
+ from openai import AsyncOpenAI
12
+
13
+ # Try to import Playwright and Trafilatura
14
+ try:
15
+ import trafilatura
16
+ from playwright.async_api import async_playwright, Browser, Playwright
17
+ PLAYWRIGHT_AVAILABLE = True
18
+ except ImportError:
19
+ PLAYWRIGHT_AVAILABLE = False
20
+ trafilatura = None
21
+ async_playwright = None
22
+ Browser = None
23
+ Playwright = None
24
+
25
+ # --- Constants & Prompts ---
26
+
27
+ # --- Constants & Prompts ---
28
+
29
# System prompt sent to the vision model by HYW.analyze_images.
# It asks for OCR-first transcription when the image carries legible text and
# for a description of the visual content otherwise.
VISION_EXPERT_SYSTEM_PROMPT = """你是一个专业的图像分析专家。

[核心任务]
- 请智能分析图片内容,根据图片类型自主选择侧重点。
- **文字优先原则**:如果图片包含清晰的文字(如文档、截图、海报、对话记录等),或者用户的意图明显是获取文字信息,请将 **OCR文字识别** 作为核心任务。
- 必须完整、准确地转录所有可见文字,不要遗漏。
- 视觉描述作为补充,仅需简要说明图片类型(如"这是一张聊天记录截图")。
- 视觉补充:如果图片几乎没有文字,或者文字仅为背景点缀,请重点描述图片的视觉内容(物体、场景、人物、动作、氛围等)。

[输出格式]
- 直接输出分析结果。
- 如果识别到文字,请使用清晰的格式列出。
"""
42
+
43
# Main system prompt template for the agent.
# HYW.agent renders it with str.format(tools_desc=...), so `{tools_desc}` is the
# only placeholder; any other literal brace added here would have to be doubled.
BASE_SYSTEM_PROMPT = """你是一个智能AI助手, 你的目的是帮助用户解决问题,
- 你拥有强大的思维链能力,在回答前请先进行深度的思考和隐形规划
- 你最多可以同时一次调用 5 次 tool
- 尽量在一次调用内完成搜索与思考任务并进行最终回复

[任务分析思考]
- 对话过滤:此消息是用户间的对话,过滤掉无关人员和干扰信息,只关注需要解释的关键词
- 直接提问:用户直接提问或要求查询、简洁问题和询问报错, 请分析解释的同进行操作, 解决用户需求
- 多媒体/视觉内容:包含视觉分析结果时,理解其意义,减少转述损耗,并利用工具验证易产生幻觉的信息(如具体人物、事件)
- JSON/结构化数据:理解数据含义(如小程序分享),寻找并获取其中指向的 URL 内容
- URL处理:如果包含网页链接,务必使用工具获取内容, 同时可以进行补充搜索
- 意图理解:纠正可能的拼写/语法错误,还原缩写含义,确保准确理解查询意图

[核心原则 - 必须严格遵守]
- 永远使用中文回答
- 语言简洁、语气客观专业、描述详精练抓重点
- 绝对不允许使用除代码框外的markdown语法(**、*、`、#、-等符号)
- 如果需要给出代码, 请添加到代码框内, 只给出部分代码即可, 尽可能减少回复字数
- 回复带有紧扣结果相关的「补充推测」, 帮助用户进行下一步行动

[安全审查]
- 禁止讨论政策、国家领导人、政治体制等敏感话题的搜索与验证计划、新闻、历史事件
- 过于敏感的话题, 规划时请谨慎
- 过于色情、暴力、血腥等内容, 请谨慎处理, 避免直接描述

[搜索与验证原则]
- web 搜索优先使用 google、bing、duckduckgo 三个搜索引擎混合验证, 且只能使用此三种搜索引擎以确保数据准确性, 永远禁止使用 `baidu` 等国内搜索引擎
- 搜索时关键词抓住重点、语言简洁专业, 以获取更专业贴切的搜索结果
- 避免搜索 x.com 、 csdn 等不准确信息, 尽可能使用权威网站、相关项目官方网站
- 搜索内容指向不同相关项目时, 尝试理解关系, 请避免混为一谈
- 禁止导航到搜索引擎页面, 你可以直接导航到相关官网或权威网站
- 可以同时启动多个工具查看不同页面, 提高效率
- 人名、地名、组织名等关键信息优先验证, 只相信权威网站、相关项目官方网站
- 存在视觉分析专家信息时, 不要尝试通过角色、人物特征进行搜索验证、直接利用视觉分析结果回答. 但如果视觉分析中有文字存在,可以对文字内容进行搜索, 抓住重点补充
- 分步验证思想: 先确认A, 通过A确认B或C. 验证重点:指出需要特别验证的事实、数据或来源.

[使用以下工具来获取页面和验证信息]
{tools_desc}

[针对搜索结果的回复要求]
- 根据搜索结果给出准确回答,忽略浏览器广告、自动纠错提示等多余信息
- 减少"根据搜索结果"、"未发现相关信息"等无意义表述
- 由于搜索客观实效性, 避免 `预计` `大概` `可能` 等词汇
- 不能使用 `教程` `怎么办` 等词汇进行搜索, 这些词汇会导致搜索结果偏离主题, 而且非官方信息居多

[推测]
- 回复推测时也要使得语气平稳、陈述
- 回复推测语句简短, 通常在10个字左右
- 一些合适的推测示例方向: /1 深入研究 /2 了解更多关于 /3 继续深度搜索 /4 解决方案 /1 官方文档的最佳实践 /2 给出实际代码片段 /3 获取页面完整内容...

[最终回复格式]
[LLM Agent] >>
<纯文本详细解释>
<(如果需要提供代码)
```<核心代码语言片段>
<代码>
```>

[Next?] >>
/1 <回复推测1>
/2 <回复推测2>
/3 <回复推测3>
/4 <回复推测4>
"""
107
+
108
# Extra system message that HYW.agent appends only when a non-empty
# conversation history is supplied; it tells the model to reuse earlier
# context before reaching for tools.
ADDITIONAL_RULES_PROMPT = """
[补充规则]
- 请保持回答的连贯性, 参考上文的历史信息
- 判断之前的信息是否足够回复用户的问题, 如果足够请不使用工具, 直接快速的给出回答
- 如果之前的信息不够, 可以使用工具获取信息
"""
114
+
115
# Extra system message that HYW.agent appends once the user-turn count reaches
# 5 (and a history is present); it switches the reply format to one without
# the "[Next?]" suggestions section.
FINAL_TURN_PROMPT = """
注意: 对话即将结束, 请改变最终回复格式.

[最终回复格式]
[LLM Agent] >>
<纯文本详细解释>
<(如果需要提供代码)
```<核心代码语言片段>
<代码>
```>
"""
126
+
127
+ # --- Configuration ---
128
+
129
@dataclass
class HYWConfig:
    """Settings consumed by ``HYW`` and ``BrowserTool``.

    Only ``api_key`` and ``model_name`` are required; every other field has a
    default. Field order is part of the positional constructor signature.
    """

    # --- Main chat-completion endpoint ---
    api_key: str
    model_name: str
    base_url: str = "https://openrouter.ai/api/v1"

    # Enables the JSON debug dump of the message list.
    save_conversation: bool = False
    # Forwarded to Playwright's chromium.launch(headless=...).
    headless: bool = True

    # --- Page fetching ---
    # Backend used to fetch pages: "jina" or "playwright".
    browser_tool: str = "jina"
    # Optional bearer token for the Jina endpoints; when set, the web_search
    # tool is also exposed to the model.
    jina_api_key: Optional[str] = None

    # --- Vision model ---
    # Unset values fall back to the main model name / client / API key.
    vision_model_name: Optional[str] = None
    vision_base_url: Optional[str] = None
    vision_api_key: Optional[str] = None

    # Passed through as ``extra_body`` on main chat-completion requests.
    extra_body: Optional[Dict[str, Any]] = None

    # When True, a failed page fetch is retried with the other backend.
    enable_browser_fallback: bool = False
148
+
149
+ # --- Browser Tool ---
150
+
151
class BrowserTool:
    """Fetch pages and search results through Jina AI or a local Playwright browser.

    The fetching coroutines report failures as strings that start with
    ``"Error"`` instead of raising; ``navigate`` uses that prefix to decide
    whether to try the fallback backend.
    """

    def __init__(self, config: HYWConfig):
        """Store *config* and validate the selected backend.

        Raises:
            RuntimeError: ``browser_tool`` is ``"playwright"`` but the optional
                playwright/trafilatura imports failed.
        """
        self.config = config
        self.playwright: Optional[Any] = None
        self.browser: Optional[Any] = None
        # Created lazily inside the running event loop (see _ensure_browser).
        self._browser_lock: Optional[asyncio.Lock] = None

        if self.config.browser_tool == "playwright" and not PLAYWRIGHT_AVAILABLE:
            raise RuntimeError("Browser tool set to 'playwright' but playwright/trafilatura is not installed. Please install with 'pip install entari-plugin-hyw[playwright]' or set browser_tool to 'jina'.")

        if not PLAYWRIGHT_AVAILABLE and self.config.browser_tool != "jina":
            logger.warning("Playwright not installed. Local browser navigation disabled.")

    async def navigate(self, url: str) -> str:
        """Fetch *url* with the configured backend and return its text.

        When the primary backend returns an ``"Error..."`` string, a secondary
        backend exists and ``enable_browser_fallback`` is set, the secondary
        backend is tried and its result returned instead.
        """
        # Determine primary and secondary methods
        if self.config.browser_tool == "jina":
            primary_method = self._navigate_jina
            primary_name = "Jina"
            if PLAYWRIGHT_AVAILABLE:
                secondary_method = self._navigate_playwright
                secondary_name = "Playwright"
            else:
                secondary_method = None
                secondary_name = None
        elif self.config.browser_tool == "playwright":
            primary_method = self._navigate_playwright
            primary_name = "Playwright"
            secondary_method = self._navigate_jina
            secondary_name = "Jina"
        else:
            # Unknown value: use Jina and offer no fallback
            logger.warning(f"Unknown browser_tool '{self.config.browser_tool}', defaulting to Jina")
            primary_method = self._navigate_jina
            primary_name = "Jina"
            secondary_method = None
            secondary_name = None

        content = await primary_method(url)

        # Failure is signalled in-band: both backends return "Error..." strings
        if content.startswith("Error") and secondary_method and self.config.enable_browser_fallback:
            logger.warning(f"{primary_name} failed: {content}. Falling back to {secondary_name}...")
            content = await secondary_method(url)

        return content

    async def _navigate_jina(self, url: str) -> str:
        """Fetch *url* through ``https://r.jina.ai/`` and return the response text.

        Any non-200 status or exception is turned into an ``"Error..."`` string.
        """
        try:
            logger.info(f"Jina AI navigating to: {url}")
            headers = {}
            if self.config.jina_api_key:
                headers["Authorization"] = f"Bearer {self.config.jina_api_key}"

            async with httpx.AsyncClient(timeout=30.0) as client:
                resp = await client.get(f"https://r.jina.ai/{url}", headers=headers)
                if resp.status_code == 200:
                    content = resp.text
                    logger.info(f"Successfully fetched {len(content)} chars from {url} via Jina")
                    return content
                return f"Error navigating to {url} via Jina: Status {resp.status_code}"
        except Exception as e:
            logger.error(f"Jina navigation failed: {e}")
            return f"Error navigating to {url} via Jina: {str(e)}"

    async def _ensure_browser(self):
        """Start Playwright and launch Chromium once; later calls are no-ops.

        Does nothing when Playwright is not importable. Start-up is serialised
        with a lock because the agent runs several tool calls concurrently and
        two callers could otherwise each launch a browser.
        """
        if not PLAYWRIGHT_AVAILABLE:
            return

        # No await between the check and the assignment, so this is race-free.
        if self._browser_lock is None:
            self._browser_lock = asyncio.Lock()

        async with self._browser_lock:
            if self.playwright is None:
                self.playwright = await async_playwright().start()

            if self.browser is None:
                self.browser = await self.playwright.chromium.launch(
                    headless=self.config.headless,
                    args=["--disable-blink-features=AutomationControlled"],
                    ignore_default_args=["--enable-automation"]
                )
                logger.info("Playwright browser initialized")

    async def _navigate_playwright(self, url: str) -> str:
        """Fetch *url* in a fresh browser context and return the extracted text.

        The page HTML is run through ``trafilatura.extract``; when that yields
        nothing, ``document.body.innerText`` is used. Every failure, including
        browser start-up and context creation, is returned as an ``"Error..."``
        string so that ``navigate`` can fall back to the other backend.
        """
        if not PLAYWRIGHT_AVAILABLE:
            return "Error: Playwright not installed"

        context = None
        try:
            await self._ensure_browser()

            if not self.browser:
                return "Error: Browser not initialized"

            context = await self.browser.new_context(
                viewport={"width": 1280, "height": 800},
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
            )

            # Inject script to hide navigator.webdriver
            await context.add_init_script("""
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                });
            """)

            page = await context.new_page()

            logger.info(f"Playwright navigating to: {url}")
            await page.goto(url, wait_until="domcontentloaded", timeout=60000)

            # Best effort: give dynamic content up to 5s to settle
            try:
                await page.wait_for_load_state("networkidle", timeout=5000)
            except Exception:
                pass

            html_content = await page.content()

            content = trafilatura.extract(
                html_content,
                include_links=False,
                include_images=False,
                include_tables=False,
                include_comments=False,
                output_format="markdown"
            )

            # Fallback when extraction yields nothing
            if not content:
                content = await page.evaluate("() => document.body.innerText")

            logger.info(f"Successfully fetched {len(content) if content else 0} chars from {url}")
            return content if content else "Error: Empty content"

        except Exception as e:
            logger.error(f"Playwright navigation failed: {e}")
            return f"Error navigating to {url}: {str(e)}"
        finally:
            # Closing the context also closes the pages that belong to it.
            if context is not None:
                try:
                    await context.close()
                except Exception as e:
                    logger.warning(f"Failed to close browser context: {e}")

    async def close(self):
        """Close the browser and stop Playwright, if they were started.

        The handles are cleared first so a later ``navigate`` starts a fresh
        browser instead of reusing a closed one, and Playwright is stopped even
        if closing the browser raises.
        """
        browser, playwright = self.browser, self.playwright
        self.browser = None
        self.playwright = None
        try:
            if browser:
                await browser.close()
        finally:
            if playwright:
                await playwright.stop()

    async def search(self, query: str) -> str:
        """Search for *query* via ``https://s.jina.ai/``, falling back to a Bing results page.

        Returns the Jina response text on HTTP 200. Otherwise the Bing search
        URL is fetched through ``navigate``.
        """
        import urllib.parse
        encoded_query = urllib.parse.quote(query)

        # Try Jina Search first
        try:
            url = f"https://s.jina.ai/{encoded_query}"

            logger.info(f"Jina AI searching: {query}")
            headers = {}
            if self.config.jina_api_key:
                headers["Authorization"] = f"Bearer {self.config.jina_api_key}"

            async with httpx.AsyncClient(timeout=30.0) as client:
                resp = await client.get(url, headers=headers)
                if resp.status_code == 200:
                    content = resp.text
                    logger.info(f"Successfully fetched search results for '{query}' via Jina")
                    return content
                logger.warning(f"Jina search failed with status {resp.status_code}, falling back to direct navigation")
        except Exception as e:
            logger.error(f"Jina search failed: {e}")

        # Fallback: Navigate to search engine
        try:
            # Use Bing as it's often friendlier to scrapers/readers than Google
            search_url = f"https://www.bing.com/search?q={encoded_query}"
            logger.info(f"Fallback searching via Bing: {search_url}")
            return await self.navigate(search_url)
        except Exception as e:
            return f"Error searching '{query}': {str(e)}"
338
+
339
+
340
+ # --- Core Class ---
341
+
342
class HYW:
    """Tool-using chat agent on top of an OpenAI-compatible chat-completions API.

    ``agent`` is the entry point: it optionally runs a vision pass over
    attached images, then loops between the model and the browser tools until
    the model produces a text answer.
    """

    def __init__(self, config: HYWConfig):
        """Create the API clients, the browser tool and the tool schema."""
        self.config = config
        self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)

        self._init_clients()
        self.browser_tool = BrowserTool(config)
        self._init_tools()

        logger.info(f"HYW initialized - Save Conversation: {config.save_conversation}, Browser Tool: {config.browser_tool}")

    def _init_clients(self):
        """Set ``self.vision_client``.

        A dedicated client is created only when ``vision_base_url`` is
        configured; otherwise the main client is reused.
        """
        model = self.config.vision_model_name or self.config.model_name
        if self.config.vision_base_url:
            self.vision_client = AsyncOpenAI(
                base_url=self.config.vision_base_url,
                api_key=self.config.vision_api_key or self.config.api_key
            )
            logger.info(f"Vision client created - Endpoint: {self.config.vision_base_url}, Model: {model}")
        else:
            self.vision_client = self.client
            logger.info(f"Vision using main client - Model: {model}")

    def _init_tools(self):
        """Build ``self.tools`` (function schemas) and ``self.tools_desc``.

        ``browser_navigate`` is always offered; ``web_search`` only when a Jina
        API key is configured.
        """
        self.tools = []

        self.tools.append({
            "type": "function",
            "function": {
                "name": "browser_navigate",
                "description": "Navigate to a URL and return the page content. Use this to search or view pages.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "The URL to navigate to. For searching, use the search engine URL with the query."
                        }
                    },
                    "required": ["url"]
                }
            }
        })

        if self.config.jina_api_key:
            self.tools.append({
                "type": "function",
                "function": {
                    "name": "web_search",
                    "description": "Search the web using Jina AI to find information.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                "description": "The search query"
                            }
                        },
                        "required": ["query"]
                    }
                }
            })

        # One "- name" line per tool; substituted into BASE_SYSTEM_PROMPT
        self.tools_desc = "\n".join([f"- {t['function']['name']}" for t in self.tools])

    async def analyze_images(self, images: List[str]) -> str:
        """Send *images* to the vision model and return its text answer.

        Each item is embedded as ``data:image/png;base64,<item>``. Returns ``""``
        for an empty list, an empty model answer, or any exception (which is
        logged).
        """
        if not images:
            return ""

        try:
            logger.info(f"Starting image analysis - Count: {len(images)}")

            img_content: List[Dict[str, Any]] = [{'type': 'text', 'text': '请分析这些图片'}]
            for img in images:
                img_content.append({"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}})

            img_messages = [
                {"role": "system", "content": VISION_EXPERT_SYSTEM_PROMPT},
                {"role": "user", "content": img_content}
            ]

            model = self.config.vision_model_name or self.config.model_name
            img_resp = await self.vision_client.chat.completions.create(
                model=model,
                messages=img_messages
            )
            if img_resp.choices[0].message.content:
                logger.info("Image analysis complete")
                return img_resp.choices[0].message.content
        except Exception as e:
            logger.error(f"Image analysis failed: {e}")
            return ""
        return ""

    async def call_tool(self, tool_call) -> str:
        """Dispatch one model tool call and return its textual result.

        Malformed arguments, missing parameters and unknown tool names are
        reported as ``"Error: ..."`` strings.
        """
        func_name = tool_call.function.name
        # Decode HTML entities in arguments before parsing
        args_str = html.unescape(tool_call.function.arguments)
        try:
            args = json.loads(args_str)
        except json.JSONDecodeError:
            return f"Error: Invalid JSON arguments for tool {func_name}"

        # Valid JSON that is not an object (list, string, number) has no .get()
        if not isinstance(args, dict):
            return f"Error: Invalid JSON arguments for tool {func_name}"

        if func_name == "browser_navigate":
            if not self.browser_tool:
                return "Error: Browser tool is disabled"

            url = args.get("url")
            if url:
                return await self.browser_tool.navigate(url)
            return "Error: Missing URL argument"

        if func_name == "web_search":
            if not self.browser_tool:
                return "Error: Browser tool is disabled"

            query = args.get("query")
            if query:
                return await self.browser_tool.search(query)
            return "Error: Missing query argument"

        return f"Error: Unknown tool {func_name}"

    def _tool_msg(self, tool_call_id: str, content: Any, is_error: bool = False, elapsed_time: Optional[float] = None) -> Dict[str, Any]:
        """Build a ``role: "tool"`` message, optionally prefixed with elapsed time."""
        msg_content = f"错误: {content}" if is_error else str(content)
        if elapsed_time is not None:
            msg_content = f"[已运行: {elapsed_time:.2f}s] {msg_content}"
        return {
            "role": "tool",
            "tool_call_id": tool_call_id,
            "content": msg_content
        }

    async def _run_tool_isolated(self, tool_call, agent_start_time: float) -> Dict[str, Any]:
        """Run one tool call and always return a tool message.

        Exceptions are converted into an error tool message so that one failing
        call cannot abort the others gathered alongside it.
        """
        tool_start = time.time()
        try:
            result = await self.call_tool(tool_call)
            tool_duration = time.time() - tool_start
            total_elapsed = time.time() - agent_start_time
            logger.info(f"Tool {tool_call.function.name} finished in {tool_duration:.2f}s (Total since start: {total_elapsed:.2f}s)")
            return self._tool_msg(tool_call.id, result, elapsed_time=total_elapsed)
        except Exception as e:
            total_elapsed = time.time() - agent_start_time
            logger.error(f"Tool failed: {e}")
            return self._tool_msg(tool_call.id, e, is_error=True, elapsed_time=total_elapsed)

    def _format_extra_content(self, content: Any) -> str:
        """Format extra content like reasoning or annotations"""
        return str(content)

    def _save_conversation_debug(self, messages: List[Dict[str, Any]]):
        """Write *messages* to ``saved_conversations/conversation_<ts>.json``.

        Does nothing unless ``save_conversation`` is enabled. Failures are
        logged and swallowed.
        """
        if not self.config.save_conversation:
            return

        try:
            import os
            debug_dir = "saved_conversations"
            os.makedirs(debug_dir, exist_ok=True)

            timestamp = int(time.time())
            filename = os.path.join(debug_dir, f"conversation_{timestamp}.json")

            with open(filename, "w", encoding="utf-8") as f:
                json.dump(messages, f, ensure_ascii=False, indent=2)

            logger.debug(f"Conversation saved to {filename}")
        except Exception as e:
            logger.warning(f"Failed to save conversation debug: {e}")

    def _append_stats_info(self, content: str, stats: Dict[str, Any], start_time: float, user_turns: int = 1) -> str:
        """Return *content* followed by a one-line ``[Stats] ::`` footer.

        The footer holds, in order: turn counter, search count, opened-page
        count, de-duplicated visited domains and timing. Empty or
        whitespace-only *content* is replaced with a fixed error notice.
        """
        current_duration = time.time() - start_time

        if not content or not content.strip():
            content = "[ERROR] \n>> 抱歉,获取到的内容可能包含敏感信息,暂时无法显示完整结果。"

        # Collect only the segments that exist, so that absent ones leave no
        # doubled spaces in the footer.
        segments = [f"[{user_turns}/5]"]

        search_count = stats.get('search_results', 0)
        if search_count > 0:
            segments.append(f"[S:{search_count}]")

        web_count = stats.get('web_pages_opened', 0)
        if web_count > 0:
            segments.append(f"[W:{web_count}]")

        # dict.fromkeys de-duplicates while preserving first-seen order
        segments.extend(f"[{d}]" for d in dict.fromkeys(stats.get('visited_domains', [])))

        vision_duration = stats.get("vision_duration", 0)
        if vision_duration > 0:
            segments.append(f"[V:{vision_duration:.2f}s/{current_duration:.2f}s]")
        else:
            segments.append(f"[{current_duration:.2f}s]")

        return content + "\n[Stats] :: " + " ".join(segments)

    async def agent(self, user_input: str, conversation_history: Optional[List[Dict[str, Any]]] = None, images: Optional[List[str]] = None) -> Dict[str, Any]:
        """Answer *user_input*, using tools and prior history as needed.

        Args:
            user_input: The new user message.
            conversation_history: Earlier messages; system-role entries are dropped.
            images: Base64-encoded images for the vision pass.

        Returns:
            A dict with ``llm_response`` (text), ``conversation_history``
            (message list) and ``stats``. On success the history excludes
            system messages; on API failure or when the 25-round limit is hit
            it is the full message list.
        """
        start_time = time.time()

        stats: Dict[str, Any] = {
            "llm_calls": 0,
            "search_results": 0,
            "web_pages_opened": 0,
            "visited_domains": [],
            "total_time": 0.0,
            "vision_duration": 0.0
        }

        user_turns = 1
        if conversation_history:
            user_turns = sum(1 for m in conversation_history if m.get("role") == "user") + 1

        try:
            # Vision/OCR Analysis
            image_analysis = ""
            if images:
                v_start = time.time()
                image_analysis = await self.analyze_images(images)
                stats["vision_duration"] = time.time() - v_start

            system_prompt = BASE_SYSTEM_PROMPT.format(tools_desc=self.tools_desc)
            messages: List[Dict[str, Any]] = [{"role": "system", "content": system_prompt}]

            if image_analysis:
                messages.append({"role": "system", "content": f"[图片分析报告]\n{image_analysis}"})

            if conversation_history:
                messages.append({"role": "system", "content": ADDITIONAL_RULES_PROMPT})

                if user_turns >= 5:
                    messages.append({"role": "system", "content": FINAL_TURN_PROMPT})

                messages.extend([m for m in conversation_history if m.get("role") != "system"])

            messages.append({"role": "user", "content": user_input})

            logger.info(f"Processing: {user_input[:50]}...")

            for _ in range(25):
                # Retry mechanism for API calls
                max_retries = 3
                resp = None
                last_error = None

                for attempt in range(max_retries):
                    try:
                        stats["llm_calls"] += 1
                        resp = await self.client.chat.completions.create(
                            model=self.config.model_name,
                            messages=messages,
                            tools=self.tools if self.tools else None,
                            tool_choice="auto" if self.tools else None,
                            extra_body=self.config.extra_body
                        )
                        break
                    except Exception as e:
                        last_error = e
                        if attempt < max_retries - 1:
                            logger.warning(f"API call failed (attempt {attempt + 1}/{max_retries}): {e}")
                            await asyncio.sleep(2)
                        else:
                            logger.error(f"API call failed after {max_retries} attempts: {e}")

                if resp is None:
                    if last_error:
                        self._save_conversation_debug(messages)
                        logger.error(f"Final API failure: {last_error}")
                        return {
                            "llm_response": (
                                "[Error] :: internal error\n"
                                "抱歉, 虽然很不想承认,但AI提供商、开发者、部署配置总有一个出了问题:\n"
                                f"错误信息: {str(last_error)}"
                            ),
                            "conversation_history": messages,
                            "stats": stats
                        }
                    return {"llm_response": "Error: Failed to get response from LLM", "conversation_history": messages, "stats": stats}

                # Guard against a response with no choices, which would
                # otherwise raise IndexError below.
                if not resp.choices:
                    self._save_conversation_debug(messages)
                    logger.error("LLM response contained no choices")
                    return {"llm_response": "Error: Failed to get response from LLM", "conversation_history": messages, "stats": stats}

                msg = resp.choices[0].message
                msg_dict = msg.model_dump(exclude_none=True)

                # Pull annotations out before stripping provider-specific keys
                annotations = msg_dict.get('annotations')
                for key in ['reasoning_details', 'annotations', 'reasoning']:
                    msg_dict.pop(key, None)

                if annotations:
                    search_info = self._format_extra_content(annotations)
                    if isinstance(annotations, list):
                        stats["search_results"] += len(annotations)

                    # NOTE(review): this tool-role message precedes the assistant
                    # message and uses a synthetic tool_call_id; strict
                    # OpenAI-compatible endpoints may reject that ordering. Confirm.
                    messages.append({
                        "role": "tool",
                        "content": search_info,
                        "tool_call_id": "citation"
                    })

                messages.append(msg_dict)

                logger.info(f"LLM Response: content={bool(msg.content)}, tools={bool(msg.tool_calls)}")

                if msg.tool_calls:
                    # Record page-open count and visited domains for the footer
                    for tc in msg.tool_calls:
                        if tc.function.name != "browser_navigate":
                            continue
                        stats["web_pages_opened"] += 1
                        try:
                            args_str = html.unescape(tc.function.arguments)
                            args = json.loads(args_str)
                            url = args.get("url", "")
                            match = re.search(r'https?://(?:www\.)?([^/.]+)', url)
                            if match:
                                stats["visited_domains"].append(match.group(1))
                        except Exception:
                            # Stats are best effort; call_tool reports bad arguments
                            pass

                    # Run all requested tool calls concurrently
                    tasks = [self._run_tool_isolated(tc, start_time) for tc in msg.tool_calls]
                    results = await asyncio.gather(*tasks)
                    messages.extend(results)
                elif msg.content:
                    logger.success("Conversation completed")
                    filtered_history = [m for m in messages if m.get("role") != "system"]
                    final_response = self._append_stats_info(msg.content, stats, start_time, user_turns)
                    return {"llm_response": final_response, "conversation_history": filtered_history, "stats": stats}

            # Max turns reached
            self._save_conversation_debug(messages)
            final_response = self._append_stats_info("Max turns reached", stats, start_time, user_turns)
            return {"llm_response": final_response, "conversation_history": messages, "stats": stats}
        finally:
            # The returned dict holds this same stats object, so every exit
            # path reports the total wall-clock time.
            stats["total_time"] = time.time() - start_time