PyPI - entari-plugin-hyw - Versions diffs - 4.0.0rc14__py3-none-any.whl → 4.0.0rc15__py3-none-any.whl - Mend

entari-plugin-hyw 4.0.0rc14__py3-none-any.whl → 4.0.0rc15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (12)

entari_plugin_hyw/__init__.py +149 -367
{entari_plugin_hyw-4.0.0rc14.dist-info → entari_plugin_hyw-4.0.0rc15.dist-info}/METADATA +1 -1
{entari_plugin_hyw-4.0.0rc14.dist-info → entari_plugin_hyw-4.0.0rc15.dist-info}/RECORD +11 -12
{entari_plugin_hyw-4.0.0rc14.dist-info → entari_plugin_hyw-4.0.0rc15.dist-info}/WHEEL +1 -1
hyw_core/agent.py +115 -18
hyw_core/browser_control/assets/card-dist/index.html +35 -31
hyw_core/browser_control/service.py +43 -0
hyw_core/definitions.py +57 -12
hyw_core/search.py +4 -6
hyw_core/stages/summary.py +45 -18
entari_plugin_hyw/Untitled-1 +0 -1865
{entari_plugin_hyw-4.0.0rc14.dist-info → entari_plugin_hyw-4.0.0rc15.dist-info}/top_level.txt +0 -0

hyw_core/browser_control/service.py CHANGED Viewed

@@ -914,6 +914,49 @@ class ScreenshotService:
                 try: tab.close()
                 except: pass
+    async def execute_script(self, script: str) -> Dict[str, Any]:
+        """
+        Execute JavaScript in the current active page context.
+        This reuses the shared browser instance.
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            self._executor,
+            self._execute_script_sync,
+            script
+        )
+    def _execute_script_sync(self, script: str) -> Dict[str, Any]:
+        """Synchronous JS execution."""
+        try:
+            self._ensure_ready()
+            page = self._manager.page
+            if not page:
+                return {"success": False, "error": "Browser not available"}
+            # Get current active tab or first tab
+            # Fix: ChromiumPage object has no attribute 'tabs'
+            # We use the page object itself as it represents the active tab controller
+            tab = page
+            if not tab:
+                return {"success": False, "error": "No active tab"}
+            logger.info(f"ScreenshotService: Executing JS on {tab.url}")
+            # Execute JS
+            result = tab.run_js(script)
+            return {
+                "success": True,
+                "result": result,
+                "url": tab.url,
+                "title": tab.title
+            }
+        except Exception as e:
+            logger.error(f"ScreenshotService: JS execution failed: {e}")
+            return {"success": False, "error": str(e)}
     async def close(self):
         self._executor.shutdown(wait=False)
         logger.info("ScreenshotService: Closed.")

hyw_core/definitions.py CHANGED Viewed

@@ -10,17 +10,36 @@ from typing import Dict, Any
 SUMMARY_REPORT_SP = """# 你是一个总结助手 (Agent), 你的职责是基于搜索工具给出的信息，回答用户的问题或解释用户问题中的关键词。
 ## 核心原则
 最小限度使用自身知识, 尽可能使用 web_tool 获取信息.
+遇到计算、js代码、算法任务, 积极使用 js_executor 工具完成计算任务.
+## 抓重点原则
+搜索结果中往往混杂大量信息，你需要：
+- 主动识别与用户问题最匹配的结果，大胆引用，不要因为信息混在众多结果中就忽略它
+- 即使只有一条结果明确匹配，也要优先使用该结果，而非泛泛而谈
+## 图文融合原则
+当用户同时提供图片和文字时：
+- 先理解用户真正想知道什么（识图？查资料？对比分析？）
+- 图片是"锚点"，搜索是"扩展"——围绕图片内容组织搜索信息
+- 行文自然流畅，让图片分析和搜索结果无缝衔接
+- 例如："图中展示的是 XX（识别结果），这是一款...（搜索扩展）"
 ## 工具使用指南
 - 适当时候调用 `refuse_answer`
 ## 回答格式
+- 字数: 尽可能少, 有多少信息写多少信息, 减少无意义, 足够回答用户问题或解释关键词所需的文字即可
 - `# ` 大标题约 8-10 个字
-- <summary>...</summary> 约 100 字的概括
+- 必要时可以辅助以 <summary>...</summary> 为格式, 不超过 100 字的概括
 - 二级标题 + markdown 正文
-- 正文使用 [1] 格式引用信息来源, 无需写出源, 系统自动渲染
+- 正文使用 [1] 格式引用信息来源. 如有 js 计算结果, 积极引用. 无需写出源, 系统自动渲染.
 """
+# Used by SummaryStage for multimodal input with images
+IMAGE_CONTEXT_TEMPLATE = """[System: 图文融合分析指南]
+用户提供了 {image_count} 张图片，请根据问题类型智能调整回答策略
+用户问题："""
 def get_refuse_answer_tool() -> Dict[str, Any]:
     """Tool for refusing to answer inappropriate content."""
     return {
@@ -45,25 +64,20 @@ def get_web_tool() -> Dict[str, Any]:
         "type": "function",
         "function": {
             "name": "web_tool",
-            "description": """搜索网页或截图指定URL。用于获取最新信息、查找资料。
+            "description": """搜索网页或截图指定URL。用于获取duckduckgo搜索结果或网页内容
 ## 使用方式
 网页搜索(大部分问题优先使用此方法):
-直接传入搜索词如 "python async" 会返回搜索结果列表
+直接传入搜索词如 "python async" 会返回搜索结果列表 搜索词尽可能少且精准, 以利于传统搜索引擎检索
 网页截图(当用户明确要求截图时使用):
 传入完整URL如 "https://example.com" 会直接截图该页面
-网页搜索 + 网页截图(可以预测能直接搜到什么样的结果时使用): (最终截图最多3张)
-- 域名过滤: "github=2: python async" → 会搜索 "python async github" 并截图 链接/标题包含 "github" 的前2个结果
-- 序号选择: "1,2: minecraft mods" → 会搜索 "minecraft mods" 并截图第1、2个结果
-- 多域名: "mcmod=1, github=1: forge mod" → 会搜索 "forge mod mcmod github" 并截图 链接/标题包含 "mcmod" 的前1个结果和 链接/标题包含 "github" 的前1个结果
 """,
             "parameters": {
                 "type": "object",
                 "properties": {
                     "query": {
                         "type": "string",
-                        "description": "搜索查询或URL。支持过滤器语法(见描述)"
+                        "description": "搜索查询或网页获取"
                     }
                 },
                 "required": ["query"]
@@ -72,6 +86,32 @@ def get_web_tool() -> Dict[str, Any]:
     }
+def get_js_tool() -> Dict[str, Any]:
+    """Tool for executing JavaScript in the browser."""
+    return {
+        "type": "function",
+        "function": {
+            "name": "js_executor",
+            "description": """执行JavaScript代码并返回结果。
+代码将在当前浏览器页面的上下文中执行。
+注意：
+1. 必须使用 `return` 语句返回结果，或者直接作为表达式（如 `1+1`）。
+2. 严禁使用 `console.log`，其输出无法被捕获，会导致返回 None。
+""",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "script": {
+                        "type": "string",
+                        "description": "要执行的JavaScript代码字符串"
+                    }
+                },
+                "required": ["script"]
+            }
+        }
+    }
 # =============================================================================
 # AGENT PROMPTS
 # =============================================================================
@@ -89,16 +129,21 @@ AGENT_SYSTEM_PROMPT = """# 你是一个智能助手 (Agent), 你的职责是使
     - 网页搜索: 可以同时调用3次, 其中URL截图消耗较大, 最多同时调用1个
 - 积极使用 web_tool 获取信息
     - 搜索时, 关键词保证简单、指向准确、利于传统搜索引擎.
+    - 不要尝试通过搜索引擎反推出角色、任务、地点, 搜索引擎没有这个能力
+    - 禁止搜索可能导致一切潜在推销广告的内容, 不出现“是什么”、“怎么办”等容易产生广告的内容
+    - 禁止搜索任何敏感内容(galgame之类的除外), 禁止搜索政治、成人色情、暴力等内容
     - 获取页面截图时, 只使用官方性较强的 wiki、官方网站、资源站等等, 不使用第三方转载新闻网站.
 - 最多可调用2轮工具, 之后必须给出最终回答
 - 适当时候调用 `refuse_answer`
 - 对于具体任务, 如果是转述、格式化、翻译等, 请直接给出最终回答, 不再调用工具
+- 遇到计算、js代码、算法任务, 积极使用 js_executor 工具完成计算任务.
 ## 回答格式
+- 字数: 尽可能少, 有多少获取到的信息、需要解释的内容, 就写多少, 减少无意义输出, 足够完成用户分配给你的任务 / 解释关键词即可.
 - `# ` 大标题约 8-10 个字
-- <summary>...</summary> 约 100 字的概括
+- 必要时可以辅助以 <summary>...</summary> 为格式, 不超过 100 字的概括
 - 二级标题 + markdown 正文
-- 正文使用 [1] 格式引用信息来源, 无需写出源, 系统自动渲染
+- 正文使用 [1] 格式引用信息来源. 如有 js 计算结果, 积极引用. 无需写出源, 系统自动渲染.
 """

hyw_core/search.py CHANGED Viewed

@@ -160,12 +160,10 @@ class SearchService:
         service = get_screenshot_service(headless=self._headless)
         return await service.screenshot_url(url, full_page=full_page)
-    async def screenshot_with_content(self, url: str, max_content_length: int = 8000) -> Dict[str, Any]:
+    async def execute_script(self, script: str) -> Dict[str, Any]:
         """
-        Capture screenshot and extract page content.
-        Returns:
-            Dict with screenshot_b64, content (truncated), title, url
+        Execute JavaScript in the current page context.
         """
         service = get_screenshot_service(headless=self._headless)
-        return await service.screenshot_with_content(url, max_content_length=max_content_length)
+        return await service.execute_script(script)

hyw_core/stages/summary.py CHANGED Viewed

@@ -13,7 +13,7 @@ from loguru import logger
 from openai import AsyncOpenAI
 from .base import BaseStage, StageContext, StageResult
-from ..definitions import SUMMARY_REPORT_SP, get_refuse_answer_tool
+from ..definitions import SUMMARY_REPORT_SP, IMAGE_CONTEXT_TEMPLATE, get_refuse_answer_tool
 class SummaryStage(BaseStage):
@@ -52,9 +52,9 @@ class SummaryStage(BaseStage):
         # Build user content
         user_text = context.user_input or "..."
         if images:
-            # Add image context message for multimodal input
-            image_context = f"[System: The user has provided {len(images)} image(s). Please analyze these images together with the text query to provide a comprehensive response.]"
-            user_content: List[Dict[str, Any]] = [{"type": "text", "text": f"{image_context}\n\n{user_text}"}]
+            # 构建智能图文融合指导
+            image_context = IMAGE_CONTEXT_TEMPLATE.format(image_count=len(images))
+            user_content: List[Dict[str, Any]] = [{"type": "text", "text": f"{image_context}{user_text}"}]
             for img_b64 in images:
                 url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
                 user_content.append({"type": "image_url", "image_url": {"url": url}})
@@ -77,22 +77,49 @@ class SummaryStage(BaseStage):
         model = model_cfg.model_name or self.config.model_name
-        try:
-            response = await client.chat.completions.create(
-                model=model,
-                messages=messages,
-                temperature=self.config.temperature,
-                extra_body=getattr(self.config, "summary_extra_body", None),
-                tools=[refuse_tool],
-                tool_choice="auto",
-            )
-        except Exception as e:
-            logger.error(f"SummaryStage LLM error: {e}")
+        # Retry logic for API calls
+        max_retries = 2
+        response = None
+        last_error = None
+        for attempt in range(max_retries + 1):
+            try:
+                response = await client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=self.config.temperature,
+                    extra_body=getattr(self.config, "summary_extra_body", None),
+                    tools=[refuse_tool],
+                    tool_choice="auto",
+                )
+                # Check for valid response
+                if response.choices:
+                    break  # Success, exit retry loop
+                # Empty choices - log and retry
+                logger.warning(f"SummaryStage: Empty choices response (attempt {attempt + 1}/{max_retries + 1}). Response: {response}")
+                last_error = "Invalid API response: no choices returned"
+                if attempt < max_retries:
+                    import asyncio
+                    await asyncio.sleep(1)  # Wait 1 second before retry
+            except Exception as e:
+                logger.error(f"SummaryStage LLM error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                last_error = str(e)
+                if attempt < max_retries:
+                    import asyncio
+                    await asyncio.sleep(1)  # Wait 1 second before retry
+        # Check if we got a valid response after retries
+        if not response or not response.choices:
+            logger.error(f"SummaryStage: All retries exhausted. Last error: {last_error}")
             return StageResult(
                 success=False,
-                error=str(e),
-                data={"content": f"Error generating summary: {e}"}
+                error=last_error or "Invalid API response after retries",
+                data={"content": f"Error: {last_error or 'API returned invalid response after retries'}"}
             )
         usage = {"input_tokens": 0, "output_tokens": 0}

entari-plugin-hyw 4.0.0rc14__py3-none-any.whl → 4.0.0rc15__py3-none-any.whl

Potentially problematic release.

entari-plugin-hyw 4.0.0rc14__py3-none-any.whl → 4.0.0rc15__py3-none-any.whl