entari-plugin-hyw 3.2.113__py3-none-any.whl → 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of entari-plugin-hyw has been flagged as potentially problematic.
- entari_plugin_hyw/__init__.py +309 -758
- entari_plugin_hyw/hyw_core.py +700 -0
- {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/METADATA +25 -17
- entari_plugin_hyw-3.3.1.dist-info/RECORD +6 -0
- entari_plugin_hyw/assets/icon/anthropic.svg +0 -1
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +0 -1
- entari_plugin_hyw/assets/icon/google.svg +0 -1
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +0 -15
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +0 -1
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +0 -24
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/assets/libs/highlight.css +0 -10
- entari_plugin_hyw/assets/libs/highlight.js +0 -1213
- entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
- entari_plugin_hyw/assets/libs/katex.css +0 -1
- entari_plugin_hyw/assets/libs/katex.js +0 -1
- entari_plugin_hyw/assets/libs/tailwind.css +0 -1
- entari_plugin_hyw/assets/package-lock.json +0 -953
- entari_plugin_hyw/assets/package.json +0 -16
- entari_plugin_hyw/assets/tailwind.config.js +0 -12
- entari_plugin_hyw/assets/tailwind.input.css +0 -235
- entari_plugin_hyw/assets/template.html +0 -157
- entari_plugin_hyw/assets/template.html.bak +0 -157
- entari_plugin_hyw/assets/template.j2 +0 -259
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +0 -36
- entari_plugin_hyw/core/history.py +0 -146
- entari_plugin_hyw/core/hyw.py +0 -41
- entari_plugin_hyw/core/pipeline.py +0 -840
- entari_plugin_hyw/core/render.py +0 -531
- entari_plugin_hyw/core/render.py.bak +0 -926
- entari_plugin_hyw/utils/__init__.py +0 -3
- entari_plugin_hyw/utils/browser.py +0 -61
- entari_plugin_hyw/utils/mcp_playwright.py +0 -128
- entari_plugin_hyw/utils/misc.py +0 -93
- entari_plugin_hyw/utils/playwright_tool.py +0 -46
- entari_plugin_hyw/utils/prompts.py +0 -94
- entari_plugin_hyw/utils/search.py +0 -193
- entari_plugin_hyw-3.2.113.dist-info/RECORD +0 -47
- {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt +0 -0
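The listing above summarizes per-file line changes between the two wheels. For reference, a comparison like this can be reproduced locally by downloading both wheels and diffing their extracted contents; the sketch below is only an illustration using the standard library plus `pip download`, and the member path diffed at the end is just one file known to exist in both versions.

```python
# Sketch: reproduce a wheel-to-wheel comparison locally. Assumes `pip` is on PATH.
import difflib
import pathlib
import subprocess
import tempfile
import zipfile


def fetch_wheel(version: str, dest: pathlib.Path) -> pathlib.Path:
    """Download the wheel for the given version into `dest` and return its path."""
    subprocess.run(
        ["pip", "download", f"entari-plugin-hyw=={version}", "--no-deps", "-d", str(dest)],
        check=True,
    )
    return next(dest.glob("*.whl"))


def read_lines(wheel: pathlib.Path, member: str) -> list[str]:
    """Read one file out of the wheel (a zip archive) as a list of text lines."""
    with zipfile.ZipFile(wheel) as zf:
        return zf.read(member).decode("utf-8", errors="replace").splitlines()


with tempfile.TemporaryDirectory() as tmp:
    old = fetch_wheel("3.2.113", pathlib.Path(tmp) / "old")
    new = fetch_wheel("3.3.1", pathlib.Path(tmp) / "new")
    member = "entari_plugin_hyw/__init__.py"
    diff = difflib.unified_diff(
        read_lines(old, member),
        read_lines(new, member),
        fromfile=f"3.2.113/{member}",
        tofile=f"3.3.1/{member}",
        lineterm="",
    )
    print("\n".join(diff))
```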
entari_plugin_hyw/utils/browser.py
DELETED
@@ -1,61 +0,0 @@
-import asyncio
-import urllib.parse
-from typing import Any, Optional
-
-import httpx
-import trafilatura
-from loguru import logger
-
-
-class BrowserTool:
-    """Simple HTTP fetcher for search and page content."""
-
-    def __init__(self, config: Any):
-        self.config = config
-        self._client: Optional[httpx.AsyncClient] = None
-
-    async def _ensure_client(self) -> httpx.AsyncClient:
-        if self._client is None:
-            timeout = httpx.Timeout(8.0)
-            self._client = httpx.AsyncClient(
-                timeout=timeout,
-                follow_redirects=True,
-                headers={
-                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
-                    "Accept": "application/json,text/html;q=0.9,*/*;q=0.8",
-                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-                },
-                verify=False
-            )
-        return self._client
-
-    async def navigate(self, url: str) -> str:
-        """Fetch URL content via HTTP and extract markdown."""
-        try:
-            client = await self._ensure_client()
-            resp = await client.get(url)
-            if resp.status_code >= 400:
-                logger.error(f"HTTP navigation failed status={resp.status_code} url={url}")
-                return f"Error navigating to {url}: {resp.status_code}"
-
-            html = resp.text
-            content = await asyncio.to_thread(
-                trafilatura.extract,
-                html,
-                include_links=True,
-                include_images=True,
-                include_tables=True,
-                output_format="markdown",
-            )
-            if not content:
-                content = html[:4000]
-            return content
-        except Exception as e:
-            logger.error(f"HTTP navigation failed: {e}")
-            return f"Error navigating to {url}: {e}"
-
-    async def close(self):
-        if self._client:
-            await self._client.aclose()
-            self._client = None
-
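For orientation, here is a minimal usage sketch of the removed BrowserTool. The signatures come from the deleted module above; the config value and URL are placeholders.

```python
import asyncio

# Module exists only in entari-plugin-hyw <= 3.2.113.
from entari_plugin_hyw.utils.browser import BrowserTool


async def demo() -> None:
    tool = BrowserTool(config=None)  # the config object is stored but not otherwise used by this class
    markdown = await tool.navigate("https://example.com")  # HTTP fetch + trafilatura markdown extraction
    print(markdown[:200])
    await tool.close()  # dispose of the shared httpx.AsyncClient


asyncio.run(demo())
```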
entari_plugin_hyw/utils/mcp_playwright.py
DELETED
@@ -1,128 +0,0 @@
-from __future__ import annotations
-
-from contextlib import AsyncExitStack, asynccontextmanager
-from typing import Any, AsyncIterator, Dict, List, Optional, Tuple
-
-import anyio
-from mcp.client.session import ClientSession
-from mcp.client.stdio import StdioServerParameters, stdio_client
-
-
-class MCPPlaywright:
-    def __init__(
-        self,
-        command: str = "npx",
-        args: Optional[List[str]] = None,
-        env: Optional[Dict[str, str]] = None,
-        cwd: Optional[str] = None,
-    ):
-        self.command = command
-        self.args = args or ["-y", "@playwright/mcp@latest"]
-        self.env = env
-        self.cwd = cwd
-
-    @asynccontextmanager
-    async def connect(self) -> AsyncIterator[ClientSession]:
-        server = StdioServerParameters(command=self.command, args=self.args, env=self.env, cwd=self.cwd)
-        async with stdio_client(server) as (read, write):
-            async with ClientSession(read, write) as session:
-                await session.initialize()
-                yield session
-
-    @staticmethod
-    def mcp_tools_to_openai(tools: Any) -> List[Dict[str, Any]]:
-        openai_tools: List[Dict[str, Any]] = []
-        for t in tools.tools:
-            schema = dict(getattr(t, "inputSchema", None) or {"type": "object", "properties": {}})
-            schema.pop("$schema", None)
-            openai_tools.append(
-                {
-                    "type": "function",
-                    "function": {
-                        "name": t.name,
-                        "description": t.description or "",
-                        "parameters": schema,
-                    },
-                }
-            )
-        return openai_tools
-
-    @staticmethod
-    def call_result_to_text(result: Any) -> str:
-        if result is None:
-            return ""
-        # MCP CallToolResult.content is typically a list of TextContent items
-        content = getattr(result, "content", None)
-        if isinstance(content, list):
-            parts: List[str] = []
-            for item in content:
-                text = getattr(item, "text", None)
-                if text is not None:
-                    parts.append(str(text))
-                else:
-                    parts.append(str(item))
-            return "\n".join(parts).strip()
-        return str(result)
-
-
-class MCPPlaywrightManager:
-    def __init__(
-        self,
-        command: str = "npx",
-        args: Optional[List[str]] = None,
-        env: Optional[Dict[str, str]] = None,
-        cwd: Optional[str] = None,
-    ):
-        self._client = MCPPlaywright(command=command, args=args, env=env, cwd=cwd)
-        self._lock = anyio.Lock()
-        self._stack: Optional[AsyncExitStack] = None
-        self._session: Optional[ClientSession] = None
-        self._tools_openai: Optional[List[Dict[str, Any]]] = None
-
-    async def ensure_connected(self) -> bool:
-        async with self._lock:
-            if self._session is not None:
-                return True
-
-            stack = AsyncExitStack()
-            try:
-                server = StdioServerParameters(
-                    command=self._client.command, args=self._client.args, env=self._client.env, cwd=self._client.cwd
-                )
-                read, write = await stack.enter_async_context(stdio_client(server))
-                session = await stack.enter_async_context(ClientSession(read, write))
-                await session.initialize()
-
-                tools = await session.list_tools()
-                self._tools_openai = MCPPlaywright.mcp_tools_to_openai(tools)
-
-                self._stack = stack
-                self._session = session
-                return True
-            except Exception:
-                await stack.aclose()
-                self._stack = None
-                self._session = None
-                self._tools_openai = None
-                return False
-
-    async def tools_openai(self) -> List[Dict[str, Any]]:
-        ok = await self.ensure_connected()
-        if not ok or self._tools_openai is None:
-            return []
-        return self._tools_openai
-
-    async def call_tool_text(self, name: str, arguments: Optional[Dict[str, Any]] = None) -> str:
-        ok = await self.ensure_connected()
-        if not ok or self._session is None:
-            return "Error: Playwright MCP is not connected."
-        result = await self._session.call_tool(name, arguments or {})
-        return MCPPlaywright.call_result_to_text(result)
-
-    async def close(self):
-        async with self._lock:
-            if self._stack is not None:
-                await self._stack.aclose()
-                self._stack = None
-                self._session = None
-                self._tools_openai = None
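A hedged usage sketch of the removed MCPPlaywrightManager follows. The method names come from the code above; the concrete tool name (`browser_navigate`) and its arguments are only illustrative, since the real names are whatever the Playwright MCP server advertises via list_tools().

```python
import asyncio

from entari_plugin_hyw.utils.mcp_playwright import MCPPlaywrightManager  # removed in 3.3.1


async def demo() -> None:
    manager = MCPPlaywrightManager()  # lazily spawns `npx -y @playwright/mcp@latest` over stdio
    tools = await manager.tools_openai()  # OpenAI-style tool schemas, or [] if the server failed to start
    print([t["function"]["name"] for t in tools])

    # Tool name/arguments are illustrative; use one of the names printed above.
    text = await manager.call_tool_text("browser_navigate", {"url": "https://example.com"})
    print(text)

    await manager.close()


asyncio.run(demo())
```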
entari_plugin_hyw/utils/misc.py
DELETED
@@ -1,93 +0,0 @@
-import json
-import base64
-import httpx
-from typing import Dict, Any, List, Optional
-from loguru import logger
-from arclet.entari import MessageChain, Image
-from typing import Tuple
-import asyncio
-from satori.exception import ActionFailed
-
-def process_onebot_json(data: Dict[str, Any]) -> str:
-    """Process OneBot JSON elements"""
-    try:
-        if "data" in data:
-            json_str = data["data"]
-            if isinstance(json_str, str):
-                json_str = json_str.replace("＂", '"').replace("，", ",")
-            content = json.loads(json_str)
-            if "meta" in content and "detail_1" in content["meta"]:
-                detail = content["meta"]["detail_1"]
-                if "desc" in detail and "qqdocurl" in detail:
-                    return f"[Shared Document] {detail['desc']}: {detail['qqdocurl']}"
-    except Exception as e:
-        logger.warning(f"Failed to process JSON element: {e}")
-    return ""
-
-
-
-
-
-async def download_image(url: str) -> bytes:
-    """下载图片"""
-    try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            resp = await client.get(url)
-            if resp.status_code == 200:
-                return resp.content
-            else:
-                raise ActionFailed(f"下载图片失败,状态码: {resp.status_code}")
-    except Exception as e:
-        raise ActionFailed(f"下载图片失败: {url}, 错误: {str(e)}")
-
-async def process_images(mc: MessageChain, vision_model: Optional[str] = None) -> Tuple[List[str], Optional[str]]:
-    # If vision model is explicitly set to "off", skip image processing
-    if vision_model == "off":
-        return [], None
-
-    has_images = bool(mc.get(Image))
-    images = []
-    if has_images:
-        urls = mc[Image].map(lambda x: x.src)
-        tasks = [download_image(url) for url in urls]
-        raw_images = await asyncio.gather(*tasks)
-        import base64
-        images = [base64.b64encode(img).decode('utf-8') for img in raw_images]
-
-    return images, None
-
-
-def resolve_model_name(name: str, models_config: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
-    """
-    Resolve a user input model name to the full API model name from config.
-    Supports partial matching if unique.
-    """
-    if not name:
-        return None, "No model name provided"
-
-    name = name.lower()
-
-    # 1. Exact match (name or id or shortname)
-    for m in models_config:
-        if m.get("name") == name or m.get("id") == name:
-            return m.get("name"), None
-
-    # 2. Key/Shortcut match
-    # Assuming the config might have keys like 'gpt4' mapping to full name
-    # But usually models list is [{'name': '...', 'provider': '...'}, ...]
-
-    # Check if 'name' matches any model 'name' partially?
-    # Or just return the name itself if it looks like a valid model ID (contains / or -)
-    if "/" in name or "-" in name or "." in name:
-        return name, None
-
-    # If not found in config specific list, and doesn't look like an ID, maybe return error
-    # But let's look for partial match in config names
-    matches = [m["name"] for m in models_config if name in m.get("name", "").lower()]
-    if len(matches) == 1:
-        return matches[0], None
-    elif len(matches) > 1:
-        return None, f"Model name '{name}' is ambiguous. Matches: {', '.join(matches[:3])}..."
-
-    # Default: assume it's a valid ID passed directly
-    return name, None
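A quick illustration of the matching order in the removed resolve_model_name helper (exact match first, then pass-through for ID-like strings containing '/', '-' or '.', then unique-substring match); the models_config entries below are invented.

```python
from entari_plugin_hyw.utils.misc import resolve_model_name  # removed in 3.3.1

# Invented entries, shaped like the [{'name': ..., 'provider': ...}] list the code expects.
models_config = [
    {"name": "openai/gpt-4o-mini", "provider": "openrouter"},
    {"name": "deepseek-chat", "provider": "deepseek"},
]

print(resolve_model_name("openai/gpt-4o-mini", models_config))  # exact match -> ('openai/gpt-4o-mini', None)
print(resolve_model_name("deepseek", models_config))            # unique substring -> ('deepseek-chat', None)
print(resolve_model_name("gpt-5", models_config))               # looks like an ID (has '-') -> passed through as-is
print(resolve_model_name("", models_config))                    # -> (None, 'No model name provided')
```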
entari_plugin_hyw/utils/playwright_tool.py
DELETED
@@ -1,46 +0,0 @@
-import asyncio
-from typing import Any, Optional
-
-import trafilatura
-from loguru import logger
-
-try:
-    from playwright.async_api import async_playwright
-except Exception:  # pragma: no cover
-    async_playwright = None
-
-
-class PlaywrightTool:
-    def __init__(self, config: Any):
-        self.config = config
-
-    async def navigate(self, url: str) -> str:
-        if not url:
-            return "Error: Missing url"
-        if async_playwright is None:
-            return "Error: Playwright is not available in this environment."
-
-        headless = bool(getattr(self.config, "headless", True))
-        try:
-            async with async_playwright() as p:
-                browser = await p.chromium.launch(headless=headless)
-                context = await browser.new_context()
-                page = await context.new_page()
-                await page.goto(url, wait_until="domcontentloaded", timeout=15000)
-                html = await page.content()
-                await context.close()
-                await browser.close()
-
-                content = await asyncio.to_thread(
-                    trafilatura.extract,
-                    html,
-                    include_links=True,
-                    include_images=True,
-                    include_tables=True,
-                    output_format="markdown",
-                )
-                return content or html[:4000]
-        except Exception as e:
-            logger.warning(f"Playwright navigation failed: {e}")
-            return f"Error: Playwright navigation failed: {e}"
-
entari_plugin_hyw/utils/prompts.py
DELETED
@@ -1,94 +0,0 @@
-VISION_SYSTEM_PROMPT = """你是一个专业的视觉转文字专家.
-
-[用户消息]
-{user_msgs}
-
-[核心任务]
-- 智能分析图片内容, 转述成文本, 除此之外不要添加任何内容
-- 文字优先: 若包含清晰文字(文档、截图等), 必须完整准确转录, 不要遗漏.
-- 视觉补充: 若无文字, 重点描述视觉内容(物体、场景、氛围).
-- 用户要求: 根据用户消息中提示侧重转文本的偏向, 若无或无关联则不理会常规完成.
-"""
-
-INTRUCT_SYSTEM_PROMPT = """你是一个专业的指导专家.
-
-[用户消息]
-{user_msgs}
-
-[核心任务]
-- 决定是否使用搜索工具
-- 如果用户消息包含典型名词、可能的专有名词组合, 且意图为解释此词, 请使用搜索工具, 搜索工具会给你返回最新的资料和图片.
-- 如果用户消息明显不需要搜索, 或虽然存在名词但是作为过程参与不涉及结果, 则不调用搜索工具
-- 如果用户的消息明显有两个搜索的方向, 本次对话最多同时调用两个搜索工具分开搜索
-- 理解用户话语, 提炼出搜索关键词.
-- 保持原意, 禁止添加额外内容.
-- 禁止擅自分割关键词导致语意变化.
-- 决定是否放权 mcp工具 给 agent
-- 如果用户显式地表达了要求模型使用mcp帮助完成任务的意图, 调用工具放权
-> 所有工具需要在本次对话同时调用
-
-[调用工具]
-{tools_desc}
-"""
-
-INTRUCT_SYSTEM_PROMPT_VISION_ADD = """
-[视觉专家消息]
-{vision_msgs}
-"""
-
-
-AGENT_SYSTEM_PROMPT = """
-你是一个全能助手, 请根据用户需求和搜索结果中贴切用户意图的可靠信息解释用户消息中的关键词.
-
-[用户消息]
-{user_msgs}
-
-[回复格式要求]
-当不调用工具发送文本, 即会变成最终回复, 请遵守:
-- 语言: 简体中文, 百科式风格.
-- 正文格式: 使用 Markdown, 有大标题, 可以使用数学公式, 格式内容丰富.
-"""
-
-AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD = """
-[视觉专家消息]
-{vision_msgs}
-"""
-
-AGENT_SYSTEM_PROMPT_SEARCH_ADD = """
-[搜索专家给出的信息]
-{search_msgs}
-
-[最终回复]
-- 图片: 如果本次回答适合配图, 对搜索到的图片, 选择 1-3 张合适的尽量类型、来源、不同、主题契合的图片, 美观分布嵌入正文 .
-- 搜索引用: 在正文中使用 `ref:数字id` (代码形式) 如 `ref:1` 标注你挑选的来源编号, 每个引用必须分开标注.
-- 底部添加 references 代码块.
-- 在搜索专家给出的信息中挑选出你需要的条目并重新从1开始编号, 按顺序在文末的 references 代码块中列出.
-
-- 格式: `1. [标题](url)`
-```references
-1. [标题](url)
-2. [标题](url)
-```
-"""
-
-AGENT_SYSTEM_PROMPT_MCP_ADD = """
-[MCP 工具已授权]
-可用工具:
-{tools_desc}
-
-> 积极使用工具完成任务,工具优先于文本回复。
-[最终回复]
-- 工具引用: 在正文中使用 `mcp:字母顺序` (代码形式) 如 `mcp:a` 标注你挑选的来源编号, 每个引用必须分开标注.
-- 底部添加 `mcp` 代码块列出工具调用流程:
-- 按照实际的工具调用的顺序编号
-
-- 格式: `1. [图标] 工具名称: 文本描述`
-- 图标: navigate, snapshot, click, type, code, wait, default
-```mcp
-1. [code] browser_run_code: 执行JavaScript计算
-2. [navigate] navigate: 导航到xxx网站
-```
-"""
-
-
-
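A small sketch of how these templates appear to be meant for composition (a base prompt plus the *_ADD fragments); the placeholder values passed to format() are invented, the real pipeline fills them from the incoming message and tool registry.

```python
from entari_plugin_hyw.utils import prompts  # removed in 3.3.1

# Invented placeholder values for illustration only.
system = prompts.INTRUCT_SYSTEM_PROMPT.format(
    user_msgs="什么是 SearXNG?",
    tools_desc="search(query): web search\nimage_search(query): image search",
)
system += prompts.INTRUCT_SYSTEM_PROMPT_VISION_ADD.format(vision_msgs="(no images attached)")
print(system)
```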
entari_plugin_hyw/utils/search.py
DELETED
@@ -1,193 +0,0 @@
-import re
-import httpx
-import urllib.parse
-from typing import List, Dict, Optional, Any
-from loguru import logger
-
-class SearchService:
-    """
-    Specialized service for interacting with SearXNG.
-    Uses regex-based HTML parsing to ensure O(n) performance and zero blocking,
-    bypasssing heavy DOM parsers like Trafilatura.
-    """
-    def __init__(self, config: Any):
-        self.config = config
-
-    async def search(self, query: str) -> List[Dict[str, str]]:
-        """
-        Execute search and parse results using Regex.
-        Returns a list of dicts: {'title': str, 'url': str, 'content': str}
-        """
-        # 1. Construct URL (Force HTML format since JSON is 403)
-        encoded_query = urllib.parse.quote(query)
-        base = getattr(self.config, "search_base_url", "http://127.0.0.1:8888/search?")
-
-        # Ensure we don't have double '?' or '&' issues
-        sep = "&" if "?" in base else "?"
-
-        # Remove any existing format=json if present in base (just in case)
-        base = base.replace("format=json&", "").replace("&format=json", "")
-
-        # Handle {query} placeholder if present (common in config defaults)
-        if "{query}" in base:
-            # We need to handle potential other placeholders like {limit} if they exist, or escape them
-            # For simplicity, we just replace {query} and ignore format/limit changes since we parse HTML
-            # Actually, standard python format() might fail if other braces exist.
-            # safe replace:
-            url = base.replace("{query}", encoded_query)
-            # Remove other common placeholders if they linger
-            url = url.replace("{limit}", "8")
-        else:
-            # Append mode
-            url = f"{base}{sep}q={encoded_query}&language=zh-CN"
-
-        logger.info(f"SearchService: Fetching {url}")
-
-        try:
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                resp = await client.get(url)
-                if resp.status_code != 200:
-                    logger.error(f"Search failed: {resp.status_code}")
-                    return []
-                html = resp.text
-                return self._parse_searxng_html(html)
-        except Exception as e:
-            logger.error(f"Search execution failed: {e}")
-            return []
-
-    def _parse_searxng_html(self, html: str) -> List[Dict[str, str]]:
-        """
-        Parse SearXNG HTML results using Regex.
-        Target structure:
-        <article class="result ...">
-            <h3><a href="(url)">(title)</a></h3>
-            <p class="content">(snippet)</p>
-        </article>
-        """
-        results = []
-
-        # Regex to find result blocks.
-        # We split by <article to find chunks, then parse each chunk.
-        # This is safer than a global regex which might get confused by nested structures.
-        chunks = html.split('<article')
-
-        for chunk in chunks[1:]:  # Skip preamble
-            try:
-                # 1. Extract URL and Title
-                # Look for <a href="..." ... >Title</a> inside h3
-                # Simplified pattern: href="([^"]+)" text is >([^<]+)<
-                link_match = re.search(r'href="([^"]+)".*?>([^<]+)<', chunk)
-                if not link_match:
-                    continue
-
-                url = link_match.group(1)
-                title = link_match.group(2).strip()
-
-                # Verify it's a valid result link (sometimes engine links appear)
-                if "searxng" in url or url.startswith("/"):
-                    continue
-
-                # 2. Extract Snippet
-                # Look for class="content">...<
-                # We try to capture text until the next tag open
-                snippet_match = re.search(r'class="content"[^>]*>([\s\S]*?)</p>', chunk)
-                snippet = ""
-                if snippet_match:
-                    # Clean up HTML tags from snippet if any remain (basic check)
-                    raw_snippet = snippet_match.group(1)
-                    snippet = re.sub(r'<[^>]+>', '', raw_snippet).strip()
-
-                if url and title:
-                    # SAFETY: Truncate snippet to 500 chars to prevent context explosion
-                    final_snippet = (snippet or title)[:500]
-                    results.append({
-                        "title": title,
-                        "url": url,
-                        "content": final_snippet
-                    })
-
-                if len(results) >= 8:  # Limit to 8 results
-                    break
-
-            except Exception:
-                continue
-
-        logger.info(f"SearchService: Parsed {len(results)} results")
-        return results
-
-    async def image_search(self, query: str) -> List[Dict[str, str]]:
-        """
-        Perform image search using regex parsing on HTML results.
-        """
-        if not query: return []
-
-        encoded_query = urllib.parse.quote(query)
-        base = getattr(self.config, "image_search_base_url", "http://127.0.0.1:8888/search?")
-        sep = "&" if "?" in base else "?"
-
-        # Clean format=json
-        base = base.replace("format=json&", "").replace("&format=json", "")
-
-        if "{query}" in base:
-            url = base.replace("{query}", encoded_query)
-            url = url.replace("{limit}", "8")
-        else:
-            url = f"{base}{sep}q={encoded_query}&iax=images&ia=images"
-
-        logger.info(f"SearchService: Fetching Images {url}")
-
-        try:
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                resp = await client.get(url)
-                resp.raise_for_status()
-                html_content = resp.text
-        except Exception as e:
-            logger.error(f"Image Search failed: {e}")
-            return []
-
-        # Regex for Images (DuckDuckGo style / Generic)
-        # DDG images usually in a script or complex layout.
-        # For simplicity in V2 regex approach, we look for common img tags with logical classes or structure
-        # OR, since the user's SearXNG likely returns standard HTML list for images too.
-        # SearXNG Image results usually: <img src="..." alt="..."> inside a result container.
-        # Let's try a generic pattern for SearXNG image results
-
-        results = []
-        # SearXNG pattern: <div class="img-search-result"> ... <img src="URL" ...>
-        # Or just look for img tags with src that are http
-
-        # More robust SearXNG specific regex:
-        # Pattern: <img class="image" src="(?P<url>[^"]+)" alt="(?P<title>[^"]+)"
-        # This is a guess. Let's try to match standard "result_image" or similar if possible.
-
-        # Assuming SearXNG:
-        # More robust regex to capture images from various engines (SearXNG, Google, Bing)
-        # 1. Try generic <img ... src="..."> with http
-        # 2. Try to extract alt text if available
-
-        # Pattern 1: Standard img tag with src
-        # We look for src="http..." and optional alt
-        image_matches = re.finditer(r'<img[^>]+src=["\'](http[^"\']+)["\'][^>]*>', html_content, re.IGNORECASE)
-
-        for match in image_matches:
-            img_tag = match.group(0)
-            img_url = match.group(1)
-
-            # Extract alt/title
-            alt_match = re.search(r'alt=["\']([^"\']*)["\']', img_tag, re.IGNORECASE)
-            title = alt_match.group(1) if alt_match else ""
-
-            # Filter out tiny icons/favicons/data uris if possible
-            if "favicon" in img_url or "static" in img_url or "data:image" in img_url:
-                continue
-
-            results.append({
-                "title": title or "Image",
-                "url": img_url,
-                "content": f"Image: {title}"
-            })
-
-            if len(results) >= 8:
-                break
-
-        return results
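A usage sketch for the removed SearchService; the SearXNG base URLs below (and their query-string layout) are assumptions matching the `{query}` placeholder branch in the code above, not values shipped with the plugin.

```python
import asyncio
from types import SimpleNamespace

from entari_plugin_hyw.utils.search import SearchService  # removed in 3.3.1

# The service only reads these attributes via getattr(); URLs are placeholders for a local SearXNG instance.
config = SimpleNamespace(
    search_base_url="http://127.0.0.1:8888/search?q={query}&language=zh-CN",
    image_search_base_url="http://127.0.0.1:8888/search?q={query}&categories=images",
)


async def demo() -> None:
    service = SearchService(config)
    for hit in await service.search("SearXNG"):
        print(hit["title"], "->", hit["url"])
    for img in await service.image_search("SearXNG logo"):
        print(img["title"], "->", img["url"])


asyncio.run(demo())
```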
entari_plugin_hyw-3.2.113.dist-info/RECORD
DELETED
@@ -1,47 +0,0 @@
-entari_plugin_hyw/__init__.py,sha256=qRk1hp5HTb4gUPOalYXJScjWZtFKlv2SKEOWar-c4AI,35164
-entari_plugin_hyw/assets/package-lock.json,sha256=TIrLM-wLWZTrp3LKfzhEVuduhvBJmI93NdQEKYLW2W0,33172
-entari_plugin_hyw/assets/package.json,sha256=Y4H8JGtp3nv2WUtI20tXoXWddR-dwwKJhqQVLercpiw,306
-entari_plugin_hyw/assets/tailwind.config.js,sha256=S8I9X8hI8IaQRczWK9hTW-zl4oVpAXw5ykeksrzHjpU,382
-entari_plugin_hyw/assets/tailwind.input.css,sha256=QEZD-647GQfZeRYQdBx17RLUk6mKnZkDEwj7haTQzew,4735
-entari_plugin_hyw/assets/template.html,sha256=xPgOKlhKzz2p2_1nn5y44XwD37UqkW2uwHn4HpaGtCU,5646
-entari_plugin_hyw/assets/template.html.bak,sha256=xPgOKlhKzz2p2_1nn5y44XwD37UqkW2uwHn4HpaGtCU,5646
-entari_plugin_hyw/assets/template.j2,sha256=z-3X68wlncMkUYpHYsD4W7csmQIzWwVRamztDkQ6gpo,14329
-entari_plugin_hyw/assets/icon/anthropic.svg,sha256=ASsy1ypo3osNc3n-B0R81tk_dIFsVgg7qQORrd5T2kA,558
-entari_plugin_hyw/assets/icon/deepseek.png,sha256=KWWAr9aeYMc6I07U_1qo7zcXO6e7-kfd9S2XjQumnf4,25338
-entari_plugin_hyw/assets/icon/gemini.svg,sha256=H74CoVmx5opcCtr3Ay3M09dpqL9cd9Whkx-M6an3t7s,599
-entari_plugin_hyw/assets/icon/google.svg,sha256=H74CoVmx5opcCtr3Ay3M09dpqL9cd9Whkx-M6an3t7s,599
-entari_plugin_hyw/assets/icon/grok.png,sha256=uSulvvDVqoA4RUOW0ZAkdvBVM2rpyGJRZIbn5dEFspw,362
-entari_plugin_hyw/assets/icon/microsoft.svg,sha256=-am_6N3UEQYSzldDg-xrdGYjTWsagH-3v4Q_eia1ymE,684
-entari_plugin_hyw/assets/icon/minimax.png,sha256=tWqVlMdFNPpP8zWWX9tvIsWXI9q76P7O3t3CEZO7NU0,1525
-entari_plugin_hyw/assets/icon/mistral.png,sha256=0vv7jPmPKiBRYVYYJxVL_wIH_qa_ZssIdV3NDO5vbmk,869
-entari_plugin_hyw/assets/icon/nvida.png,sha256=JMITdcyjR9Lz6Gub0n1_30d0ynvV1ZSCJRcjy23qgrA,1607
-entari_plugin_hyw/assets/icon/openai.svg,sha256=LhVwCR4qaXj6qHm31qniQTCkJ-FX932JLSycUis5kao,1692
-entari_plugin_hyw/assets/icon/openrouter.png,sha256=exxfjWGDWpYH-Vc8xJDbhNVeXFEVxnu6TMxYIBc1WmY,1665
-entari_plugin_hyw/assets/icon/perplexity.svg,sha256=mHWZFoeWmDYXOIDzm9pj6_sRotaI8xNy5Lkeg5Vzu70,555
-entari_plugin_hyw/assets/icon/qwen.png,sha256=eqLbnIPbjh2_PsODU_mmqjeD82xXj8fV_kN0fDrNaD0,38419
-entari_plugin_hyw/assets/icon/xai.png,sha256=uSulvvDVqoA4RUOW0ZAkdvBVM2rpyGJRZIbn5dEFspw,362
-entari_plugin_hyw/assets/icon/zai.png,sha256=K-gnabdsjMLInppHA1Op7Nyt33iegrx1x-yNlvCZ0Tc,2351
-entari_plugin_hyw/assets/libs/highlight.css,sha256=Oppd74ucMR5a5Dq96FxjEzGF7tTw2fZ_6ksAqDCM8GY,1309
-entari_plugin_hyw/assets/libs/highlight.js,sha256=g3pvpbDHNrUrveKythkPMF2j_J7UFoHbUyFQcFe1yEY,121727
-entari_plugin_hyw/assets/libs/katex-auto-render.js,sha256=nLjaz8CGwpZsnsS6VPSi3EO3y-KzPOwaJ0PYhsf7R6c,3478
-entari_plugin_hyw/assets/libs/katex.css,sha256=UF1fgpAiu3tPJN_uCqEUHNe7pnr-QR0SQDNfgglgtcM,23196
-entari_plugin_hyw/assets/libs/katex.js,sha256=3ISyluw-iE3gkxWPdg_Z1Ftser5YtTgVV_ThOPRqWK4,277038
-entari_plugin_hyw/assets/libs/tailwind.css,sha256=Vxv_ByyehWd0I7CJ74ZgnwNmgBVU_dRUZ9FqB-FqPDY,18886
-entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-entari_plugin_hyw/core/config.py,sha256=LUP1LY0U9xfF_rzV2HxnNfJnfi929RNUZeFgbFFsyd4,1620
-entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
-entari_plugin_hyw/core/hyw.py,sha256=QwLB2YpUnzbVCWXcJF0uUeDAjxd4LWMzm9EjB9T1G1g,1671
-entari_plugin_hyw/core/pipeline.py,sha256=nV07UxzCXLLvAYDzcxgoYTxbxcHzD-_Ym4ZluvM82xg,38673
-entari_plugin_hyw/core/render.py,sha256=f2ugb5eZtZQKSQehmnyD9bvHasgZ2Px6QD-jUS8ndgY,26962
-entari_plugin_hyw/core/render.py.bak,sha256=qMd6Tk0p6ItqGmErR6dkWRwCuKQYXINc7KRxnP-mb_s,48768
-entari_plugin_hyw/utils/__init__.py,sha256=2JEC3lqFoa8FgR1kUjNUbwSph23YSOxbCaqYSgFomxs,128
-entari_plugin_hyw/utils/browser.py,sha256=Szr7oIx8My9Qnh8_bQsEm7o3iUZYk9Gamtz7kvwTNyg,2086
-entari_plugin_hyw/utils/mcp_playwright.py,sha256=WB8LPrmfMotfX6C-4h2WxEKXypnSzQ4-poNS-lTNe7c,4680
-entari_plugin_hyw/utils/misc.py,sha256=_7iHVYj_mJ6OGq6FU1s_cFeS1Ao-neBjZYd6eI2p95U,3482
-entari_plugin_hyw/utils/playwright_tool.py,sha256=eSIfur64U8Z5rN0UlZK8qg3vJrGgZZHjhyIId1jSHzs,1507
-entari_plugin_hyw/utils/prompts.py,sha256=PaPAV1l-7CVlmSLRpstIqxqYg9-LZOEgSogByzbv1KU,3352
-entari_plugin_hyw/utils/search.py,sha256=v9lW18V8rAGneejAiqJ4hCiCy7mIjkRCmR4UuiBqRxY,8048
-entari_plugin_hyw-3.2.113.dist-info/METADATA,sha256=u1oK1Kx7pZaIt_k776jY1uI3YvIiv5Tri3GqI0czu8A,4697
-entari_plugin_hyw-3.2.113.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-entari_plugin_hyw-3.2.113.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
-entari_plugin_hyw-3.2.113.dist-info/RECORD,,

{entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL
File without changes

{entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt
File without changes