entari-plugin-hyw 3.2.105__py3-none-any.whl → 3.5.0rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- entari_plugin_hyw/__init__.py +120 -428
- entari_plugin_hyw/assets/card-dist/index.html +396 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
- entari_plugin_hyw/assets/icon/huggingface.png +0 -0
- entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +412 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +386 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +237 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +29 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +52 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/{core/history.py → history.py} +25 -1
- entari_plugin_hyw/image_cache.py +274 -0
- entari_plugin_hyw/{utils/misc.py → misc.py} +38 -3
- entari_plugin_hyw/pipeline.py +1338 -0
- entari_plugin_hyw/prompts.py +108 -0
- entari_plugin_hyw/render_vue.py +314 -0
- entari_plugin_hyw/search.py +696 -0
- entari_plugin_hyw-3.5.0rc6.dist-info/METADATA +116 -0
- entari_plugin_hyw-3.5.0rc6.dist-info/RECORD +88 -0
- entari_plugin_hyw/assets/libs/highlight.css +0 -10
- entari_plugin_hyw/assets/libs/highlight.js +0 -1213
- entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
- entari_plugin_hyw/assets/libs/katex.css +0 -1
- entari_plugin_hyw/assets/libs/katex.js +0 -1
- entari_plugin_hyw/assets/libs/tailwind.css +0 -1
- entari_plugin_hyw/assets/tailwind.config.js +0 -12
- entari_plugin_hyw/assets/tailwind.input.css +0 -235
- entari_plugin_hyw/assets/template.html +0 -157
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +0 -36
- entari_plugin_hyw/core/hyw.py +0 -41
- entari_plugin_hyw/core/pipeline.py +0 -816
- entari_plugin_hyw/core/render.py +0 -926
- entari_plugin_hyw/utils/__init__.py +0 -3
- entari_plugin_hyw/utils/browser.py +0 -61
- entari_plugin_hyw/utils/mcp_playwright.py +0 -128
- entari_plugin_hyw/utils/playwright_tool.py +0 -46
- entari_plugin_hyw/utils/prompts.py +0 -91
- entari_plugin_hyw/utils/search.py +0 -193
- entari_plugin_hyw-3.2.105.dist-info/METADATA +0 -141
- entari_plugin_hyw-3.2.105.dist-info/RECORD +0 -42
- {entari_plugin_hyw-3.2.105.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.2.105.dist-info → entari_plugin_hyw-3.5.0rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import urllib.parse
|
|
3
|
-
from typing import Any, Optional
|
|
4
|
-
|
|
5
|
-
import httpx
|
|
6
|
-
import trafilatura
|
|
7
|
-
from loguru import logger
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class BrowserTool:
    """Plain-HTTP page fetcher that converts fetched HTML to markdown.

    A single ``httpx.AsyncClient`` is created on first use and reused until
    ``close()`` is called.
    """

    def __init__(self, config: Any):
        # The shared client is built lazily by _ensure_client().
        self._client: Optional[httpx.AsyncClient] = None
        self.config = config

    async def _ensure_client(self) -> "httpx.AsyncClient":
        """Return the shared HTTP client, creating it on the first call."""
        if self._client is not None:
            return self._client

        request_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
            "Accept": "application/json,text/html;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        }
        # NOTE(review): verify=False turns off TLS certificate verification
        # for every request made through this client - confirm this is wanted.
        self._client = httpx.AsyncClient(
            timeout=httpx.Timeout(8.0),
            follow_redirects=True,
            headers=request_headers,
            verify=False,
        )
        return self._client

    async def navigate(self, url: str) -> str:
        """Fetch ``url`` and return its content as markdown.

        Returns the text extracted by trafilatura, or the first 4000
        characters of the raw HTML when extraction yields nothing. Failures
        (HTTP status >= 400 or any exception) are logged and reported as an
        ``"Error navigating to ..."`` string instead of being raised.
        """
        try:
            http = await self._ensure_client()
            response = await http.get(url)
            status = response.status_code
            if status >= 400:
                logger.error(f"HTTP navigation failed status={status} url={url}")
                return f"Error navigating to {url}: {status}"

            page_html = response.text
            # trafilatura.extract is synchronous, so it runs in a worker thread.
            extracted = await asyncio.to_thread(
                trafilatura.extract,
                page_html,
                include_links=True,
                include_images=True,
                include_tables=True,
                output_format="markdown",
            )
            return extracted or page_html[:4000]
        except Exception as e:
            logger.error(f"HTTP navigation failed: {e}")
            return f"Error navigating to {url}: {e}"

    async def close(self):
        """Close the shared HTTP client, if one was created."""
        active = self._client
        if not active:
            return
        await active.aclose()
        self._client = None
|
|
61
|
-
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from contextlib import AsyncExitStack, asynccontextmanager
|
|
4
|
-
from typing import Any, AsyncIterator, Dict, List, Optional, Tuple
|
|
5
|
-
|
|
6
|
-
import anyio
|
|
7
|
-
from mcp.client.session import ClientSession
|
|
8
|
-
from mcp.client.stdio import StdioServerParameters, stdio_client
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class MCPPlaywright:
    """Launch parameters for a Playwright MCP server plus conversion helpers.

    The server is started over stdio with ``command`` and ``args``; when no
    ``args`` are given, ``["-y", "@playwright/mcp@latest"]`` is used.
    """

    def __init__(
        self,
        command: str = "npx",
        args: Optional[List[str]] = None,
        env: Optional[Dict[str, str]] = None,
        cwd: Optional[str] = None,
    ):
        self.command = command
        self.args = args if args else ["-y", "@playwright/mcp@latest"]
        self.env = env
        self.cwd = cwd

    @asynccontextmanager
    async def connect(self) -> "AsyncIterator[ClientSession]":
        """Start the server over stdio and yield an initialized session."""
        params = StdioServerParameters(command=self.command, args=self.args, env=self.env, cwd=self.cwd)
        async with stdio_client(params) as (reader, writer), ClientSession(reader, writer) as session:
            await session.initialize()
            yield session

    @staticmethod
    def mcp_tools_to_openai(tools: Any) -> List[Dict[str, Any]]:
        """Convert each entry of ``tools.tools`` into an OpenAI function-tool dict.

        A tool without an ``inputSchema`` gets an empty object schema; the
        ``$schema`` key is removed from a copy, leaving the tool's own schema
        untouched.
        """

        def _as_function(tool: Any) -> Dict[str, Any]:
            parameters = dict(getattr(tool, "inputSchema", None) or {"type": "object", "properties": {}})
            parameters.pop("$schema", None)
            return {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description or "",
                    "parameters": parameters,
                },
            }

        return [_as_function(tool) for tool in tools.tools]

    @staticmethod
    def call_result_to_text(result: Any) -> str:
        """Flatten a tool-call result into plain text.

        ``None`` becomes ``""``. When ``result.content`` is a list, each item
        contributes its ``text`` attribute (or ``str(item)`` when it has
        none), joined by newlines and stripped. Anything else is ``str(result)``.
        """
        if result is None:
            return ""

        payload = getattr(result, "content", None)
        if not isinstance(payload, list):
            return str(result)

        lines: List[str] = []
        for entry in payload:
            text = getattr(entry, "text", None)
            lines.append(str(entry) if text is None else str(text))
        return "\n".join(lines).strip()
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class MCPPlaywrightManager:
    """Keeps one Playwright MCP session open and reuses it across calls.

    Connecting and closing are serialized through a lock; the tool list
    converted to OpenAI format is cached while the session is open.
    """

    def __init__(
        self,
        command: str = "npx",
        args: Optional[List[str]] = None,
        env: Optional[Dict[str, str]] = None,
        cwd: Optional[str] = None,
    ):
        self._client = MCPPlaywright(command=command, args=args, env=env, cwd=cwd)
        self._lock = anyio.Lock()
        self._stack: Optional[AsyncExitStack] = None
        self._session: Optional[ClientSession] = None
        self._tools_openai: Optional[List[Dict[str, Any]]] = None

    def _reset_state(self) -> None:
        """Forget the exit stack, the session and the cached tool list."""
        self._stack = None
        self._session = None
        self._tools_openai = None

    async def ensure_connected(self) -> bool:
        """Open the session if needed; return True when a session is available.

        Any exception raised while connecting closes whatever was opened so
        far and yields False instead of propagating.
        """
        async with self._lock:
            if self._session is not None:
                return True

            pending = AsyncExitStack()
            try:
                params = StdioServerParameters(
                    command=self._client.command,
                    args=self._client.args,
                    env=self._client.env,
                    cwd=self._client.cwd,
                )
                reader, writer = await pending.enter_async_context(stdio_client(params))
                opened = await pending.enter_async_context(ClientSession(reader, writer))
                await opened.initialize()

                listing = await opened.list_tools()
                self._tools_openai = MCPPlaywright.mcp_tools_to_openai(listing)
            except Exception:
                await pending.aclose()
                self._reset_state()
                return False
            else:
                # Publish the connection only once every step has succeeded.
                self._stack = pending
                self._session = opened
                return True

    async def tools_openai(self) -> List[Dict[str, Any]]:
        """Return the cached OpenAI-format tool list, or [] when unavailable."""
        connected = await self.ensure_connected()
        if connected and self._tools_openai is not None:
            return self._tools_openai
        return []

    async def call_tool_text(self, name: str, arguments: Optional[Dict[str, Any]] = None) -> str:
        """Call tool ``name`` and return its result as text.

        Returns an error string when no session could be established.
        """
        connected = await self.ensure_connected()
        if not connected or self._session is None:
            return "Error: Playwright MCP is not connected."
        outcome = await self._session.call_tool(name, arguments or {})
        return MCPPlaywright.call_result_to_text(outcome)

    async def close(self):
        """Close the open session (if any) and clear all cached state."""
        async with self._lock:
            if self._stack is not None:
                await self._stack.aclose()
            self._reset_state()
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from typing import Any, Optional
|
|
3
|
-
|
|
4
|
-
import trafilatura
|
|
5
|
-
from loguru import logger
|
|
6
|
-
|
|
7
|
-
try:
|
|
8
|
-
from playwright.async_api import async_playwright
|
|
9
|
-
except Exception: # pragma: no cover
|
|
10
|
-
async_playwright = None
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class PlaywrightTool:
    """Fetch a page with a Chromium browser and convert it to markdown."""

    def __init__(self, config: Any):
        # Only the optional ``headless`` attribute of the config is read here.
        self.config = config

    async def navigate(self, url: str) -> str:
        """Load ``url`` in Chromium and return its content as markdown.

        Returns the text extracted by trafilatura, or the first 4000
        characters of the page HTML when extraction yields nothing. An empty
        ``url``, a missing Playwright installation, or any failure while
        browsing is reported as an ``"Error: ..."`` string, never raised.
        """
        if not url:
            return "Error: Missing url"
        if async_playwright is None:
            return "Error: Playwright is not available in this environment."

        headless = bool(getattr(self.config, "headless", True))
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=headless)
                try:
                    context = await browser.new_context()
                    page = await context.new_page()
                    await page.goto(url, wait_until="domcontentloaded", timeout=15000)
                    html = await page.content()
                finally:
                    # Close the browser on the failure path as well (e.g. a
                    # navigation timeout); closing it also closes its contexts.
                    await browser.close()

            # trafilatura.extract is synchronous, so it runs in a worker thread.
            content = await asyncio.to_thread(
                trafilatura.extract,
                html,
                include_links=True,
                include_images=True,
                include_tables=True,
                output_format="markdown",
            )
            return content or html[:4000]
        except Exception as e:
            logger.warning(f"Playwright navigation failed: {e}")
            return f"Error: Playwright navigation failed: {e}"
|
|
46
|
-
|
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
VISION_SYSTEM_PROMPT = """你是一个专业的视觉转文字专家.
|
|
2
|
-
|
|
3
|
-
[用户消息]
|
|
4
|
-
{user_msgs}
|
|
5
|
-
|
|
6
|
-
[核心任务]
|
|
7
|
-
- 智能分析图片内容, 转述成文本, 除此之外不要添加任何内容
|
|
8
|
-
- 文字优先: 若包含清晰文字(文档、截图等), 必须完整准确转录, 不要遗漏.
|
|
9
|
-
- 视觉补充: 若无文字, 重点描述视觉内容(物体、场景、氛围).
|
|
10
|
-
- 用户要求: 根据用户消息中提示侧重转文本的偏向, 若无或无关联则不理会常规完成.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
INTRUCT_SYSTEM_PROMPT = """你是一个专业的指导专家.
|
|
14
|
-
|
|
15
|
-
[用户消息]
|
|
16
|
-
{user_msgs}
|
|
17
|
-
|
|
18
|
-
[核心任务]
|
|
19
|
-
- 决定是否使用搜索工具
|
|
20
|
-
- 如果用户消息包含典型名词、可能的专有名词组合, 且意图为解释此词, 请使用搜索工具, 搜索工具会给你返回最新的资料和图片.
|
|
21
|
-
- 如果用户消息明显不需要搜索, 或虽然存在名词但是作为过程参与不涉及结果, 则不调用搜索工具
|
|
22
|
-
- 如果用户的消息明显有两个搜索的方向, 本次对话最多同时调用两个搜索工具分开搜索
|
|
23
|
-
- 理解用户话语, 提炼出搜索关键词.
|
|
24
|
-
- 保持原意, 禁止添加额外内容.
|
|
25
|
-
- 禁止擅自分割关键词导致语意变化.
|
|
26
|
-
- 决定是否放权 mcp工具 给 agent
|
|
27
|
-
- 如果用户显式地表达了要求模型使用mcp帮助完成任务的意图, 调用工具放权
|
|
28
|
-
> 所有工具需要在本次对话同时调用
|
|
29
|
-
|
|
30
|
-
[调用工具]
|
|
31
|
-
{tools_desc}
|
|
32
|
-
"""
|
|
33
|
-
|
|
34
|
-
INTRUCT_SYSTEM_PROMPT_VISION_ADD = """
|
|
35
|
-
[视觉专家消息]
|
|
36
|
-
{vision_msgs}
|
|
37
|
-
"""
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
AGENT_SYSTEM_PROMPT = """
|
|
41
|
-
你是一个全能助手, 请根据用户需求和搜索结果中贴切用户意图的可靠信息解释用户消息中的关键词.
|
|
42
|
-
|
|
43
|
-
[用户消息]
|
|
44
|
-
{user_msgs}
|
|
45
|
-
|
|
46
|
-
[回复格式要求]
|
|
47
|
-
当不调用工具发送文本, 即会变成最终回复, 请遵守:
|
|
48
|
-
- 语言: 简体中文, 百科式风格.
|
|
49
|
-
- 正文格式: 使用 Markdown, 有大标题, 可以使用数学公式, 格式内容丰富.
|
|
50
|
-
"""
|
|
51
|
-
|
|
52
|
-
AGENT_SYSTEM_PROMPT_INTRUCT_VISION_ADD = """
|
|
53
|
-
[视觉专家消息]
|
|
54
|
-
{vision_msgs}
|
|
55
|
-
"""
|
|
56
|
-
|
|
57
|
-
AGENT_SYSTEM_PROMPT_SEARCH_ADD = """
|
|
58
|
-
[搜索专家给出的信息]
|
|
59
|
-
{search_msgs}
|
|
60
|
-
|
|
61
|
-
[最终回复]
|
|
62
|
-
- 图片: 如果本次回答适合配图, 对搜索到的图片, 选择 1-3 张合适的尽量类型、来源、不同、主题契合的图片, 美观分布嵌入正文 ``.
|
|
63
|
-
- 引用: 在正文中使用 `[id]` 标注来源, 并在文末通过 `references` 代码块列出.
|
|
64
|
-
- 你需要在最终回复底部添加 `references` 代码块.
|
|
65
|
-
|
|
66
|
-
```references
|
|
67
|
-
1. [标题](url)
|
|
68
|
-
2. [标题](url)
|
|
69
|
-
```
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
AGENT_SYSTEM_PROMPT_MCP_ADD = """
|
|
73
|
-
[MCP 工具已授权]
|
|
74
|
-
可用工具:
|
|
75
|
-
{tools_desc}
|
|
76
|
-
|
|
77
|
-
> 积极使用工具完成任务,工具优先于文本回复。
|
|
78
|
-
|
|
79
|
-
[最终回复格式]
|
|
80
|
-
底部添加 `mcp` 代码块列出工具调用流程:
|
|
81
|
-
- 格式: `[图标] 工具名称` + 文本描述
|
|
82
|
-
- 图标: navigate, snapshot, click, type, code, wait, default
|
|
83
|
-
|
|
84
|
-
```mcp
|
|
85
|
-
[code] browser_run_code
|
|
86
|
-
执行JavaScript计算
|
|
87
|
-
```
|
|
88
|
-
"""
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
import httpx
|
|
3
|
-
import urllib.parse
|
|
4
|
-
from typing import List, Dict, Optional, Any
|
|
5
|
-
from loguru import logger
|
|
6
|
-
|
|
7
|
-
class SearchService:
    """
    Specialized service for interacting with SearXNG.

    Result pages are requested as HTML and parsed with regular expressions
    rather than a DOM parser such as Trafilatura.
    """

    # Maximum number of entries returned by search() and image_search();
    # also substituted for a "{limit}" placeholder in a configured URL.
    MAX_RESULTS = 8
    # Snippets are cut to this length so search output stays small.
    MAX_SNIPPET_CHARS = 500
    # Used when the config does not provide a base URL.
    DEFAULT_BASE_URL = "http://127.0.0.1:8888/search?"

    _FORMAT_JSON_RE = re.compile(r'([?&])format=json(&|$)')
    _LINK_RE = re.compile(r'href="([^"]+)".*?>([^<]+)<')
    _SNIPPET_RE = re.compile(r'class="content"[^>]*>([\s\S]*?)</p>')
    _TAG_RE = re.compile(r'<[^>]+>')
    _IMG_RE = re.compile(r'<img[^>]+src=["\'](http[^"\']+)["\'][^>]*>', re.IGNORECASE)
    _ALT_RE = re.compile(r'alt=["\']([^"\']*)["\']', re.IGNORECASE)

    def __init__(self, config: Any):
        self.config = config

    @staticmethod
    def _unescape(text: str) -> str:
        """Decode HTML entities such as ``&amp;`` in ``text``."""
        # Imported locally: `html` is also a parameter name in this class.
        from html import unescape

        return unescape(text)

    @staticmethod
    def _build_url(base: str, encoded_query: str, extra_params: str) -> str:
        """Build the request URL from a configured base URL.

        ``format=json`` is removed from ``base`` wherever it appears
        (including when it is the only parameter), because the HTML page is
        what gets parsed. If ``base`` contains a ``{query}`` placeholder it is
        filled in (and ``{limit}`` becomes MAX_RESULTS); otherwise
        ``q=<query>`` plus ``extra_params`` is appended with the right
        separator.
        """

        def _drop(match):
            # Keep the leading "?"/"&" only when another parameter follows.
            return match.group(1) if match.group(2) else ""

        base = SearchService._FORMAT_JSON_RE.sub(_drop, base)

        if "{query}" in base:
            # Plain replace instead of str.format(): other braces in the URL
            # must not be interpreted as format fields.
            url = base.replace("{query}", encoded_query)
            return url.replace("{limit}", str(SearchService.MAX_RESULTS))

        # Pick the separator only after stripping, so it matches the final base.
        if base.endswith(("?", "&")):
            sep = ""
        else:
            sep = "&" if "?" in base else "?"
        return f"{base}{sep}q={encoded_query}{extra_params}"

    @staticmethod
    def _parse_result_chunk(chunk: str) -> Optional[Dict[str, str]]:
        """Parse the text following one ``<article`` marker into a result.

        Returns ``{'title', 'url', 'content'}`` with HTML entities decoded, or
        None when the chunk has no usable link (no match, empty title, a
        relative URL, or a URL containing "searxng").
        """
        link_match = SearchService._LINK_RE.search(chunk)
        if not link_match:
            return None

        url = SearchService._unescape(link_match.group(1))
        title = SearchService._unescape(link_match.group(2)).strip()

        # Skip links that point back at the search instance itself.
        if "searxng" in url or url.startswith("/"):
            return None
        if not (url and title):
            return None

        snippet = ""
        snippet_match = SearchService._SNIPPET_RE.search(chunk)
        if snippet_match:
            # Strip tags first, then decode entities, so an escaped "&lt;b&gt;"
            # in the text is not mistaken for markup.
            without_tags = SearchService._TAG_RE.sub('', snippet_match.group(1))
            snippet = SearchService._unescape(without_tags).strip()

        return {
            "title": title,
            "url": url,
            "content": (snippet or title)[:SearchService.MAX_SNIPPET_CHARS],
        }

    async def search(self, query: str) -> List[Dict[str, str]]:
        """
        Execute a web search and parse the HTML result page.

        Returns a list of dicts: {'title': str, 'url': str, 'content': str}.
        An empty query, a non-200 response or any request error yields [].
        """
        if not query:
            return []

        base = getattr(self.config, "search_base_url", None) or self.DEFAULT_BASE_URL
        url = self._build_url(base, urllib.parse.quote(query), "&language=zh-CN")

        logger.info(f"SearchService: Fetching {url}")

        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.get(url)
                if resp.status_code != 200:
                    logger.error(f"Search failed: {resp.status_code}")
                    return []
                return self._parse_searxng_html(resp.text)
        except Exception as e:
            logger.error(f"Search execution failed: {e}")
            return []

    def _parse_searxng_html(self, html: str) -> List[Dict[str, str]]:
        """
        Parse a SearXNG HTML result page.

        Target structure:
        <article class="result ...">
        <h3><a href="(url)">(title)</a></h3>
        <p class="content">(snippet)</p>
        </article>

        The page is split on "<article" and each piece is parsed on its own;
        at most MAX_RESULTS results are returned.
        """
        results: List[Dict[str, str]] = []

        # The first piece is the page preamble before any result.
        for chunk in html.split('<article')[1:]:
            parsed = self._parse_result_chunk(chunk)
            if parsed is None:
                continue
            results.append(parsed)
            if len(results) >= self.MAX_RESULTS:
                break

        logger.info(f"SearchService: Parsed {len(results)} results")
        return results

    async def image_search(self, query: str) -> List[Dict[str, str]]:
        """
        Perform an image search by scanning the HTML result page for <img> tags.

        Returns up to MAX_RESULTS dicts {'title', 'url', 'content'}; an empty
        query or a failed request yields [].
        """
        if not query:
            return []

        base = getattr(self.config, "image_search_base_url", None) or self.DEFAULT_BASE_URL
        url = self._build_url(base, urllib.parse.quote(query), "&iax=images&ia=images")

        logger.info(f"SearchService: Fetching Images {url}")

        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                resp = await client.get(url)
                resp.raise_for_status()
                html_content = resp.text
        except Exception as e:
            logger.error(f"Image Search failed: {e}")
            return []

        results: List[Dict[str, str]] = []
        # Generic pattern: any <img> whose src is an absolute http(s) URL.
        for match in self._IMG_RE.finditer(html_content):
            img_url = self._unescape(match.group(1))

            alt_match = self._ALT_RE.search(match.group(0))
            title = self._unescape(alt_match.group(1)) if alt_match else ""

            # Filter out icons, static assets and inline data URIs.
            if "favicon" in img_url or "static" in img_url or "data:image" in img_url:
                continue

            results.append({
                "title": title or "Image",
                "url": img_url,
                "content": f"Image: {title}",
            })

            if len(results) >= self.MAX_RESULTS:
                break

        return results
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: entari_plugin_hyw
|
|
3
|
-
Version: 3.2.105
|
|
4
|
-
Summary: Use large language models to interpret chat messages
|
|
5
|
-
Author-email: kumoSleeping <zjr2992@outlook.com>
|
|
6
|
-
License: MIT
|
|
7
|
-
Project-URL: Homepage, https://github.com/kumoSleeping/entari-plugin-hyw
|
|
8
|
-
Project-URL: Repository, https://github.com/kumoSleeping/entari-plugin-hyw
|
|
9
|
-
Project-URL: Issue Tracker, https://github.com/kumoSleeping/entari-plugin-hyw/issues
|
|
10
|
-
Keywords: entari,llm,ai,bot,chat
|
|
11
|
-
Classifier: Development Status :: 3 - Alpha
|
|
12
|
-
Classifier: Intended Audience :: Developers
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Python: >=3.10
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
Requires-Dist: arclet-entari[full]>=0.16.5
|
|
20
|
-
Requires-Dist: openai
|
|
21
|
-
Requires-Dist: mcp
|
|
22
|
-
Requires-Dist: httpx
|
|
23
|
-
Requires-Dist: markdown>=3.10
|
|
24
|
-
Requires-Dist: trafilatura>=2.0.0
|
|
25
|
-
Requires-Dist: playwright>=1.56.0
|
|
26
|
-
Provides-Extra: playwright
|
|
27
|
-
Requires-Dist: playwright>=1.56.0; extra == "playwright"
|
|
28
|
-
Requires-Dist: trafilatura>=2.0.0; extra == "playwright"
|
|
29
|
-
Provides-Extra: dev
|
|
30
|
-
Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
|
|
31
|
-
Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
|
|
32
|
-
|
|
33
|
-
<div align="center">
|
|
34
|
-
|
|
35
|
-
# Entari Plugin HYW
|
|
36
|
-
|
|
37
|
-
**Entari 智能聊天解释插件**
|
|
38
|
-
|
|
39
|
-
[](https://opensource.org/licenses/MIT) [](https://pypi.org/project/entari-plugin-hyw/) [](https://www.python.org/downloads/)
|
|
40
|
-
|
|
41
|
-
*IM 环境下的 LLM 智能解释方案*
|
|
42
|
-
|
|
43
|
-
</div>
|
|
44
|
-
|
|
45
|
-
# v3.2迎来大幅度改动、现在图文不符
|
|
46
|
-
|
|
47
|
-
## 🎑 效果展示
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
<div align="center">
|
|
52
|
-
<img src="demo.svg" alt="Chat Demo" width="100%">
|
|
53
|
-
</div>
|
|
54
|
-
|
|
55
|
-
## ✨ 功能特性
|
|
56
|
-
- **关于搜索**:一次性触发 Bing 网页与图片搜索,组合结果后再回应。
|
|
57
|
-
- 基于 `Alconna` 与 `MessageChain` 混合处理, 深度优化触发体验。
|
|
58
|
-
- **网页获取**:使用 Playwright 进行实时页面获取。
|
|
59
|
-
- **多模态理解**:支持图片视觉分析。
|
|
60
|
-
- **上下文感知**:维护对话历史记录,支持连续的多轮对话。
|
|
61
|
-
- `reaction` 表情, 表示任务开始。
|
|
62
|
-
- **OneBot 优化**:针对 OneBot 11 协议深度优化,支持解析 JSON 卡片、引用消息等特殊元素。
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
## 📦 安装
|
|
67
|
-
|
|
68
|
-
### 基础安装
|
|
69
|
-
```bash
|
|
70
|
-
pip install entari-plugin-hyw
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
### 搜索
|
|
74
|
-
默认通过 HTTP 请求搜索引擎(DuckDuckGo,可在配置中自定义完整搜索链接,如 `https://duckduckgo.com/?q={query}`)。
|
|
75
|
-
|
|
76
|
-
## ⚙️ 配置
|
|
77
|
-
|
|
78
|
-
请在 `entari.yml` 中添加以下配置:
|
|
79
|
-
|
|
80
|
-
```yaml
|
|
81
|
-
plugins:
|
|
82
|
-
entari_plugin_hyw:
|
|
83
|
-
# --- 基础设置 ---
|
|
84
|
-
# 触发机器人的命令列表
|
|
85
|
-
command_name_list: ["zssm", "hyw"]
|
|
86
|
-
|
|
87
|
-
# 主 LLM 模型配置(必需), 如 x-ai/grok-4.1-fast:online、perplexity/sonar
|
|
88
|
-
model_name: "x-ai/grok-4.1-fast:free"
|
|
89
|
-
api_key: "your-api-key"
|
|
90
|
-
|
|
91
|
-
# 默认 https://openrouter.ai/api/v1
|
|
92
|
-
base_url: "openai-compatible-url"
|
|
93
|
-
|
|
94
|
-
# --- 浏览器与搜索 ---
|
|
95
|
-
headless: true
|
|
96
|
-
|
|
97
|
-
# --- 视觉配置 (可选) ---
|
|
98
|
-
# 如果未设置,将回退使用主模型
|
|
99
|
-
vision_model_name: "qwen-vl-plus"
|
|
100
|
-
vision_api_key: "your-vision-api-key"
|
|
101
|
-
vision_base_url: "your-vision_base_url"
|
|
102
|
-
|
|
103
|
-
# --- openai extra_body ---
|
|
104
|
-
extra_body:
|
|
105
|
-
reasoning:
|
|
106
|
-
effort: low
|
|
107
|
-
|
|
108
|
-
# --- 交互体验 ---
|
|
109
|
-
# 是否开启表情反应 (默认: true)
|
|
110
|
-
reaction: true
|
|
111
|
-
|
|
112
|
-
# --- 调试 ---
|
|
113
|
-
save_conversation: false
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
## 📖 使用方法
|
|
117
|
-
|
|
118
|
-
### 基础指令
|
|
119
|
-
使用配置的命令前缀与机器人交互:
|
|
120
|
-
|
|
121
|
-
```text
|
|
122
|
-
hyw 最近LLM有啥新闻, 是不是claude又被秒了
|
|
123
|
-
hyw [图片消息] 里面这人写代码怎么我一句都看不懂
|
|
124
|
-
hyw https://koishi.chat/ 怎么安装
|
|
125
|
-
[回复消息] hyw
|
|
126
|
-
[回复消息<[图片消息]>] hyw -t
|
|
127
|
-
[回复消息] hyw 补充: 这个rf的意思是github用户RF-Tar-Railt
|
|
128
|
-
[回复消息(hyw插件的输出)] /1 详细点描述
|
|
129
|
-
[回复消息(hyw插件的输出)] /那谁有多余解释器?
|
|
130
|
-
```
|
|
131
|
-
|
|
132
|
-
### 选项参数
|
|
133
|
-
- `-t` / `--text`: 强制纯文本模式(跳过图片分析,节省 Token 或时间)。
|
|
134
|
-
|
|
135
|
-
```text
|
|
136
|
-
hyw -t 一大段话。
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
### 引用回复
|
|
140
|
-
支持引用消息进行追问,机器人会自动读取被引用的消息作为上下文:
|
|
141
|
-
- **引用 + 命令**:机器人将理解被引用消息的内容(包括图片)通过 `MessageChain` 操作拼接 `Text`、`Image` 与部分 `Custom`。
|