entari-plugin-hyw 3.3.1-py3-none-any.whl → 3.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of entari-plugin-hyw was flagged by the registry scanner.

Files changed (48)
  1. entari_plugin_hyw/__init__.py +763 -309
  2. entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
  3. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  4. entari_plugin_hyw/assets/icon/gemini.svg +1 -0
  5. entari_plugin_hyw/assets/icon/google.svg +1 -0
  6. entari_plugin_hyw/assets/icon/grok.png +0 -0
  7. entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
  8. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  9. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  10. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  11. entari_plugin_hyw/assets/icon/openai.svg +1 -0
  12. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  13. entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
  14. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  15. entari_plugin_hyw/assets/icon/xai.png +0 -0
  16. entari_plugin_hyw/assets/icon/zai.png +0 -0
  17. entari_plugin_hyw/assets/libs/highlight.css +10 -0
  18. entari_plugin_hyw/assets/libs/highlight.js +1213 -0
  19. entari_plugin_hyw/assets/libs/katex-auto-render.js +1 -0
  20. entari_plugin_hyw/assets/libs/katex.css +1 -0
  21. entari_plugin_hyw/assets/libs/katex.js +1 -0
  22. entari_plugin_hyw/assets/libs/tailwind.css +1 -0
  23. entari_plugin_hyw/assets/package-lock.json +953 -0
  24. entari_plugin_hyw/assets/package.json +16 -0
  25. entari_plugin_hyw/assets/tailwind.config.js +12 -0
  26. entari_plugin_hyw/assets/tailwind.input.css +235 -0
  27. entari_plugin_hyw/assets/template.html +157 -0
  28. entari_plugin_hyw/assets/template.html.bak +157 -0
  29. entari_plugin_hyw/assets/template.j2 +307 -0
  30. entari_plugin_hyw/core/__init__.py +0 -0
  31. entari_plugin_hyw/core/config.py +35 -0
  32. entari_plugin_hyw/core/history.py +146 -0
  33. entari_plugin_hyw/core/hyw.py +41 -0
  34. entari_plugin_hyw/core/pipeline.py +1065 -0
  35. entari_plugin_hyw/core/render.py +596 -0
  36. entari_plugin_hyw/core/render.py.bak +926 -0
  37. entari_plugin_hyw/utils/__init__.py +2 -0
  38. entari_plugin_hyw/utils/browser.py +40 -0
  39. entari_plugin_hyw/utils/misc.py +93 -0
  40. entari_plugin_hyw/utils/playwright_tool.py +36 -0
  41. entari_plugin_hyw/utils/prompts.py +129 -0
  42. entari_plugin_hyw/utils/search.py +241 -0
  43. {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/METADATA +20 -28
  44. entari_plugin_hyw-3.3.2.dist-info/RECORD +46 -0
  45. entari_plugin_hyw/hyw_core.py +0 -700
  46. entari_plugin_hyw-3.3.1.dist-info/RECORD +0 -6
  47. {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/WHEEL +0 -0
  48. {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/utils/__init__.py
@@ -0,0 +1,2 @@
+ from .prompts import AGENT_SP
+ from .misc import process_onebot_json, process_images
entari_plugin_hyw/utils/browser.py
@@ -0,0 +1,40 @@
+ from typing import Any
+ from loguru import logger
+ from crawl4ai import AsyncWebCrawler
+ from crawl4ai.async_configs import CrawlerRunConfig
+ from crawl4ai.cache_context import CacheMode
+
+
+ class BrowserTool:
+     """Crawl4AI-based page fetcher."""
+
+     def __init__(self, config: Any):
+         self.config = config
+
+     async def navigate(self, url: str) -> str:
+         """Fetch URL content via Crawl4AI and return markdown."""
+         if not url:
+             return "Error: missing url"
+         try:
+             async with AsyncWebCrawler() as crawler:
+                 result = await crawler.arun(
+                     url=url,
+                     config=CrawlerRunConfig(
+                         wait_until="networkidle",
+                         wait_for_images=True,
+                         cache_mode=CacheMode.BYPASS,
+                         word_count_threshold=1,
+                         screenshot=False,
+                     ),
+                 )
+                 if not result.success:
+                     return f"Error navigating to {url}: {result.error_message or result.status_code}"
+
+                 content = result.markdown or result.extracted_content or result.cleaned_html or result.html or ""
+                 return content[:8000]
+         except Exception as e:
+             logger.error(f"HTTP navigation failed: {e}")
+             return f"Error navigating to {url}: {e}"
+
+     async def close(self):
+         return None
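As a quick orientation to the new fetcher, here is a minimal, hypothetical driver (not part of the package); since `navigate` never reads `self.config`, `None` stands in for the config object:

```python
# Hypothetical usage sketch for BrowserTool; not shipped with the plugin.
import asyncio

from entari_plugin_hyw.utils.browser import BrowserTool


async def main() -> None:
    # navigate() ignores self.config, so None is a safe stand-in here
    tool = BrowserTool(config=None)
    markdown = await tool.navigate("https://example.com")
    print(markdown[:500])  # navigate() already caps output at 8000 chars
    await tool.close()


asyncio.run(main())
```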
entari_plugin_hyw/utils/misc.py
@@ -0,0 +1,93 @@
+ import json
+ import base64
+ import asyncio
+ import httpx
+ from typing import Dict, Any, List, Optional, Tuple
+ from loguru import logger
+ from arclet.entari import MessageChain, Image
+ from satori.exception import ActionFailed
+
+
+ def process_onebot_json(data: Dict[str, Any]) -> str:
+     """Process OneBot JSON elements."""
+     try:
+         if "data" in data:
+             json_str = data["data"]
+             if isinstance(json_str, str):
+                 # Unescape OneBot/CQ HTML entities before parsing
+                 json_str = json_str.replace("&quot;", '"').replace("&#44;", ",")
+             content = json.loads(json_str)
+             if "meta" in content and "detail_1" in content["meta"]:
+                 detail = content["meta"]["detail_1"]
+                 if "desc" in detail and "qqdocurl" in detail:
+                     return f"[Shared Document] {detail['desc']}: {detail['qqdocurl']}"
+     except Exception as e:
+         logger.warning(f"Failed to process JSON element: {e}")
+     return ""
+
+
+ async def download_image(url: str) -> bytes:
+     """Download an image over HTTP."""
+     try:
+         async with httpx.AsyncClient(timeout=30.0) as client:
+             resp = await client.get(url)
+             if resp.status_code == 200:
+                 return resp.content
+             else:
+                 raise ActionFailed(f"Image download failed, status code: {resp.status_code}")
+     except Exception as e:
+         raise ActionFailed(f"Image download failed: {url}, error: {e}")
+
+
+ async def process_images(mc: MessageChain, vision_model: Optional[str] = None) -> Tuple[List[str], Optional[str]]:
+     # If the vision model is explicitly set to "off", skip image processing
+     if vision_model == "off":
+         return [], None
+
+     has_images = bool(mc.get(Image))
+     images = []
+     if has_images:
+         urls = mc[Image].map(lambda x: x.src)
+         tasks = [download_image(url) for url in urls]
+         raw_images = await asyncio.gather(*tasks)
+         images = [base64.b64encode(img).decode("utf-8") for img in raw_images]
+
+     return images, None
+
+
+ def resolve_model_name(name: str, models_config: List[Dict[str, Any]]) -> Tuple[Optional[str], Optional[str]]:
+     """
+     Resolve a user-supplied model name to the full API model name from config.
+     Supports partial matching when the match is unique.
+     """
+     if not name:
+         return None, "No model name provided"
+
+     name = name.lower()
+
+     # 1. Exact match on name or id
+     for m in models_config:
+         if m.get("name") == name or m.get("id") == name:
+             return m.get("name"), None
+
+     # 2. Inputs that look like raw model IDs (contain '/', '-', or '.') pass through unchanged
+     if "/" in name or "-" in name or "." in name:
+         return name, None
+
+     # 3. Unique partial match against configured names
+     matches = [m["name"] for m in models_config if name in m.get("name", "").lower()]
+     if len(matches) == 1:
+         return matches[0], None
+     elif len(matches) > 1:
+         return None, f"Model name '{name}' is ambiguous. Matches: {', '.join(matches[:3])}..."
+
+     # Default: assume it is a valid ID passed through directly
+     return name, None
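The resolution order in `resolve_model_name` (exact match, raw-ID pass-through, then unique partial match) is easiest to see with a small sketch; this `models` list is invented for illustration:

```python
# Illustrative only; the models list below is invented.
from entari_plugin_hyw.utils.misc import resolve_model_name

models = [
    {"name": "x-ai/grok-4.1-fast:free"},
    {"name": "qwen-vl-plus"},
]

print(resolve_model_name("x-ai/grok-4.1-fast:free", models))
# -> ("x-ai/grok-4.1-fast:free", None): exact match on name
print(resolve_model_name("claude-3-haiku", models))
# -> ("claude-3-haiku", None): contains "-", so it passes through as a raw ID
print(resolve_model_name("grok", models))
# -> ("x-ai/grok-4.1-fast:free", None): unique partial match
```

Note that the ID pass-through check runs before partial matching, so a name like "claude-3-haiku" never reaches the partial matcher even though it is absent from the config.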
entari_plugin_hyw/utils/playwright_tool.py
@@ -0,0 +1,36 @@
+ from typing import Any
+ from loguru import logger
+ from crawl4ai.async_configs import CrawlerRunConfig
+ from crawl4ai.cache_context import CacheMode
+ from .search import get_shared_crawler
+
+
+ class PlaywrightTool:
+     """
+     Backwards-compatible wrapper, now powered by Crawl4AI.
+     """
+     def __init__(self, config: Any):
+         self.config = config
+
+     async def navigate(self, url: str) -> str:
+         if not url:
+             return "Error: Missing url"
+
+         try:
+             crawler = await get_shared_crawler()
+             result = await crawler.arun(
+                 url=url,
+                 config=CrawlerRunConfig(
+                     wait_until="networkidle",
+                     wait_for_images=True,
+                     cache_mode=CacheMode.BYPASS,
+                     word_count_threshold=1,
+                     screenshot=False,
+                 ),
+             )
+             if not result.success:
+                 return f"Error: crawl failed ({result.error_message or result.status_code})"
+             return (result.markdown or result.extracted_content or result.cleaned_html or result.html or "")[:8000]
+         except Exception as e:
+             logger.warning(f"Crawl navigation failed: {e}")
+             return f"Error: Crawl navigation failed: {e}"
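Note the asymmetry between the two fetch wrappers above: `BrowserTool` opens a fresh `AsyncWebCrawler` per call via `async with`, while `PlaywrightTool` reuses the shared crawler from `utils/search.py`, trading per-call isolation for cheaper repeated fetches. Both cap the returned content at 8000 characters and degrade from markdown to extracted content to raw HTML.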
entari_plugin_hyw/utils/prompts.py
@@ -0,0 +1,129 @@
+ VISION_SP = """# You are a professional vision-to-text expert.
+
+ # Core tasks
+ - Intelligently analyze the image and transcribe it into text; add nothing beyond that.
+ - Text first: if the image contains legible text (documents, screenshots, etc.), transcribe it completely and accurately, without omissions.
+ - Visual fallback: if there is no text, focus on describing the visual content (objects, scene, atmosphere).
+ - User guidance: follow any hints in the user message about what to emphasize; if there are none or they are unrelated, ignore them and proceed normally.
+
+ ## User message
+ ```text
+ {user_msgs}
+ ```
+ """
+
+ INTRUCT_SP = """# You are a professional guidance expert.
+
+ ## Core tasks
+ - Decide which preprocessing tools to run:
+     - The user message contains a link: call crawl_page to fetch its content; no other tools are needed
+     - The user message contains notable nouns or likely proper-noun phrases: call internal_web_search
+         - Distill the keywords and search for the keywords themselves, with no extra particles; this gives the best results
+         - Also call internal_image_search
+     - The user message needs no search: call no tools
+ - Call set_mode:
+     - The vast majority of routine questions: standard
+     - The user asks for research / deep search: agent
+     - Answering requires fetching specific page content: agent
+ > All tools must be called together in this turn
+
+ ## Tool calls
+ - Tools must be invoked via the function_call / tool_call mechanism.
+ {tools_desc}
+
+ ## Your reply
+ No extra text is needed after calling tools.
+
+ ## User message
+ ```
+ {user_msgs}
+ ```
+ """
+
+
+ INTRUCT_SP_VISION_ADD = """
+ ## Vision expert message
+ ```text
+ {vision_msgs}
+ ```
+ """
+
+ AGENT_SP = """# You are an Agent orchestration expert: understand the user's intent and give a final reply based on the information at hand.
+ > Make sure everything you output has an accurate source; minimize incorrect information.
+
+ Current mode: {mode}, {mode_desc}
+
+ ## Final reply format
+ - Output the Markdown body directly.
+
+ Sending text without calling a tool becomes the final reply; it must follow these rules:
+ - Language: Simplified Chinese, encyclopedia style, rigorous and not wordy.
+ - Body format: Markdown, [highlight, katex], with a top-level heading; rich content that highlights the key points.
+ - Tool citations:
+     - Search-summary citations: use `search:<numeric id>`, e.g. `search:3`
+     - Page-content citations: use `page:<numeric id>`, e.g. `page:5`
+     - Every citation must be marked separately
+ - Add a references code block at the bottom of the body:
+     - Omit entries you did not use; if no expert supplied information, omit the block entirely.
+ ```references
+ [1] [search] [description](url)
+ [3] [search] [description](url)
+ [5] [page] [page title](url)
+ [7] [page] [page title](url)
+ ```
+
+ ## User message
+ ```text
+ {user_msgs}
+ ```
+ """
+
+ # PS: the agent has no image-search permission here
+ AGENT_SP_TOOLS_STANDARD_ADD = """
+ Integrate the information you already have, distill the keywords from the user message, and give the final reply.
+ """
+
+
+ AGENT_SP_TOOLS_AGENT_ADD = """
+ - You may now use tools: {tools_desc}
+ - Decide whether to use tools sequentially or concurrently to gather information:
+     - 0-1 calls to internal_web_search
+     - 0-1 calls to internal_image_search (if the user wants images; usually run concurrently with internal_web_search)
+     - 1-2 calls to crawl_page
+ - Tools must be invoked via the function_call / tool_call mechanism.
+ """
+
+
+ AGENT_SP_INTRUCT_VISION_ADD = """
+ ## Vision expert message
+ ```text
+ {vision_msgs}
+ ```
+ """
+
+ AGENT_SP_SEARCH_ADD = """
+ ## Search expert message
+ ```text
+ {search_msgs}
+ ```
+ """
+
+ AGENT_SP_PAGE_ADD = """
+ ## Page-content expert message
+ ```text
+ {page_msgs}
+ ```
+ - When citing page content, you must use the `page:id` format
+ """
+
+ AGENT_SP_IMAGE_SEARCH_ADD = """
+ ## Image-search expert message
+ ```text
+ {image_search_msgs}
+ ```
+ - For each internal_image_search performed, pick 1 image and insert it into the body
+ """
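These constants are plain `str.format` templates; a sketch of how a caller presumably assembles them (the `mode_desc` and message strings are invented for illustration):

```python
# Sketch of template assembly; the mode_desc and message text are invented.
from entari_plugin_hyw.utils.prompts import AGENT_SP, AGENT_SP_SEARCH_ADD

system_prompt = AGENT_SP.format(
    mode="standard",
    mode_desc="answer from gathered context",  # invented description
    user_msgs="Who maintains crawl4ai?",
)
# Expert sections are appended as separately formatted blocks
system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="[3] crawl4ai GitHub ...")
```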
entari_plugin_hyw/utils/search.py
@@ -0,0 +1,241 @@
+ import urllib.parse
+ from typing import List, Dict, Optional, Any
+ from loguru import logger
+ from crawl4ai import AsyncWebCrawler
+ from crawl4ai.async_configs import CrawlerRunConfig
+ from crawl4ai.cache_context import CacheMode
+
+ # Shared crawler instance to avoid repeated init
+ _shared_crawler: Optional[AsyncWebCrawler] = None
+
+
+ async def get_shared_crawler() -> AsyncWebCrawler:
+     global _shared_crawler
+     if _shared_crawler is None:
+         _shared_crawler = AsyncWebCrawler()
+         await _shared_crawler.start()
+     return _shared_crawler
+
+
+ async def close_shared_crawler():
+     global _shared_crawler
+     if _shared_crawler:
+         try:
+             await _shared_crawler.close()
+         except Exception:
+             pass
+         _shared_crawler = None
+
+
+ class SearchService:
+     """
+     Crawl4AI-backed search & fetch service.
+     Uses the configured search engine results page (SERP) URL and parses links from the HTML.
+     """
+     def __init__(self, config: Any):
+         self.config = config
+         self._default_limit = 8
+         self._crawler: Optional[AsyncWebCrawler] = None
+
+     def _build_search_url(self, query: str) -> str:
+         encoded_query = urllib.parse.quote(query)
+         base = getattr(self.config, "search_base_url", "https://lite.duckduckgo.com/lite/?q={query}")
+         if "{query}" in base:
+             return base.replace("{query}", encoded_query).replace("{limit}", str(self._default_limit))
+         sep = "&" if "?" in base else "?"
+         return f"{base}{sep}q={encoded_query}"
+
+     def _build_image_url(self, query: str) -> str:
+         encoded_query = urllib.parse.quote(query)
+         base = getattr(self.config, "image_search_base_url", "https://duckduckgo.com/?q={query}&iax=images&ia=images")
+         if "{query}" in base:
+             return base.replace("{query}", encoded_query).replace("{limit}", str(self._default_limit))
+         sep = "&" if "?" in base else "?"
+         return f"{base}{sep}q={encoded_query}&iax=images&ia=images"
+
+     async def search(self, query: str) -> List[Dict[str, str]]:
+         """
+         Crawl the configured SERP using Crawl4AI and return parsed results.
+         """
+         if not query:
+             return []
+
+         url = self._build_search_url(query)
+         logger.info(f"SearchService(Crawl4AI): fetching {url}")
+
+         try:
+             crawler = await self._get_crawler()
+             result = await crawler.arun(
+                 url=url,
+                 config=CrawlerRunConfig(
+                     wait_until="domcontentloaded",
+                     wait_for="article",
+                     cache_mode=CacheMode.BYPASS,
+                     word_count_threshold=1,
+                     screenshot=False,
+                     capture_console_messages=False,
+                     capture_network_requests=False,
+                 ),
+             )
+             return self._parse_markdown_result(result, limit=self._default_limit)
+         except Exception as e:
+             logger.error(f"Crawl4AI search failed: {e}")
+             return []
+
+     def _parse_markdown_result(self, result, limit: int = 8) -> List[Dict[str, str]]:
+         """Parse a Crawl4AI result into search items without manual HTML parsing."""
+         md = (result.markdown or result.extracted_content or "").strip()
+         lines = [ln.strip() for ln in md.splitlines() if ln.strip()]
+         links = result.links.get("external", []) if getattr(result, "links", None) else []
+         seen = set()
+         results: List[Dict[str, str]] = []
+
+         def find_snippet(url: str, domain: str) -> str:
+             for ln in lines:
+                 if url in ln or (domain and domain in ln):
+                     return ln[:400]
+             # Fall back to the first non-empty line
+             return lines[0][:400] if lines else ""
+
+         for link in links:
+             url = link.get("href") or ""
+             if not url or url in seen:
+                 continue
+             seen.add(url)
+             domain = urllib.parse.urlparse(url).hostname or ""
+             title = link.get("title") or link.get("text") or url
+             snippet = find_snippet(url, domain)
+             results.append({
+                 "title": title.strip(),
+                 "url": url,
+                 "domain": domain,
+                 "content": snippet or title,
+             })
+             if len(results) >= limit:
+                 break
+
+         if not results:
+             logger.warning(f"SearchService: no results parsed; md_length={len(md)}, links={len(links)}")
+         else:
+             logger.info(f"SearchService: parsed {len(results)} results via Crawl4AI links")
+         return results
+
+     async def fetch_page(self, url: str) -> Dict[str, str]:
+         """
+         Fetch a single page via Crawl4AI and return cleaned markdown/text plus metadata.
+         """
+         if not url:
+             return {"content": "Error: missing url", "title": "Error", "url": ""}
+
+         try:
+             crawler = await self._get_crawler()
+             result = await crawler.arun(
+                 url=url,
+                 config=CrawlerRunConfig(
+                     wait_until="networkidle",
+                     wait_for_images=False,  # Faster: skip image loading
+                     cache_mode=CacheMode.BYPASS,
+                     word_count_threshold=1,
+                     screenshot=False,
+                     capture_console_messages=False,
+                     capture_network_requests=False,
+                 ),
+             )
+             if not result.success:
+                 return {"content": f"Error: crawl failed ({result.error_message or 'unknown'})", "title": "Error", "url": url}
+
+             content = result.markdown or result.extracted_content or result.cleaned_html or result.html or ""
+             # Prefer page metadata for the title; fall back to a placeholder
+             title = "No Title"
+             if result.metadata:
+                 title = result.metadata.get("title") or result.metadata.get("og:title") or title
+
+             return {
+                 "content": content[:8000],
+                 "title": title,
+                 "url": result.url or url
+             }
+         except Exception as e:
+             logger.error(f"Crawl4AI fetch failed: {e}")
+             return {"content": f"Error: crawl failed ({e})", "title": "Error", "url": url}
+
+     async def _get_crawler(self) -> AsyncWebCrawler:
+         # Prefer the shared crawler to minimize init overhead; fall back to a local instance
+         try:
+             return await get_shared_crawler()
+         except Exception as e:
+             logger.warning(f"Shared crawler unavailable, creating local: {e}")
+             if self._crawler is None:
+                 self._crawler = AsyncWebCrawler()
+                 await self._crawler.start()
+             return self._crawler
+
+     async def close(self):
+         if self._crawler:
+             try:
+                 await self._crawler.close()
+             except Exception:
+                 pass
+             self._crawler = None
+
+     async def image_search(self, query: str) -> List[Dict[str, str]]:
+         """
+         Image search via Crawl4AI media extraction.
+         """
+         if not query:
+             return []
+
+         url = self._build_image_url(query)
+         logger.info(f"SearchService(Crawl4AI Image): fetching {url}")
+
+         try:
+             # Reuse the shared crawler and wait for images to load
+             crawler = await self._get_crawler()
+             result = await crawler.arun(
+                 url=url,
+                 config=CrawlerRunConfig(
+                     wait_until="networkidle",
+                     wait_for_images=True,
+                     wait_for="img",
+                     cache_mode=CacheMode.BYPASS,
+                     word_count_threshold=1,
+                     screenshot=False,
+                     capture_console_messages=False,
+                     capture_network_requests=False,
+                 ),
+             )
+             images = []
+             seen = set()
+             for img in result.media.get("images", []):
+                 src = img.get("src") or ""
+                 if not src:
+                     continue
+                 if src.startswith("//"):
+                     src = "https:" + src
+                 if not src.startswith("http"):
+                     continue
+                 if src in seen:
+                     continue
+                 seen.add(src)
+                 alt = (img.get("alt") or img.get("desc") or "").strip()
+                 domain = urllib.parse.urlparse(src).hostname or ""
+                 images.append({
+                     "title": alt or "Image",
+                     "url": src,
+                     "domain": domain,
+                     "content": alt or "Image",
+                 })
+                 if len(images) >= self._default_limit:
+                     break
+             if not images:
+                 logger.warning(f"SearchService: no images parsed; media_count={len(result.media.get('images', []))}")
+             else:
+                 logger.info(f"SearchService: parsed {len(images)} images via Crawl4AI media")
+             return images
+         except Exception as e:
+             logger.error(f"Crawl4AI image search failed: {e}")
+             return []
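A hypothetical end-to-end driver for `SearchService` (not part of the package); a bare `SimpleNamespace()` works as the config because `_build_search_url` and `_build_image_url` only probe optional attributes with `getattr`:

```python
# Illustrative driver; SimpleNamespace stands in for the plugin config.
import asyncio
from types import SimpleNamespace

from entari_plugin_hyw.utils.search import SearchService, close_shared_crawler


async def main() -> None:
    service = SearchService(SimpleNamespace())
    hits = await service.search("crawl4ai")
    for hit in hits:
        print(hit["domain"], hit["title"])
    if hits:
        page = await service.fetch_page(hits[0]["url"])
        print(page["title"], len(page["content"]))
    await service.close()
    await close_shared_crawler()  # tear down the module-level crawler


asyncio.run(main())
```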
{entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: entari_plugin_hyw
- Version: 3.3.1
+ Version: 3.3.2
  Summary: Use large language models to interpret chat messages
  Author-email: kumoSleeping <zjr2992@outlook.com>
  License: MIT
@@ -19,9 +19,9 @@ Description-Content-Type: text/markdown
  Requires-Dist: arclet-entari[full]>=0.16.5
  Requires-Dist: openai
  Requires-Dist: httpx
- Provides-Extra: playwright
- Requires-Dist: playwright>=1.56.0; extra == "playwright"
- Requires-Dist: trafilatura>=2.0.0; extra == "playwright"
+ Requires-Dist: markdown>=3.10
+ Requires-Dist: crawl4ai>=0.7.8
+ Requires-Dist: jinja2>=3.0
  Provides-Extra: dev
  Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
  Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
@@ -38,6 +38,10 @@ Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
  
  </div>
  
+ # v3.3 brings sweeping changes; the screenshots below no longer match the text
+
+
+
  ## 🎑 Showcase
  
  
@@ -47,12 +51,9 @@ Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
  </div>
  
  ## ✨ Features
- - **About search**:
-     - Without a jina token, the model follows its prompt to prefer jina / playwright (lower success rate) to fetch rendered bing / google mixed search results.
-     - With a jina token, the model gets a web search tool, ~~but I never tried it; I like freebies~~.
-     - OpenRouter's `:online` parameter also works; it prefers the model provider's own search and falls back to `exa` (pricier) for web search.
- - Based on mixed `Alconna` and `MessageChain` processing, with a deeply optimized trigger experience.
- - **Page fetching**: real-time page retrieval via **Jina AI** or **Playwright**.
+ - **About search**: triggers Bing web and image search in one pass and combines the results before replying.
+ - Based on mixed `Alconna` `MessageChain` processing, with a deeply optimized trigger experience.
+ - **Page fetching**: real-time page retrieval via Playwright.
  - **Multimodal understanding**: supports visual analysis of images.
  - **Context awareness**: maintains conversation history for continuous multi-turn dialogue.
  - `reaction` emoji, signaling task start.
@@ -67,12 +68,8 @@ Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
  pip install entari-plugin-hyw
  ```
  
- ### Enabling Playwright support
- If you want to use Playwright for local page rendering (rather than Jina AI only):
- ```bash
- pip install entari-plugin-hyw[playwright]
- playwright install chromium
- ```
+ ### Search
+ Search engines are queried over plain HTTP by default (DuckDuckGo; the full search URL can be customized in the config, e.g. `https://duckduckgo.com/?q={query}`).
  
  ## ⚙️ Configuration
  
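The `{query}` placeholder mentioned above is substituted by `SearchService._build_search_url` from utils/search.py earlier in this diff; a quick check of the behavior, with an invented Bing-style URL:

```python
# Quick check of the {query} template substitution (Bing URL invented for illustration).
from types import SimpleNamespace

from entari_plugin_hyw.utils.search import SearchService

cfg = SimpleNamespace(search_base_url="https://www.bing.com/search?q={query}")
print(SearchService(cfg)._build_search_url("hello world"))
# -> https://www.bing.com/search?q=hello%20world
```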
@@ -86,7 +83,6 @@ plugins:
  command_name_list: ["zssm", "hyw"]
  
  # Main LLM model config (required), e.g. x-ai/grok-4.1-fast:online, perplexity/sonar
- # If the model lacks built-in search, it follows its prompt to prefer jina / playwright (lower success rate) to fetch rendered bing / google mixed search results
  model_name: "x-ai/grok-4.1-fast:free"
  api_key: "your-api-key"
  
@@ -94,19 +90,8 @@ plugins:
  base_url: "openai-compatible-url"
  
  # --- Browser & search ---
- # Web browsing tool: "jina" (default) or "playwright"
- browser_tool: "jina"
-
- # Optional: Jina AI API key (set for a higher quota; free tier is 20/min)
- # Setting this also enables the web search tool
- jina_api_key: "jina_..."
-
- # Playwright settings
  headless: true
  
- # Browser fallback: when the preferred browser_tool fails, try the alternate one (default: false)
- enable_browser_fallback: false
-
  # --- Vision config (optional) ---
  # Falls back to the main model if unset
  vision_model_name: "qwen-vl-plus"
@@ -118,6 +103,10 @@ plugins:
  reasoning:
    effort: low
  
+ # --- Interaction ---
+ # Enable emoji reactions (default: true)
+ reaction: true
+
  # --- Debug ---
  save_conversation: false
  ```
@@ -148,3 +137,6 @@ hyw -t 一大段话。
  ### Quote replies
  Quoting a message to ask follow-ups is supported; the bot automatically reads the quoted message as context:
  - **Quote + command**: the bot understands the quoted message's content (including images), splicing together `Text`, `Image`, and some `Custom` elements via `MessageChain` operations.
+
+ UncleCode. (2024). Crawl4AI: Open-source LLM Friendly Web Crawler & Scraper [Computer software].
+ GitHub. https://github.com/unclecode/crawl4ai