entari-plugin-hyw 3.3.5__py3-none-any.whl → 3.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic.
- entari_plugin_hyw/__init__.py +14 -351
- entari_plugin_hyw/assets/libs/tailwind.css +1 -1
- entari_plugin_hyw/assets/tailwind.input.css +1 -1
- entari_plugin_hyw/assets/template.j2 +113 -20
- entari_plugin_hyw/core/config.py +1 -0
- entari_plugin_hyw/core/pipeline.py +131 -103
- entari_plugin_hyw/core/render.py +65 -41
- entari_plugin_hyw/utils/prompts.py +26 -16
- entari_plugin_hyw/utils/search.py +233 -3
- entari_plugin_hyw-3.3.7.dist-info/METADATA +142 -0
- {entari_plugin_hyw-3.3.5.dist-info → entari_plugin_hyw-3.3.7.dist-info}/RECORD +13 -14
- entari_plugin_hyw/core/render.py.bak +0 -926
- entari_plugin_hyw-3.3.5.dist-info/METADATA +0 -142
- {entari_plugin_hyw-3.3.5.dist-info → entari_plugin_hyw-3.3.7.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.3.5.dist-info → entari_plugin_hyw-3.3.7.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/core/render.py
CHANGED

@@ -173,7 +173,8 @@ class ContentRenderer:
         stats: Dict[str, Any] = None,
         references: List[Dict[str, Any]] = None,
         page_references: List[Dict[str, Any]] = None,
-        stages_used: List[Dict[str, Any]] = None,
+        image_references: List[Dict[str, Any]] = None,  # Added
+        stages_used: List[Dict[str, Any]] = None,
         flow_steps: List[Dict[str, Any]] = None,
         model_name: str = "",
         provider_name: str = "Unknown",

@@ -197,6 +198,9 @@ class ContentRenderer:
         # Preprocess to fix common markdown issues
         markdown_content = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', markdown_content)
 
+        # references, page_references, image_references are already parsed by pipeline
+        # No filtering needed here - use them directly
+
         # AGGRESSIVE CLEANING: Strip out "References" section and "[code]" blocks from the text
         # because we are rendering them as structured UI elements now.
 
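
The list-fix regex in the context lines above is dense. A standalone sketch (not part of the diff, names illustrative) of what that `re.sub` does when a list starts on the line right after a paragraph:

```python
import re

# Sketch: insert the blank line Markdown requires before a list item that
# directly follows a non-empty line. Same pattern as the diff's preprocessor.
text = "Steps:\n1. install\n2. run"
fixed = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', text)
print(fixed)
# Steps:
#
# 1. install
#
# 2. run
```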

@@ -262,41 +266,21 @@ class ContentRenderer:
 
         content_html = restore_math(content_html)
 
-            #
-
-
-
-
-
-
-
-
-
-
-
-                if ref_match:
-                    citation_id = ref_match.group(1)
-                    parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{citation_id}</span>'
-                    continue
-                # 2. Flow marker: <code>flow:a</code>
-                flow_match = re.match(r'^<code.*?>flow:([a-zA-Z])</code>$', part)
-                if flow_match:
-                    flow_id = flow_match.group(1).lower()
-                    parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{flow_id}</span>'
-                    continue
-
-            # If it's NOT a code block, or a code block we didn't transform, we leave it alone.
-            # (Previous logic was to regex replace inside non-code blocks. We don't need that anymore
-            # because the prompt now enforces code spans).
-        content_html = "".join(parts)
+        # Convert [search:N] to blue badge
+        content_html = re.sub(
+            r'\[search:(\d+)\]',
+            r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+            content_html
+        )
+        # Convert [page:N] to orange badge
+        content_html = re.sub(
+            r'\[page:(\d+)\]',
+            r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+            content_html
+        )
 
-        # Strip out the
-
-        # Make regex robust to any language class or no class
-        content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
-        # Loop to remove multiple if present
-        while re.search(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', content_html, flags=re.DOTALL | re.IGNORECASE):
-            content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
+        # Strip out the references code block if it leaked into the content
+        content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
 
         # --- PREPARE DATA FOR JINJA TEMPLATE ---
 
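
The rewrite trades the old split-into-code-spans loop for two flat substitutions: the prompt now emits plain-text `[search:N]` / `[page:N]` markers that survive the Markdown renderer untouched, so a regex pass over the final HTML suffices. A minimal standalone sketch of the same two-pass substitution (function name illustrative, badge markup abbreviated; the real markup is the Tailwind spans above):

```python
import re

BLUE = r'<span class="badge-blue">\1</span>'      # stand-in for the Tailwind span above
ORANGE = r'<span class="badge-orange">\1</span>'

def badge_markers(content_html: str) -> str:
    # Same two-pass substitution as the diff, with abbreviated markup.
    content_html = re.sub(r'\[search:(\d+)\]', BLUE, content_html)
    content_html = re.sub(r'\[page:(\d+)\]', ORANGE, content_html)
    return content_html

print(badge_markers("Const generics were extended [search:3], per the release post [page:1]."))
# Const generics were extended <span class="badge-blue">3</span>, per the release post <span class="badge-orange">1</span>.
```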

@@ -361,6 +345,18 @@ class ContentRenderer:
                 "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
             })
 
+        # 2c. Image Reference Processing
+        processed_image_refs = []
+        if image_references:
+            for ref in image_references[:8]:
+                url = ref.get("url", "#")
+                processed_image_refs.append({
+                    "title": ref.get("title", "Image"),
+                    "url": url,
+                    "thumbnail": ref.get("thumbnail") or url,  # Fallback to url if thumbnail not provided
+                    "domain": self._get_domain(url) or ref.get("domain") or "image"
+                })
+
         flow_steps = flow_steps or []
 
         if stages_used:
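
For a hypothetical image hit that carries no `thumbnail` or `domain`, the fallback chain above degrades gracefully. A simplified sketch (the renderer's `_get_domain` helper is elided, so only the tail of the `domain` chain is shown):

```python
# Hypothetical input with the optional fields missing:
ref = {"title": "Ferris the crab", "url": "https://example.com/ferris.png"}

url = ref.get("url", "#")
processed = {
    "title": ref.get("title", "Image"),
    "url": url,
    "thumbnail": ref.get("thumbnail") or url,  # falls back to the full image URL
    "domain": ref.get("domain") or "image",    # _get_domain(url) would be tried first
}
print(processed["thumbnail"])  # https://example.com/ferris.png
```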

@@ -404,8 +400,12 @@ class ContentRenderer:
                 stage_children = {}
 
                 # References go to "Search"
-
-
+                # Also Image References to "Search"
+                if name == "Search":
+                    if processed_refs:
+                        stage_children['references'] = processed_refs
+                    if processed_image_refs:
+                        stage_children['image_references'] = processed_image_refs
 
                 # Flow steps go to "Agent"
                 if name == "Agent" and flow_steps:

@@ -425,7 +425,7 @@ class ContentRenderer:
                 # Pass through Search Queries
                 if "queries" in stage:
                     stage_children["queries"] = stage["queries"]
-
+
                 # Pass through Crawled Pages
                 if "crawled_pages" in stage:
                     stage_children["crawled_pages"] = stage["crawled_pages"]

@@ -441,12 +441,36 @@ class ContentRenderer:
                     **stage_children # Merge children
                 })
 
-
-
-
+        # Ensure references are displayed even if no "Search" stage was present
+        has_search_stage = any(s.get("name") == "Search" for s in processed_stages)
+        if not has_search_stage and (processed_refs or processed_image_refs):
+            # Create a virtual Search stage
+            virtual_search = {
+                "name": "Search",
+                "model": "DuckDuckGo", # Default assumption
+                "model_short": "DuckDuckGo",
+                "provider": "Reference",
+                "icon_html": SEARCH_ICON,
+                "time_str": "0.00s",
+                "cost_str": "$0",
+            }
+            if processed_refs:
+                virtual_search['references'] = processed_refs
+            if processed_image_refs:
+                virtual_search['image_references'] = processed_image_refs
+
+            # Insert after Vision/Instruct (usually index 0 or 1), or at start
+            insert_idx = 0
+            if processed_stages and processed_stages[0]["name"] in ["Vision", "Instruct"]:
+                insert_idx = 1
+                if len(processed_stages) > 1 and processed_stages[1]["name"] == "Instruct":
+                    insert_idx = 2
+
+            processed_stages.insert(insert_idx, virtual_search)
 
         # 4. Stats Footer Logic
         processed_stats = {}
+        stats_dict = {}
         if stats:
             # Assuming standard 'stats' dict structure, handle list if needed
             if isinstance(stats, list):
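
The insertion index keeps the synthesized card in pipeline order: after a leading Vision and/or Instruct stage, before Agent. A quick sketch of that rule, assuming the nesting shown above, with hypothetical stage lists (the function name is illustrative):

```python
def search_insert_idx(stages):
    """Replicates the index rule above: skip a leading Vision and/or Instruct stage."""
    idx = 0
    if stages and stages[0]["name"] in ["Vision", "Instruct"]:
        idx = 1
        if len(stages) > 1 and stages[1]["name"] == "Instruct":
            idx = 2
    return idx

assert search_insert_idx([]) == 0
assert search_insert_idx([{"name": "Agent"}]) == 0
assert search_insert_idx([{"name": "Instruct"}, {"name": "Agent"}]) == 1
assert search_insert_idx([{"name": "Vision"}, {"name": "Instruct"}, {"name": "Agent"}]) == 2
```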

entari_plugin_hyw/utils/prompts.py
CHANGED

@@ -3,8 +3,8 @@ VISION_SP = """# You are a professional vision-to-text expert.
 # Core task
 - Intelligently analyze the image content and restate it as text; add nothing beyond that.
 - Text first: if there is legible text (documents, screenshots, etc.), transcribe it completely and accurately, leaving nothing out.
-- Visual supplement:
-- User request: follow any hint in the user message about which text to prioritize,
+- Visual supplement: after covering the text, give a summary of the visual content (objects, scene, atmosphere).
+- User request: follow any hint in the user message about which text to prioritize; ignore it if irrelevant.
 
 ## User message
 ```text

@@ -32,7 +32,7 @@ INTRUCT_SP = """# You are a professional instruction expert.
 {tools_desc}
 
 ## Your reply
-
+After calling a tool, no extra reply text is needed, to save tokens.
 
 ## User message
 ```

@@ -53,24 +53,36 @@ AGENT_SP = """# You are an Agent orchestration expert; you need to understand the user's intent,
 
 Current mode: {mode}, {mode_desc}
 
-## Final reply format requirements
-- Output the Markdown body directly.
 
+
+## Process requirements
 When text is sent without a tool call, it becomes the final reply; follow these rules:
+- Directly produce a report; there is no need to answer the user message conversationally.
 - Language: Simplified Chinese, encyclopedic style, rigorous and not verbose.
-- Body format:
+- Body format:
+  - Use Markdown; highlight and katex are supported.
+  - Start with a `# ` top-level heading; no padding, and do not answer the user's question directly.
+  - Content should be rich and emphasize the key points.
 - Tool citations:
-
-
-
-
+  > Important: all body content must be based on actual information; guarantee 100% factuality.
+  - Citation rules:
+    - Only cite information from this session that is useful for solving the problem; messages that are not needed may go uncited.
+    - Markers must genuinely correspond to information obtained in context and to entries in references; images correspond in order.
+    - Citation rules within the body:
+      - Search-snippet citations: e.g. [search:3][search:4]
+      - Page-content citations: e.g. [page:5][page:6]
+      - Image citations: e.g. [image:7][image:8]
+    - search means snippets you obtained via internal_web_search; if there is no information from that tool, do not use this citation.
+    - page means page content you obtained via crawl_page; if there is no information from that tool, do not use this citation.
+    - image means images you obtained via internal_image_search; just place the images in order, no explicit citation is needed.
 - Add a references code block at the bottom of the body:
 - Skip entries you do not use; if no expert supplied information, write nothing.
 ```references
-[
-[
-[
-[
+[2] [search] [text description](url)
+[8] [search] [text description](url)
+[1] [page] [page title](url)
+[2] [page] [page title](url)
+[1] [image] [source](url)
 ```
 
 ## User message

@@ -79,7 +91,6 @@ AGENT_SP = """# You are an Agent orchestration expert; you need to understand the user's intent,
 ```
 """
 
-# PS: agent has no image-search permission
 AGENT_SP_TOOLS_STANDARD_ADD = """
 You need to consolidate the information already gathered, distill the keywords from the user message, and produce the final reply.
 """

@@ -127,4 +138,3 @@ AGENT_SP_IMAGE_SEARCH_ADD = """
 ```
 - For each internal_image_search performed, pick 1 image to insert into the body
 """
-
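
The render.py hunk above notes that `references`, `page_references`, and `image_references` arrive "already parsed by pipeline"; the parser itself is not in this diff. A hypothetical parser for the `[N] [kind] [title](url)` line format the prompt specifies could look like the sketch below (the function name and return shape are illustrative, not the plugin's actual API):

```python
import re

# Hypothetical sketch: parse lines of the form "[2] [search] [title](url)"
# from a references block into per-kind reference dicts.
LINE = re.compile(r'^\[(\d+)\]\s+\[(search|page|image)\]\s+\[([^\]]*)\]\(([^)]+)\)$')

def parse_references(block: str):
    refs = {"search": [], "page": [], "image": []}
    for line in block.strip().splitlines():
        m = LINE.match(line.strip())
        if m:
            n, kind, title, url = m.groups()
            refs[kind].append({"id": int(n), "title": title, "url": url})
    return refs

block = "[2] [search] [Rust 1.83 release notes](https://example.com/a)\n[1] [page] [Rust blog](https://example.com/b)"
print(parse_references(block)["search"][0]["id"])  # 2
```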

entari_plugin_hyw/utils/search.py
CHANGED

@@ -1,10 +1,27 @@
 import urllib.parse
+import asyncio
+import re
+import html
 from typing import List, Dict, Optional, Any
 from loguru import logger
 from crawl4ai import AsyncWebCrawler
 from crawl4ai.async_configs import CrawlerRunConfig
 from crawl4ai.cache_context import CacheMode
 
+# Optional imports for new strategies
+try:
+    import httpx
+except ImportError:
+    httpx = None
+
+try:
+    from ddgs import DDGS
+except ImportError:
+    try:
+        from duckduckgo_search import DDGS
+    except ImportError:
+        DDGS = None
+
 # Shared crawler instance to avoid repeated init
 _shared_crawler: Optional[AsyncWebCrawler] = None
 

@@ -28,13 +45,19 @@ async def close_shared_crawler():
 
 class SearchService:
     """
-
-
+    Multi-strategy search & fetch service.
+    Supported providers: 'crawl4ai' (default), 'httpx', 'ddgs'.
     """
     def __init__(self, config: Any):
         self.config = config
         self._default_limit = getattr(config, "search_limit", 8)
         self._crawler: Optional[AsyncWebCrawler] = None
+
+        # Configuration for retries/timeouts
+        self._search_timeout = getattr(config, "search_timeout", 10.0)
+        self._search_retries = getattr(config, "search_retries", 2)
+        self._provider = getattr(config, "search_provider", "crawl4ai")
+        logger.info(f"SearchService initialized: provider='{self._provider}', limit={self._default_limit}, timeout={self._search_timeout}s")
 
     def _build_search_url(self, query: str) -> str:
         encoded_query = urllib.parse.quote(query)
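
Every new knob is read with `getattr(config, ..., default)`, so any attribute-bearing object can serve as the config. A minimal construction sketch under that assumption (values illustrative; the import path matches the file list at the top of this diff):

```python
from types import SimpleNamespace
from entari_plugin_hyw.utils.search import SearchService

# Illustrative values; every field is optional thanks to the getattr defaults.
config = SimpleNamespace(
    search_provider="ddgs",  # 'crawl4ai' (default), 'httpx', or 'ddgs'
    search_limit=8,
    search_timeout=10.0,     # seconds per attempt
    search_retries=2,        # retries after the first attempt
)
service = SearchService(config)
```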

@@ -53,8 +76,211 @@ class SearchService:
         return f"{base}{sep}q={encoded_query}&iax=images&ia=images"
 
     async def search(self, query: str) -> List[Dict[str, str]]:
+        """
+        Dispatch search to the configured provider.
+        """
+        if not query:
+            return []
+
+        provider = self._provider.lower()
+        logger.info(f"SearchService: searching for '{query}' using provider='{provider}'")
+
+        if provider == "httpx":
+            return await self._search_httpx(query)
+        elif provider == "ddgs":
+            return await self._search_ddgs(query)
+        else:
+            # Default to crawl4ai for backward compatibility or explicit choice
+            return await self._search_crawl4ai(query)
+
+    async def _search_httpx(self, query: str) -> List[Dict[str, str]]:
+        """
+        Directly fetch https://lite.duckduckgo.com/lite/ via httpx and parse HTML.
+        Fast, no browser overhead.
+        """
+        if not httpx:
+            logger.error("SearchService: httpx not installed, fallback to crawl4ai")
+            return await self._search_crawl4ai(query)
+
+        url = self._build_search_url(query)
+
+        results: List[Dict[str, str]] = []
+        try:
+            async with httpx.AsyncClient(timeout=self._search_timeout, follow_redirects=True) as client:
+                resp = await client.get(url, headers={
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
+                })
+                resp.raise_for_status()
+                html_content = resp.text
+
+            # Regex parsing for DDG Lite
+            snippet_regex = re.compile(r'<td[^>]*>(.*?)</td>', re.DOTALL)
+            link_regex = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.DOTALL)
+
+            raw_links = link_regex.findall(html_content)
+
+            seen = set()
+            for href, text in raw_links:
+                if len(results) >= self._default_limit:
+                    break
+
+                # Clean href
+                if "duckduckgo.com" in href:
+                    if "uddg=" in href:
+                        parsed = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
+                        href = parsed.get("uddg", [href])[0]
+                    else:
+                        continue
+
+                if not href.startswith("http"):
+                    continue
+
+                if href in seen:
+                    continue
+                seen.add(href)
+
+                # Title clean
+                title = re.sub(r'<[^>]+>', '', text).strip()
+                title = html.unescape(title)
+
+                results.append({
+                    "title": title,
+                    "url": href,
+                    "domain": urllib.parse.urlparse(href).hostname or "",
+                    "content": title
+                })
+
+            if not results:
+                logger.warning("SearchService(httpx): No results parsed via regex.")
+
+            return results
+
+        except Exception as e:
+            logger.error(f"SearchService(httpx) failed: {e}")
+            return []
+
+    async def _search_ddgs(self, query: str) -> List[Dict[str, str]]:
+        """
+        Use duckduckgo_search library (Sync DDGS).
+        Executes in thread pool to allow async usage.
+        Supports retries and timeouts.
+        """
+        if not DDGS:
+            logger.error("SearchService: duckduckgo_search not installed, fallback to crawl4ai")
+            return await self._search_crawl4ai(query)
+
+        def _do_sync_search():
+            """Sync search function to run in thread"""
+            results: List[Dict[str, str]] = []
+            final_exc = None
+
+            for attempt in range(self._search_retries + 1):
+                try:
+                    with DDGS(timeout=self._search_timeout) as ddgs:
+                        # Use positional argument for query to be safe across versions
+                        ddgs_gen = ddgs.text(
+                            query,
+                            region='cn-zh',
+                            safesearch='moderate',
+                            max_results=self._default_limit,
+                            backend="duckduckgo",
+                        )
+
+                        if ddgs_gen:
+                            for r in ddgs_gen:
+                                results.append({
+                                    "title": r.get("title", ""),
+                                    "url": r.get("href", ""),
+                                    "domain": urllib.parse.urlparse(r.get("href", "")).hostname or "",
+                                    "content": r.get("body", "")
+                                })
+                                if len(results) >= self._default_limit:
+                                    break
+
+                    return results, None
+
+                except Exception as e:
+                    final_exc = e
+                    if attempt < self._search_retries:
+                        import time
+                        time.sleep(1)
+
+            return [], final_exc
+
+        # Run sync search in executor
+        try:
+            results, err = await asyncio.to_thread(_do_sync_search)
+
+            if err:
+                logger.warning(f"SearchService(ddgs) text search failed after retries: {err}")
+                return []
+
+            logger.info(f"SearchService(ddgs): Got {len(results)} text results")
+            return results
+
+        except Exception as e:
+            logger.error(f"SearchService(ddgs) thread execution failed: {e}")
+            return []
+
+    async def _search_ddgs_images(self, query: str) -> List[Dict[str, str]]:
+        """
+        Use duckduckgo_search library for images.
+        """
+        if not DDGS:
+            return []
+
+        def _do_sync_image_search():
+            results: List[Dict[str, str]] = []
+            final_exc = None
+
+            for attempt in range(self._search_retries + 1):
+                try:
+                    with DDGS(timeout=self._search_timeout) as ddgs:
+                        ddgs_gen = ddgs.images(
+                            query,
+                            region='cn-zh',
+                            safesearch='moderate',
+                            max_results=self._default_limit,
+                        )
+
+                        if ddgs_gen:
+                            for r in ddgs_gen:
+                                # DDGS images returns: title, image, thumbnail, url, source, etc.
+                                # API might differ, adapt to standard format
+                                results.append({
+                                    "title": r.get("title", "Image"),
+                                    "url": r.get("image", "") or r.get("url", ""), # Full image URL
+                                    "thumbnail": r.get("thumbnail", ""),
+                                    "domain": r.get("source", "") or urllib.parse.urlparse(r.get("url", "")).hostname or "",
+                                })
+                                if len(results) >= self._default_limit:
+                                    break
+
+                    return results, None
+                except Exception as e:
+                    final_exc = e
+                    if attempt < self._search_retries:
+                        import time
+                        time.sleep(1)
+
+            return [], final_exc
+
+        try:
+            results, err = await asyncio.to_thread(_do_sync_image_search)
+            if err:
+                logger.warning(f"SearchService(ddgs) image search failed: {err}")
+                return []
+
+            logger.info(f"SearchService(ddgs): Got {len(results)} image results")
+            return results
+        except Exception as e:
+            logger.error(f"SearchService(ddgs) image thread failed: {e}")
+            return []
+
+    async def _search_crawl4ai(self, query: str) -> List[Dict[str, str]]:
         """
         Crawl the configured SERP using Crawl4AI and return parsed results.
+        Original implementation.
         """
         if not query:
             return []
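
Both ddgs methods share one pattern: a synchronous retry loop handed to `asyncio.to_thread`, returning `(results, exception)` as a value instead of raising across the thread boundary. A generalized, runnable sketch of just that pattern (names illustrative, not the plugin's API):

```python
import asyncio
import time

def with_retries(fn, retries: int = 2, delay: float = 1.0):
    """Run fn; on failure retry, returning (result, last_exception) as a value."""
    last_exc = None
    for attempt in range(retries + 1):
        try:
            return fn(), None
        except Exception as e:  # broad catch mirrors the diff
            last_exc = e
            if attempt < retries:
                time.sleep(delay)
    return None, last_exc

async def main():
    result, err = await asyncio.to_thread(with_retries, lambda: 42)
    print(result, err)  # 42 None

asyncio.run(main())
```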

@@ -192,11 +418,15 @@ class SearchService:
 
     async def image_search(self, query: str) -> List[Dict[str, str]]:
         """
-        Image search via Crawl4AI media extraction.
+        Image search via Crawl4AI media extraction or DDGS.
         """
         if not query:
             return []
 
+        # If ddgs is selected, use it
+        if self._provider == "ddgs":
+            return await self._search_ddgs_images(query)
+
         url = self._build_image_url(query)
         logger.info(f"SearchService(Crawl4AI Image): fetching {url}")
 

entari_plugin_hyw-3.3.7.dist-info/METADATA
ADDED

Metadata-Version: 2.4
Name: entari_plugin_hyw
Version: 3.3.7
Summary: Use large language models to interpret chat messages
Author-email: kumoSleeping <zjr2992@outlook.com>
License: MIT
Project-URL: Homepage, https://github.com/kumoSleeping/entari-plugin-hyw
Project-URL: Repository, https://github.com/kumoSleeping/entari-plugin-hyw
Project-URL: Issue Tracker, https://github.com/kumoSleeping/entari-plugin-hyw/issues
Keywords: entari,llm,ai,bot,chat
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: arclet-entari[full]>=0.16.5
Requires-Dist: openai
Requires-Dist: httpx
Requires-Dist: markdown>=3.10
Requires-Dist: crawl4ai>=0.7.8
Requires-Dist: jinja2>=3.0
Requires-Dist: ddgs>=9.10.0
Provides-Extra: dev
Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"

# Entari Plugin HYW

[](https://badge.fury.io/py/entari-plugin-hyw)
[](https://opensource.org/licenses/MIT)
[](https://pypi.org/project/entari-plugin-hyw/)

**Entari Plugin HYW** is an advanced agentic chat plugin for the [Entari](https://github.com/entari-org/entari) framework. It leverages Large Language Models (LLMs) to provide intelligent, context-aware, and multi-modal responses within instant messaging environments (OneBot 11, Satori).

The plugin implements a three-stage pipeline (**Vision**, **Instruct**, **Agent**) to autonomously decide when to search the web, crawl pages, or analyze images to answer user queries effectively.

<img src="demo.jpg" width="300" />

## Features

- 📖 **Agentic Workflow**
  Autonomous decision-making process to search, browse, and reason.

- 🎑 **Multi-Modal Support**
  Native support for image analysis using Vision Language Models (VLMs).

- 🔍 **Web Search & Crawling**
  Integrated **DuckDuckGo** and **Crawl4AI** for real-time information retrieval.

- 🎨 **Rich Rendering**
  Responses are rendered as images containing Markdown, syntax-highlighted code, LaTeX math, and citation badges.

- 🔌 **Protocol Support**
  Deep integration with the OneBot 11 and Satori protocols, handling reply context and JSON cards.

## Installation

```bash
pip install entari-plugin-hyw
```

## Configuration

Configure the plugin in your `entari.yml`.

### Minimal Configuration

```yaml
plugins:
  entari_plugin_hyw:
    # Trigger command
    question_command: ".q"

    # Main Model (Required)
    model_name: "google/gemini-2.0-flash-exp"
    api_key: "your-api-key-here"
    base_url: "https://generativelanguage.googleapis.com/v1beta/openai/"
```

### Configuration Reference

| Option | Type | Default | Description |
| :--- | :--- | :--- | :--- |
| **Basic** | | | |
| `question_command` | `str` | `/q` | The command that triggers the bot. |
| `reaction` | `bool` | `true` | React with an emoji on start (currently only the Lagrange OneBot extension). |
| `quote` | `bool` | `true` | Quote the user's message in the reply. |
| **Models** | | | |
| `model_name` | `str` | *None* | **Required.** Main Agent model ID. |
| `api_key` | `str` | *None* | **Required.** API key. |
| `base_url` | `str` | `...` | OpenAI-compatible API base URL. |
| `extra_body` | `dict` | `null` | Extra parameters passed to the LLM (e.g. `reasoning_effort`). |
| **Specialized** | | | |
| `vision_model_name` | `str` | *None* | Model for images. Defaults to `model_name`. |
| `intruct_model_name` | `str` | *None* | Model for intent recognition. Defaults to `model_name`. |
| **Tools** | | | |
| `search_provider` | `str` | `ddgs` | Search backend: `ddgs` (DuckDuckGo), `crawl4ai`, or `httpx`. |
| `search_limit` | `int` | `8` | Max search results. |
| `headless` | `bool` | `true` | Browser headless mode. |

## Usage

### Commands

- **Text Query**
  ```text
  .q What's the latest news on Rust 1.83?
  .q What new features does Rust 1.83 have?
  ```

- **Image Analysis**
  *(Send an image with the command, or reply to an image)*
  ```text
  .q [Image] Explain this error.
  ```

- **Follow-up**
  *Reply to the bot's message to continue the conversation.*

-----

## License

This project is licensed under the MIT License.