entari-plugin-hyw 3.3.5__py3-none-any.whl → 3.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of entari-plugin-hyw might be problematic.

@@ -173,7 +173,8 @@ class ContentRenderer:
          stats: Dict[str, Any] = None,
          references: List[Dict[str, Any]] = None,
          page_references: List[Dict[str, Any]] = None,
-         stages_used: List[Dict[str, Any]] = None,
+         image_references: List[Dict[str, Any]] = None,  # Added
+         stages_used: List[Dict[str, Any]] = None,
          flow_steps: List[Dict[str, Any]] = None,
          model_name: str = "",
          provider_name: str = "Unknown",
@@ -197,6 +198,9 @@ class ContentRenderer:
          # Preprocess to fix common markdown issues
          markdown_content = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', markdown_content)

+         # references, page_references, image_references are already parsed by pipeline
+         # No filtering needed here - use them directly
+
          # AGGRESSIVE CLEANING: Strip out "References" section and "[code]" blocks from the text
          # because we are rendering them as structured UI elements now.

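For context, the preprocessing regex in this hunk inserts a blank line wherever a list marker directly follows a non-blank line, so downstream Markdown parsing treats it as a real list rather than continuation text. A standalone check (sample input assumed):

```python
import re

# A line of prose followed immediately by a list marker: without a blank
# line, Markdown would treat "- a list item" as continuation text.
sample = "Intro sentence\n- a list item"
fixed = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', sample)
print(fixed)
# Intro sentence
#
# - a list item
```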
@@ -262,41 +266,21 @@ class ContentRenderer:

          content_html = restore_math(content_html)

-         # Post-process to style citation markers
-         # We split by code blocks to avoid messing up real code, BUT our citations ARE code blocks now.
-         # So we need to look at the code blocks themselves.
-         parts = re.split(r'(<code.*?>.*?</code>)', content_html, flags=re.DOTALL)
-         for i, part in enumerate(parts):
-             # Check if this part is a code block containing our specific citation format
-             if part.startswith('<code'):
-                 # Match <code>ref:123</code>
-                 # Note: attributes like class might be present if we are unlucky, but `ref:` inside usually means inline code.
-
-                 # 1. Numeric: <code>ref:123</code>
-                 ref_match = re.match(r'^<code.*?>ref:(\d+)</code>$', part)
-                 if ref_match:
-                     citation_id = ref_match.group(1)
-                     parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{citation_id}</span>'
-                     continue
-                 # 2. Flow marker: <code>flow:a</code>
-                 flow_match = re.match(r'^<code.*?>flow:([a-zA-Z])</code>$', part)
-                 if flow_match:
-                     flow_id = flow_match.group(1).lower()
-                     parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{flow_id}</span>'
-                     continue
-
-         # If it's NOT a code block, or a code block we didn't transform, we leave it alone.
-         # (Previous logic was to regex replace inside non-code blocks. We don't need that anymore
-         # because the prompt now enforces code spans).
-         content_html = "".join(parts)
+         # Convert [search:N] to blue badge
+         content_html = re.sub(
+             r'\[search:(\d+)\]',
+             r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+             content_html
+         )
+         # Convert [page:N] to orange badge
+         content_html = re.sub(
+             r'\[page:(\d+)\]',
+             r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+             content_html
+         )

-         # Strip out the structured JSON blocks if they leaked into the content
-         # Look for <pre>... containing "references" at the end
-         # Make regex robust to any language class or no class
-         content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
-         # Loop to remove multiple if present
-         while re.search(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', content_html, flags=re.DOTALL | re.IGNORECASE):
-             content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
+         # Strip out the references code block if it leaked into the content
+         content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)

          # --- PREPARE DATA FOR JINJA TEMPLATE ---

@@ -361,6 +345,18 @@ class ContentRenderer:
                  "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
              })

+         # 2c. Image Reference Processing
+         processed_image_refs = []
+         if image_references:
+             for ref in image_references[:8]:
+                 url = ref.get("url", "#")
+                 processed_image_refs.append({
+                     "title": ref.get("title", "Image"),
+                     "url": url,
+                     "thumbnail": ref.get("thumbnail") or url,  # Fallback to url if thumbnail not provided
+                     "domain": self._get_domain(url) or ref.get("domain") or "image"
+                 })
+
          flow_steps = flow_steps or []

          if stages_used:
@@ -404,8 +400,12 @@ class ContentRenderer:
              stage_children = {}

              # References go to "Search"
-             if name == "Search" and processed_refs:
-                 stage_children['references'] = processed_refs
+             # Also Image References to "Search"
+             if name == "Search":
+                 if processed_refs:
+                     stage_children['references'] = processed_refs
+                 if processed_image_refs:
+                     stage_children['image_references'] = processed_image_refs

              # Flow steps go to "Agent"
              if name == "Agent" and flow_steps:
@@ -425,7 +425,7 @@ class ContentRenderer:
              # Pass through Search Queries
              if "queries" in stage:
                  stage_children["queries"] = stage["queries"]
-
+
              # Pass through Crawled Pages
              if "crawled_pages" in stage:
                  stage_children["crawled_pages"] = stage["crawled_pages"]
@@ -441,12 +441,36 @@ class ContentRenderer:
                  **stage_children  # Merge children
              })

-
-
-
+         # Ensure references are displayed even if no "Search" stage was present
+         has_search_stage = any(s.get("name") == "Search" for s in processed_stages)
+         if not has_search_stage and (processed_refs or processed_image_refs):
+             # Create a virtual Search stage
+             virtual_search = {
+                 "name": "Search",
+                 "model": "DuckDuckGo",  # Default assumption
+                 "model_short": "DuckDuckGo",
+                 "provider": "Reference",
+                 "icon_html": SEARCH_ICON,
+                 "time_str": "0.00s",
+                 "cost_str": "$0",
+             }
+             if processed_refs:
+                 virtual_search['references'] = processed_refs
+             if processed_image_refs:
+                 virtual_search['image_references'] = processed_image_refs
+
+             # Insert after Vision/Instruct (usually index 0 or 1), or at start
+             insert_idx = 0
+             if processed_stages and processed_stages[0]["name"] in ["Vision", "Instruct"]:
+                 insert_idx = 1
+                 if len(processed_stages) > 1 and processed_stages[1]["name"] == "Instruct":
+                     insert_idx = 2
+
+             processed_stages.insert(insert_idx, virtual_search)

          # 4. Stats Footer Logic
          processed_stats = {}
+         stats_dict = {}
          if stats:
              # Assuming standard 'stats' dict structure, handle list if needed
              if isinstance(stats, list):
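The insertion-index rule is easiest to see with concrete stage lists. A minimal sketch (stage dicts are reduced to their `name` key, and the second check is assumed to nest under the first, since the diff view flattens indentation):

```python
def search_insert_index(stages):
    # Place the virtual "Search" stage after a leading Vision and/or
    # Instruct stage, otherwise at the front of the pipeline.
    idx = 0
    if stages and stages[0]["name"] in ["Vision", "Instruct"]:
        idx = 1
        if len(stages) > 1 and stages[1]["name"] == "Instruct":
            idx = 2
    return idx

print(search_insert_index([{"name": "Agent"}]))                                           # 0
print(search_insert_index([{"name": "Vision"}, {"name": "Agent"}]))                       # 1
print(search_insert_index([{"name": "Vision"}, {"name": "Instruct"}, {"name": "Agent"}])) # 2
```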
@@ -3,8 +3,8 @@ VISION_SP = """# You are a professional vision-to-text expert.
  # Core task
  - Intelligently analyze the image content and put it into words; add nothing beyond that
  - Text first: if the image contains clear text (documents, screenshots, etc.), transcribe it completely and accurately, leaving nothing out.
- - Visual supplement: if there is no text, focus on describing the visual content (objects, scene, atmosphere).
+ - Visual supplement: after covering the text, give a summarizing description of the visual content (objects, scene, atmosphere).
- - User request: follow any emphasis the user message hints at for the transcription; if there is none or it is unrelated, ignore it and proceed as usual.
+ - User request: follow any emphasis the user message hints at for the transcription; if it is unrelated, ignore it.

  ## User message
  ```text
@@ -32,7 +32,7 @@ INTRUCT_SP = """# You are a professional instruction expert.
  {tools_desc}

  ## Your reply
- After calling tools, no additional text is needed.
+ After calling tools, do not reply with additional text, to save tokens.

  ## User message
  ```
@@ -53,24 +53,36 @@ AGENT_SP = """# You are an Agent master-control expert. You need to understand the user's intent,

  Current mode: {mode}, {mode_desc}

- ## Final reply format requirements
- - Output the Markdown body directly.

+
+ ## Process requirements
  When you send text without calling a tool, that text becomes the final reply. Follow these rules:
+ - Deliver a report directly; there is no need to answer the user's message conversationally
  - Language: Simplified Chinese, encyclopedia style, rigorous and not long-winded.
- - Body format: use Markdown, [highlight, katex], with a top-level heading; rich content that highlights the key points.
+ - Body format:
+   - Use Markdown format; highlight and KaTeX are supported
+   - Start with a `# ` top-level heading; no filler, and do not answer the user's question directly.
+   - Rich content that highlights the key points.
  - Tool citations:
-   - Search snippet citations: use `search:numeric id`, e.g. `search:3`
-   - Page content citations: use `page:numeric id`, e.g. `page:5`
-   - Image content is not cited
-   - Each citation must be marked separately
+   > Important: all body content must be based on actual information; guarantee 100% truthfulness
+   - Citation rules:
+     - Cite information from this session only when it is useful for solving the problem; messages that are not needed may go uncited.
+     - Markers must genuinely correspond to information obtained in the context, and to the entries in references; images correspond by order.
+     - Citation rules within the body:
+       - Search snippet citations: e.g. [search:3][search:4]
+       - Page content citations: e.g. [page:5][page:6]
+       - Image citations: e.g. [image:7][image:8]
+     - search means the search snippets you obtained via internal_web_search; if there is no information from that tool, do not cite it
+     - page means the page content you obtained via crawl_page; if there is no information from that tool, do not cite it
+     - image means images you obtained via internal_image_search; just place the images in order, you do not need to cite them explicitly
  - Add a references code block at the bottom of the body:
    - Omit entries you do not use; if no expert provided information, write nothing.
    ```references
-   [1] [search] [text description](url)
-   [3] [search] [text description](url)
-   [5] [page] [page title](url)
-   [7] [page] [page title](url)
+   [2] [search] [text description](url)
+   [8] [search] [text description](url)
+   [1] [page] [page title](url)
+   [2] [page] [page title](url)
+   [1] [image] [source](url)
    ```

  ## User message
@@ -79,7 +91,6 @@ AGENT_SP = """# You are an Agent master-control expert. You need to understand the user's intent,
  ```
  """

- # PS: the agent has no image-search permission
  AGENT_SP_TOOLS_STANDARD_ADD = """
  You need to consolidate the existing information, extract the keywords from the user's message, and produce the final reply.
  """
@@ -127,4 +138,3 @@ AGENT_SP_IMAGE_SEARCH_ADD = """
  ```
  - For each internal_image_search call, pick 1 image to insert into the body
  """
-
@@ -1,10 +1,27 @@
  import urllib.parse
+ import asyncio
+ import re
+ import html
  from typing import List, Dict, Optional, Any
  from loguru import logger
  from crawl4ai import AsyncWebCrawler
  from crawl4ai.async_configs import CrawlerRunConfig
  from crawl4ai.cache_context import CacheMode

+ # Optional imports for new strategies
+ try:
+     import httpx
+ except ImportError:
+     httpx = None
+
+ try:
+     from ddgs import DDGS
+ except ImportError:
+     try:
+         from duckduckgo_search import DDGS
+     except ImportError:
+         DDGS = None
+
  # Shared crawler instance to avoid repeated init
  _shared_crawler: Optional[AsyncWebCrawler] = None

@@ -28,13 +45,19 @@ async def close_shared_crawler():

  class SearchService:
      """
-     Crawl4AI-backed search & fetch service.
-     Uses the configured search engine results page (SERP) URL and parses links from the HTML.
+     Multi-strategy search & fetch service.
+     Supported providers: 'crawl4ai' (default), 'httpx', 'ddgs'.
      """
      def __init__(self, config: Any):
          self.config = config
          self._default_limit = getattr(config, "search_limit", 8)
          self._crawler: Optional[AsyncWebCrawler] = None
+
+         # Configuration for retries/timeouts
+         self._search_timeout = getattr(config, "search_timeout", 10.0)
+         self._search_retries = getattr(config, "search_retries", 2)
+         self._provider = getattr(config, "search_provider", "crawl4ai")
+         logger.info(f"SearchService initialized: provider='{self._provider}', limit={self._default_limit}, timeout={self._search_timeout}s")

      def _build_search_url(self, query: str) -> str:
          encoded_query = urllib.parse.quote(query)
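Since the new settings are read with `getattr` and plain defaults, any config object exposing these attributes will work. A minimal stand-in (attribute names and defaults come from the hunk above; the dataclass itself is hypothetical):

```python
from dataclasses import dataclass

@dataclass
class SearchConfig:
    search_limit: int = 8
    search_timeout: float = 10.0       # per-attempt timeout, in seconds
    search_retries: int = 2            # extra attempts after the first failure
    search_provider: str = "crawl4ai"  # 'crawl4ai' | 'httpx' | 'ddgs'

# service = SearchService(SearchConfig(search_provider="ddgs"))
```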
@@ -53,8 +76,211 @@ class SearchService:
          return f"{base}{sep}q={encoded_query}&iax=images&ia=images"

      async def search(self, query: str) -> List[Dict[str, str]]:
+         """
+         Dispatch search to the configured provider.
+         """
+         if not query:
+             return []
+
+         provider = self._provider.lower()
+         logger.info(f"SearchService: searching for '{query}' using provider='{provider}'")
+
+         if provider == "httpx":
+             return await self._search_httpx(query)
+         elif provider == "ddgs":
+             return await self._search_ddgs(query)
+         else:
+             # Default to crawl4ai for backward compatibility or explicit choice
+             return await self._search_crawl4ai(query)
+
+     async def _search_httpx(self, query: str) -> List[Dict[str, str]]:
+         """
+         Directly fetch https://lite.duckduckgo.com/lite/ via httpx and parse HTML.
+         Fast, no browser overhead.
+         """
+         if not httpx:
+             logger.error("SearchService: httpx not installed, fallback to crawl4ai")
+             return await self._search_crawl4ai(query)
+
+         url = self._build_search_url(query)
+
+         results: List[Dict[str, str]] = []
+         try:
+             async with httpx.AsyncClient(timeout=self._search_timeout, follow_redirects=True) as client:
+                 resp = await client.get(url, headers={
+                     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
+                 })
+                 resp.raise_for_status()
+                 html_content = resp.text
+
+             # Regex parsing for DDG Lite
+             snippet_regex = re.compile(r'<td[^>]*>(.*?)</td>', re.DOTALL)
+             link_regex = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.DOTALL)
+
+             raw_links = link_regex.findall(html_content)
+
+             seen = set()
+             for href, text in raw_links:
+                 if len(results) >= self._default_limit:
+                     break
+
+                 # Clean href
+                 if "duckduckgo.com" in href:
+                     if "uddg=" in href:
+                         parsed = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
+                         href = parsed.get("uddg", [href])[0]
+                     else:
+                         continue
+
+                 if not href.startswith("http"):
+                     continue
+
+                 if href in seen:
+                     continue
+                 seen.add(href)
+
+                 # Title clean
+                 title = re.sub(r'<[^>]+>', '', text).strip()
+                 title = html.unescape(title)
+
+                 results.append({
+                     "title": title,
+                     "url": href,
+                     "domain": urllib.parse.urlparse(href).hostname or "",
+                     "content": title
+                 })
+
+             if not results:
+                 logger.warning("SearchService(httpx): No results parsed via regex.")
+
+             return results
+
+         except Exception as e:
+             logger.error(f"SearchService(httpx) failed: {e}")
+             return []
+
+     async def _search_ddgs(self, query: str) -> List[Dict[str, str]]:
+         """
+         Use duckduckgo_search library (Sync DDGS).
+         Executes in thread pool to allow async usage.
+         Supports retries and timeouts.
+         """
+         if not DDGS:
+             logger.error("SearchService: duckduckgo_search not installed, fallback to crawl4ai")
+             return await self._search_crawl4ai(query)
+
+         def _do_sync_search():
+             """Sync search function to run in thread"""
+             results: List[Dict[str, str]] = []
+             final_exc = None
+
+             for attempt in range(self._search_retries + 1):
+                 try:
+                     with DDGS(timeout=self._search_timeout) as ddgs:
+                         # Use positional argument for query to be safe across versions
+                         ddgs_gen = ddgs.text(
+                             query,
+                             region='cn-zh',
+                             safesearch='moderate',
+                             max_results=self._default_limit,
+                             backend="duckduckgo",
+                         )
+
+                         if ddgs_gen:
+                             for r in ddgs_gen:
+                                 results.append({
+                                     "title": r.get("title", ""),
+                                     "url": r.get("href", ""),
+                                     "domain": urllib.parse.urlparse(r.get("href", "")).hostname or "",
+                                     "content": r.get("body", "")
+                                 })
+                                 if len(results) >= self._default_limit:
+                                     break
+
+                     return results, None
+
+                 except Exception as e:
+                     final_exc = e
+                     if attempt < self._search_retries:
+                         import time
+                         time.sleep(1)
+
+             return [], final_exc
+
+         # Run sync search in executor
+         try:
+             results, err = await asyncio.to_thread(_do_sync_search)
+
+             if err:
+                 logger.warning(f"SearchService(ddgs) text search failed after retries: {err}")
+                 return []
+
+             logger.info(f"SearchService(ddgs): Got {len(results)} text results")
+             return results
+
+         except Exception as e:
+             logger.error(f"SearchService(ddgs) thread execution failed: {e}")
+             return []
+
+     async def _search_ddgs_images(self, query: str) -> List[Dict[str, str]]:
+         """
+         Use duckduckgo_search library for images.
+         """
+         if not DDGS:
+             return []
+
+         def _do_sync_image_search():
+             results: List[Dict[str, str]] = []
+             final_exc = None
+
+             for attempt in range(self._search_retries + 1):
+                 try:
+                     with DDGS(timeout=self._search_timeout) as ddgs:
+                         ddgs_gen = ddgs.images(
+                             query,
+                             region='cn-zh',
+                             safesearch='moderate',
+                             max_results=self._default_limit,
+                         )
+
+                         if ddgs_gen:
+                             for r in ddgs_gen:
+                                 # DDGS images returns: title, image, thumbnail, url, source, etc.
+                                 # API might differ, adapt to standard format
+                                 results.append({
+                                     "title": r.get("title", "Image"),
+                                     "url": r.get("image", "") or r.get("url", ""),  # Full image URL
+                                     "thumbnail": r.get("thumbnail", ""),
+                                     "domain": r.get("source", "") or urllib.parse.urlparse(r.get("url", "")).hostname or "",
+                                 })
+                                 if len(results) >= self._default_limit:
+                                     break
+
+                     return results, None
+                 except Exception as e:
+                     final_exc = e
+                     if attempt < self._search_retries:
+                         import time
+                         time.sleep(1)
+
+             return [], final_exc
+
+         try:
+             results, err = await asyncio.to_thread(_do_sync_image_search)
+             if err:
+                 logger.warning(f"SearchService(ddgs) image search failed: {err}")
+                 return []
+
+             logger.info(f"SearchService(ddgs): Got {len(results)} image results")
+             return results
+         except Exception as e:
+             logger.error(f"SearchService(ddgs) image thread failed: {e}")
+             return []
+
+     async def _search_crawl4ai(self, query: str) -> List[Dict[str, str]]:
          """
          Crawl the configured SERP using Crawl4AI and return parsed results.
+         Original implementation.
          """
          if not query:
              return []
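One detail worth noting in `_search_httpx` is the redirect unwrapping: DDG Lite result links point back at duckduckgo.com with the real target percent-encoded in the `uddg` query parameter. A standalone sketch of that step (example URL assumed):

```python
import urllib.parse

href = "https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpost&rut=abc"
params = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
href = params.get("uddg", [href])[0]  # fall back to the raw href if absent
print(href)  # https://example.com/post
```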
@@ -192,11 +418,15 @@ class SearchService:

      async def image_search(self, query: str) -> List[Dict[str, str]]:
          """
-         Image search via Crawl4AI media extraction.
+         Image search via Crawl4AI media extraction or DDGS.
          """
          if not query:
              return []

+         # If ddgs is selected, use it
+         if self._provider == "ddgs":
+             return await self._search_ddgs_images(query)
+
          url = self._build_image_url(query)
          logger.info(f"SearchService(Crawl4AI Image): fetching {url}")

@@ -0,0 +1,142 @@
+ Metadata-Version: 2.4
+ Name: entari_plugin_hyw
+ Version: 3.3.7
+ Summary: Use large language models to interpret chat messages
+ Author-email: kumoSleeping <zjr2992@outlook.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/kumoSleeping/entari-plugin-hyw
+ Project-URL: Repository, https://github.com/kumoSleeping/entari-plugin-hyw
+ Project-URL: Issue Tracker, https://github.com/kumoSleeping/entari-plugin-hyw/issues
+ Keywords: entari,llm,ai,bot,chat
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: arclet-entari[full]>=0.16.5
+ Requires-Dist: openai
+ Requires-Dist: httpx
+ Requires-Dist: markdown>=3.10
+ Requires-Dist: crawl4ai>=0.7.8
+ Requires-Dist: jinja2>=3.0
+ Requires-Dist: ddgs>=9.10.0
+ Provides-Extra: dev
+ Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
+ Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
+
+
+ # Entari Plugin HYW
+
+ [![PyPI version](https://badge.fury.io/py/entari-plugin-hyw.svg)](https://badge.fury.io/py/entari-plugin-hyw)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![Python Versions](https://img.shields.io/pypi/pyversions/entari-plugin-hyw.svg)](https://pypi.org/project/entari-plugin-hyw/)
+
+ **Entari Plugin HYW** is an advanced agentic chat plugin for the [Entari](https://github.com/entari-org/entari) framework. It leverages Large Language Models (LLMs) to provide intelligent, context-aware, and multi-modal responses within instant messaging environments (OneBot 11, Satori).
+
+ The plugin implements a three-stage pipeline (**Vision**, **Instruct**, **Agent**) to autonomously decide when to search the web, crawl pages, or analyze images to answer user queries effectively.
+
+ <img src="demo.jpg" width="300" />
+
+ ## Features
+
+ - 📖 **Agentic Workflow**
+   Autonomous decision-making process to search, browse, and reason.
+
+ - 🎑 **Multi-Modal Support**
+   Native support for image analysis using Vision Language Models (VLMs).
+
+ - 🔍 **Web Search & Crawling**
+   Integrated **DuckDuckGo** and **Crawl4AI** for real-time information retrieval.
+
+ - 🎨 **Rich Rendering**
+   Responses are rendered as images containing Markdown, syntax-highlighted code, LaTeX math, and citation badges.
+
+ - 🔌 **Protocol Support**
+   Deep integration with the OneBot 11 and Satori protocols, including reply-context and JSON-card handling.
+
+ ## Installation
+
+ ```bash
+ pip install entari-plugin-hyw
+ ```
+
+ ## Configuration
+
+ Configure the plugin in your `entari.yml`.
+
+ ### Minimal Configuration
+
+ ```yaml
+ plugins:
+   entari_plugin_hyw:
+     # Trigger command
+     question_command: ".q"
+
+     # Main Model (Required)
+     model_name: "google/gemini-2.0-flash-exp"
+     api_key: "your-api-key-here"
+     base_url: "https://generativelanguage.googleapis.com/v1beta/openai/"
+ ```
+
+ ### Configuration Reference
+
+ | Option | Type | Default | Description |
+ | :--- | :--- | :--- | :--- |
+ | **Basic** | | | |
+ | `question_command` | `str` | `/q` | The command that triggers the bot. |
+ | `reaction` | `bool` | `true` | React with an emoji on start (currently only the Lagrange OneBot extension). |
+ | `quote` | `bool` | `true` | Quote the user's message in the reply. |
+ | **Models** | | | |
+ | `model_name` | `str` | *None* | **Required.** Main Agent model ID. |
+ | `api_key` | `str` | *None* | **Required.** API key. |
+ | `base_url` | `str` | `...` | OpenAI-compatible API base URL. |
+ | `extra_body` | `dict` | `null` | Extra parameters passed to the LLM (e.g. `reasoning_effort`). |
+ | **Specialized** | | | |
+ | `vision_model_name` | `str` | *None* | Model for images. Defaults to `model_name`. |
+ | `intruct_model_name` | `str` | *None* | Model for intent. Defaults to `model_name`. |
+ | **Tools** | | | |
+ | `search_provider` | `str` | `ddgs` | `ddgs` (DuckDuckGo), `crawl4ai`, `httpx`. |
+ | `search_limit` | `int` | `8` | Max search results. |
+ | `headless` | `bool` | `true` | Browser headless mode. |
+
+ ## Usage
+
+ ### Commands
+
+ - **Text Query**
+   ```text
+   .q What's the latest news on Rust 1.83?
+   ```
+
+ - **Image Analysis**
+   *(Send an image with the command, or reply to an image)*
+   ```text
+   .q [Image] Explain this error.
+   ```
+
+ - **Follow-up**
+   *Reply to the bot's message to continue the conversation.*
+
+ -----
+
+ ## License
+
+ This project is licensed under the MIT License.
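For convenience, the documented options from the Configuration Reference combine into a fuller `entari.yml` sketch (values are placeholders; nesting follows the minimal example above):

```yaml
plugins:
  entari_plugin_hyw:
    question_command: ".q"
    reaction: true
    quote: true
    model_name: "google/gemini-2.0-flash-exp"
    api_key: "your-api-key-here"
    base_url: "https://generativelanguage.googleapis.com/v1beta/openai/"
    vision_model_name: "google/gemini-2.0-flash-exp"  # defaults to model_name
    search_provider: "ddgs"  # ddgs | crawl4ai | httpx
    search_limit: 8
    headless: true
```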