entari-plugin-hyw 3.3.4__py3-none-any.whl → 3.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- entari_plugin_hyw/__init__.py +14 -351
- entari_plugin_hyw/assets/libs/tailwind.css +1 -1
- entari_plugin_hyw/assets/tailwind.input.css +1 -1
- entari_plugin_hyw/assets/template.j2 +113 -20
- entari_plugin_hyw/core/config.py +2 -0
- entari_plugin_hyw/core/pipeline.py +116 -112
- entari_plugin_hyw/core/render.py +39 -42
- entari_plugin_hyw/utils/prompts.py +26 -15
- entari_plugin_hyw/utils/search.py +234 -4
- {entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/METADATA +2 -1
- {entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/RECORD +13 -14
- entari_plugin_hyw/core/render.py.bak +0 -926
- {entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/core/render.py
CHANGED
@@ -173,7 +173,8 @@ class ContentRenderer:
         stats: Dict[str, Any] = None,
         references: List[Dict[str, Any]] = None,
         page_references: List[Dict[str, Any]] = None,
-        stages_used: List[Dict[str, Any]] = None,
+        image_references: List[Dict[str, Any]] = None,  # Added
+        stages_used: List[Dict[str, Any]] = None,
         flow_steps: List[Dict[str, Any]] = None,
         model_name: str = "",
         provider_name: str = "Unknown",
@@ -197,6 +198,9 @@ class ContentRenderer:
         # Preprocess to fix common markdown issues
         markdown_content = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', markdown_content)
 
+        # references, page_references, image_references are already parsed by pipeline
+        # No filtering needed here - use them directly
+
         # AGGRESSIVE CLEANING: Strip out "References" section and "[code]" blocks from the text
         # because we are rendering them as structured UI elements now.
 
@@ -262,41 +266,21 @@ class ContentRenderer:
 
         content_html = restore_math(content_html)
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-            if ref_match:
-                citation_id = ref_match.group(1)
-                parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{citation_id}</span>'
-                continue
-            # 2. Flow marker: <code>flow:a</code>
-            flow_match = re.match(r'^<code.*?>flow:([a-zA-Z])</code>$', part)
-            if flow_match:
-                flow_id = flow_match.group(1).lower()
-                parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{flow_id}</span>'
-                continue
-
-        # If it's NOT a code block, or a code block we didn't transform, we leave it alone.
-        # (Previous logic was to regex replace inside non-code blocks. We don't need that anymore
-        # because the prompt now enforces code spans).
-        content_html = "".join(parts)
+        # Convert [search:N] to blue badge
+        content_html = re.sub(
+            r'\[search:(\d+)\]',
+            r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+            content_html
+        )
+        # Convert [page:N] to orange badge
+        content_html = re.sub(
+            r'\[page:(\d+)\]',
+            r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
+            content_html
+        )
 
-        # Strip out the
-
-        # Make regex robust to any language class or no class
-        content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
-        # Loop to remove multiple if present
-        while re.search(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', content_html, flags=re.DOTALL | re.IGNORECASE):
-            content_html = re.sub(r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
+        # Strip out the references code block if it leaked into the content
+        content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
 
         # --- PREPARE DATA FOR JINJA TEMPLATE ---
 
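In effect, the old token-walking badge logic collapses to two regex passes over the rendered HTML. A minimal standalone sketch of the new behavior, where `BADGE` is an invented stand-in for the full Tailwind span literal above:

```python
import re

# BADGE is a stand-in for the full Tailwind <span> literal used in render.py.
BADGE = '<span class="badge-{color}">{n}</span>'

def convert_citations(content_html: str) -> str:
    # [search:N] markers become blue badges, [page:N] markers orange ones.
    content_html = re.sub(r'\[search:(\d+)\]',
                          lambda m: BADGE.format(color="blue", n=m.group(1)),
                          content_html)
    content_html = re.sub(r'\[page:(\d+)\]',
                          lambda m: BADGE.format(color="orange", n=m.group(1)),
                          content_html)
    return content_html

print(convert_citations("Qubits are two-level systems [search:1][page:2]."))
# Qubits are two-level systems <span class="badge-blue">1</span><span class="badge-orange">2</span>.
```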
@@ -361,6 +345,18 @@ class ContentRenderer:
                     "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                 })
 
+        # 2c. Image Reference Processing
+        processed_image_refs = []
+        if image_references:
+            for ref in image_references[:8]:
+                url = ref.get("url", "#")
+                processed_image_refs.append({
+                    "title": ref.get("title", "Image"),
+                    "url": url,
+                    "thumbnail": ref.get("thumbnail") or url,  # Fallback to url if thumbnail not provided
+                    "domain": self._get_domain(url) or ref.get("domain") or "image"
+                })
+
         flow_steps = flow_steps or []
 
         if stages_used:
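The `or`-chained fallbacks mean a sparse image hit still yields a renderable card. A self-contained sketch of the same mapping; the `_get_domain` helper below is a hypothetical stand-in for the renderer's own method:

```python
from urllib.parse import urlparse

def _get_domain(url: str) -> str:
    # Hypothetical stand-in for ContentRenderer._get_domain.
    return urlparse(url).hostname or ""

ref = {"title": "Qubit diagram", "url": "https://example.org/qubit.png"}  # no thumbnail/domain keys
url = ref.get("url", "#")
card = {
    "title": ref.get("title", "Image"),
    "url": url,
    "thumbnail": ref.get("thumbnail") or url,                   # falls back to the full image URL
    "domain": _get_domain(url) or ref.get("domain") or "image",
}
print(card["thumbnail"], card["domain"])  # https://example.org/qubit.png example.org
```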
@@ -404,8 +400,12 @@ class ContentRenderer:
             stage_children = {}
 
             # References go to "Search"
-
-
+            # Also Image References to "Search"
+            if name == "Search":
+                if processed_refs:
+                    stage_children['references'] = processed_refs
+                if processed_image_refs:
+                    stage_children['image_references'] = processed_image_refs
 
             # Flow steps go to "Agent"
             if name == "Agent" and flow_steps:
@@ -425,7 +425,7 @@ class ContentRenderer:
             # Pass through Search Queries
             if "queries" in stage:
                 stage_children["queries"] = stage["queries"]
-
+
             # Pass through Crawled Pages
             if "crawled_pages" in stage:
                 stage_children["crawled_pages"] = stage["crawled_pages"]
@@ -441,12 +441,9 @@ class ContentRenderer:
                 **stage_children  # Merge children
             })
 
-
-
-
-
         # 4. Stats Footer Logic
         processed_stats = {}
+        stats_dict = {}
         if stats:
             # Assuming standard 'stats' dict structure, handle list if needed
             if isinstance(stats, list):
entari_plugin_hyw/utils/prompts.py
CHANGED
@@ -3,8 +3,8 @@ VISION_SP = """# 你是一个专业的视觉转文字专家.
 # 核心任务
 - 智能分析图片内容, 转述成文本, 除此之外不要添加任何内容
 - 文字优先: 若包含清晰文字(文档、截图等), 必须完整准确转录, 不要遗漏.
-- 视觉补充:
-- 用户要求: 根据用户消息中提示侧重转文本的偏向,
+- 视觉补充: 解释完文字后, 描述视觉内容总结(物体、场景、氛围).
+- 用户要求: 根据用户消息中提示侧重转文本的偏向, 若无关联则不理会.
 
 ## 用户消息
 ```text
@@ -32,7 +32,7 @@ INTRUCT_SP = """# 你是一个专业的指导专家.
 {tools_desc}
 
 ## 你的回复
-
+调用工具后无需回复额外文本节省token.
 
 ## 用户消息
 ```
@@ -53,23 +53,36 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
 
 当前模式: {mode}, {mode_desc}
 
-## 最终回复格式要求
-- 直接输出 Markdown 正文.
 
+
+## 过程要求
 当不调用工具发送文本, 即会变成最终回复, 请遵守:
+- 直接给出一篇报告, 无需回答用户消息
 - 语言: 简体中文, 百科式风格, 语言严谨不啰嗦.
-- 正文格式:
+- 正文格式:
+  - 使用 Markdown 格式, 支持 hightlight, katex
+  - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
+  - 内容丰富突出重点.
 - 工具引用:
-
--
-
+> 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
+- 引用规则:
+  - 本次会话中存在对解决此问题有用的信息才加以引用, 不需要的消息可以不引用.
+  - 角标必须真实对应上下文中获取的信息, 同时对应 references 中的内容, 图片按顺序对应.
+  - 正文中的引用规则
+    - 搜索摘要引用: 使用如 [search:3][search:4]
+    - 页面内容引用: 使用如 [page:5][page:6]
+    - 图片引用: 使用如 [image:7][image:8]
+  - search 的意思是你使用 internal_web_search 获取的搜索摘要, 如果没有此工具相关信息则不引用
+  - page 的意思是你使用 crawl_page 获取的页面内容, 如果没有此工具相关信息则不引用
+  - image 的意思是你使用 internal_image_search 获取的图片, 图片按顺序摆放即可, 你无需显式引用
 - 在正文底部添加 references 代码块:
   - 用不到的条目不写, 没有专家给信息就不写.
 ```references
-[
-[
-[
-[
+[2] [search] [文本描述](url)
+[8] [search] [文本描述](url)
+[1] [page] [页面标题](url)
+[2] [page] [页面标题](url)
+[1] [image] [来源](url)
 ```
 
 ## 用户消息
@@ -78,7 +91,6 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
 ```
 """
 
-# PS: agent 无搜索图片权限
 AGENT_SP_TOOLS_STANDARD_ADD = """
 你需要整合已有的信息, 提炼用户消息中的关键词, 进行最终回复.
 """
@@ -126,4 +138,3 @@ AGENT_SP_IMAGE_SEARCH_ADD = """
 ```
 - 每进行一次 internal_image_search, 挑选 1 张图像插入正文
 """
-
entari_plugin_hyw/utils/search.py
CHANGED
@@ -1,10 +1,27 @@
 import urllib.parse
+import asyncio
+import re
+import html
 from typing import List, Dict, Optional, Any
 from loguru import logger
 from crawl4ai import AsyncWebCrawler
 from crawl4ai.async_configs import CrawlerRunConfig
 from crawl4ai.cache_context import CacheMode
 
+# Optional imports for new strategies
+try:
+    import httpx
+except ImportError:
+    httpx = None
+
+try:
+    from ddgs import DDGS
+except ImportError:
+    try:
+        from duckduckgo_search import DDGS
+    except ImportError:
+        DDGS = None
+
 # Shared crawler instance to avoid repeated init
 _shared_crawler: Optional[AsyncWebCrawler] = None
 
@@ -28,13 +45,19 @@ async def close_shared_crawler():
 
 class SearchService:
     """
-
-
+    Multi-strategy search & fetch service.
+    Supported providers: 'crawl4ai' (default), 'httpx', 'ddgs'.
     """
     def __init__(self, config: Any):
        self.config = config
-        self._default_limit = 8
+        self._default_limit = getattr(config, "search_limit", 8)
         self._crawler: Optional[AsyncWebCrawler] = None
+
+        # Configuration for retries/timeouts
+        self._search_timeout = getattr(config, "search_timeout", 10.0)
+        self._search_retries = getattr(config, "search_retries", 2)
+        self._provider = getattr(config, "search_provider", "crawl4ai")
+        logger.info(f"SearchService initialized: provider='{self._provider}', limit={self._default_limit}, timeout={self._search_timeout}s")
 
     def _build_search_url(self, query: str) -> str:
         encoded_query = urllib.parse.quote(query)
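Because every new knob is read with `getattr(..., default)`, existing configs keep working unchanged. A hedged sketch of opting into the new provider; the `BasicConfig` class name is invented for illustration, only the attribute names from the getattr calls above matter:

```python
from dataclasses import dataclass

@dataclass
class BasicConfig:
    # Invented carrier class: SearchService only reads these attributes
    # via getattr(), so any object exposing them works.
    search_provider: str = "ddgs"    # 'crawl4ai' (default), 'httpx', or 'ddgs'
    search_limit: int = 5
    search_timeout: float = 8.0
    search_retries: int = 1

# service = SearchService(BasicConfig())
# logs: SearchService initialized: provider='ddgs', limit=5, timeout=8.0s
```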
@@ -53,8 +76,211 @@ class SearchService:
         return f"{base}{sep}q={encoded_query}&iax=images&ia=images"
 
     async def search(self, query: str) -> List[Dict[str, str]]:
+        """
+        Dispatch search to the configured provider.
+        """
+        if not query:
+            return []
+
+        provider = self._provider.lower()
+        logger.info(f"SearchService: searching for '{query}' using provider='{provider}'")
+
+        if provider == "httpx":
+            return await self._search_httpx(query)
+        elif provider == "ddgs":
+            return await self._search_ddgs(query)
+        else:
+            # Default to crawl4ai for backward compatibility or explicit choice
+            return await self._search_crawl4ai(query)
+
+    async def _search_httpx(self, query: str) -> List[Dict[str, str]]:
+        """
+        Directly fetch https://lite.duckduckgo.com/lite/ via httpx and parse HTML.
+        Fast, no browser overhead.
+        """
+        if not httpx:
+            logger.error("SearchService: httpx not installed, fallback to crawl4ai")
+            return await self._search_crawl4ai(query)
+
+        url = self._build_search_url(query)
+
+        results: List[Dict[str, str]] = []
+        try:
+            async with httpx.AsyncClient(timeout=self._search_timeout, follow_redirects=True) as client:
+                resp = await client.get(url, headers={
+                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
+                })
+                resp.raise_for_status()
+                html_content = resp.text
+
+            # Regex parsing for DDG Lite
+            snippet_regex = re.compile(r'<td[^>]*>(.*?)</td>', re.DOTALL)
+            link_regex = re.compile(r'<a[^>]+href="([^"]+)"[^>]*>(.*?)</a>', re.DOTALL)
+
+            raw_links = link_regex.findall(html_content)
+
+            seen = set()
+            for href, text in raw_links:
+                if len(results) >= self._default_limit:
+                    break
+
+                # Clean href
+                if "duckduckgo.com" in href:
+                    if "uddg=" in href:
+                        parsed = urllib.parse.parse_qs(urllib.parse.urlparse(href).query)
+                        href = parsed.get("uddg", [href])[0]
+                    else:
+                        continue
+
+                if not href.startswith("http"):
+                    continue
+
+                if href in seen:
+                    continue
+                seen.add(href)
+
+                # Title clean
+                title = re.sub(r'<[^>]+>', '', text).strip()
+                title = html.unescape(title)
+
+                results.append({
+                    "title": title,
+                    "url": href,
+                    "domain": urllib.parse.urlparse(href).hostname or "",
+                    "content": title
+                })
+
+            if not results:
+                logger.warning("SearchService(httpx): No results parsed via regex.")
+
+            return results
+
+        except Exception as e:
+            logger.error(f"SearchService(httpx) failed: {e}")
+            return []
+
+    async def _search_ddgs(self, query: str) -> List[Dict[str, str]]:
+        """
+        Use duckduckgo_search library (Sync DDGS).
+        Executes in thread pool to allow async usage.
+        Supports retries and timeouts.
+        """
+        if not DDGS:
+            logger.error("SearchService: duckduckgo_search not installed, fallback to crawl4ai")
+            return await self._search_crawl4ai(query)
+
+        def _do_sync_search():
+            """Sync search function to run in thread"""
+            results: List[Dict[str, str]] = []
+            final_exc = None
+
+            for attempt in range(self._search_retries + 1):
+                try:
+                    with DDGS(timeout=self._search_timeout) as ddgs:
+                        # Use positional argument for query to be safe across versions
+                        ddgs_gen = ddgs.text(
+                            query,
+                            region='cn-zh',
+                            safesearch='moderate',
+                            max_results=self._default_limit,
+                            backend="duckduckgo",
+                        )
+
+                        if ddgs_gen:
+                            for r in ddgs_gen:
+                                results.append({
+                                    "title": r.get("title", ""),
+                                    "url": r.get("href", ""),
+                                    "domain": urllib.parse.urlparse(r.get("href", "")).hostname or "",
+                                    "content": r.get("body", "")
+                                })
+                                if len(results) >= self._default_limit:
+                                    break
+
+                    return results, None
+
+                except Exception as e:
+                    final_exc = e
+                    if attempt < self._search_retries:
+                        import time
+                        time.sleep(1)
+
+            return [], final_exc
+
+        # Run sync search in executor
+        try:
+            results, err = await asyncio.to_thread(_do_sync_search)
+
+            if err:
+                logger.warning(f"SearchService(ddgs) text search failed after retries: {err}")
+                return []
+
+            logger.info(f"SearchService(ddgs): Got {len(results)} text results")
+            return results
+
+        except Exception as e:
+            logger.error(f"SearchService(ddgs) thread execution failed: {e}")
+            return []
+
+    async def _search_ddgs_images(self, query: str) -> List[Dict[str, str]]:
+        """
+        Use duckduckgo_search library for images.
+        """
+        if not DDGS:
+            return []
+
+        def _do_sync_image_search():
+            results: List[Dict[str, str]] = []
+            final_exc = None
+
+            for attempt in range(self._search_retries + 1):
+                try:
+                    with DDGS(timeout=self._search_timeout) as ddgs:
+                        ddgs_gen = ddgs.images(
+                            query,
+                            region='cn-zh',
+                            safesearch='moderate',
+                            max_results=self._default_limit,
+                        )
+
+                        if ddgs_gen:
+                            for r in ddgs_gen:
+                                # DDGS images returns: title, image, thumbnail, url, source, etc.
+                                # API might differ, adapt to standard format
+                                results.append({
+                                    "title": r.get("title", "Image"),
+                                    "url": r.get("image", "") or r.get("url", ""),  # Full image URL
+                                    "thumbnail": r.get("thumbnail", ""),
+                                    "domain": r.get("source", "") or urllib.parse.urlparse(r.get("url", "")).hostname or "",
+                                })
+                                if len(results) >= self._default_limit:
+                                    break
+
+                    return results, None
+                except Exception as e:
+                    final_exc = e
+                    if attempt < self._search_retries:
+                        import time
+                        time.sleep(1)
+
+            return [], final_exc
+
+        try:
+            results, err = await asyncio.to_thread(_do_sync_image_search)
+            if err:
+                logger.warning(f"SearchService(ddgs) image search failed: {err}")
+                return []
+
+            logger.info(f"SearchService(ddgs): Got {len(results)} image results")
+            return results
+        except Exception as e:
+            logger.error(f"SearchService(ddgs) image thread failed: {e}")
+            return []
+
+    async def _search_crawl4ai(self, query: str) -> List[Dict[str, str]]:
         """
         Crawl the configured SERP using Crawl4AI and return parsed results.
+        Original implementation.
         """
         if not query:
             return []
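A minimal end-to-end usage sketch of the dispatcher, assuming the package import path shown in the RECORD below; `SimpleNamespace` works because the service only reads config attributes via `getattr`:

```python
import asyncio
from types import SimpleNamespace

from entari_plugin_hyw.utils.search import SearchService

async def main() -> None:
    config = SimpleNamespace(search_provider="httpx", search_limit=3)
    service = SearchService(config)

    # search() routes to _search_httpx(); with provider="ddgs" it would
    # route to _search_ddgs(), and image_search() to _search_ddgs_images().
    for hit in await service.search("quantum computing"):
        print(hit["title"], "->", hit["url"])

asyncio.run(main())
```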
@@ -192,11 +418,15 @@ class SearchService:
 
     async def image_search(self, query: str) -> List[Dict[str, str]]:
         """
-        Image search via Crawl4AI media extraction.
+        Image search via Crawl4AI media extraction or DDGS.
         """
         if not query:
             return []
 
+        # If ddgs is selected, use it
+        if self._provider == "ddgs":
+            return await self._search_ddgs_images(query)
+
         url = self._build_image_url(query)
         logger.info(f"SearchService(Crawl4AI Image): fetching {url}")
 
{entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: entari_plugin_hyw
-Version: 3.3.4
+Version: 3.3.6
 Summary: Use large language models to interpret chat messages
 Author-email: kumoSleeping <zjr2992@outlook.com>
 License: MIT
@@ -22,6 +22,7 @@ Requires-Dist: httpx
 Requires-Dist: markdown>=3.10
 Requires-Dist: crawl4ai>=0.7.8
 Requires-Dist: jinja2>=3.0
+Requires-Dist: ddgs>=9.10.0
 Provides-Extra: dev
 Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
 Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
{entari_plugin_hyw-3.3.4.dist-info → entari_plugin_hyw-3.3.6.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
-entari_plugin_hyw/__init__.py,sha256=
+entari_plugin_hyw/__init__.py,sha256=K5WW4usKpP38CTxQHpm693brIhsbxBRsD0ojwekmMGE,19689
 entari_plugin_hyw/assets/package-lock.json,sha256=TIrLM-wLWZTrp3LKfzhEVuduhvBJmI93NdQEKYLW2W0,33172
 entari_plugin_hyw/assets/package.json,sha256=Y4H8JGtp3nv2WUtI20tXoXWddR-dwwKJhqQVLercpiw,306
 entari_plugin_hyw/assets/tailwind.config.js,sha256=S8I9X8hI8IaQRczWK9hTW-zl4oVpAXw5ykeksrzHjpU,382
-entari_plugin_hyw/assets/tailwind.input.css,sha256=
+entari_plugin_hyw/assets/tailwind.input.css,sha256=kaG4To1QFHBoJdgP3dZH9E0eMBCYXuw2nJj0uSRgsUY,4720
 entari_plugin_hyw/assets/template.html,sha256=xPgOKlhKzz2p2_1nn5y44XwD37UqkW2uwHn4HpaGtCU,5646
 entari_plugin_hyw/assets/template.html.bak,sha256=xPgOKlhKzz2p2_1nn5y44XwD37UqkW2uwHn4HpaGtCU,5646
-entari_plugin_hyw/assets/template.j2,sha256=
+entari_plugin_hyw/assets/template.j2,sha256=7RKVXVTUtAApxGZyxbB_r-XtgugbvDniE8hMl8vvhCc,21995
 entari_plugin_hyw/assets/icon/anthropic.svg,sha256=ASsy1ypo3osNc3n-B0R81tk_dIFsVgg7qQORrd5T2kA,558
 entari_plugin_hyw/assets/icon/deepseek.png,sha256=KWWAr9aeYMc6I07U_1qo7zcXO6e7-kfd9S2XjQumnf4,25338
 entari_plugin_hyw/assets/icon/gemini.svg,sha256=H74CoVmx5opcCtr3Ay3M09dpqL9cd9Whkx-M6an3t7s,599
@@ -26,21 +26,20 @@ entari_plugin_hyw/assets/libs/highlight.js,sha256=g3pvpbDHNrUrveKythkPMF2j_J7UFo
 entari_plugin_hyw/assets/libs/katex-auto-render.js,sha256=nLjaz8CGwpZsnsS6VPSi3EO3y-KzPOwaJ0PYhsf7R6c,3478
 entari_plugin_hyw/assets/libs/katex.css,sha256=UF1fgpAiu3tPJN_uCqEUHNe7pnr-QR0SQDNfgglgtcM,23196
 entari_plugin_hyw/assets/libs/katex.js,sha256=3ISyluw-iE3gkxWPdg_Z1Ftser5YtTgVV_ThOPRqWK4,277038
-entari_plugin_hyw/assets/libs/tailwind.css,sha256=
+entari_plugin_hyw/assets/libs/tailwind.css,sha256=ee_3txpnxhChZOjSJQUX0XiL1Nq0U2KLTvSGJLZBlaA,19916
 entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-entari_plugin_hyw/core/config.py,sha256=
+entari_plugin_hyw/core/config.py,sha256=rN2hVI964D7eM6xPuOthEIpXyFGZpRSYNrl1xdKHJ1s,1636
 entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
 entari_plugin_hyw/core/hyw.py,sha256=RCRjV9uYmvXysiliztphLP3VyUabrf0LY2Bk66W5JGA,1927
-entari_plugin_hyw/core/pipeline.py,sha256=
-entari_plugin_hyw/core/render.py,sha256=
-entari_plugin_hyw/core/render.py.bak,sha256=qMd6Tk0p6ItqGmErR6dkWRwCuKQYXINc7KRxnP-mb_s,48768
+entari_plugin_hyw/core/pipeline.py,sha256=7mZDm7W9Izui_hcQqd_KjRVtlxfEYFWamGxqu-C9exY,48052
+entari_plugin_hyw/core/render.py,sha256=U5wZ6kQKBBF85acDT8kq-HyXyNVPwUd-__SPLbFwXGg,27466
 entari_plugin_hyw/utils/__init__.py,sha256=TnkxDqYr0zgRE7TC92tVbUaY8m1UyyoLg2zvzQ8nMVI,84
 entari_plugin_hyw/utils/browser.py,sha256=LJlFh-oSqt9mQBpMALxbYGUG__t1YLUo7RxUAslsWUc,1416
 entari_plugin_hyw/utils/misc.py,sha256=_7iHVYj_mJ6OGq6FU1s_cFeS1Ao-neBjZYd6eI2p95U,3482
 entari_plugin_hyw/utils/playwright_tool.py,sha256=ZZNkzFtUt_Gxny3Od4boBAgNF9J0N84uySatzn1Bwe4,1272
-entari_plugin_hyw/utils/prompts.py,sha256=
-entari_plugin_hyw/utils/search.py,sha256=
-entari_plugin_hyw-3.3.
-entari_plugin_hyw-3.3.
-entari_plugin_hyw-3.3.
-entari_plugin_hyw-3.3.
+entari_plugin_hyw/utils/prompts.py,sha256=oJpgNvRQ_Lmr2Ca-B6fcpysMT2i0obioBC1DuH_Z1MY,4430
+entari_plugin_hyw/utils/search.py,sha256=Bvz2KFw3Gr2nuvmlo_8ExLHvO353NKX-YN35A2FCsBw,19047
+entari_plugin_hyw-3.3.6.dist-info/METADATA,sha256=iNeG0220pvABmvr1S76OuGYJa6KZi5yz8iEx3s6i-KY,4674
+entari_plugin_hyw-3.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+entari_plugin_hyw-3.3.6.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
+entari_plugin_hyw-3.3.6.dist-info/RECORD,,