entari-plugin-hyw 4.0.0rc17__py3-none-any.whl → 4.0.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (55) hide show
  1. entari_plugin_hyw-4.0.0rc19.dist-info/METADATA +26 -0
  2. entari_plugin_hyw-4.0.0rc19.dist-info/RECORD +4 -0
  3. entari_plugin_hyw-4.0.0rc19.dist-info/top_level.txt +1 -0
  4. entari_plugin_hyw/__init__.py +0 -914
  5. entari_plugin_hyw/filters.py +0 -83
  6. entari_plugin_hyw/history.py +0 -251
  7. entari_plugin_hyw/misc.py +0 -214
  8. entari_plugin_hyw/search_cache.py +0 -253
  9. entari_plugin_hyw-4.0.0rc17.dist-info/METADATA +0 -119
  10. entari_plugin_hyw-4.0.0rc17.dist-info/RECORD +0 -52
  11. entari_plugin_hyw-4.0.0rc17.dist-info/top_level.txt +0 -2
  12. hyw_core/__init__.py +0 -94
  13. hyw_core/agent.py +0 -876
  14. hyw_core/browser_control/__init__.py +0 -63
  15. hyw_core/browser_control/assets/card-dist/index.html +0 -429
  16. hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -1
  17. hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -9
  18. hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
  19. hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -1
  20. hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -1
  21. hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
  22. hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
  23. hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -15
  24. hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
  25. hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
  26. hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
  27. hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -1
  28. hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
  29. hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -24
  30. hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
  31. hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
  32. hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
  33. hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
  34. hyw_core/browser_control/assets/card-dist/vite.svg +0 -1
  35. hyw_core/browser_control/engines/__init__.py +0 -15
  36. hyw_core/browser_control/engines/base.py +0 -13
  37. hyw_core/browser_control/engines/default.py +0 -166
  38. hyw_core/browser_control/engines/duckduckgo.py +0 -171
  39. hyw_core/browser_control/landing.html +0 -172
  40. hyw_core/browser_control/manager.py +0 -173
  41. hyw_core/browser_control/renderer.py +0 -446
  42. hyw_core/browser_control/service.py +0 -1002
  43. hyw_core/config.py +0 -154
  44. hyw_core/core.py +0 -454
  45. hyw_core/crawling/__init__.py +0 -18
  46. hyw_core/crawling/completeness.py +0 -437
  47. hyw_core/crawling/models.py +0 -88
  48. hyw_core/definitions.py +0 -166
  49. hyw_core/image_cache.py +0 -274
  50. hyw_core/pipeline.py +0 -502
  51. hyw_core/search.py +0 -169
  52. hyw_core/stages/__init__.py +0 -21
  53. hyw_core/stages/base.py +0 -95
  54. hyw_core/stages/summary.py +0 -218
  55. {entari_plugin_hyw-4.0.0rc17.dist-info → entari_plugin_hyw-4.0.0rc19.dist-info}/WHEEL +0 -0
@@ -1,437 +0,0 @@
1
- """
2
- Page Completeness Checker
3
-
4
- Multi-signal page readiness detection with focus on image loading guarantees.
5
- Implements Crawl4AI's wait_for_images and networkidle concepts.
6
- """
7
-
8
- import time
9
- from typing import Any, Optional, Dict
10
- from dataclasses import dataclass
11
- from loguru import logger
12
-
13
- from .models import CrawlConfig, CompletenessResult
14
-
15
-
16
- # CSP-compliant JavaScript for image loading verification
17
- # Based on Crawl4AI's wait_for_images logic
18
- IMAGE_CHECK_JS = """
19
- (() => {
20
- const results = {
21
- total: 0,
22
- loaded: 0,
23
- failed: 0,
24
- placeholders: 0,
25
- details: []
26
- };
27
-
28
- const imgs = Array.from(document.querySelectorAll('img'));
29
- const viewportHeight = window.innerHeight;
30
- const minSize = %d; // Injected from Python
31
-
32
- for (const img of imgs) {
33
- // Skip tiny images (icons, tracking pixels)
34
- if (img.clientWidth < minSize && img.clientHeight < minSize) {
35
- continue;
36
- }
37
-
38
- // Skip images outside viewport (unless scan_full_page)
39
- const rect = img.getBoundingClientRect();
40
- const inViewport = rect.top < viewportHeight * 2 && rect.bottom > -viewportHeight;
41
-
42
- if (!inViewport && !%s) { // scan_full_page injected
43
- continue;
44
- }
45
-
46
- results.total++;
47
-
48
- const src = img.src || img.getAttribute('data-src') || '';
49
- const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') ||
50
- img.getAttribute('data-lazy-src') || img.getAttribute('data-lazy') || '';
51
- const className = (typeof img.className === 'string' ? img.className : '').toLowerCase();
52
- const loadingAttr = img.getAttribute('loading') || '';
53
-
54
- // Enhanced placeholder detection for blurred preview images (like mcmod.cn)
55
- const isPlaceholder = (
56
- // 1. data-src exists but not yet loaded into src
57
- (dataSrc && img.src !== dataSrc) ||
58
- // 2. Natural size much smaller than display size (blurred placeholder)
59
- (img.naturalWidth < 50 && img.clientWidth > 100) ||
60
- (img.naturalWidth < 100 && img.clientWidth > 200 && img.naturalWidth * 4 < img.clientWidth) ||
61
- // 3. Common placeholder keywords in src
62
- src.includes('placeholder') ||
63
- src.includes('loading') ||
64
- src.includes('blank') ||
65
- // 4. SVG placeholder or 1x1 tracking pixel
66
- src.startsWith('data:image/svg+xml') ||
67
- (img.naturalWidth === 1 && img.naturalHeight === 1) ||
68
- // 5. Lazy-loading class indicators (common patterns)
69
- className.includes('lazy') ||
70
- className.includes('lazyload') ||
71
- className.includes('lozad') ||
72
- className.includes('b-lazy') ||
73
- // 6. Blur indicators (common for LQIP - Low Quality Image Placeholder)
74
- className.includes('blur') ||
75
- src.includes('blur') ||
76
- src.includes('thumb') ||
77
- src.includes('thumbnail') ||
78
- // 7. loading="lazy" + not complete (browser native lazy loading)
79
- (loadingAttr === 'lazy' && !img.complete) ||
80
- // 8. CSS blur filter applied (visual blurring)
81
- (window.getComputedStyle(img).filter || '').includes('blur')
82
- );
83
-
84
- if (isPlaceholder) {
85
- results.placeholders++;
86
- results.details.push({
87
- src: src.substring(0, 100),
88
- status: 'placeholder',
89
- natural: [img.naturalWidth, img.naturalHeight],
90
- display: [img.clientWidth, img.clientHeight]
91
- });
92
- continue;
93
- }
94
-
95
- // Check if fully loaded
96
- if (img.complete && img.naturalWidth > 0 && img.naturalHeight > 0) {
97
- results.loaded++;
98
- results.details.push({
99
- src: src.substring(0, 100),
100
- status: 'loaded',
101
- natural: [img.naturalWidth, img.naturalHeight]
102
- });
103
- } else {
104
- results.failed++;
105
- results.details.push({
106
- src: src.substring(0, 100),
107
- status: 'pending',
108
- complete: img.complete,
109
- natural: [img.naturalWidth, img.naturalHeight]
110
- });
111
- }
112
- }
113
-
114
- return results;
115
- })()
116
- """
117
-
118
- # JavaScript for height stability check
119
- HEIGHT_CHECK_JS = """
120
- (() => {
121
- return {
122
- height: Math.max(
123
- document.body.scrollHeight || 0,
124
- document.documentElement.scrollHeight || 0,
125
- document.body.offsetHeight || 0,
126
- document.documentElement.offsetHeight || 0
127
- ),
128
- ready: document.readyState === 'complete'
129
- };
130
- })()
131
- """
132
-
133
-
134
class CompletenessChecker:
    """
    Multi-signal page completeness verification.

    Signals checked:
    1. Image Loading (naturalWidth > 0, not placeholder)
    2. Height Stability (no layout shifts)
    3. DOM Ready State

    Usage:
        checker = CompletenessChecker(CrawlConfig())
        result = checker.check(tab)

        # Or wait until complete
        result = checker.wait_for_complete(tab)
    """

    def __init__(self, config: Optional[CrawlConfig] = None):
        """Store the crawl configuration, falling back to the defaults."""
        self._config = config or CrawlConfig()

    def check(self, tab: Any) -> CompletenessResult:
        """
        Perform a single completeness check.

        Args:
            tab: DrissionPage tab object

        Returns:
            CompletenessResult with all signals. ``height_stable`` and
            ``network_idle`` are always reported as True here because one
            sample cannot establish either of them.
        """
        start = time.time()

        img_result = self._check_images(tab)
        height_result = self._check_height(tab)

        failed = img_result.get('failed', 0)
        placeholders = img_result.get('placeholders', 0)
        total = img_result.get('total', 0)

        # Complete if: nothing is pending (or there are no images at all)
        # AND the document reports readyState == 'complete'.
        all_images_loaded = (failed + placeholders) == 0 or total == 0

        return CompletenessResult(
            is_complete=all_images_loaded and height_result.get('ready', False),
            total_images=total,
            loaded_images=img_result.get('loaded', 0),
            failed_images=failed,
            placeholder_images=placeholders,
            height=height_result.get('height', 0),
            height_stable=True,  # Single check can't determine stability
            network_idle=True,  # TODO: network monitoring
            check_duration=time.time() - start
        )

    def wait_for_complete(
        self,
        tab: Any,
        timeout: Optional[float] = None
    ) -> CompletenessResult:
        """
        Wait for page to be fully loaded with image guarantees.

        Polls ``check`` every ``image_check_interval`` seconds until the page
        has no pending/placeholder images and a stable height for
        ``image_stability_checks`` consecutive polls, or until the timeout.

        Args:
            tab: DrissionPage tab object
            timeout: Max wait time in seconds. ``None`` (or 0) falls back to
                ``image_load_timeout`` from the config.

        Returns:
            CompletenessResult. ``is_complete`` is False when the timeout was
            reached; if not a single check could run, an all-zero result is
            returned.
        """
        timeout = timeout or self._config.image_load_timeout
        start = time.time()

        # Treat a non-positive setting as "one sample is enough" so the
        # history can never be trimmed down to an empty list.
        window = max(1, self._config.height_stability_checks)

        height_history = []
        stable_count = 0
        last_result = None

        logger.debug(f"CompletenessChecker: Starting wait (timeout={timeout}s)")

        while time.time() - start < timeout:
            result = self.check(tab)
            last_result = result

            # Keep only the most recent `window` height samples
            height_history.append(result.height)
            if len(height_history) > window:
                height_history.pop(0)

            height_stable = self._heights_stable(height_history, window)

            elapsed = time.time() - start
            logger.debug(
                f"CompletenessChecker: [{elapsed:.1f}s] "
                f"images={result.loaded_images}/{result.total_images} "
                f"pending={result.failed_images} "
                f"placeholders={result.placeholder_images} "
                f"height={result.height} stable={height_stable}"
            )

            all_loaded = (
                result.failed_images == 0 and
                result.placeholder_images == 0
            )

            if all_loaded and height_stable:
                stable_count += 1
                if stable_count >= self._config.image_stability_checks:
                    logger.info(
                        f"CompletenessChecker: Page complete! "
                        f"{result.loaded_images} images loaded, "
                        f"height={result.height}, "
                        f"took {elapsed:.2f}s"
                    )
                    return CompletenessResult(
                        is_complete=True,
                        total_images=result.total_images,
                        loaded_images=result.loaded_images,
                        failed_images=0,
                        placeholder_images=0,
                        height=result.height,
                        height_stable=True,
                        network_idle=True,
                        check_duration=elapsed
                    )
            else:
                # Any regression restarts the consecutive-success streak
                stable_count = 0

            time.sleep(self._config.image_check_interval)

        # Timeout reached
        elapsed = time.time() - start

        if last_result is None:
            # The loop body never ran (e.g. non-positive timeout), so there
            # is no sample to report; must be handled before the log below
            # dereferences last_result.
            logger.warning(
                f"CompletenessChecker: Timeout after {elapsed:.1f}s before any check ran"
            )
            return CompletenessResult(
                is_complete=False,
                total_images=0, loaded_images=0, failed_images=0, placeholder_images=0,
                height=0, height_stable=False, network_idle=False,
                check_duration=elapsed
            )

        logger.warning(
            f"CompletenessChecker: Timeout after {elapsed:.1f}s! "
            f"images={last_result.loaded_images}/{last_result.total_images} "
            f"pending={last_result.failed_images}"
        )

        # Return last result with is_complete=False
        return CompletenessResult(
            is_complete=False,
            total_images=last_result.total_images,
            loaded_images=last_result.loaded_images,
            failed_images=last_result.failed_images,
            placeholder_images=last_result.placeholder_images,
            height=last_result.height,
            # Strict equality of the retained samples (no pixel threshold)
            height_stable=len(set(height_history)) == 1,
            network_idle=True,
            check_duration=elapsed
        )

    def _heights_stable(self, history: list, window: int) -> bool:
        """Return True once `window` samples exist and their spread is within the configured threshold."""
        if len(history) < window:
            return False
        spread = max(history) - min(history)
        return spread <= self._config.height_stability_threshold

    def _check_images(self, tab: Any) -> Dict[str, Any]:
        """Run image check JavaScript and return results (zero counts on failure)."""
        try:
            js = IMAGE_CHECK_JS % (
                self._config.min_image_size,
                'true' if self._config.scan_full_page else 'false'
            )
            result = tab.run_js(js, as_expr=True)
            return result or {'total': 0, 'loaded': 0, 'failed': 0, 'placeholders': 0}
        except Exception as e:
            # Best-effort: a failed probe is reported as "no images"
            logger.warning(f"CompletenessChecker: Image check failed: {e}")
            return {'total': 0, 'loaded': 0, 'failed': 0, 'placeholders': 0}

    def _check_height(self, tab: Any) -> Dict[str, Any]:
        """Run height check JavaScript and return results (height 0 / not ready on failure)."""
        try:
            result = tab.run_js(HEIGHT_CHECK_JS, as_expr=True)
            return result or {'height': 0, 'ready': False}
        except Exception as e:
            logger.warning(f"CompletenessChecker: Height check failed: {e}")
            return {'height': 0, 'ready': False}
326
-
327
-
328
def trigger_lazy_load(tab: Any, config: Optional[CrawlConfig] = None) -> None:
    """
    Scroll through page to trigger lazy-loaded images.

    Strategy: scroll down in ``config.scroll_step`` increments with a fixed
    0.2s pause per step (at most 100 steps), poll up to 4 times at 0.5s
    intervals for images to settle, then scroll back to the top.
    Does nothing when ``config.scan_full_page`` is false. Any failure is
    logged as a warning and not propagated.

    Args:
        tab: DrissionPage tab object
        config: Crawl configuration (defaults to ``CrawlConfig()``)
    """
    config = config or CrawlConfig()
    if not config.scan_full_page:
        return

    start = time.time()
    current_pos = 0

    logger.info("CompletenessChecker: Starting lazy load scroll (fast scroll + final wait)")

    try:
        max_scroll_steps = 100
        step_count = 0

        # 1. Fast Scroll Phase
        for step_count in range(1, max_scroll_steps + 1):
            current_pos += config.scroll_step
            tab.run_js(f"window.scrollTo(0, {current_pos});")

            # Simple fixed delay per step (0.2s) as requested
            time.sleep(0.2)

            # Re-read the height every step: lazy content can grow the page
            height = tab.run_js("""
                Math.max(
                    document.body.scrollHeight || 0,
                    document.documentElement.scrollHeight || 0
                )
            """, as_expr=True) or 0

            if current_pos >= height:
                logger.debug(f"CompletenessChecker: Reached bottom at position {current_pos}")
                break

        # 2. Wait Phase at Bottom (Wait for images to settle - reduced timeout)
        logger.debug("CompletenessChecker: Reached bottom, waiting for images to settle (max 2s)...")
        wait_start = time.time()

        # Quick check: just verify images are not placeholders (simplified check)
        check_all_images_js = """
        (() => {
            const imgs = Array.from(document.querySelectorAll('img'));
            if (imgs.length === 0) return true;

            // Quick check: count non-placeholder images that are loaded
            let loaded_count = 0;
            let total_count = 0;

            for (const img of imgs) {
                // Skip tiny images
                if (img.clientWidth < 50 && img.clientHeight < 50) continue;

                total_count++;
                const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') || '';
                const src = img.src || '';

                // Check if placeholder
                const isPlaceholder = (
                    (dataSrc && img.src !== dataSrc) ||
                    (img.naturalWidth < 50 && img.clientWidth > 100) ||
                    src.includes('placeholder') || src.includes('loading')
                );

                // If not placeholder and loaded, count it
                if (!isPlaceholder && img.complete && img.naturalWidth > 0) {
                    loaded_count++;
                }
            }

            // If most images are loaded (80%+), consider it done
            return total_count === 0 || (loaded_count / total_count) >= 0.8;
        })()
        """

        # Quick check loop with shorter interval
        max_checks = 4  # 2s / 0.5s = 4 checks max
        for _ in range(max_checks):
            try:
                all_loaded = tab.run_js(check_all_images_js, as_expr=True)
                if all_loaded:
                    elapsed_wait = time.time() - wait_start
                    logger.debug(f"CompletenessChecker: Images settled at bottom in {elapsed_wait:.1f}s")
                    break
            except Exception as e:
                # Best-effort probe: keep polling, but leave a trace and do
                # not swallow KeyboardInterrupt/SystemExit.
                logger.debug(f"CompletenessChecker: Image settle check failed: {e}")
            time.sleep(0.5)

        # Scroll back to top
        tab.run_js("window.scrollTo(0, 0);")

        elapsed = time.time() - start
        logger.info(f"CompletenessChecker: Lazy load scroll complete - {step_count} steps in {elapsed:.1f}s")

    except Exception as e:
        logger.warning(f"CompletenessChecker: Lazy load scroll failed: {e}")
@@ -1,88 +0,0 @@
1
- """
2
- Crawling Data Models
3
-
4
- Core data structures for the intelligent crawling system.
5
- """
6
-
7
- from dataclasses import dataclass, field
8
- from typing import List, Optional, Set
9
-
10
-
11
@dataclass
class CrawlConfig:
    """
    Tunable knobs for a crawl, modelled on Crawl4AI-style parameters.

    The emphasis is on page completeness and on making sure images have
    actually loaded. Field order is part of the positional constructor
    signature and must not change.
    """
    # --- Wait strategy (priority: image loading) ---
    wait_for_images: bool = True
    wait_until: str = "networkidle"  # domcontentloaded | networkidle | load
    delay_before_return: float = 0.1
    page_timeout: float = 30.0

    # --- Image loading ---
    image_load_timeout: float = 10.0  # upper bound on waiting for images
    image_stability_checks: int = 3  # consecutive stable polls required
    image_check_interval: float = 0.2  # pause between polls
    min_image_size: int = 50  # images smaller than this are ignored

    # --- Scrolling to trigger lazy loading ---
    scan_full_page: bool = True
    scroll_step: int = 800
    scroll_delay: float = 0.5
    scroll_timeout: float = 15.0

    # --- Height stability ---
    height_stability_checks: int = 3
    height_stability_threshold: int = 10  # pixels

    # --- Reserved for future adaptive-stop logic ---
    confidence_threshold: float = 0.75
    min_gain_threshold: float = 0.1
    max_pages: int = 20
-
45
-
46
@dataclass
class CompletenessResult:
    """Snapshot of the signals gathered by a completeness check."""
    is_complete: bool
    total_images: int
    loaded_images: int
    failed_images: int
    placeholder_images: int
    height: int
    height_stable: bool
    network_idle: bool
    check_duration: float

    @property
    def image_load_ratio(self) -> float:
        """Fraction of counted images that loaded; 1.0 when none were counted."""
        return self.loaded_images / self.total_images if self.total_images != 0 else 1.0
64
-
65
-
66
@dataclass
class PageResult:
    """Everything collected while fetching one page."""
    url: str
    final_url: str
    title: str
    html: str
    content: str  # Extracted markdown
    images: List[str] = field(default_factory=list)  # base64 images
    screenshot: Optional[str] = None

    # Quality signals
    load_time: float = 0.0
    completeness: Optional[CompletenessResult] = None

    # Error handling
    error: Optional[str] = None

    @property
    def is_complete(self) -> bool:
        """Mirror ``completeness.is_complete``; False when no check was recorded."""
        report = self.completeness
        return False if report is None else report.is_complete
hyw_core/definitions.py DELETED
@@ -1,166 +0,0 @@
1
- """
2
- Centralized Definitions
3
-
4
- All global prompts and tool definitions for the pipeline stages.
5
- """
6
-
7
- from typing import Dict, Any
8
-
9
- # Used by SummaryStage - language appended at runtime
10
- SUMMARY_REPORT_SP = """# 你是一个总结助手 (Agent), 你的职责是基于搜索工具给出的信息,回答用户的问题或解释用户问题中的关键词。
11
- ## 核心原则
12
- 最小限度使用自身知识, 尽可能使用 web_tool 获取信息.
13
- 遇到计算、js代码、算法任务, 积极使用 js_executor 工具完成计算任务.
14
-
15
- ## 抓重点原则
16
- 搜索结果中往往混杂大量信息,你需要:
17
- - 主动识别与用户问题最匹配的结果,大胆引用,不要因为信息混在众多结果中就忽略它
18
- - 即使只有一条结果明确匹配,也要优先使用该结果,而非泛泛而谈
19
-
20
- ## 图文融合原则
21
- 当用户同时提供图片和文字时:
22
- - 先理解用户真正想知道什么(识图?查资料?对比分析?)
23
- - 图片是"锚点",搜索是"扩展"——围绕图片内容组织搜索信息
24
- - 行文自然流畅,让图片分析和搜索结果无缝衔接
25
- - 例如:"图中展示的是 XX(识别结果),这是一款...(搜索扩展)"
26
-
27
- ## 工具使用指南
28
- - 适当时候调用 `refuse_answer`
29
-
30
- ## 回答格式
31
- - 字数: 尽可能少, 有多少信息写多少信息, 减少无意义, 足够回答用户问题或解释关键词所需的文字即可
32
- - `# ` 大标题约 8-10 个字
33
- - 必要时可以辅助以 <summary>...</summary> 为格式, 不超过 100 字的概括
34
- - 二级标题 + markdown 正文
35
- - 正文使用 [1] 格式引用信息来源. 如有 js 计算结果, 积极引用. 无需写出源, 系统自动渲染.
36
- """
37
-
38
- # Used by SummaryStage for multimodal input with images
39
- IMAGE_CONTEXT_TEMPLATE = """[System: 图文融合分析指南]
40
- 用户提供了 {image_count} 张图片,请根据问题类型智能调整回答策略
41
- 用户问题:"""
42
-
43
def get_refuse_answer_tool() -> Dict[str, Any]:
    """Build the function-call schema for the `refuse_answer` tool.

    The tool takes a single required string argument, `reason`.
    A fresh dict is returned on every call.
    """
    reason_schema = {"type": "string", "description": "拒绝回答的原因(展示给用户)"}
    parameters = {
        "type": "object",
        "properties": {"reason": reason_schema},
        "required": ["reason"],
    }
    function_spec = {
        "name": "refuse_answer",
        "description": "违规内容拒绝回答,内容涉及隐喻政治事件、任务、现役国家领导人、r18+、r18g(但不包含正常galgame、科普等)",
        "parameters": parameters,
    }
    return {"type": "function", "function": function_spec}
59
-
60
-
61
def get_web_tool() -> Dict[str, Any]:
    """Build the function-call schema for `web_tool` (web search / URL screenshot).

    The tool takes a single required string argument, `query`.
    A fresh dict is returned on every call.
    """
    description = """搜索网页或截图指定URL。用于获取duckduckgo搜索结果或网页内容
## 使用方式
网页搜索(大部分问题优先使用此方法):
直接传入搜索词如 "python async" 会返回搜索结果列表 搜索词尽可能少且精准, 以利于传统搜索引擎检索

网页截图(当用户明确要求截图时使用):
传入完整URL如 "https://example.com" 会直接截图该页面
"""
    query_schema = {
        "type": "string",
        "description": "搜索查询或网页获取"
    }
    parameters = {
        "type": "object",
        "properties": {"query": query_schema},
        "required": ["query"]
    }
    function_spec = {
        "name": "web_tool",
        "description": description,
        "parameters": parameters
    }
    return {"type": "function", "function": function_spec}
87
-
88
-
89
def get_js_tool() -> Dict[str, Any]:
    """Build the function-call schema for `js_executor` (run JavaScript in the browser).

    The tool takes a single required string argument, `script`.
    A fresh dict is returned on every call.
    """
    description = """执行JavaScript代码并返回结果。
代码将在当前浏览器页面的上下文中执行。
注意：
1. 必须使用 `return` 语句返回结果，或者直接作为表达式（如 `1+1`）。
2. 严禁使用 `console.log`，其输出无法被捕获，会导致返回 None。
"""
    script_schema = {
        "type": "string",
        "description": "要执行的JavaScript代码字符串"
    }
    parameters = {
        "type": "object",
        "properties": {"script": script_schema},
        "required": ["script"]
    }
    function_spec = {
        "name": "js_executor",
        "description": description,
        "parameters": parameters
    }
    return {"type": "function", "function": function_spec}
113
-
114
-
115
- # =============================================================================
116
- # AGENT PROMPTS
117
- # =============================================================================
118
-
119
- AGENT_SYSTEM_PROMPT = """# 你是一个智能助手 (Agent), 你的职责是使用 `web_tool` 工具来帮助用户搜索网页或截图URL, 同时完成用户分配给你的任务.
120
- ## 任务
121
- 理解用户意图分配给你的任务.
122
- 如果用户没有明确分配任务, 则默认任务为解释用户问题中的关键词.
123
- 分辨用户消息的语意, 提取出用户最想了解的核心内容, 作为任务的核心.
124
-
125
- ## 核心原则
126
- 最小限度使用自身知识, 尽可能使用 web_tool 获取信息.
127
-
128
- ## 工具使用指南
129
- - 并行调用工具
130
- - 网页搜索: 可以同时调用3次, 其中URL截图消耗较大, 最多同时调用1个
131
- - 积极使用 web_tool 获取信息
132
- - 搜索时, 关键词保证单一、简单、指向准确、利于传统搜索引擎, 通常只搜索1个词或短语.
133
- - 建议搜索: "minecraft create"; 不搜索 "create 是什么 百科"
134
- - 建议搜索: "opnecode"; 不搜索 "open code 怎么配置"
135
- - 建议搜索: "Bypass permissions"; 不搜索 "Bypass permissions 软件 选项"
136
- - 本搜索不支持高级搜索、不支持引号、不支持减号等复杂语法
137
- - 不要尝试通过搜索引擎描述如何尼尔反推出角色、任务、地点, 搜索引擎没有这个能力
138
- - 禁止搜索可能导致一切潜在推销广告的内容, 不出现“是什么”、“怎么办”等容易产生广告的内容
139
- - 禁止搜索任何敏感内容(galgame之类的除外), 禁止搜索政治、成人色情、暴力等内容
140
- - 获取页面截图时, 只挑选官方性较强的 wiki、官方网站、资源站等等, 不使用第三方转载新闻网站.
141
- - 最多可调用3轮工具, 但请适度保持快速, 3轮之后必须给出最终回答.
142
- - 适当时候调用 `refuse_answer`
143
- - 对于具体任务, 如果是转述、格式化、翻译等, 请直接给出最终回答, 不再调用工具
144
- - 遇到计算、js代码、算法任务, 积极使用 js_executor 工具完成计算任务.
145
-
146
- ## 抓重点原则
147
- 搜索结果中往往混杂大量信息,你需要:
148
- - 主动识别与用户问题最匹配的结果,大胆引用,不要因为信息混在众多结果中就忽略它
149
- - 即使只有一条结果明确匹配,也要优先使用该结果,而非泛泛而谈
150
-
151
- ## 图文融合原则
152
- 当用户同时提供图片和文字时:
153
- - 先理解用户真正想知道什么(识图?查资料?对比分析?)
154
- - 图片是"锚点",搜索是"扩展"——围绕图片内容组织搜索信息
155
- - 行文自然流畅,让图片分析和搜索结果无缝衔接
156
- - 例如:"图中展示的是 XX(识别结果),这是一款...(搜索扩展)"
157
-
158
- ## 回答格式
159
- - 字数: 尽可能少, 有多少获取到的信息、需要解释的内容, 就写多少, 减少无意义输出, 足够完成用户分配给你的任务 / 解释关键词即可.
160
- - `# ` 大标题约 8-10 个字
161
- - 必要时可以辅助以 <summary>...</summary> 为格式, 不超过 100 字的概括
162
- - 二级标题 + markdown 正文
163
- - 正文使用 [1] 格式引用信息来源. 如有 js 计算结果, 积极引用. 无需写出源, 系统自动渲染.
164
- """
165
-
166
-