entari-plugin-hyw 0.3.5__py3-none-any.whl → 4.0.0rc14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (78) hide show
  1. entari_plugin_hyw/Untitled-1 +1865 -0
  2. entari_plugin_hyw/__init__.py +979 -116
  3. entari_plugin_hyw/filters.py +83 -0
  4. entari_plugin_hyw/history.py +251 -0
  5. entari_plugin_hyw/misc.py +214 -0
  6. entari_plugin_hyw/search_cache.py +154 -0
  7. entari_plugin_hyw-4.0.0rc14.dist-info/METADATA +118 -0
  8. entari_plugin_hyw-4.0.0rc14.dist-info/RECORD +72 -0
  9. {entari_plugin_hyw-0.3.5.dist-info → entari_plugin_hyw-4.0.0rc14.dist-info}/WHEEL +1 -1
  10. {entari_plugin_hyw-0.3.5.dist-info → entari_plugin_hyw-4.0.0rc14.dist-info}/top_level.txt +1 -0
  11. hyw_core/__init__.py +94 -0
  12. hyw_core/agent.py +768 -0
  13. hyw_core/browser_control/__init__.py +63 -0
  14. hyw_core/browser_control/assets/card-dist/index.html +425 -0
  15. hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +1 -0
  16. hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +9 -0
  17. hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
  18. hyw_core/browser_control/assets/card-dist/logos/gemini.svg +1 -0
  19. hyw_core/browser_control/assets/card-dist/logos/google.svg +1 -0
  20. hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
  21. hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
  22. hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +15 -0
  23. hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
  24. hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
  25. hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
  26. hyw_core/browser_control/assets/card-dist/logos/openai.svg +1 -0
  27. hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
  28. hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +24 -0
  29. hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
  30. hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
  31. hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
  32. hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
  33. hyw_core/browser_control/assets/card-dist/vite.svg +1 -0
  34. hyw_core/browser_control/assets/index.html +5691 -0
  35. hyw_core/browser_control/assets/logos/anthropic.svg +1 -0
  36. hyw_core/browser_control/assets/logos/cerebras.svg +9 -0
  37. hyw_core/browser_control/assets/logos/deepseek.png +0 -0
  38. hyw_core/browser_control/assets/logos/gemini.svg +1 -0
  39. hyw_core/browser_control/assets/logos/google.svg +1 -0
  40. hyw_core/browser_control/assets/logos/grok.png +0 -0
  41. hyw_core/browser_control/assets/logos/huggingface.png +0 -0
  42. hyw_core/browser_control/assets/logos/microsoft.svg +15 -0
  43. hyw_core/browser_control/assets/logos/minimax.png +0 -0
  44. hyw_core/browser_control/assets/logos/mistral.png +0 -0
  45. hyw_core/browser_control/assets/logos/nvida.png +0 -0
  46. hyw_core/browser_control/assets/logos/openai.svg +1 -0
  47. hyw_core/browser_control/assets/logos/openrouter.png +0 -0
  48. hyw_core/browser_control/assets/logos/perplexity.svg +24 -0
  49. hyw_core/browser_control/assets/logos/qwen.png +0 -0
  50. hyw_core/browser_control/assets/logos/xai.png +0 -0
  51. hyw_core/browser_control/assets/logos/xiaomi.png +0 -0
  52. hyw_core/browser_control/assets/logos/zai.png +0 -0
  53. hyw_core/browser_control/engines/__init__.py +15 -0
  54. hyw_core/browser_control/engines/base.py +13 -0
  55. hyw_core/browser_control/engines/default.py +166 -0
  56. hyw_core/browser_control/engines/duckduckgo.py +171 -0
  57. hyw_core/browser_control/landing.html +172 -0
  58. hyw_core/browser_control/manager.py +173 -0
  59. hyw_core/browser_control/renderer.py +446 -0
  60. hyw_core/browser_control/service.py +940 -0
  61. hyw_core/config.py +154 -0
  62. hyw_core/core.py +462 -0
  63. hyw_core/crawling/__init__.py +18 -0
  64. hyw_core/crawling/completeness.py +437 -0
  65. hyw_core/crawling/models.py +88 -0
  66. hyw_core/definitions.py +104 -0
  67. hyw_core/image_cache.py +274 -0
  68. hyw_core/pipeline.py +502 -0
  69. hyw_core/search.py +171 -0
  70. hyw_core/stages/__init__.py +21 -0
  71. hyw_core/stages/base.py +95 -0
  72. hyw_core/stages/summary.py +191 -0
  73. entari_plugin_hyw/agent.py +0 -419
  74. entari_plugin_hyw/compressor.py +0 -59
  75. entari_plugin_hyw/tools.py +0 -236
  76. entari_plugin_hyw/vision.py +0 -35
  77. entari_plugin_hyw-0.3.5.dist-info/METADATA +0 -112
  78. entari_plugin_hyw-0.3.5.dist-info/RECORD +0 -9
@@ -0,0 +1,446 @@
1
+ """
2
+ Vue-based Card Renderer (DrissionPage-based)
3
+
4
+ Renders content to image using the shared DrissionPage browser.
5
+ Wraps synchronous DrissionPage operations in a thread pool.
6
+ """
7
+
8
+ import json
9
+ import asyncio
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any, Optional
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+ from loguru import logger
15
+ from .manager import SharedBrowserManager, get_shared_browser_manager
16
+
17
+
18
class ContentRenderer:
    """Render markdown content to images through the shared DrissionPage browser.

    DrissionPage calls are synchronous, so every browser interaction is run on
    a private ``ThreadPoolExecutor`` and exposed through ``async`` wrappers.
    Each render loads the Vue card template in a tab, pushes data into it via
    ``window.updateRenderData(...)``, waits for ``window.RENDER_FINISHED`` and
    screenshots the ``#main-container`` element.
    """

    def __init__(self, template_path: Optional[str] = None, auto_start: bool = True, headless: bool = True):
        """Load the card template and optionally attach to the shared browser.

        Args:
            template_path: Path to the Vue template HTML. Defaults to
                ``assets/card-dist/index.html`` next to this module.
            auto_start: When true, obtain the shared browser manager
                immediately (synchronously, on the calling thread).
            headless: Forwarded to ``get_shared_browser_manager``.

        Raises:
            FileNotFoundError: If the template file does not exist.
        """
        self.headless = headless

        if template_path is None:
            # Use card-dist which has properly inlined JS (viteSingleFile)
            template_path = Path(__file__).parent / "assets" / "card-dist" / "index.html"

        self.template_path = Path(template_path)
        if not self.template_path.exists():
            raise FileNotFoundError(f"Vue template not found: {self.template_path}")

        self.template_content = self.template_path.read_text(encoding="utf-8")
        logger.info(f"ContentRenderer: loaded Vue template ({len(self.template_content)} bytes)")

        self._manager = None
        self._executor = ThreadPoolExecutor(max_workers=10)  # Enough for batch crawls
        self._render_tab = None

        if auto_start:
            self._ensure_manager()

    def _ensure_manager(self):
        """Obtain the shared browser manager on first use (no-op afterwards)."""
        if not self._manager:
            self._manager = get_shared_browser_manager(headless=self.headless)

    async def start(self, timeout: int = 6000):
        """Initialise the browser manager on the worker pool.

        ``timeout`` is accepted for interface compatibility but is not used by
        this implementation.
        """
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(self._executor, self._ensure_manager)

    async def prepare_tab(self) -> str:
        """Create and warm up a render tab on the worker pool; return its tab id."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, self._prepare_tab_sync)

    def _wait_for_render_finished(self, tab, timeout: float = 12.0, context: str = "") -> bool:
        """Poll ``window.RENDER_FINISHED`` in ``tab`` until it is truthy.

        Args:
            tab: Tab object exposing ``run_js``.
            timeout: Overall budget in seconds, measured from entry (the
                optional reset phase below counts against it).
            context: Label used only in log messages.

        Returns:
            True if the flag became truthy within ``timeout``, else False.
        """
        import time as pytime

        # Monotonic clock: wall-clock adjustments must not shorten or extend the wait.
        start = pytime.monotonic()

        # Check initial state
        initial_state = tab.run_js("return window.RENDER_FINISHED")
        logger.debug(f"ContentRenderer[{context}]: Starting wait, initial RENDER_FINISHED={initial_state}")

        # If already true, it is treated as stale from a previous render and we
        # wait for the page to reset it.
        # NOTE(review): if a render can complete before this first check, a
        # fresh "true" would be misread as stale and force-reset below, turning
        # a fast render into a full timeout - confirm against the template JS.
        if initial_state:
            logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED was true, waiting for reset...")
            # Wait for JS to reset it to false (updateRenderData sets it to false)
            reset_deadline = pytime.monotonic() + 1.0  # 1s max to wait for reset
            while pytime.monotonic() < reset_deadline:
                if not tab.run_js("return window.RENDER_FINISHED"):
                    logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED reset to false")
                    break
                pytime.sleep(0.05)
            else:
                logger.warning(f"ContentRenderer[{context}]: RENDER_FINISHED not reset, force resetting via JS")
                tab.run_js("window.RENDER_FINISHED = false")

        # Now wait for it to become true
        poll_count = 0
        while pytime.monotonic() - start < timeout:
            is_finished = tab.run_js("return window.RENDER_FINISHED")
            poll_count += 1
            if is_finished:
                elapsed = pytime.monotonic() - start
                logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED=true after {elapsed:.2f}s ({poll_count} polls)")
                return True
            pytime.sleep(0.1)  # Poll every 100ms

        elapsed = pytime.monotonic() - start
        logger.warning(f"ContentRenderer[{context}]: Wait for RENDER_FINISHED timed out after {elapsed:.2f}s ({poll_count} polls)")
        return False

    def _prepare_tab_sync(self) -> str:
        """Open the template in a new tab, warm it up and return the tab id.

        Raises:
            Exception: Any failure is logged and re-raised. A tab that was
                already opened is closed first so it does not leak.
        """
        import time as pytime

        start = pytime.monotonic()
        self._ensure_manager()
        tab = None
        try:
            tab = self._manager.new_tab(self.template_path.as_uri())
            tab_id = tab.tab_id

            # Wait for app to mount instead of a fixed sleep
            tab.ele('#app', timeout=5)

            # Pre-warm with data to trigger Vue render
            warmup_data = {
                "markdown": "# Ready",
                "total_time": 0,
                "stages": [],
                "references": [],
                "stats": {},
                "theme_color": "#ef4444",
            }

            logger.debug(f"ContentRenderer: Calling warmup updateRenderData for tab {tab_id}")
            tab.run_js(f"window.updateRenderData({json.dumps(warmup_data)})")
            self._wait_for_render_finished(tab, timeout=12.0, context=f"warmup:{tab_id}")

            # Wait for main-container after warmup (Vue needs to render it)
            tab.ele('#main-container', timeout=3)

            elapsed = pytime.monotonic() - start
            logger.info(f"ContentRenderer: Prepared tab {tab_id} in {elapsed:.2f}s")
            return tab_id
        except Exception as e:
            logger.error(f"ContentRenderer: Failed to prepare tab: {e}")
            # The caller never learns the id of a half-prepared tab, so close it here.
            if tab is not None:
                try:
                    tab.close()
                except Exception:
                    pass
            raise

    async def render_pages_batch(
        self,
        pages: List[Dict[str, Any]],
        theme_color: str = "#ef4444"
    ) -> List[Optional[str]]:
        """
        Render multiple page markdown contents to images concurrently.

        Args:
            pages: List of dicts with 'title', 'content', 'url' keys
            theme_color: Theme color for rendering

        Returns:
            List of base64-encoded JPG images, one entry per input page in the
            same order; an entry is None when that page could not be rendered.
            An empty list is returned when ``pages`` is empty or when no tab
            could be prepared at all.
        """
        if not pages:
            return []

        loop = asyncio.get_running_loop()

        # Prepare tabs concurrently
        logger.info(f"ContentRenderer: Preparing {len(pages)} tabs for batch render")
        tab_tasks = [
            loop.run_in_executor(self._executor, self._prepare_tab_sync)
            for _ in pages
        ]
        tab_ids = await asyncio.gather(*tab_tasks, return_exceptions=True)

        # Keep the original page index with each usable tab so results can be
        # written back to the right slot even when some preparations failed.
        prepared = []
        for i, (page, tab_id) in enumerate(zip(pages, tab_ids)):
            if isinstance(tab_id, BaseException):
                logger.warning(f"ContentRenderer: Failed to prepare tab for page {i}: {tab_id}")
            else:
                prepared.append((i, page, tab_id))

        if not prepared:
            return []

        # Render concurrently
        render_tasks = [
            loop.run_in_executor(
                self._executor,
                self._render_page_to_b64_sync,
                page,
                tab_id,
                theme_color
            )
            for _, page, tab_id in prepared
        ]
        results = await asyncio.gather(*render_tasks, return_exceptions=True)

        # Process results, aligned with the input order
        screenshots: List[Optional[str]] = [None] * len(pages)
        for (i, _page, _tab_id), res in zip(prepared, results):
            if isinstance(res, BaseException):
                logger.warning(f"ContentRenderer: Batch render error for page {i}: {res}")
            else:
                screenshots[i] = res

        logger.info(f"ContentRenderer: Batch rendered {len([s for s in screenshots if s])} pages")
        return screenshots

    def _fit_viewport_to_content(self, tab) -> None:
        """Resize the emulated viewport to the document height and hide overflow."""
        # Get actual content height to prevent clipping
        scroll_height = tab.run_js('return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);')
        viewport_height = int(scroll_height) + 200

        tab.run_cdp('Emulation.setDeviceMetricsOverride',
            width=1440, height=viewport_height, deviceScaleFactor=1, mobile=False
        )

        # Hide scrollbars (viewport is now large enough, so overflow:hidden won't clip)
        tab.run_js('document.documentElement.style.overflow = "hidden"')
        tab.run_js('document.body.style.overflow = "hidden"')

    def _screenshot_element_b64(self, tab, element) -> str:
        """Screenshot ``element`` as a base64 JPG with scrollbars suppressed."""
        # Hide scrollbars through the shared manager helper
        SharedBrowserManager.hide_scrollbars(tab)

        # Force root styles to eliminate gutter and ensure full width
        tab.run_js('document.documentElement.style.overflow = "hidden";')
        tab.run_js('document.body.style.overflow = "hidden";')
        tab.run_js('document.documentElement.style.scrollbarGutter = "unset";')
        tab.run_js('document.documentElement.style.width = "100%";')

        b64_img = element.get_screenshot(as_base64='jpg')

        # Best-effort restore; render tabs are closed right after, so a failure
        # here is harmless and deliberately ignored.
        try:
            tab.set.scroll_bars(True)
        except Exception:
            pass
        return b64_img

    def _render_page_to_b64_sync(
        self,
        page_data: Dict[str, Any],
        tab_id: str,
        theme_color: str
    ) -> Optional[str]:
        """Render a single page's markdown in tab ``tab_id`` to a base64 JPG.

        Returns None when the tab cannot be found or rendering fails. The tab
        is always closed afterwards.
        """
        tab = None
        try:
            self._ensure_manager()
            browser_page = self._manager.page

            try:
                tab = browser_page.get_tab(tab_id)
            except Exception as e:
                logger.debug(f"ContentRenderer: get_tab({tab_id}) failed: {e}")
                return None

            if not tab:
                return None

            # Build render data for this page
            markdown = f"# {page_data.get('title', 'Page')}\n\n{page_data.get('content', '')}"

            render_data = {
                "markdown": markdown,
                "total_time": 0,
                "stages": [],
                "references": [],
                "page_references": [],
                "image_references": [],
                "stats": {},
                "theme_color": theme_color,
            }

            # 1. Update data & wait for finished flag (a timeout is logged, then
            #    we still screenshot whatever has been rendered)
            tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
            self._wait_for_render_finished(tab, context=f"batch:{tab_id}")

            # 2. Dynamic resize + hide scrollbars
            self._fit_viewport_to_content(tab)

            # 3. Screenshot the card element, or the visible viewport as fallback
            main_ele = tab.ele('#main-container', timeout=3)
            if main_ele:
                return self._screenshot_element_b64(tab, main_ele)
            return tab.get_screenshot(as_base64='jpg', full_page=False)

        except Exception as e:
            logger.error(f"ContentRenderer: Failed to render page: {e}")
            return None
        finally:
            if tab:
                try:
                    tab.close()
                except Exception:
                    pass

    async def render(
        self,
        markdown_content: str,
        output_path: str,
        tab_id: Optional[str] = None,
        stats: Optional[Dict[str, Any]] = None,
        references: Optional[List[Dict[str, Any]]] = None,
        page_references: Optional[List[Dict[str, Any]]] = None,
        image_references: Optional[List[Dict[str, Any]]] = None,
        stages_used: Optional[List[Dict[str, Any]]] = None,
        theme_color: str = "#ef4444",
        **kwargs
    ) -> bool:
        """Render content to an image file using a pre-warmed tab or a temporary one.

        Runs ``_render_sync`` on the worker pool. Extra ``**kwargs`` are
        accepted and ignored.

        Returns:
            True on success, False if rendering failed.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self._executor,
            self._render_sync,
            markdown_content,
            output_path,
            tab_id,
            stats,
            references,
            page_references,
            image_references,
            stages_used,
            theme_color
        )

    def _render_sync(
        self,
        markdown_content: str,
        output_path: str,
        tab_id: Optional[str],
        stats: Optional[Dict[str, Any]],
        references: Optional[List[Dict[str, Any]]],
        page_references: Optional[List[Dict[str, Any]]],
        image_references: Optional[List[Dict[str, Any]]],
        stages_used: Optional[List[Dict[str, Any]]],
        theme_color: str
    ) -> bool:
        """Synchronous render implementation.

        Looks up tab ``tab_id`` (opening a fresh template tab if it is missing),
        pushes the render data, and writes a JPG screenshot to ``output_path``.
        The tab is always closed afterwards.

        Returns:
            True on success, False if any step raised (the error is logged).
        """
        tab = None

        try:
            self._ensure_manager()
            page = self._manager.page

            if tab_id:
                try:
                    tab = page.get_tab(tab_id)
                except Exception as e:
                    # Fall through to creating a fresh tab below.
                    logger.debug(f"ContentRenderer: get_tab({tab_id}) failed: {e}")

            if not tab:
                logger.warning("ContentRenderer: Pre-warmed tab not found, creating new.")
                tab = page.new_tab(self.template_path.as_uri())
                tab.ele('#app', timeout=5)

            resolved_output_path = Path(output_path).resolve()
            resolved_output_path.parent.mkdir(parents=True, exist_ok=True)

            # ``stats`` may arrive as a list; only its first entry is used.
            stats_dict = stats[0] if isinstance(stats, list) and stats else (stats or {})

            render_data = {
                "markdown": markdown_content,
                "total_time": stats_dict.get("total_time", 0) or 0,
                "stages": stages_used or [],
                "references": references or [],
                "page_references": page_references or [],
                "image_references": image_references or [],
                "stats": stats_dict,
                "theme_color": theme_color,
            }

            actual_tab_id = getattr(tab, 'tab_id', 'unknown')
            logger.info(f"ContentRenderer: Calling updateRenderData for tab {actual_tab_id}, markdown length={len(markdown_content)}")
            tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")

            # Wait for event-driven finish
            self._wait_for_render_finished(tab, timeout=12.0, context=f"render:{actual_tab_id}")

            # Dynamic resize + hide scrollbars
            self._fit_viewport_to_content(tab)

            # Use element's actual position and size
            main_ele = tab.ele('#main-container', timeout=5)
            if main_ele:
                import base64

                b64_img = self._screenshot_element_b64(tab, main_ele)
                resolved_output_path.write_bytes(base64.b64decode(b64_img))
            else:
                logger.warning("ContentRenderer: #main-container not found, using fallback")
                tab.get_screenshot(path=str(resolved_output_path.parent), name=resolved_output_path.name, full_page=True)

            return True
        except Exception as e:
            logger.error(f"ContentRenderer: Render failed: {e}")
            return False
        finally:
            if tab:
                try:
                    tab.close()
                except Exception:
                    pass

    async def close(self):
        """Shut down the worker pool (without waiting) and close any held render tab."""
        self._executor.shutdown(wait=False)
        if self._render_tab:
            try:
                self._render_tab.close()
            except Exception:
                pass
            self._render_tab = None
428
+
429
+
430
# Process-wide renderer instance, created lazily by get_content_renderer()
# or injected via set_global_renderer().
_content_renderer: Optional[ContentRenderer] = None


async def get_content_renderer() -> ContentRenderer:
    """Return the shared ContentRenderer, creating and starting it on first use."""
    global _content_renderer
    if _content_renderer is not None:
        return _content_renderer

    # Publish the instance before awaiting so concurrent callers reuse it
    # instead of constructing a second renderer.
    _content_renderer = ContentRenderer()
    await _content_renderer.start()
    return _content_renderer
441
+
442
+
443
def set_global_renderer(renderer: ContentRenderer):
    """Install ``renderer`` as the shared instance returned by get_content_renderer()."""
    global _content_renderer
    _content_renderer = renderer