entari-plugin-hyw 4.0.0rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +532 -0
- entari_plugin_hyw/assets/card-dist/index.html +387 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
- entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +1 -0
- entari_plugin_hyw/assets/icon/google.svg +1 -0
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/huggingface.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +1 -0
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/browser/__init__.py +10 -0
- entari_plugin_hyw/browser/engines/base.py +13 -0
- entari_plugin_hyw/browser/engines/bing.py +95 -0
- entari_plugin_hyw/browser/engines/searxng.py +137 -0
- entari_plugin_hyw/browser/landing.html +172 -0
- entari_plugin_hyw/browser/manager.py +153 -0
- entari_plugin_hyw/browser/service.py +275 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +756 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +382 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +240 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +29 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +61 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/definitions.py +130 -0
- entari_plugin_hyw/history.py +248 -0
- entari_plugin_hyw/image_cache.py +274 -0
- entari_plugin_hyw/misc.py +135 -0
- entari_plugin_hyw/modular_pipeline.py +351 -0
- entari_plugin_hyw/render_vue.py +401 -0
- entari_plugin_hyw/search.py +116 -0
- entari_plugin_hyw/stage_base.py +88 -0
- entari_plugin_hyw/stage_instruct.py +328 -0
- entari_plugin_hyw/stage_instruct_review.py +92 -0
- entari_plugin_hyw/stage_summary.py +164 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/METADATA +116 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/RECORD +99 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/WHEEL +5 -0
- entari_plugin_hyw-4.0.0rc5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vue-based Card Renderer (DrissionPage-based)
|
|
3
|
+
|
|
4
|
+
Renders content to image using the shared DrissionPage browser.
|
|
5
|
+
Wraps synchronous DrissionPage operations in a thread pool.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import asyncio
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import List, Dict, Any, Optional
|
|
12
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
13
|
+
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from .browser.manager import SharedBrowserManager
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ContentRenderer:
    """Renderer using DrissionPage with thread pool for async interface.

    Synchronous DrissionPage calls run on a private ThreadPoolExecutor so
    callers can await the async wrappers (``start``, ``prepare_tab``,
    ``render``, ``render_pages_batch``) without blocking the event loop.
    """

    def __init__(self, template_path: Optional[str] = None, auto_start: bool = True, headless: bool = True):
        """Load the Vue template and optionally start the shared browser.

        Args:
            template_path: Path to the built Vue ``index.html``.  Defaults to
                the bundled ``assets/card-dist/index.html`` next to this file.
            auto_start: When True, eagerly create the shared browser manager.
            headless: Whether the shared browser runs headless.

        Raises:
            FileNotFoundError: If the template file does not exist.
        """
        self.headless = headless

        if template_path is None:
            current_dir = Path(__file__).parent
            template_path = current_dir / "assets" / "card-dist" / "index.html"

        self.template_path = Path(template_path)
        if not self.template_path.exists():
            raise FileNotFoundError(f"Vue template not found: {self.template_path}")

        # Read once up front; doubles as an early readability check.  The
        # content itself is only used for the size in the log line below.
        self.template_content = self.template_path.read_text(encoding="utf-8")
        logger.info(f"ContentRenderer: loaded Vue template ({len(self.template_content)} bytes)")

        self._manager = None  # shared browser manager, created lazily
        self._executor = ThreadPoolExecutor(max_workers=10)  # Enough for batch crawls
        self._render_tab = None  # optional pre-warmed tab, closed in close()

        if auto_start:
            self._ensure_manager()
def _ensure_manager(self):
|
|
43
|
+
"""Ensure shared browser manager exists."""
|
|
44
|
+
if not self._manager:
|
|
45
|
+
from .browser.manager import get_shared_browser_manager
|
|
46
|
+
self._manager = get_shared_browser_manager(headless=self.headless)
|
|
47
|
+
|
|
48
|
+
async def start(self, timeout: int = 6000):
|
|
49
|
+
"""Initialize renderer manager (async wrapper)."""
|
|
50
|
+
loop = asyncio.get_running_loop()
|
|
51
|
+
await loop.run_in_executor(self._executor, self._ensure_manager)
|
|
52
|
+
|
|
53
|
+
async def prepare_tab(self) -> str:
|
|
54
|
+
"""Async wrapper to prepare a new render tab."""
|
|
55
|
+
loop = asyncio.get_running_loop()
|
|
56
|
+
return await loop.run_in_executor(self._executor, self._prepare_tab_sync)
|
|
57
|
+
|
|
58
|
+
def _prepare_tab_sync(self) -> str:
|
|
59
|
+
"""Create and warm up a new tab, return its ID."""
|
|
60
|
+
import time as pytimeout
|
|
61
|
+
start = pytimeout.time()
|
|
62
|
+
self._ensure_manager()
|
|
63
|
+
try:
|
|
64
|
+
tab = self._manager.new_tab(self.template_path.as_uri())
|
|
65
|
+
tab_id = tab.tab_id
|
|
66
|
+
|
|
67
|
+
# Basic wait
|
|
68
|
+
tab.wait(1)
|
|
69
|
+
|
|
70
|
+
# Pre-warm
|
|
71
|
+
warmup_data = {
|
|
72
|
+
"markdown": "# Ready",
|
|
73
|
+
"total_time": 0,
|
|
74
|
+
"stages": [],
|
|
75
|
+
"references": [],
|
|
76
|
+
"stats": {},
|
|
77
|
+
"theme_color": "#ef4444",
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if tab.ele('#app', timeout=5):
|
|
81
|
+
tab.run_js(f"window.updateRenderData({json.dumps(warmup_data)})")
|
|
82
|
+
|
|
83
|
+
elapsed = pytimeout.time() - start
|
|
84
|
+
logger.info(f"ContentRenderer: Prepared tab {tab_id} in {elapsed:.2f}s")
|
|
85
|
+
return tab_id
|
|
86
|
+
except Exception as e:
|
|
87
|
+
logger.error(f"ContentRenderer: Failed to prepare tab: {e}")
|
|
88
|
+
raise
|
|
89
|
+
|
|
90
|
+
async def render_pages_batch(
|
|
91
|
+
self,
|
|
92
|
+
pages: List[Dict[str, Any]],
|
|
93
|
+
theme_color: str = "#ef4444"
|
|
94
|
+
) -> List[str]:
|
|
95
|
+
"""
|
|
96
|
+
Render multiple page markdown contents to images concurrently.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
pages: List of dicts with 'title', 'content', 'url' keys
|
|
100
|
+
theme_color: Theme color for rendering
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
List of base64-encoded JPG images
|
|
104
|
+
"""
|
|
105
|
+
if not pages:
|
|
106
|
+
return []
|
|
107
|
+
|
|
108
|
+
loop = asyncio.get_running_loop()
|
|
109
|
+
|
|
110
|
+
# Prepare tabs concurrently
|
|
111
|
+
logger.info(f"ContentRenderer: Preparing {len(pages)} tabs for batch render")
|
|
112
|
+
tab_tasks = [
|
|
113
|
+
loop.run_in_executor(self._executor, self._prepare_tab_sync)
|
|
114
|
+
for _ in pages
|
|
115
|
+
]
|
|
116
|
+
tab_ids = await asyncio.gather(*tab_tasks, return_exceptions=True)
|
|
117
|
+
|
|
118
|
+
# Filter out failed tab preparations
|
|
119
|
+
valid_pairs = []
|
|
120
|
+
for i, (page, tab_id) in enumerate(zip(pages, tab_ids)):
|
|
121
|
+
if isinstance(tab_id, Exception):
|
|
122
|
+
logger.warning(f"ContentRenderer: Failed to prepare tab for page {i}: {tab_id}")
|
|
123
|
+
else:
|
|
124
|
+
valid_pairs.append((page, tab_id))
|
|
125
|
+
|
|
126
|
+
if not valid_pairs:
|
|
127
|
+
return []
|
|
128
|
+
|
|
129
|
+
# Render concurrently
|
|
130
|
+
render_tasks = [
|
|
131
|
+
loop.run_in_executor(
|
|
132
|
+
self._executor,
|
|
133
|
+
self._render_page_to_b64_sync,
|
|
134
|
+
page,
|
|
135
|
+
tab_id,
|
|
136
|
+
theme_color
|
|
137
|
+
)
|
|
138
|
+
for page, tab_id in valid_pairs
|
|
139
|
+
]
|
|
140
|
+
|
|
141
|
+
results = await asyncio.gather(*render_tasks, return_exceptions=True)
|
|
142
|
+
|
|
143
|
+
# Process results
|
|
144
|
+
screenshots = []
|
|
145
|
+
for i, res in enumerate(results):
|
|
146
|
+
if isinstance(res, Exception):
|
|
147
|
+
logger.warning(f"ContentRenderer: Batch render error for page {i}: {res}")
|
|
148
|
+
screenshots.append(None)
|
|
149
|
+
else:
|
|
150
|
+
screenshots.append(res)
|
|
151
|
+
|
|
152
|
+
logger.info(f"ContentRenderer: Batch rendered {len([s for s in screenshots if s])} pages")
|
|
153
|
+
return screenshots
|
|
154
|
+
|
|
155
|
+
def _render_page_to_b64_sync(
|
|
156
|
+
self,
|
|
157
|
+
page_data: Dict[str, Any],
|
|
158
|
+
tab_id: str,
|
|
159
|
+
theme_color: str
|
|
160
|
+
) -> Optional[str]:
|
|
161
|
+
"""Render a single page's markdown to base64 image."""
|
|
162
|
+
tab = None
|
|
163
|
+
try:
|
|
164
|
+
self._ensure_manager()
|
|
165
|
+
browser_page = self._manager.page
|
|
166
|
+
|
|
167
|
+
try:
|
|
168
|
+
tab = browser_page.get_tab(tab_id)
|
|
169
|
+
except Exception:
|
|
170
|
+
return None
|
|
171
|
+
|
|
172
|
+
if not tab:
|
|
173
|
+
return None
|
|
174
|
+
|
|
175
|
+
# Build render data for this page
|
|
176
|
+
markdown = f"# {page_data.get('title', 'Page')}\n\n{page_data.get('content', '')}"
|
|
177
|
+
|
|
178
|
+
render_data = {
|
|
179
|
+
"markdown": markdown,
|
|
180
|
+
"total_time": 0,
|
|
181
|
+
"stages": [],
|
|
182
|
+
"references": [],
|
|
183
|
+
"page_references": [],
|
|
184
|
+
"image_references": [],
|
|
185
|
+
"stats": {},
|
|
186
|
+
"theme_color": theme_color,
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
# 1. Update Data & Settle
|
|
190
|
+
tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
|
|
191
|
+
tab.wait(0.5) # Since images are Base64, decoding is nearly instant once injected
|
|
192
|
+
|
|
193
|
+
# 2. Dynamic Resize
|
|
194
|
+
# Get actual content height to prevent clipping
|
|
195
|
+
scroll_height = tab.run_js('return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);')
|
|
196
|
+
viewport_height = int(scroll_height) + 200
|
|
197
|
+
|
|
198
|
+
tab.run_cdp('Emulation.setDeviceMetricsOverride',
|
|
199
|
+
width=1920, height=viewport_height, deviceScaleFactor=1, mobile=False
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# 3. Hide Scrollbars (Now that viewport is large enough, overflow:hidden won't clip)
|
|
203
|
+
tab.run_js('document.documentElement.style.overflow = "hidden"')
|
|
204
|
+
tab.run_js('document.body.style.overflow = "hidden"')
|
|
205
|
+
|
|
206
|
+
# Use element's actual position and size
|
|
207
|
+
main_ele = tab.ele('#main-container', timeout=3)
|
|
208
|
+
if main_ele:
|
|
209
|
+
# Robustly hide scrollbars via CDP and Style Injection
|
|
210
|
+
SharedBrowserManager.hide_scrollbars(tab)
|
|
211
|
+
|
|
212
|
+
# Force root styles to eliminate gutter and ensure full width
|
|
213
|
+
tab.run_js('document.documentElement.style.overflow = "hidden";')
|
|
214
|
+
tab.run_js('document.body.style.overflow = "hidden";')
|
|
215
|
+
tab.run_js('document.documentElement.style.scrollbarGutter = "unset";')
|
|
216
|
+
tab.run_js('document.documentElement.style.width = "100%";')
|
|
217
|
+
|
|
218
|
+
orig_overflow = "auto" # just a placeholder, we rely on full refresh usually or don't care about restoring for single-purpose tabs
|
|
219
|
+
|
|
220
|
+
b64_img = main_ele.get_screenshot(as_base64='jpg')
|
|
221
|
+
|
|
222
|
+
# Restore not strictly needed for throwaway render tabs, but good practice
|
|
223
|
+
# tab.run_js(f'document.documentElement.style.overflow = "{orig_overflow}";')
|
|
224
|
+
try:
|
|
225
|
+
tab.set.scroll_bars(True)
|
|
226
|
+
except:
|
|
227
|
+
pass
|
|
228
|
+
return b64_img
|
|
229
|
+
else:
|
|
230
|
+
return tab.get_screenshot(as_base64='jpg', full_page=False)
|
|
231
|
+
|
|
232
|
+
except Exception as e:
|
|
233
|
+
logger.error(f"ContentRenderer: Failed to render page: {e}")
|
|
234
|
+
return None
|
|
235
|
+
finally:
|
|
236
|
+
if tab:
|
|
237
|
+
try:
|
|
238
|
+
tab.close()
|
|
239
|
+
except Exception:
|
|
240
|
+
pass
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
async def render(
|
|
244
|
+
self,
|
|
245
|
+
markdown_content: str,
|
|
246
|
+
output_path: str,
|
|
247
|
+
tab_id: Optional[str] = None,
|
|
248
|
+
stats: Dict[str, Any] = None,
|
|
249
|
+
references: List[Dict[str, Any]] = None,
|
|
250
|
+
page_references: List[Dict[str, Any]] = None,
|
|
251
|
+
image_references: List[Dict[str, Any]] = None,
|
|
252
|
+
stages_used: List[Dict[str, Any]] = None,
|
|
253
|
+
theme_color: str = "#ef4444",
|
|
254
|
+
**kwargs
|
|
255
|
+
) -> bool:
|
|
256
|
+
"""Render content to image using a specific (pre-warmed) tab or a temp one."""
|
|
257
|
+
loop = asyncio.get_running_loop()
|
|
258
|
+
return await loop.run_in_executor(
|
|
259
|
+
self._executor,
|
|
260
|
+
self._render_sync,
|
|
261
|
+
markdown_content,
|
|
262
|
+
output_path,
|
|
263
|
+
tab_id,
|
|
264
|
+
stats,
|
|
265
|
+
references,
|
|
266
|
+
page_references,
|
|
267
|
+
image_references,
|
|
268
|
+
stages_used,
|
|
269
|
+
theme_color
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
def _render_sync(
|
|
273
|
+
self,
|
|
274
|
+
markdown_content: str,
|
|
275
|
+
output_path: str,
|
|
276
|
+
tab_id: Optional[str],
|
|
277
|
+
stats: Dict[str, Any],
|
|
278
|
+
references: List[Dict[str, Any]],
|
|
279
|
+
page_references: List[Dict[str, Any]],
|
|
280
|
+
image_references: List[Dict[str, Any]],
|
|
281
|
+
stages_used: List[Dict[str, Any]],
|
|
282
|
+
theme_color: str
|
|
283
|
+
) -> bool:
|
|
284
|
+
"""Synchronous render implementation."""
|
|
285
|
+
tab = None
|
|
286
|
+
|
|
287
|
+
try:
|
|
288
|
+
self._ensure_manager()
|
|
289
|
+
page = self._manager.page
|
|
290
|
+
|
|
291
|
+
if tab_id:
|
|
292
|
+
try:
|
|
293
|
+
tab = page.get_tab(tab_id)
|
|
294
|
+
except Exception:
|
|
295
|
+
pass
|
|
296
|
+
|
|
297
|
+
if not tab:
|
|
298
|
+
logger.warning("ContentRenderer: Pre-warmed tab not found, creating new.")
|
|
299
|
+
tab = page.new_tab(self.template_path.as_uri())
|
|
300
|
+
tab.wait(0.5)
|
|
301
|
+
|
|
302
|
+
resolved_output_path = Path(output_path).resolve()
|
|
303
|
+
resolved_output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
304
|
+
|
|
305
|
+
stats_dict = stats[0] if isinstance(stats, list) and stats else (stats or {})
|
|
306
|
+
|
|
307
|
+
render_data = {
|
|
308
|
+
"markdown": markdown_content,
|
|
309
|
+
"total_time": stats_dict.get("total_time", 0) or 0,
|
|
310
|
+
"stages": stages_used or [],
|
|
311
|
+
"references": references or [],
|
|
312
|
+
"page_references": page_references or [],
|
|
313
|
+
"image_references": image_references or [],
|
|
314
|
+
"stats": stats_dict,
|
|
315
|
+
"theme_color": theme_color,
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
|
|
319
|
+
|
|
320
|
+
# Brief settle wait for masonry/images
|
|
321
|
+
tab.wait(0.6)
|
|
322
|
+
|
|
323
|
+
# Dynamic Resize
|
|
324
|
+
scroll_height = tab.run_js('return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);')
|
|
325
|
+
viewport_height = int(scroll_height) + 200
|
|
326
|
+
|
|
327
|
+
tab.run_cdp('Emulation.setDeviceMetricsOverride',
|
|
328
|
+
width=1920, height=viewport_height, deviceScaleFactor=1, mobile=False
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
# Hide scrollbars
|
|
332
|
+
tab.run_js('document.documentElement.style.overflow = "hidden"')
|
|
333
|
+
tab.run_js('document.body.style.overflow = "hidden"')
|
|
334
|
+
|
|
335
|
+
# Use element's actual position and size
|
|
336
|
+
main_ele = tab.ele('#main-container', timeout=5)
|
|
337
|
+
if main_ele:
|
|
338
|
+
import base64
|
|
339
|
+
|
|
340
|
+
# Robustly hide scrollbars via CDP and Style Injection
|
|
341
|
+
SharedBrowserManager.hide_scrollbars(tab)
|
|
342
|
+
|
|
343
|
+
# Force root styles to eliminate gutter and ensure full width
|
|
344
|
+
tab.run_js('document.documentElement.style.overflow = "hidden";')
|
|
345
|
+
tab.run_js('document.body.style.overflow = "hidden";')
|
|
346
|
+
tab.run_js('document.documentElement.style.scrollbarGutter = "unset";')
|
|
347
|
+
tab.run_js('document.documentElement.style.width = "100%";')
|
|
348
|
+
|
|
349
|
+
b64_img = main_ele.get_screenshot(as_base64='jpg')
|
|
350
|
+
|
|
351
|
+
# Restore scrollbars (optional here since we often close or navigate away)
|
|
352
|
+
try:
|
|
353
|
+
tab.set.scroll_bars(True)
|
|
354
|
+
except:
|
|
355
|
+
pass
|
|
356
|
+
|
|
357
|
+
with open(str(resolved_output_path), 'wb') as f:
|
|
358
|
+
f.write(base64.b64decode(b64_img))
|
|
359
|
+
else:
|
|
360
|
+
logger.warning("ContentRenderer: #main-container not found, using fallback")
|
|
361
|
+
tab.get_screenshot(path=str(resolved_output_path.parent), name=resolved_output_path.name, full_page=True)
|
|
362
|
+
|
|
363
|
+
return True
|
|
364
|
+
except Exception as e:
|
|
365
|
+
logger.error(f"ContentRenderer: Render failed: {e}")
|
|
366
|
+
return False
|
|
367
|
+
finally:
|
|
368
|
+
if tab:
|
|
369
|
+
try:
|
|
370
|
+
tab.close()
|
|
371
|
+
except Exception:
|
|
372
|
+
pass
|
|
373
|
+
|
|
374
|
+
async def close(self):
|
|
375
|
+
"""Close renderer."""
|
|
376
|
+
self._executor.shutdown(wait=False)
|
|
377
|
+
if self._render_tab:
|
|
378
|
+
try:
|
|
379
|
+
self._render_tab.close()
|
|
380
|
+
except Exception:
|
|
381
|
+
pass
|
|
382
|
+
self._render_tab = None
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# Singleton
_content_renderer: Optional[ContentRenderer] = None


async def get_content_renderer() -> ContentRenderer:
    """Return the process-wide ContentRenderer, creating it on first call.

    Fix: the instance is only published to the module global *after*
    ``start()`` succeeds; the original cached it first, so a failed startup
    left a broken renderer cached forever.

    NOTE(review): not guarded against concurrent first calls; two
    overlapping awaits may each build a renderer — confirm callers
    serialize startup.
    """
    global _content_renderer
    if _content_renderer is None:
        renderer = ContentRenderer()
        await renderer.start()
        _content_renderer = renderer
    return _content_renderer
|
397
|
+
|
|
398
|
+
def set_global_renderer(renderer: ContentRenderer):
    """Set the global renderer instance.

    Lets the host application inject a pre-configured renderer (e.g. a
    non-headless one) instead of the default created lazily by
    get_content_renderer().
    """
    global _content_renderer
    _content_renderer = renderer
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import urllib.parse
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
from typing import List, Dict, Any, Optional
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from .browser.service import get_screenshot_service
|
|
9
|
+
# New engines
|
|
10
|
+
from .browser.engines.bing import BingEngine
|
|
11
|
+
from .browser.engines.searxng import SearXNGEngine
|
|
12
|
+
|
|
13
|
+
class SearchService:
    """Web-search facade over the shared browser screenshot service.

    Builds engine-specific search URLs (Bing or SearXNG), fetches the
    result page through the browser service, and parses it into result
    dicts.  Also exposes generic page-fetch helpers used by other stages.
    """

    def __init__(self, config: Any):
        """Read settings from *config* via getattr with safe defaults."""
        self.config = config
        self._headless = getattr(config, "headless", True)
        self._fetch_timeout = getattr(config, "fetch_timeout", 20.0)
        self._default_limit = getattr(config, "search_limit", 10)

        # Domain blocking
        self._blocked_domains = getattr(config, "blocked_domains", []) or []

        # Select Engine.  Fix: tolerate an explicit None/empty config value,
        # which previously crashed on .lower(); default remains "bing".
        self._engine_name = (getattr(config, "search_engine", "bing") or "bing").lower()
        if self._engine_name == "bing":
            self._engine = BingEngine()
        else:
            # Anything non-bing falls through to SearXNG; surface typos.
            if self._engine_name != "searxng":
                logger.warning(f"SearchService: unknown engine '{self._engine_name}', using searxng")
            self._engine = SearXNGEngine()

        logger.info(f"SearchService initialized with engine: {self._engine_name}")

    def _build_search_url(self, query: str) -> str:
        """Delegate URL construction to the selected engine."""
        return self._engine.build_url(query, self._default_limit)

    async def search_batch(self, queries: List[str]) -> List[List[Dict[str, Any]]]:
        """Execute multiple searches concurrently (search() never raises)."""
        tasks = [self.search(q) for q in queries]
        return await asyncio.gather(*tasks)

    async def search(self, query: str) -> List[Dict[str, Any]]:
        """
        Main search entry point.
        Returns parsed results + 1 raw page item (marked hidden).
        Never raises: failures yield a single error item instead.
        """
        if not query:
            return []

        # Apply blocking unless the query already carries -site: exclusions.
        final_query = query
        enable_blocking = getattr(self.config, "enable_domain_blocking", True)
        if enable_blocking and self._blocked_domains and "-site:" not in query:
            exclusions = " ".join(f"-site:{d}" for d in self._blocked_domains)
            final_query = f"{query} {exclusions}"

        url = self._build_search_url(final_query)
        logger.info(f"Search: '{query}' -> {url}")

        results = []
        try:
            # Fetch - Search parsing doesn't need screenshot, only HTML
            page_data = await self.fetch_page_raw(url, include_screenshot=False)
            content = page_data.get("html", "") or page_data.get("content", "")

            # 1. Add Raw Page Item (Always)
            # This allows history manager to save the raw search page for debugging
            raw_item = {
                "title": f"Raw Search: {query}",
                "url": url,
                "content": content,  # Keep original content
                "type": "search_raw_page",  # Special type for history
                "_hidden": False,  # Unhidden to allow LLM access if needed
                "query": query,
                "images": page_data.get("images", [])
            }
            results.append(raw_item)

            # 2. Parse Results
            if content and not content.startswith("Error"):
                parsed = self._engine.parse(content)
                logger.info(f"Search parsed {len(parsed)} results for '{query}' using {self._engine_name}")
                results.extend(parsed)
            else:
                logger.warning(f"Search failed/empty for '{query}': {content[:100]}")

            return results

        except Exception as e:
            logger.error(f"Search error for '{query}': {e}")
            # Ensure we return at least an error item
            return [{
                "title": f"Error Search: {query}",
                "url": url,
                "content": f"Error: {e}",
                "type": "search_raw_page",
                "_hidden": True
            }]

    async def fetch_pages_batch(self, urls: List[str], include_screenshot: bool = True) -> List[Dict[str, Any]]:
        """Fetch multiple pages concurrently.

        NOTE(review): gather() without return_exceptions — one failing
        fetch_page aborts the whole batch; confirm callers expect that.
        """
        tasks = [self.fetch_page(u, include_screenshot=include_screenshot) for u in urls]
        return await asyncio.gather(*tasks)

    async def fetch_page(self, url: str, timeout: Optional[float] = None, include_screenshot: bool = True) -> Dict[str, Any]:
        """
        Fetch a single page for reading/extracting content.
        """
        if timeout is None:
            timeout = self._fetch_timeout
        return await self.fetch_page_raw(url, timeout, include_screenshot=include_screenshot)

    async def fetch_page_raw(self, url: str, timeout: Optional[float] = None, include_screenshot: bool = True) -> Dict[str, Any]:
        """Internal: Get raw data from browser service."""
        if timeout is None:
            timeout = self._fetch_timeout
        service = get_screenshot_service(headless=self._headless)
        return await service.fetch_page(url, timeout=timeout, include_screenshot=include_screenshot)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stage Base Classes
|
|
3
|
+
|
|
4
|
+
Abstract base classes for pipeline stages.
|
|
5
|
+
Each stage is a self-contained unit of work.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from openai import AsyncOpenAI
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class StageContext:
    """Mutable state threaded through every pipeline stage.

    Stages read what earlier stages accumulated and append their own
    findings; next_id() hands out unique reference ids across stages.
    """
    user_input: str
    images: List[str] = field(default_factory=list)
    conversation_history: List[Dict] = field(default_factory=list)
    instruct_history: List[Dict] = field(default_factory=list)  # per-round history of the Instruct stage

    # Data accumulated while the pipeline runs.
    web_results: List[Dict] = field(default_factory=list)
    agent_context: str = ""
    review_context: str = ""  # handed from the Instruct stage to the Review stage

    # Mode info, populated by the Instruct stage.
    task_list: List[str] = field(default_factory=list)

    # Refusal control flags.
    should_refuse: bool = False
    refuse_reason: str = ""

    # Monotonic counter backing next_id().
    global_id_counter: int = 0

    def next_id(self) -> int:
        """Return the next unique global reference id (starts at 1)."""
        next_value = self.global_id_counter + 1
        self.global_id_counter = next_value
        return next_value
|
|
44
|
+
@dataclass
class StageResult:
    """Outcome of one stage execution: status, payload and accounting."""
    success: bool
    data: Dict[str, Any] = field(default_factory=dict)
    # Token accounting always starts zeroed so stages can += into it.
    usage: Dict[str, int] = field(default_factory=lambda: dict(input_tokens=0, output_tokens=0))
    trace: Dict[str, Any] = field(default_factory=dict)
    error: Optional[str] = None
class BaseStage(ABC):
    """Common base for pipeline stages.

    Holds the shared config, search service and default LLM client; concrete
    stages implement name and execute().
    """

    def __init__(self, config: Any, search_service: Any, client: AsyncOpenAI):
        self.config = config
        self.search_service = search_service
        self.client = client

    @property
    @abstractmethod
    def name(self) -> str:
        """Stage name for logging and tracing."""

    @abstractmethod
    async def execute(self, context: StageContext) -> StageResult:
        """
        Execute the stage.

        Args:
            context: Shared context with accumulated data

        Returns:
            StageResult with success status, data, usage, and trace info
        """

    def _client_for(self, api_key: Optional[str], base_url: Optional[str]) -> AsyncOpenAI:
        """Return the default client, or a fresh one when overrides are given."""
        if not api_key and not base_url:
            return self.client
        return AsyncOpenAI(
            base_url=base_url or self.config.base_url,
            api_key=api_key or self.config.api_key,
        )
|