entari-plugin-hyw 3.2.113__py3-none-any.whl → 3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (49) hide show
  1. entari_plugin_hyw/__init__.py +309 -758
  2. entari_plugin_hyw/hyw_core.py +700 -0
  3. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/METADATA +25 -17
  4. entari_plugin_hyw-3.3.1.dist-info/RECORD +6 -0
  5. entari_plugin_hyw/assets/icon/anthropic.svg +0 -1
  6. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  7. entari_plugin_hyw/assets/icon/gemini.svg +0 -1
  8. entari_plugin_hyw/assets/icon/google.svg +0 -1
  9. entari_plugin_hyw/assets/icon/grok.png +0 -0
  10. entari_plugin_hyw/assets/icon/microsoft.svg +0 -15
  11. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  12. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  13. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  14. entari_plugin_hyw/assets/icon/openai.svg +0 -1
  15. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/icon/perplexity.svg +0 -24
  17. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  18. entari_plugin_hyw/assets/icon/xai.png +0 -0
  19. entari_plugin_hyw/assets/icon/zai.png +0 -0
  20. entari_plugin_hyw/assets/libs/highlight.css +0 -10
  21. entari_plugin_hyw/assets/libs/highlight.js +0 -1213
  22. entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
  23. entari_plugin_hyw/assets/libs/katex.css +0 -1
  24. entari_plugin_hyw/assets/libs/katex.js +0 -1
  25. entari_plugin_hyw/assets/libs/tailwind.css +0 -1
  26. entari_plugin_hyw/assets/package-lock.json +0 -953
  27. entari_plugin_hyw/assets/package.json +0 -16
  28. entari_plugin_hyw/assets/tailwind.config.js +0 -12
  29. entari_plugin_hyw/assets/tailwind.input.css +0 -235
  30. entari_plugin_hyw/assets/template.html +0 -157
  31. entari_plugin_hyw/assets/template.html.bak +0 -157
  32. entari_plugin_hyw/assets/template.j2 +0 -259
  33. entari_plugin_hyw/core/__init__.py +0 -0
  34. entari_plugin_hyw/core/config.py +0 -36
  35. entari_plugin_hyw/core/history.py +0 -146
  36. entari_plugin_hyw/core/hyw.py +0 -41
  37. entari_plugin_hyw/core/pipeline.py +0 -840
  38. entari_plugin_hyw/core/render.py +0 -531
  39. entari_plugin_hyw/core/render.py.bak +0 -926
  40. entari_plugin_hyw/utils/__init__.py +0 -3
  41. entari_plugin_hyw/utils/browser.py +0 -61
  42. entari_plugin_hyw/utils/mcp_playwright.py +0 -128
  43. entari_plugin_hyw/utils/misc.py +0 -93
  44. entari_plugin_hyw/utils/playwright_tool.py +0 -46
  45. entari_plugin_hyw/utils/prompts.py +0 -94
  46. entari_plugin_hyw/utils/search.py +0 -193
  47. entari_plugin_hyw-3.2.113.dist-info/RECORD +0 -47
  48. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/WHEEL +0 -0
  49. {entari_plugin_hyw-3.2.113.dist-info → entari_plugin_hyw-3.3.1.dist-info}/top_level.txt +0 -0
@@ -1,531 +0,0 @@
1
- import asyncio
2
- import gc
3
- import os
4
- import markdown
5
- import base64
6
- import mimetypes
7
- from datetime import datetime
8
- from urllib.parse import urlparse
9
- from typing import List, Dict, Optional, Any, Union
10
- import re
11
- import json
12
- from pathlib import Path
13
- from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
14
- from loguru import logger
15
- from jinja2 import Environment, FileSystemLoader, select_autoescape
16
-
17
- class ContentRenderer:
18
def __init__(self, template_path: Optional[str] = None):
    """Load the bundled front-end assets and prepare the Jinja2 template.

    Args:
        template_path: Path of the Jinja2 template used for rendering.
            When ``None``, ``assets/template.j2`` under the plugin root
            (the parent of this module's directory) is used.

    Raises:
        Whatever ``Environment.get_template`` raises when the template
        cannot be loaded. Missing or unreadable library assets are NOT
        fatal: they are logged and replaced by an empty string.
    """
    # The plugin root is one level above the directory holding this module.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    plugin_root = os.path.dirname(current_dir)

    if template_path is None:
        # Default to assets/template.j2 in the plugin root
        template_path = os.path.join(plugin_root, "assets", "template.j2")

    self.template_path = template_path
    self.assets_dir = os.path.join(plugin_root, "assets", "icon")

    # JS/CSS libraries are read into memory once and later passed to the
    # template as context variables under the keys below.
    libs_dir = os.path.join(plugin_root, "assets", "libs")
    assets_map = {
        "highlight_css": os.path.join(libs_dir, "highlight.css"),
        "highlight_js": os.path.join(libs_dir, "highlight.js"),
        "katex_css": os.path.join(libs_dir, "katex.css"),
        "katex_js": os.path.join(libs_dir, "katex.js"),
        "katex_auto_render_js": os.path.join(libs_dir, "katex-auto-render.js"),
        "tailwind_css": os.path.join(libs_dir, "tailwind.css"),
    }

    self.assets = {}
    total_size = 0
    loaded_count = 0
    for key, path in assets_map.items():
        try:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as exc:
            # Best effort: a missing library degrades the output but must
            # not prevent the renderer from being constructed.
            logger.warning(f"ContentRenderer: failed to load {key} ({exc})")
            self.assets[key] = ""
        else:
            self.assets[key] = content
            total_size += len(content)
            loaded_count += 1

    # Report how many assets were actually read, not how many were requested.
    logger.info(
        f"ContentRenderer: loaded {loaded_count}/{len(assets_map)} libs ({total_size} bytes)"
    )

    # Initialize Jinja2 Environment
    template_dir = os.path.dirname(self.template_path)
    template_name = os.path.basename(self.template_path)
    logger.info(f"ContentRenderer: initializing Jinja2 from {template_dir} / {template_name}")

    # NOTE(review): select_autoescape(['html', 'xml']) decides per template
    # file extension; the default template is "template.j2", so autoescaping
    # is presumably NOT active for it. Confirm this is intended before
    # passing untrusted text into the template.
    self.env = Environment(
        loader=FileSystemLoader(template_dir),
        autoescape=select_autoescape(['html', 'xml'])
    )
    self.template = self.env.get_template(template_name)
67
-
68
async def _set_content_safe(self, page, html: str, timeout_ms: int) -> bool:
    """Load ``html`` into ``page`` without letting a failure propagate.

    Calls ``page.set_content`` with ``wait_until="networkidle"`` and the
    given timeout.

    Returns:
        True when ``set_content`` completed; False when it timed out or
        raised. Both failures are logged as warnings with the HTML size.
    """
    size = len(html)
    try:
        await page.set_content(html, wait_until="networkidle", timeout=timeout_ms)
    except PlaywrightTimeoutError:
        logger.warning(f"ContentRenderer: page.set_content timed out after {timeout_ms}ms (html_size={size})")
        succeeded = False
    except Exception as exc:
        logger.warning(f"ContentRenderer: page.set_content failed (html_size={size}): {exc}")
        succeeded = False
    else:
        succeeded = True
    return succeeded
79
-
80
def _get_icon_data_url(self, icon_name: str) -> str:
    """Return the icon identified by ``icon_name`` as a base64 ``data:`` URL.

    Resolution order:
      1. If ``icon_name`` is an http(s) URL, download it; on a 200 response
         the body is embedded using the response's content-type
         (``image/png`` when the header is absent).
      2. Otherwise, or when the download did not succeed, look the name up
         under ``self.assets_dir``. A name containing "." is used as the
         file name as-is; a bare name is tried with ".svg" and then ".png".
      3. When that file does not exist, fall back to ``openai.svg``.

    Returns:
        The ``data:<mime>;base64,<payload>`` string, or "" when
        ``icon_name`` is empty or no icon file could be found.
    """
    if not icon_name:
        return ""

    # 1. Remote icon
    if icon_name.startswith(("http://", "https://")):
        try:
            import httpx
            # NOTE: this is a synchronous request (5.0 s httpx timeout).
            resp = httpx.get(icon_name, timeout=5.0)
            if resp.status_code == 200:
                mime_type = resp.headers.get("content-type", "image/png")
                b64_data = base64.b64encode(resp.content).decode("utf-8")
                return f"data:{mime_type};base64,{b64_data}"
        except Exception as e:
            # Use the module logger like the rest of this class, not print().
            logger.warning(f"Failed to download icon from {icon_name}: {e}")
        # Download failed or returned a non-200 status: fall back to local lookup.

    # 2. Local file lookup
    if "." in icon_name:
        filename = icon_name
    else:
        candidates = (icon_name + ext for ext in (".svg", ".png"))
        filename = next(
            (name for name in candidates
             if os.path.exists(os.path.join(self.assets_dir, name))),
            icon_name + ".svg",  # Default fallback
        )

    filepath = os.path.join(self.assets_dir, filename)
    if not os.path.exists(filepath):
        # 3. Fallback to openai.svg if the specific file is not found
        filepath = os.path.join(self.assets_dir, "openai.svg")
        if not os.path.exists(filepath):
            return ""

    mime_type = mimetypes.guess_type(filepath)[0] or "image/png"

    with open(filepath, "rb") as f:
        b64_data = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{b64_data}"
126
-
127
def _get_domain(self, url: str) -> str:
    """Return a canonical provider domain for ``url``.

    The network location of ``url`` is checked, in order, for the
    substrings "openrouter", "openai", "anthropic", "google" and
    "deepseek"; the first hit yields that provider's canonical domain.
    Any other host is returned unchanged (possibly "" when ``url`` has no
    network location).

    Returns:
        The canonical or raw domain, or "unknown" when ``url`` cannot be
        parsed.
    """
    # Order matters: the first matching substring wins.
    known_providers = (
        ("openrouter", "openrouter.ai"),
        ("openai", "openai.com"),
        ("anthropic", "anthropic.com"),
        ("google", "google.com"),
        ("deepseek", "deepseek.com"),
    )
    try:
        domain = urlparse(url).netloc
        for needle, canonical in known_providers:
            if needle in domain:
                return canonical
        return domain
    except (ValueError, TypeError, AttributeError):
        # Malformed URL or a non-string argument. A bare ``except`` would
        # also have swallowed KeyboardInterrupt/SystemExit.
        return "unknown"
139
-
140
async def render(self,
                 markdown_content: str,
                 output_path: str,
                 suggestions: List[str] = None,
                 stats: Dict[str, Any] = None,
                 references: List[Dict[str, Any]] = None,
                 mcp_steps: List[Dict[str, Any]] = None,
                 stages_used: List[Dict[str, Any]] = None,
                 model_name: str = "",
                 provider_name: str = "Unknown",
                 behavior_summary: str = "Text Generation",
                 icon_config: str = "openai",
                 vision_model_name: str = None,
                 vision_icon_config: str = None,
                 vision_base_url: str = None,
                 base_url: str = "https://openrouter.ai/api/v1",
                 billing_info: Dict[str, Any] = None,
                 render_timeout_ms: int = 6000):
    """Render markdown content to an image using Playwright and Jinja2.

    Args:
        markdown_content: Markdown text to render. A trailing
            "References"/"Citations"/"Sources" section and lines starting
            with "[code]" are stripped before conversion.
        output_path: File path the screenshot is written to.
        suggestions: Passed to the template as ``suggestions``.
        stats: Dict (or list whose first item is a dict) read for the
            "time" and "vision_duration" keys to build the stats footer.
        references: Dicts read for "url" and "title"; at most the first 8
            are shown. The full list is also passed as ``references_json``.
        mcp_steps: Dicts read for "name", "description" and "icon";
            attached to the stage named "Agent".
        stages_used: Dicts read for "name", "model", "provider",
            "icon_config", "time" and "cost".
        billing_info: Dict read for "total_cost" (shown in cents).
        render_timeout_ms: Maximum time to wait for images to load.
        model_name, provider_name, behavior_summary, icon_config,
        vision_model_name, vision_icon_config, vision_base_url, base_url:
            Accepted for interface compatibility; not used by this method.

    Returns:
        True when the screenshot was written. False when building the HTML
        or driving the browser failed; the failure is logged, not raised.
    """
    # Preprocess to fix common markdown issues: make sure a list item that
    # directly follows a text line is preceded by a blank line.
    markdown_content = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|[-*+]) )', r'\n\n', markdown_content)

    # AGGRESSIVE CLEANING: strip the "References" section and "[code]" lines
    # from the text because they are rendered as structured UI elements.

    # 1. Remove a "References"/"Citations"/"Sources" header and everything
    #    after it (DOTALL lets ".*" run to the end of the text).
    markdown_content = re.sub(r'(?i)^\s*(#{1,3}|\*\*)\s*(References|Citations|Sources).*$', '', markdown_content, flags=re.MULTILINE | re.DOTALL)

    # 2. Remove isolated lines starting with [code] or [CODE]
    markdown_content = re.sub(r'(?i)^\s*\[code\].*?(\n|$)', '', markdown_content, flags=re.MULTILINE)

    # Matches a trailing <pre><code> block that mentions "mcp_steps" or
    # "references", i.e. structured JSON that leaked into the answer text.
    leaked_json_re = re.compile(
        r'<pre><code[^>]*>[^<]*(mcp_steps|references)[^<]*</code></pre>\s*$',
        flags=re.DOTALL | re.IGNORECASE,
    )

    # Unified Search Icon (RemixIcon)
    SEARCH_ICON = '<i class="ri-search-line text-[16px]"></i>'
    DEFAULT_ICON = '<i class="ri-box-3-line text-[16px]"></i>'

    # RemixIcon mapping for MCP steps
    STEP_ICONS = {
        "navigate": '<i class="ri-compass-3-line"></i>',
        "snapshot": '<i class="ri-camera-lens-line"></i>',
        "click": '<i class="ri-cursor-fill"></i>',
        "type": '<i class="ri-keyboard-line"></i>',
        "code": '<i class="ri-code-line"></i>',
        "search": SEARCH_ICON,
        "default": '<i class="ri-arrow-right-s-line"></i>',
    }

    # (substrings looked for in the lower-cased model name, icon name).
    # Order matters: the first row with a matching substring wins.
    # NOTE(review): "meta" and "nvidia" presumably have no matching icon
    # file (the bundled asset appears to be named "nvida.png"), in which
    # case _get_icon_data_url falls back to openai.svg - confirm.
    ICON_HINTS = (
        (("claude", "anthropic"), "anthropic"),
        (("gpt", "openai", "o1"), "openai"),
        (("gemini", "google"), "google"),
        (("deepseek",), "deepseek"),
        (("mistral",), "mistral"),
        (("llama",), "meta"),
        (("qwen",), "qwen"),
        (("grok",), "grok"),
        (("perplexity",), "perplexity"),
        (("minimax",), "minimax"),
        (("nvidia",), "nvidia"),
    )

    def infer_icon_name(model_str):
        """Guess the vendor icon name from a model identifier, or None."""
        if not model_str:
            return None
        m = model_str.lower()
        for needles, icon in ICON_HINTS:
            if any(needle in m for needle in needles):
                return icon
        return None

    max_attempts = 1
    for attempt in range(1, max_attempts + 1):
        try:
            # 1. Protect math blocks ($$...$$, \[...\], \(...\)) with
            #    placeholders so markdown extensions (like nl2br) don't
            #    touch them.
            math_blocks = {}

            def protect_math(match):
                # Purely alphanumeric on purpose: a placeholder wrapped in
                # double underscores is itself Markdown strong-emphasis
                # syntax and can be rewritten to <strong>...</strong>,
                # after which it can no longer be restored.
                key = f"HYWMATHBLOCK{len(math_blocks)}X"
                math_blocks[key] = match.group(0)
                return key

            # Work on a copy so every attempt starts from the cleaned
            # markdown rather than from already-substituted text.
            protected_md = re.sub(r'\$\$(.*?)\$\$\s*', protect_math, markdown_content, flags=re.DOTALL)
            protected_md = re.sub(r'\\\[(.*?)\\\]\s*', protect_math, protected_md, flags=re.DOTALL)
            # \(...\) is usually single line, but DOTALL is safest if the
            # author wraps lines.
            protected_md = re.sub(r'\\\((.*?)\\\)', protect_math, protected_md, flags=re.DOTALL)

            # 2. Render Markdown
            # 'nl2br' turns newlines into <br>, 'fenced_code' handles code blocks
            content_html = markdown.markdown(
                protected_md.strip(),
                extensions=['fenced_code', 'tables', 'nl2br', 'sane_lists']
            )

            # 3. Restore math blocks. Newest first, so a block that happened
            #    to swallow an earlier placeholder gets that one restored too.
            for key, val in reversed(list(math_blocks.items())):
                content_html = content_html.replace(key, val)

            # Post-process to style citation markers. Citations are emitted
            # as inline code spans (<code>ref:123</code>, <code>mcp:abc</code>),
            # so split on code elements and rewrite only exact matches.
            parts = re.split(r'(<code.*?>.*?</code>)', content_html, flags=re.DOTALL)
            for i, part in enumerate(parts):
                if not part.startswith('<code'):
                    continue

                # 1. Numeric: <code>ref:123</code>
                ref_match = re.match(r'^<code.*?>ref:(\d+)</code>$', part)
                if ref_match:
                    citation_id = ref_match.group(1)
                    parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{citation_id}</span>'
                    continue

                # 2. Alpha: <code>mcp:abc</code>
                mcp_match = re.match(r'^<code.*?>mcp:([a-zA-Z]+)</code>$', part)
                if mcp_match:
                    mcp_id = mcp_match.group(1)
                    parts[i] = f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-600 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{mcp_id}</span>'
                    continue
                # Any other code element is left untouched.
            content_html = "".join(parts)

            # Strip structured JSON blocks that leaked into the end of the
            # content; loop because several may be stacked.
            while leaked_json_re.search(content_html):
                content_html = leaked_json_re.sub('', content_html)

            # --- PREPARE DATA FOR JINJA TEMPLATE ---

            # References (built first because they are nested under "Search")
            processed_refs = []
            for ref in (references or [])[:8]:
                url = ref.get("url", "#")
                try:
                    domain = urlparse(url).netloc
                    if domain.startswith("www."):
                        domain = domain[4:]
                except (ValueError, TypeError, AttributeError):
                    domain = "unknown"

                processed_refs.append({
                    "title": ref.get("title", "No Title"),
                    "url": url,
                    "domain": domain,
                    "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                })

            # Pipeline stages (with nested data)
            processed_stages = []
            for stage in stages_used or []:
                name = stage.get("name", "Step")
                # ``or`` also covers an explicit None value.
                model = stage.get("model") or ""

                if name == "Search":
                    icon_html = SEARCH_ICON
                else:
                    # Vendor logo: explicit icon_config first, otherwise
                    # inferred from the model name.
                    icon_key = stage.get("icon_config", "") or infer_icon_name(model)
                    icon_data_url = self._get_icon_data_url(icon_key) if icon_key else ""
                    if icon_data_url:
                        icon_html = f'<img src="{icon_data_url}" class="w-5 h-5 object-contain rounded">'
                    else:
                        icon_html = DEFAULT_ICON

                # Short model name: last path segment, truncated for display
                model_short = model.split("/")[-1] if "/" in model else model
                if len(model_short) > 25:
                    model_short = model_short[:23] + "…"

                # Missing or None timings/costs are shown as zero instead of
                # failing the whole render on number formatting.
                time_val = stage.get("time") or 0
                cost_val = stage.get("cost") or 0.0
                if name == "Search":
                    cost_val = 0.0

                # --- NESTED DATA ---
                stage_children = {}

                # References go to "Search"
                if name == "Search" and processed_refs:
                    stage_children['references'] = processed_refs

                # MCP steps go to "Agent"
                if name == "Agent" and mcp_steps:
                    stage_mcp_steps = []
                    for step in mcp_steps:
                        step_icon = (step.get("icon") or "").lower()
                        if "search" in step_icon:
                            step_icon = "search"
                        elif "nav" in step_icon or "visit" in step_icon:
                            step_icon = "navigate"
                        elif "click" in step_icon:
                            step_icon = "click"
                        elif "type" in step_icon or "input" in step_icon:
                            step_icon = "type"
                        elif "shot" in step_icon:
                            step_icon = "snapshot"

                        stage_mcp_steps.append({
                            "name": step.get("name", "unknown"),
                            "description": step.get("description", ""),
                            "icon_svg": STEP_ICONS.get(step_icon, STEP_ICONS["default"])
                        })
                    stage_children['mcp_steps'] = stage_mcp_steps

                processed_stages.append({
                    "name": name,
                    "model": model,
                    "model_short": model_short,
                    "provider": stage.get("provider", ""),
                    "icon_html": icon_html,
                    "time_str": f"{time_val:.2f}s",
                    "cost_str": f"${cost_val:.6f}" if cost_val > 0 else "$0",
                    **stage_children  # Merge children
                })

            # Stats footer
            processed_stats = {}
            if stats:
                # Accept either a stats dict or a list whose first item is one
                if isinstance(stats, list):
                    stats_dict = stats[0] if stats else {}
                else:
                    stats_dict = stats

                agent_total_time = stats_dict.get("time", 0)
                vision_time = stats_dict.get("vision_duration", 0)
                # LLM time is whatever remains after the vision stage
                llm_time = max(0, agent_total_time - vision_time)

                vision_html = ""
                if vision_time > 0:
                    vision_html = f'''
                    <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                        <span class="w-2 h-2 rounded-full bg-purple-400"></span>
                        <span>{vision_time:.1f}s</span>
                    </div>
                    '''

                llm_html = f'''
                <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                    <span class="w-2 h-2 rounded-full bg-green-400"></span>
                    <span>{llm_time:.1f}s</span>
                </div>
                '''

                billing_html = ""
                if billing_info and billing_info.get("total_cost", 0) > 0:
                    cost_cents = billing_info["total_cost"] * 100
                    billing_html = f'''
                    <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                        <span class="w-2 h-2 rounded-full bg-pink-500"></span>
                        <span>{cost_cents:.4f}¢</span>
                    </div>
                    '''

                processed_stats = {
                    "vision_html": vision_html,
                    "llm_html": llm_html,
                    "billing_html": billing_html
                }

            # Render Template
            context = {
                "content_html": content_html,
                "suggestions": suggestions or [],
                "stages": processed_stages,
                "references": processed_refs,
                "references_json": json.dumps(references or []),
                "stats": processed_stats,
                **self.assets
            }

            final_html = self.template.render(**context)

        except MemoryError:
            logger.warning(f"ContentRenderer: out of memory while building HTML (attempt {attempt}/{max_attempts})")
            continue
        except Exception as exc:
            logger.warning(f"ContentRenderer: failed to build HTML (attempt {attempt}/{max_attempts}) ({exc})")
            continue

        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    # device_scale_factor=2 for high DPI rendering (better quality)
                    page = await browser.new_page(viewport={"width": 450, "height": 1200}, device_scale_factor=2)

                    # Set content (10s timeout to handle slow CDN loading)
                    set_ok = await self._set_content_safe(page, final_html, 10000)
                    if not set_ok or page.is_closed():
                        raise RuntimeError("set_content failed")

                    # Wait for images with the user-configured timeout.
                    # Images that fail to load or have no size are hidden.
                    image_timeout_sec = render_timeout_ms / 1000.0
                    try:
                        await asyncio.wait_for(
                            page.evaluate("""
                                () => Promise.all(
                                    Array.from(document.images).map(img => {
                                        if (img.complete) {
                                            if (img.naturalWidth === 0 || img.naturalHeight === 0) {
                                                img.style.display = 'none';
                                            }
                                            return Promise.resolve();
                                        }
                                        return new Promise((resolve) => {
                                            img.onload = () => {
                                                if (img.naturalWidth === 0 || img.naturalHeight === 0) {
                                                    img.style.display = 'none';
                                                }
                                                resolve();
                                            };
                                            img.onerror = () => {
                                                img.style.display = 'none';
                                                resolve();
                                            };
                                        });
                                    })
                                )
                            """),
                            timeout=image_timeout_sec
                        )
                    except asyncio.TimeoutError:
                        logger.warning(f"ContentRenderer: image loading timed out after {image_timeout_sec}s, continuing...")

                    # Brief wait for layout to stabilize
                    await asyncio.sleep(0.1)

                    # Try element screenshot first, fallback to full page
                    element = await page.query_selector("#main-container")

                    try:
                        if element:
                            await element.screenshot(path=output_path)
                        else:
                            await page.screenshot(path=output_path, full_page=True)
                    except Exception as screenshot_exc:
                        logger.warning(f"ContentRenderer: element screenshot failed ({screenshot_exc}), trying full page...")
                        await page.screenshot(path=output_path, full_page=True)

                finally:
                    try:
                        await browser.close()
                    except Exception as exc:
                        logger.warning(f"ContentRenderer: failed to close browser ({exc})")
            return True
        except Exception as exc:
            logger.warning(f"ContentRenderer: render attempt {attempt}/{max_attempts} failed ({exc})")
        finally:
            # Drop the large HTML strings promptly to keep peak memory down.
            content_html = None
            final_html = None
            gc.collect()

    # Every attempt failed (each failure was logged above).
    return False