entari-plugin-hyw 3.3.1__py3-none-any.whl → 3.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +763 -309
- entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
- entari_plugin_hyw/assets/icon/deepseek.png +0 -0
- entari_plugin_hyw/assets/icon/gemini.svg +1 -0
- entari_plugin_hyw/assets/icon/google.svg +1 -0
- entari_plugin_hyw/assets/icon/grok.png +0 -0
- entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
- entari_plugin_hyw/assets/icon/minimax.png +0 -0
- entari_plugin_hyw/assets/icon/mistral.png +0 -0
- entari_plugin_hyw/assets/icon/nvida.png +0 -0
- entari_plugin_hyw/assets/icon/openai.svg +1 -0
- entari_plugin_hyw/assets/icon/openrouter.png +0 -0
- entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
- entari_plugin_hyw/assets/icon/qwen.png +0 -0
- entari_plugin_hyw/assets/icon/xai.png +0 -0
- entari_plugin_hyw/assets/icon/zai.png +0 -0
- entari_plugin_hyw/assets/libs/highlight.css +10 -0
- entari_plugin_hyw/assets/libs/highlight.js +1213 -0
- entari_plugin_hyw/assets/libs/katex-auto-render.js +1 -0
- entari_plugin_hyw/assets/libs/katex.css +1 -0
- entari_plugin_hyw/assets/libs/katex.js +1 -0
- entari_plugin_hyw/assets/libs/tailwind.css +1 -0
- entari_plugin_hyw/assets/package-lock.json +953 -0
- entari_plugin_hyw/assets/package.json +16 -0
- entari_plugin_hyw/assets/tailwind.config.js +12 -0
- entari_plugin_hyw/assets/tailwind.input.css +235 -0
- entari_plugin_hyw/assets/template.html +157 -0
- entari_plugin_hyw/assets/template.html.bak +157 -0
- entari_plugin_hyw/assets/template.j2 +307 -0
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +35 -0
- entari_plugin_hyw/core/history.py +146 -0
- entari_plugin_hyw/core/hyw.py +41 -0
- entari_plugin_hyw/core/pipeline.py +1065 -0
- entari_plugin_hyw/core/render.py +596 -0
- entari_plugin_hyw/core/render.py.bak +926 -0
- entari_plugin_hyw/utils/__init__.py +2 -0
- entari_plugin_hyw/utils/browser.py +40 -0
- entari_plugin_hyw/utils/misc.py +93 -0
- entari_plugin_hyw/utils/playwright_tool.py +36 -0
- entari_plugin_hyw/utils/prompts.py +129 -0
- entari_plugin_hyw/utils/search.py +241 -0
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/METADATA +20 -28
- entari_plugin_hyw-3.3.2.dist-info/RECORD +46 -0
- entari_plugin_hyw/hyw_core.py +0 -700
- entari_plugin_hyw-3.3.1.dist-info/RECORD +0 -6
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.3.1.dist-info → entari_plugin_hyw-3.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import gc
|
|
3
|
+
import os
|
|
4
|
+
import markdown
|
|
5
|
+
import base64
|
|
6
|
+
import html # Import html for escaping
|
|
7
|
+
import mimetypes
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
from typing import List, Dict, Optional, Any, Union
|
|
11
|
+
import re
|
|
12
|
+
import json
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
|
16
|
+
from crawl4ai import AsyncWebCrawler
|
|
17
|
+
from crawl4ai.async_configs import CrawlerRunConfig
|
|
18
|
+
from crawl4ai.cache_context import CacheMode
|
|
19
|
+
from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
|
|
20
|
+
from playwright.async_api import async_playwright
|
|
21
|
+
|
|
22
|
+
# Patch Crawl4AI 0.7.x to support screenshot from raw/file HTML
|
|
23
|
+
async def _c4a_generate_screenshot_from_html(self, html: str) -> str:
    """Render an HTML string with Playwright and return a base64 screenshot.

    Monkey-patched fallback meant to be bound as a method on
    ``AsyncPlaywrightCrawlerStrategy``; ``self`` must expose
    ``browser_manager.get_page``.

    Args:
        html: Complete HTML document to load into the page. The parameter
            name shadows the stdlib ``html`` module inside this function only.

    Returns:
        The screenshot bytes encoded as base64 text. The ``#main-container``
        element is captured when present, otherwise the full page.
    """
    page, context = await self.browser_manager.get_page(
        crawlerRunConfig=CrawlerRunConfig(
            adjust_viewport_to_content=True,
            wait_until="networkidle",
            wait_for_images=True,
            cache_mode=CacheMode.BYPASS,
        )
    )
    try:
        try:
            await page.set_viewport_size({"width": 520, "height": 1200})
        except Exception as exc:
            # Best effort: keep whatever viewport the page already has.
            logger.debug(f"screenshot fallback: could not set viewport ({exc})")

        await page.set_content(html, wait_until="networkidle")
        # Short settle delay after network idle before capturing.
        await page.wait_for_timeout(150)

        element = await page.query_selector("#main-container")
        if element:
            screenshot_bytes = await element.screenshot()
        else:
            screenshot_bytes = await page.screenshot(full_page=True)
        return base64.b64encode(screenshot_bytes).decode()
    finally:
        try:
            await context.close()
        except Exception as exc:
            # Cleanup failure must not mask the result or the original error.
            logger.debug(f"screenshot fallback: could not close context ({exc})")
|
|
54
|
+
|
|
55
|
+
# Install the fallback only when AsyncPlaywrightCrawlerStrategy does not already
# define `_generate_screenshot_from_html`, so an existing implementation is
# never overridden.
if not hasattr(AsyncPlaywrightCrawlerStrategy, "_generate_screenshot_from_html"):
    AsyncPlaywrightCrawlerStrategy._generate_screenshot_from_html = _c4a_generate_screenshot_from_html
|
|
57
|
+
|
|
58
|
+
class ContentRenderer:
    """Render a markdown answer plus pipeline metadata to a JPEG image.

    The markdown is converted to HTML, merged into a Jinja2 template together
    with inlined CSS/JS assets, and screenshotted with headless Chromium via
    Playwright.
    """

    # Font-icon markup used where no vendor logo applies.
    _SEARCH_ICON = '<i class="ri-search-line text-[16px]"></i>'
    _BROWSER_ICON = '<i class="ri-global-line text-[16px]"></i>'
    _DEFAULT_ICON = '<i class="ri-box-3-line text-[16px]"></i>'
    _PAGE_ICON = '<i class="ri-file-text-line text-[16px]"></i>'

    # Alternate file stems tried when no icon file matches the requested name.
    # The packaged NVIDIA logo is shipped as "nvida.png".
    _ICON_ALIASES = {"nvidia": "nvida"}

    # Shared markup for inline citation / flow badges; only the colors differ.
    _BADGE_TEMPLATE = (
        '<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 '
        'text-[10px] font-bold {colors} rounded mx-0.5 align-top relative -top-0.5">{label}</span>'
    )
    _REF_BADGE_COLORS = "text-blue-600 bg-blue-50 border border-blue-200"
    _FLOW_BADGE_COLORS = "text-orange-700 bg-orange-50 border border-orange-200"

    # A trailing <pre><code> block mentioning "references" (leaked structured JSON).
    _TRAILING_REFERENCES_BLOCK = re.compile(
        r'<pre><code[^>]*>[^<]*references[^<]*</code></pre>\s*$',
        flags=re.DOTALL | re.IGNORECASE,
    )

    def __init__(self, template_path: str = None):
        """Load the inlined assets and prepare the Jinja2 template.

        Args:
            template_path: Path to the Jinja2 template. Defaults to
                ``assets/template.j2`` under the plugin root (the parent of
                this file's directory).
        """
        plugin_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        if template_path is None:
            template_path = os.path.join(plugin_root, "assets", "template.j2")

        self.template_path = template_path
        self.assets_dir = os.path.join(plugin_root, "assets", "icon")

        # Libraries are read once and passed to the template as strings.
        libs_dir = os.path.join(plugin_root, "assets", "libs")
        assets_map = {
            "highlight_css": os.path.join(libs_dir, "highlight.css"),
            "highlight_js": os.path.join(libs_dir, "highlight.js"),
            "katex_css": os.path.join(libs_dir, "katex.css"),
            "katex_js": os.path.join(libs_dir, "katex.js"),
            "katex_auto_render_js": os.path.join(libs_dir, "katex-auto-render.js"),
            "tailwind_css": os.path.join(libs_dir, "tailwind.css"),
        }

        self.assets = {}
        total_size = 0
        for key, path in assets_map.items():
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as exc:
                # A missing library degrades the rendering but must not block startup.
                logger.warning(f"ContentRenderer: failed to load {key} ({exc})")
                content = ""
            self.assets[key] = content
            total_size += len(content)

        logger.info(f"ContentRenderer: loaded {len(assets_map)} libs ({total_size} bytes)")

        template_dir = os.path.dirname(self.template_path)
        template_name = os.path.basename(self.template_path)
        logger.info(f"ContentRenderer: initializing Jinja2 from {template_dir} / {template_name}")

        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml'])
        )
        self.template = self.env.get_template(template_name)

    def _get_icon_data_url(self, icon_name: str) -> str:
        """Return a ``data:`` URL for an icon given by URL, file name or bare name.

        Args:
            icon_name: An ``http(s)://`` URL, a file name containing a dot, or a
                bare name resolved against ``self.assets_dir`` with ``.svg``
                then ``.png``.

        Returns:
            The base64 ``data:`` URL, or ``""`` when ``icon_name`` is empty or
            neither the icon nor the ``openai.svg`` fallback can be read.

        Note:
            The URL branch performs a synchronous HTTP request, so it blocks
            the caller (including an event loop) for up to the 5 s timeout.
        """
        if not icon_name:
            return ""

        # 1. Remote icon: download and inline it.
        if icon_name.startswith(("http://", "https://")):
            try:
                import httpx
                resp = httpx.get(icon_name, timeout=5.0)
                if resp.status_code == 200:
                    mime_type = resp.headers.get("content-type", "image/png")
                    b64_data = base64.b64encode(resp.content).decode("utf-8")
                    return f"data:{mime_type};base64,{b64_data}"
            except Exception as e:
                logger.warning(f"Failed to download icon from {icon_name}: {e}")
            # On failure the lookup below ends at the openai.svg fallback.

        # 2. Local file lookup.
        filename = None
        if "." in icon_name:
            filename = icon_name
        else:
            stems = [icon_name]
            alias = self._ICON_ALIASES.get(icon_name.lower())
            if alias:
                stems.append(alias)
            for stem in stems:
                for ext in (".svg", ".png"):
                    if os.path.exists(os.path.join(self.assets_dir, stem + ext)):
                        filename = stem + ext
                        break
                if filename:
                    break
            if not filename:
                filename = icon_name + ".svg"  # Default fallback

        filepath = os.path.join(self.assets_dir, filename)
        if not os.path.exists(filepath):
            # Fall back to openai.svg when the specific file is not found.
            filepath = os.path.join(self.assets_dir, "openai.svg")
            if not os.path.exists(filepath):
                return ""

        mime_type, _ = mimetypes.guess_type(filepath)
        if not mime_type:
            mime_type = "image/png"

        try:
            with open(filepath, "rb") as f:
                data = f.read()
        except OSError as exc:
            # The file vanished or is unreadable between the check and the read.
            logger.warning(f"ContentRenderer: failed to read icon {filepath} ({exc})")
            return ""
        b64_data = base64.b64encode(data).decode("utf-8")
        return f"data:{mime_type};base64,{b64_data}"

    def _get_domain(self, url: str) -> str:
        """Map an API base URL to a canonical provider domain.

        Returns the URL's network location, collapsed to a fixed domain when it
        contains a known provider name, or ``"unknown"`` if parsing fails.
        """
        try:
            domain = urlparse(url).netloc
            for needle, canonical in (
                ("openrouter", "openrouter.ai"),
                ("openai", "openai.com"),
                ("anthropic", "anthropic.com"),
                ("google", "google.com"),
                ("deepseek", "deepseek.com"),
            ):
                if needle in domain:
                    return canonical
            return domain
        except Exception:
            return "unknown"

    @staticmethod
    def _clean_markdown(markdown_content: str) -> str:
        """Strip parts of the markdown that are rendered as structured UI instead."""
        # Insert a blank line before list items that directly follow text.
        text = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', markdown_content)

        # 0. Drop everything before the first level-1 heading (keep the heading).
        heading_match = re.search(r'^(#[^#])', text, re.MULTILINE)
        if heading_match:
            text = text[heading_match.start():]

        # 1. Drop a References/Citations/Sources header and everything after it.
        text = re.sub(
            r'(?i)^\s*(#{1,3}|\*\*)\s*(References|Citations|Sources).*$',
            '',
            text,
            flags=re.MULTILINE | re.DOTALL,
        )

        # 2. Drop isolated lines starting with [code] / [CODE].
        text = re.sub(r'(?i)^\s*\[code\].*?(\n|$)', '', text, flags=re.MULTILINE)
        return text

    @classmethod
    def _markdown_to_html(cls, markdown_content: str) -> str:
        """Convert cleaned markdown to HTML, preserving math and styling citations."""
        # 1. Swap math spans for placeholders so markdown extensions
        #    (e.g. nl2br) do not touch them.
        math_blocks = {}

        def protect_math(match):
            key = f"MATHBLOCK{len(math_blocks)}PLACEHOLDER"
            # Escape only < and > so they are not parsed as HTML tags; & is
            # kept because LaTeX alignment uses it.
            math_blocks[key] = match.group(0).replace("<", "&lt;").replace(">", "&gt;")
            return key

        text = markdown_content
        # $$...$$, \[...\] and \(...\); DOTALL because math may span lines.
        for pattern in (r'\$\$(.*?)\$\$', r'\\\[(.*?)\\\]', r'\\\((.*?)\\\)'):
            text = re.sub(pattern, protect_math, text, flags=re.DOTALL)

        # 2. Render markdown.
        content_html = markdown.markdown(
            text.strip(),
            extensions=['fenced_code', 'tables', 'nl2br', 'sane_lists']
        )

        # 3. Restore math blocks.
        for key, val in math_blocks.items():
            content_html = content_html.replace(key, val)

        # 4. Citation markers arrive as inline code spans (`ref:12`, `flow:a`);
        #    turn exactly those spans into badges and leave other code alone.
        parts = re.split(r'(<code.*?>.*?</code>)', content_html, flags=re.DOTALL)
        for i, part in enumerate(parts):
            if not part.startswith('<code'):
                continue
            ref_match = re.match(r'^<code.*?>ref:(\d+)</code>$', part)
            if ref_match:
                parts[i] = cls._BADGE_TEMPLATE.format(
                    colors=cls._REF_BADGE_COLORS, label=ref_match.group(1)
                )
                continue
            flow_match = re.match(r'^<code.*?>flow:([a-zA-Z])</code>$', part)
            if flow_match:
                parts[i] = cls._BADGE_TEMPLATE.format(
                    colors=cls._FLOW_BADGE_COLORS, label=flow_match.group(1).lower()
                )
        content_html = "".join(parts)

        # 5. Remove trailing structured "references" code blocks, repeating
        #    until none remain.
        while True:
            content_html, removed = cls._TRAILING_REFERENCES_BLOCK.subn('', content_html)
            if not removed:
                break
        return content_html

    @staticmethod
    def _infer_icon_name(model_str: Optional[str]) -> Optional[str]:
        """Guess an icon name from a model identifier, or return ``None``."""
        if not model_str:
            return None
        m = model_str.lower()
        if "claude" in m or "anthropic" in m: return "anthropic"
        if "gpt" in m or "openai" in m or "o1" in m: return "openai"
        if "gemini" in m or "google" in m: return "google"
        if "deepseek" in m: return "deepseek"
        if "mistral" in m: return "mistral"
        if "llama" in m: return "meta"
        if "qwen" in m: return "qwen"
        if "grok" in m: return "grok"
        if "perplexity" in m: return "perplexity"
        if "minimax" in m: return "minimax"
        if "nvidia" in m: return "nvidia"
        return None

    @staticmethod
    def _summarize_references(refs: Optional[List[Dict[str, Any]]], limit: int = 8) -> List[Dict[str, Any]]:
        """Build template entries (title, url, domain, favicon) for up to ``limit`` refs."""
        summaries = []
        for ref in (refs or [])[:limit]:
            url = ref.get("url", "#")
            try:
                domain = urlparse(url).netloc
                if domain.startswith("www."):
                    domain = domain[4:]
            except Exception:
                domain = "unknown"
            summaries.append({
                "title": ref.get("title", "No Title"),
                "url": url,
                "domain": domain,
                "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32",
            })
        return summaries

    @staticmethod
    def _normalize_stats(stats: Any) -> Dict[str, Any]:
        """Return the stats mapping; accepts a dict, a list of dicts, or nothing."""
        if not stats:
            return {}
        if isinstance(stats, list):
            return stats[0] or {}
        return stats

    @staticmethod
    def _stat_pill(dot_color: str, text: str) -> str:
        """Return the footer pill markup: a colored dot followed by ``text``."""
        return f'''
            <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                <span class="w-2 h-2 rounded-full {dot_color}"></span>
                <span>{text}</span>
            </div>
            '''

    @classmethod
    def _build_stats_html(cls, stats_dict: Dict[str, Any], billing_info: Optional[Dict[str, Any]]) -> Dict[str, str]:
        """Build the footer fragments for vision time, LLM time and cost."""
        agent_total_time = stats_dict.get("time", 0) or 0
        vision_time = stats_dict.get("vision_duration", 0) or 0
        # LLM time is the agent total minus the vision share, never negative.
        llm_time = max(0, agent_total_time - vision_time)

        vision_html = ""
        if vision_time > 0:
            vision_html = cls._stat_pill("bg-purple-400", f"{vision_time:.1f}s")

        llm_html = cls._stat_pill("bg-green-400", f"{llm_time:.1f}s")

        billing_html = ""
        if billing_info and (billing_info.get("total_cost", 0) or 0) > 0:
            cost_cents = billing_info["total_cost"] * 100
            billing_html = cls._stat_pill("bg-pink-500", f"{cost_cents:.4f}¢")

        return {
            "vision_html": vision_html,
            "llm_html": llm_html,
            "billing_html": billing_html,
        }

    def _build_stages(self, stages_used, processed_refs, flow_steps) -> List[Dict[str, Any]]:
        """Build the template entries for each pipeline stage, with nested data."""
        flow_icons = {"search": self._SEARCH_ICON, "page": self._PAGE_ICON}
        processed_stages = []

        for stage in stages_used or []:
            name = stage.get("name", "Step")
            model = stage.get("model", "") or ""

            if name == "Search":
                icon_html = self._SEARCH_ICON
            elif name == "Crawler":
                icon_html = self._BROWSER_ICON
            else:
                # Vendor logo: explicit icon_config first, else inferred from the model.
                icon_key = stage.get("icon_config", "") or self._infer_icon_name(model)
                icon_data_url = self._get_icon_data_url(icon_key) if icon_key else ""
                if icon_data_url:
                    icon_html = f'<img src="{icon_data_url}" class="w-5 h-5 object-contain rounded">'
                else:
                    icon_html = self._DEFAULT_ICON

            # Short model name: drop the "vendor/" prefix and truncate long names.
            model_short = model.split("/")[-1] if "/" in model else model
            if len(model_short) > 25:
                model_short = model_short[:23] + "…"

            time_val = stage.get("time", 0) or 0
            cost_val = stage.get("cost", 0.0) or 0.0
            if name == "Search":
                cost_val = 0.0

            stage_children = {}
            # References are nested under the "Search" stage.
            if name == "Search" and processed_refs:
                stage_children['references'] = processed_refs
            # Flow steps are nested under the "Agent" stage.
            if name == "Agent" and flow_steps:
                stage_children['flow_steps'] = [
                    {
                        "icon_svg": flow_icons.get((step.get("icon") or "").lower(), self._SEARCH_ICON),
                        "description": step.get("description", ""),
                    }
                    for step in flow_steps
                ]
            if "queries" in stage:
                stage_children["queries"] = stage["queries"]
            if "crawled_pages" in stage:
                stage_children["crawled_pages"] = stage["crawled_pages"]

            processed_stages.append({
                "name": name,
                "model": model,
                "model_short": model_short,
                "provider": stage.get("provider", ""),
                "icon_html": icon_html,
                "time_str": f"{time_val:.2f}s",
                "cost_str": f"${cost_val:.6f}" if cost_val > 0 else "$0",
                **stage_children,
            })
        return processed_stages

    async def render(self,
                     markdown_content: str,
                     output_path: str,
                     suggestions: Optional[List[str]] = None,
                     stats: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
                     references: Optional[List[Dict[str, Any]]] = None,
                     page_references: Optional[List[Dict[str, Any]]] = None,
                     stages_used: Optional[List[Dict[str, Any]]] = None,
                     flow_steps: Optional[List[Dict[str, Any]]] = None,
                     model_name: str = "",
                     provider_name: str = "Unknown",
                     behavior_summary: str = "Text Generation",
                     icon_config: str = "openai",
                     vision_model_name: str = None,
                     vision_icon_config: str = None,
                     vision_base_url: str = None,
                     base_url: str = "https://openrouter.ai/api/v1",
                     billing_info: Optional[Dict[str, Any]] = None,
                     render_timeout_ms: int = 6000) -> bool:
        """Render markdown content to a JPEG image using Jinja2 and Playwright.

        Args:
            markdown_content: Markdown to render.
            output_path: Destination image path; parent directories are created.
            suggestions: Passed to the template as ``suggestions``.
            stats: Dict (or list whose first item is a dict) read for ``time``,
                ``vision_duration`` and ``total_time``.
            references: Search references; the first 8 are rendered.
            page_references: Crawled-page references; the first 8 are rendered.
            stages_used: Pipeline stages shown in the card.
            flow_steps: Steps nested under the stage named ``"Agent"``.
            billing_info: Dict read for ``total_cost``.
            model_name, provider_name, behavior_summary, icon_config,
            vision_model_name, vision_icon_config, vision_base_url, base_url,
            render_timeout_ms: Accepted for interface compatibility; this
                method does not read them.

        Returns:
            ``True`` when the screenshot was written, ``False`` when building
            the HTML or taking the screenshot failed (failures are logged).
        """
        # Resolve the output path early to avoid relative-path issues.
        resolved_output_path = Path(output_path).resolve()
        resolved_output_path.parent.mkdir(parents=True, exist_ok=True)

        # Clean once, outside the retry loop, so each attempt starts from the
        # same text.
        cleaned_markdown = self._clean_markdown(markdown_content)

        max_attempts = 1
        last_exc = None
        for attempt in range(1, max_attempts + 1):
            content_html = None
            final_html = None
            try:
                content_html = self._markdown_to_html(cleaned_markdown)

                processed_refs = self._summarize_references(references)
                processed_page_refs = self._summarize_references(page_references)
                processed_stages = self._build_stages(stages_used, processed_refs, flow_steps or [])

                # Always defined, so the flags and total_time below work even
                # when no stats were supplied.
                stats_dict = self._normalize_stats(stats)
                processed_stats = self._build_stats_html(stats_dict, billing_info) if stats else {}

                feature_flags = {
                    "has_vision": (stats_dict.get("vision_duration", 0) or 0) > 0,
                    "has_search": any(s.get("name") == "Search" for s in stages_used or []),
                }

                context = {
                    "content_html": content_html,
                    "suggestions": suggestions or [],
                    "stages": processed_stages,
                    "references": processed_refs,
                    "page_references": processed_page_refs,
                    "references_json": json.dumps(references or []),
                    "stats": processed_stats,
                    "flags": feature_flags,
                    "total_time": stats_dict.get("total_time", 0) or 0,
                    **self.assets
                }

                final_html = self.template.render(**context)

            except MemoryError:
                last_exc = "memory"
                logger.warning(f"ContentRenderer: out of memory while building HTML (attempt {attempt}/{max_attempts})")
                continue
            except Exception as exc:
                last_exc = exc
                logger.warning(f"ContentRenderer: failed to build HTML (attempt {attempt}/{max_attempts}) ({exc})")
                continue

            try:
                # Playwright is used directly for an element-level screenshot.
                async with async_playwright() as p:
                    browser = await p.chromium.launch(headless=True)
                    try:
                        page = await browser.new_page(
                            viewport={"width": 520, "height": 1400},
                            device_scale_factor=3,
                        )
                        await page.set_content(final_html, wait_until="networkidle")
                        await page.wait_for_timeout(150)
                        element = await page.query_selector("#main-container")
                        if element:
                            await element.screenshot(path=resolved_output_path, type="jpeg", quality=98)
                        else:
                            await page.screenshot(path=resolved_output_path, full_page=True, type="jpeg", quality=98)
                        return True
                    finally:
                        await browser.close()

            except Exception as exc:
                last_exc = exc
                logger.warning(f"ContentRenderer: render attempt {attempt}/{max_attempts} failed ({exc})")
            finally:
                # Drop the large HTML strings before the next attempt or return.
                content_html = None
                final_html = None
                gc.collect()

        logger.error(f"ContentRenderer: render failed after {max_attempts} attempts ({last_exc})")
        return False

    async def render_models_list(
        self,
        models: List[Dict[str, Any]],
        output_path: str,
        default_base_url: str = "https://openrouter.ai/api/v1",
        render_timeout_ms: int = 6000,
    ) -> bool:
        """Render a numbered list of models through the main ``render`` pipeline.

        Args:
            models: Dicts read for ``name``, ``base_url`` and ``provider``.
            output_path: Destination image path.
            default_base_url: Used for models without a ``base_url``.
            render_timeout_ms: Forwarded to ``render``.

        Returns:
            The result of ``render``.
        """
        lines = ["# 模型列表"]
        for idx, model in enumerate(models or [], start=1):
            name = model.get("name", "unknown")
            base_url = model.get("base_url") or default_base_url
            provider = model.get("provider", "")
            lines.append(f"{idx}. **{name}** \n - base_url: {base_url} \n - provider: {provider}")

        # With no models, only the heading is present; show a placeholder line.
        markdown_content = "\n\n".join(lines) if len(lines) > 1 else "# 模型列表\n暂无模型"

        return await self.render(
            markdown_content=markdown_content,
            output_path=output_path,
            suggestions=[],
            stats={"time": 0.0},
            references=[],
            stages_used=[],
            model_name="",
            provider_name="Models",
            behavior_summary="Model List",
            icon_config="openai",
            base_url=default_base_url,
            billing_info=None,
            render_timeout_ms=render_timeout_ms,
        )
|