entari-plugin-hyw 3.4.2__py3-none-any.whl → 3.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of entari-plugin-hyw might be problematic.
- entari_plugin_hyw/__init__.py +14 -89
- entari_plugin_hyw/assets/card-dist/index.html +135 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +216 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +330 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +163 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +8 -0
- entari_plugin_hyw/card-ui/src/types.ts +51 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/core/config.py +0 -3
- entari_plugin_hyw/core/pipeline.py +136 -61
- entari_plugin_hyw/core/render_vue.py +255 -0
- entari_plugin_hyw/test_output/render_0.jpg +0 -0
- entari_plugin_hyw/test_output/render_1.jpg +0 -0
- entari_plugin_hyw/test_output/render_2.jpg +0 -0
- entari_plugin_hyw/test_output/render_3.jpg +0 -0
- entari_plugin_hyw/test_output/render_4.jpg +0 -0
- entari_plugin_hyw/tests/ui_test_output.jpg +0 -0
- entari_plugin_hyw/tests/verify_ui.py +139 -0
- entari_plugin_hyw/utils/misc.py +0 -3
- entari_plugin_hyw/utils/prompts.py +65 -63
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc1.dist-info}/METADATA +5 -2
- entari_plugin_hyw-3.5.0rc1.dist-info/RECORD +99 -0
- entari_plugin_hyw/assets/libs/highlight.css +0 -10
- entari_plugin_hyw/assets/libs/highlight.js +0 -1213
- entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
- entari_plugin_hyw/assets/libs/katex.css +0 -1
- entari_plugin_hyw/assets/libs/katex.js +0 -1
- entari_plugin_hyw/assets/libs/tailwind.css +0 -1
- entari_plugin_hyw/assets/package-lock.json +0 -953
- entari_plugin_hyw/assets/package.json +0 -16
- entari_plugin_hyw/assets/tailwind.config.js +0 -12
- entari_plugin_hyw/assets/tailwind.input.css +0 -235
- entari_plugin_hyw/assets/template.html +0 -157
- entari_plugin_hyw/assets/template.html.bak +0 -157
- entari_plugin_hyw/assets/template.j2 +0 -400
- entari_plugin_hyw/core/render.py +0 -630
- entari_plugin_hyw/utils/prompts_cn.py +0 -119
- entari_plugin_hyw-3.4.2.dist-info/RECORD +0 -49
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc1.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc1.dist-info}/top_level.txt +0 -0
entari_plugin_hyw/core/render.py
DELETED
@@ -1,630 +0,0 @@
import asyncio
import gc
import os
import markdown
import base64
import html # Import html for escaping
import mimetypes
from datetime import datetime
from urllib.parse import urlparse
from typing import List, Dict, Optional, Any, Union
import re
import json
from pathlib import Path
from loguru import logger
from jinja2 import Environment, FileSystemLoader, select_autoescape
from crawl4ai import AsyncWebCrawler
from crawl4ai.async_configs import CrawlerRunConfig
from crawl4ai.cache_context import CacheMode
from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy
from playwright.async_api import async_playwright

# Patch Crawl4AI 0.7.x to support screenshot from raw/file HTML
async def _c4a_generate_screenshot_from_html(self, html: str) -> str:
    """
    Monkey-patched fallback: render arbitrary HTML to a screenshot using Playwright.
    """
    page, context = await self.browser_manager.get_page(
        crawlerRunConfig=CrawlerRunConfig(
            adjust_viewport_to_content=True,
            wait_until="networkidle",
            wait_for_images=True,
            cache_mode=CacheMode.BYPASS,
        )
    )
    try:
        try:
            await page.set_viewport_size({"width": 520, "height": 1200})
        except Exception:
            pass
        await page.set_content(html, wait_until="networkidle")
        await page.wait_for_timeout(150)
        element = await page.query_selector("#main-container")
        if element:
            screenshot_bytes = await element.screenshot()
        else:
            screenshot_bytes = await page.screenshot(full_page=True)
        import base64 as _b64
        return _b64.b64encode(screenshot_bytes).decode()
    finally:
        try:
            await context.close()
        except Exception:
            pass

if not hasattr(AsyncPlaywrightCrawlerStrategy, "_generate_screenshot_from_html"):
    AsyncPlaywrightCrawlerStrategy._generate_screenshot_from_html = _c4a_generate_screenshot_from_html

class ContentRenderer:
    def __init__(self, template_path: str = None):
        if template_path is None:
            # Default to assets/template.j2 in the plugin root
            current_dir = os.path.dirname(os.path.abspath(__file__))
            plugin_root = os.path.dirname(current_dir)
            template_path = os.path.join(plugin_root, "assets", "template.j2")

        self.template_path = template_path
        current_dir = os.path.dirname(os.path.abspath(__file__))
        plugin_root = os.path.dirname(current_dir)
        self.assets_dir = os.path.join(plugin_root, "assets", "icon")

        # Load JS libraries (CSS is now inline in template)
        libs_dir = os.path.join(plugin_root, "assets", "libs")

        # Define all assets to load
        self.assets = {}
        assets_map = {
            "highlight_css": os.path.join(libs_dir, "highlight.css"),
            "highlight_js": os.path.join(libs_dir, "highlight.js"),
            "katex_css": os.path.join(libs_dir, "katex.css"),
            "katex_js": os.path.join(libs_dir, "katex.js"),
            "katex_auto_render_js": os.path.join(libs_dir, "katex-auto-render.js"),
            "tailwind_css": os.path.join(libs_dir, "tailwind.css"),
        }

        total_size = 0
        for key, path in assets_map.items():
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
                self.assets[key] = content
                total_size += len(content)
            except Exception as exc:
                logger.warning(f"ContentRenderer: failed to load {key} ({exc})")
                self.assets[key] = ""

        logger.info(f"ContentRenderer: loaded {len(assets_map)} libs ({total_size} bytes)")

        # Initialize Jinja2 Environment
        template_dir = os.path.dirname(self.template_path)
        template_name = os.path.basename(self.template_path)
        logger.info(f"ContentRenderer: initializing Jinja2 from {template_dir} / {template_name}")

        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml'])
        )
        self.template = self.env.get_template(template_name)

    def _get_icon_data_url(self, icon_name: str) -> str:
        if not icon_name:
            return ""
        # 1. Check if it's a URL
        if icon_name.startswith(("http://", "https://")):
            try:
                import httpx
                resp = httpx.get(icon_name, timeout=5.0)
                if resp.status_code == 200:
                    mime_type = resp.headers.get("content-type", "image/png")
                    b64_data = base64.b64encode(resp.content).decode("utf-8")
                    return f"data:{mime_type};base64,{b64_data}"
            except Exception as e:
                print(f"Failed to download icon from {icon_name}: {e}")
            # Fallback to local lookup

        # 2. Local file lookup
        filename = None

        if "." in icon_name:
            filename = icon_name
        else:
            # Try extensions
            for ext in [".svg", ".png"]:
                if os.path.exists(os.path.join(self.assets_dir, icon_name + ext)):
                    filename = icon_name + ext
                    break
            if not filename:
                filename = icon_name + ".svg" # Default fallback

        filepath = os.path.join(self.assets_dir, filename)

        if not os.path.exists(filepath):
            # Fallback to openai.svg if specific file not found
            filepath = os.path.join(self.assets_dir, "openai.svg")
            if not os.path.exists(filepath):
                return ""

        mime_type, _ = mimetypes.guess_type(filepath)
        if not mime_type:
            mime_type = "image/png"

        with open(filepath, "rb") as f:
            data = f.read()
        b64_data = base64.b64encode(data).decode("utf-8")
        return f"data:{mime_type};base64,{b64_data}"

    def _get_domain(self, url: str) -> str:
        try:
            parsed = urlparse(url)
            domain = parsed.netloc
            if "openrouter" in domain: return "openrouter.ai"
            if "openai" in domain: return "openai.com"
            if "anthropic" in domain: return "anthropic.com"
            if "google" in domain: return "google.com"
            if "deepseek" in domain: return "deepseek.com"
            return domain
        except:
            return "unknown"

    async def render(self,
                     markdown_content: str,
                     output_path: str,
                     suggestions: List[str] = None,
                     stats: Dict[str, Any] = None,
                     references: List[Dict[str, Any]] = None,
                     page_references: List[Dict[str, Any]] = None,
                     image_references: List[Dict[str, Any]] = None, # Added
                     stages_used: List[Dict[str, Any]] = None,
                     flow_steps: List[Dict[str, Any]] = None,
                     model_name: str = "",
                     provider_name: str = "Unknown",
                     behavior_summary: str = "Text Generation",
                     icon_config: str = "openai",
                     vision_model_name: str = None,
                     vision_icon_config: str = None,
                     vision_base_url: str = None,
                     base_url: str = "https://openrouter.ai/api/v1",
                     billing_info: Dict[str, Any] = None,
                     render_timeout_ms: int = 6000):
        """
        Render markdown content to an image using Crawl4AI (headless) and Jinja2.
        """
        render_start_time = asyncio.get_event_loop().time()

        # Resolve output path early to avoid relative URI issues
        resolved_output_path = Path(output_path).resolve()
        resolved_output_path.parent.mkdir(parents=True, exist_ok=True)

        # Preprocess to fix common markdown issues
        markdown_content = re.sub(r'(?<=\S)\n(?=\s*(\d+\.|\-|\*|\+) )', r'\n\n', markdown_content)

        # references, page_references, image_references are already parsed by pipeline
        # No filtering needed here - use them directly

        # AGGRESSIVE CLEANING: Strip out "References" section and "[code]" blocks from the text
        # because we are rendering them as structured UI elements now.

        # 0. Remove content before first # heading (keep the heading)
        heading_match = re.search(r'^(#[^#])', markdown_content, re.MULTILINE)
        if heading_match:
            markdown_content = markdown_content[heading_match.start():]

        # 1. Remove "References" or "Citations" header and everything after it specific to the end of file
        # Matches ### References, ## References, **References**, etc., followed by list items
        markdown_content = re.sub(r'(?i)^\s*(#{1,3}|\*\*)\s*(References|Citations|Sources).*$', '', markdown_content, flags=re.MULTILINE | re.DOTALL)

        # 2. Remove isolated "[code] ..." lines (checking for the specific format seen in user screenshot)
        # Matches lines starting with [code] or [CODE]
        markdown_content = re.sub(r'(?i)^\s*\[code\].*?(\n|$)', '', markdown_content, flags=re.MULTILINE)

        max_attempts = 1
        last_exc = None
        for attempt in range(1, max_attempts + 1):
            try:
                # 1. Protect math blocks
                # We look for $$...$$, \[...\], \(...\)
                # We'll replace them with placeholders so markdown extensions (like nl2br) don't touch them.
                math_blocks = {}

                def protect_math(match):
                    key = f"MATHBLOCK{len(math_blocks)}PLACEHOLDER"
                    # Escape ONLY < and > to prevent them from being parsed as HTML tags
                    # We preserve & and other chars to avoid breaking LaTeX alignment
                    escaped_math = match.group(0).replace("<", "&lt;").replace(">", "&gt;")
                    math_blocks[key] = escaped_math
                    return key

                # Patterns for math:
                # 1) $$ ... $$ (display math)
                # 2) \[ ... \] (display math)
                # 3) \( ... \) (inline math)
                # Note: We must handle multiline for $$ and \[

                # Regex for $$...$$
                markdown_content = re.sub(r'\$\$(.*?)\$\$', protect_math, markdown_content, flags=re.DOTALL)

                # Regex for \[...\]
                markdown_content = re.sub(r'\\\[(.*?)\\\]', protect_math, markdown_content, flags=re.DOTALL)

                # Regex for \(...\) (usually single line, but DOTALL is safest if user wraps lines)
                markdown_content = re.sub(r'\\\((.*?)\\\)', protect_math, markdown_content, flags=re.DOTALL)

                # 2. Render Markdown
                # Use 'nl2br' to turn newlines into <br>, 'fenced_code' for code blocks
                content_html = markdown.markdown(
                    markdown_content.strip(),
                    extensions=['fenced_code', 'tables', 'nl2br', 'sane_lists']
                )

                # 3. Restore math blocks
                def restore_math(text):
                    # We assume placeholders are intact. We do a simple string replace or regex.
                    # Since placeholders are unique strings, we can just replace them.
                    for key, val in math_blocks.items():
                        text = text.replace(key, val)
                    return text

                content_html = restore_math(content_html)

                # Convert [N] to colored badges based on index position
                # - Numbers 1 to len(references) → blue (search results)
                # - Numbers len(references)+1 to len(references)+len(page_references) → orange (page content)

                num_search_refs = len(references) if references else 0
                num_page_refs = len(page_references) if page_references else 0

                def replace_badge(match):
                    n = int(match.group(1))
                    if 1 <= n <= num_search_refs:
                        # Blue badge for search results
                        return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{n}</span>'
                    elif num_search_refs < n <= num_search_refs + num_page_refs:
                        # Orange badge for page content (renumber from 1)
                        page_num = n - num_search_refs
                        return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{page_num}</span>'
                    else:
                        # Fallback: keep original if out of range
                        return match.group(0)

                content_html = re.sub(r'\[(\d+)\]', replace_badge, content_html)

                # Strip out the references code block if it leaked into the content
                content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)

                # --- PREPARE DATA FOR JINJA TEMPLATE ---

                # 1. Pipeline Stages (with Nested Data)
                processed_stages = []

                # Unified Search Icon (RemixIcon)
                SEARCH_ICON = '<i class="ri-search-line text-[16px]"></i>'
                BROWSER_ICON = '<i class="ri-global-line text-[16px]"></i>'
                DEFAULT_ICON = '<i class="ri-box-3-line text-[16px]"></i>'

                # Helper to infer provider/icon name from model string
                def infer_icon_name(model_str):
                    if not model_str: return None
                    m = model_str.lower()
                    if "cerebras" in m: return "cerebras"
                    if "claude" in m or "anthropic" in m: return "anthropic"
                    if "gpt" in m or "openai" in m or "o1" in m: return "openai"
                    if "gemini" in m or "google" in m: return "google"
                    if "deepseek" in m: return "deepseek"
                    if "mistral" in m: return "mistral"
                    if "llama" in m: return "meta"
                    if "qwen" in m: return "qwen"
                    if "grok" in m: return "grok"
                    if "perplexity" in m: return "perplexity"
                    if "minimax" in m: return "minimax"
                    if "nvidia" in m: return "nvidia"
                    return None

                # 2. Reference Processing (Moved up for nesting)
                processed_refs = []
                if references:
                    for ref in references[:8]:
                        url = ref.get("url", "#")
                        try:
                            domain = urlparse(url).netloc
                            if domain.startswith("www."): domain = domain[4:]
                        except:
                            domain = "unknown"

                        processed_refs.append({
                            "title": ref.get("title", "No Title"),
                            "url": url,
                            "domain": domain,
                            "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                        })

                # 2b. Page Reference Processing (crawled pages)
                processed_page_refs = []
                if page_references:
                    for ref in page_references[:8]:
                        url = ref.get("url", "#")
                        try:
                            domain = urlparse(url).netloc
                            if domain.startswith("www."): domain = domain[4:]
                        except:
                            domain = "unknown"

                        processed_page_refs.append({
                            "title": ref.get("title", "No Title"),
                            "url": url,
                            "domain": domain,
                            "favicon_url": f"https://www.google.com/s2/favicons?domain={domain}&sz=32"
                        })

                # 2c. Image Reference Processing
                processed_image_refs = []
                if image_references:
                    for ref in image_references[:8]:
                        url = ref.get("url", "#")
                        processed_image_refs.append({
                            "title": ref.get("title", "Image"),
                            "url": url,
                            "thumbnail": ref.get("thumbnail") or url, # Fallback to url if thumbnail not provided
                            "domain": self._get_domain(url) or ref.get("domain") or "image"
                        })

                flow_steps = flow_steps or []

                if stages_used:
                    for stage in stages_used:
                        name = stage.get("name", "Step")
                        model = stage.get("model", "")

                        icon_html = ""

                        if name == "Search":
                            icon_html = SEARCH_ICON
                        elif name == "Crawler":
                            icon_html = BROWSER_ICON
                        else:
                            # Try to find vendor logo
                            # 1. Check explicit icon_config
                            icon_key = stage.get("icon_config", "")
                            # 2. Infer from model name if not present
                            if not icon_key:
                                icon_key = infer_icon_name(model)

                            icon_data_url = ""
                            if icon_key:
                                icon_data_url = self._get_icon_data_url(icon_key)

                            if icon_data_url:
                                icon_html = f'<img src="{icon_data_url}" class="w-5 h-5 object-contain rounded">'
                            else:
                                icon_html = DEFAULT_ICON

                        # Model Short
                        model_short = model.split("/")[-1] if "/" in model else model
                        if len(model_short) > 25:
                            model_short = model_short[:23] + "…"

                        time_val = stage.get("time", 0)
                        cost_val = stage.get("cost", 0.0)
                        if name == "Search": cost_val = 0.0

                        # --- NESTED DATA ---
                        stage_children = {}

                        # References go to "Search"
                        # Also Image References to "Search"
                        if name == "Search":
                            if processed_refs:
                                stage_children['references'] = processed_refs
                            if processed_image_refs:
                                stage_children['image_references'] = processed_image_refs

                        # Flow steps go to "Agent"
                        if name == "Agent" and flow_steps:
                            FLOW_ICONS = {
                                "search": SEARCH_ICON,
                                "page": '<i class="ri-file-text-line text-[16px]"></i>',
                            }
                            formatted_flow = []
                            for step in flow_steps:
                                icon_key = step.get("icon", "").lower()
                                formatted_flow.append({
                                    "icon_svg": FLOW_ICONS.get(icon_key, FLOW_ICONS.get("search")),
                                    "description": step.get("description", "")
                                })
                            stage_children['flow_steps'] = formatted_flow

                        # Pass through Search Queries
                        if "queries" in stage:
                            stage_children["queries"] = stage["queries"]

                        # Pass through Crawled Pages
                        if "crawled_pages" in stage:
                            stage_children["crawled_pages"] = stage["crawled_pages"]

                        processed_stages.append({
                            "name": name,
                            "model": model,
                            "model_short": model_short,
                            "provider": stage.get("provider", ""),
                            "icon_html": icon_html,
                            "time_str": f"{time_val:.2f}s",
                            "cost_str": f"${cost_val:.6f}" if cost_val > 0 else "$0",
                            **stage_children # Merge children
                        })

                # Ensure references are displayed even if no "Search" stage was present
                has_search_stage = any(s.get("name") == "Search" for s in processed_stages)
                if not has_search_stage and (processed_refs or processed_image_refs):
                    # Create a virtual Search stage
                    virtual_search = {
                        "name": "Search",
                        "model": "DuckDuckGo", # Default assumption
                        "model_short": "DuckDuckGo",
                        "provider": "Reference",
                        "icon_html": SEARCH_ICON,
                        "time_str": "0.00s",
                        "cost_str": "$0",
                    }
                    if processed_refs:
                        virtual_search['references'] = processed_refs
                    if processed_image_refs:
                        virtual_search['image_references'] = processed_image_refs

                    # Insert after Vision/Instruct (usually index 0 or 1), or at start
                    insert_idx = 0
                    if processed_stages and processed_stages[0]["name"] in ["Vision", "Instruct"]:
                        insert_idx = 1
                        if len(processed_stages) > 1 and processed_stages[1]["name"] == "Instruct":
                            insert_idx = 2

                    processed_stages.insert(insert_idx, virtual_search)

                # 4. Stats Footer Logic
                processed_stats = {}
                stats_dict = {}
                if stats:
                    # Assuming standard 'stats' dict structure, handle list if needed
                    if isinstance(stats, list):
                        stats_dict = stats[0] if stats else {}
                    else:
                        stats_dict = stats

                agent_total_time = stats_dict.get("time", 0)
                vision_time = stats_dict.get("vision_duration", 0)
                llm_time = max(0, agent_total_time - vision_time)

                vision_html = ""
                if vision_time > 0:
                    vision_html = f'''
                    <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                        <span class="w-2 h-2 rounded-full bg-purple-400"></span>
                        <span>{vision_time:.1f}s</span>
                    </div>
                    '''

                llm_html = f'''
                <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                    <span class="w-2 h-2 rounded-full bg-green-400"></span>
                    <span>{llm_time:.1f}s</span>
                </div>
                '''

                billing_html = ""
                if billing_info and billing_info.get("total_cost", 0) > 0:
                    cost_cents = billing_info["total_cost"] * 100
                    billing_html = f'''
                    <div class="flex items-center gap-1.5 bg-white/60 px-2 py-1 rounded shadow-sm">
                        <span class="w-2 h-2 rounded-full bg-pink-500"></span>
                        <span>{cost_cents:.4f}¢</span>
                    </div>
                    '''

                processed_stats = {
                    "vision_html": vision_html,
                    "llm_html": llm_html,
                    "billing_html": billing_html
                }

                # 5. Feature Flags for Header Icons
                feature_flags = {
                    "has_vision": False,
                    "has_search": False,
                }

                # Check Vision
                if stats_dict.get("vision_duration", 0) > 0:
                    feature_flags["has_vision"] = True

                # Check Search
                if any(s.get("name") == "Search" for s in stages_used or []):
                    feature_flags["has_search"] = True

                # Render Template
                context = {
                    "content_html": content_html,
                    "suggestions": suggestions or [],
                    "stages": processed_stages,
                    "references": processed_refs,
                    "page_references": processed_page_refs,
                    "references_json": json.dumps(references or []),
                    "stats": processed_stats,
                    "flags": feature_flags,
                    "total_time": stats_dict.get("total_time", 0) or 0,
                    **self.assets
                }

                final_html = self.template.render(**context)

            except MemoryError:
                last_exc = "memory"
                logger.warning(f"ContentRenderer: out of memory while building HTML (attempt {attempt}/{max_attempts})")
                continue
            except Exception as exc:
                last_exc = exc
                logger.warning(f"ContentRenderer: failed to build HTML (attempt {attempt}/{max_attempts}) ({exc})")
                continue

            try:
                # Use Playwright directly for crisp element screenshot (Crawl4AI already depends on it)
                async with async_playwright() as p:
                    browser = await p.chromium.launch(headless=True)
                    try:
                        page = await browser.new_page(
                            viewport={"width": 520, "height": 1400},
                            device_scale_factor=3,
                        )
                        await page.set_content(final_html, wait_until="networkidle")
                        await page.wait_for_timeout(150)
                        element = await page.query_selector("#main-container")
                        if element:
                            await element.screenshot(path=resolved_output_path, type="jpeg", quality=98)
                        else:
                            await page.screenshot(path=resolved_output_path, full_page=True, type="jpeg", quality=98)
                        return True
                    finally:
                        await browser.close()

            except Exception as exc:
                last_exc = exc
                logger.warning(f"ContentRenderer: render attempt {attempt}/{max_attempts} failed ({exc})")
            finally:
                content_html = None
                final_html = None
                gc.collect()

        logger.error(f"ContentRenderer: render failed after {max_attempts} attempts ({last_exc})")
        return False

    async def render_models_list(
        self,
        models: List[Dict[str, Any]],
        output_path: str,
        default_base_url: str = "https://openrouter.ai/api/v1",
        render_timeout_ms: int = 6000,
    ) -> bool:
        """
        Lightweight models list renderer leveraging the main render pipeline.
        """
        lines = ["# 模型列表"]
        for idx, model in enumerate(models or [], start=1):
            name = model.get("name", "unknown")
            base_url = model.get("base_url") or default_base_url
            provider = model.get("provider", "")
            lines.append(f"{idx}. **{name}** \n - base_url: {base_url} \n - provider: {provider}")

        markdown_content = "\n\n".join(lines) if len(lines) > 1 else "# 模型列表\n暂无模型"

        return await self.render(
            markdown_content=markdown_content,
            output_path=output_path,
            suggestions=[],
            stats={"time": 0.0},
            references=[],
            stages_used=[],
            model_name="",
            provider_name="Models",
            behavior_summary="Model List",
            icon_config="openai",
            base_url=default_base_url,
            billing_info=None,
            render_timeout_ms=render_timeout_ms,
        )