entari-plugin-hyw 3.5.0rc1__py3-none-any.whl → 3.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of entari-plugin-hyw has been flagged as possibly problematic. See the details below.

Files changed (32)
  1. entari_plugin_hyw/__init__.py +77 -82
  2. entari_plugin_hyw/assets/card-dist/index.html +360 -99
  3. entari_plugin_hyw/card-ui/src/App.vue +246 -52
  4. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +122 -67
  5. entari_plugin_hyw/card-ui/src/components/StageCard.vue +46 -26
  6. entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
  7. entari_plugin_hyw/card-ui/src/types.ts +1 -0
  8. entari_plugin_hyw/{core/history.py → history.py} +25 -1
  9. entari_plugin_hyw/image_cache.py +283 -0
  10. entari_plugin_hyw/{core/pipeline.py → pipeline.py} +102 -27
  11. entari_plugin_hyw/{utils/prompts.py → prompts.py} +7 -24
  12. entari_plugin_hyw/render_vue.py +314 -0
  13. entari_plugin_hyw/{utils/search.py → search.py} +227 -10
  14. {entari_plugin_hyw-3.5.0rc1.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/METADATA +1 -1
  15. {entari_plugin_hyw-3.5.0rc1.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/RECORD +18 -29
  16. entari_plugin_hyw/core/__init__.py +0 -0
  17. entari_plugin_hyw/core/config.py +0 -35
  18. entari_plugin_hyw/core/hyw.py +0 -48
  19. entari_plugin_hyw/core/render_vue.py +0 -255
  20. entari_plugin_hyw/test_output/render_0.jpg +0 -0
  21. entari_plugin_hyw/test_output/render_1.jpg +0 -0
  22. entari_plugin_hyw/test_output/render_2.jpg +0 -0
  23. entari_plugin_hyw/test_output/render_3.jpg +0 -0
  24. entari_plugin_hyw/test_output/render_4.jpg +0 -0
  25. entari_plugin_hyw/tests/ui_test_output.jpg +0 -0
  26. entari_plugin_hyw/tests/verify_ui.py +0 -139
  27. entari_plugin_hyw/utils/__init__.py +0 -2
  28. entari_plugin_hyw/utils/browser.py +0 -40
  29. entari_plugin_hyw/utils/playwright_tool.py +0 -36
  30. /entari_plugin_hyw/{utils/misc.py → misc.py} +0 -0
  31. {entari_plugin_hyw-3.5.0rc1.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/WHEEL +0 -0
  32. {entari_plugin_hyw-3.5.0rc1.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,314 @@
1
+ """
2
+ Vue-based Card Renderer (Minimal Python)
3
+
4
+ Python only provides raw data. All frontend logic (markdown, syntax highlighting,
5
+ math rendering, citations) is handled by the Vue frontend.
6
+ """
7
+
8
+ import json
9
+ import gc
10
+ import os
11
+ import threading
12
+ import asyncio
13
+ from pathlib import Path
14
+ from typing import List, Dict, Any
15
+ from concurrent.futures import Future
16
+
17
+ from loguru import logger
18
+ from playwright.async_api import async_playwright
19
+
20
+
21
class ContentRenderer:
    """Minimal renderer with a background browser thread for instant startup.

    A daemon thread owns a private asyncio event loop hosting a single
    headless Chromium page (Playwright) pre-loaded with the Vue card
    template.  Render requests from any caller are scheduled onto that loop
    with ``asyncio.run_coroutine_threadsafe`` and awaited through
    ``asyncio.to_thread`` so the caller's own event loop is never blocked.
    """

    def __init__(self, template_path: str = None, auto_start: bool = True):
        """Load the Vue template and optionally start the browser thread.

        Args:
            template_path: Path to the built Vue ``index.html``.  Defaults to
                ``assets/card-dist/index.html`` next to this module.
            auto_start: When True, boot the background browser immediately.

        Raises:
            FileNotFoundError: If the template file does not exist.
        """
        if template_path is None:
            current_dir = Path(__file__).parent
            template_path = current_dir / "assets" / "card-dist" / "index.html"

        self.template_path = Path(template_path)
        if not self.template_path.exists():
            raise FileNotFoundError(f"Vue template not found: {self.template_path}")

        self.template_content = self.template_path.read_text(encoding="utf-8")
        logger.info(f"ContentRenderer: loaded Vue template ({len(self.template_content)} bytes)")

        # Browser state -- touched only from the background loop's thread.
        self._playwright = None
        self._browser = None
        self._context = None
        self._page = None
        self._render_count = 0
        # Periodic restart bounds Chromium memory growth over long runs.
        self._max_renders_before_restart = 50

        # Background event loop infrastructure for playwright.
        self._loop: asyncio.AbstractEventLoop = None
        self._thread: threading.Thread = None
        self._ready = threading.Event()
        self._lock = threading.Lock()

        if auto_start:
            self._start_background_loop()

    def _start_background_loop(self):
        """Start a dedicated event loop in a daemon thread and boot the browser."""
        def _run_loop():
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)
            # Start the browser immediately so the first real render is fast.
            # NOTE(review): if _init_browser raises, _ready is never set and
            # render() will time out after 30s -- confirm this is intended.
            self._loop.run_until_complete(self._init_browser())
            self._ready.set()
            # Keep the loop alive for future scheduled renders.
            self._loop.run_forever()

        self._thread = threading.Thread(target=_run_loop, daemon=True, name="ContentRenderer-Browser")
        self._thread.start()
        logger.info("ContentRenderer: Background browser thread started")

    async def _init_browser(self, timeout: int = 6000):
        """Launch Chromium, open the template page, and pre-warm Vue.

        Runs on the background loop only.

        Args:
            timeout: Navigation timeout for loading the template, in ms.

        Raises:
            Exception: Re-raises any Playwright startup failure after logging.
        """
        logger.info("ContentRenderer: Starting browser...")
        try:
            self._playwright = await async_playwright().start()
            self._browser = await self._playwright.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-setuid-sandbox']
            )
            self._context = await self._browser.new_context(
                viewport={"width": 540, "height": 1400},
                device_scale_factor=2.0,
            )
            self._page = await self._context.new_page()
            await self._page.goto(self.template_path.as_uri(), wait_until="domcontentloaded", timeout=timeout)

            # Pre-warm with minimal data so Vue compiles/renders its templates
            # now instead of during the first real render.
            # NOTE(review): assumes the template defines window.updateRenderData.
            warmup_data = {
                "markdown": "# Ready",
                "total_time": 0,
                "stages": [],
                "references": [],
                "page_references": [],
                "image_references": [],
                "stats": {},
                "theme_color": "#ef4444",
            }
            await self._page.evaluate("(data) => window.updateRenderData(data)", warmup_data)
            await asyncio.sleep(0.1)  # Let Vue render
            logger.success("ContentRenderer: Browser + page ready!")
        except Exception as e:
            logger.error(f"ContentRenderer: Failed to start browser: {e}")
            raise

    def _run_in_background(self, coro) -> Future:
        """Schedule *coro* on the background loop and return its Future.

        Raises:
            RuntimeError: If the background loop is not running.
        """
        if not self._loop or not self._loop.is_running():
            raise RuntimeError("Background loop not running")
        return asyncio.run_coroutine_threadsafe(coro, self._loop)

    async def start(self, timeout: int = 6000):
        """Wait until the browser is ready (kept for API compatibility).

        Args:
            timeout: Maximum wait, in milliseconds.

        Raises:
            TimeoutError: If the browser is not ready within *timeout*.
        """
        # Event.wait takes seconds; the public API takes milliseconds.
        ready = await asyncio.to_thread(self._ready.wait, timeout / 1000)
        if not ready:
            raise TimeoutError("Browser startup timeout")

    async def close(self):
        """Shut down the browser, stop the background loop, join the thread."""
        if self._loop and self._loop.is_running():
            future = self._run_in_background(self._close_internal())
            # to_thread keeps the caller's event loop responsive while waiting.
            await asyncio.to_thread(future.result, 10)
        if self._loop:
            self._loop.call_soon_threadsafe(self._loop.stop)
        if self._thread:
            await asyncio.to_thread(self._thread.join, 5)
        logger.info("ContentRenderer: Browser closed.")

    async def _close_internal(self):
        """Release page/context/browser/playwright (runs on the background loop)."""
        if self._page:
            await self._page.close()
            self._page = None
        if self._context:
            await self._context.close()
            self._context = None
        if self._browser:
            await self._browser.close()
            self._browser = None
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None

    async def _ensure_page(self):
        """Ensure the page exists, recycling the browser after enough renders.

        Runs on the background loop.
        """
        if self._render_count >= self._max_renders_before_restart:
            logger.info(f"ContentRenderer: Restarting browser after {self._render_count} renders...")
            await self._close_internal()
            self._render_count = 0

        if not self._page:
            await self._init_browser()

    async def render(
        self,
        markdown_content: str,
        output_path: str,
        stats: Dict[str, Any] = None,
        references: List[Dict[str, Any]] = None,
        page_references: List[Dict[str, Any]] = None,
        image_references: List[Dict[str, Any]] = None,
        stages_used: List[Dict[str, Any]] = None,
        image_timeout: int = 3000,
        theme_color: str = "#ef4444",
        **kwargs
    ) -> bool:
        """Render markdown + metadata to a JPEG card image.

        Args:
            markdown_content: Markdown body for the card.
            output_path: Destination image path (parents are created).
            stats: Stats dict (a one-element list is unwrapped for
                backward compatibility).
            references / page_references / image_references: Citation lists
                passed straight through to the Vue frontend.
            stages_used: Pipeline stage dicts; a normalized subset is forwarded.
            image_timeout: Max wait for <img> loading, in ms.
            theme_color: Accent color hex string.

        Returns:
            True on success, False on any failure (errors are logged).
        """
        # Wait for browser readiness without blocking the caller's loop.
        ready = await asyncio.to_thread(self._ready.wait, 30)
        if not ready:
            logger.error("ContentRenderer: Browser not ready after 30s")
            return False

        resolved_output_path = Path(output_path).resolve()
        resolved_output_path.parent.mkdir(parents=True, exist_ok=True)

        # Back-compat: older callers passed stats as a one-element list.
        stats_dict = stats[0] if isinstance(stats, list) and stats else (stats or {})

        render_data = {
            "markdown": markdown_content,
            "total_time": stats_dict.get("total_time", 0) or 0,
            "stages": [
                {
                    "name": s.get("name", "Step"),
                    "model": s.get("model", ""),
                    "provider": s.get("provider", ""),
                    "time": s.get("time", 0),
                    "cost": s.get("cost", 0),
                    "references": s.get("references") or s.get("search_results"),
                    "image_references": s.get("image_references"),
                    "crawled_pages": s.get("crawled_pages"),
                }
                for s in (stages_used or [])
            ],
            "references": references or [],
            "page_references": page_references or [],
            "image_references": image_references or [],
            "stats": stats_dict,
            "theme_color": theme_color,
        }

        # Align stage image references with their order of appearance.
        self._reorder_images_in_stages(render_data["markdown"], render_data["stages"])

        try:
            future = self._run_in_background(
                self._render_internal(render_data, str(resolved_output_path), image_timeout)
            )
            # to_thread keeps the caller's event loop responsive while waiting.
            return await asyncio.to_thread(future.result, 60)
        except Exception as e:
            logger.error(f"ContentRenderer: render failed ({e})")
            return False

    async def _render_internal(self, render_data: dict, output_path: str, image_timeout: int) -> bool:
        """Push data into the page and screenshot it (runs on the background loop).

        Returns:
            True on success; False on failure (the page is dropped so the
            next render re-initializes the browser).
        """
        import time
        start_time = time.time()

        try:
            await self._ensure_page()

            # Hand the payload to the Vue app.
            await self._page.evaluate("(data) => window.updateRenderData(data)", render_data)

            # Give Vue a beat to commit the DOM update.
            await asyncio.sleep(0.1)

            # Best-effort wait for all images; a timeout degrades gracefully.
            try:
                await self._page.wait_for_function(
                    "() => Array.from(document.images).every(img => img.complete)",
                    timeout=image_timeout
                )
            except Exception:
                logger.warning(f"ContentRenderer: Timeout waiting for images ({image_timeout}ms)")

            # Prefer the card container; fall back to a full-page shot.
            element = await self._page.query_selector("#main-container")
            if element:
                await element.screenshot(path=output_path, type="jpeg", quality=88)
            else:
                await self._page.screenshot(path=output_path, full_page=True, type="jpeg", quality=88)

            self._render_count += 1
            duration = time.time() - start_time
            logger.success(f"ContentRenderer: Rendered in {duration:.3f}s (No.{self._render_count})")
            return True

        except Exception as exc:
            logger.error(f"ContentRenderer: render failed ({exc})")
            # Drop the page so the next render restarts the browser.
            self._page = None
            return False
        finally:
            gc.collect()

    async def render_models_list(
        self,
        models: List[Dict[str, Any]],
        output_path: str,
        default_base_url: str = "https://openrouter.ai/api/v1",
        **kwargs
    ) -> bool:
        """Render a simple markdown listing of configured models.

        Args:
            models: Model dicts with optional ``name``/``base_url``/``provider``.
            output_path: Destination image path.
            default_base_url: Used when a model has no ``base_url``.

        Returns:
            True on success, False on failure (delegates to :meth:`render`).
        """
        lines = ["# 模型列表"]
        for idx, model in enumerate(models or [], start=1):
            name = model.get("name", "unknown")
            base_url = model.get("base_url") or default_base_url
            provider = model.get("provider", "")
            lines.append(f"{idx}. **{name}** \n - base_url: {base_url} \n - provider: {provider}")

        markdown_content = "\n\n".join(lines) if len(lines) > 1 else "# 模型列表\n暂无模型"

        return await self.render(
            markdown_content=markdown_content,
            output_path=output_path,
            stats={},
            references=[],
            stages_used=[],
        )

    def _reorder_images_in_stages(self, markdown: str, stages: List[Dict[str, Any]]) -> None:
        """Reorder each stage's image references to match markdown order.

        References whose URL appears in the markdown come first (in order of
        first appearance); the rest keep their original relative order.
        Mutates *stages* in place.
        """
        import re

        # Collect image URLs in order of first appearance in the markdown.
        img_urls = []
        for match in re.finditer(r'!\[.*?\]\((.*?)\)', markdown):
            # split() strips an optional title ('url "title"'); guard against
            # an empty URL like ![alt](), which used to raise IndexError.
            parts = match.group(1).split()
            url_part = parts[0].strip() if parts else ""
            if url_part and url_part not in img_urls:
                img_urls.append(url_part)

        if not img_urls:
            return

        for stage in stages:
            refs = stage.get("image_references")
            if not refs:
                continue

            # .get-based access tolerates refs that lack a "url" key.
            ref_map = {r["url"]: r for r in refs if "url" in r}
            new_refs = []
            seen_urls = set()

            for url in img_urls:
                if url in ref_map:
                    new_refs.append(ref_map[url])
                    seen_urls.add(url)

            for r in refs:
                if r.get("url") not in seen_urls:
                    new_refs.append(r)

            stage["image_references"] = new_refs
@@ -22,6 +22,9 @@ except ImportError:
22
22
  except ImportError:
23
23
  DDGS = None
24
24
 
25
+ # Import image cache for prefetching
26
+ from .image_cache import prefetch_images
27
+
25
28
  # Shared crawler instance to avoid repeated init
26
29
  _shared_crawler: Optional[AsyncWebCrawler] = None
27
30
 
@@ -46,7 +49,8 @@ async def close_shared_crawler():
46
49
  class SearchService:
47
50
  """
48
51
  Multi-strategy search & fetch service.
49
- Supported providers: 'crawl4ai' (default), 'httpx', 'ddgs'.
52
+ Search providers: 'crawl4ai' (default), 'httpx', 'ddgs'.
53
+ Fetch providers: 'crawl4ai' (default), 'jinaai'.
50
54
  """
51
55
  def __init__(self, config: Any):
52
56
  self.config = config
@@ -56,8 +60,11 @@ class SearchService:
56
60
  # Configuration for retries/timeouts
57
61
  self._search_timeout = getattr(config, "search_timeout", 10.0)
58
62
  self._search_retries = getattr(config, "search_retries", 2)
59
- self._provider = getattr(config, "search_provider", "crawl4ai")
60
- logger.info(f"SearchService initialized: provider='{self._provider}', limit={self._default_limit}, timeout={self._search_timeout}s")
63
+ # Separate providers for search and page fetching
64
+ self._search_provider = getattr(config, "search_provider", "crawl4ai")
65
+ self._fetch_provider = getattr(config, "fetch_provider", "crawl4ai")
66
+ self._jina_api_key = getattr(config, "jina_api_key", None)
67
+ logger.info(f"SearchService initialized: search_provider='{self._search_provider}', fetch_provider='{self._fetch_provider}', limit={self._default_limit}, timeout={self._search_timeout}s")
61
68
 
62
69
  def _build_search_url(self, query: str) -> str:
63
70
  encoded_query = urllib.parse.quote(query)
@@ -82,7 +89,7 @@ class SearchService:
82
89
  if not query:
83
90
  return []
84
91
 
85
- provider = self._provider.lower()
92
+ provider = self._search_provider.lower()
86
93
  logger.info(f"SearchService: searching for '{query}' using provider='{provider}'")
87
94
 
88
95
  if provider == "httpx":
@@ -355,11 +362,122 @@ class SearchService:
355
362
 
356
363
  async def fetch_page(self, url: str) -> Dict[str, str]:
357
364
  """
358
- Fetch a single page via Crawl4AI and return cleaned markdown/text plus metadata.
365
+ Fetch a single page and return cleaned markdown/text plus metadata.
366
+ Dispatches to jinaai or Crawl4AI based on fetch_provider config.
359
367
  """
360
368
  if not url:
361
369
  return {"content": "Error: missing url", "title": "Error", "url": ""}
362
370
 
371
+ provider = self._fetch_provider.lower()
372
+ logger.info(f"SearchService: fetching page '{url}' using fetch_provider='{provider}'")
373
+
374
+ if provider == "jinaai":
375
+ return await self._fetch_page_jinaai(url)
376
+ else:
377
+ return await self._fetch_page_crawl4ai(url)
378
+
379
+ async def _fetch_page_jinaai(self, url: str) -> Dict[str, str]:
380
+ """
381
+ Fetch page via Jina AI Reader - returns clean markdown content.
382
+ https://r.jina.ai/{url}
383
+ """
384
+ if not httpx:
385
+ logger.warning("SearchService: httpx not installed, fallback to crawl4ai")
386
+ return await self._fetch_page_crawl4ai(url)
387
+
388
+ jina_url = f"https://r.jina.ai/{url}"
389
+ headers = {
390
+ "X-Return-Format": "markdown",
391
+ }
392
+ # Add authorization header if API key is configured
393
+ if self._jina_api_key:
394
+ headers["Authorization"] = f"Bearer {self._jina_api_key}"
395
+
396
+ try:
397
+ async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
398
+ resp = await client.get(jina_url, headers=headers)
399
+ resp.raise_for_status()
400
+ content = resp.text
401
+
402
+ # Jina AI returns markdown content directly
403
+ # Try to extract title from first heading or first line
404
+ title = "No Title"
405
+ lines = content.strip().split('\n')
406
+ for line in lines:
407
+ line = line.strip()
408
+ if line.startswith('# '):
409
+ title = line[2:].strip()
410
+ break
411
+ elif line and not line.startswith('!') and not line.startswith('['):
412
+ # Use first non-empty, non-image, non-link line as title
413
+ title = line[:100]
414
+ break
415
+
416
+ logger.info(f"SearchService(jinaai): fetched page, title='{title}', content_len={len(content)}")
417
+ return {
418
+ "content": content[:8000],
419
+ "title": title,
420
+ "url": url
421
+ }
422
+
423
+ except Exception as e:
424
+ logger.error(f"SearchService(jinaai) fetch_page failed: {e}")
425
+ return {"content": f"Error: fetch failed ({e})", "title": "Error", "url": url}
426
+
427
+ async def _fetch_page_httpx(self, url: str) -> Dict[str, str]:
428
+ """
429
+ Fetch page via httpx - fast, no browser overhead.
430
+ """
431
+ if not httpx:
432
+ logger.warning("SearchService: httpx not installed, fallback to crawl4ai")
433
+ return await self._fetch_page_crawl4ai(url)
434
+
435
+ try:
436
+ async with httpx.AsyncClient(timeout=self._search_timeout, follow_redirects=True) as client:
437
+ resp = await client.get(url, headers={
438
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
439
+ })
440
+ resp.raise_for_status()
441
+ html_content = resp.text
442
+
443
+ # Extract title from HTML
444
+ title = "No Title"
445
+ title_match = re.search(r'<title[^>]*>([^<]+)</title>', html_content, re.IGNORECASE)
446
+ if title_match:
447
+ title = html.unescape(title_match.group(1).strip())
448
+
449
+ # Try og:title as fallback
450
+ if title == "No Title":
451
+ og_match = re.search(r'<meta[^>]+property=["\']og:title["\'][^>]+content=["\']([^"\']+)["\']', html_content, re.IGNORECASE)
452
+ if og_match:
453
+ title = html.unescape(og_match.group(1).strip())
454
+
455
+ # Simple HTML to text conversion
456
+ # Remove script/style tags
457
+ content = re.sub(r'<script[^>]*>[\s\S]*?</script>', '', html_content, flags=re.IGNORECASE)
458
+ content = re.sub(r'<style[^>]*>[\s\S]*?</style>', '', html_content, flags=re.IGNORECASE)
459
+ # Remove HTML tags
460
+ content = re.sub(r'<[^>]+>', ' ', content)
461
+ # Decode entities
462
+ content = html.unescape(content)
463
+ # Normalize whitespace
464
+ content = re.sub(r'\s+', ' ', content).strip()
465
+
466
+ logger.info(f"SearchService(httpx): fetched page, title='{title}', content_len={len(content)}")
467
+ return {
468
+ "content": content[:8000],
469
+ "title": title,
470
+ "url": url
471
+ }
472
+
473
+ except Exception as e:
474
+ logger.error(f"SearchService(httpx) fetch_page failed: {e}")
475
+ return {"content": f"Error: fetch failed ({e})", "title": "Error", "url": url}
476
+
477
+ async def _fetch_page_crawl4ai(self, url: str) -> Dict[str, str]:
478
+ """
479
+ Fetch page via Crawl4AI - full browser rendering.
480
+ """
363
481
  try:
364
482
  crawler = await self._get_crawler()
365
483
  result = await crawler.arun(
@@ -416,17 +534,116 @@ class SearchService:
416
534
  pass
417
535
  self._crawler = None
418
536
 
419
- async def image_search(self, query: str) -> List[Dict[str, str]]:
537
+ async def image_search(self, query: str, prefetch: bool = True) -> List[Dict[str, str]]:
420
538
  """
421
- Image search via Crawl4AI media extraction or DDGS.
539
+ Image search - dispatches to httpx, ddgs, or Crawl4AI based on search_provider.
540
+
541
+ Args:
542
+ query: Search query
543
+ prefetch: If True, automatically start prefetching images for caching
422
544
  """
423
545
  if not query:
424
546
  return []
425
547
 
426
- # If ddgs is selected, use it
427
- if self._provider == "ddgs":
428
- return await self._search_ddgs_images(query)
548
+ provider = self._search_provider.lower()
549
+ logger.info(f"SearchService: image searching for '{query}' using provider='{provider}'")
550
+
551
+ if provider == "ddgs":
552
+ results = await self._search_ddgs_images(query)
553
+ elif provider == "httpx":
554
+ results = await self._image_search_httpx(query)
555
+ else:
556
+ results = await self._image_search_crawl4ai(query)
557
+
558
+ # Start prefetching images in background for faster rendering
559
+ if prefetch and results:
560
+ urls_to_prefetch = []
561
+ for img in results:
562
+ # Prefer thumbnail for prefetch (smaller, used in UI)
563
+ thumb = img.get("thumbnail")
564
+ url = img.get("url")
565
+ if thumb:
566
+ urls_to_prefetch.append(thumb)
567
+ if url and url != thumb:
568
+ urls_to_prefetch.append(url)
569
+
570
+ if urls_to_prefetch:
571
+ logger.info(f"SearchService: Starting prefetch for {len(urls_to_prefetch)} images")
572
+ await prefetch_images(urls_to_prefetch)
573
+
574
+ return results
575
+
576
+ async def _image_search_httpx(self, query: str) -> List[Dict[str, str]]:
577
+ """
578
+ Image search via httpx - parse img tags from HTML response.
579
+ """
580
+ if not httpx:
581
+ logger.warning("SearchService: httpx not installed, fallback to crawl4ai")
582
+ return await self._image_search_crawl4ai(query)
583
+
584
+ url = self._build_image_url(query)
585
+ logger.info(f"SearchService(httpx Image): fetching {url}")
586
+
587
+ try:
588
+ async with httpx.AsyncClient(timeout=self._search_timeout, follow_redirects=True) as client:
589
+ resp = await client.get(url, headers={
590
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
591
+ })
592
+ resp.raise_for_status()
593
+ html_content = resp.text
594
+
595
+ # Parse img tags from HTML
596
+ # Match: <img ... src="..." ... > or <img ... data-src="..." ...>
597
+ img_regex = re.compile(r'<img[^>]+(?:src|data-src)=["\']([^"\']+)["\'][^>]*(?:alt=["\']([^"\']*)["\'])?[^>]*>', re.IGNORECASE)
598
+
599
+ images = []
600
+ seen = set()
601
+
602
+ for match in img_regex.finditer(html_content):
603
+ src = match.group(1) or ""
604
+ alt = match.group(2) or ""
605
+
606
+ if not src:
607
+ continue
608
+ if src.startswith("//"):
609
+ src = "https:" + src
610
+ if not src.startswith("http"):
611
+ continue
612
+ # Skip tiny icons/placeholders
613
+ if "favicon" in src.lower() or "logo" in src.lower() or "icon" in src.lower():
614
+ continue
615
+ if src in seen:
616
+ continue
617
+ seen.add(src)
618
+
619
+ alt = html.unescape(alt.strip()) if alt else "Image"
620
+ domain = urllib.parse.urlparse(src).hostname or ""
621
+
622
+ images.append({
623
+ "title": alt,
624
+ "url": src,
625
+ "thumbnail": src, # Use same URL as thumbnail
626
+ "domain": domain,
627
+ "content": alt,
628
+ })
629
+
630
+ if len(images) >= self._default_limit:
631
+ break
632
+
633
+ if not images:
634
+ logger.warning(f"SearchService(httpx): no images parsed from HTML")
635
+ else:
636
+ logger.info(f"SearchService(httpx): parsed {len(images)} images")
637
+ return images
638
+
639
+ except Exception as e:
640
+ logger.error(f"SearchService(httpx) image_search failed: {e}")
641
+ return []
429
642
 
643
+ async def _image_search_crawl4ai(self, query: str) -> List[Dict[str, str]]:
644
+ """
645
+ Image search via Crawl4AI media extraction.
646
+ """
430
647
  url = self._build_image_url(query)
431
648
  logger.info(f"SearchService(Crawl4AI Image): fetching {url}")
432
649
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: entari_plugin_hyw
3
- Version: 3.5.0rc1
3
+ Version: 3.5.0rc2
4
4
  Summary: Use large language models to interpret chat messages
5
5
  Author-email: kumoSleeping <zjr2992@outlook.com>
6
6
  License: MIT
@@ -1,5 +1,12 @@
1
- entari_plugin_hyw/__init__.py,sha256=_IfEZZhGgTTAgJIV4-eyPruxDyCMwf61QrAA24HAD9s,16105
2
- entari_plugin_hyw/assets/card-dist/index.html,sha256=l0T2bQLb51XKXMGR_lL1IIV_GemCd171-I0AxEjgnRQ,274777
1
+ entari_plugin_hyw/__init__.py,sha256=LHhCnOL5X6Il5OOrL4Q9HH1PHZtGfof9rTsRDQpVYpQ,15790
2
+ entari_plugin_hyw/history.py,sha256=zYtON0FgkA_AcXerLV335OzpIP30eAxDEp7NHCFFXis,7016
3
+ entari_plugin_hyw/image_cache.py,sha256=-GWNgzmIZv8OF2qEECqcQcuzGnhz5vmZ2-GWdPkEX4I,10373
4
+ entari_plugin_hyw/misc.py,sha256=pW-eSRKGJjJhVfz8Z-N0bTIHL57Jq3ynPzVFuy7YWnI,3479
5
+ entari_plugin_hyw/pipeline.py,sha256=Q4896twMwL_UbaabA7v-TFlLl1fduTrtPkFyiH3Qvyc,56173
6
+ entari_plugin_hyw/prompts.py,sha256=ZE0UN4GuUuG6HCzH4RbfQFHVCC9zT4_xByiG2L-U70I,3961
7
+ entari_plugin_hyw/render_vue.py,sha256=BkjfIB4JJ-CTvbreSHbczMu5NyDZtKjK-0HVQpYGacQ,12393
8
+ entari_plugin_hyw/search.py,sha256=yauAkNs3bM79dWoMSbn5ngRsxGLAHRbiAYSaGOxzVUQ,28316
9
+ entari_plugin_hyw/assets/card-dist/index.html,sha256=2DEOmrRDN6QwoqsywMfe8zdHvzHxrn5Rnhgg2bJA8UE,2016347
3
10
  entari_plugin_hyw/assets/card-dist/vite.svg,sha256=SnSK_UQ5GLsWWRyDTEAdrjPoeGGrXbrQgRw6O0qSFPs,1497
4
11
  entari_plugin_hyw/assets/card-dist/logos/anthropic.svg,sha256=ASsy1ypo3osNc3n-B0R81tk_dIFsVgg7qQORrd5T2kA,558
5
12
  entari_plugin_hyw/assets/card-dist/logos/cerebras.svg,sha256=bpmiiYTODwc06knTmPj3GQ7NNtosMog5lkggvB_Z-7M,44166
@@ -65,35 +72,17 @@ entari_plugin_hyw/card-ui/public/logos/qwen.png,sha256=eqLbnIPbjh2_PsODU_mmqjeD8
65
72
  entari_plugin_hyw/card-ui/public/logos/xai.png,sha256=uSulvvDVqoA4RUOW0ZAkdvBVM2rpyGJRZIbn5dEFspw,362
66
73
  entari_plugin_hyw/card-ui/public/logos/xiaomi.png,sha256=WHxlDFGU5FCjb-ure3ngdGG18-efYZUUfqA3_lqCUN0,4084
67
74
  entari_plugin_hyw/card-ui/public/logos/zai.png,sha256=K-gnabdsjMLInppHA1Op7Nyt33iegrx1x-yNlvCZ0Tc,2351
68
- entari_plugin_hyw/card-ui/src/App.vue,sha256=ijVHzRH9nnwt723YRrgYRebISyM1f2c6M_hozcUlzVo,8094
75
+ entari_plugin_hyw/card-ui/src/App.vue,sha256=DA5CLpWtl_bm3n3WBpEhdmun9IkLaRE1E_jWMeOmwwY,17656
69
76
  entari_plugin_hyw/card-ui/src/main.ts,sha256=rm653lPnK5fuTIj-iNLpgr8GAmayuCoKop7IWfo0IBk,111
70
77
  entari_plugin_hyw/card-ui/src/style.css,sha256=LnQEZyUqsj-IuESW1YBRqAz7T6LMayMDuiNKP5TAB1o,188
71
- entari_plugin_hyw/card-ui/src/types.ts,sha256=hEofV_xlQCpYIMU3chjjn6wDII8NBww3YqtGiZvIfpI,1182
78
+ entari_plugin_hyw/card-ui/src/test_regex.js,sha256=cWmclm6LRKYfjeN1RT5HECdltmo1HvS2BwGCYY_4l14,3040
79
+ entari_plugin_hyw/card-ui/src/types.ts,sha256=jzI8MXDKxz7WRZmGVb-bToO8BEFW2AOp8iek1n4VuQY,1246
72
80
  entari_plugin_hyw/card-ui/src/assets/vue.svg,sha256=VTLbNPHFKEGIG6uK7KbD6NCSvSGmiaZfTY-M-AQe750,496
73
81
  entari_plugin_hyw/card-ui/src/components/HelloWorld.vue,sha256=yvBIzJua9BfikUOR1I7GYytlnBbgB6xyposxslAoRLU,856
74
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue,sha256=4-xt_r79Llni6VP5A4hCs7I9xBQzqL4f14yYc-0p3uw,11992
82
+ entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue,sha256=w6XbsYa0UQVnb_E7Z6CR2HwKMXWEEzAgQxJ5DQbL9yg,13068
75
83
  entari_plugin_hyw/card-ui/src/components/SectionCard.vue,sha256=owcDNx2JYVmF2J5SYCroR2gvg_cPApQsNunjK1WJpVI,1433
76
- entari_plugin_hyw/card-ui/src/components/StageCard.vue,sha256=HPh_O8ltcdOCe4KXMN4OqMZTHsN4NJEZnhICRDjyrBM,7572
77
- entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
- entari_plugin_hyw/core/config.py,sha256=09lpgYTuNnZlR3KXaHmR4WfuUVEHwrHH0Thjbq93jX4,1440
79
- entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
80
- entari_plugin_hyw/core/hyw.py,sha256=RCRjV9uYmvXysiliztphLP3VyUabrf0LY2Bk66W5JGA,1927
81
- entari_plugin_hyw/core/pipeline.py,sha256=15SyghuDkNX81kr3ESeLmCyy2eL2RZgt44-PQY1gdx8,51793
82
- entari_plugin_hyw/core/render_vue.py,sha256=54XtNiacH_9eyBhcD0dZtL0_wOy05YaK2td13jyVXcs,9694
83
- entari_plugin_hyw/test_output/render_0.jpg,sha256=uoODJLt-PPY7iCpmlo9ehPIlMMiN6dmmlkT9tpIHEn8,109963
84
- entari_plugin_hyw/test_output/render_1.jpg,sha256=uoODJLt-PPY7iCpmlo9ehPIlMMiN6dmmlkT9tpIHEn8,109963
85
- entari_plugin_hyw/test_output/render_2.jpg,sha256=uoODJLt-PPY7iCpmlo9ehPIlMMiN6dmmlkT9tpIHEn8,109963
86
- entari_plugin_hyw/test_output/render_3.jpg,sha256=uoODJLt-PPY7iCpmlo9ehPIlMMiN6dmmlkT9tpIHEn8,109963
87
- entari_plugin_hyw/test_output/render_4.jpg,sha256=uoODJLt-PPY7iCpmlo9ehPIlMMiN6dmmlkT9tpIHEn8,109963
88
- entari_plugin_hyw/tests/ui_test_output.jpg,sha256=UwkpO8saGV0HubXq9Oo0WpdVON9t5_7FSmQrak_YfpE,1586227
89
- entari_plugin_hyw/tests/verify_ui.py,sha256=Zln2mNCaJopvDBOFujux0K1UF5PfjpmscBq6q3-dKPI,6728
90
- entari_plugin_hyw/utils/__init__.py,sha256=TnkxDqYr0zgRE7TC92tVbUaY8m1UyyoLg2zvzQ8nMVI,84
91
- entari_plugin_hyw/utils/browser.py,sha256=LJlFh-oSqt9mQBpMALxbYGUG__t1YLUo7RxUAslsWUc,1416
92
- entari_plugin_hyw/utils/misc.py,sha256=pW-eSRKGJjJhVfz8Z-N0bTIHL57Jq3ynPzVFuy7YWnI,3479
93
- entari_plugin_hyw/utils/playwright_tool.py,sha256=ZZNkzFtUt_Gxny3Od4boBAgNF9J0N84uySatzn1Bwe4,1272
94
- entari_plugin_hyw/utils/prompts.py,sha256=Zqgs9ywMri-LLyPjPYA1qcA9GVBfqjZ3qVHb7K7Fnx8,4284
95
- entari_plugin_hyw/utils/search.py,sha256=Bvz2KFw3Gr2nuvmlo_8ExLHvO353NKX-YN35A2FCsBw,19047
96
- entari_plugin_hyw-3.5.0rc1.dist-info/METADATA,sha256=RDxS81rieII4yZMeKuhLS-1sd365gGH6N5zjHa4kiu0,3740
97
- entari_plugin_hyw-3.5.0rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
98
- entari_plugin_hyw-3.5.0rc1.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
99
- entari_plugin_hyw-3.5.0rc1.dist-info/RECORD,,
84
+ entari_plugin_hyw/card-ui/src/components/StageCard.vue,sha256=4nIZXKDtvsAKwc3-lkeONrpdT2Fbw_fgn16SdJOOku4,9066
85
+ entari_plugin_hyw-3.5.0rc2.dist-info/METADATA,sha256=r67ZlZIVbMlpS99bqIHs14JS3xiL8v_jz0ziQR69LrQ,3740
86
+ entari_plugin_hyw-3.5.0rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
87
+ entari_plugin_hyw-3.5.0rc2.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
88
+ entari_plugin_hyw-3.5.0rc2.dist-info/RECORD,,
File without changes