entari-plugin-hyw 4.0.0rc17__py3-none-any.whl → 4.0.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw-4.0.0rc19.dist-info/METADATA +26 -0
- entari_plugin_hyw-4.0.0rc19.dist-info/RECORD +4 -0
- entari_plugin_hyw-4.0.0rc19.dist-info/top_level.txt +1 -0
- entari_plugin_hyw/__init__.py +0 -914
- entari_plugin_hyw/filters.py +0 -83
- entari_plugin_hyw/history.py +0 -251
- entari_plugin_hyw/misc.py +0 -214
- entari_plugin_hyw/search_cache.py +0 -253
- entari_plugin_hyw-4.0.0rc17.dist-info/METADATA +0 -119
- entari_plugin_hyw-4.0.0rc17.dist-info/RECORD +0 -52
- entari_plugin_hyw-4.0.0rc17.dist-info/top_level.txt +0 -2
- hyw_core/__init__.py +0 -94
- hyw_core/agent.py +0 -876
- hyw_core/browser_control/__init__.py +0 -63
- hyw_core/browser_control/assets/card-dist/index.html +0 -429
- hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -1
- hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -9
- hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -1
- hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -1
- hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -15
- hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -1
- hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -24
- hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
- hyw_core/browser_control/assets/card-dist/vite.svg +0 -1
- hyw_core/browser_control/engines/__init__.py +0 -15
- hyw_core/browser_control/engines/base.py +0 -13
- hyw_core/browser_control/engines/default.py +0 -166
- hyw_core/browser_control/engines/duckduckgo.py +0 -171
- hyw_core/browser_control/landing.html +0 -172
- hyw_core/browser_control/manager.py +0 -173
- hyw_core/browser_control/renderer.py +0 -446
- hyw_core/browser_control/service.py +0 -1002
- hyw_core/config.py +0 -154
- hyw_core/core.py +0 -454
- hyw_core/crawling/__init__.py +0 -18
- hyw_core/crawling/completeness.py +0 -437
- hyw_core/crawling/models.py +0 -88
- hyw_core/definitions.py +0 -166
- hyw_core/image_cache.py +0 -274
- hyw_core/pipeline.py +0 -502
- hyw_core/search.py +0 -169
- hyw_core/stages/__init__.py +0 -21
- hyw_core/stages/base.py +0 -95
- hyw_core/stages/summary.py +0 -218
- {entari_plugin_hyw-4.0.0rc17.dist-info → entari_plugin_hyw-4.0.0rc19.dist-info}/WHEEL +0 -0
hyw_core/image_cache.py
DELETED
|
@@ -1,274 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Image Caching Module for Pre-downloading Images
|
|
3
|
-
|
|
4
|
-
This module provides async image pre-download functionality to reduce render time.
|
|
5
|
-
Images are downloaded in the background when search results are obtained,
|
|
6
|
-
and cached as base64 data URLs for instant use during rendering.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import asyncio
|
|
10
|
-
import base64
|
|
11
|
-
import hashlib
|
|
12
|
-
from typing import Dict, List, Optional, Any
|
|
13
|
-
from loguru import logger
|
|
14
|
-
import httpx
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class ImageCache:
    """
    Async image cache that pre-downloads images as base64 data URLs.

    Each URL ends up in one of three states:
      * pending - a download task exists in ``_pending``
      * cached  - ``_cache[url]`` holds a ``data:<mime>;base64,...`` string
      * failed  - ``_cache[url]`` is ``None`` (download error, too large,
                  or the server answered with an HTML page)

    Usage:
        cache = ImageCache()

        # Start pre-downloading images (non-blocking, needs a running loop)
        cache.start_prefetch(image_urls)

        # Later, get cached image (waits for a pending download by default)
        cached_url = await cache.get_cached(url)  # data:image/... or original URL
    """

    # User-Agent header sent with every download request.
    _USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    # (substring of the Content-Type header, MIME type), checked in order.
    _CONTENT_TYPE_HINTS = (
        ("jpeg", "image/jpeg"),
        ("jpg", "image/jpeg"),
        ("png", "image/png"),
        ("gif", "image/gif"),
        ("webp", "image/webp"),
        ("svg", "image/svg+xml"),
    )

    # (file extension, MIME type), checked in order against the URL.
    _EXTENSION_HINTS = (
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".png", "image/png"),
        (".gif", "image/gif"),
        (".webp", "image/webp"),
        (".svg", "image/svg+xml"),
    )

    # Used when neither the header nor the URL identifies the format.
    _DEFAULT_MIME = "image/jpeg"

    def __init__(
        self,
        max_size_kb: int = 500,  # Max image size to cache (KB)
        max_concurrent: int = 6,  # Max concurrent downloads
    ):
        """
        Create an empty cache.

        Args:
            max_size_kb: Largest image body, in KiB, that will be cached.
            max_concurrent: Upper bound on simultaneously running downloads.
        """
        self.max_size_bytes = max_size_kb * 1024
        self.max_concurrent = max_concurrent

        # Cache storage: url -> base64 data URL, or None if the download failed
        self._cache: Dict[str, Optional[str]] = {}
        # Pending downloads: url -> asyncio.Task running _download_image
        self._pending: Dict[str, asyncio.Task] = {}
        # Limits the number of downloads in flight at once
        self._semaphore = asyncio.Semaphore(max_concurrent)
        # Serialises the coroutine-side reads/writes of _cache and _pending
        self._lock = asyncio.Lock()

    def start_prefetch(self, urls: List[str]) -> None:
        """
        Start pre-downloading images in the background (non-blocking).

        Empty entries, non-HTTP(S) URLs and URLs that are already cached,
        failed or pending are skipped.

        Args:
            urls: List of image URLs to prefetch

        Raises:
            RuntimeError: If a download has to be scheduled but no event loop
                is running in the current thread.
        """
        loop = None
        for url in urls:
            if not url or not url.startswith(("http://", "https://")):
                continue
            if url in self._cache or url in self._pending:
                continue

            if loop is None:
                # Resolve the loop before building any coroutine object so a
                # missing loop fails cleanly instead of leaving a coroutine
                # that is never awaited.
                loop = asyncio.get_running_loop()

            self._pending[url] = loop.create_task(self._download_image(url))

    @classmethod
    def _guess_mime(cls, content_type: str, url: str) -> str:
        """
        Pick the MIME type for a data URL.

        The Content-Type header wins; otherwise the extension found in the URL
        (ignoring the query string and fragment) is used; otherwise JPEG.

        Args:
            content_type: Value of the Content-Type header ("" if absent).
            url: URL the image was requested from.

        Returns:
            One of the image MIME types known to this class.
        """
        header = (content_type or "").lower()
        for needle, mime in cls._CONTENT_TYPE_HINTS:
            if needle in header:
                return mime

        # Query strings and fragments often carry unrelated file names
        # (e.g. "?fallback=b.jpg"), so only look at what precedes them.
        location = url.split("#", 1)[0].split("?", 1)[0].lower()
        for extension, mime in cls._EXTENSION_HINTS:
            if extension in location:
                return mime

        return cls._DEFAULT_MIME

    async def _store_result(self, url: str, data_url: Optional[str]) -> None:
        """Record the outcome for *url* (None = failed) and drop its pending entry."""
        async with self._lock:
            self._cache[url] = data_url
            self._pending.pop(url, None)

    async def _fetch_image(self, url: str) -> Optional[tuple]:
        """
        Download *url*, enforcing the size limit while the body streams in.

        Returns:
            ``(body_bytes, lowercased_content_type)`` on success, or None when
            the image is larger than ``max_size_bytes`` or the response is an
            HTML page.

        Raises:
            Exception: Whatever httpx raises for connection or HTTP status
                errors; the caller treats any exception as a failed download.
        """
        # No timeout - images download until agent ends
        async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
            async with client.stream(
                "GET", url, headers={"User-Agent": self._USER_AGENT}
            ) as resp:
                resp.raise_for_status()

                # Cheap early exit when the server announces an oversized body.
                # A malformed header is ignored; the streaming cap below still applies.
                declared = resp.headers.get("content-length", "")
                if declared.isdigit() and int(declared) > self.max_size_bytes:
                    logger.debug(f"ImageCache: Skipping {url} (too large: {declared} bytes)")
                    return None

                content_type = resp.headers.get("content-type", "").lower()
                if content_type.startswith("text/html"):
                    # Typically an error or hotlink-protection page served with
                    # status 200; encoding it as an image would cache garbage.
                    logger.debug(f"ImageCache: Skipping {url} (not an image: {content_type})")
                    return None

                # Read incrementally so an oversized body is abandoned as soon
                # as it crosses the limit instead of being buffered in full.
                chunks = []
                received = 0
                async for chunk in resp.aiter_bytes():
                    received += len(chunk)
                    if received > self.max_size_bytes:
                        logger.debug(
                            f"ImageCache: Skipping {url} "
                            f"(content too large: more than {self.max_size_bytes} bytes)"
                        )
                        return None
                    chunks.append(chunk)

                return b"".join(chunks), content_type

    async def _download_image(self, url: str) -> Optional[str]:
        """
        Download a single image and convert it to a base64 data URL.

        The outcome is always recorded in the cache (None on failure) and the
        URL is removed from the pending map, unless the task is cancelled.

        Returns:
            Base64 data URL or None if failed/too large
        """
        async with self._semaphore:
            try:
                fetched = await self._fetch_image(url)
                if fetched is None:
                    await self._store_result(url, None)
                    return None

                content, content_type = fetched
                mime = self._guess_mime(content_type, url)
                b64 = base64.b64encode(content).decode("utf-8")
                data_url = f"data:{mime};base64,{b64}"

                await self._store_result(url, data_url)
                logger.debug(f"ImageCache: Cached {url} ({len(content)} bytes)")
                return data_url

            except Exception as e:
                # Best-effort: any failure just means the original URL is used.
                logger.debug(f"ImageCache: Failed to download {url}: {e}")
                await self._store_result(url, None)
                return None

    async def get_cached(self, url: str, wait: bool = True) -> str:
        """
        Get cached image data URL, or original URL if not cached.

        Args:
            url: Original image URL
            wait: If True, wait for a pending download to complete
                (no timeout - waits until the download finishes or is cancelled)

        Returns:
            Cached data URL or original URL
        """
        if not url:
            return url

        async with self._lock:
            if url in self._cache:
                # A None entry marks a failed download -> fall back to the URL
                return self._cache[url] or url
            pending_task = self._pending.get(url)

        if not (pending_task and wait):
            return url

        try:
            await pending_task
            async with self._lock:
                return self._cache.get(url) or url
        except asyncio.CancelledError:
            # NOTE(review): cancellation is swallowed here, so a cancelled
            # caller also receives the original URL instead of CancelledError.
            # Kept as-is because it appears to be intentional.
            logger.debug(f"ImageCache: Download cancelled for {url}")
            return url
        except Exception:
            return url

    async def get_all_cached(self, urls: List[str]) -> Dict[str, str]:
        """
        Get cached URLs for multiple images.

        Waits (without timeout) for every download of *urls* that is still
        pending, then resolves each URL against the cache.

        Args:
            urls: List of original URLs

        Returns:
            Dict mapping original URL to cached data URL (or original if not cached)
        """
        async with self._lock:
            pending_tasks = [self._pending[url] for url in urls if url in self._pending]

        if pending_tasks:
            try:
                await asyncio.gather(*pending_tasks, return_exceptions=True)
            except asyncio.CancelledError:
                # Cancellation ends the wait; whatever is cached so far is returned.
                logger.debug(f"ImageCache: Batch download cancelled")

        async with self._lock:
            return {url: self._cache.get(url) or url for url in urls}

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics: counts of cached, failed and pending URLs plus their total."""
        failed_count = sum(1 for value in self._cache.values() if value is None)
        pending_count = len(self._pending)
        return {
            "cached": len(self._cache) - failed_count,
            "failed": failed_count,
            "pending": pending_count,
            "total": len(self._cache) + pending_count,
        }

    def clear(self) -> None:
        """Clear all cached data and cancel every download that is still pending."""
        self._cache.clear()
        for task in self._pending.values():
            task.cancel()
        self._pending.clear()
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
# Process-wide ImageCache shared by the convenience functions; built on first use.
_global_cache: Optional[ImageCache] = None


def get_image_cache() -> ImageCache:
    """Return the shared ImageCache, creating it the first time it is requested."""
    global _global_cache
    shared = _global_cache
    if shared is None:
        shared = _global_cache = ImageCache()
    return shared
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
async def prefetch_images(urls: List[str]) -> None:
    """
    Kick off background downloads for *urls* on the shared image cache.

    Returns as soon as the downloads are scheduled; it does not wait for them.

    Args:
        urls: List of image URLs to prefetch
    """
    get_image_cache().start_prefetch(urls)
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
async def get_cached_images(urls: List[str]) -> Dict[str, str]:
    """
    Resolve *urls* against the shared image cache.

    Args:
        urls: List of original URLs

    Returns:
        Dict mapping original URL to cached data URL
    """
    shared = get_image_cache()
    resolved = await shared.get_all_cached(urls)
    return resolved
|