entari-plugin-hyw 4.0.0rc17__py3-none-any.whl → 4.0.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (55) hide show
  1. entari_plugin_hyw-4.0.0rc19.dist-info/METADATA +26 -0
  2. entari_plugin_hyw-4.0.0rc19.dist-info/RECORD +4 -0
  3. entari_plugin_hyw-4.0.0rc19.dist-info/top_level.txt +1 -0
  4. entari_plugin_hyw/__init__.py +0 -914
  5. entari_plugin_hyw/filters.py +0 -83
  6. entari_plugin_hyw/history.py +0 -251
  7. entari_plugin_hyw/misc.py +0 -214
  8. entari_plugin_hyw/search_cache.py +0 -253
  9. entari_plugin_hyw-4.0.0rc17.dist-info/METADATA +0 -119
  10. entari_plugin_hyw-4.0.0rc17.dist-info/RECORD +0 -52
  11. entari_plugin_hyw-4.0.0rc17.dist-info/top_level.txt +0 -2
  12. hyw_core/__init__.py +0 -94
  13. hyw_core/agent.py +0 -876
  14. hyw_core/browser_control/__init__.py +0 -63
  15. hyw_core/browser_control/assets/card-dist/index.html +0 -429
  16. hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -1
  17. hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -9
  18. hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
  19. hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -1
  20. hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -1
  21. hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
  22. hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
  23. hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -15
  24. hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
  25. hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
  26. hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
  27. hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -1
  28. hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
  29. hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -24
  30. hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
  31. hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
  32. hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
  33. hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
  34. hyw_core/browser_control/assets/card-dist/vite.svg +0 -1
  35. hyw_core/browser_control/engines/__init__.py +0 -15
  36. hyw_core/browser_control/engines/base.py +0 -13
  37. hyw_core/browser_control/engines/default.py +0 -166
  38. hyw_core/browser_control/engines/duckduckgo.py +0 -171
  39. hyw_core/browser_control/landing.html +0 -172
  40. hyw_core/browser_control/manager.py +0 -173
  41. hyw_core/browser_control/renderer.py +0 -446
  42. hyw_core/browser_control/service.py +0 -1002
  43. hyw_core/config.py +0 -154
  44. hyw_core/core.py +0 -454
  45. hyw_core/crawling/__init__.py +0 -18
  46. hyw_core/crawling/completeness.py +0 -437
  47. hyw_core/crawling/models.py +0 -88
  48. hyw_core/definitions.py +0 -166
  49. hyw_core/image_cache.py +0 -274
  50. hyw_core/pipeline.py +0 -502
  51. hyw_core/search.py +0 -169
  52. hyw_core/stages/__init__.py +0 -21
  53. hyw_core/stages/base.py +0 -95
  54. hyw_core/stages/summary.py +0 -218
  55. {entari_plugin_hyw-4.0.0rc17.dist-info → entari_plugin_hyw-4.0.0rc19.dist-info}/WHEEL +0 -0
hyw_core/image_cache.py DELETED
@@ -1,274 +0,0 @@
1
- """
2
- Image Caching Module for Pre-downloading Images
3
-
4
- This module provides async image pre-download functionality to reduce render time.
5
- Images are downloaded in the background when search results are obtained,
6
- and cached as base64 data URLs for instant use during rendering.
7
- """
8
-
9
- import asyncio
10
- import base64
11
- import hashlib
12
- from typing import Dict, List, Optional, Any
13
- from loguru import logger
14
- import httpx
15
-
16
-
17
-
18
class ImageCache:
    """
    Async image cache that pre-downloads images as base64 data URLs.

    Downloads run as background asyncio tasks, at most ``max_concurrent``
    at a time. A finished download is stored as a ``data:<mime>;base64,...``
    string; a failed or oversized one is stored as ``None`` so it is not
    retried. A download that is *cancelled* is simply forgotten, so a later
    ``start_prefetch`` call can schedule it again.

    Usage:
        cache = ImageCache()

        # Start pre-downloading images (non-blocking)
        cache.start_prefetch(image_urls)

        # Later, get cached image (blocking if not ready)
        cached_url = await cache.get_cached(url)  # Returns data:image/... or original URL
    """

    # User-Agent header sent with every image request.
    _USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    # (substring, MIME type) pairs tried in order against the lower-cased
    # Content-Type header; the first match wins.
    _MIME_BY_CONTENT_TYPE = (
        ("jpeg", "image/jpeg"),
        ("jpg", "image/jpeg"),
        ("png", "image/png"),
        ("gif", "image/gif"),
        ("webp", "image/webp"),
        ("svg", "image/svg+xml"),
    )

    # Same idea, tried against the lower-cased URL when the header did not help.
    _MIME_BY_URL_MARKER = (
        (".jpg", "image/jpeg"),
        (".jpeg", "image/jpeg"),
        (".png", "image/png"),
        (".gif", "image/gif"),
        (".webp", "image/webp"),
        (".svg", "image/svg+xml"),
    )

    def __init__(
        self,
        max_size_kb: int = 500,  # Max image size to cache (KB)
        max_concurrent: int = 6,  # Max concurrent downloads
    ):
        """
        Create an empty cache.

        Args:
            max_size_kb: Largest image, in KiB, that will be cached.
            max_concurrent: Maximum number of downloads running at once.
        """
        self.max_size_bytes = max_size_kb * 1024
        self.max_concurrent = max_concurrent

        # Cache storage: url -> base64_data_url or None (if failed)
        self._cache: Dict[str, Optional[str]] = {}
        # Pending downloads: url -> asyncio.Task
        self._pending: Dict[str, asyncio.Task] = {}
        # Semaphore for concurrent downloads
        self._semaphore = asyncio.Semaphore(max_concurrent)
        # Lock for cache access
        self._lock = asyncio.Lock()

    @staticmethod
    def _guess_mime(content_type: str, url: str) -> str:
        """
        Pick a MIME type for a data URL.

        Args:
            content_type: Value of the response Content-Type header ("" if absent).
            url: URL the image was requested from.

        Returns:
            The MIME type matched from the header, else from the URL,
            else "image/jpeg".
        """
        header = content_type.lower()
        for marker, mime in ImageCache._MIME_BY_CONTENT_TYPE:
            if marker in header:
                return mime

        location = url.lower()
        for marker, mime in ImageCache._MIME_BY_URL_MARKER:
            if marker in location:
                return mime

        return "image/jpeg"  # Default fallback

    def _discard_pending(self, url: str, task: "asyncio.Task") -> None:
        """
        Drop ``url`` from the pending map once ``task`` is done.

        The identity check keeps a late callback from an old task (for
        example one cancelled by ``clear``) from removing a newer task that
        was registered for the same URL in the meantime.
        """
        if self._pending.get(url) is task:
            del self._pending[url]

    def start_prefetch(self, urls: List[str]) -> None:
        """
        Start pre-downloading images in the background (non-blocking).

        URLs that are empty, do not start with "http", are already cached
        (successfully or as failed), or are already downloading are skipped.
        Creating the tasks requires a running event loop.

        Args:
            urls: List of image URLs to prefetch
        """
        for url in urls or ():
            if not url or not url.startswith("http"):
                continue
            if url in self._cache or url in self._pending:
                continue

            # Create background task
            task = asyncio.create_task(self._download_image(url))
            self._pending[url] = task
            # A cancelled task never reaches the bookkeeping at the end of
            # _download_image; this callback guarantees the entry is removed
            # anyway, so the URL can be prefetched again later.
            task.add_done_callback(
                lambda done, key=url: self._discard_pending(key, done)
            )

    async def _fetch_data_url(self, url: str) -> Optional[str]:
        """
        Download ``url`` and encode it as a base64 data URL.

        The body is streamed so that an image larger than
        ``max_size_bytes`` is abandoned as soon as the limit is exceeded
        instead of being read into memory in full.

        Returns:
            Base64 data URL, or None if the image is too large.

        Raises:
            Whatever the HTTP client raises for transport errors or
            non-success status codes.
        """
        # No timeout - images download until agent ends
        async with httpx.AsyncClient(timeout=None, follow_redirects=True) as client:
            async with client.stream(
                "GET", url, headers={"User-Agent": self._USER_AGENT}
            ) as resp:
                resp.raise_for_status()

                # Check the declared length first; a malformed header is
                # ignored because the streaming check below still applies.
                content_length = resp.headers.get("content-length")
                try:
                    declared_size = int(content_length) if content_length else None
                except ValueError:
                    declared_size = None
                if declared_size is not None and declared_size > self.max_size_bytes:
                    logger.debug(f"ImageCache: Skipping {url} (too large: {content_length} bytes)")
                    return None

                # Read content, giving up once the limit is passed
                chunks = []
                received = 0
                async for chunk in resp.aiter_bytes():
                    received += len(chunk)
                    if received > self.max_size_bytes:
                        logger.debug(
                            f"ImageCache: Skipping {url} (content too large: > {self.max_size_bytes} bytes)"
                        )
                        return None
                    chunks.append(chunk)

                content_type = resp.headers.get("content-type", "")

        content = b"".join(chunks)
        mime = self._guess_mime(content_type, url)

        # Encode to base64
        b64 = base64.b64encode(content).decode("utf-8")
        logger.debug(f"ImageCache: Cached {url} ({len(content)} bytes)")
        return f"data:{mime};base64,{b64}"

    async def _download_image(self, url: str) -> Optional[str]:
        """
        Download a single image and record the outcome in the cache.

        Returns:
            Base64 data URL or None if failed/too large
        """
        data_url: Optional[str] = None
        async with self._semaphore:
            try:
                data_url = await self._fetch_data_url(url)
            except Exception as e:
                # Best effort: any download error just marks the URL as failed.
                logger.debug(f"ImageCache: Failed to download {url}: {e}")

            async with self._lock:
                self._cache[url] = data_url
                self._pending.pop(url, None)
        return data_url

    async def get_cached(self, url: str, wait: bool = True) -> str:
        """
        Get cached image data URL, or original URL if not cached.

        The pending task is awaited directly, so cancelling the caller
        while it waits also cancels that download.

        Args:
            url: Original image URL
            wait: If True, wait for pending download to complete (no timeout - waits until agent ends)

        Returns:
            Cached data URL or original URL
        """
        if not url:
            return url

        # Check if already cached
        async with self._lock:
            if url in self._cache:
                # Return original if cached as None (failed)
                return self._cache[url] or url
            pending_task = self._pending.get(url)

        if not (pending_task and wait):
            return url

        # Wait for pending download (no timeout - waits until cancelled)
        try:
            await pending_task
            async with self._lock:
                return self._cache.get(url) or url
        except asyncio.CancelledError:
            logger.debug(f"ImageCache: Download cancelled for {url}")
            return url
        except Exception:
            return url

    async def get_all_cached(self, urls: List[str]) -> Dict[str, str]:
        """
        Get cached URLs for multiple images.

        Waits for every pending download among ``urls`` first. If the wait
        is cancelled, the cancellation is logged and whatever has been
        cached so far is returned.

        Args:
            urls: List of original URLs

        Returns:
            Dict mapping original URL to cached data URL (or original if not cached)
        """
        # Wait for all pending downloads first (no timeout - waits until cancelled)
        async with self._lock:
            pending_tasks = [self._pending[url] for url in urls if url in self._pending]

        if pending_tasks:
            try:
                await asyncio.gather(*pending_tasks, return_exceptions=True)
            except asyncio.CancelledError:
                logger.debug("ImageCache: Batch download cancelled")

        # Collect results
        result = {}
        async with self._lock:
            for url in urls:
                result[url] = self._cache.get(url) or url

        return result

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics (cached / failed / pending / total counts)."""
        failed_count = sum(1 for value in self._cache.values() if value is None)
        return {
            "cached": len(self._cache) - failed_count,
            "failed": failed_count,
            "pending": len(self._pending),
            "total": len(self._cache) + len(self._pending),
        }

    def clear(self) -> None:
        """Clear all cached data and cancel every pending download."""
        self._cache.clear()
        for task in self._pending.values():
            task.cancel()
        self._pending.clear()
238
-
239
-
240
# Process-wide cache instance, created on first use and shared across requests
_global_cache: Optional[ImageCache] = None


def get_image_cache() -> ImageCache:
    """Return the shared ImageCache, creating it on the first call."""
    global _global_cache
    instance = _global_cache
    if instance is None:
        instance = _global_cache = ImageCache()
    return instance
250
-
251
-
252
async def prefetch_images(urls: List[str]) -> None:
    """
    Kick off background prefetching on the shared image cache.

    Args:
        urls: List of image URLs to prefetch
    """
    get_image_cache().start_prefetch(urls)
261
-
262
-
263
async def get_cached_images(urls: List[str]) -> Dict[str, str]:
    """
    Look up several images in the shared image cache.

    Args:
        urls: List of original URLs

    Returns:
        Dict mapping original URL to cached data URL
    """
    shared_cache = get_image_cache()
    resolved = await shared_cache.get_all_cached(urls)
    return resolved