entari-plugin-hyw 4.0.0rc10__tar.gz → 4.0.0rc11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- {entari_plugin_hyw-4.0.0rc10/src/entari_plugin_hyw.egg-info → entari_plugin_hyw-4.0.0rc11}/PKG-INFO +1 -1
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/pyproject.toml +1 -1
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/__init__.py +23 -14
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11/src/entari_plugin_hyw.egg-info}/PKG-INFO +1 -1
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/service.py +208 -119
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/core.py +0 -7
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/completeness.py +99 -10
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/MANIFEST.in +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/README.md +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/setup.cfg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/Untitled-1 +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/history.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/misc.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/search_cache.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw.egg-info/SOURCES.txt +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw.egg-info/dependency_links.txt +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw.egg-info/requires.txt +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw.egg-info/top_level.txt +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/index.html +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/card-dist/vite.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/index.html +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/anthropic.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/cerebras.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/deepseek.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/gemini.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/google.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/grok.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/huggingface.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/microsoft.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/minimax.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/mistral.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/nvida.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/openai.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/openrouter.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/perplexity.svg +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/qwen.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/xai.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/xiaomi.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/assets/logos/zai.png +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/engines/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/engines/base.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/engines/default.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/engines/duckduckgo.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/landing.html +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/manager.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/renderer.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/config.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/models.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/definitions.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/image_cache.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/pipeline.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/search.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/stages/__init__.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/stages/base.py +0 -0
- {entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/stages/summary.py +0 -0
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/pyproject.toml
RENAMED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "entari_plugin_hyw"
|
|
7
|
-
version = "4.0.0-rc10"
|
|
7
|
+
version = "4.0.0-rc11"
|
|
8
8
|
description = "Use large language models to interpret chat messages"
|
|
9
9
|
authors = [{name = "kumoSleeping", email = "zjr2992@outlook.com"}]
|
|
10
10
|
dependencies = [
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/__init__.py
RENAMED
|
@@ -108,6 +108,13 @@ def parse_filter_syntax(query: str, max_count: int = 3):
|
|
|
108
108
|
if total > max_count:
|
|
109
109
|
return [], search_query, f"最多选择{max_count}个结果 (当前选择了{total}个)"
|
|
110
110
|
|
|
111
|
+
# Append filter names to search query
|
|
112
|
+
# Extract filter names (only 'link' type, skip 'index' type)
|
|
113
|
+
filter_names = [f[1] for f in filters if f[0] == 'link']
|
|
114
|
+
if filter_names:
|
|
115
|
+
# Append filter names to search query: "search_query filter1 filter2"
|
|
116
|
+
search_query = f"{search_query} {' '.join(filter_names)}"
|
|
117
|
+
|
|
111
118
|
return filters, search_query, None
|
|
112
119
|
|
|
113
120
|
|
|
@@ -212,12 +219,11 @@ renderer = ContentRenderer(headless=conf.headless)
|
|
|
212
219
|
set_global_renderer(renderer)
|
|
213
220
|
search_cache = SearchResultCache(ttl_seconds=600.0) # 10 minutes
|
|
214
221
|
|
|
215
|
-
|
|
222
|
+
# Initialize HywCore immediately at plugin load time (not lazy)
|
|
223
|
+
# This avoids the 2s delay on first user request caused by AsyncOpenAI client creation
|
|
224
|
+
_hyw_core: HywCore = HywCore(conf.to_hyw_core_config())
|
|
216
225
|
|
|
217
226
|
def get_hyw_core() -> HywCore:
|
|
218
|
-
global _hyw_core
|
|
219
|
-
if _hyw_core is None:
|
|
220
|
-
_hyw_core = HywCore(conf.to_hyw_core_config())
|
|
221
227
|
return _hyw_core
|
|
222
228
|
|
|
223
229
|
|
|
@@ -799,10 +805,14 @@ async def handle_web_command(session: Session[MessageCreatedEvent], result: Arpa
|
|
|
799
805
|
await session.send(filter_error)
|
|
800
806
|
return
|
|
801
807
|
|
|
802
|
-
# Start search
|
|
808
|
+
# Start search first
|
|
803
809
|
local_renderer = await get_content_renderer()
|
|
804
810
|
search_task = asyncio.create_task(core.search([search_query]))
|
|
805
|
-
|
|
811
|
+
|
|
812
|
+
# Only pre-warm tab if NOT in filter mode (filter mode = screenshots only, no card render)
|
|
813
|
+
tab_task = None
|
|
814
|
+
if not filters:
|
|
815
|
+
tab_task = asyncio.create_task(local_renderer.prepare_tab())
|
|
806
816
|
|
|
807
817
|
if conf.reaction:
|
|
808
818
|
asyncio.create_task(react(session, "🔍"))
|
|
@@ -811,24 +821,23 @@ async def handle_web_command(session: Session[MessageCreatedEvent], result: Arpa
|
|
|
811
821
|
flat_results = results[0] if results else []
|
|
812
822
|
|
|
813
823
|
if not flat_results:
|
|
814
|
-
|
|
815
|
-
|
|
824
|
+
if tab_task:
|
|
825
|
+
try: await tab_task
|
|
826
|
+
except: pass
|
|
816
827
|
await session.send("Search returned no results.")
|
|
817
828
|
return
|
|
818
829
|
|
|
819
830
|
visible = [r for r in flat_results if not r.get("_hidden", False)]
|
|
820
831
|
|
|
821
832
|
if not visible:
|
|
822
|
-
|
|
823
|
-
|
|
833
|
+
if tab_task:
|
|
834
|
+
try: await tab_task
|
|
835
|
+
except: pass
|
|
824
836
|
await session.send("Search returned no visible results.")
|
|
825
837
|
return
|
|
826
838
|
|
|
827
|
-
# === Filter Mode: Screenshot matching links ===
|
|
839
|
+
# === Filter Mode: Screenshot matching links (NO tab needed) ===
|
|
828
840
|
if filters:
|
|
829
|
-
# No need for tab in filter/screenshot mode, cancel it
|
|
830
|
-
try: await tab_task
|
|
831
|
-
except: pass
|
|
832
841
|
|
|
833
842
|
urls_to_screenshot = []
|
|
834
843
|
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/service.py
RENAMED
|
@@ -285,14 +285,27 @@ class ScreenshotService:
|
|
|
285
285
|
// 2. Check loading status using decode() AND heuristic for placeholders
|
|
286
286
|
// Some sites load a tiny blurred placeholder first.
|
|
287
287
|
const checks = visibleImgs.map(img => {
|
|
288
|
-
//
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
288
|
+
// Enhanced placeholder detection (matching completeness.py)
|
|
289
|
+
const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') ||
|
|
290
|
+
img.getAttribute('data-lazy-src') || img.getAttribute('data-lazy') || '';
|
|
291
|
+
const className = (typeof img.className === 'string' ? img.className : '').toLowerCase();
|
|
292
|
+
const loadingAttr = img.getAttribute('loading') || '';
|
|
293
|
+
const src = img.src || '';
|
|
292
294
|
|
|
293
295
|
const isPlaceholder = (
|
|
294
|
-
|
|
295
|
-
(
|
|
296
|
+
// data-src not yet loaded
|
|
297
|
+
(dataSrc && img.src !== dataSrc) ||
|
|
298
|
+
// Natural size much smaller than display (blurred placeholder)
|
|
299
|
+
(img.naturalWidth < 50 && img.clientWidth > 100) ||
|
|
300
|
+
(img.naturalWidth < 100 && img.clientWidth > 200 && img.naturalWidth * 4 < img.clientWidth) ||
|
|
301
|
+
// Lazy-loading class indicators
|
|
302
|
+
className.includes('lazy') ||
|
|
303
|
+
className.includes('lazyload') ||
|
|
304
|
+
className.includes('lozad') ||
|
|
305
|
+
// CSS blur filter applied
|
|
306
|
+
(window.getComputedStyle(img).filter || '').includes('blur') ||
|
|
307
|
+
// loading="lazy" + not complete
|
|
308
|
+
(loadingAttr === 'lazy' && !img.complete)
|
|
296
309
|
);
|
|
297
310
|
|
|
298
311
|
if (isPlaceholder) {
|
|
@@ -562,84 +575,50 @@ class ScreenshotService:
|
|
|
562
575
|
# Now navigate to the URL - page will render at target width
|
|
563
576
|
tab.get(url)
|
|
564
577
|
|
|
565
|
-
#
|
|
566
|
-
try:
|
|
567
|
-
# Listen for data packets (XHR/Fetch/POST)
|
|
568
|
-
# Targets: xhr, fetch. POST usually falls under these or Document.
|
|
569
|
-
tab.listen.start(targets=True) # Listen to everything for now to be safe
|
|
570
|
-
except Exception as e:
|
|
571
|
-
logger.warning(f"ScreenshotService: Failed to start network listener: {e}")
|
|
578
|
+
# Network monitoring removed - not needed for simplified wait logic
|
|
572
579
|
|
|
573
580
|
# Initialize crawl config for completeness checking (defined outside try for scope)
|
|
574
581
|
crawl_config = CrawlConfig(
|
|
575
582
|
scan_full_page=True,
|
|
576
583
|
scroll_step=800,
|
|
577
584
|
scroll_delay=0.5,
|
|
578
|
-
scroll_timeout=
|
|
579
|
-
image_load_timeout=
|
|
585
|
+
scroll_timeout=20.0, # Increased for lazy-loading pages
|
|
586
|
+
image_load_timeout=3.0, # Image loading timeout: 3 seconds
|
|
580
587
|
image_stability_checks=3,
|
|
581
588
|
)
|
|
582
589
|
|
|
590
|
+
# === Start scrolling immediately to trigger lazy loading ===
|
|
591
|
+
# Scroll first, then wait for DOM - this allows lazy loading to start in parallel
|
|
592
|
+
logger.info(f"ScreenshotService: Starting lazy load scroll for {url} (before DOM wait)")
|
|
593
|
+
trigger_lazy_load(tab, crawl_config)
|
|
594
|
+
|
|
595
|
+
# Now wait for DOM to be ready (after scroll has triggered lazy loading)
|
|
583
596
|
try:
|
|
584
597
|
# Wait for full page load (including JS execution)
|
|
585
|
-
|
|
598
|
+
try:
|
|
599
|
+
tab.wait.doc_loaded(timeout=timeout)
|
|
600
|
+
except:
|
|
601
|
+
pass
|
|
586
602
|
|
|
587
603
|
# Wait for actual content to appear (for CDN verification pages)
|
|
588
|
-
#
|
|
589
|
-
# 1. Network Idle: Wait for silence in XHR/POST
|
|
590
|
-
# 2. Stability: Wait for Height/Text/DOM stability
|
|
591
|
-
|
|
592
|
-
time.sleep(1.5) # user request: force wait 1.5s before detection
|
|
604
|
+
# Simplified wait logic for screenshot: just check basic readiness
|
|
593
605
|
|
|
594
|
-
|
|
595
|
-
last_text_len = 0
|
|
596
|
-
last_html_len = 0
|
|
597
|
-
stable_count = 0
|
|
598
|
-
|
|
599
|
-
for i in range(200): # Max 200 iterations (~20s)
|
|
606
|
+
for i in range(20): # Max 20 iterations (~1s) - much faster
|
|
600
607
|
try:
|
|
601
|
-
# 1. Check Network Activity
|
|
602
|
-
has_recent_network = False
|
|
603
|
-
try:
|
|
604
|
-
# Iterate over any captured packets since last check
|
|
605
|
-
for packet in tab.listen.steps(timeout=0.01):
|
|
606
|
-
# Check if it's a significant request (POST or XHR/Fetch)
|
|
607
|
-
method = packet.method.upper()
|
|
608
|
-
r_type = packet.resourceType.upper() if getattr(packet, 'resourceType', None) else ""
|
|
609
|
-
|
|
610
|
-
# Interested in: POST requests OR any XHR/Fetch response
|
|
611
|
-
if method == 'POST' or 'XMLHTTPREQUEST' in r_type or 'FETCH' in r_type:
|
|
612
|
-
# Ignore some common noise? (Optional: analytics, tracking)
|
|
613
|
-
# For now, simplistic approach: any API traffic resets stability
|
|
614
|
-
has_recent_network = True
|
|
615
|
-
# logger.debug(f"Network Activity: {method} {packet.url[:50]}")
|
|
616
|
-
break
|
|
617
|
-
except:
|
|
618
|
-
pass
|
|
619
|
-
|
|
620
|
-
# 2. Check DOM State
|
|
621
608
|
state = tab.run_js('''
|
|
622
609
|
return {
|
|
623
610
|
ready: document.readyState === 'complete',
|
|
624
611
|
title: document.title,
|
|
625
|
-
|
|
626
|
-
document.body.scrollHeight || 0,
|
|
627
|
-
document.documentElement.scrollHeight || 0
|
|
628
|
-
),
|
|
629
|
-
text: document.body.innerText.substring(0, 1000) || "",
|
|
630
|
-
html_len: document.body.innerHTML.length || 0
|
|
612
|
+
text: document.body.innerText.substring(0, 500) || ""
|
|
631
613
|
};
|
|
632
|
-
''') or {'ready': False, 'title': "", '
|
|
614
|
+
''') or {'ready': False, 'title': "", 'text': ""}
|
|
633
615
|
|
|
634
616
|
is_ready = state.get('ready', False)
|
|
635
617
|
title = state.get('title', "").lower()
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
text_len = len(text_content)
|
|
639
|
-
html_len = int(state.get('html_len', 0))
|
|
640
|
-
text_lower = text_content.lower()
|
|
618
|
+
text_lower = state.get('text', "").lower()
|
|
619
|
+
text_len = len(text_lower)
|
|
641
620
|
|
|
642
|
-
#
|
|
621
|
+
# Check for verification pages
|
|
643
622
|
is_verification = "checking your browser" in text_lower or \
|
|
644
623
|
"just a moment" in text_lower or \
|
|
645
624
|
"please wait" in text_lower or \
|
|
@@ -647,54 +626,19 @@ class ScreenshotService:
|
|
|
647
626
|
"just a moment" in title or \
|
|
648
627
|
"loading..." in title
|
|
649
628
|
|
|
650
|
-
#
|
|
651
|
-
|
|
652
|
-
is_height_stable = current_h == last_h
|
|
653
|
-
is_text_stable = abs(text_len - last_text_len) < 5 # Allow minor fluctuations
|
|
654
|
-
is_dom_stable = abs(html_len - last_html_len) < 20 # Allow minor fluctuations (ads/tracking)
|
|
655
|
-
|
|
656
|
-
if is_height_stable and is_text_stable and is_dom_stable and not has_recent_network:
|
|
657
|
-
stable_count += 1
|
|
658
|
-
else:
|
|
659
|
-
# Reset if ANY metric changed or NETWORK active
|
|
660
|
-
stable_count = 0
|
|
661
|
-
# if has_recent_network: logger.debug("Stability reset: Network Activity")
|
|
662
|
-
|
|
663
|
-
# Conditions
|
|
664
|
-
has_content = text_len > 100 # At least 100 real chars
|
|
629
|
+
# Basic content check
|
|
630
|
+
has_content = text_len > 100
|
|
665
631
|
|
|
666
|
-
#
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
# If we just saw network, enforce at least 1s (10 ticks) clean silence even for large pages
|
|
670
|
-
required_stability = max(10, 40 if text_len < 500 else 25)
|
|
671
|
-
else:
|
|
672
|
-
required_stability = 40 if text_len < 500 else 25 # 4.0s or 2.5s
|
|
673
|
-
|
|
674
|
-
is_stable = stable_count >= required_stability
|
|
675
|
-
|
|
676
|
-
# Pass if all conditions met
|
|
677
|
-
if is_ready and not is_verification and has_content and is_stable:
|
|
632
|
+
# Pass if ready, not verification, and has content
|
|
633
|
+
if is_ready and not is_verification and has_content:
|
|
634
|
+
logger.debug(f"ScreenshotService: Page ready after {i * 0.05:.2f}s")
|
|
678
635
|
break
|
|
679
|
-
|
|
680
|
-
last_h = current_h
|
|
681
|
-
last_text_len = text_len
|
|
682
|
-
last_html_len = html_len
|
|
683
|
-
|
|
684
|
-
# Wait timing within loop (tab.listen.steps consumed some time, so sleep less)
|
|
685
|
-
try: time.sleep(0.05)
|
|
686
|
-
except: pass
|
|
687
636
|
|
|
637
|
+
time.sleep(0.05)
|
|
688
638
|
except Exception:
|
|
689
|
-
|
|
690
|
-
try: time.sleep(0.1)
|
|
691
|
-
except: pass
|
|
639
|
+
time.sleep(0.05)
|
|
692
640
|
continue
|
|
693
641
|
|
|
694
|
-
# Cleanup listener
|
|
695
|
-
try: tab.listen.stop()
|
|
696
|
-
except: pass
|
|
697
|
-
|
|
698
642
|
# DEBUG: Save HTML to inspect what happened (in data dir)
|
|
699
643
|
try:
|
|
700
644
|
import os
|
|
@@ -703,12 +647,9 @@ class ScreenshotService:
|
|
|
703
647
|
f.write(f"<!-- URL: {url} -->\n")
|
|
704
648
|
f.write(tab.html)
|
|
705
649
|
except: pass
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
except:
|
|
711
|
-
pass
|
|
650
|
+
|
|
651
|
+
except Exception as e:
|
|
652
|
+
logger.warning(f"ScreenshotService: Page readiness check failed: {e}")
|
|
712
653
|
|
|
713
654
|
# Scrollbar Hiding first (before any height calculation)
|
|
714
655
|
from .manager import SharedBrowserManager
|
|
@@ -717,17 +658,165 @@ class ScreenshotService:
|
|
|
717
658
|
# Scroll back to top
|
|
718
659
|
tab.run_js("window.scrollTo(0, 0);")
|
|
719
660
|
|
|
720
|
-
#
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
661
|
+
# Image loading monitoring with time tracking - DISABLED
|
|
662
|
+
# No longer waiting for images to load
|
|
663
|
+
# Initialize image tracking JavaScript
|
|
664
|
+
# image_tracking_js = """
|
|
665
|
+
# (() => {
|
|
666
|
+
# if (!window._imageLoadTracker) {
|
|
667
|
+
# window._imageLoadTracker = {
|
|
668
|
+
# startTime: Date.now(),
|
|
669
|
+
# images: new Map()
|
|
670
|
+
# };
|
|
671
|
+
#
|
|
672
|
+
# const imgs = Array.from(document.querySelectorAll('img'));
|
|
673
|
+
# const minSize = 50;
|
|
674
|
+
#
|
|
675
|
+
# imgs.forEach((img, idx) => {
|
|
676
|
+
# if (img.clientWidth < minSize && img.clientHeight < minSize) return;
|
|
677
|
+
#
|
|
678
|
+
# const src = img.src || img.getAttribute('data-src') || '';
|
|
679
|
+
# const key = `${idx}_${src.substring(0, 100)}`;
|
|
680
|
+
#
|
|
681
|
+
# if (img.complete && img.naturalWidth > 0 && img.naturalHeight > 0) {
|
|
682
|
+
# // Already loaded
|
|
683
|
+
# window._imageLoadTracker.images.set(key, {
|
|
684
|
+
# src: src.substring(0, 150),
|
|
685
|
+
# status: 'loaded',
|
|
686
|
+
# loadTime: 0, // Already loaded before tracking
|
|
687
|
+
# naturalSize: [img.naturalWidth, img.naturalHeight],
|
|
688
|
+
# displaySize: [img.clientWidth, img.clientHeight]
|
|
689
|
+
# });
|
|
690
|
+
# } else {
|
|
691
|
+
# // Track loading
|
|
692
|
+
# window._imageLoadTracker.images.set(key, {
|
|
693
|
+
# src: src.substring(0, 150),
|
|
694
|
+
# status: 'pending',
|
|
695
|
+
# startTime: Date.now(),
|
|
696
|
+
# naturalSize: [img.naturalWidth, img.naturalHeight],
|
|
697
|
+
# displaySize: [img.clientWidth, img.clientHeight]
|
|
698
|
+
# });
|
|
699
|
+
#
|
|
700
|
+
# // Add load event listener
|
|
701
|
+
# img.addEventListener('load', () => {
|
|
702
|
+
# const entry = window._imageLoadTracker.images.get(key);
|
|
703
|
+
# if (entry && entry.status === 'pending') {
|
|
704
|
+
# entry.status = 'loaded';
|
|
705
|
+
# entry.loadTime = Date.now() - entry.startTime;
|
|
706
|
+
# }
|
|
707
|
+
# });
|
|
708
|
+
#
|
|
709
|
+
# img.addEventListener('error', () => {
|
|
710
|
+
# const entry = window._imageLoadTracker.images.get(key);
|
|
711
|
+
# if (entry && entry.status === 'pending') {
|
|
712
|
+
# entry.status = 'failed';
|
|
713
|
+
# entry.loadTime = Date.now() - entry.startTime;
|
|
714
|
+
# }
|
|
715
|
+
# });
|
|
716
|
+
# }
|
|
717
|
+
# });
|
|
718
|
+
# }
|
|
719
|
+
#
|
|
720
|
+
# // Return current status
|
|
721
|
+
# const results = [];
|
|
722
|
+
# window._imageLoadTracker.images.forEach((value, key) => {
|
|
723
|
+
# const entry = {
|
|
724
|
+
# src: value.src,
|
|
725
|
+
# status: value.status,
|
|
726
|
+
# loadTime: value.status === 'loaded' ? (value.loadTime || 0) : (Date.now() - value.startTime),
|
|
727
|
+
# naturalSize: value.naturalSize,
|
|
728
|
+
# displaySize: value.displaySize
|
|
729
|
+
# };
|
|
730
|
+
# results.push(entry);
|
|
731
|
+
# });
|
|
732
|
+
#
|
|
733
|
+
# return {
|
|
734
|
+
# total: results.length,
|
|
735
|
+
# loaded: results.filter(r => r.status === 'loaded').length,
|
|
736
|
+
# pending: results.filter(r => r.status === 'pending').length,
|
|
737
|
+
# failed: results.filter(r => r.status === 'failed').length,
|
|
738
|
+
# details: results
|
|
739
|
+
# };
|
|
740
|
+
# })()
|
|
741
|
+
# """
|
|
742
|
+
#
|
|
743
|
+
# # Initialize tracking
|
|
744
|
+
# tab.run_js(image_tracking_js)
|
|
745
|
+
#
|
|
746
|
+
# # Monitor image loading with dynamic stop logic
|
|
747
|
+
# check_interval = 0.2 # Check every 200ms
|
|
748
|
+
# image_timeout = 3.0 # Image loading timeout: 3 seconds
|
|
749
|
+
# monitoring_start = time.time()
|
|
750
|
+
# loaded_times = [] # Track load times of completed images
|
|
751
|
+
#
|
|
752
|
+
# logger.info(f"ScreenshotService: Starting image load monitoring (timeout={image_timeout}s)...")
|
|
753
|
+
#
|
|
754
|
+
# while True:
|
|
755
|
+
# elapsed = time.time() - monitoring_start
|
|
756
|
+
#
|
|
757
|
+
# # Check timeout first
|
|
758
|
+
# if elapsed >= image_timeout:
|
|
759
|
+
# logger.info(f"ScreenshotService: Image loading timeout ({image_timeout}s) reached")
|
|
760
|
+
# break
|
|
761
|
+
#
|
|
762
|
+
# # Get current image status
|
|
763
|
+
# status = tab.run_js(image_tracking_js, as_expr=True) or {
|
|
764
|
+
# 'total': 0, 'loaded': 0, 'pending': 0, 'failed': 0, 'details': []
|
|
765
|
+
# }
|
|
766
|
+
#
|
|
767
|
+
# # Log each image's status and load time
|
|
768
|
+
# for img_detail in status.get('details', []):
|
|
769
|
+
# src_short = img_detail.get('src', '')[:80]
|
|
770
|
+
# status_str = img_detail.get('status', 'unknown')
|
|
771
|
+
# load_time = img_detail.get('loadTime', 0)
|
|
772
|
+
# logger.info(
|
|
773
|
+
# f"ScreenshotService: Image [{status_str}] "
|
|
774
|
+
# f"loadTime={load_time:.0f}ms "
|
|
775
|
+
# f"src={src_short}"
|
|
776
|
+
# )
|
|
777
|
+
#
|
|
778
|
+
# # Collect load times of completed images
|
|
779
|
+
# loaded_times = [
|
|
780
|
+
# img.get('loadTime', 0)
|
|
781
|
+
# for img in status.get('details', [])
|
|
782
|
+
# if img.get('status') == 'loaded' and img.get('loadTime', 0) > 0
|
|
783
|
+
# ]
|
|
784
|
+
#
|
|
785
|
+
# pending_count = status.get('pending', 0)
|
|
786
|
+
# loaded_count = status.get('loaded', 0)
|
|
787
|
+
#
|
|
788
|
+
# # Check stop conditions
|
|
789
|
+
# if pending_count == 0:
|
|
790
|
+
# logger.info(f"ScreenshotService: All images loaded. Total: {status.get('total', 0)}, Loaded: {loaded_count}")
|
|
791
|
+
# break
|
|
792
|
+
#
|
|
793
|
+
# # Check dynamic stop condition (if we have loaded images to calculate average)
|
|
794
|
+
# if loaded_times:
|
|
795
|
+
# avg_load_time = sum(loaded_times) / len(loaded_times)
|
|
796
|
+
# max_wait_time = avg_load_time * 2
|
|
797
|
+
#
|
|
798
|
+
# # Check if any pending image has exceeded max wait time
|
|
799
|
+
# pending_images = [
|
|
800
|
+
# img for img in status.get('details', [])
|
|
801
|
+
# if img.get('status') == 'pending'
|
|
802
|
+
# ]
|
|
803
|
+
#
|
|
804
|
+
# should_stop = False
|
|
805
|
+
# for pending_img in pending_images:
|
|
806
|
+
# wait_time = pending_img.get('loadTime', 0)
|
|
807
|
+
# if wait_time >= max_wait_time:
|
|
808
|
+
# should_stop = True
|
|
809
|
+
# logger.info(
|
|
810
|
+
# f"ScreenshotService: Stopping - pending image waited {wait_time:.0f}ms, "
|
|
811
|
+
# f"exceeds 2x avg load time ({max_wait_time:.0f}ms, avg={avg_load_time:.0f}ms)"
|
|
812
|
+
# )
|
|
813
|
+
# break
|
|
814
|
+
#
|
|
815
|
+
# if should_stop:
|
|
816
|
+
# break
|
|
817
|
+
#
|
|
818
|
+
# # Wait before next check
|
|
819
|
+
# time.sleep(check_interval)
|
|
731
820
|
|
|
732
821
|
# Now calculate final height ONCE after all content loaded
|
|
733
822
|
# CompletenessChecker already verified height stability
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/core.py
RENAMED
|
@@ -10,7 +10,6 @@ from dataclasses import dataclass, field
|
|
|
10
10
|
from typing import Dict, List, Any, Optional, Callable, Awaitable
|
|
11
11
|
|
|
12
12
|
from loguru import logger
|
|
13
|
-
from openai import AsyncOpenAI
|
|
14
13
|
|
|
15
14
|
from .config import HywCoreConfig, ModelConfig
|
|
16
15
|
from .pipeline import ModularPipeline
|
|
@@ -95,12 +94,6 @@ class HywCore:
|
|
|
95
94
|
self.config = config
|
|
96
95
|
self._send_func = send_func
|
|
97
96
|
|
|
98
|
-
# Create OpenAI client
|
|
99
|
-
self._client = AsyncOpenAI(
|
|
100
|
-
api_key=config.api_key,
|
|
101
|
-
base_url=config.base_url if config.base_url else None
|
|
102
|
-
)
|
|
103
|
-
|
|
104
97
|
# Create search service
|
|
105
98
|
self._search_service = SearchService(config)
|
|
106
99
|
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/completeness.py
RENAMED
|
@@ -46,13 +46,39 @@ IMAGE_CHECK_JS = """
|
|
|
46
46
|
results.total++;
|
|
47
47
|
|
|
48
48
|
const src = img.src || img.getAttribute('data-src') || '';
|
|
49
|
+
const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') ||
|
|
50
|
+
img.getAttribute('data-lazy-src') || img.getAttribute('data-lazy') || '';
|
|
51
|
+
const className = (typeof img.className === 'string' ? img.className : '').toLowerCase();
|
|
52
|
+
const loadingAttr = img.getAttribute('loading') || '';
|
|
53
|
+
|
|
54
|
+
// Enhanced placeholder detection for blurred preview images (like mcmod.cn)
|
|
49
55
|
const isPlaceholder = (
|
|
50
|
-
|
|
56
|
+
// 1. data-src exists but not yet loaded into src
|
|
57
|
+
(dataSrc && img.src !== dataSrc) ||
|
|
58
|
+
// 2. Natural size much smaller than display size (blurred placeholder)
|
|
51
59
|
(img.naturalWidth < 50 && img.clientWidth > 100) ||
|
|
60
|
+
(img.naturalWidth < 100 && img.clientWidth > 200 && img.naturalWidth * 4 < img.clientWidth) ||
|
|
61
|
+
// 3. Common placeholder keywords in src
|
|
52
62
|
src.includes('placeholder') ||
|
|
53
63
|
src.includes('loading') ||
|
|
64
|
+
src.includes('blank') ||
|
|
65
|
+
// 4. SVG placeholder or 1x1 tracking pixel
|
|
54
66
|
src.startsWith('data:image/svg+xml') ||
|
|
55
|
-
(img.naturalWidth === 1 && img.naturalHeight === 1)
|
|
67
|
+
(img.naturalWidth === 1 && img.naturalHeight === 1) ||
|
|
68
|
+
// 5. Lazy-loading class indicators (common patterns)
|
|
69
|
+
className.includes('lazy') ||
|
|
70
|
+
className.includes('lazyload') ||
|
|
71
|
+
className.includes('lozad') ||
|
|
72
|
+
className.includes('b-lazy') ||
|
|
73
|
+
// 6. Blur indicators (common for LQIP - Low Quality Image Placeholder)
|
|
74
|
+
className.includes('blur') ||
|
|
75
|
+
src.includes('blur') ||
|
|
76
|
+
src.includes('thumb') ||
|
|
77
|
+
src.includes('thumbnail') ||
|
|
78
|
+
// 7. loading="lazy" + not complete (browser native lazy loading)
|
|
79
|
+
(loadingAttr === 'lazy' && !img.complete) ||
|
|
80
|
+
// 8. CSS blur filter applied (visual blurring)
|
|
81
|
+
(window.getComputedStyle(img).filter || '').includes('blur')
|
|
56
82
|
);
|
|
57
83
|
|
|
58
84
|
if (isPlaceholder) {
|
|
@@ -304,6 +330,8 @@ def trigger_lazy_load(tab: Any, config: Optional[CrawlConfig] = None) -> None:
|
|
|
304
330
|
Scroll through page to trigger lazy-loaded images.
|
|
305
331
|
|
|
306
332
|
Implements Crawl4AI's scan_full_page behavior.
|
|
333
|
+
Strategy: Fast scroll with minimal delay (0.2s) per step to trigger network requests,
|
|
334
|
+
then wait at the bottom for all images to settle.
|
|
307
335
|
|
|
308
336
|
Args:
|
|
309
337
|
tab: DrissionPage tab object
|
|
@@ -316,33 +344,94 @@ def trigger_lazy_load(tab: Any, config: Optional[CrawlConfig] = None) -> None:
|
|
|
316
344
|
start = time.time()
|
|
317
345
|
current_pos = 0
|
|
318
346
|
|
|
319
|
-
logger.
|
|
347
|
+
logger.info(f"CompletenessChecker: Starting lazy load scroll (fast scroll + final wait)")
|
|
320
348
|
|
|
321
349
|
try:
|
|
322
|
-
|
|
323
|
-
|
|
350
|
+
max_scroll_steps = 100
|
|
351
|
+
step_count = 0
|
|
352
|
+
|
|
353
|
+
# 1. Fast Scroll Phase
|
|
354
|
+
while step_count < max_scroll_steps:
|
|
355
|
+
step_count += 1
|
|
324
356
|
current_pos += config.scroll_step
|
|
325
357
|
tab.run_js(f"window.scrollTo(0, {current_pos});")
|
|
326
358
|
|
|
327
|
-
#
|
|
328
|
-
time.sleep(
|
|
359
|
+
# Simple fixed delay per step (0.2s) as requested
|
|
360
|
+
time.sleep(0.2)
|
|
329
361
|
|
|
330
362
|
# Check if reached bottom
|
|
331
363
|
height = tab.run_js("""
|
|
332
|
-
|
|
364
|
+
Math.max(
|
|
333
365
|
document.body.scrollHeight || 0,
|
|
334
366
|
document.documentElement.scrollHeight || 0
|
|
335
|
-
)
|
|
367
|
+
)
|
|
336
368
|
""", as_expr=True) or 0
|
|
337
369
|
|
|
338
370
|
if current_pos >= height:
|
|
371
|
+
logger.debug(f"CompletenessChecker: Reached bottom at position {current_pos}")
|
|
339
372
|
break
|
|
373
|
+
|
|
374
|
+
# 2. Wait Phase at Bottom (Wait for images to settle - reduced timeout)
|
|
375
|
+
logger.debug("CompletenessChecker: Reached bottom, waiting for images to settle (max 2s)...")
|
|
376
|
+
wait_start = time.time()
|
|
377
|
+
max_wait_at_bottom = 2.0 # Reduced from 8s to 2s - scroll usually triggers loading quickly
|
|
378
|
+
|
|
379
|
+
# Quick check: just verify images are not placeholders (simplified check)
|
|
380
|
+
check_all_images_js = """
|
|
381
|
+
(() => {
|
|
382
|
+
const imgs = Array.from(document.querySelectorAll('img'));
|
|
383
|
+
if (imgs.length === 0) return true;
|
|
384
|
+
|
|
385
|
+
// Quick check: count non-placeholder images that are loaded
|
|
386
|
+
let loaded_count = 0;
|
|
387
|
+
let total_count = 0;
|
|
388
|
+
|
|
389
|
+
for (const img of imgs) {
|
|
390
|
+
// Skip tiny images
|
|
391
|
+
if (img.clientWidth < 50 && img.clientHeight < 50) continue;
|
|
392
|
+
|
|
393
|
+
total_count++;
|
|
394
|
+
const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') || '';
|
|
395
|
+
const src = img.src || '';
|
|
396
|
+
|
|
397
|
+
// Check if placeholder
|
|
398
|
+
const isPlaceholder = (
|
|
399
|
+
(dataSrc && img.src !== dataSrc) ||
|
|
400
|
+
(img.naturalWidth < 50 && img.clientWidth > 100) ||
|
|
401
|
+
src.includes('placeholder') || src.includes('loading')
|
|
402
|
+
);
|
|
403
|
+
|
|
404
|
+
// If not placeholder and loaded, count it
|
|
405
|
+
if (!isPlaceholder && img.complete && img.naturalWidth > 0) {
|
|
406
|
+
loaded_count++;
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
// If most images are loaded (80%+), consider it done
|
|
411
|
+
return total_count === 0 || (loaded_count / total_count) >= 0.8;
|
|
412
|
+
})()
|
|
413
|
+
"""
|
|
414
|
+
|
|
415
|
+
# Quick check loop with shorter interval
|
|
416
|
+
check_count = 0
|
|
417
|
+
max_checks = 4 # 2s / 0.5s = 4 checks max
|
|
418
|
+
while check_count < max_checks:
|
|
419
|
+
try:
|
|
420
|
+
all_loaded = tab.run_js(check_all_images_js, as_expr=True)
|
|
421
|
+
if all_loaded:
|
|
422
|
+
elapsed_wait = time.time() - wait_start
|
|
423
|
+
logger.debug(f"CompletenessChecker: Images settled at bottom in {elapsed_wait:.1f}s")
|
|
424
|
+
break
|
|
425
|
+
except:
|
|
426
|
+
pass
|
|
427
|
+
time.sleep(0.5)
|
|
428
|
+
check_count += 1
|
|
340
429
|
|
|
341
430
|
# Scroll back to top
|
|
342
431
|
tab.run_js("window.scrollTo(0, 0);")
|
|
343
432
|
|
|
344
433
|
elapsed = time.time() - start
|
|
345
|
-
logger.
|
|
434
|
+
logger.info(f"CompletenessChecker: Lazy load scroll complete - {step_count} steps in {elapsed:.1f}s")
|
|
346
435
|
|
|
347
436
|
except Exception as e:
|
|
348
437
|
logger.warning(f"CompletenessChecker: Lazy load scroll failed: {e}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/Untitled-1
RENAMED
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/history.py
RENAMED
|
File without changes
|
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/search_cache.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/manager.py
RENAMED
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/renderer.py
RENAMED
|
File without changes
|
|
File without changes
|
{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|