PyPI - entari-plugin-hyw - Versions diffs - 4.0.0rc10__tar.gz → 4.0.0rc11__tar.gz - Mend

entari-plugin-hyw 4.0.0rc10.tar.gz → 4.0.0rc11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (76) hide show

{entari_plugin_hyw-4.0.0rc10/src/entari_plugin_hyw.egg-info → entari_plugin_hyw-4.0.0rc11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: entari_plugin_hyw
-Version: 4.0.0rc10
+Version: 4.0.0rc11
 Summary: Use large language models to interpret chat messages
 Author-email: kumoSleeping <zjr2992@outlook.com>
 License: MIT

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "entari_plugin_hyw"
-version = "4.0.0-rc10"
+version = "4.0.0-rc11"
 description = "Use large language models to interpret chat messages"
 authors = [{name = "kumoSleeping", email = "zjr2992@outlook.com"}]
 dependencies = [

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/entari_plugin_hyw/__init__.py RENAMED Viewed

@@ -108,6 +108,13 @@ def parse_filter_syntax(query: str, max_count: int = 3):
     if total > max_count:
         return [], search_query, f"最多选择{max_count}个结果 (当前选择了{total}个)"
+    # Append filter names to search query
+    # Extract filter names (only 'link' type, skip 'index' type)
+    filter_names = [f[1] for f in filters if f[0] == 'link']
+    if filter_names:
+        # Append filter names to search query: "search_query filter1 filter2"
+        search_query = f"{search_query} {' '.join(filter_names)}"
     return filters, search_query, None
@@ -212,12 +219,11 @@ renderer = ContentRenderer(headless=conf.headless)
 set_global_renderer(renderer)
 search_cache = SearchResultCache(ttl_seconds=600.0)  # 10 minutes
-_hyw_core: Optional[HywCore] = None
+# Initialize HywCore immediately at plugin load time (not lazy)
+# This avoids the 2s delay on first user request caused by AsyncOpenAI client creation
+_hyw_core: HywCore = HywCore(conf.to_hyw_core_config())
 def get_hyw_core() -> HywCore:
-    global _hyw_core
-    if _hyw_core is None:
-        _hyw_core = HywCore(conf.to_hyw_core_config())
     return _hyw_core
@@ -799,10 +805,14 @@ async def handle_web_command(session: Session[MessageCreatedEvent], result: Arpa
             await session.send(filter_error)
             return
-        # Start search and tab pre-warming in parallel
+        # Start search first
         local_renderer = await get_content_renderer()
         search_task = asyncio.create_task(core.search([search_query]))
-        tab_task = asyncio.create_task(local_renderer.prepare_tab())
+        # Only pre-warm tab if NOT in filter mode (filter mode = screenshots only, no card render)
+        tab_task = None
+        if not filters:
+            tab_task = asyncio.create_task(local_renderer.prepare_tab())
         if conf.reaction:
             asyncio.create_task(react(session, "🔍"))
@@ -811,24 +821,23 @@ async def handle_web_command(session: Session[MessageCreatedEvent], result: Arpa
         flat_results = results[0] if results else []
         if not flat_results:
-            try: await tab_task
-            except: pass
+            if tab_task:
+                try: await tab_task
+                except: pass
             await session.send("Search returned no results.")
             return
         visible = [r for r in flat_results if not r.get("_hidden", False)]
         if not visible:
-            try: await tab_task
-            except: pass
+            if tab_task:
+                try: await tab_task
+                except: pass
             await session.send("Search returned no visible results.")
             return
-        # === Filter Mode: Screenshot matching links ===
+        # === Filter Mode: Screenshot matching links (NO tab needed) ===
         if filters:
-            # No need for tab in filter/screenshot mode, cancel it
-            try: await tab_task
-            except: pass
             urls_to_screenshot = []

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11/src/entari_plugin_hyw.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: entari_plugin_hyw
-Version: 4.0.0rc10
+Version: 4.0.0rc11
 Summary: Use large language models to interpret chat messages
 Author-email: kumoSleeping <zjr2992@outlook.com>
 License: MIT

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/browser_control/service.py RENAMED Viewed

@@ -285,14 +285,27 @@ class ScreenshotService:
                             // 2. Check loading status using decode() AND heuristic for placeholders
                             // Some sites load a tiny blurred placeholder first.
                             const checks = visibleImgs.map(img => {
-                                // HEURISTIC: content is likely not ready if:
-                                // - img has 'data-src' but src is different (or src is empty)
-                                // - img has 'loading="lazy"' and is not complete
-                                // - naturalWidth is very small (placeholder) compared to display width
+                                // Enhanced placeholder detection (matching completeness.py)
+                                const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') ||
+                                                img.getAttribute('data-lazy-src') || img.getAttribute('data-lazy') || '';
+                                const className = (typeof img.className === 'string' ? img.className : '').toLowerCase();
+                                const loadingAttr = img.getAttribute('loading') || '';
+                                const src = img.src || '';
                                 const isPlaceholder = (
-                                    (img.getAttribute('data-src') && img.src !== img.getAttribute('data-src')) ||
-                                    (img.naturalWidth < 50 && img.clientWidth > 100)
+                                    // data-src not yet loaded
+                                    (dataSrc && img.src !== dataSrc) ||
+                                    // Natural size much smaller than display (blurred placeholder)
+                                    (img.naturalWidth < 50 && img.clientWidth > 100) ||
+                                    (img.naturalWidth < 100 && img.clientWidth > 200 && img.naturalWidth * 4 < img.clientWidth) ||
+                                    // Lazy-loading class indicators
+                                    className.includes('lazy') ||
+                                    className.includes('lazyload') ||
+                                    className.includes('lozad') ||
+                                    // CSS blur filter applied
+                                    (window.getComputedStyle(img).filter || '').includes('blur') ||
+                                    // loading="lazy" + not complete
+                                    (loadingAttr === 'lazy' && !img.complete)
                                 );
                                 if (isPlaceholder) {
@@ -562,84 +575,50 @@ class ScreenshotService:
             # Now navigate to the URL - page will render at target width
             tab.get(url)
-            # Start monitoring network traffic
-            try:
-                # Listen for data packets (XHR/Fetch/POST)
-                # Targets: xhr, fetch. POST usually falls under these or Document.
-                tab.listen.start(targets=True) # Listen to everything for now to be safe
-            except Exception as e:
-                logger.warning(f"ScreenshotService: Failed to start network listener: {e}")
+            # Network monitoring removed - not needed for simplified wait logic
             # Initialize crawl config for completeness checking (defined outside try for scope)
             crawl_config = CrawlConfig(
                 scan_full_page=True,
                 scroll_step=800,
                 scroll_delay=0.5,
-                scroll_timeout=min(timeout, 10),
-                image_load_timeout=8.0,
+                scroll_timeout=20.0,  # Increased for lazy-loading pages
+                image_load_timeout=3.0,  # Image loading timeout: 3 seconds
                 image_stability_checks=3,
             )
+            # === Start scrolling immediately to trigger lazy loading ===
+            # Scroll first, then wait for DOM - this allows lazy loading to start in parallel
+            logger.info(f"ScreenshotService: Starting lazy load scroll for {url} (before DOM wait)")
+            trigger_lazy_load(tab, crawl_config)
+            # Now wait for DOM to be ready (after scroll has triggered lazy loading)
             try:
                 # Wait for full page load (including JS execution)
-                tab.wait.load_complete(timeout=timeout)
+                try:
+                    tab.wait.doc_loaded(timeout=timeout)
+                except:
+                    pass
                 # Wait for actual content to appear (for CDN verification pages)
-                # Smart Wait Logic (Final Robust):
-                # 1. Network Idle: Wait for silence in XHR/POST
-                # 2. Stability: Wait for Height/Text/DOM stability
-                time.sleep(1.5)  # user request: force wait 1.5s before detection
+                # Simplified wait logic for screenshot: just check basic readiness
-                last_h = 0
-                last_text_len = 0
-                last_html_len = 0
-                stable_count = 0
-                for i in range(200):  # Max 200 iterations (~20s)
+                for i in range(20):  # Max 20 iterations (~1s) - much faster
                     try:
-                        # 1. Check Network Activity
-                        has_recent_network = False
-                        try:
-                            # Iterate over any captured packets since last check
-                            for packet in tab.listen.steps(timeout=0.01):
-                                # Check if it's a significant request (POST or XHR/Fetch)
-                                method = packet.method.upper()
-                                r_type = packet.resourceType.upper() if getattr(packet, 'resourceType', None) else ""
-                                # Interested in: POST requests OR any XHR/Fetch response
-                                if method == 'POST' or 'XMLHTTPREQUEST' in r_type or 'FETCH' in r_type:
-                                    # Ignore some common noise? (Optional: analytics, tracking)
-                                    # For now, simplistic approach: any API traffic resets stability
-                                    has_recent_network = True
-                                    # logger.debug(f"Network Activity: {method} {packet.url[:50]}")
-                                    break
-                        except:
-                            pass
-                        # 2. Check DOM State
                         state = tab.run_js('''
                             return {
                                 ready: document.readyState === 'complete',
                                 title: document.title,
-                                height: Math.max(
-                                    document.body.scrollHeight || 0,
-                                    document.documentElement.scrollHeight || 0
-                                ),
-                                text: document.body.innerText.substring(0, 1000) || "",
-                                html_len: document.body.innerHTML.length || 0
+                                text: document.body.innerText.substring(0, 500) || ""
                             };
-                        ''') or {'ready': False, 'title': "", 'height': 0, 'text': "", 'html_len': 0}
+                        ''') or {'ready': False, 'title': "", 'text': ""}
                         is_ready = state.get('ready', False)
                         title = state.get('title', "").lower()
-                        current_h = int(state.get('height', 0))
-                        text_content = state.get('text', "")
-                        text_len = len(text_content)
-                        html_len = int(state.get('html_len', 0))
-                        text_lower = text_content.lower()
+                        text_lower = state.get('text', "").lower()
+                        text_len = len(text_lower)
-                        # Blacklist check (Loading indicators)
+                        # Check for verification pages
                         is_verification = "checking your browser" in text_lower or \
                                         "just a moment" in text_lower or \
                                         "please wait" in text_lower or \
@@ -647,54 +626,19 @@ class ScreenshotService:
                                         "just a moment" in title or \
                                         "loading..." in title
-                        # Stability check (Multi-metric + Network)
-                        # We require STABILITY across Height, Text Length, DOM Size AND Network Silence
-                        is_height_stable = current_h == last_h
-                        is_text_stable = abs(text_len - last_text_len) < 5  # Allow minor fluctuations
-                        is_dom_stable = abs(html_len - last_html_len) < 20 # Allow minor fluctuations (ads/tracking)
-                        if is_height_stable and is_text_stable and is_dom_stable and not has_recent_network:
-                            stable_count += 1
-                        else:
-                            # Reset if ANY metric changed or NETWORK active
-                            stable_count = 0
-                            # if has_recent_network: logger.debug("Stability reset: Network Activity")
-                        # Conditions
-                        has_content = text_len > 100 # At least 100 real chars
+                        # Basic content check
+                        has_content = text_len > 100
-                        # Dynamic Stability Requirement:
-                        # If page looks like it's loading (small content), require longer stability
-                        if has_recent_network:
-                             # If we just saw network, enforce at least 1s (10 ticks) clean silence even for large pages
-                             required_stability = max(10, 40 if text_len < 500 else 25)
-                        else:
-                             required_stability = 40 if text_len < 500 else 25  # 4.0s or 2.5s
-                        is_stable = stable_count >= required_stability
-                        # Pass if all conditions met
-                        if is_ready and not is_verification and has_content and is_stable:
+                        # Pass if ready, not verification, and has content
+                        if is_ready and not is_verification and has_content:
+                            logger.debug(f"ScreenshotService: Page ready after {i * 0.05:.2f}s")
                             break
-                        last_h = current_h
-                        last_text_len = text_len
-                        last_html_len = html_len
-                        # Wait timing within loop (tab.listen.steps consumed some time, so sleep less)
-                        try: time.sleep(0.05)
-                        except: pass
+                        time.sleep(0.05)
                     except Exception:
-                        stable_count = 0
-                        try: time.sleep(0.1)
-                        except: pass
+                        time.sleep(0.05)
                         continue
-                # Cleanup listener
-                try: tab.listen.stop()
-                except: pass
                 # DEBUG: Save HTML to inspect what happened (in data dir)
                 try:
                     import os
@@ -703,12 +647,9 @@ class ScreenshotService:
                         f.write(f"<!-- URL: {url} -->\n")
                         f.write(tab.html)
                 except: pass
-                # Use crawling module for lazy loading trigger (config defined above)
-                trigger_lazy_load(tab, crawl_config)
-            except:
-                pass
+            except Exception as e:
+                logger.warning(f"ScreenshotService: Page readiness check failed: {e}")
             # Scrollbar Hiding first (before any height calculation)
             from .manager import SharedBrowserManager
@@ -717,17 +658,165 @@ class ScreenshotService:
             # Scroll back to top
             tab.run_js("window.scrollTo(0, 0);")
-            # Use CompletenessChecker to verify all images are loaded
-            checker = CompletenessChecker(crawl_config)
-            completeness = checker.wait_for_complete(tab, timeout=crawl_config.image_load_timeout)
-            logger.info(
-                f"ScreenshotService: Image completeness: "
-                f"{completeness.loaded_images}/{completeness.total_images} loaded, "
-                f"{completeness.failed_images} pending, "
-                f"{completeness.placeholder_images} placeholders, "
-                f"complete={completeness.is_complete}"
-            )
+            # Image loading monitoring with time tracking - DISABLED
+            # No longer waiting for images to load
+            # Initialize image tracking JavaScript
+            # image_tracking_js = """
+            # (() => {
+            #     if (!window._imageLoadTracker) {
+            #         window._imageLoadTracker = {
+            #             startTime: Date.now(),
+            #             images: new Map()
+            #         };
+            #
+            #         const imgs = Array.from(document.querySelectorAll('img'));
+            #         const minSize = 50;
+            #
+            #         imgs.forEach((img, idx) => {
+            #             if (img.clientWidth < minSize && img.clientHeight < minSize) return;
+            #
+            #             const src = img.src || img.getAttribute('data-src') || '';
+            #             const key = `${idx}_${src.substring(0, 100)}`;
+            #
+            #             if (img.complete && img.naturalWidth > 0 && img.naturalHeight > 0) {
+            #                 // Already loaded
+            #                 window._imageLoadTracker.images.set(key, {
+            #                     src: src.substring(0, 150),
+            #                     status: 'loaded',
+            #                     loadTime: 0,  // Already loaded before tracking
+            #                     naturalSize: [img.naturalWidth, img.naturalHeight],
+            #                     displaySize: [img.clientWidth, img.clientHeight]
+            #                 });
+            #             } else {
+            #                 // Track loading
+            #                 window._imageLoadTracker.images.set(key, {
+            #                     src: src.substring(0, 150),
+            #                     status: 'pending',
+            #                     startTime: Date.now(),
+            #                     naturalSize: [img.naturalWidth, img.naturalHeight],
+            #                     displaySize: [img.clientWidth, img.clientHeight]
+            #                 });
+            #
+            #                 // Add load event listener
+            #                 img.addEventListener('load', () => {
+            #                     const entry = window._imageLoadTracker.images.get(key);
+            #                     if (entry && entry.status === 'pending') {
+            #                         entry.status = 'loaded';
+            #                         entry.loadTime = Date.now() - entry.startTime;
+            #                     }
+            #                 });
+            #
+            #                 img.addEventListener('error', () => {
+            #                     const entry = window._imageLoadTracker.images.get(key);
+            #                     if (entry && entry.status === 'pending') {
+            #                         entry.status = 'failed';
+            #                         entry.loadTime = Date.now() - entry.startTime;
+            #                     }
+            #                 });
+            #             }
+            #         });
+            #     }
+            #
+            #     // Return current status
+            #     const results = [];
+            #     window._imageLoadTracker.images.forEach((value, key) => {
+            #         const entry = {
+            #             src: value.src,
+            #             status: value.status,
+            #             loadTime: value.status === 'loaded' ? (value.loadTime || 0) : (Date.now() - value.startTime),
+            #             naturalSize: value.naturalSize,
+            #             displaySize: value.displaySize
+            #         };
+            #         results.push(entry);
+            #     });
+            #
+            #     return {
+            #         total: results.length,
+            #         loaded: results.filter(r => r.status === 'loaded').length,
+            #         pending: results.filter(r => r.status === 'pending').length,
+            #         failed: results.filter(r => r.status === 'failed').length,
+            #         details: results
+            #     };
+            # })()
+            # """
+            #
+            # # Initialize tracking
+            # tab.run_js(image_tracking_js)
+            #
+            # # Monitor image loading with dynamic stop logic
+            # check_interval = 0.2  # Check every 200ms
+            # image_timeout = 3.0  # Image loading timeout: 3 seconds
+            # monitoring_start = time.time()
+            # loaded_times = []  # Track load times of completed images
+            #
+            # logger.info(f"ScreenshotService: Starting image load monitoring (timeout={image_timeout}s)...")
+            #
+            # while True:
+            #     elapsed = time.time() - monitoring_start
+            #
+            #     # Check timeout first
+            #     if elapsed >= image_timeout:
+            #         logger.info(f"ScreenshotService: Image loading timeout ({image_timeout}s) reached")
+            #         break
+            #
+            #     # Get current image status
+            #     status = tab.run_js(image_tracking_js, as_expr=True) or {
+            #         'total': 0, 'loaded': 0, 'pending': 0, 'failed': 0, 'details': []
+            #     }
+            #
+            #     # Log each image's status and load time
+            #     for img_detail in status.get('details', []):
+            #         src_short = img_detail.get('src', '')[:80]
+            #         status_str = img_detail.get('status', 'unknown')
+            #         load_time = img_detail.get('loadTime', 0)
+            #         logger.info(
+            #             f"ScreenshotService: Image [{status_str}] "
+            #             f"loadTime={load_time:.0f}ms "
+            #             f"src={src_short}"
+            #         )
+            #
+            #     # Collect load times of completed images
+            #     loaded_times = [
+            #         img.get('loadTime', 0)
+            #         for img in status.get('details', [])
+            #         if img.get('status') == 'loaded' and img.get('loadTime', 0) > 0
+            #     ]
+            #
+            #     pending_count = status.get('pending', 0)
+            #     loaded_count = status.get('loaded', 0)
+            #
+            #     # Check stop conditions
+            #     if pending_count == 0:
+            #         logger.info(f"ScreenshotService: All images loaded. Total: {status.get('total', 0)}, Loaded: {loaded_count}")
+            #         break
+            #
+            #     # Check dynamic stop condition (if we have loaded images to calculate average)
+            #     if loaded_times:
+            #         avg_load_time = sum(loaded_times) / len(loaded_times)
+            #         max_wait_time = avg_load_time * 2
+            #
+            #         # Check if any pending image has exceeded max wait time
+            #         pending_images = [
+            #             img for img in status.get('details', [])
+            #             if img.get('status') == 'pending'
+            #         ]
+            #
+            #         should_stop = False
+            #         for pending_img in pending_images:
+            #             wait_time = pending_img.get('loadTime', 0)
+            #             if wait_time >= max_wait_time:
+            #                 should_stop = True
+            #                 logger.info(
+            #                     f"ScreenshotService: Stopping - pending image waited {wait_time:.0f}ms, "
+            #                     f"exceeds 2x avg load time ({max_wait_time:.0f}ms, avg={avg_load_time:.0f}ms)"
+            #                 )
+            #                 break
+            #
+            #         if should_stop:
+            #             break
+            #
+            #     # Wait before next check
+            #     time.sleep(check_interval)
             # Now calculate final height ONCE after all content loaded
             # CompletenessChecker already verified height stability

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/core.py RENAMED Viewed

@@ -10,7 +10,6 @@ from dataclasses import dataclass, field
 from typing import Dict, List, Any, Optional, Callable, Awaitable
 from loguru import logger
-from openai import AsyncOpenAI
 from .config import HywCoreConfig, ModelConfig
 from .pipeline import ModularPipeline
@@ -95,12 +94,6 @@ class HywCore:
         self.config = config
         self._send_func = send_func
-        # Create OpenAI client
-        self._client = AsyncOpenAI(
-            api_key=config.api_key,
-            base_url=config.base_url if config.base_url else None
-        )
         # Create search service
         self._search_service = SearchService(config)

{entari_plugin_hyw-4.0.0rc10 → entari_plugin_hyw-4.0.0rc11}/src/hyw_core/crawling/completeness.py RENAMED Viewed

@@ -46,13 +46,39 @@ IMAGE_CHECK_JS = """
         results.total++;
         const src = img.src || img.getAttribute('data-src') || '';
+        const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') ||
+                        img.getAttribute('data-lazy-src') || img.getAttribute('data-lazy') || '';
+        const className = (typeof img.className === 'string' ? img.className : '').toLowerCase();
+        const loadingAttr = img.getAttribute('loading') || '';
+        // Enhanced placeholder detection for blurred preview images (like mcmod.cn)
         const isPlaceholder = (
-            (img.getAttribute('data-src') && img.src !== img.getAttribute('data-src')) ||
+            // 1. data-src exists but not yet loaded into src
+            (dataSrc && img.src !== dataSrc) ||
+            // 2. Natural size much smaller than display size (blurred placeholder)
             (img.naturalWidth < 50 && img.clientWidth > 100) ||
+            (img.naturalWidth < 100 && img.clientWidth > 200 && img.naturalWidth * 4 < img.clientWidth) ||
+            // 3. Common placeholder keywords in src
             src.includes('placeholder') ||
             src.includes('loading') ||
+            src.includes('blank') ||
+            // 4. SVG placeholder or 1x1 tracking pixel
             src.startsWith('data:image/svg+xml') ||
-            (img.naturalWidth === 1 && img.naturalHeight === 1)
+            (img.naturalWidth === 1 && img.naturalHeight === 1) ||
+            // 5. Lazy-loading class indicators (common patterns)
+            className.includes('lazy') ||
+            className.includes('lazyload') ||
+            className.includes('lozad') ||
+            className.includes('b-lazy') ||
+            // 6. Blur indicators (common for LQIP - Low Quality Image Placeholder)
+            className.includes('blur') ||
+            src.includes('blur') ||
+            src.includes('thumb') ||
+            src.includes('thumbnail') ||
+            // 7. loading="lazy" + not complete (browser native lazy loading)
+            (loadingAttr === 'lazy' && !img.complete) ||
+            // 8. CSS blur filter applied (visual blurring)
+            (window.getComputedStyle(img).filter || '').includes('blur')
         );
         if (isPlaceholder) {
@@ -304,6 +330,8 @@ def trigger_lazy_load(tab: Any, config: Optional[CrawlConfig] = None) -> None:
     Scroll through page to trigger lazy-loaded images.
     Implements Crawl4AI's scan_full_page behavior.
+    Strategy: Fast scroll with minimal delay (0.2s) per step to trigger network requests,
+    then wait at the bottom for all images to settle.
     Args:
         tab: DrissionPage tab object
@@ -316,33 +344,94 @@ def trigger_lazy_load(tab: Any, config: Optional[CrawlConfig] = None) -> None:
     start = time.time()
     current_pos = 0
-    logger.debug("CompletenessChecker: Starting lazy load trigger scroll")
+    logger.info(f"CompletenessChecker: Starting lazy load scroll (fast scroll + final wait)")
     try:
-        while time.time() - start < config.scroll_timeout:
-            # Scroll down
+        max_scroll_steps = 100
+        step_count = 0
+        # 1. Fast Scroll Phase
+        while step_count < max_scroll_steps:
+            step_count += 1
             current_pos += config.scroll_step
             tab.run_js(f"window.scrollTo(0, {current_pos});")
-            # Wait for lazy load triggers
-            time.sleep(config.scroll_delay)
+            # Simple fixed delay per step (0.2s) as requested
+            time.sleep(0.2)
             # Check if reached bottom
             height = tab.run_js("""
-                return Math.max(
+                Math.max(
                     document.body.scrollHeight || 0,
                     document.documentElement.scrollHeight || 0
-                );
+                )
             """, as_expr=True) or 0
             if current_pos >= height:
+                logger.debug(f"CompletenessChecker: Reached bottom at position {current_pos}")
                 break
+        # 2. Wait Phase at Bottom (Wait for images to settle - reduced timeout)
+        logger.debug("CompletenessChecker: Reached bottom, waiting for images to settle (max 2s)...")
+        wait_start = time.time()
+        max_wait_at_bottom = 2.0  # Reduced from 8s to 2s - scroll usually triggers loading quickly
+        # Quick check: just verify images are not placeholders (simplified check)
+        check_all_images_js = """
+        (() => {
+            const imgs = Array.from(document.querySelectorAll('img'));
+            if (imgs.length === 0) return true;
+            // Quick check: count non-placeholder images that are loaded
+            let loaded_count = 0;
+            let total_count = 0;
+            for (const img of imgs) {
+                // Skip tiny images
+                if (img.clientWidth < 50 && img.clientHeight < 50) continue;
+                total_count++;
+                const dataSrc = img.getAttribute('data-src') || img.getAttribute('data-original') || '';
+                const src = img.src || '';
+                // Check if placeholder
+                const isPlaceholder = (
+                    (dataSrc && img.src !== dataSrc) ||
+                    (img.naturalWidth < 50 && img.clientWidth > 100) ||
+                    src.includes('placeholder') || src.includes('loading')
+                );
+                // If not placeholder and loaded, count it
+                if (!isPlaceholder && img.complete && img.naturalWidth > 0) {
+                    loaded_count++;
+                }
+            }
+            // If most images are loaded (80%+), consider it done
+            return total_count === 0 || (loaded_count / total_count) >= 0.8;
+        })()
+        """
+        # Quick check loop with shorter interval
+        check_count = 0
+        max_checks = 4  # 2s / 0.5s = 4 checks max
+        while check_count < max_checks:
+            try:
+                all_loaded = tab.run_js(check_all_images_js, as_expr=True)
+                if all_loaded:
+                    elapsed_wait = time.time() - wait_start
+                    logger.debug(f"CompletenessChecker: Images settled at bottom in {elapsed_wait:.1f}s")
+                    break
+            except:
+                pass
+            time.sleep(0.5)
+            check_count += 1
         # Scroll back to top
         tab.run_js("window.scrollTo(0, 0);")
         elapsed = time.time() - start
-        logger.debug(f"CompletenessChecker: Lazy load scroll complete in {elapsed:.1f}s")
+        logger.info(f"CompletenessChecker: Lazy load scroll complete - {step_count} steps in {elapsed:.1f}s")
     except Exception as e:
         logger.warning(f"CompletenessChecker: Lazy load scroll failed: {e}")