entari-plugin-hyw 4.0.0rc8__py3-none-any.whl → 4.0.0rc10__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic.
- entari_plugin_hyw/__init__.py +62 -34
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc10.dist-info}/METADATA +1 -1
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc10.dist-info}/RECORD +15 -13
- hyw_core/browser_control/__init__.py +1 -3
- hyw_core/browser_control/assets/card-dist/index.html +48 -32
- hyw_core/browser_control/engines/__init__.py +0 -2
- hyw_core/browser_control/manager.py +18 -5
- hyw_core/browser_control/renderer.py +36 -6
- hyw_core/browser_control/service.py +119 -52
- hyw_core/crawling/__init__.py +18 -0
- hyw_core/crawling/completeness.py +348 -0
- hyw_core/crawling/models.py +88 -0
- hyw_core/search.py +4 -6
- hyw_core/browser_control/engines/google.py +0 -155
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc10.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc10.dist-info}/top_level.txt +0 -0
hyw_core/browser_control/engines/__init__.py

@@ -5,13 +5,11 @@ Provides search engine adapters for different search providers.
 """
 
 from .base import SearchEngine
-from .google import GoogleEngine
 from .duckduckgo import DuckDuckGoEngine
 from .default import DefaultEngine
 
 __all__ = [
     "SearchEngine",
-    "GoogleEngine",
     "DuckDuckGoEngine",
     "DefaultEngine",
 ]
hyw_core/browser_control/manager.py

@@ -124,15 +124,28 @@ class SharedBrowserManager:
     @staticmethod
     def hide_scrollbars(page: ChromiumPage):
         """
-        Robustly hide scrollbars using CDP commands.
-        This
+        Robustly hide scrollbars using CDP commands AND CSS injection.
+        This provides double protection against scrollbar gutters.
         """
         try:
-            #
+            # 1. CDP Command
             page.run_cdp('Emulation.setScrollbarsHidden', hidden=True)
-
+
+            # 2. CSS Injection (Standard + Webkit)
+            css = """
+            ::-webkit-scrollbar { display: none !important; width: 0 !important; height: 0 !important; }
+            * { -ms-overflow-style: none !important; scrollbar-width: none !important; }
+            """
+            # Inject into current page
+            page.run_js(f"""
+                const style = document.createElement('style');
+                style.textContent = `{css}`;
+                document.head.appendChild(style);
+            """)
+
+            logger.debug("SharedBrowserManager: Scrollbars hidden via CDP + CSS.")
         except Exception as e:
-            logger.warning(f"SharedBrowserManager: Failed to hide scrollbars
+            logger.warning(f"SharedBrowserManager: Failed to hide scrollbars: {e}")
 
 
 # Module-level singleton accessor
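For reference, the same double suppression can be applied to any DrissionPage tab. The sketch below is illustrative only (the standalone helper and setup are not part of the package); the run_cdp/run_js calls mirror the hunk above.

```python
# Illustrative sketch, not package code: CDP flag + CSS fallback on a plain DrissionPage tab.
from DrissionPage import ChromiumPage

HIDE_SCROLLBAR_CSS = """
::-webkit-scrollbar { display: none !important; width: 0 !important; height: 0 !important; }
* { -ms-overflow-style: none !important; scrollbar-width: none !important; }
"""

def hide_scrollbars(tab) -> None:
    # 1. Ask the browser to hide scrollbars outright via the emulation domain
    tab.run_cdp('Emulation.setScrollbarsHidden', hidden=True)
    # 2. Inject CSS for engines/pages where the CDP flag still leaves a gutter
    tab.run_js(f"""
        const style = document.createElement('style');
        style.textContent = `{HIDE_SCROLLBAR_CSS}`;
        document.head.appendChild(style);
    """)

if __name__ == "__main__":
    page = ChromiumPage()
    tab = page.new_tab()
    tab.get('https://example.com')
    hide_scrollbars(tab)
```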
hyw_core/browser_control/renderer.py

@@ -55,16 +55,43 @@ class ContentRenderer:
         loop = asyncio.get_running_loop()
         return await loop.run_in_executor(self._executor, self._prepare_tab_sync)
 
-    def _wait_for_render_finished(self, tab, timeout: float =
+    def _wait_for_render_finished(self, tab, timeout: float = 12.0, context: str = ""):
         """Wait for window.RENDER_FINISHED to be true in the tab."""
         import time as pytime
         start = pytime.time()
+
+        # Check initial state
+        initial_state = tab.run_js("return window.RENDER_FINISHED")
+        logger.debug(f"ContentRenderer[{context}]: Starting wait, initial RENDER_FINISHED={initial_state}")
+
+        # If already true, it's stale from previous render - need to wait for JS to reset it
+        if initial_state:
+            logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED was true, waiting for reset...")
+            # Wait for JS to reset it to false (updateRenderData sets it to false)
+            reset_start = pytime.time()
+            while pytime.time() - reset_start < 1.0:  # 1s max to wait for reset
+                is_reset = tab.run_js("return window.RENDER_FINISHED")
+                if not is_reset:
+                    logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED reset to false")
+                    break
+                pytime.sleep(0.05)
+            else:
+                logger.warning(f"ContentRenderer[{context}]: RENDER_FINISHED not reset, force resetting via JS")
+                tab.run_js("window.RENDER_FINISHED = false")
+
+        # Now wait for it to become true
+        poll_count = 0
         while pytime.time() - start < timeout:
             is_finished = tab.run_js("return window.RENDER_FINISHED")
+            poll_count += 1
             if is_finished:
+                elapsed = pytime.time() - start
+                logger.debug(f"ContentRenderer[{context}]: RENDER_FINISHED=true after {elapsed:.2f}s ({poll_count} polls)")
                 return True
-            pytime.sleep(0.
-
+            pytime.sleep(0.1)  # Poll every 100ms
+
+        elapsed = pytime.time() - start
+        logger.warning(f"ContentRenderer[{context}]: Wait for RENDER_FINISHED timed out after {elapsed:.2f}s ({poll_count} polls)")
         return False
 
     def _prepare_tab_sync(self) -> str:
@@ -89,8 +116,9 @@ class ContentRenderer:
             "theme_color": "#ef4444",
         }
 
+        logger.debug(f"ContentRenderer: Calling warmup updateRenderData for tab {tab_id}")
         tab.run_js(f"window.updateRenderData({json.dumps(warmup_data)})")
-        self._wait_for_render_finished(tab, timeout=
+        self._wait_for_render_finished(tab, timeout=12.0, context=f"warmup:{tab_id}")
 
         # Wait for main-container after warmup (Vue needs to render it)
         tab.ele('#main-container', timeout=3)
@@ -203,7 +231,7 @@ class ContentRenderer:
 
         # 1. Update Data & Wait for Finished flag
         tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
-        self._wait_for_render_finished(tab)
+        self._wait_for_render_finished(tab, context=f"batch:{tab_id}")
 
         # 2. Dynamic Resize
         # Get actual content height to prevent clipping
@@ -330,10 +358,12 @@ class ContentRenderer:
             "theme_color": theme_color,
         }
 
+        actual_tab_id = getattr(tab, 'tab_id', 'unknown')
+        logger.info(f"ContentRenderer: Calling updateRenderData for tab {actual_tab_id}, markdown length={len(markdown_content)}")
         tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")
 
         # Wait for event-driven finish
-        self._wait_for_render_finished(tab, timeout=
+        self._wait_for_render_finished(tab, timeout=12.0, context=f"render:{actual_tab_id}")
 
         # Dynamic Resize
         scroll_height = tab.run_js('return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);')
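Across these renderer hunks the flow is: push data into the page, wait for the RENDER_FINISHED flag, then measure the rendered height for the resize step. A condensed, illustrative sketch of that handshake (render_and_measure is an invented name; it assumes a DrissionPage tab with the card page already loaded and a window.updateRenderData function as used above):

```python
# Illustrative sketch only: the data-push / flag-wait / measure sequence in one place.
import json
import time

def render_and_measure(tab, render_data: dict, timeout: float = 12.0) -> int:
    # 1. Push data; the page JS resets window.RENDER_FINISHED to false,
    #    renders, then flips it back to true (per the comments in the diff).
    tab.run_js(f"window.updateRenderData({json.dumps(render_data)})")

    # 2. Poll the flag instead of sleeping a fixed amount of time.
    start = time.time()
    while time.time() - start < timeout:
        if tab.run_js("return window.RENDER_FINISHED"):
            break
        time.sleep(0.1)

    # 3. Measure the rendered height so the viewport can be resized to fit.
    return int(tab.run_js(
        'return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);'
    ))
```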
hyw_core/browser_control/service.py

@@ -13,6 +13,10 @@ from typing import Optional, Dict, Any, List
 from loguru import logger
 import trafilatura
 
+# Import intelligent completeness checker
+from ..crawling.completeness import CompletenessChecker, trigger_lazy_load
+from ..crawling.models import CrawlConfig
+
 class ScreenshotService:
     """
     Browser Service using DrissionPage.
@@ -537,31 +541,83 @@ class ScreenshotService:
         """Synchronous screenshot."""
         if not url: return None
         tab = None
+        capture_width = 1024  # Standard capture width
+
         try:
             self._ensure_ready()
             page = self._manager.page
             if not page: return None
 
-            tab
+            # Create blank tab first
+            tab = page.new_tab()
+
+            # Set viewport BEFORE navigation so page renders at target width from the start
+            # This eliminates the need for post-load resize and reflow
+            try:
+                tab.run_cdp('Emulation.setDeviceMetricsOverride',
+                            width=capture_width, height=900, deviceScaleFactor=1, mobile=False)
+            except:
+                pass
+
+            # Now navigate to the URL - page will render at target width
+            tab.get(url)
+
+            # Start monitoring network traffic
+            try:
+                # Listen for data packets (XHR/Fetch/POST)
+                # Targets: xhr, fetch. POST usually falls under these or Document.
+                tab.listen.start(targets=True)  # Listen to everything for now to be safe
+            except Exception as e:
+                logger.warning(f"ScreenshotService: Failed to start network listener: {e}")
+
+            # Initialize crawl config for completeness checking (defined outside try for scope)
+            crawl_config = CrawlConfig(
+                scan_full_page=True,
+                scroll_step=800,
+                scroll_delay=0.5,
+                scroll_timeout=min(timeout, 10),
+                image_load_timeout=8.0,
+                image_stability_checks=3,
+            )
+
             try:
                 # Wait for full page load (including JS execution)
                 tab.wait.load_complete(timeout=timeout)
 
                 # Wait for actual content to appear (for CDN verification pages)
                 # Smart Wait Logic (Final Robust):
-                # 1.
-                # 2.
-                # 3. Height Stable for 2.0 seconds (20 checks)
-                # 4. Text > 100 chars (Crucial: Distinguishes stable content from stable spinners)
-                # 5. No Blacklist phrases
+                # 1. Network Idle: Wait for silence in XHR/POST
+                # 2. Stability: Wait for Height/Text/DOM stability
 
                 time.sleep(1.5)  # user request: force wait 1.5s before detection
 
                 last_h = 0
+                last_text_len = 0
+                last_html_len = 0
                 stable_count = 0
 
                 for i in range(200):  # Max 200 iterations (~20s)
                     try:
+                        # 1. Check Network Activity
+                        has_recent_network = False
+                        try:
+                            # Iterate over any captured packets since last check
+                            for packet in tab.listen.steps(timeout=0.01):
+                                # Check if it's a significant request (POST or XHR/Fetch)
+                                method = packet.method.upper()
+                                r_type = packet.resourceType.upper() if getattr(packet, 'resourceType', None) else ""
+
+                                # Interested in: POST requests OR any XHR/Fetch response
+                                if method == 'POST' or 'XMLHTTPREQUEST' in r_type or 'FETCH' in r_type:
+                                    # Ignore some common noise? (Optional: analytics, tracking)
+                                    # For now, simplistic approach: any API traffic resets stability
+                                    has_recent_network = True
+                                    # logger.debug(f"Network Activity: {method} {packet.url[:50]}")
+                                    break
+                        except:
+                            pass
+
+                        # 2. Check DOM State
                         state = tab.run_js('''
                             return {
                                 ready: document.readyState === 'complete',
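The listener started here feeds the network-idle half of the wait loop in the next hunk. A minimal sketch of that probe as a standalone helper, using only the listen.steps and packet attributes that appear in this diff (the helper name is illustrative):

```python
# Illustrative sketch only: drain packets captured since the last tick and report
# whether "interesting" traffic (POST or XHR/Fetch) was seen.
def saw_api_traffic(tab) -> bool:
    try:
        for packet in tab.listen.steps(timeout=0.01):
            method = packet.method.upper()
            r_type = packet.resourceType.upper() if getattr(packet, 'resourceType', None) else ""
            if method == 'POST' or 'XMLHTTPREQUEST' in r_type or 'FETCH' in r_type:
                return True  # any API traffic resets the stability counter
    except Exception:
        pass
    return False
```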
@@ -571,42 +627,62 @@ class ScreenshotService:
                                     document.documentElement.scrollHeight || 0
                                 ),
                                 text: document.body.innerText.substring(0, 1000) || "",
-
+                                html_len: document.body.innerHTML.length || 0
                             };
-                        ''') or {'ready': False, 'title': "", 'height': 0, 'text': ""}
+                        ''') or {'ready': False, 'title': "", 'height': 0, 'text': "", 'html_len': 0}
 
                         is_ready = state.get('ready', False)
                         title = state.get('title', "").lower()
                         current_h = int(state.get('height', 0))
                         text_content = state.get('text', "")
                         text_len = len(text_content)
+                        html_len = int(state.get('html_len', 0))
                         text_lower = text_content.lower()
 
-                        # Blacklist check
+                        # Blacklist check (Loading indicators)
                         is_verification = "checking your browser" in text_lower or \
                                           "just a moment" in text_lower or \
                                           "please wait" in text_lower or \
                                           "security check" in title or \
-                                          "just a moment" in title
+                                          "just a moment" in title or \
+                                          "loading..." in title
 
-                        # Stability check
-
+                        # Stability check (Multi-metric + Network)
+                        # We require STABILITY across Height, Text Length, DOM Size AND Network Silence
+                        is_height_stable = current_h == last_h
+                        is_text_stable = abs(text_len - last_text_len) < 5  # Allow minor fluctuations
+                        is_dom_stable = abs(html_len - last_html_len) < 20  # Allow minor fluctuations (ads/tracking)
+
+                        if is_height_stable and is_text_stable and is_dom_stable and not has_recent_network:
                             stable_count += 1
                         else:
+                            # Reset if ANY metric changed or NETWORK active
                             stable_count = 0
+                            # if has_recent_network: logger.debug("Stability reset: Network Activity")
 
                         # Conditions
                         has_content = text_len > 100  # At least 100 real chars
-
+
+                        # Dynamic Stability Requirement:
+                        # If page looks like it's loading (small content), require longer stability
+                        if has_recent_network:
+                            # If we just saw network, enforce at least 1s (10 ticks) clean silence even for large pages
+                            required_stability = max(10, 40 if text_len < 500 else 25)
+                        else:
+                            required_stability = 40 if text_len < 500 else 25  # 4.0s or 2.5s
+
+                        is_stable = stable_count >= required_stability
 
                         # Pass if all conditions met
                         if is_ready and not is_verification and has_content and is_stable:
                             break
 
                         last_h = current_h
+                        last_text_len = text_len
+                        last_html_len = html_len
 
-                        # Wait timing
-                        try:
+                        # Wait timing within loop (tab.listen.steps consumed some time, so sleep less)
+                        try: time.sleep(0.05)
                         except: pass
 
                     except Exception:
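The stability rule can be read independently of the browser plumbing: a tick counts only if height, text length and DOM size all hold still and no API traffic was seen, and small pages must stay quiet longer than large ones. A hedged sketch of just that rule (PageSnapshot, is_quiet and required_ticks are invented names; the tick length of roughly 100 ms follows the comments in the diff):

```python
# Illustrative sketch only: the multi-metric stability rule, separated from the loop.
from dataclasses import dataclass

@dataclass
class PageSnapshot:
    height: int
    text_len: int
    html_len: int

def is_quiet(prev: PageSnapshot, curr: PageSnapshot, network_active: bool) -> bool:
    """A tick counts toward stability only if ALL metrics hold still and no
    XHR/POST traffic was seen since the previous tick."""
    return (
        curr.height == prev.height
        and abs(curr.text_len - prev.text_len) < 5     # minor text fluctuations allowed
        and abs(curr.html_len - prev.html_len) < 20    # ads/tracking may mutate the DOM slightly
        and not network_active
    )

def required_ticks(text_len: int, network_active: bool) -> int:
    """Small pages (likely still loading) need 4.0s of quiet, large pages 2.5s;
    fresh network traffic always demands at least 1s of silence."""
    base = 40 if text_len < 500 else 25   # ticks of roughly 100 ms each
    return max(10, base) if network_active else base
```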
@@ -615,6 +691,10 @@ class ScreenshotService:
                         except: pass
                         continue
 
+                # Cleanup listener
+                try: tab.listen.stop()
+                except: pass
+
                 # DEBUG: Save HTML to inspect what happened (in data dir)
                 try:
                     import os
@@ -624,48 +704,33 @@ class ScreenshotService:
                         f.write(tab.html)
                 except: pass
 
-                # Use
-
+                # Use crawling module for lazy loading trigger (config defined above)
+                trigger_lazy_load(tab, crawl_config)
 
             except:
                 pass
 
-            #
-            capture_width = 1024
-
-            # Calculate actual content height after lazy loading
-            try:
-                # Use a robust height calculation
-                content_height = tab.run_js('''
-                    return Math.max(
-                        document.body.scrollHeight || 0,
-                        document.documentElement.scrollHeight || 0,
-                        document.body.offsetHeight || 0,
-                        document.documentElement.offsetHeight || 0,
-                        document.documentElement.clientHeight || 0
-                    );
-                ''')
-                # Add a small buffer and cap at 15000px to prevent memory issues
-                h = min(int(content_height) + 50, 15000)
-            except:
-                h = 1000  # Fallback
-
-            # Set viewport to full content size for single-shot capture
-            try:
-                tab.run_cdp('Emulation.setDeviceMetricsOverride',
-                            width=capture_width, height=h, deviceScaleFactor=1, mobile=False)
-            except:
-                pass
-
-            # Scrollbar Hiding
+            # Scrollbar Hiding first (before any height calculation)
             from .manager import SharedBrowserManager
             SharedBrowserManager.hide_scrollbars(tab)
 
-            # Scroll back to top
+            # Scroll back to top
             tab.run_js("window.scrollTo(0, 0);")
 
-            #
-
+            # Use CompletenessChecker to verify all images are loaded
+            checker = CompletenessChecker(crawl_config)
+            completeness = checker.wait_for_complete(tab, timeout=crawl_config.image_load_timeout)
+
+            logger.info(
+                f"ScreenshotService: Image completeness: "
+                f"{completeness.loaded_images}/{completeness.total_images} loaded, "
+                f"{completeness.failed_images} pending, "
+                f"{completeness.placeholder_images} placeholders, "
+                f"complete={completeness.is_complete}"
+            )
+
+            # Now calculate final height ONCE after all content loaded
+            # CompletenessChecker already verified height stability
             try:
                 final_height = tab.run_js('''
                     return Math.max(
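End to end, the new completeness path is: build a CrawlConfig, trigger lazy loading, then ask the checker whether the page is ready. The sketch below is assembled only from the calls and result fields visible in this diff; capture_ready is an illustrative wrapper, not a function in the package, and the exact meaning of each CrawlConfig field lives in the new hyw_core/crawling module.

```python
# Illustrative sketch only: the completeness pipeline shown standalone.
from hyw_core.crawling.completeness import CompletenessChecker, trigger_lazy_load
from hyw_core.crawling.models import CrawlConfig

def capture_ready(tab, timeout: float = 10.0) -> bool:
    config = CrawlConfig(
        scan_full_page=True,          # scroll the whole page to fire lazy loaders
        scroll_step=800,
        scroll_delay=0.5,
        scroll_timeout=min(timeout, 10),
        image_load_timeout=8.0,
        image_stability_checks=3,
    )
    trigger_lazy_load(tab, config)    # scroll-driven lazy loading
    checker = CompletenessChecker(config)
    result = checker.wait_for_complete(tab, timeout=config.image_load_timeout)
    print(f"{result.loaded_images}/{result.total_images} images loaded, "
          f"complete={result.is_complete}")
    return result.is_complete
```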
@@ -675,13 +740,15 @@ class ScreenshotService:
                         document.documentElement.offsetHeight || 0
                     );
                 ''')
-
-
-
-                            width=capture_width, height=final_h, deviceScaleFactor=1, mobile=False)
+                h = min(int(final_height) + 50, 15000)
+                tab.run_cdp('Emulation.setDeviceMetricsOverride',
+                            width=capture_width, height=h, deviceScaleFactor=1, mobile=False)
             except:
                 pass
 
+            # Final scroll to top
+            tab.run_js("window.scrollTo(0, 0);")
+
             # Use full_page=False because we manually set the viewport to the full height
             # This avoids stitching artifacts and blank spaces
             return tab.get_screenshot(as_base64='jpg', full_page=False)
hyw_core/crawling/__init__.py (new file)

@@ -0,0 +1,18 @@
+"""
+hyw_core.crawling - Intelligent Web Crawling Module
+
+Provides Crawl4AI-inspired adaptive crawling with:
+- Page completeness guarantees (image loading verification)
+- Content quality scoring
+- Adaptive stop logic
+"""
+
+from .models import CrawlConfig, PageResult, CompletenessResult
+from .completeness import CompletenessChecker
+
+__all__ = [
+    "CrawlConfig",
+    "PageResult",
+    "CompletenessResult",
+    "CompletenessChecker",
+]
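The completeness implementation itself (hyw_core/crawling/completeness.py, +348 lines) is not shown in this diff. As a rough idea of what image-loading verification of this kind usually involves, the sketch below counts loaded, placeholder and pending images from the DOM; this is an assumption about the general approach, not the module's actual code.

```python
# Illustrative sketch only: one plausible way to bucket images into
# loaded / placeholder / pending, matching the fields logged in service.py.
IMG_AUDIT_JS = """
return Array.from(document.images).map(img => ({
    loaded: img.complete && img.naturalWidth > 0,
    placeholder: img.naturalWidth > 0 && img.naturalWidth <= 2,
    src: (img.currentSrc || img.src || '').slice(0, 120),
}));
"""

def audit_images(tab) -> dict:
    entries = tab.run_js(IMG_AUDIT_JS) or []
    loaded = sum(1 for e in entries if e.get('loaded') and not e.get('placeholder'))
    placeholders = sum(1 for e in entries if e.get('placeholder'))
    return {
        "total_images": len(entries),
        "loaded_images": loaded,
        "placeholder_images": placeholders,
        "pending_images": len(entries) - loaded - placeholders,
    }
```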