entari-plugin-hyw 4.0.0rc8__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/METADATA +1 -1
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/RECORD +5 -5
- hyw_core/browser_control/service.py +67 -14
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-4.0.0rc8.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/top_level.txt +0 -0
|
@@ -14,7 +14,7 @@ hyw_core/browser_control/__init__.py,sha256=X1vHZpYXLG-P1RRivVyK014WKnv48GN1ibF9
|
|
|
14
14
|
hyw_core/browser_control/landing.html,sha256=wgqldumdylz69T83pvOkrigT1Mdb9GY0_KU0ceLGwdY,4642
|
|
15
15
|
hyw_core/browser_control/manager.py,sha256=U8dVpkWTG5pcIE5WiSQSfTx4gEo9PnBbmBD0KZcLBbU,5513
|
|
16
16
|
hyw_core/browser_control/renderer.py,sha256=hsCjJPMSCAvqTFtiAmyjaw0IE8xmbIjq5VK9dd70gfc,15539
|
|
17
|
-
hyw_core/browser_control/service.py,sha256=
|
|
17
|
+
hyw_core/browser_control/service.py,sha256=tuNaEnxRZVkLgQczWnWDyEHXhyvhF9RboDmnT3OFX34,35905
|
|
18
18
|
hyw_core/browser_control/assets/index.html,sha256=BpbM0vD9OYicE5MBHSVLo3j_y-MpULI82PMqmBKpWT8,2328623
|
|
19
19
|
hyw_core/browser_control/assets/card-dist/index.html,sha256=fNfT_0TgSZLqwuTtKAl3Wzc4lKRAY_rbWxc_mQHfaCs,2209006
|
|
20
20
|
hyw_core/browser_control/assets/card-dist/vite.svg,sha256=SnSK_UQ5GLsWWRyDTEAdrjPoeGGrXbrQgRw6O0qSFPs,1497
|
|
@@ -62,7 +62,7 @@ hyw_core/browser_control/engines/google.py,sha256=PmU0_n8UrnQ1oyYVS-Y_jLS6rzgkQZ
|
|
|
62
62
|
hyw_core/stages/__init__.py,sha256=W89cWpq-HBLi2FprtJQSjQNLzpbhM8ZCkqPG61D_imE,521
|
|
63
63
|
hyw_core/stages/base.py,sha256=EfnTkISXbBNxjARykqIhmMrVqw2tqZl7ozJbJEbRnhI,2806
|
|
64
64
|
hyw_core/stages/summary.py,sha256=ODOwhIAmBZJuA4KOhUP7Lygch7XSkshrTZj-MdZjbEs,7085
|
|
65
|
-
entari_plugin_hyw-4.0.
|
|
66
|
-
entari_plugin_hyw-4.0.
|
|
67
|
-
entari_plugin_hyw-4.0.
|
|
68
|
-
entari_plugin_hyw-4.0.
|
|
65
|
+
entari_plugin_hyw-4.0.0rc9.dist-info/METADATA,sha256=ofBdYe9A6sFakK9NDDpnmTczWuh1mbm6OpzpLO1j3cc,3844
|
|
66
|
+
entari_plugin_hyw-4.0.0rc9.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
67
|
+
entari_plugin_hyw-4.0.0rc9.dist-info/top_level.txt,sha256=ah76OrufRX0okOl4Fv8MO6PXiT0IaZ1oG0eDrdAPoNo,27
|
|
68
|
+
entari_plugin_hyw-4.0.0rc9.dist-info/RECORD,,
|
|
@@ -543,25 +543,53 @@ class ScreenshotService:
|
|
|
543
543
|
if not page: return None
|
|
544
544
|
|
|
545
545
|
tab = page.new_tab(url)
|
|
546
|
+
|
|
547
|
+
# Start monitoring network traffic
|
|
548
|
+
try:
|
|
549
|
+
# Listen for data packets (XHR/Fetch/POST)
|
|
550
|
+
# Targets: xhr, fetch. POST usually falls under these or Document.
|
|
551
|
+
tab.listen.start(targets=True) # Listen to everything for now to be safe
|
|
552
|
+
except Exception as e:
|
|
553
|
+
logger.warning(f"ScreenshotService: Failed to start network listener: {e}")
|
|
554
|
+
|
|
546
555
|
try:
|
|
547
556
|
# Wait for full page load (including JS execution)
|
|
548
557
|
tab.wait.load_complete(timeout=timeout)
|
|
549
558
|
|
|
550
559
|
# Wait for actual content to appear (for CDN verification pages)
|
|
551
560
|
# Smart Wait Logic (Final Robust):
|
|
552
|
-
# 1.
|
|
553
|
-
# 2.
|
|
554
|
-
# 3. Height Stable for 2.0 seconds (20 checks)
|
|
555
|
-
# 4. Text > 100 chars (Crucial: Distinguishes stable content from stable spinners)
|
|
556
|
-
# 5. No Blacklist phrases
|
|
561
|
+
# 1. Network Idle: Wait for silence in XHR/POST
|
|
562
|
+
# 2. Stability: Wait for Height/Text/DOM stability
|
|
557
563
|
|
|
558
564
|
time.sleep(1.5) # user request: force wait 1.5s before detection
|
|
559
565
|
|
|
560
566
|
last_h = 0
|
|
567
|
+
last_text_len = 0
|
|
568
|
+
last_html_len = 0
|
|
561
569
|
stable_count = 0
|
|
562
570
|
|
|
563
571
|
for i in range(200): # Max 200 iterations (~20s)
|
|
564
572
|
try:
|
|
573
|
+
# 1. Check Network Activity
|
|
574
|
+
has_recent_network = False
|
|
575
|
+
try:
|
|
576
|
+
# Iterate over any captured packets since last check
|
|
577
|
+
for packet in tab.listen.steps(timeout=0.01):
|
|
578
|
+
# Check if it's a significant request (POST or XHR/Fetch)
|
|
579
|
+
method = packet.method.upper()
|
|
580
|
+
r_type = packet.resourceType.upper() if getattr(packet, 'resourceType', None) else ""
|
|
581
|
+
|
|
582
|
+
# Interested in: POST requests OR any XHR/Fetch response
|
|
583
|
+
if method == 'POST' or 'XMLHTTPREQUEST' in r_type or 'FETCH' in r_type:
|
|
584
|
+
# Ignore some common noise? (Optional: analytics, tracking)
|
|
585
|
+
# For now, simplistic approach: any API traffic resets stability
|
|
586
|
+
has_recent_network = True
|
|
587
|
+
# logger.debug(f"Network Activity: {method} {packet.url[:50]}")
|
|
588
|
+
break
|
|
589
|
+
except:
|
|
590
|
+
pass
|
|
591
|
+
|
|
592
|
+
# 2. Check DOM State
|
|
565
593
|
state = tab.run_js('''
|
|
566
594
|
return {
|
|
567
595
|
ready: document.readyState === 'complete',
|
|
@@ -571,42 +599,62 @@ class ScreenshotService:
|
|
|
571
599
|
document.documentElement.scrollHeight || 0
|
|
572
600
|
),
|
|
573
601
|
text: document.body.innerText.substring(0, 1000) || "",
|
|
574
|
-
|
|
602
|
+
html_len: document.body.innerHTML.length || 0
|
|
575
603
|
};
|
|
576
|
-
''') or {'ready': False, 'title': "", 'height': 0, 'text': ""}
|
|
604
|
+
''') or {'ready': False, 'title': "", 'height': 0, 'text': "", 'html_len': 0}
|
|
577
605
|
|
|
578
606
|
is_ready = state.get('ready', False)
|
|
579
607
|
title = state.get('title', "").lower()
|
|
580
608
|
current_h = int(state.get('height', 0))
|
|
581
609
|
text_content = state.get('text', "")
|
|
582
610
|
text_len = len(text_content)
|
|
611
|
+
html_len = int(state.get('html_len', 0))
|
|
583
612
|
text_lower = text_content.lower()
|
|
584
613
|
|
|
585
|
-
# Blacklist check
|
|
614
|
+
# Blacklist check (Loading indicators)
|
|
586
615
|
is_verification = "checking your browser" in text_lower or \
|
|
587
616
|
"just a moment" in text_lower or \
|
|
588
617
|
"please wait" in text_lower or \
|
|
589
618
|
"security check" in title or \
|
|
590
|
-
"just a moment" in title
|
|
619
|
+
"just a moment" in title or \
|
|
620
|
+
"loading..." in title
|
|
591
621
|
|
|
592
|
-
# Stability check
|
|
593
|
-
|
|
622
|
+
# Stability check (Multi-metric + Network)
|
|
623
|
+
# We require STABILITY across Height, Text Length, DOM Size AND Network Silence
|
|
624
|
+
is_height_stable = current_h == last_h
|
|
625
|
+
is_text_stable = abs(text_len - last_text_len) < 5 # Allow minor fluctuations
|
|
626
|
+
is_dom_stable = abs(html_len - last_html_len) < 20 # Allow minor fluctuations (ads/tracking)
|
|
627
|
+
|
|
628
|
+
if is_height_stable and is_text_stable and is_dom_stable and not has_recent_network:
|
|
594
629
|
stable_count += 1
|
|
595
630
|
else:
|
|
631
|
+
# Reset if ANY metric changed or NETWORK active
|
|
596
632
|
stable_count = 0
|
|
633
|
+
# if has_recent_network: logger.debug("Stability reset: Network Activity")
|
|
597
634
|
|
|
598
635
|
# Conditions
|
|
599
636
|
has_content = text_len > 100 # At least 100 real chars
|
|
600
|
-
|
|
637
|
+
|
|
638
|
+
# Dynamic Stability Requirement:
|
|
639
|
+
# If page looks like it's loading (small content), require longer stability
|
|
640
|
+
if has_recent_network:
|
|
641
|
+
# If we just saw network, enforce at least 1s (10 ticks) clean silence even for large pages
|
|
642
|
+
required_stability = max(10, 40 if text_len < 500 else 25)
|
|
643
|
+
else:
|
|
644
|
+
required_stability = 40 if text_len < 500 else 25 # 4.0s or 2.5s
|
|
645
|
+
|
|
646
|
+
is_stable = stable_count >= required_stability
|
|
601
647
|
|
|
602
648
|
# Pass if all conditions met
|
|
603
649
|
if is_ready and not is_verification and has_content and is_stable:
|
|
604
650
|
break
|
|
605
651
|
|
|
606
652
|
last_h = current_h
|
|
653
|
+
last_text_len = text_len
|
|
654
|
+
last_html_len = html_len
|
|
607
655
|
|
|
608
|
-
# Wait timing
|
|
609
|
-
try:
|
|
656
|
+
# Wait timing within loop (tab.listen.steps consumed some time, so sleep less)
|
|
657
|
+
try: time.sleep(0.05)
|
|
610
658
|
except: pass
|
|
611
659
|
|
|
612
660
|
except Exception:
|
|
@@ -615,6 +663,10 @@ class ScreenshotService:
|
|
|
615
663
|
except: pass
|
|
616
664
|
continue
|
|
617
665
|
|
|
666
|
+
# Cleanup listener
|
|
667
|
+
try: tab.listen.stop()
|
|
668
|
+
except: pass
|
|
669
|
+
|
|
618
670
|
# DEBUG: Save HTML to inspect what happened (in data dir)
|
|
619
671
|
try:
|
|
620
672
|
import os
|
|
@@ -631,6 +683,7 @@ class ScreenshotService:
|
|
|
631
683
|
pass
|
|
632
684
|
|
|
633
685
|
# Refine calculation: Set viewport width to 1024
|
|
686
|
+
|
|
634
687
|
capture_width = 1024
|
|
635
688
|
|
|
636
689
|
# Calculate actual content height after lazy loading
|
|
File without changes
|
|
File without changes
|