helixwright 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ """Helixwright automation SDK.
2
+
3
+ The SDK talks to Helix Browser through the desktop Local API. It does not spawn
4
+ Chrome, generate fingerprints, or manage browser profile directories by itself.
5
+ """
6
+
7
+ from .client import Client, Profile, discover_base_url
8
+ from .errors import (
9
+ AutomationEndpointUnavailable,
10
+ AutomationSessionNotFound,
11
+ DesktopNotLoggedIn,
12
+ ElementNotFoundError,
13
+ HelixwrightError,
14
+ LocalApiError,
15
+ LocalApiUnavailable,
16
+ LoginRejected,
17
+ ProfileNotFound,
18
+ ReachabilityError,
19
+ TransportError,
20
+ WaitTimeoutError,
21
+ )
22
+ from ._impl._network import Response, Route
23
+ from ._impl._operator import El, NoneEl, Op
24
+ from .launcher import HelixBrowser, attach, launch
25
+ from .models import AutomationRpc, AutomationSession, Fingerprint, LaunchConfig, Proxy
26
+
27
+ __all__ = [
28
+ "launch",
29
+ "attach",
30
+ "Client",
31
+ "Profile",
32
+ "Fingerprint",
33
+ "Proxy",
34
+ "LaunchConfig",
35
+ "AutomationRpc",
36
+ "AutomationSession",
37
+ "HelixBrowser",
38
+ "Op",
39
+ "El",
40
+ "NoneEl",
41
+ "Response",
42
+ "Route",
43
+ "discover_base_url",
44
+ "HelixwrightError",
45
+ "LocalApiError",
46
+ "LocalApiUnavailable",
47
+ "DesktopNotLoggedIn",
48
+ "ProfileNotFound",
49
+ "AutomationSessionNotFound",
50
+ "AutomationEndpointUnavailable",
51
+ "TransportError",
52
+ "ReachabilityError",
53
+ "ElementNotFoundError",
54
+ "WaitTimeoutError",
55
+ "LoginRejected",
56
+ ]
57
+
58
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Private implementation package (unstable internals). Import the public API from `helixwright`, not from here."""
@@ -0,0 +1,184 @@
1
+ from __future__ import annotations
2
+ import json
3
+ import time
4
+ from ._errors import HelixwrightError
5
+
6
+ # The accessibility matchers now read via NATIVE RPC helpers on the Locator (_aria_role /
7
+ # _accessible_name / _accessible_description over get_attribute + element_property('tagName')),
8
+ # so NO JS runs. They remain APPROXIMATE (heuristic, not Chromium's computed accessibility tree).
9
+
10
+
11
+ class _AssertionsBase:
12
+ """Polling assertion base: shared timeout, negation (not_/is_not), and the poll loop."""
13
+
14
+ def __init__(self, target, timeout_ms=5000, is_not=False):
15
+ self._t = target
16
+ self._to = timeout_ms
17
+ self._not = is_not
18
+
19
+ @property
20
+ def not_(self):
21
+ return type(self)(self._t, self._to, not self._not)
22
+
23
+ is_not = not_
24
+
25
+ def _poll(self, pred, desc):
26
+ deadline = time.time() + self._to / 1000.0
27
+ last = None
28
+ while True:
29
+ try:
30
+ last = bool(pred())
31
+ except (HelixwrightError, TypeError, AttributeError):
32
+ # transient (element gone / null evaluate) -> failed-but-retryable, never crash
33
+ last = False
34
+ if last != self._not:
35
+ return
36
+ if time.time() >= deadline:
37
+ raise AssertionError("expect %s%s failed (last=%r)"
38
+ % ("not_." if self._not else "", desc, last))
39
+ time.sleep(0.08)
40
+
41
+
42
class LocatorAssertions(_AssertionsBase):
    """Polling assertions about a Locator: visibility, text, value, count and state.

    Every matcher hands a predicate plus a description to ``_poll``, which retries
    until the predicate matches (or, when negated, stops matching) or the timeout expires.
    """

    def to_be_visible(self):
        """Pass once the target's ``is_visible()`` is truthy."""
        probe = self._t.is_visible
        self._poll(probe, "to_be_visible")

    def to_be_hidden(self):
        """Pass once the target's ``is_visible()`` is falsy."""
        def hidden():
            return not self._t.is_visible()
        self._poll(hidden, "to_be_hidden")

    def to_be_enabled(self):
        """Pass once the target's ``is_enabled()`` is truthy."""
        probe = self._t.is_enabled
        self._poll(probe, "to_be_enabled")

    def to_be_disabled(self):
        """Pass once the target's ``is_enabled()`` is falsy."""
        def disabled():
            return not self._t.is_enabled()
        self._poll(disabled, "to_be_disabled")

    def to_have_text(self, txt):
        """Pass once the whitespace-stripped text equals ``txt``."""
        def same_text():
            return self._t.text().strip() == txt
        self._poll(same_text, "to_have_text(%r)" % txt)

    def to_contain_text(self, txt):
        """Pass once ``txt`` occurs somewhere in the target's text."""
        def has_fragment():
            return txt in self._t.text()
        self._poll(has_fragment, "to_contain_text(%r)" % txt)

    def to_have_value(self, v):
        """Pass once ``input_value()`` equals ``v``."""
        def same_value():
            return self._t.input_value() == v
        self._poll(same_value, "to_have_value(%r)" % v)

    def to_have_count(self, n):
        """Pass once ``count()`` equals ``n``."""
        def same_count():
            return self._t.count() == n
        self._poll(same_count, "to_have_count(%d)" % n)

    def to_be_checked(self):
        """Pass once the target's ``is_checked()`` is truthy."""
        probe = self._t.is_checked
        self._poll(probe, "to_be_checked")

    def to_be_editable(self):
        """Pass once the target's ``is_editable()`` is truthy."""
        probe = self._t.is_editable
        self._poll(probe, "to_be_editable")

    def to_be_focused(self):
        """Pass once the target's ``is_focused()`` is truthy."""
        probe = self._t.is_focused
        self._poll(probe, "to_be_focused")

    def to_have_attribute(self, name, value=None):
        """Pass once attribute ``name`` is present (``value`` is None) or equals ``value``."""
        def attribute_ok():
            current = self._t.get_attribute(name)
            if value is None:
                return current is not None
            return current == value
        self._poll(attribute_ok, "to_have_attribute(%r,%r)" % (name, value))

    # --- Playwright-parity matchers (pure-Python over Locator.evaluate/get_attribute/count) ----
    def to_have_class(self, value):
        """Pass once the full ``class`` attribute equals ``value`` (Playwright string form)."""
        def same_class():
            current = self._t.get_attribute("class") or ""
            return current == value
        self._poll(same_class, "to_have_class(%r)" % value)

    def to_contain_class(self, value):
        """Pass once ``value`` is one of the whitespace-separated class tokens."""
        def has_token():
            tokens = (self._t.get_attribute("class") or "").split()
            return value in tokens
        self._poll(has_token, "to_contain_class(%r)" % value)

    def to_have_id(self, id_):
        """Pass once the ``id`` attribute (missing counts as empty) equals ``id_``."""
        def same_id():
            current = self._t.get_attribute("id") or ""
            return current == id_
        self._poll(same_id, "to_have_id(%r)" % id_)

    def to_have_css(self, name, value):
        """Pass once ``computed_style(name)`` equals ``value``."""
        # [no-JS] native WebElement::GetComputedValue via Locator.computed_style.
        def same_style():
            return self._t.computed_style(name) == value
        self._poll(same_style, "to_have_css(%r,%r)" % (name, value))

    def to_have_js_property(self, name, value):
        """Pass once evaluating ``el[<name>]`` on the target yields ``value``."""
        def same_property():
            # json.dumps quotes the property name safely for the JS expression.
            return self._t.evaluate("el[%s]" % json.dumps(name)) == value
        self._poll(same_property, "to_have_js_property(%r,%r)" % (name, value))

    def to_have_values(self, values):
        """Pass once the selected <option> values of a multi-select equal ``values`` (as a list)."""
        want = list(values)

        def same_selection():
            selected = self._t.evaluate(
                "Array.prototype.map.call(el.selectedOptions||[],function(o){return o.value;})")
            return selected == want
        self._poll(same_selection, "to_have_values(%r)" % (want,))

    def to_be_empty(self):
        """Pass once the target's inner HTML is empty after stripping whitespace."""
        # [no-JS] native InnerHTML()=="" check.
        def empty():
            markup = self._t.inner_html() or ""
            return markup.strip() == ""
        self._poll(empty, "to_be_empty")

    def to_be_attached(self):
        """Pass once ``count()`` reports at least one match."""
        def attached():
            return self._t.count() > 0
        self._poll(attached, "to_be_attached")

    def to_be_in_viewport(self):
        """Pass once ``is_visible()`` is truthy; this is the same check as ``to_be_visible``."""
        # [no-JS] VisibleBoundsInWidget() is viewport-clipped, so IsVisible() == visible-in-viewport.
        probe = self._t.is_visible
        self._poll(probe, "to_be_in_viewport")

    def to_have_role(self, role):
        """Pass once the APPROXIMATE ARIA role from ``Locator._aria_role()`` equals ``role``.

        The role is a heuristic (explicit role attribute or a tag heuristic), not the engine AOM.
        """
        def same_role():
            return self._t._aria_role() == role
        self._poll(same_role, "to_have_role(%r)" % role)

    def to_have_accessible_name(self, name):
        """Pass once the APPROXIMATE accessible name (stripped) equals ``name``."""
        def same_name():
            current = self._t._accessible_name() or ""
            return current.strip() == name
        self._poll(same_name, "to_have_accessible_name(%r)" % name)

    def to_have_accessible_description(self, desc):
        """Pass once the APPROXIMATE accessible description (stripped) equals ``desc``."""
        def same_description():
            current = self._t._accessible_description() or ""
            return current.strip() == desc
        self._poll(same_description, "to_have_accessible_description(%r)" % desc)
137
+
138
+
139
class PageAssertions(_AssertionsBase):
    """Polling assertions about a Page: its URL and its title."""

    def _page(self):
        """Return the page to query.

        The target is normally the Page itself (recognised by a ``goto`` attribute).
        A Locator is tolerated for the combined Expect's back-compat: its ``_page``
        attribute is used when present, otherwise the target is returned unchanged.
        """
        candidate = self._t
        if hasattr(candidate, "goto"):
            return candidate
        return getattr(candidate, "_page", candidate)

    def to_have_url(self, url):
        """Pass once ``url`` is a substring of the page's ``location.href``."""
        def url_matches():
            return url in self._page().evaluate("location.href")
        self._poll(url_matches, "to_have_url(%r)" % url)

    def to_have_title(self, title):
        """Pass once ``title`` is a substring of the page's ``document.title``."""
        def title_matches():
            return title in self._page().evaluate("document.title")
        self._poll(title_matches, "to_have_title(%r)" % title)
155
+
156
+
157
class Expect(LocatorAssertions, PageAssertions):
    """Combined Locator + Page assertions, kept for backward compatibility.

    Adds no behaviour of its own. New code should call ``expect()``, which picks
    the precise LocatorAssertions / PageAssertions class for the given target.
    """
160
+
161
+
162
def expect(target, timeout_ms=5000):
    """Build the polling assertion proxy that fits ``target``.

    A Page yields PageAssertions (url/title); a Locator or a selector-based Element
    yields LocatorAssertions (visibility/text/value/state). Async facade objects are
    unwrapped to the sync object they wrap, because the assertions are synchronous.

    Raises:
        HelixwrightError: ``target`` is neither a Page nor something exposing ``_as_locator()``.
    """
    # AsyncPage keeps its sync Page in `_p`, AsyncLocator its sync Locator in `_l`;
    # the first non-None one wins.
    for facade_attr in ("_p", "_l"):
        wrapped = getattr(target, facade_attr, None)
        if wrapped is not None:
            target = wrapped
            break

    if hasattr(target, "goto"):  # a Page
        return PageAssertions(target, timeout_ms)

    # Locator and Element both expose _as_locator(): a Locator returns itself, an Element
    # re-resolves to a Locator from its originating selector (or raises clearly when it has
    # none). Working on a Locator lets assertions poll across re-render/detach, which a fixed
    # node snapshot cannot survive. The `.text` descriptor is deliberately not inspected to
    # tell the two apart: reading it on an instance would fire a get_text RPC, whereas
    # _as_locator() is a plain method with no RPC.
    to_locator = getattr(target, "_as_locator", None)
    if not callable(to_locator):
        raise HelixwrightError(
            "expect() needs a Page, Locator, or selector-based Element; got %r" % type(target).__name__)
    return LocatorAssertions(to_locator(), timeout_ms)
@@ -0,0 +1,80 @@
1
+ """Declarative cookie-consent rules (plan F7 / G3 accept_cookies).
2
+
3
+ Our advantage over autoconsent extensions: we execute these with our OWN isTrusted cross-frame
4
+ (+ shadow-piercing) click engine — no extension, no injected JS. Each entry is a Helixwright
5
+ selector (CMP-specific id/class first — fast + precise — then aria-label, then visible text as a
6
+ multilingual fallback). page.accept_cookies() tries them in order via find_anywhere (so a CMP
7
+ inside an OOPIF / shadow root is reached) and isTrusted-clicks the first match. Opt-in by
8
+ calling it; default action is 'accept'."""
9
+ from __future__ import annotations
10
+
11
+
12
+ # Ordered most-specific -> most-general. CMP id/class are unique enough to be safe;
13
+ # text fallbacks are intentionally conservative (exact, common labels) to avoid mis-clicks.
14
+ _ACCEPT = [
15
+ # OneTrust
16
+ "#onetrust-accept-btn-handler",
17
+ "#accept-recommended-btn-handler",
18
+ ".onetrust-close-btn-handler.accept",
19
+ # Cookiebot
20
+ "#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll",
21
+ "#CybotCookiebotDialogBodyButtonAccept",
22
+ "#CybotCookiebotDialogBodyLevelButtonAccept",
23
+ # Usercentrics
24
+ 'button[data-testid="uc-accept-all-button"]',
25
+ 'button[data-testid="uc-accept-button"]',
26
+ # Didomi
27
+ "#didomi-notice-agree-button",
28
+ "button.didomi-button-highlight",
29
+ # Quantcast
30
+ '.qc-cmp2-summary-buttons button[mode="primary"]',
31
+ # Osano / TrustArc / Cookie-script / cookieconsent
32
+ ".osano-cm-accept-all",
33
+ "#truste-consent-button",
34
+ "#cookiescript_accept",
35
+ ".cc-allow",
36
+ ".cc-btn.cc-allow",
37
+ # Google Funding Choices / IAB generic
38
+ ".fc-cta-consent",
39
+ # aria-label fallbacks
40
+ '[aria-label="Accept all"]',
41
+ '[aria-label="Accept all cookies"]',
42
+ '[aria-label="Accept cookies"]',
43
+ # visible-text fallbacks (role=button restricts to actual buttons/links; @@name:
44
+ # filters by aria-label/text/value — the role grammar's supported text predicate).
45
+ 'role=button@@name:Accept all',
46
+ 'role=button@@name:Accept All Cookies',
47
+ 'role=button@@name:Allow all',
48
+ 'role=button@@name:I accept',
49
+ 'role=button@@name:Agree',
50
+ 'role=button@@name:Got it',
51
+ 'role=button@@name:Accept',
52
+ 'role=button@@name:Alle akzeptieren', # de
53
+ 'role=button@@name:Tout accepter', # fr
54
+ 'role=button@@name:Aceptar todo', # es
55
+ 'role=button@@name:接受全部', # zh
56
+ 'role=button@@name:同意', # zh/ja
57
+ ]
58
+
59
+ _REJECT = [
60
+ "#onetrust-reject-all-handler",
61
+ ".ot-pc-refuse-all-handler",
62
+ "#CybotCookiebotDialogBodyButtonDecline",
63
+ "#CybotCookiebotDialogBodyLevelButtonLevelOptinDeclineAll",
64
+ 'button[data-testid="uc-deny-all-button"]',
65
+ "#didomi-notice-disagree-button",
66
+ ".qc-cmp2-summary-buttons button[mode=\"secondary\"]",
67
+ ".osano-cm-deny-all",
68
+ ".cc-deny",
69
+ '[aria-label="Reject all"]',
70
+ '[aria-label="Decline all"]',
71
+ 'role=button@@name:Reject all',
72
+ 'role=button@@name:Decline all',
73
+ 'role=button@@name:Necessary only',
74
+ 'role=button@@name:Reject',
75
+ 'role=button@@name:Decline',
76
+ 'role=button@@name:Alle ablehnen', # de
77
+ 'role=button@@name:Tout refuser', # fr
78
+ ]
79
+
80
# Consent action name -> ordered selector list to try for that action.
_CONSENT_SELECTORS = dict(accept=_ACCEPT, reject=_REJECT)
@@ -0,0 +1,44 @@
1
+
2
class HelixwrightError(Exception):
    """Root of the Helixwright exception hierarchy. [B3]

    Public API: ``except HelixwrightError`` catches every error the package raises.
    The subclasses exist so callers and internal code can tell failure CLASSES apart
    without parsing the message string.
    """
7
+
8
+
9
class TransportError(HelixwrightError):
    """The local RPC transport failed (socket/HTTP/JSON, or the host process died).

    This is a SESSION-level failure, distinct from a per-element miss. Best-effort
    ``except`` sites that mean 'element not present in this frame' must NOT swallow it:
    a browser crash in the middle of a frame walk used to surface as a confusing
    'element not found'.
    """
15
+
16
+
17
class ElementNotFoundError(HelixwrightError):
    """A query resolved to no element (the host's 'not found' reply).

    Frame-walk / non-strict lookups suppress exactly this class; every other error,
    TransportError included, propagates.
    """
21
+
22
+
23
class WaitTimeoutError(HelixwrightError):
    """A wait/poll exceeded its deadline.

    Named WaitTimeoutError rather than TimeoutError so it does not shadow the builtin,
    OSError-derived TimeoutError.
    """
27
+
28
+
29
class ReachabilityError(HelixwrightError):
    """[3.0] A pre-launch connectivity check FAILED, so the browser is NOT opened.

    Either the proxy is dead/unreachable or there is no internet; failing here avoids
    spending a launch on a dead exit. This covers CONNECTIVITY only, not IP-reputation
    vetting (the framework does not vet proxy reputation).
    """
34
+
35
+
36
class LoginRejected(HelixwrightError):
    """[Tier-1A] The site REJECTED a submitted form/login.

    Raised by fill_form/login with confirm=True when an unambiguous blocking inline
    error (aria-invalid / [role=alert] / a known error class, with non-empty text)
    renders after submit. ``.message`` holds the site's VERBATIM text, so the failure
    surfaces at its cause ('Invalid password') rather than three steps later.
    """

    def __init__(self, message):
        super().__init__("login/form rejected: " + str(message))
        # Keep the untouched site text available to callers alongside the prefixed str().
        self.message = message
@@ -0,0 +1,202 @@
1
+ """Failure forensics (plan G2) — the anti-detection debugging moat.
2
+
3
+ Competitors' debuggers (Playwright trace, DrissionPage, SeleniumBase) capture the DOM. They
4
+ cannot answer the question that matters for a fingerprint browser: whether the page saw the
5
+ fingerprint snapshot Helix Local API launched. capture_failure() writes that bundle locally.
6
+
7
+ Defensive by construction: capturing a failure must never raise (that would mask the original
8
+ error), so every step is individually guarded."""
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import os
13
+ import time
14
+
15
+
16
+ # Compact live-fingerprint probe: the values the page really saw.
17
+ _FP_SNAPSHOT_JS = r"""(function(){
18
+ function gl(){try{var c=document.createElement('canvas');
19
+ var x=c.getContext('webgl')||c.getContext('experimental-webgl');if(!x)return {};
20
+ var e=x.getExtension('WEBGL_debug_renderer_info');if(!e)return {};
21
+ return {renderer:x.getParameter(e.UNMASKED_RENDERER_WEBGL),
22
+ vendor:x.getParameter(e.UNMASKED_VENDOR_WEBGL)};}catch(_){return {};}}
23
+ var o={platform:navigator.platform,vendor:navigator.vendor,
24
+ hardwareConcurrency:navigator.hardwareConcurrency,deviceMemory:navigator.deviceMemory,
25
+ maxTouchPoints:navigator.maxTouchPoints,language:navigator.language,
26
+ languages:navigator.languages,webdriver:navigator.webdriver,userAgent:navigator.userAgent,
27
+ screen:[screen.width,screen.height,screen.availWidth,screen.availHeight,screen.colorDepth],
28
+ devicePixelRatio:window.devicePixelRatio,
29
+ outer:[window.outerWidth,window.outerHeight,window.screenX,window.screenY]};
30
+ try{o.timezone=Intl.DateTimeFormat().resolvedOptions().timeZone;}catch(_){}
31
+ try{var g=gl();if(g.renderer)o.webglRenderer=g.renderer;if(g.vendor)o.webglVendor=g.vendor;}catch(_){}
32
+ return o;})()"""
33
+
34
+
35
+ def _safe(fn, default=None):
36
+ try:
37
+ return fn()
38
+ except Exception:
39
+ return default
40
+
41
+
42
+ def _slug(s):
43
+ return "".join(c if (c.isalnum() or c in "-_") else "_" for c in str(s))[:48] or "x"
44
+
45
+
46
def _live_fp_js():
    """Return the JavaScript source of the live fingerprint probe used in failure bundles."""
    probe_source = _FP_SNAPSHOT_JS
    return probe_source
49
+
50
+
51
+ def _eq(a, b):
52
+ if a is None or b is None:
53
+ return a == b
54
+ if str(a).strip() == str(b).strip():
55
+ return True
56
+ try:
57
+ return abs(float(a) - float(b)) < 1e-6
58
+ except (ValueError, TypeError):
59
+ return False
60
+
61
+
62
+ # fingerprint snapshot key -> live JS key.
63
+ _FINGERPRINT_TO_LIVE = [
64
+ ("platform", "platform"), ("hwcc", "hardwareConcurrency"), ("devmem", "deviceMemory"),
65
+ ("webglRenderer", "webglRenderer"), ("webglVendor", "webglVendor"),
66
+ ("tz", "timezone"), ("sw", "screenWidth"), ("sh", "screenHeight"),
67
+ ("dpr", "devicePixelRatio"), ("ua", "userAgent"), ("userAgent", "userAgent"),
68
+ ("vendor", "vendor"), ("maxTouch", "maxTouchPoints"),
69
+ ]
70
+
71
+
72
+ def _delta(live, fingerprint_snapshot):
73
+ """Return {key: {expected, actual, live_key}} for fingerprint snapshot mismatches."""
74
+ if not isinstance(live, dict) or not isinstance(fingerprint_snapshot, dict):
75
+ return {}
76
+ out = {}
77
+ for pk, lk in _FINGERPRINT_TO_LIVE:
78
+ if pk not in fingerprint_snapshot:
79
+ continue
80
+ exp, act = fingerprint_snapshot.get(pk), live.get(lk)
81
+ if not _eq(exp, act):
82
+ out[pk] = {"expected": exp, "actual": act, "live_key": lk}
83
+ if "langs" in fingerprint_snapshot:
84
+ exp = fingerprint_snapshot.get("langs")
85
+ expl = [s.strip() for s in (exp.split(",") if isinstance(exp, str) else list(exp or []))]
86
+ act = live.get("languages")
87
+ actl = [str(s).strip() for s in (act if isinstance(act, list) else [])]
88
+ if expl != actl:
89
+ out["langs"] = {"expected": expl, "actual": actl, "live_key": "languages"}
90
+ return out
91
+
92
+
93
+ # [W6.1] WAF block/challenge fingerprints for a LOCAL scan over the already-captured DOM (NO
94
+ # network). A fingerprint that passes every readback check can still be IP/ASN-blocked; these
95
+ # markers say WHICH WAF challenged -- turning a "delta says ok but still blocked" bundle actionable.
96
+ _BLOCK_MARKERS = {
97
+ "cloudflare": ("just a moment", "checking your browser", "cf-browser-verification",
98
+ "challenge-platform", "__cf_chl", "cf-turnstile", "attention required",
99
+ "cf-error-details"),
100
+ "datadome": ("datadome", "captcha-delivery.com", "geo.captcha-delivery"),
101
+ "akamai": ("errors.edgesuite.net", "reference #", "access denied"),
102
+ "perimeterx_human": ("px-captcha", "perimeterx", "px-cdn", "press & hold", "press and hold"),
103
+ "imperva_incapsula": ("incapsula", "incident id", "_incap_"),
104
+ "generic": ("unusual traffic", "verify you are human", "are you a robot",
105
+ "pardon our interruption", "request blocked", "bot detection"),
106
+ }
107
+
108
+
109
+ def _scan_block_signals(html):
110
+ """[W6.1] Pure LOCAL scan of the already-captured DOM for WAF block/challenge markers (no
111
+ network, no navigation). Returns sorted "waf:marker" hits (empty if the page looks unblocked)."""
112
+ if not isinstance(html, str) or not html:
113
+ return []
114
+ low = html.lower()
115
+ hits = set()
116
+ for waf, markers in _BLOCK_MARKERS.items():
117
+ for m in markers:
118
+ if m in low:
119
+ hits.add("%s:%s" % (waf, m))
120
+ return sorted(hits)
121
+
122
+
123
def _claim_bundle_dir(stem, attempts=100):
    """Create and return a bundle directory that no earlier capture already owns.

    Tries ``stem`` first, then ``stem_2``, ``stem_3`` and so on, so two failures
    captured within the same second under the same label do not overwrite each
    other's files. Returns None when every candidate already exists.
    """
    for n in range(attempts):
        candidate = stem if n == 0 else "%s_%d" % (stem, n + 1)
        try:
            os.makedirs(candidate)
        except FileExistsError:
            continue
        return candidate
    return None


def capture_failure(page, label="failure", error=None):
    """Write a forensic bundle for ``page`` and return the bundle directory.

    The bundle is created under ``page.settings.capture_dir`` and may contain
    screenshot.png, fingerprint.json, delta.json, trail.json, console.json,
    page.html, block_signals.json and meta.json. Every step is guarded on its own,
    so a failing step is skipped without aborting the others.

    Args:
        page: The page to capture; only attributes and methods that exist are used.
        label: Short tag, slugged into the bundle directory name.
        error: The original error, stored as text in meta.json.

    Returns:
        The bundle directory path, or None when capture is disabled (no
        ``capture_dir``) or the directory cannot be created.
    """
    settings = getattr(page, "settings", None)
    base = getattr(settings, "capture_dir", None)
    if not base:
        return None
    stamp = time.strftime("%Y%m%d_%H%M%S")
    stem = os.path.join(base, "%s_%s" % (stamp, _slug(label)))
    bundle = _safe(lambda: _claim_bundle_dir(stem), None)
    if not bundle:
        return None

    # 1) screenshot (PNG) — the visual state at failure.
    _safe(lambda: page.screenshot(os.path.join(bundle, "screenshot.png")))

    # 2) live fingerprint snapshot — what the page actually saw.
    fp = _safe(lambda: page.evaluate(_live_fp_js()), {})
    _safe(lambda: _write_json(os.path.join(bundle, "fingerprint.json"), fp))

    # 2b) configured-vs-live delta.
    fingerprint_snapshot = _safe(lambda: getattr(settings, "fingerprint_snapshot", None), None) or {}
    delta = _safe(lambda: _delta(fp, fingerprint_snapshot), {})
    _safe(lambda: _write_json(os.path.join(bundle, "delta.json"),
                              {"mismatches": delta, "ok": not delta}))

    # 3) action trail (G10) — the behavior leading up to the failure.
    trail = _safe(lambda: list(getattr(page, "_trail", [])), [])
    _safe(lambda: _write_json(os.path.join(bundle, "trail.json"), trail))

    # 4) console messages (if console_listen() was active).
    console = _safe(lambda: page.console_messages(), None)
    if console:
        _safe(lambda: _write_json(os.path.join(bundle, "console.json"), console))

    # 5) DOM snapshot.
    html = _safe(lambda: page.content(), None)
    if isinstance(html, str):
        _safe(lambda: _write_text(os.path.join(bundle, "page.html"), html))

    # 5b) Local WAF-marker scan of the ALREADY-captured HTML. Pure local scan — NO
    # network/navigation (that would destroy the captured failure state). Forensic signal
    # only, NOT IP-reputation vetting.
    block_signals = _safe(lambda: _scan_block_signals(html if isinstance(html, str) else ""), [])
    if block_signals:
        _safe(lambda: _write_json(os.path.join(bundle, "block_signals.json"), {
            "block_signals": block_signals,
            "note": "local DOM WAF-marker scan of the captured page (no network)"}))

    # 6) meta — url, error, fingerprint summary, think-time stats. fingerprint_summary carries
    # the full set the delta compares, so delta.json has complete expected values. Each field
    # is guarded individually: a misbehaving attribute costs one field, not the whole file,
    # and cannot make this function raise.
    summary_keys = ("platform", "lang", "langs", "tz", "webglRenderer", "webglVendor",
                    "hwcc", "devmem", "sw", "sh", "dpr", "ua", "userAgent", "vendor",
                    "maxTouch", "webgpuArch", "voices", "color_scheme")
    meta = {
        "label": label,
        "error": (None if error is None
                  else _safe(lambda: str(error), type(error).__name__)),
        "url": _safe(lambda: page.url, ""),
        "frame_depth": _safe(lambda: getattr(page, "_frame_depth", 0), 0),
        "humanize": _safe(lambda: getattr(settings, "humanize", None), None),
        "behavior_seed": _safe(lambda: getattr(settings, "behavior_seed", 0), 0),
        "fingerprint_summary": _safe(
            lambda: {k: fingerprint_snapshot.get(k)
                     for k in summary_keys if k in fingerprint_snapshot},
            {}),
        "delta_ok": (not delta),
        "block_signals": block_signals,
        "think_time_total_s": _safe(
            lambda: round(getattr(getattr(page, "_hz", None), "total_slept", 0.0), 2),
            0.0),
    }
    _safe(lambda: _write_json(os.path.join(bundle, "meta.json"), meta))
    return bundle
193
+
194
+
195
+ def _write_json(path, obj):
196
+ with open(path, "w", encoding="utf-8") as f:
197
+ json.dump(obj, f, ensure_ascii=False, indent=2)
198
+
199
+
200
+ def _write_text(path, text):
201
+ with open(path, "w", encoding="utf-8") as f:
202
+ f.write(text)