@mochi.js/core 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/src/__tests__/geo-consistency.test.ts +277 -0
- package/src/__tests__/geo-probe.test.ts +415 -0
- package/src/__tests__/inject.test.ts +4 -0
- package/src/__tests__/integration.e2e.test.ts +24 -0
- package/src/__tests__/piercing.test.ts +164 -0
- package/src/__tests__/proc.test.ts +383 -0
- package/src/__tests__/selector.test.ts +188 -0
- package/src/__tests__/window-size.e2e.test.ts +130 -0
- package/src/cdp/types.ts +47 -0
- package/src/geo-consistency.ts +343 -0
- package/src/geo-probe.ts +603 -0
- package/src/index.ts +11 -0
- package/src/launch.ts +145 -9
- package/src/page/element-handle.ts +110 -0
- package/src/page/piercing.ts +135 -0
- package/src/page/selector.ts +423 -0
- package/src/page.ts +152 -1
- package/src/proc.ts +386 -41
- package/src/session.ts +358 -12
package/src/launch.ts
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
|
|
13
13
|
import { deriveMatrix, type ProfileV1 } from "@mochi.js/consistency";
|
|
14
14
|
import { resolveBinary } from "./binary";
|
|
15
|
+
import { type GeoConsistencyMode, reconcileGeoConsistency } from "./geo-consistency";
|
|
16
|
+
import { probeExitGeo } from "./geo-probe";
|
|
15
17
|
import { spawnChromium } from "./proc";
|
|
16
18
|
import { parseProxyUrl } from "./proxy-auth";
|
|
17
19
|
import { Session } from "./session";
|
|
@@ -91,6 +93,16 @@ export interface LaunchOptions {
|
|
|
91
93
|
args?: string[];
|
|
92
94
|
out?: { traceDir?: string };
|
|
93
95
|
timeout?: number;
|
|
96
|
+
/**
|
|
97
|
+
* Opt out of mochi's "auto-add `--no-sandbox` when running as root on
|
|
98
|
+
* Linux" fallback. Default `false` (the fallback is on). When `true`,
|
|
99
|
+
* mochi will NOT inject `--no-sandbox` even under root + Linux — useful
|
|
100
|
+
* if you've configured a SUID `chrome-sandbox` helper and want to keep
|
|
101
|
+
* the user-namespace sandbox active. The launch will crash with EPIPE
|
|
102
|
+
* if the SUID setup is wrong, but you keep stealth posture intact
|
|
103
|
+
* (`--no-sandbox` is a fingerprint leak per PLAN.md §8.6).
|
|
104
|
+
*/
|
|
105
|
+
allowRootWithSandbox?: boolean;
|
|
94
106
|
/**
|
|
95
107
|
* When `true`, the {@link Session} skips both `buildPayload` (no payload
|
|
96
108
|
* is compiled) and `Page.addScriptToEvaluateOnNewDocument` on every new
|
|
@@ -107,6 +119,27 @@ export interface LaunchOptions {
|
|
|
107
119
|
* Chromium); task 0040.
|
|
108
120
|
*/
|
|
109
121
|
bypassInject?: boolean;
|
|
122
|
+
/**
|
|
123
|
+
* When `true`, re-applies the harness/CI-only Chromium flags
|
|
124
|
+
* (`--disable-component-update`, `--disable-default-apps`,
|
|
125
|
+
* `--disable-background-networking`, `--disable-sync`, plus a noise-
|
|
126
|
+
* reduction `--disable-features=` block) on top of the production
|
|
127
|
+
* default flag set. Used by `@mochi.js/harness`, CI runs, and
|
|
128
|
+
* `mochi capture` flows where update traffic, default-apps auto-install,
|
|
129
|
+
* sync, and feed prefetches would inject non-determinism into baseline
|
|
130
|
+
* collection or stealth conformance.
|
|
131
|
+
*
|
|
132
|
+
* Defaults to `false` — production users get a cleaner flag set without
|
|
133
|
+
* the passive command-line bot-tells that patchright explicitly removes
|
|
134
|
+
* from its Playwright fork (`chromiumSwitchesPatch.ts:20-34`) and that
|
|
135
|
+
* `puppeteer-real-browser` strips for the same reason
|
|
136
|
+
* (`lib/cjs/index.js:57-58`).
|
|
137
|
+
*
|
|
138
|
+
* Pairs with — but is independent of — {@link bypassInject}. Capture
|
|
139
|
+
* flows set both `true`; harness conformance runs set `hermetic: true`
|
|
140
|
+
* with full inject pipeline active. PLAN.md §8.6 + task 0256.
|
|
141
|
+
*/
|
|
142
|
+
hermetic?: boolean;
|
|
110
143
|
/**
|
|
111
144
|
* Convenience layer toggles for common bot-defense widgets. When
|
|
112
145
|
* `challenges.turnstile.autoClick` is `true`, every page returned by
|
|
@@ -120,6 +153,35 @@ export interface LaunchOptions {
|
|
|
120
153
|
* solving is v0.3+).
|
|
121
154
|
*/
|
|
122
155
|
challenges?: ChallengeLaunchOptions;
|
|
156
|
+
/**
|
|
157
|
+
* Reconcile `(matrix.timezone, matrix.locale)` against the proxy's
|
|
158
|
+
* exit-IP geolocation. Closes the cross-layer leak where a US profile
|
|
159
|
+
* over an EU proxy would have `Date.getTimezoneOffset()` reporting PT
|
|
160
|
+
* while the IP geolocates to UTC+1 — the canonical bot signature.
|
|
161
|
+
*
|
|
162
|
+
* - `"privacy-fallback"` *(default)* — on mismatch (or probe failure),
|
|
163
|
+
* override the matrix to UTC + `en-US`. The session fingerprints as
|
|
164
|
+
* a privacy-conscious user (Tor / Brave / hardened-FF style), which
|
|
165
|
+
* is benign in most threat models.
|
|
166
|
+
* - `"auto-correct"` — on mismatch, override the matrix's timezone
|
|
167
|
+
* with the IP's timezone and the locale with a primary-locale
|
|
168
|
+
* guess for the IP's country. Most "stealth" but trusts mochi's
|
|
169
|
+
* IP-derived defaults over the user's declared profile.
|
|
170
|
+
* - `"strict"` — throw `GeoMismatchError` on mismatch. The user must
|
|
171
|
+
* change profile or change proxy. Probe failure (null) does NOT
|
|
172
|
+
* throw under strict — that's a network blip, not a mismatch.
|
|
173
|
+
* - `"off"` — skip the probe entirely. Use in offline tests / when
|
|
174
|
+
* the probe service is rate-limited.
|
|
175
|
+
*
|
|
176
|
+
* The probe is a single GET through wreq (using the matrix's
|
|
177
|
+
* `wreqPreset`, so the geo service sees the same JA4/headers as user
|
|
178
|
+
* traffic). 4-attempt cap, 2s per endpoint. Probe results are NOT
|
|
179
|
+
* cached across sessions — proxy IPs rotate.
|
|
180
|
+
*
|
|
181
|
+
* @see PLAN.md §9 (relational consistency, IP/TZ/Locale axis)
|
|
182
|
+
* @see tasks/0262-ip-tz-locale-exit-consistency.md
|
|
183
|
+
*/
|
|
184
|
+
geoConsistency?: GeoConsistencyMode;
|
|
123
185
|
}
|
|
124
186
|
|
|
125
187
|
/**
|
|
@@ -129,26 +191,100 @@ export interface LaunchOptions {
|
|
|
129
191
|
export async function launch(opts: LaunchOptions): Promise<Session> {
|
|
130
192
|
const binary = await resolveBinary(opts.binary);
|
|
131
193
|
const normalized = normalizeProxy(opts.proxy);
|
|
194
|
+
|
|
195
|
+
// Resolve the `MatrixV1` BEFORE spawning so matrix-derived values flow
|
|
196
|
+
// into both the `--lang` flag (task 0251) and `--window-size` flag
|
|
197
|
+
// (task 0252). The matrix is otherwise read post-spawn for inject;
|
|
198
|
+
// deriving early is cheap (~µs, pure function) and lets us close the
|
|
199
|
+
// I-5 leaks between Chromium's native network/OS-window state and the
|
|
200
|
+
// JS-layer spoof.
|
|
201
|
+
//
|
|
202
|
+
// Inline `ProfileV1` objects flow straight through; string profile ids
|
|
203
|
+
// are resolved against a placeholder profile until `@mochi.js/profiles`
|
|
204
|
+
// ships its first capture (phase 0.4). The matrix is bit-stable per
|
|
205
|
+
// `(profile, seed)` excluding the `derivedAt` timestamp.
|
|
206
|
+
const profile = resolveProfile(opts.profile);
|
|
207
|
+
const matrix = deriveMatrix(profile, opts.seed);
|
|
208
|
+
|
|
209
|
+
// Task 0262 — exit-IP / TZ / locale reconciliation.
|
|
210
|
+
//
|
|
211
|
+
// Probe the apparent exit IP through the configured proxy (using wreq
|
|
212
|
+
// with the matrix's `wreqPreset` so the geo service sees the same JA4
|
|
213
|
+
// / headers as user traffic). Then cross-reference against
|
|
214
|
+
// `(matrix.timezone, matrix.locale)` and apply `geoConsistency`. The
|
|
215
|
+
// adjusted matrix flows into BOTH `spawnChromium` (so `--lang` reflects
|
|
216
|
+
// any override) AND `Session` (so inject + the CDP `Emulation.set
|
|
217
|
+
// TimezoneOverride` send pick it up). PLAN.md §9.
|
|
218
|
+
//
|
|
219
|
+
// `"off"` short-circuits the probe — the probe call itself respects
|
|
220
|
+
// the mode so we don't pay the network round-trip in offline tests.
|
|
221
|
+
const geoMode: GeoConsistencyMode = opts.geoConsistency ?? "privacy-fallback";
|
|
222
|
+
let adjustedMatrix = matrix;
|
|
223
|
+
if (geoMode !== "off") {
|
|
224
|
+
const geo = await probeExitGeo({
|
|
225
|
+
...(normalized?.netProxy !== undefined ? { proxy: normalized.netProxy } : {}),
|
|
226
|
+
matrix,
|
|
227
|
+
});
|
|
228
|
+
// Strict mode throws GeoMismatchError on real mismatch; let it
|
|
229
|
+
// propagate up so callers can recover (the orchestrator surfaced
|
|
230
|
+
// it as the canonical failure mode for "wrong proxy for profile").
|
|
231
|
+
const result = reconcileGeoConsistency(matrix, geo, geoMode);
|
|
232
|
+
adjustedMatrix = result.matrix;
|
|
233
|
+
if (result.action === "privacy-fallback" || result.action === "auto-correct") {
|
|
234
|
+
console.warn(
|
|
235
|
+
`[mochi] geoConsistency=${geoMode}: ${result.action} applied — ${result.reason ?? "(no reason)"}`,
|
|
236
|
+
);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
132
240
|
const proc = await spawnChromium({
|
|
133
241
|
binary,
|
|
134
242
|
extraArgs: opts.args,
|
|
135
243
|
headless: opts.headless ?? false,
|
|
244
|
+
// Opt-out for the auto-no-sandbox-as-root fallback (default: fallback
|
|
245
|
+
// is on so first-run on a Linux server box doesn't crash).
|
|
246
|
+
...(opts.allowRootWithSandbox === true ? { allowRootWithSandbox: true } : {}),
|
|
136
247
|
// Chromium rejects inline auth on `--proxy-server`; pass the
|
|
137
248
|
// auth-stripped server URL.
|
|
138
249
|
...(normalized !== undefined ? { proxy: normalized.server } : {}),
|
|
250
|
+
// Primary BCP-47 locale → `--lang=<value>`. Locks the network-layer
|
|
251
|
+
// `Accept-Language` header to the JS spoof (PLAN.md I-5). The full
|
|
252
|
+
// multi-locale list still flows through `matrix.languages` to the
|
|
253
|
+
// inject layer's `navigator.languages` spoof; Chromium derives the
|
|
254
|
+
// q-weighted `Accept-Language` value from the single `--lang` primary
|
|
255
|
+
// automatically. Task 0251.
|
|
256
|
+
locale: adjustedMatrix.locale,
|
|
257
|
+
// Pin OS-level outer window from the matrix's display geometry so
|
|
258
|
+
// `window.outerWidth/outerHeight` (which reads from the OS window,
|
|
259
|
+
// NOT the JS-spoofed `screen.*`) matches the spoof. Closes the
|
|
260
|
+
// `fingerprint-scan.com` 800×600 leak under `--headless=new`.
|
|
261
|
+
// UDC fixes the same issue at `__init__.py:410-411`. Task 0252.
|
|
262
|
+
//
|
|
263
|
+
// (`adjustedMatrix.display` === `matrix.display` since geo reconcile
|
|
264
|
+
// only touches timezone/locale/languages — but we use the adjusted
|
|
265
|
+
// ref for forward-compat.)
|
|
266
|
+
...(Number.isInteger(adjustedMatrix.display.width) &&
|
|
267
|
+
Number.isInteger(adjustedMatrix.display.height) &&
|
|
268
|
+
adjustedMatrix.display.width > 0 &&
|
|
269
|
+
adjustedMatrix.display.height > 0
|
|
270
|
+
? {
|
|
271
|
+
windowSize: {
|
|
272
|
+
width: adjustedMatrix.display.width,
|
|
273
|
+
height: adjustedMatrix.display.height,
|
|
274
|
+
},
|
|
275
|
+
}
|
|
276
|
+
: {}),
|
|
277
|
+
// Hermetic harness/CI escape hatch — re-applies the patchright-trim
|
|
278
|
+
// flags (`--disable-component-update`, `--disable-default-apps`,
|
|
279
|
+
// `--disable-background-networking`, `--disable-sync`, hermetic
|
|
280
|
+
// `--disable-features=` extras). Default `false` keeps production users
|
|
281
|
+
// off the passive command-line bot-tell list. Task 0256, PLAN.md §8.6.
|
|
282
|
+
...(opts.hermetic === true ? { hermetic: true } : {}),
|
|
139
283
|
});
|
|
140
284
|
|
|
141
|
-
// Resolve the `MatrixV1` for this session via the consistency engine.
|
|
142
|
-
// Inline `ProfileV1` objects flow straight through; string profile ids
|
|
143
|
-
// are resolved against a placeholder profile until `@mochi.js/profiles`
|
|
144
|
-
// ships its first capture (phase 0.4). The matrix is bit-stable per
|
|
145
|
-
// `(profile, seed)` excluding the `derivedAt` timestamp.
|
|
146
|
-
const profile = resolveProfile(opts.profile);
|
|
147
|
-
const matrix = deriveMatrix(profile, opts.seed);
|
|
148
|
-
|
|
149
285
|
const session = new Session({
|
|
150
286
|
proc,
|
|
151
|
-
matrix,
|
|
287
|
+
matrix: adjustedMatrix,
|
|
152
288
|
seed: opts.seed,
|
|
153
289
|
...(opts.timeout !== undefined ? { defaultTimeoutMs: opts.timeout } : {}),
|
|
154
290
|
...(opts.bypassInject === true ? { bypassInject: true } : {}),
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `ElementHandle` — lightweight wrapper around a CDP `RemoteObject` that lets
|
|
3
|
+
* callers operate on an element resolved via the closed-shadow piercing
|
|
4
|
+
* locator (`Page.querySelectorPiercing`).
|
|
5
|
+
*
|
|
6
|
+
* The handle is intentionally minimal — Phase 0.2 only needs enough surface
|
|
7
|
+
* for the Turnstile auto-clicker to ask "is this an iframe whose src matches
|
|
8
|
+
* cf-turnstile?" and then position a click. Wider parity with Playwright's
|
|
9
|
+
* `ElementHandle` (waitFor, fill, hover, screenshot…) is deferred — those
|
|
10
|
+
* compose on top of the same primitives once they're needed.
|
|
11
|
+
*
|
|
12
|
+
* Lifecycle: the underlying `objectId` is bound to a CDP `Runtime` execution
|
|
13
|
+
* context. Closing the page invalidates every handle the page produced; we
|
|
14
|
+
* don't try to release them via `Runtime.releaseObject` because there's no
|
|
15
|
+
* `Runtime.enable` in this session (PLAN.md §8.2). Stale handles surface as
|
|
16
|
+
* `Cannot find context with specified id` errors from the next CDP call,
|
|
17
|
+
* which is fine for a v0.2 surface.
|
|
18
|
+
*
|
|
19
|
+
* @see PLAN.md §8.2 / §8.3
|
|
20
|
+
* @see tasks/0253-closed-shadow-piercing-locator.md
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type { MessageRouter } from "../cdp/router";
|
|
24
|
+
import type { CdpSessionId, RemoteObject } from "../cdp/types";
|
|
25
|
+
|
|
26
|
+
/**
 * Construction parameters for {@link ElementHandle}.
 */
export interface ElementHandleInit {
  /** CDP message router the handle sends its commands through. */
  router: MessageRouter;
  /** CDP session the commands are addressed to. */
  sessionId: CdpSessionId;
  /** Remote object id of the element; used as the `objectId` of `Runtime.callFunctionOn`. */
  objectId: string;
  /** CDP `backendNodeId` — stable across DOM mutations. */
  backendNodeId: number;
}
|
|
33
|
+
|
|
34
|
+
/**
 * A handle to a single DOM element exposed to host-side automation. Issued
 * by `Page.querySelectorPiercing` / `Page.querySelectorAllPiercing`.
 *
 * Every method is one `Runtime.callFunctionOn` round-trip with the element
 * bound as `this` and `returnByValue: true`. If the page-side function
 * throws, CDP reports it via `exceptionDetails` in an otherwise successful
 * response; the handle turns that into a rejected promise instead of
 * returning whatever placeholder value came back.
 */
export class ElementHandle {
  private readonly router: MessageRouter;
  private readonly sessionId: CdpSessionId;
  private readonly objectId: string;
  private readonly _backendNodeId: number;

  constructor(init: ElementHandleInit) {
    this.router = init.router;
    this.sessionId = init.sessionId;
    this.objectId = init.objectId;
    this._backendNodeId = init.backendNodeId;
  }

  /** The CDP `backendNodeId` for the element — stable across DOM mutations. */
  get backendNodeId(): number {
    return this._backendNodeId;
  }

  /**
   * Read a single attribute via `Runtime.callFunctionOn`.
   *
   * @param name - Attribute name passed to the element's `getAttribute`.
   * @returns The attribute value as a string, or `null` when the attribute
   *   is absent (mirrors `Element.getAttribute`).
   * @throws Error when the page-side call reports an exception.
   */
  async getAttribute(name: string): Promise<string | null> {
    const value = await this.callOnElement(
      "function(n) { var v = this.getAttribute(n); return v === null ? null : String(v); }",
      [{ value: name }],
    );
    return value === null || value === undefined ? null : String(value);
  }

  /**
   * Get the element's text content via `Runtime.callFunctionOn`.
   *
   * @returns The text content as a string, or `null` when the page reports
   *   `null` / no value.
   * @throws Error when the page-side call reports an exception.
   */
  async textContent(): Promise<string | null> {
    const value = await this.callOnElement("function() { return this.textContent; }");
    return value === null || value === undefined ? null : String(value);
  }

  /**
   * Evaluate a function bound to this element (the handle is `this`). Result
   * is JSON-serialised via `returnByValue: true` — no closures, no arguments,
   * no DOM-node returns.
   *
   * The function is shipped as `fn.toString()`, so pass a `function`
   * expression: an arrow function keeps its lexical `this` and will NOT see
   * the element.
   *
   * @param fn - Self-contained function to run in the page.
   * @returns The by-value result reported by the page, cast to `T`.
   * @throws Error when `fn` throws in the page.
   */
  async evaluate<T>(fn: (this: Element) => T): Promise<T> {
    return (await this.callOnElement(fn.toString())) as T;
  }

  /**
   * Shared `Runtime.callFunctionOn` round-trip used by every public method.
   * Sends `functionDeclaration` with this handle's remote object as `this`
   * and returns the by-value result. `arguments` is only included in the
   * request when `args` is provided, so argument-less calls keep the exact
   * request shape they had before.
   */
  private async callOnElement(
    functionDeclaration: string,
    args?: Array<{ value: unknown }>,
  ): Promise<unknown> {
    const response = await this.router.send<{
      result: RemoteObject;
      exceptionDetails?: { text?: string; exception?: { description?: string } };
    }>(
      "Runtime.callFunctionOn",
      {
        objectId: this.objectId,
        functionDeclaration,
        ...(args !== undefined ? { arguments: args } : {}),
        returnByValue: true,
      },
      { sessionId: this.sessionId },
    );

    // A page-side throw arrives as `exceptionDetails` next to a placeholder
    // `result`; surface it rather than handing the placeholder to the caller.
    const thrown = response.exceptionDetails;
    if (thrown !== undefined && thrown !== null) {
      const detail = thrown.exception?.description ?? thrown.text ?? "unknown error";
      throw new Error(`ElementHandle: page-side function threw: ${detail}`);
    }

    return response.result.value;
  }
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Closed-shadow piercing locator.
|
|
3
|
+
*
|
|
4
|
+
* Walks a tree returned by `DOM.getDocument({ depth: -1, pierce: true })` and
|
|
5
|
+
* yields `backendNodeId`s for every element that matches a parsed CSS
|
|
6
|
+
* selector — including elements inside **closed** shadow roots, which
|
|
7
|
+
* `DOM.querySelector(..., pierce: true)` does NOT traverse from the parent
|
|
8
|
+
* document. Patchright solves the same problem in `_customFindElementsByParsed`
|
|
9
|
+
* (`framesPatch.ts:868-1012`); this is mochi's port — we kept the recursive-walk
|
|
10
|
+
* shape but simplified the selector subset (CSS only — no XPath; see task
|
|
11
|
+
* 0253 brief for the rationale).
|
|
12
|
+
*
|
|
13
|
+
* The walker recurses through:
|
|
14
|
+
* - `node.children[]` (regular DOM descendants)
|
|
15
|
+
* - `node.shadowRoots[]` (BOTH `shadowRootType:"open"` and `"closed"` — the
|
|
16
|
+
* pierce flag yields both; the matcher just doesn't care which kind it is)
|
|
17
|
+
* - `node.contentDocument` (iframes — same-origin only; OOPIF subframes
|
|
18
|
+
* surface as separate targets and are out of scope here)
|
|
19
|
+
* - `node.templateContent` (template fragment, rare but cheap to walk)
|
|
20
|
+
*
|
|
21
|
+
* It deliberately does NOT recurse into:
|
|
22
|
+
* - `pseudoElements` — `::before` / `::after` aren't real DOM nodes for
|
|
23
|
+
* selector matching purposes; CDP yields them but they'd produce
|
|
24
|
+
* spurious matches on `*` selectors.
|
|
25
|
+
*
|
|
26
|
+
* The walker keeps a *flat* ancestor chain across shadow boundaries so the
|
|
27
|
+
* descendant-combinator matcher can reason about "div .btn" correctly even
|
|
28
|
+
* when the `.btn` is inside a closed shadow rooted at `<div>`. This mirrors
|
|
29
|
+
* how DOM's regular ancestor walk behaves under `composedPath` semantics —
|
|
30
|
+
* patchright does the same.
|
|
31
|
+
*
|
|
32
|
+
* Performance: O(N) in DOM size per call. Acceptable for v0.2 (per task
|
|
33
|
+
* brief — a per-page cache layer is a v0.3+ concern).
|
|
34
|
+
*
|
|
35
|
+
* @see PLAN.md §8.2 — `DOM.getDocument` / `DOM.resolveNode` are not forbidden
|
|
36
|
+
* @see tasks/0253-closed-shadow-piercing-locator.md
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
import type { PierceDomNode } from "../cdp/types";
|
|
40
|
+
import { matchSelector, type ParsedSelector } from "./selector";
|
|
41
|
+
|
|
42
|
+
/**
 * One element found by the piercing walker.
 */
export interface PierceMatch {
  /** CDP `backendNodeId` of the matched element; the identifier that stays stable across DOM mutations. */
  backendNodeId: number;
  /** CDP node id (per-DOMSession-instance; less stable than the backend id). */
  nodeId: number;
  /** The matched tree node itself, kept for diagnostics and tests. */
  node: PierceDomNode;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Collect every element under `root` that satisfies `selector`.
 *
 * Results come back in depth-first pre-order (a parent is reported before
 * its descendants), the order callers expect from `querySelectorAll`.
 *
 * @param root - Root of the tree to search.
 * @param selector - Parsed selector each element node is tested against.
 * @param limit - Optional cap on the number of matches; the walk stops as
 *   soon as that many have accumulated. `Page.querySelectorPiercing` passes
 *   `1` for a single-element lookup; `querySelectorAllPiercing` leaves it
 *   undefined.
 * @returns The matches, in traversal order.
 */
export function findPiercingMatches(
  root: PierceDomNode,
  selector: ParsedSelector,
  limit?: number,
): PierceMatch[] {
  const matches: PierceMatch[] = [];
  // Fresh ancestor stack: the root starts with no element ancestors.
  const ancestorStack: PierceDomNode[] = [];
  walk(root, selector, ancestorStack, matches, limit);
  return matches;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Recursive worker behind `findPiercingMatches`.
 *
 * Tests `node` against `selector` (element nodes only), then descends into
 * its children, shadow roots, iframe `contentDocument` and template content
 * — in that order. `ancestors` is a shared stack holding the element nodes
 * on the path from the root to the current node; it is mutated in place and
 * restored before returning normally.
 *
 * @returns `true` once `limit` matches have been collected (telling callers
 *   to stop walking), `false` otherwise.
 */
function walk(
  node: PierceDomNode,
  selector: ParsedSelector,
  ancestors: PierceDomNode[],
  out: PierceMatch[],
  limit: number | undefined,
): boolean {
  const limitReached = (): boolean => limit !== undefined && out.length >= limit;

  if (limitReached()) return true;

  // nodeType 1 === element. Documents / fragments / shadow roots are never
  // matched themselves, but their subtrees still have to be searched.
  const isElementNode = node.nodeType === 1;

  if (isElementNode && matchSelector(selector, node, ancestors)) {
    out.push({ backendNodeId: node.backendNodeId, nodeId: node.nodeId, node });
    if (limitReached()) return true;
  }

  // Everything to descend into, in visiting order: regular children first,
  // then shadow roots (open AND closed — the whole point), then a
  // same-origin iframe document, then <template> content.
  const nested: PierceDomNode[] = [
    ...(node.children !== undefined ? node.children : []),
    ...(node.shadowRoots !== undefined ? node.shadowRoots : []),
    ...(node.contentDocument !== undefined ? [node.contentDocument] : []),
    ...(node.templateContent !== undefined ? [node.templateContent] : []),
  ];

  // Only element nodes count as ancestors for descendant-combinator matching.
  if (isElementNode) ancestors.push(node);

  // `some` stops at the first subtree that reports the limit was hit.
  const stopEarly = nested.some((next) => walk(next, selector, ancestors, out, limit));

  if (isElementNode) ancestors.pop();
  return stopEarly;
}
|