@mochi.js/core 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +5 -5
- package/src/__tests__/geo-consistency.test.ts +277 -0
- package/src/__tests__/geo-probe.test.ts +415 -0
- package/src/__tests__/inject.test.ts +4 -0
- package/src/__tests__/integration.e2e.test.ts +24 -0
- package/src/__tests__/piercing.test.ts +164 -0
- package/src/__tests__/proc.test.ts +383 -0
- package/src/__tests__/selector.test.ts +188 -0
- package/src/__tests__/window-size.e2e.test.ts +130 -0
- package/src/cdp/types.ts +47 -0
- package/src/geo-consistency.ts +343 -0
- package/src/geo-probe.ts +603 -0
- package/src/index.ts +11 -0
- package/src/launch.ts +145 -9
- package/src/page/element-handle.ts +110 -0
- package/src/page/piercing.ts +135 -0
- package/src/page/selector.ts +423 -0
- package/src/page.ts +152 -1
- package/src/proc.ts +386 -41
- package/src/session.ts +358 -12
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tiny host-side CSS selector engine for the closed-shadow piercing locator
|
|
3
|
+
* (`Page.querySelectorPiercing`). Parses a CSS selector into a sequence of
|
|
4
|
+
* **compound** parts joined by descendant combinators, then matches a
|
|
5
|
+
* pre-walked `PierceDomNode` against that compound chain.
|
|
6
|
+
*
|
|
7
|
+
* Why we don't `DOM.querySelector` per shadow root: that CDP method does NOT
|
|
8
|
+
* pierce closed shadows even when its parent `DOM.getDocument` was called
|
|
9
|
+
* with `pierce: true`. Patchright's `_customFindElementsByParsed`
|
|
10
|
+
* (`framesPatch.ts:868-1012`) parses the selector itself and walks the tree
|
|
11
|
+
* manually for exactly this reason. We port the algorithm — *not* the surface
|
|
12
|
+
* area: only the CSS-selector subset listed in `tasks/0253` lands here.
|
|
13
|
+
*
|
|
14
|
+
* **Supported subset (CSS Selectors level 4 — strict subset):**
|
|
15
|
+
* - Tag selectors: `div`, `iframe`, `*`
|
|
16
|
+
* - ID: `#main`
|
|
17
|
+
* - Class: `.btn`, `.btn.primary`
|
|
18
|
+
* - Attribute: `[src]`, `[name="x"]`, `[href*="foo"]`, `[role^="b"]`,
|
|
19
|
+
* `[data-x$="y"]`, `[data-x~="z"]`, `[data-x|="en"]`. Quotes optional for
|
|
20
|
+
* whitespace-free values.
|
|
21
|
+
* - Descendant combinator: `div .btn` (whitespace).
|
|
22
|
+
* - Comma-separated selector lists: `a, button` — match if ANY branch matches.
|
|
23
|
+
*
|
|
24
|
+
* **NOT supported (intentionally — see Out of scope in 0253):**
|
|
25
|
+
* - `>`, `+`, `~` combinators
|
|
26
|
+
* - `:pseudo-classes` (`:hover`, `:nth-child`, `:has`, `:not`)
|
|
27
|
+
* - `::pseudo-elements`
|
|
28
|
+
* - XPath (deferred — STRETCH per task brief; document as TODO if it lands).
|
|
29
|
+
* - Namespaces.
|
|
30
|
+
*
|
|
31
|
+
* Throws `SelectorParseError` on syntactically invalid input. The matcher
|
|
32
|
+
* itself never throws — unsupported nodes just don't match.
|
|
33
|
+
*
|
|
34
|
+
* @see tasks/0253-closed-shadow-piercing-locator.md
|
|
35
|
+
* @see PLAN.md §8.2 (forbidden CDP — neither `DOM.getDocument` nor
|
|
36
|
+
* `DOM.resolveNode` is forbidden; both fine).
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
import type { PierceDomNode } from "../cdp/types";
|
|
40
|
+
|
|
41
|
+
/**
 * Error raised by the selector parser when the input cannot be parsed.
 *
 * The offending selector text is kept on {@link SelectorParseError.selector}
 * so callers can report it without re-parsing the message.
 */
export class SelectorParseError extends Error {
  /**
   * @param selector - The selector text that failed to parse.
   * @param message - Short description of what was wrong with it.
   */
  constructor(
    readonly selector: string,
    message: string,
  ) {
    super(`[mochi] invalid selector "${selector}": ${message}`);
    this.name = "SelectorParseError";
  }
}
|
|
50
|
+
|
|
51
|
+
/** One `[...]` attribute constraint belonging to a compound selector part. */
export interface AttrFilter {
  /** Attribute name (the parser stores it lower-cased). */
  name: string;
  /**
   * How the attribute value is compared:
   * - `"exists"` — the attribute only has to be present; its value is ignored
   * - `"="`  — value equals the target exactly
   * - `"~="` — target is one of the whitespace-separated words of the value
   * - `"|="` — value equals the target, or starts with the target plus `-`
   * - `"^="` — value starts with the target
   * - `"$="` — value ends with the target
   * - `"*="` — value contains the target
   */
  op:
    | "exists"
    | "="
    | "~="
    | "|="
    | "^="
    | "$="
    | "*=";
  /** Target value; set for every operator except `"exists"`. */
  value?: string;
}
|
|
68
|
+
|
|
69
|
+
/**
 * A compound selector: every constraint that applies to one single element
 * (the text between two descendant combinators).
 */
export interface CompoundPart {
  /** Lower-cased tag name, or `"*"` when any tag is acceptable. */
  tag: string;
  /** Required `id` attribute value, when the compound contains `#id`. */
  id?: string;
  /** Class names that must all be present on the element. */
  classes: Array<string>;
  /** Attribute filters that must all hold for the element. */
  attrs: Array<AttrFilter>;
}
|
|
77
|
+
|
|
78
|
+
/**
 * One branch of a selector list: compound parts joined by descendant
 * combinators, stored left-to-right. The matcher works from the right: the
 * last part is tested against the candidate element and every earlier part
 * must be satisfied by some ancestor further up the flat ancestor chain
 * (which may cross shadow boundaries — see `matchSelector`).
 */
export interface CompoundChain {
  /** Compound parts in source order (outermost ancestor first). */
  parts: Array<CompoundPart>;
}
|
|
87
|
+
|
|
88
|
+
/** Result of parsing a selector list: one chain per `,`-separated branch. */
export interface ParsedSelector {
  /** Alternative chains; a node matches when any one of them matches. */
  chains: Array<CompoundChain>;
}
|
|
92
|
+
|
|
93
|
+
// ---- parser ----------------------------------------------------------------
|
|
94
|
+
|
|
95
|
+
/**
 * Turn a CSS selector string into a {@link ParsedSelector}.
 *
 * The input is split on top-level commas into branches, each branch is split
 * on top-level whitespace into compound parts, and every compound is parsed
 * on its own. The parser is hand-written rather than regex-driven because
 * quoted attribute values may themselves contain `[`, `,` or whitespace.
 *
 * @param input - Selector text using the subset documented at the top of
 *   this module.
 * @returns One chain per comma-separated branch.
 * @throws SelectorParseError when `input` is not a string, is empty after
 *   trimming, contains an empty branch, or a compound fails to parse.
 */
export function parseSelector(input: string): ParsedSelector {
  if (typeof input !== "string") {
    throw new SelectorParseError(String(input), "selector must be a string");
  }
  const selectorText = input.trim();
  if (selectorText === "") {
    throw new SelectorParseError(input, "selector must not be empty");
  }

  const chains = splitTopLevel(selectorText, ",").map((branch): CompoundChain => {
    // Runs of whitespace yield empty pieces; drop them before parsing.
    const compounds = splitTopLevel(branch.trim(), " ").filter((piece) => piece !== "");
    if (compounds.length === 0) {
      throw new SelectorParseError(input, "empty selector branch");
    }
    return { parts: compounds.map((piece) => parseCompound(piece, input)) };
  });
  return { chains };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Cut `input` at every occurrence of `sep` that is neither inside `[...]`
 * nor inside a quoted string.
 *
 * When `sep` is a single space, any whitespace character acts as a
 * separator. Empty pieces are kept (e.g. two adjacent separators produce an
 * empty string between them); the result always has at least one element.
 *
 * @param input - Text to split.
 * @param sep - Separator character.
 * @returns The pieces in order, separators removed.
 */
function splitTopLevel(input: string, sep: string): string[] {
  const pieces: string[] = [];
  const anyWhitespaceSplits = sep === " ";
  let current = "";
  let bracketDepth = 0;
  let openQuote: string | null = null;
  let pos = 0;

  while (pos < input.length) {
    const ch = input.charAt(pos);
    pos += 1;

    if (openQuote !== null) {
      // Inside a quoted string everything is copied verbatim.
      current += ch;
      if (ch === "\\" && pos < input.length) {
        // Keep the escaped character too, so an escaped quote cannot close
        // the string.
        current += input.charAt(pos);
        pos += 1;
      } else if (ch === openQuote) {
        openQuote = null;
      }
      continue;
    }

    switch (ch) {
      case '"':
      case "'":
        openQuote = ch;
        current += ch;
        continue;
      case "[":
        bracketDepth += 1;
        current += ch;
        continue;
      case "]":
        // A stray `]` never drives the depth negative.
        bracketDepth = Math.max(0, bracketDepth - 1);
        current += ch;
        continue;
    }

    const isSeparator =
      bracketDepth === 0 && (ch === sep || (anyWhitespaceSplits && /\s/.test(ch)));
    if (isSeparator) {
      pieces.push(current);
      current = "";
    } else {
      current += ch;
    }
  }

  pieces.push(current);
  return pieces;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Parse one compound selector (optional tag, then any mix of `#id`,
 * `.class` and `[attr]` pieces, with no top-level whitespace).
 *
 * @param input - The compound text, already split off from its chain.
 * @param original - The full selector as given by the caller; used only for
 *   error reporting.
 * @returns The parsed constraints. `tag` defaults to `"*"` when no tag
 *   prefix is present.
 * @throws SelectorParseError when the tag prefix is not `*` or a plain tag
 *   name, when two different ids are required at once, when an unexpected
 *   character follows a piece, or when an identifier / attribute piece
 *   fails to parse.
 */
function parseCompound(input: string, original: string): CompoundPart {
  const part: CompoundPart = { tag: "*", classes: [], attrs: [] };

  // The optional tag prefix is everything before the first `#`, `.` or `[`.
  const firstSigil = input.search(/[#.[]/);
  let i = firstSigil === -1 ? input.length : firstSigil;
  const tagText = input.slice(0, i);
  if (tagText.length > 0) {
    // Either the lone universal selector or a plain tag name. `*` may not be
    // followed by name characters (`*div` is not a valid type selector).
    if (!/^(?:\*|[a-zA-Z][a-zA-Z0-9-]*)$/.test(tagText)) {
      throw new SelectorParseError(original, `bad tag "${tagText}"`);
    }
    part.tag = tagText.toLowerCase();
  }

  while (i < input.length) {
    const ch = input[i] as string;
    if (ch === "#") {
      const id = readIdent(input, i + 1, original);
      // `CompoundPart` holds a single id. Two different ids can never both
      // match one element, so reject instead of silently keeping the last.
      if (part.id !== undefined && part.id !== id.value) {
        throw new SelectorParseError(
          original,
          `conflicting ids "#${part.id}" and "#${id.value}" in compound "${input}"`,
        );
      }
      part.id = id.value;
      i = id.next;
    } else if (ch === ".") {
      const cls = readIdent(input, i + 1, original);
      part.classes.push(cls.value);
      i = cls.next;
    } else if (ch === "[") {
      const attr = readAttr(input, i + 1, original);
      part.attrs.push(attr.filter);
      i = attr.next;
    } else {
      // Reached e.g. by `.btn:hover`, where the identifier stops at `:`.
      throw new SelectorParseError(original, `unexpected "${ch}" in compound "${input}"`);
    }
  }
  return part;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Consume the longest run of identifier characters (`a-z`, `A-Z`, `0-9`,
 * `_`, `-`) that begins at index `i` of `input`.
 *
 * @param input - Compound text being parsed.
 * @param i - Index of the first candidate character.
 * @param original - Full selector, used only for the error message.
 * @returns The identifier text and the index just past it.
 * @throws SelectorParseError when no identifier character is found at `i`.
 */
function readIdent(input: string, i: number, original: string): { value: string; next: number } {
  // A sticky pattern anchors the match at exactly `i`.
  const identRun = /[a-zA-Z0-9_-]*/y;
  identRun.lastIndex = i;
  const value = identRun.exec(input)?.[0] ?? "";
  if (value === "") {
    throw new SelectorParseError(original, `expected identifier at position ${i}`);
  }
  return { value, next: i + value.length };
}
|
|
236
|
+
|
|
237
|
+
/**
 * Parse the inside of an attribute selector, starting at the character just
 * after the opening `[` and consuming through the closing `]`.
 *
 * Accepted shapes are `name]` (presence test) and `name OP value]`, where
 * `OP` is one of `=`, `~=`, `|=`, `^=`, `$=`, `*=` and `value` is either a
 * quoted string (a backslash takes the following character literally) or an
 * unquoted run that ends at whitespace or `]`. Whitespace is allowed around
 * the operator and before the closing bracket.
 *
 * @param input - Compound text being parsed.
 * @param i - Index just past the `[`.
 * @param original - Full selector, used only for error messages.
 * @returns The attribute filter and the index just past the `]`.
 * @throws SelectorParseError on a missing name, missing operator, or missing
 *   closing bracket.
 */
function readAttr(
  input: string,
  i: number,
  original: string,
): { filter: AttrFilter; next: number } {
  let pos = i;
  const skipWhitespace = (): void => {
    while (pos < input.length && /\s/.test(input.charAt(pos))) pos += 1;
  };

  // Attribute name, stored lower-cased.
  const nameStart = pos;
  while (pos < input.length && /[a-zA-Z0-9_:-]/.test(input.charAt(pos))) pos += 1;
  const name = input.slice(nameStart, pos).toLowerCase();
  if (name === "") {
    throw new SelectorParseError(original, `expected attribute name at position ${nameStart}`);
  }

  skipWhitespace();
  if (pos >= input.length) {
    throw new SelectorParseError(original, `unterminated [...] in selector`);
  }
  if (input.charAt(pos) === "]") {
    return { filter: { name, op: "exists" }, next: pos + 1 };
  }

  // Two-character operators are listed before the bare `=`.
  const operators = ["~=", "|=", "^=", "$=", "*=", "="] as const;
  const op = operators.find((candidate) => input.startsWith(candidate, pos));
  if (op === undefined) {
    throw new SelectorParseError(original, `expected operator at position ${pos}`);
  }
  pos += op.length;
  skipWhitespace();

  let value: string;
  const opener = input.charAt(pos); // "" when already past the end
  if (opener === '"' || opener === "'") {
    pos += 1;
    let text = "";
    let closed = false;
    while (!closed && pos < input.length) {
      const ch = input.charAt(pos);
      if (ch === "\\" && pos + 1 < input.length) {
        // Backslash escape: take the next character as-is.
        text += input.charAt(pos + 1);
        pos += 2;
      } else {
        pos += 1;
        if (ch === opener) closed = true;
        else text += ch;
      }
    }
    // An unclosed quote runs to the end of input; the `]` check below then
    // reports the error.
    value = text;
  } else {
    const bareStart = pos;
    while (pos < input.length && !/[\]\s]/.test(input.charAt(pos))) pos += 1;
    value = input.slice(bareStart, pos);
  }

  skipWhitespace();
  if (input.charAt(pos) !== "]") {
    throw new SelectorParseError(original, `expected ']' at position ${pos}`);
  }
  return { filter: { name, op, value }, next: pos + 1 };
}
|
|
312
|
+
|
|
313
|
+
// ---- matcher ---------------------------------------------------------------
|
|
314
|
+
|
|
315
|
+
/**
 * Decide whether `node` satisfies a parsed selector list.
 *
 * A node matches when at least one comma-separated branch matches: the
 * branch's rightmost compound must match `node` itself and its earlier
 * compounds must be matched by ancestors.
 *
 * @param parsed - Selector produced by `parseSelector`.
 * @param node - Candidate node.
 * @param ancestors - Parent chain of `node`, ordered from the document root
 *   down to (but excluding) `node`. The chain is flat: it includes ancestors
 *   on the far side of shadow boundaries.
 * @returns `true` when any branch matches.
 */
export function matchSelector(
  parsed: ParsedSelector,
  node: PierceDomNode,
  ancestors: PierceDomNode[],
): boolean {
  return parsed.chains.some((chain) => matchChain(chain, node, ancestors));
}
|
|
332
|
+
|
|
333
|
+
/**
 * Match one descendant chain against `node` and its ancestor chain.
 *
 * The rightmost compound must match `node`. The remaining compounds are then
 * taken right-to-left; each is assigned the nearest not-yet-consumed
 * ancestor that satisfies it, and the search for the next compound resumes
 * strictly above that ancestor.
 *
 * @param chain - Compound parts in source (left-to-right) order.
 * @param node - Candidate node for the rightmost compound.
 * @param ancestors - Ancestors of `node`, root first, nearest parent last.
 * @returns `true` when every compound finds its element.
 */
function matchChain(
  chain: CompoundChain,
  node: PierceDomNode,
  ancestors: PierceDomNode[],
): boolean {
  const { parts } = chain;
  if (parts.length === 0) return false;

  const subject = parts[parts.length - 1] as CompoundPart;
  if (!matchCompound(subject, node)) return false;

  // Exclusive upper bound of the ancestors still available for matching.
  let available = ancestors.length;
  for (const part of parts.slice(0, -1).reverse()) {
    let hit = -1;
    for (let k = available - 1; k >= 0; k--) {
      if (matchCompound(part, ancestors[k] as PierceDomNode)) {
        hit = k;
        break;
      }
    }
    if (hit < 0) return false;
    available = hit;
  }
  return true;
}
|
|
362
|
+
|
|
363
|
+
/**
 * Check one compound part against one node.
 *
 * Only element nodes (`nodeType === 1`) can match. The tag comparison is
 * done on lower-cased names; the id, every listed class, and every attribute
 * filter must all be satisfied.
 *
 * @param part - Constraints for a single element.
 * @param node - Node to test.
 * @returns `true` when the node satisfies every constraint in `part`.
 */
export function matchCompound(part: CompoundPart, node: PierceDomNode): boolean {
  if (node.nodeType !== 1) return false;

  const localName = (node.localName ?? node.nodeName.toLowerCase()).toLowerCase();
  const tagMatches = part.tag === "*" || part.tag === localName;
  if (!tagMatches) return false;

  if (part.id !== undefined && readAttribute(node, "id") !== part.id) return false;

  if (part.classes.length > 0) {
    const classAttr = readAttribute(node, "class") ?? "";
    const present = new Set(classAttr.split(/\s+/).filter((token) => token.length > 0));
    if (!part.classes.every((wanted) => present.has(wanted))) return false;
  }

  return part.attrs.every((filter) => matchAttr(filter, node));
}
|
|
385
|
+
|
|
386
|
+
/**
 * Evaluate a single attribute filter against a node.
 *
 * A missing attribute never matches. For the prefix / suffix / substring
 * operators an empty target value never matches either.
 *
 * @param f - Attribute filter to evaluate.
 * @param node - Node whose attributes are inspected.
 * @returns `true` when the node's attribute satisfies the filter.
 */
function matchAttr(f: AttrFilter, node: PierceDomNode): boolean {
  const actual = readAttribute(node, f.name);
  if (actual === undefined) return false;
  if (f.op === "exists") return true;

  const expected = f.value ?? "";
  const hasExpected = expected.length > 0;
  switch (f.op) {
    case "=":
      return actual === expected;
    case "~=":
      // One of the whitespace-separated words must equal the target.
      return actual.split(/\s+/).some((word) => word.length > 0 && word === expected);
    case "|=":
      return actual === expected || actual.startsWith(`${expected}-`);
    case "^=":
      return hasExpected && actual.startsWith(expected);
    case "$=":
      return hasExpected && actual.endsWith(expected);
    case "*=":
      return hasExpected && actual.includes(expected);
  }
}
|
|
409
|
+
|
|
410
|
+
/**
 * Look up an attribute on a `PierceDomNode`.
 *
 * `node.attributes` is a flat `[name, value, name, value, ...]` array; names
 * are compared case-insensitively. A trailing name without a value is
 * ignored.
 *
 * @param node - Node to read from.
 * @param name - Attribute name, any case.
 * @returns The value of the first matching attribute, or `undefined` when
 *   the node has no attribute list or no such attribute.
 */
export function readAttribute(node: PierceDomNode, name: string): string | undefined {
  const flat = node.attributes;
  if (flat === undefined) return undefined;

  const wanted = name.toLowerCase();
  // `v` walks the value slots (odd indices); the name sits just before it.
  for (let v = 1; v < flat.length; v += 2) {
    const attrName = flat[v - 1] as string;
    if (attrName.toLowerCase() === wanted) return flat[v] as string;
  }
  return undefined;
}
|
package/src/page.ts
CHANGED
|
@@ -35,9 +35,13 @@ import type {
|
|
|
35
35
|
DispatchMouseEventParams,
|
|
36
36
|
DomNode,
|
|
37
37
|
FrameNavigatedEvent,
|
|
38
|
+
PierceDomNode,
|
|
38
39
|
RemoteObject,
|
|
39
40
|
} from "./cdp/types";
|
|
40
41
|
import { NotImplementedError } from "./errors";
|
|
42
|
+
import { ElementHandle } from "./page/element-handle";
|
|
43
|
+
import { findPiercingMatches } from "./page/piercing";
|
|
44
|
+
import { parseSelector } from "./page/selector";
|
|
41
45
|
|
|
42
46
|
/** Wait conditions for `Page.goto`. */
|
|
43
47
|
export type WaitUntil = "load" | "domcontentloaded" | "networkidle";
|
|
@@ -307,6 +311,14 @@ export class Page {
|
|
|
307
311
|
* document (so `this` === document). Result is JSON-serialized via
|
|
308
312
|
* `returnByValue: true`.
|
|
309
313
|
*
|
|
314
|
+
* The function may return a value or a `Promise`. Promise-returning
|
|
315
|
+
* functions are awaited page-side via `awaitPromise: true` (CDP's canonical
|
|
316
|
+
* mechanism for async eval) — without that flag, an `async () => ...`
|
|
317
|
+
* function round-trips its returned Promise as `undefined` because CDP
|
|
318
|
+
* serializes the Promise object itself, not its resolution. `awaitPromise`
|
|
319
|
+
* is NOT on PLAN.md §8.2's forbidden list — only `Runtime.enable` and
|
|
320
|
+
* `Page.createIsolatedWorld` are. Available since Chromium 67.
|
|
321
|
+
*
|
|
310
322
|
* Limitations (documented in docs/limits.md):
|
|
311
323
|
* - Non-JSON return values (functions, DOM nodes, undefined) are
|
|
312
324
|
* coerced/dropped per CDP semantics.
|
|
@@ -315,13 +327,14 @@ export class Page {
|
|
|
315
327
|
* standard for any cross-process evaluator).
|
|
316
328
|
* - Arguments cannot be passed in v0.1; the function takes no args.
|
|
317
329
|
*/
|
|
318
|
-
async evaluate<T>(fn: () => T): Promise<T> {
|
|
330
|
+
async evaluate<T>(fn: () => T | Promise<T>): Promise<T> {
  this.assertOpen();
  // The function is invoked with the document object as `this`.
  const objectId = await this.documentObjectId();
  const { result } = await this.send<{ result: RemoteObject }>("Runtime.callFunctionOn", {
    objectId,
    functionDeclaration: fn.toString(),
    returnByValue: true,
    // Have the page await a returned Promise so its resolution comes back.
    awaitPromise: true,
  });
  // NOTE(review): any `exceptionDetails` on the reply is not inspected here —
  // confirm whether `send` already surfaces page-side throws / rejections.
  return result.value as T;
}
|
|
@@ -527,6 +540,39 @@ export class Page {
|
|
|
527
540
|
});
|
|
528
541
|
const targetBox = boxFromBorderQuad(box.model);
|
|
529
542
|
const callSeed = this.nextCallSeed();
|
|
543
|
+
// Trajectory synth lives here (not in `performClickAt`) so prototype
|
|
544
|
+
// inspection in conformance tests can see the synthesize / trajectory
|
|
545
|
+
// / cursor markers — they're a consumer-side smoke check that the
|
|
546
|
+
// behavioral synth is wired in.
|
|
547
|
+
const traj = synthesizeMouseTrajectory({
|
|
548
|
+
from: { x: this.cursor.x, y: this.cursor.y },
|
|
549
|
+
to: { x: targetBox.x + targetBox.width / 2, y: targetBox.y + targetBox.height / 2 },
|
|
550
|
+
box: targetBox,
|
|
551
|
+
profile: this.behavior,
|
|
552
|
+
seed: callSeed,
|
|
553
|
+
...(opts.duration !== undefined ? { durationMs: opts.duration } : {}),
|
|
554
|
+
});
|
|
555
|
+
await this.dispatchClickTrajectory(traj, callSeed, opts);
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
/**
|
|
559
|
+
* Variant of {@link humanClick} that operates on an {@link ElementHandle}
|
|
560
|
+
* resolved via {@link querySelectorPiercing} — required when the target
|
|
561
|
+
* element lives inside a closed shadow root (no CSS path can name it from
|
|
562
|
+
* the parent document, so the regular `humanClick(selector)` route fails).
|
|
563
|
+
*
|
|
564
|
+
* Pipeline differs from {@link humanClick} only in step 1: the box model
|
|
565
|
+
* is resolved via `DOM.getBoxModel({ backendNodeId })` instead of through a
|
|
566
|
+
* `DOM.querySelector`-resolved nodeId. Everything downstream (trajectory
|
|
567
|
+
* synth, dispatch loop, press/release) is identical.
|
|
568
|
+
*/
|
|
569
|
+
async humanClickHandle(handle: ElementHandle, opts: HumanClickOptions = {}): Promise<void> {
|
|
570
|
+
this.assertOpen();
|
|
571
|
+
const box = await this.send<{ model: BoxModel }>("DOM.getBoxModel", {
|
|
572
|
+
backendNodeId: handle.backendNodeId,
|
|
573
|
+
});
|
|
574
|
+
const targetBox = boxFromBorderQuad(box.model);
|
|
575
|
+
const callSeed = this.nextCallSeed();
|
|
530
576
|
const traj = synthesizeMouseTrajectory({
|
|
531
577
|
from: { x: this.cursor.x, y: this.cursor.y },
|
|
532
578
|
to: { x: targetBox.x + targetBox.width / 2, y: targetBox.y + targetBox.height / 2 },
|
|
@@ -535,6 +581,22 @@ export class Page {
|
|
|
535
581
|
seed: callSeed,
|
|
536
582
|
...(opts.duration !== undefined ? { durationMs: opts.duration } : {}),
|
|
537
583
|
});
|
|
584
|
+
await this.dispatchClickTrajectory(traj, callSeed, opts);
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
/**
|
|
588
|
+
* Inner dispatch loop shared by {@link humanClick} and
|
|
589
|
+
* {@link humanClickHandle}. Takes the synthesised trajectory, paces the
|
|
590
|
+
* `mouseMoved` events, then fires `mousePressed` + `mouseReleased` at the
|
|
591
|
+
* arrival point with realistic press duration. Trajectory synth itself
|
|
592
|
+
* stays inside the public methods so source-grep conformance checks can
|
|
593
|
+
* verify the synth is reachable from the public API.
|
|
594
|
+
*/
|
|
595
|
+
private async dispatchClickTrajectory(
|
|
596
|
+
traj: ReturnType<typeof synthesizeMouseTrajectory>,
|
|
597
|
+
callSeed: string,
|
|
598
|
+
opts: HumanClickOptions,
|
|
599
|
+
): Promise<void> {
|
|
538
600
|
if (traj.length === 0) return;
|
|
539
601
|
|
|
540
602
|
// Pre-move settle: Gaussian(150, 50) ms idle. Cheaply approximated via
|
|
@@ -724,6 +786,95 @@ export class Page {
|
|
|
724
786
|
}
|
|
725
787
|
}
|
|
726
788
|
|
|
789
|
+
/**
|
|
790
|
+
* Closed-shadow-root piercing locator — find the first element matching the
|
|
791
|
+
* CSS selector across the entire DOM tree, including elements nested inside
|
|
792
|
+
* **closed** shadow roots (which {@link text}, {@link humanClick}, etc. can
|
|
793
|
+
* NOT reach because `DOM.querySelector` does not traverse closed shadows
|
|
794
|
+
* even with `pierce: true` set on the parent `getDocument` call).
|
|
795
|
+
*
|
|
796
|
+
* Required for Cloudflare Turnstile auto-click on integrations where the
|
|
797
|
+
* widget iframe lives behind a closed shadow root (Cloudflare Challenge
|
|
798
|
+
* pages, Workers Static Assets, some CDN configs). Without this, task
|
|
799
|
+
* 0220's auto-click silently fails on those flows.
|
|
800
|
+
*
|
|
801
|
+
* Algorithm (port of patchright `framesPatch.ts:868-1012`
|
|
802
|
+
* `_customFindElementsByParsed`):
|
|
803
|
+
* 1. `DOM.getDocument({ depth: -1, pierce: true })` — yields the full
|
|
804
|
+
* tree, with shadow descendants under `shadowRoots[]` for both open
|
|
805
|
+
* AND closed roots.
|
|
806
|
+
* 2. Recursive walk in JS, matching against a parsed CSS selector. We
|
|
807
|
+
* can't `DOM.querySelector` per shadow because the per-shadow query
|
|
808
|
+
* itself doesn't pierce closed roots either.
|
|
809
|
+
* 3. For matches, `DOM.resolveNode({ backendNodeId })` to get a
|
|
810
|
+
* `RemoteObject.objectId`, wrapped in {@link ElementHandle}.
|
|
811
|
+
*
|
|
812
|
+
* Supported selectors (see `selector.ts`): tag / id / class / attribute /
|
|
813
|
+
* descendant combinator / comma-separated lists. **Not** supported:
|
|
814
|
+
* `>`/`+`/`~` combinators, `:pseudo-classes`, `::pseudo-elements`, XPath.
|
|
815
|
+
* XPath is a stretch goal per task 0253 brief — TODO if a future surface
|
|
816
|
+
* needs it (Turnstile detection only needs CSS).
|
|
817
|
+
*
|
|
818
|
+
* Performance: O(N) in DOM size per call. Acceptable for v0.2; a per-page
|
|
819
|
+
* cache layer is a v0.3+ concern (also called out in 0253).
|
|
820
|
+
*
|
|
821
|
+
* @see tasks/0253-closed-shadow-piercing-locator.md
|
|
822
|
+
* @see PLAN.md §8.2 (`DOM.getDocument` and `DOM.resolveNode` are not on the
|
|
823
|
+
* forbidden list — both fine to use here).
|
|
824
|
+
*/
|
|
825
|
+
async querySelectorPiercing(selector: string): Promise<ElementHandle | null> {
  // A limit of 1 lets the shared implementation stop after the first match.
  const [first] = await this.queryPiercing(selector, 1);
  return first ?? null;
}
|
|
829
|
+
|
|
830
|
+
/**
|
|
831
|
+
* The "all matches" variant of {@link querySelectorPiercing}. Returns every
|
|
832
|
+
* element that satisfies the selector, in depth-first pre-order — same
|
|
833
|
+
* traversal a regular `querySelectorAll` produces, with closed-shadow
|
|
834
|
+
* descendants spliced in at the position they'd appear under the host.
|
|
835
|
+
*
|
|
836
|
+
* Returns an empty array when nothing matches.
|
|
837
|
+
*/
|
|
838
|
+
async querySelectorAllPiercing(selector: string): Promise<ElementHandle[]> {
  // No limit: collect every match.
  const handles = await this.queryPiercing(selector);
  return handles;
}
|
|
841
|
+
|
|
842
|
+
/**
 * Shared implementation behind the piercing locators.
 *
 * Parses `selector`, fetches the full DOM tree with `DOM.getDocument`
 * (`depth: -1`, `pierce: true`), collects matching nodes via
 * `findPiercingMatches`, and resolves each match to an {@link ElementHandle}
 * with `DOM.resolveNode`.
 *
 * @param selector - CSS selector in the subset accepted by `parseSelector`.
 * @param limit - Optional cap forwarded to `findPiercingMatches` so the walk
 *   can stop early.
 * @returns Handles for the matches that could be resolved; possibly empty.
 */
private async queryPiercing(selector: string, limit?: number): Promise<ElementHandle[]> {
  this.assertOpen();
  const parsed = parseSelector(selector);

  // depth: -1 together with pierce: true is the combination patchright
  // uses: the reply is the whole tree, including descendants of both open
  // and closed shadow roots and same-origin iframe contentDocuments.
  const { root } = await this.send<{ root: PierceDomNode }>("DOM.getDocument", {
    depth: -1,
    pierce: true,
  });

  const handles: ElementHandle[] = [];
  for (const { backendNodeId } of findPiercingMatches(root, parsed, limit)) {
    const resolved = await this.send<{ object: RemoteObject }>("DOM.resolveNode", {
      backendNodeId,
    });
    const { objectId } = resolved.object;
    // A node that came back without an objectId is skipped rather than
    // failing the whole query; a partial result is more useful to the
    // Turnstile detector path than an exception.
    if (objectId === undefined) continue;
    handles.push(
      new ElementHandle({
        router: this.router,
        sessionId: this.sessionId,
        objectId,
        backendNodeId,
      }),
    );
  }
  return handles;
}
|
|
877
|
+
|
|
727
878
|
screenshot(_opts?: unknown): Promise<Uint8Array> {
  // Not implemented: always rejects, and `_opts` is ignored.
  const unsupported = new NotImplementedError("page.screenshot");
  return Promise.reject(unsupported);
}
|