@mochi.js/core 0.1.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -5
- package/src/__tests__/inject.test.ts +2 -0
- package/src/__tests__/piercing.test.ts +164 -0
- package/src/__tests__/proc.test.ts +383 -0
- package/src/__tests__/proxy-auth.test.ts +2 -2
- package/src/__tests__/selector.test.ts +188 -0
- package/src/__tests__/window-size.e2e.test.ts +130 -0
- package/src/cdp/types.ts +47 -0
- package/src/index.ts +2 -0
- package/src/launch.ts +119 -8
- package/src/page/element-handle.ts +110 -0
- package/src/page/piercing.ts +135 -0
- package/src/page/selector.ts +423 -0
- package/src/page.ts +191 -0
- package/src/proc.ts +386 -41
- package/src/proxy-auth.ts +36 -19
- package/src/session.ts +197 -12
package/src/session.ts
CHANGED
|
@@ -12,6 +12,11 @@
|
|
|
12
12
|
* @see PLAN.md §7
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
|
+
import {
|
|
16
|
+
type Disposable as ChallengeHandle,
|
|
17
|
+
installTurnstileAutoClick,
|
|
18
|
+
type TurnstileEscalationReason,
|
|
19
|
+
} from "@mochi.js/challenges";
|
|
15
20
|
import type { MatrixV1 } from "@mochi.js/consistency";
|
|
16
21
|
import { buildPayload, type PayloadResult } from "@mochi.js/inject";
|
|
17
22
|
import {
|
|
@@ -82,6 +87,23 @@ export interface SessionInit {
|
|
|
82
87
|
* @internal
|
|
83
88
|
*/
|
|
84
89
|
netAdapter?: NetAdapter;
|
|
90
|
+
/**
|
|
91
|
+
* Convenience layer toggles surfaced via
|
|
92
|
+
* `LaunchOptions.challenges`. When `challenges.turnstile.autoClick` is
|
|
93
|
+
* `true`, every page returned by `Session.newPage` has
|
|
94
|
+
* `installTurnstileAutoClick(page, opts)` wired automatically.
|
|
95
|
+
* See `@mochi.js/challenges`.
|
|
96
|
+
*/
|
|
97
|
+
challenges?: {
|
|
98
|
+
turnstile?: {
|
|
99
|
+
autoClick?: boolean;
|
|
100
|
+
timeout?: number;
|
|
101
|
+
humanize?: boolean;
|
|
102
|
+
onSolved?: (token: string) => void;
|
|
103
|
+
onEscalation?: (reason: TurnstileEscalationReason) => void;
|
|
104
|
+
pollIntervalMs?: number;
|
|
105
|
+
};
|
|
106
|
+
};
|
|
85
107
|
}
|
|
86
108
|
|
|
87
109
|
/** Public Cookie shape (re-exported from page.ts). */
|
|
@@ -153,6 +175,27 @@ export class Session {
|
|
|
153
175
|
* `Session.close`. Undefined when the session has no proxy auth.
|
|
154
176
|
*/
|
|
155
177
|
private proxyAuthHandle: ProxyAuthHandle | undefined;
|
|
178
|
+
/**
|
|
179
|
+
* Snapshot of the `challenges` launch option, retained so
|
|
180
|
+
* {@link newPage} can install the per-page auto-click handler. Undefined
|
|
181
|
+
* when no challenge convenience layer is enabled. Each page gets its
|
|
182
|
+
* own {@link ChallengeHandle} tracked here for disposal on
|
|
183
|
+
* {@link close}.
|
|
184
|
+
*/
|
|
185
|
+
private readonly challengesOpts: SessionInit["challenges"] | undefined;
|
|
186
|
+
private readonly challengeHandles: ChallengeHandle[] = [];
|
|
187
|
+
/**
|
|
188
|
+
* Cache of resolved execution-context ids for worker-style targets,
|
|
189
|
+
* keyed by the worker session id. Populated by
|
|
190
|
+
* {@link extractWorkerExecutionContextId} on first attach and reused by
|
|
191
|
+
* any later worker CDP op that needs an `executionContextId`. Patchright
|
|
192
|
+
* keeps this on a per-target `CRExecutionContext`; mochi keeps the
|
|
193
|
+
* Session-local map until we grow a real worker-target abstraction.
|
|
194
|
+
*
|
|
195
|
+
* @see crServiceWorkerPatch.ts:32-43, crPagePatch.ts:404-417
|
|
196
|
+
* @internal
|
|
197
|
+
*/
|
|
198
|
+
private readonly workerExecutionContextIds = new Map<string, number>();
|
|
156
199
|
|
|
157
200
|
constructor(init: SessionInit) {
|
|
158
201
|
this.proc = init.proc;
|
|
@@ -161,6 +204,7 @@ export class Session {
|
|
|
161
204
|
this.bypassInject = init.bypassInject === true;
|
|
162
205
|
this.netProxy = init.netProxy;
|
|
163
206
|
this.netAdapter = init.netAdapter ?? defaultNetAdapter;
|
|
207
|
+
this.challengesOpts = init.challenges;
|
|
164
208
|
// Skip payload compilation entirely when bypassed — capture flows must
|
|
165
209
|
// not pay the build cost AND must not see the matrix-derived bytes.
|
|
166
210
|
this._payload = this.bypassInject ? null : buildPayload(init.matrix);
|
|
@@ -222,6 +266,30 @@ export class Session {
|
|
|
222
266
|
// (only Runtime.enable is forbidden). We enable here so subsequent
|
|
223
267
|
// addScriptToEvaluateOnNewDocument is honoured by the page domain.
|
|
224
268
|
await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
|
|
269
|
+
// Task 0255: defensive UA override at the network layer.
|
|
270
|
+
//
|
|
271
|
+
// The inject payload (Page.addScriptToEvaluateOnNewDocument) spoofs
|
|
272
|
+
// `navigator.userAgent` in the JS surface, but `Network.requestWillBeSent`
|
|
273
|
+
// events (and the request line itself) carry the BARE browser UA — which
|
|
274
|
+
// under `--headless=new` still contains the substring "HeadlessChrome".
|
|
275
|
+
// The inject can never reach those bytes because they're emitted before
|
|
276
|
+
// any document script runs.
|
|
277
|
+
//
|
|
278
|
+
// `Network.setUserAgentOverride` is a per-target setter that does NOT
|
|
279
|
+
// require `Network.enable` (it only stores override state); §8.2's ban
|
|
280
|
+
// on `Network.enable` is therefore unaffected. Sent immediately after
|
|
281
|
+
// attach and before any navigation so the very first request the page
|
|
282
|
+
// issues already carries the matrix UA.
|
|
283
|
+
//
|
|
284
|
+
// Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows must
|
|
285
|
+
// record the bare browser fingerprint, including its raw UA.
|
|
286
|
+
if (!this.bypassInject) {
|
|
287
|
+
await this.router.send(
|
|
288
|
+
"Network.setUserAgentOverride",
|
|
289
|
+
{ userAgent: this.profile.userAgent },
|
|
290
|
+
{ sessionId: attached.sessionId },
|
|
291
|
+
);
|
|
292
|
+
}
|
|
225
293
|
// PLAN.md §12.1 / task 0040 — capture flow short-circuits inject so the
|
|
226
294
|
// browser reports its bare fingerprint. Otherwise install the payload
|
|
227
295
|
// main-world via §8.4. worldName MUST be the empty string.
|
|
@@ -259,6 +327,21 @@ export class Session {
|
|
|
259
327
|
},
|
|
260
328
|
});
|
|
261
329
|
this._pages.push(page);
|
|
330
|
+
// Wire the Turnstile auto-click convenience layer if the session was
|
|
331
|
+
// launched with `challenges.turnstile.autoClick: true`. The handle is
|
|
332
|
+
// tracked on the Session so it disposes on close (and the page-close
|
|
333
|
+
// path also cleans up via the disposable's idempotent dispose).
|
|
334
|
+
const ts = this.challengesOpts?.turnstile;
|
|
335
|
+
if (ts !== undefined && ts.autoClick === true) {
|
|
336
|
+
const tsOpts: Parameters<typeof installTurnstileAutoClick>[1] = {};
|
|
337
|
+
if (ts.timeout !== undefined) tsOpts.timeout = ts.timeout;
|
|
338
|
+
if (ts.humanize !== undefined) tsOpts.humanize = ts.humanize;
|
|
339
|
+
if (ts.onSolved !== undefined) tsOpts.onSolved = ts.onSolved;
|
|
340
|
+
if (ts.onEscalation !== undefined) tsOpts.onEscalation = ts.onEscalation;
|
|
341
|
+
if (ts.pollIntervalMs !== undefined) tsOpts.pollIntervalMs = ts.pollIntervalMs;
|
|
342
|
+
const handle = installTurnstileAutoClick(page, tsOpts);
|
|
343
|
+
this.challengeHandles.push(handle);
|
|
344
|
+
}
|
|
262
345
|
return page;
|
|
263
346
|
}
|
|
264
347
|
|
|
@@ -388,6 +471,16 @@ export class Session {
|
|
|
388
471
|
async close(): Promise<void> {
|
|
389
472
|
if (this.closed) return;
|
|
390
473
|
this.closed = true;
|
|
474
|
+
// Dispose any challenge convenience-layer handles first so background
|
|
475
|
+
// pollers stop before pages tear down their CDP sessions.
|
|
476
|
+
for (const h of this.challengeHandles) {
|
|
477
|
+
try {
|
|
478
|
+
h.dispose();
|
|
479
|
+
} catch {
|
|
480
|
+
// ignore — best-effort
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
this.challengeHandles.length = 0;
|
|
391
484
|
// Mark all pages as closed (they'll error on further use).
|
|
392
485
|
for (const p of this._pages) {
|
|
393
486
|
// close() is idempotent on Page.
|
|
@@ -500,18 +593,35 @@ export class Session {
|
|
|
500
593
|
|
|
501
594
|
/**
|
|
502
595
|
* Inject the payload into a freshly-attached target if it's a worker-
|
|
503
|
-
* style target (dedicated worker, shared worker,
|
|
504
|
-
*
|
|
596
|
+
* style target (dedicated worker, shared worker, audio worklet — service
|
|
597
|
+
* workers go through the same path; see notes below), then resume it.
|
|
505
598
|
*
|
|
506
599
|
* Worker targets do NOT support `Page.addScriptToEvaluateOnNewDocument`
|
|
507
|
-
* (no Page domain). PLAN.md §8.4 calls out that
|
|
508
|
-
*
|
|
509
|
-
*
|
|
510
|
-
*
|
|
511
|
-
* `
|
|
600
|
+
* (no Page domain). PLAN.md §8.4 calls out that the worker target accepts
|
|
601
|
+
* `Runtime.evaluate` even though `Runtime.enable` is forbidden by §8.2.
|
|
602
|
+
*
|
|
603
|
+
* The Patchright-cited bootstrap (task 0254 — `crServiceWorkerPatch.ts:32-43`,
|
|
604
|
+
* `crPagePatch.ts:404-417`) tightens the inject race window:
|
|
605
|
+
* 1. `Runtime.evaluate("globalThis", { serialization: "idOnly" })` —
|
|
606
|
+
* returns a `RemoteObject` whose `objectId` carries the worker's
|
|
607
|
+
* execution-context id. `serialization: "idOnly"` skips the value
|
|
608
|
+
* preview round-trip we don't need.
|
|
609
|
+
* 2. Parse `objectId.split(".")[1]` for the contextId. The wire format
|
|
610
|
+
* is `"<runtimeAgentId>.<contextId>.<localId>"`; we validate
|
|
611
|
+
* the split and fail loudly if Chromium has moved the goalposts.
|
|
612
|
+
* 3. Inject the payload via `Runtime.callFunctionOn({ functionDeclaration,
|
|
613
|
+
* executionContextId, returnByValue: true })`. This binds the call
|
|
614
|
+
* to the worker's own context rather than relying on
|
|
615
|
+
* `Runtime.evaluate`'s implicit context resolution, which is the
|
|
616
|
+
* coarser pattern v0.1.x used.
|
|
617
|
+
* 4. `Runtime.runIfWaitingForDebugger` to resume the target.
|
|
512
618
|
*
|
|
513
|
-
*
|
|
514
|
-
*
|
|
619
|
+
* We never send `Runtime.enable` — that's the whole point of extracting
|
|
620
|
+
* the contextId via the idOnly trick instead of waiting for an
|
|
621
|
+
* `Runtime.executionContextCreated` event.
|
|
622
|
+
*
|
|
623
|
+
* Caveat: worker injection has a smaller stealth ceiling than main-world
|
|
624
|
+
* Page injection. Documented in `docs/limits.md`.
|
|
515
625
|
*/
|
|
516
626
|
private async handleAttachedTarget(
|
|
517
627
|
ev: AttachedToTargetEvent,
|
|
@@ -527,12 +637,20 @@ export class Session {
|
|
|
527
637
|
// PLAN.md §12.1 / task 0040 — capture flow skips worker injection too.
|
|
528
638
|
if (isWorkerLike && !this.bypassInject && this._payload !== null) {
|
|
529
639
|
try {
|
|
640
|
+
const executionContextId = await this.extractWorkerExecutionContextId(childSessionId);
|
|
641
|
+
this.workerExecutionContextIds.set(childSessionId, executionContextId);
|
|
642
|
+
// `Runtime.callFunctionOn` requires either an `objectId` OR an
|
|
643
|
+
// `executionContextId`. We use the latter — patchright's pattern —
|
|
644
|
+
// so the call binds to the worker's own context, not whatever
|
|
645
|
+
// `Runtime.evaluate` happens to resolve. The payload IIFE is wrapped
|
|
646
|
+
// as a function declaration so `callFunctionOn` accepts it.
|
|
530
647
|
await this.router.send(
|
|
531
|
-
"Runtime.
|
|
648
|
+
"Runtime.callFunctionOn",
|
|
532
649
|
{
|
|
533
|
-
|
|
650
|
+
functionDeclaration: `function() { ${this._payload.code} }`,
|
|
651
|
+
executionContextId,
|
|
652
|
+
returnByValue: true,
|
|
534
653
|
awaitPromise: false,
|
|
535
|
-
returnByValue: false,
|
|
536
654
|
// includeCommandLineAPI must remain false (§8.2).
|
|
537
655
|
},
|
|
538
656
|
{ sessionId: childSessionId },
|
|
@@ -560,6 +678,73 @@ export class Session {
|
|
|
560
678
|
}
|
|
561
679
|
}
|
|
562
680
|
|
|
681
|
+
/**
|
|
682
|
+
* Resolve the worker target's execution-context id WITHOUT
|
|
683
|
+
* `Runtime.enable` — patchright's trick.
|
|
684
|
+
*
|
|
685
|
+
* Sends `Runtime.evaluate("globalThis", { serialization: "idOnly" })`
|
|
686
|
+
* against the paused worker session. The returned `RemoteObject.objectId`
|
|
687
|
+
* has the on-the-wire shape `"<runtimeAgentId>.<contextId>.<localId>"`
|
|
688
|
+
* (Chromium >= v131; verified against patchright's parser). We extract
|
|
689
|
+
* `split(".")[1]` and assert it's a positive integer.
|
|
690
|
+
*
|
|
691
|
+
* Throws with a precise diagnostic if Chromium changes the format —
|
|
692
|
+
* silent fallback would mask a real wire-protocol shift, which we want
|
|
693
|
+
* to catch in CI rather than ship as a degraded inject path.
|
|
694
|
+
*
|
|
695
|
+
* @see crServiceWorkerPatch.ts:32-43
|
|
696
|
+
*/
|
|
697
|
+
private async extractWorkerExecutionContextId(childSessionId: string): Promise<number> {
|
|
698
|
+
const evalRes = await this.router.send<{ result: { objectId?: string; type?: string } }>(
|
|
699
|
+
"Runtime.evaluate",
|
|
700
|
+
{
|
|
701
|
+
expression: "globalThis",
|
|
702
|
+
// idOnly skips full value serialisation — we want the objectId
|
|
703
|
+
// alone. Supported on Chromium >= v124; the mochi profile floor
|
|
704
|
+
// (chrome-for-testing v131+) already satisfies that.
|
|
705
|
+
serialization: "idOnly",
|
|
706
|
+
// includeCommandLineAPI must remain false (§8.2).
|
|
707
|
+
},
|
|
708
|
+
{ sessionId: childSessionId },
|
|
709
|
+
);
|
|
710
|
+
const objectId = evalRes.result.objectId;
|
|
711
|
+
if (typeof objectId !== "string" || objectId.length === 0) {
|
|
712
|
+
throw new Error(
|
|
713
|
+
`[mochi] worker idOnly bootstrap: Runtime.evaluate("globalThis") returned no objectId (got ${JSON.stringify(evalRes.result)})`,
|
|
714
|
+
);
|
|
715
|
+
}
|
|
716
|
+
const parts = objectId.split(".");
|
|
717
|
+
// Format: "<runtimeAgentId>.<contextId>.<localId>" — patchright also
|
|
718
|
+
// pulls index [1]. Refuse to guess if there are fewer than two segments.
|
|
719
|
+
if (parts.length < 2) {
|
|
720
|
+
throw new Error(
|
|
721
|
+
`[mochi] worker idOnly bootstrap: unexpected objectId shape "${objectId}" (expected dotted segments)`,
|
|
722
|
+
);
|
|
723
|
+
}
|
|
724
|
+
const ctxRaw = parts[1];
|
|
725
|
+
if (ctxRaw === undefined || ctxRaw.length === 0) {
|
|
726
|
+
throw new Error(
|
|
727
|
+
`[mochi] worker idOnly bootstrap: objectId "${objectId}" has empty contextId segment`,
|
|
728
|
+
);
|
|
729
|
+
}
|
|
730
|
+
const contextId = Number.parseInt(ctxRaw, 10);
|
|
731
|
+
if (!Number.isInteger(contextId) || contextId <= 0 || String(contextId) !== ctxRaw) {
|
|
732
|
+
throw new Error(
|
|
733
|
+
`[mochi] worker idOnly bootstrap: contextId segment "${ctxRaw}" of objectId "${objectId}" is not a positive integer`,
|
|
734
|
+
);
|
|
735
|
+
}
|
|
736
|
+
return contextId;
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
/**
|
|
740
|
+
* Snapshot of the worker → executionContextId cache. Test-only.
|
|
741
|
+
*
|
|
742
|
+
* @internal
|
|
743
|
+
*/
|
|
744
|
+
_internalWorkerExecutionContextIds(): ReadonlyMap<string, number> {
|
|
745
|
+
return new Map(this.workerExecutionContextIds);
|
|
746
|
+
}
|
|
747
|
+
|
|
563
748
|
private installCrashGuard(): void {
|
|
564
749
|
// If Chromium dies unexpectedly, we want to mark the session closed so
|
|
565
750
|
// pending and future calls reject cleanly.
|