@mochi.js/core 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/session.ts CHANGED
@@ -12,6 +12,11 @@
12
12
  * @see PLAN.md §7
13
13
  */
14
14
 
15
+ import {
16
+ type Disposable as ChallengeHandle,
17
+ installTurnstileAutoClick,
18
+ type TurnstileEscalationReason,
19
+ } from "@mochi.js/challenges";
15
20
  import type { MatrixV1 } from "@mochi.js/consistency";
16
21
  import { buildPayload, type PayloadResult } from "@mochi.js/inject";
17
22
  import {
@@ -82,6 +87,23 @@ export interface SessionInit {
82
87
  * @internal
83
88
  */
84
89
  netAdapter?: NetAdapter;
90
+ /**
91
+ * Convenience layer toggles surfaced via
92
+ * `LaunchOptions.challenges`. When `challenges.turnstile.autoClick` is
93
+ * `true`, every page returned by `Session.newPage` has
94
+ * `installTurnstileAutoClick(page, opts)` wired automatically.
95
+ * See `@mochi.js/challenges`.
96
+ */
97
+ challenges?: {
98
+ turnstile?: {
99
+ autoClick?: boolean;
100
+ timeout?: number;
101
+ humanize?: boolean;
102
+ onSolved?: (token: string) => void;
103
+ onEscalation?: (reason: TurnstileEscalationReason) => void;
104
+ pollIntervalMs?: number;
105
+ };
106
+ };
85
107
  }
86
108
 
87
109
  /** Public Cookie shape (re-exported from page.ts). */
@@ -153,6 +175,27 @@ export class Session {
153
175
  * `Session.close`. Undefined when the session has no proxy auth.
154
176
  */
155
177
  private proxyAuthHandle: ProxyAuthHandle | undefined;
178
+ /**
179
+ * Snapshot of the `challenges` launch option, retained so
180
+ * {@link newPage} can install the per-page auto-click handler. Undefined
181
+ * when no challenge convenience layer is enabled. Each page gets its
182
+ * own {@link ChallengeHandle} tracked here for disposal on
183
+ * {@link close}.
184
+ */
185
+ private readonly challengesOpts: SessionInit["challenges"] | undefined;
186
+ private readonly challengeHandles: ChallengeHandle[] = [];
187
+ /**
188
+ * Cache of resolved execution-context ids for worker-style targets,
189
+ * keyed by the worker session id. Populated by {@link handleAttachedTarget}
190
+ * (via {@link extractWorkerExecutionContextId}) on first attach and reused by
191
+ * any later worker CDP op that needs an `executionContextId`. Patchright
192
+ * keeps this on a per-target `CRExecutionContext`; mochi keeps the
193
+ * Session-local map until we grow a real worker-target abstraction.
194
+ *
195
+ * @see crServiceWorkerPatch.ts:32-43, crPagePatch.ts:404-417
196
+ * @internal
197
+ */
198
+ private readonly workerExecutionContextIds = new Map<string, number>();
156
199
 
157
200
  constructor(init: SessionInit) {
158
201
  this.proc = init.proc;
@@ -161,6 +204,7 @@ export class Session {
161
204
  this.bypassInject = init.bypassInject === true;
162
205
  this.netProxy = init.netProxy;
163
206
  this.netAdapter = init.netAdapter ?? defaultNetAdapter;
207
+ this.challengesOpts = init.challenges;
164
208
  // Skip payload compilation entirely when bypassed — capture flows must
165
209
  // not pay the build cost AND must not see the matrix-derived bytes.
166
210
  this._payload = this.bypassInject ? null : buildPayload(init.matrix);
@@ -222,6 +266,30 @@ export class Session {
222
266
  // (only Runtime.enable is forbidden). We enable here so subsequent
223
267
  // addScriptToEvaluateOnNewDocument is honoured by the page domain.
224
268
  await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
269
+ // Task 0255: defensive UA override at the network layer.
270
+ //
271
+ // The inject payload (Page.addScriptToEvaluateOnNewDocument) spoofs
272
+ // `navigator.userAgent` in the JS surface, but `Network.requestWillBeSent`
273
+ // events (and the request line itself) carry the BARE browser UA — which
274
+ // under `--headless=new` still contains the substring "HeadlessChrome".
275
+ // The inject can never reach those bytes because they're emitted before
276
+ // any document script runs.
277
+ //
278
+ // `Network.setUserAgentOverride` is a per-target setter that does NOT
279
+ // require `Network.enable` (it only stores override state); §8.2's ban
280
+ // on `Network.enable` is therefore unaffected. Sent immediately after
281
+ // attach and before any navigation so the very first request the page
282
+ // issues already carries the matrix UA.
283
+ //
284
+ // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows must
285
+ // record the bare browser fingerprint, including its raw UA.
286
+ if (!this.bypassInject) {
287
+ await this.router.send(
288
+ "Network.setUserAgentOverride",
289
+ { userAgent: this.profile.userAgent },
290
+ { sessionId: attached.sessionId },
291
+ );
292
+ }
225
293
  // PLAN.md §12.1 / task 0040 — capture flow short-circuits inject so the
226
294
  // browser reports its bare fingerprint. Otherwise install the payload
227
295
  // main-world via §8.4. worldName MUST be the empty string.
@@ -259,6 +327,21 @@ export class Session {
259
327
  },
260
328
  });
261
329
  this._pages.push(page);
330
+ // Wire the Turnstile auto-click convenience layer if the session was
331
+ // launched with `challenges.turnstile.autoClick: true`. The handle is
332
+ // tracked on the Session so it disposes on close (and the page-close
333
+ // path also cleans up via the disposable's idempotent dispose).
334
+ const ts = this.challengesOpts?.turnstile;
335
+ if (ts !== undefined && ts.autoClick === true) {
336
+ const tsOpts: Parameters<typeof installTurnstileAutoClick>[1] = {};
337
+ if (ts.timeout !== undefined) tsOpts.timeout = ts.timeout;
338
+ if (ts.humanize !== undefined) tsOpts.humanize = ts.humanize;
339
+ if (ts.onSolved !== undefined) tsOpts.onSolved = ts.onSolved;
340
+ if (ts.onEscalation !== undefined) tsOpts.onEscalation = ts.onEscalation;
341
+ if (ts.pollIntervalMs !== undefined) tsOpts.pollIntervalMs = ts.pollIntervalMs;
342
+ const handle = installTurnstileAutoClick(page, tsOpts);
343
+ this.challengeHandles.push(handle);
344
+ }
262
345
  return page;
263
346
  }
264
347
 
@@ -388,6 +471,16 @@ export class Session {
388
471
  async close(): Promise<void> {
389
472
  if (this.closed) return;
390
473
  this.closed = true;
474
+ // Dispose any challenge convenience-layer handles first so background
475
+ // pollers stop before pages tear down their CDP sessions.
476
+ for (const h of this.challengeHandles) {
477
+ try {
478
+ h.dispose();
479
+ } catch {
480
+ // ignore — best-effort
481
+ }
482
+ }
483
+ this.challengeHandles.length = 0;
391
484
  // Mark all pages as closed (they'll error on further use).
392
485
  for (const p of this._pages) {
393
486
  // close() is idempotent on Page.
@@ -500,18 +593,35 @@ export class Session {
500
593
 
501
594
  /**
502
595
  * Inject the payload into a freshly-attached target if it's a worker-
503
- * style target (dedicated worker, shared worker, service worker, audio
504
- * worklet, etc.), then resume it.
596
+ * style target (dedicated worker, shared worker, audio worklet — service
597
+ * workers go through the same path; see notes below), then resume it.
505
598
  *
506
599
  * Worker targets do NOT support `Page.addScriptToEvaluateOnNewDocument`
507
- * (no Page domain). PLAN.md §8.4 calls out that we use `Runtime.evaluate`
508
- * against the paused worker session before issuing
509
- * `Runtime.runIfWaitingForDebugger`. The §8.2 forbidden-method assertion
510
- * does NOT trip because we never send `Runtime.enable` — only
511
- * `Runtime.evaluate` against an already-paused worker target.
600
+ * (no Page domain). PLAN.md §8.4 calls out that the worker target accepts
601
+ * `Runtime.evaluate` even though `Runtime.enable` is forbidden by §8.2.
602
+ *
603
+ * The Patchright-cited bootstrap (task 0254 `crServiceWorkerPatch.ts:32-43`,
604
+ * `crPagePatch.ts:404-417`) tightens the inject race window:
605
+ * 1. `Runtime.evaluate("globalThis", { serialization: "idOnly" })` —
606
+ * returns a `RemoteObject` whose `objectId` carries the worker's
607
+ * execution-context id. `serialization: "idOnly"` skips the value
608
+ * preview round-trip we don't need.
609
+ * 2. Parse `objectId.split(".")[1]` for the contextId. The wire format
610
+ * is `"<runtimeAgentId>.<contextId>.<remoteObjectId>"`; we validate
611
+ * the split and fail loudly if Chromium has moved the goalposts.
612
+ * 3. Inject the payload via `Runtime.callFunctionOn({ functionDeclaration,
613
+ * executionContextId, returnByValue: true })`. This binds the call
614
+ * to the worker's own context rather than relying on
615
+ * `Runtime.evaluate`'s implicit context resolution, which is the
616
+ * coarser pattern v0.1.x used.
617
+ * 4. `Runtime.runIfWaitingForDebugger` to resume the target.
512
618
  *
513
- * Caveat: worker injection has a smaller stealth ceiling than main-
514
- * world Page injection. Documented in `docs/limits.md`.
619
+ * We never send `Runtime.enable` — that's the whole point of extracting
620
+ * the contextId via the idOnly trick instead of waiting for an
621
+ * `Runtime.executionContextCreated` event.
622
+ *
623
+ * Caveat: worker injection has a smaller stealth ceiling than main-world
624
+ * Page injection. Documented in `docs/limits.md`.
515
625
  */
516
626
  private async handleAttachedTarget(
517
627
  ev: AttachedToTargetEvent,
@@ -527,12 +637,20 @@ export class Session {
527
637
  // PLAN.md §12.1 / task 0040 — capture flow skips worker injection too.
528
638
  if (isWorkerLike && !this.bypassInject && this._payload !== null) {
529
639
  try {
640
+ const executionContextId = await this.extractWorkerExecutionContextId(childSessionId);
641
+ this.workerExecutionContextIds.set(childSessionId, executionContextId);
642
+ // `Runtime.callFunctionOn` requires either an `objectId` OR an
643
+ // `executionContextId`. We use the latter — patchright's pattern —
644
+ // so the call binds to the worker's own context, not whatever
645
+ // `Runtime.evaluate` happens to resolve. The payload IIFE is wrapped
646
+ // as a function declaration so `callFunctionOn` accepts it.
530
647
  await this.router.send(
531
- "Runtime.evaluate",
648
+ "Runtime.callFunctionOn",
532
649
  {
533
- expression: this._payload.code,
650
+ functionDeclaration: `function() { ${this._payload.code} }`,
651
+ executionContextId,
652
+ returnByValue: true,
534
653
  awaitPromise: false,
535
- returnByValue: false,
536
654
  // includeCommandLineAPI must remain false (§8.2).
537
655
  },
538
656
  { sessionId: childSessionId },
@@ -560,6 +678,73 @@ export class Session {
560
678
  }
561
679
  }
562
680
 
681
+ /**
682
+ * Resolve the worker target's execution-context id WITHOUT
683
+ * `Runtime.enable` — patchright's trick.
684
+ *
685
+ * Sends `Runtime.evaluate("globalThis", { serialization: "idOnly" })`
686
+ * against the paused worker session. The returned `RemoteObject.objectId`
687
+ * has the on-the-wire shape `"<runtimeAgentId>.<contextId>.<localId>"`
688
+ * (Chromium >= v131; verified against patchright's parser). We extract
689
+ * `split(".")[1]` and assert it's a positive integer.
690
+ *
691
+ * Throws with a precise diagnostic if Chromium changes the format —
692
+ * silent fallback would mask a real wire-protocol shift, which we want
693
+ * to catch in CI rather than ship as a degraded inject path.
694
+ *
695
+ * @see crServiceWorkerPatch.ts:32-43
696
+ */
697
+ private async extractWorkerExecutionContextId(childSessionId: string): Promise<number> {
698
+ const evalRes = await this.router.send<{ result: { objectId?: string; type?: string } }>(
699
+ "Runtime.evaluate",
700
+ {
701
+ expression: "globalThis",
702
+ // idOnly skips full value serialisation — we want the objectId
703
+ // alone. Supported on Chromium >= v124 (chrome-for-testing v131+
704
+ // in the mochi profile floor).
705
+ serialization: "idOnly",
706
+ // includeCommandLineAPI must remain false (§8.2).
707
+ },
708
+ { sessionId: childSessionId },
709
+ );
710
+ const objectId = evalRes.result.objectId;
711
+ if (typeof objectId !== "string" || objectId.length === 0) {
712
+ throw new Error(
713
+ `[mochi] worker idOnly bootstrap: Runtime.evaluate("globalThis") returned no objectId (got ${JSON.stringify(evalRes.result)})`,
714
+ );
715
+ }
716
+ const parts = objectId.split(".");
717
+ // Format: "<runtimeAgentId>.<contextId>.<localId>" — patchright also
718
+ // pulls index [1]. Refuse to guess if there is no second segment.
719
+ if (parts.length < 2) {
720
+ throw new Error(
721
+ `[mochi] worker idOnly bootstrap: unexpected objectId shape "${objectId}" (expected dotted segments)`,
722
+ );
723
+ }
724
+ const ctxRaw = parts[1];
725
+ if (ctxRaw === undefined || ctxRaw.length === 0) {
726
+ throw new Error(
727
+ `[mochi] worker idOnly bootstrap: objectId "${objectId}" has empty contextId segment`,
728
+ );
729
+ }
730
+ const contextId = Number.parseInt(ctxRaw, 10);
731
+ if (!Number.isInteger(contextId) || contextId <= 0 || String(contextId) !== ctxRaw) {
732
+ throw new Error(
733
+ `[mochi] worker idOnly bootstrap: contextId segment "${ctxRaw}" of objectId "${objectId}" is not a positive integer`,
734
+ );
735
+ }
736
+ return contextId;
737
+ }
738
+
739
+ /**
740
+ * Snapshot of the worker → executionContextId cache. Test-only.
741
+ *
742
+ * @internal
743
+ */
744
+ _internalWorkerExecutionContextIds(): ReadonlyMap<string, number> {
745
+ return new Map(this.workerExecutionContextIds);
746
+ }
747
+
563
748
  private installCrashGuard(): void {
564
749
  // If Chromium dies unexpectedly, we want to mark the session closed so
565
750
  // pending and future calls reject cleanly.