@mochi.js/core 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/session.ts CHANGED
@@ -184,6 +184,18 @@ export class Session {
184
184
  */
185
185
  private readonly challengesOpts: SessionInit["challenges"] | undefined;
186
186
  private readonly challengeHandles: ChallengeHandle[] = [];
187
+ /**
188
+ * Cache of resolved execution-context ids for worker-style targets,
189
+ * keyed by the worker session id. Populated by
190
+ * {@link extractWorkerExecutionContextId} on first attach and reused by
191
+ * any later worker CDP op that needs an `executionContextId`. Patchright
192
+ * keeps this on a per-target `CRExecutionContext`; mochi keeps the
193
+ * Session-local map until we grow a real worker-target abstraction.
194
+ *
195
+ * @see crServiceWorkerPatch.ts:32-43, crPagePatch.ts:404-417
196
+ * @internal
197
+ */
198
+ private readonly workerExecutionContextIds = new Map<string, number>();
187
199
 
188
200
  constructor(init: SessionInit) {
189
201
  this.proc = init.proc;
@@ -254,6 +266,73 @@ export class Session {
254
266
  // (only Runtime.enable is forbidden). We enable here so subsequent
255
267
  // addScriptToEvaluateOnNewDocument is honoured by the page domain.
256
268
  await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
269
+ // Task 0262: timezone spoof via CDP `Emulation.setTimezoneOverride`.
270
+ //
271
+ // Drives BOTH `Intl.DateTimeFormat().resolvedOptions().timeZone` AND
272
+ // `Date.getTimezoneOffset()` because Chromium's V8 reads from the same
273
+ // internal timezone source. We do NOT manually rewrite
274
+ // `Date.prototype.getTimezoneOffset` in inject — that's detectable via
275
+ // prototype-shape checks. The CDP override is the canonical
276
+ // mechanism.
277
+ //
278
+ // Per the CDP docs (`tot/Emulation/#method-setTimezoneOverride`),
279
+ // this method does NOT require `Emulation.enable` (it stores override
280
+ // state directly on the target's `EmulationAgent`). §8.2's bans are
281
+ // unaffected. Sent per-target before any navigation so the very first
282
+ // document JS already sees the spoofed zone.
283
+ //
284
+ // The empty-string sentinel in the protocol means "clear override";
285
+ // we never send empty here because that would defeat the purpose.
286
+ //
287
+ // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows
288
+ // record the bare browser timezone.
289
+ if (!this.bypassInject) {
290
+ await this.router.send(
291
+ "Emulation.setTimezoneOverride",
292
+ { timezoneId: this.profile.timezone },
293
+ { sessionId: attached.sessionId },
294
+ );
295
+ }
296
+ // Task 0255: defensive UA override at the network layer.
297
+ //
298
+ // The inject payload (Page.addScriptToEvaluateOnNewDocument) spoofs
299
+ // `navigator.userAgent` and `navigator.userAgentData` in the JS
300
+ // surface, but `Network.requestWillBeSent` events (and the request
301
+ // line itself) carry the BARE browser UA — which under `--headless=new`
302
+ // still contains the substring "HeadlessChrome" — AND the bare
303
+ // `Sec-CH-UA*` request-header set. The inject can never reach those
304
+ // bytes because they're emitted before any document script runs.
305
+ //
306
+ // 0255 plumbed `userAgent`. 0261 closes the cross-layer leak 0255 left
307
+ // open: without `userAgentMetadata`, the request `Sec-CH-UA*` headers
308
+ // carry CfT defaults instead of the matrix, so a fingerprinter doing
309
+ // `getHighEntropyValues()` and comparing against the request headers
310
+ // sees a mismatch (direct PLAN.md I-5 violation). The metadata struct
311
+ // is the CDP-canonical UA-CH descriptor; Chromium derives every
312
+ // `Sec-CH-UA*` header from it. Both surfaces (this network call and
313
+ // the inject's `client-hints.ts` getHighEntropyValues) read the SAME
314
+ // matrix fields, so they cannot drift.
315
+ //
316
+ // `Network.setUserAgentOverride` is a per-target setter that does NOT
317
+ // require `Network.enable` (it only stores override state); §8.2's ban
318
+ // on `Network.enable` is therefore unaffected, with or without the
319
+ // metadata payload. Sent immediately after attach and before any
320
+ // navigation so the very first request the page issues already carries
321
+ // the matrix UA + UA-CH headers.
322
+ //
323
+ // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows must
324
+ // record the bare browser fingerprint, including its raw UA AND raw
325
+ // `Sec-CH-UA*` headers.
326
+ if (!this.bypassInject) {
327
+ await this.router.send(
328
+ "Network.setUserAgentOverride",
329
+ {
330
+ userAgent: this.profile.userAgent,
331
+ userAgentMetadata: buildUserAgentMetadata(this.profile),
332
+ },
333
+ { sessionId: attached.sessionId },
334
+ );
335
+ }
257
336
  // PLAN.md §12.1 / task 0040 — capture flow short-circuits inject so the
258
337
  // browser reports its bare fingerprint. Otherwise install the payload
259
338
  // main-world via §8.4. worldName MUST be the empty string.
@@ -557,18 +636,35 @@ export class Session {
557
636
 
558
637
  /**
559
638
  * Inject the payload into a freshly-attached target if it's a worker-
560
- * style target (dedicated worker, shared worker, service worker, audio
561
- * worklet, etc.), then resume it.
639
+ * style target (dedicated worker, shared worker, audio worklet — service
640
+ * workers go through the same path; see notes below), then resume it.
562
641
  *
563
642
  * Worker targets do NOT support `Page.addScriptToEvaluateOnNewDocument`
564
- * (no Page domain). PLAN.md §8.4 calls out that we use `Runtime.evaluate`
565
- * against the paused worker session before issuing
566
- * `Runtime.runIfWaitingForDebugger`. The §8.2 forbidden-method assertion
567
- * does NOT trip because we never send `Runtime.enable` — only
568
- * `Runtime.evaluate` against an already-paused worker target.
643
+ * (no Page domain). PLAN.md §8.4 calls out that the worker target accepts
644
+ * `Runtime.evaluate` even though `Runtime.enable` is forbidden by §8.2.
645
+ *
646
+ * The Patchright-cited bootstrap (task 0254 `crServiceWorkerPatch.ts:32-43`,
647
+ * `crPagePatch.ts:404-417`) tightens the inject race window:
648
+ * 1. `Runtime.evaluate("globalThis", { serialization: "idOnly" })` —
649
+ * returns a `RemoteObject` whose `objectId` carries the worker's
650
+ * execution-context id. `serialization: "idOnly"` skips the value
651
+ * preview round-trip we don't need.
652
+ * 2. Parse `objectId.split(".")[1]` for the contextId. The wire format
653
+ * is `"<runtimeAgentId>.<contextId>.<localId>"`; we validate
654
+ * the split and fail loudly if Chromium has moved the goalposts.
655
+ * 3. Inject the payload via `Runtime.callFunctionOn({ functionDeclaration,
656
+ * executionContextId, returnByValue: true })`. This binds the call
657
+ * to the worker's own context rather than relying on
658
+ * `Runtime.evaluate`'s implicit context resolution, which is the
659
+ * coarser pattern v0.1.x used.
660
+ * 4. `Runtime.runIfWaitingForDebugger` to resume the target.
569
661
  *
570
- * Caveat: worker injection has a smaller stealth ceiling than main-
571
- * world Page injection. Documented in `docs/limits.md`.
662
+ * We never send `Runtime.enable` — that's the whole point of extracting
663
+ * the contextId via the idOnly trick instead of waiting for an
664
+ * `Runtime.executionContextCreated` event.
665
+ *
666
+ * Caveat: worker injection has a smaller stealth ceiling than main-world
667
+ * Page injection. Documented in `docs/limits.md`.
572
668
  */
573
669
  private async handleAttachedTarget(
574
670
  ev: AttachedToTargetEvent,
@@ -584,12 +680,20 @@ export class Session {
584
680
  // PLAN.md §12.1 / task 0040 — capture flow skips worker injection too.
585
681
  if (isWorkerLike && !this.bypassInject && this._payload !== null) {
586
682
  try {
683
+ const executionContextId = await this.extractWorkerExecutionContextId(childSessionId);
684
+ this.workerExecutionContextIds.set(childSessionId, executionContextId);
685
+ // `Runtime.callFunctionOn` requires either an `objectId` OR an
686
+ // `executionContextId`. We use the latter — patchright's pattern —
687
+ // so the call binds to the worker's own context, not whatever
688
+ // `Runtime.evaluate` happens to resolve. The payload IIFE is wrapped
689
+ // as a function declaration so `callFunctionOn` accepts it.
587
690
  await this.router.send(
588
- "Runtime.evaluate",
691
+ "Runtime.callFunctionOn",
589
692
  {
590
- expression: this._payload.code,
693
+ functionDeclaration: `function() { ${this._payload.code} }`,
694
+ executionContextId,
695
+ returnByValue: true,
591
696
  awaitPromise: false,
592
- returnByValue: false,
593
697
  // includeCommandLineAPI must remain false (§8.2).
594
698
  },
595
699
  { sessionId: childSessionId },
@@ -617,6 +721,73 @@ export class Session {
617
721
  }
618
722
  }
619
723
 
724
+ /**
725
+ * Resolve the worker target's execution-context id WITHOUT
726
+ * `Runtime.enable` — patchright's trick.
727
+ *
728
+ * Sends `Runtime.evaluate("globalThis", { serialization: "idOnly" })`
729
+ * against the paused worker session. The returned `RemoteObject.objectId`
730
+ * has the on-the-wire shape `"<runtimeAgentId>.<contextId>.<localId>"`
731
+ * (Chromium >= v131; verified against patchright's parser). We extract
732
+ * `split(".")[1]` and assert it's a positive integer.
733
+ *
734
+ * Throws with a precise diagnostic if Chromium changes the format —
735
+ * silent fallback would mask a real wire-protocol shift, which we want
736
+ * to catch in CI rather than ship as a degraded inject path.
737
+ *
738
+ * @see crServiceWorkerPatch.ts:32-43
739
+ */
740
+ private async extractWorkerExecutionContextId(childSessionId: string): Promise<number> {
741
+ const evalRes = await this.router.send<{ result: { objectId?: string; type?: string } }>(
742
+ "Runtime.evaluate",
743
+ {
744
+ expression: "globalThis",
745
+ // idOnly skips full value serialisation — we want the objectId
746
+ // alone. Supported on Chromium >= v124 (chrome-for-testing v131+
747
+ // in the mochi profile floor).
748
+ serialization: "idOnly",
749
+ // includeCommandLineAPI must remain false (§8.2).
750
+ },
751
+ { sessionId: childSessionId },
752
+ );
753
+ const objectId = evalRes.result.objectId;
754
+ if (typeof objectId !== "string" || objectId.length === 0) {
755
+ throw new Error(
756
+ `[mochi] worker idOnly bootstrap: Runtime.evaluate("globalThis") returned no objectId (got ${JSON.stringify(evalRes.result)})`,
757
+ );
758
+ }
759
+ const parts = objectId.split(".");
760
+ // Format: "<runtimeAgentId>.<contextId>.<localId>" — patchright also
761
+ // pulls index [1]. Refuse to guess if there is no second segment.
762
+ if (parts.length < 2) {
763
+ throw new Error(
764
+ `[mochi] worker idOnly bootstrap: unexpected objectId shape "${objectId}" (expected dotted segments)`,
765
+ );
766
+ }
767
+ const ctxRaw = parts[1];
768
+ if (ctxRaw === undefined || ctxRaw.length === 0) {
769
+ throw new Error(
770
+ `[mochi] worker idOnly bootstrap: objectId "${objectId}" has empty contextId segment`,
771
+ );
772
+ }
773
+ const contextId = Number.parseInt(ctxRaw, 10);
774
+ if (!Number.isInteger(contextId) || contextId <= 0 || String(contextId) !== ctxRaw) {
775
+ throw new Error(
776
+ `[mochi] worker idOnly bootstrap: contextId segment "${ctxRaw}" of objectId "${objectId}" is not a positive integer`,
777
+ );
778
+ }
779
+ return contextId;
780
+ }
781
+
782
+ /**
783
+ * Snapshot of the worker → executionContextId cache. Test-only.
784
+ *
785
+ * @internal
786
+ */
787
+ _internalWorkerExecutionContextIds(): ReadonlyMap<string, number> {
788
+ return new Map(this.workerExecutionContextIds);
789
+ }
790
+
620
791
  private installCrashGuard(): void {
621
792
  // If Chromium dies unexpectedly, we want to mark the session closed so
622
793
  // pending and future calls reject cleanly.
@@ -636,3 +807,178 @@ export class Session {
636
807
  }
637
808
  }
638
809
  }
810
+
811
+ // ---- UA-CH metadata helpers (task 0261) -------------------------------------
812
+
813
+ /**
814
+ * Single brand entry as accepted by `Network.setUserAgentOverride`'s
815
+ * `userAgentMetadata.brands` / `fullVersionList`.
816
+ *
817
+ * @internal
818
+ */
819
+ interface UaMetadataBrand {
820
+ brand: string;
821
+ version: string;
822
+ }
823
+
824
+ /**
825
+ * Strip surrounding ASCII double-quotes (the on-the-wire form for several
826
+ * `Sec-CH-UA*` headers — `'"macOS"'`, `'"14.0.0"'`, `'"arm"'`, `'"64"'`).
827
+ * The CDP `userAgentMetadata` enums consume the unquoted form.
828
+ */
829
+ function unquoteUaCh(s: string): string {
830
+ if (s.length >= 2 && s.startsWith('"') && s.endsWith('"')) {
831
+ return s.slice(1, -1);
832
+ }
833
+ return s;
834
+ }
835
+
836
+ /**
837
+ * Parse a Sec-CH-UA-style header value
838
+ * (`'"Brand A";v="123", "Not.A/Brand";v="8", "Brand B";v="456"'`) into the
839
+ * `[{brand, version}, ...]` shape `userAgentMetadata.brands` expects.
840
+ *
841
+ * Hand-written state machine — Sec-CH-UA is RFC 8941 Structured Headers
842
+ * with quoted strings, so a regex split on `,` would break on
843
+ * `"Brand,with,commas"`. Mirrors `parseSecChUa` in
844
+ * `@mochi.js/inject/src/modules/client-hints.ts` byte-for-byte: same
845
+ * source field (`matrix.uaCh["sec-ch-ua"]`), same output shape, so the
846
+ * network surface and the JS surface cannot drift.
847
+ *
848
+ * @internal
849
+ */
850
+ function parseSecChUaBrandList(s: string): UaMetadataBrand[] {
851
+ const out: UaMetadataBrand[] = [];
852
+ // Split on `,` outside quoted segments. `depth` toggles inside `"…"`.
853
+ const parts: string[] = [];
854
+ let depth = 0;
855
+ let cur = "";
856
+ for (let i = 0; i < s.length; i++) {
857
+ const c = s[i] as string;
858
+ if (c === '"') {
859
+ depth = depth === 0 ? 1 : 0;
860
+ cur += c;
861
+ } else if (c === "," && depth === 0) {
862
+ parts.push(cur);
863
+ cur = "";
864
+ } else {
865
+ cur += c;
866
+ }
867
+ }
868
+ if (cur.length > 0) parts.push(cur);
869
+ for (const raw of parts) {
870
+ const piece = raw.trim();
871
+ if (piece.length === 0) continue;
872
+ const semi = piece.indexOf(";");
873
+ if (semi === -1) {
874
+ out.push({ brand: unquoteUaCh(piece), version: "" });
875
+ continue;
876
+ }
877
+ const brandPart = piece.slice(0, semi).trim();
878
+ const rest = piece.slice(semi + 1).trim();
879
+ let version = "";
880
+ if (rest.startsWith("v=")) {
881
+ version = unquoteUaCh(rest.slice(2).trim());
882
+ }
883
+ out.push({ brand: unquoteUaCh(brandPart), version });
884
+ }
885
+ return out;
886
+ }
887
+
888
+ /**
889
+ * Parse the JSON-encoded `uaCh.ua-full-version-list` (R-031) into the
890
+ * `[{brand, version}]` shape. Falls through to the brand-list parser if
891
+ * the field is missing or is not a JSON array — every shipped profile has it, so
892
+ * the fallback is purely defensive.
893
+ *
894
+ * @internal
895
+ */
896
+ function parseFullVersionList(matrix: MatrixV1): UaMetadataBrand[] {
897
+ const raw = matrix.uaCh["ua-full-version-list"];
898
+ if (typeof raw === "string" && raw.length > 0) {
899
+ try {
900
+ const parsed = JSON.parse(raw) as unknown;
901
+ if (Array.isArray(parsed)) {
902
+ return parsed
903
+ .filter(
904
+ (e): e is UaMetadataBrand =>
905
+ typeof e === "object" &&
906
+ e !== null &&
907
+ typeof (e as { brand?: unknown }).brand === "string" &&
908
+ typeof (e as { version?: unknown }).version === "string",
909
+ )
910
+ .map((e) => ({ brand: e.brand, version: e.version }));
911
+ }
912
+ } catch {
913
+ // Fall through.
914
+ }
915
+ }
916
+ // Fallback: reuse the brand-list majors. Matches the inject side's same
917
+ // fallback in client-hints.ts.
918
+ const secChUa = matrix.uaCh["sec-ch-ua"] ?? "";
919
+ return parseSecChUaBrandList(secChUa);
920
+ }
921
+
922
+ /**
923
+ * Build the `userAgentMetadata` parameter for `Network.setUserAgentOverride`
924
+ * from a derived MatrixV1. Single source of truth = the matrix; the inject
925
+ * `client-hints.ts` module reads the same fields, so the JS-API surface
926
+ * (`navigator.userAgentData.getHighEntropyValues`) and the request-header
927
+ * surface (`Sec-CH-UA*`) cannot drift.
928
+ *
929
+ * Field shape per CDP spec:
930
+ * - `brands` — `[{brand, version}]`, brand-list majors.
931
+ * - `fullVersionList` — `[{brand, version}]`, tip-locked full versions.
932
+ * - `fullVersion` — string, branded entry's version (R-046).
933
+ * - `platform` — unquoted Sec-CH-UA-Platform value.
934
+ * - `platformVersion` — unquoted Sec-CH-UA-Platform-Version.
935
+ * - `architecture` — `"arm" | "x86" | ""` (R-042 unquoted).
936
+ * - `model` — free-form string, empty for desktop (R-045).
937
+ * - `mobile` — boolean (R-044 → `?1` mapped to true).
938
+ * - `bitness` — STRING `"64" | "32" | ""` (R-043 unquoted),
939
+ * never numeric.
940
+ * - `wow64` — boolean; matrix doesn't model nested-WOW64,
941
+ * we always emit false (task 0261 out-of-scope).
942
+ *
943
+ * @internal
944
+ */
945
+ export function buildUserAgentMetadata(matrix: MatrixV1): {
946
+ brands: UaMetadataBrand[];
947
+ fullVersionList: UaMetadataBrand[];
948
+ fullVersion: string;
949
+ platform: string;
950
+ platformVersion: string;
951
+ architecture: string;
952
+ model: string;
953
+ mobile: boolean;
954
+ bitness: string;
955
+ wow64: boolean;
956
+ } {
957
+ const ua = matrix.uaCh;
958
+ const brandsRaw = ua["sec-ch-ua"] ?? "";
959
+ const brands = parseSecChUaBrandList(brandsRaw);
960
+ const fullVersionList = parseFullVersionList(matrix);
961
+ const fullVersion =
962
+ typeof ua["ua-full-version"] === "string" && ua["ua-full-version"].length > 0
963
+ ? ua["ua-full-version"]
964
+ : (fullVersionList[0]?.version ?? "");
965
+ const platform = unquoteUaCh(ua["sec-ch-ua-platform"] ?? "");
966
+ const platformVersion = unquoteUaCh(ua["sec-ch-ua-platform-version"] ?? "");
967
+ const architecture = unquoteUaCh(ua["sec-ch-ua-arch"] ?? "");
968
+ const bitness = unquoteUaCh(ua["sec-ch-ua-bitness"] ?? "");
969
+ const model = unquoteUaCh(ua["sec-ch-ua-model"] ?? "");
970
+ // Sec-CH-UA-Mobile wire form is "?0" / "?1" (Structured-Headers boolean).
971
+ const mobile = ua["sec-ch-ua-mobile"] === "?1";
972
+ return {
973
+ brands,
974
+ fullVersionList,
975
+ fullVersion,
976
+ platform,
977
+ platformVersion,
978
+ architecture,
979
+ model,
980
+ mobile,
981
+ bitness,
982
+ wow64: false,
983
+ };
984
+ }