@mochi.js/core 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/launch.ts CHANGED
@@ -12,6 +12,8 @@
12
12
 
13
13
  import { deriveMatrix, type ProfileV1 } from "@mochi.js/consistency";
14
14
  import { resolveBinary } from "./binary";
15
+ import { type GeoConsistencyMode, reconcileGeoConsistency } from "./geo-consistency";
16
+ import { probeExitGeo } from "./geo-probe";
15
17
  import { spawnChromium } from "./proc";
16
18
  import { parseProxyUrl } from "./proxy-auth";
17
19
  import { Session } from "./session";
@@ -151,6 +153,35 @@ export interface LaunchOptions {
151
153
  * solving is v0.3+).
152
154
  */
153
155
  challenges?: ChallengeLaunchOptions;
156
+ /**
157
+ * Reconcile `(matrix.timezone, matrix.locale)` against the proxy's
158
+ * exit-IP geolocation. Closes the cross-layer leak where a US profile
159
+ * over an EU proxy would have `Date.getTimezoneOffset()` reporting PT
160
+ * while the IP geolocates to UTC+1 — the canonical bot signature.
161
+ *
162
+ * - `"privacy-fallback"` *(default)* — on mismatch (or probe failure),
163
+ * override the matrix to UTC + `en-US`. The session fingerprints as
164
+ * a privacy-conscious user (Tor / Brave / hardened-FF style), which
165
+ * is benign in most threat models.
166
+ * - `"auto-correct"` — on mismatch, override the matrix's timezone
167
+ * with the IP's timezone and the locale with a primary-locale
168
+ * guess for the IP's country. Most "stealth" but trusts mochi's
169
+ * IP-derived defaults over the user's declared profile.
170
+ * - `"strict"` — throw `GeoMismatchError` on mismatch. The user must
171
+ * change profile or change proxy. Probe failure (null) does NOT
172
+ * throw under strict — that's a network blip, not a mismatch.
173
+ * - `"off"` — skip the probe entirely. Use in offline tests / when
174
+ * the probe service is rate-limited.
175
+ *
176
+ * The probe is a single GET through wreq (using the matrix's
177
+ * `wreqPreset`, so the geo service sees the same JA4/headers as user
178
+ * traffic). 4-attempt cap, 2s per endpoint. Probe results are NOT
179
+ * cached across sessions — proxy IPs rotate.
180
+ *
181
+ * @see PLAN.md §9 (relational consistency, IP/TZ/Locale axis)
182
+ * @see tasks/0262-ip-tz-locale-exit-consistency.md
183
+ */
184
+ geoConsistency?: GeoConsistencyMode;
154
185
  }
155
186
 
156
187
  /**
@@ -175,6 +206,37 @@ export async function launch(opts: LaunchOptions): Promise<Session> {
175
206
  const profile = resolveProfile(opts.profile);
176
207
  const matrix = deriveMatrix(profile, opts.seed);
177
208
 
209
+ // Task 0262 — exit-IP / TZ / locale reconciliation.
210
+ //
211
+ // Probe the apparent exit IP through the configured proxy (using wreq
212
+ // with the matrix's `wreqPreset` so the geo service sees the same JA4
213
+ // / headers as user traffic). Then cross-reference against
214
+ // `(matrix.timezone, matrix.locale)` and apply `geoConsistency`. The
215
+ // adjusted matrix flows into BOTH `spawnChromium` (so `--lang` reflects
216
+ // any override) AND `Session` (so inject + the CDP
217
+ // `Emulation.setTimezoneOverride` send pick it up). PLAN.md §9.
218
+ //
219
+ // `"off"` short-circuits the probe — the call below is guarded by
220
+ // the mode check so we don't pay the network round-trip in offline tests.
221
+ const geoMode: GeoConsistencyMode = opts.geoConsistency ?? "privacy-fallback";
222
+ let adjustedMatrix = matrix;
223
+ if (geoMode !== "off") {
224
+ const geo = await probeExitGeo({
225
+ ...(normalized?.netProxy !== undefined ? { proxy: normalized.netProxy } : {}),
226
+ matrix,
227
+ });
228
+ // Strict mode throws GeoMismatchError on real mismatch; let it
229
+ // propagate up so callers can recover (the orchestrator surfaced
230
+ // it as the canonical failure mode for "wrong proxy for profile").
231
+ const result = reconcileGeoConsistency(matrix, geo, geoMode);
232
+ adjustedMatrix = result.matrix;
233
+ if (result.action === "privacy-fallback" || result.action === "auto-correct") {
234
+ console.warn(
235
+ `[mochi] geoConsistency=${geoMode}: ${result.action} applied — ${result.reason ?? "(no reason)"}`,
236
+ );
237
+ }
238
+ }
239
+
178
240
  const proc = await spawnChromium({
179
241
  binary,
180
242
  extraArgs: opts.args,
@@ -191,17 +253,26 @@ export async function launch(opts: LaunchOptions): Promise<Session> {
191
253
  // inject layer's `navigator.languages` spoof; Chromium derives the
192
254
  // q-weighted `Accept-Language` value from the single `--lang` primary
193
255
  // automatically. Task 0251.
194
- locale: matrix.locale,
256
+ locale: adjustedMatrix.locale,
195
257
  // Pin OS-level outer window from the matrix's display geometry so
196
258
  // `window.outerWidth/outerHeight` (which reads from the OS window,
197
259
  // NOT the JS-spoofed `screen.*`) matches the spoof. Closes the
198
260
  // `fingerprint-scan.com` 800×600 leak under `--headless=new`.
199
261
  // UDC fixes the same issue at `__init__.py:410-411`. Task 0252.
200
- ...(Number.isInteger(matrix.display.width) &&
201
- Number.isInteger(matrix.display.height) &&
202
- matrix.display.width > 0 &&
203
- matrix.display.height > 0
204
- ? { windowSize: { width: matrix.display.width, height: matrix.display.height } }
262
+ //
263
+ // (`adjustedMatrix.display` === `matrix.display` since geo reconcile
264
+ // only touches timezone/locale/languages — but we use the adjusted
265
+ // ref for forward-compat.)
266
+ ...(Number.isInteger(adjustedMatrix.display.width) &&
267
+ Number.isInteger(adjustedMatrix.display.height) &&
268
+ adjustedMatrix.display.width > 0 &&
269
+ adjustedMatrix.display.height > 0
270
+ ? {
271
+ windowSize: {
272
+ width: adjustedMatrix.display.width,
273
+ height: adjustedMatrix.display.height,
274
+ },
275
+ }
205
276
  : {}),
206
277
  // Hermetic harness/CI escape hatch — re-applies the patchright-trim
207
278
  // flags (`--disable-component-update`, `--disable-default-apps`,
@@ -213,7 +284,7 @@ export async function launch(opts: LaunchOptions): Promise<Session> {
213
284
 
214
285
  const session = new Session({
215
286
  proc,
216
- matrix,
287
+ matrix: adjustedMatrix,
217
288
  seed: opts.seed,
218
289
  ...(opts.timeout !== undefined ? { defaultTimeoutMs: opts.timeout } : {}),
219
290
  ...(opts.bypassInject === true ? { bypassInject: true } : {}),
package/src/page.ts CHANGED
@@ -311,6 +311,14 @@ export class Page {
311
311
  * document (so `this` === document). Result is JSON-serialized via
312
312
  * `returnByValue: true`.
313
313
  *
314
+ * The function may return a value or a `Promise`. Promise-returning
315
+ * functions are awaited page-side via `awaitPromise: true` (CDP's canonical
316
+ * mechanism for async eval) — without that flag, an `async () => ...`
317
+ * function round-trips its returned Promise as `undefined` because CDP
318
+ * serializes the Promise object itself, not its resolution. `awaitPromise`
319
+ * is NOT on PLAN.md §8.2's forbidden list — only `Runtime.enable` and
320
+ * `Page.createIsolatedWorld` are. Available since Chromium 67.
321
+ *
314
322
  * Limitations (documented in docs/limits.md):
315
323
  * - Non-JSON return values (functions, DOM nodes, undefined) are
316
324
  * coerced/dropped per CDP semantics.
@@ -319,13 +327,14 @@ export class Page {
319
327
  * standard for any cross-process evaluator).
320
328
  * - Arguments cannot be passed in v0.1; the function takes no args.
321
329
  */
322
- async evaluate<T>(fn: () => T): Promise<T> {
330
+ async evaluate<T>(fn: () => T | Promise<T>): Promise<T> {
323
331
  this.assertOpen();
324
332
  const docId = await this.documentObjectId();
325
333
  const result = await this.send<{ result: RemoteObject }>("Runtime.callFunctionOn", {
326
334
  objectId: docId,
327
335
  functionDeclaration: fn.toString(),
328
336
  returnByValue: true,
337
+ awaitPromise: true,
329
338
  });
330
339
  return result.result.value as T;
331
340
  }
package/src/session.ts CHANGED
@@ -266,27 +266,70 @@ export class Session {
266
266
  // (only Runtime.enable is forbidden). We enable here so subsequent
267
267
  // addScriptToEvaluateOnNewDocument is honoured by the page domain.
268
268
  await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
269
+ // Task 0262: timezone spoof via CDP `Emulation.setTimezoneOverride`.
270
+ //
271
+ // Drives BOTH `Intl.DateTimeFormat().resolvedOptions().timeZone` AND
272
+ // `Date.getTimezoneOffset()` because Chromium's V8 reads from the same
273
+ // internal timezone source. We do NOT manually rewrite
274
+ // `Date.prototype.getTimezoneOffset` in inject — that's detectable via
275
+ // prototype-shape checks. The CDP override is the canonical
276
+ // mechanism.
277
+ //
278
+ // Per the CDP docs (`tot/Emulation/#method-setTimezoneOverride`),
279
+ // this method does NOT require `Emulation.enable` (it stores override
280
+ // state directly on the target's `EmulationAgent`). §8.2's bans are
281
+ // unaffected. Sent per-target before any navigation so the very first
282
+ // document JS already sees the spoofed zone.
283
+ //
284
+ // The empty-string sentinel in the protocol means "clear override";
285
+ // we never send empty here because that would defeat the purpose.
286
+ //
287
+ // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows
288
+ // record the bare browser timezone.
289
+ if (!this.bypassInject) {
290
+ await this.router.send(
291
+ "Emulation.setTimezoneOverride",
292
+ { timezoneId: this.profile.timezone },
293
+ { sessionId: attached.sessionId },
294
+ );
295
+ }
269
296
  // Task 0255: defensive UA override at the network layer.
270
297
  //
271
298
  // The inject payload (Page.addScriptToEvaluateOnNewDocument) spoofs
272
- // `navigator.userAgent` in the JS surface, but `Network.requestWillBeSent`
273
- // events (and the request line itself) carry the BARE browser UA — which
274
- // under `--headless=new` still contains the substring "HeadlessChrome".
275
- // The inject can never reach those bytes because they're emitted before
276
- // any document script runs.
299
+ // `navigator.userAgent` and `navigator.userAgentData` in the JS
300
+ // surface, but `Network.requestWillBeSent` events (and the request
301
+ // line itself) carry the BARE browser UA — which under `--headless=new`
302
+ // still contains the substring "HeadlessChrome" AND the bare
303
+ // `Sec-CH-UA*` request-header set. The inject can never reach those
304
+ // bytes because they're emitted before any document script runs.
305
+ //
306
+ // 0255 plumbed `userAgent`. 0261 closes the cross-layer leak 0255 left
307
+ // open: without `userAgentMetadata`, the request `Sec-CH-UA*` headers
308
+ // carry CfT defaults instead of the matrix, so a fingerprinter doing
309
+ // `getHighEntropyValues()` and comparing against the request headers
310
+ // sees a mismatch (direct PLAN.md I-5 violation). The metadata struct
311
+ // is the CDP-canonical UA-CH descriptor; Chromium derives every
312
+ // `Sec-CH-UA*` header from it. Both surfaces (this network call and
313
+ // the inject's `client-hints.ts` getHighEntropyValues) read the SAME
314
+ // matrix fields, so they cannot drift.
277
315
  //
278
316
  // `Network.setUserAgentOverride` is a per-target setter that does NOT
279
317
  // require `Network.enable` (it only stores override state); §8.2's ban
280
- // on `Network.enable` is therefore unaffected. Sent immediately after
281
- // attach and before any navigation so the very first request the page
282
- // issues already carries the matrix UA.
318
+ // on `Network.enable` is therefore unaffected, with or without the
319
+ // metadata payload. Sent immediately after attach and before any
320
+ // navigation so the very first request the page issues already carries
321
+ // the matrix UA + UA-CH headers.
283
322
  //
284
323
  // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows must
285
- // record the bare browser fingerprint, including its raw UA.
324
+ // record the bare browser fingerprint, including its raw UA AND raw
325
+ // `Sec-CH-UA*` headers.
286
326
  if (!this.bypassInject) {
287
327
  await this.router.send(
288
328
  "Network.setUserAgentOverride",
289
- { userAgent: this.profile.userAgent },
329
+ {
330
+ userAgent: this.profile.userAgent,
331
+ userAgentMetadata: buildUserAgentMetadata(this.profile),
332
+ },
290
333
  { sessionId: attached.sessionId },
291
334
  );
292
335
  }
@@ -764,3 +807,178 @@ export class Session {
764
807
  }
765
808
  }
766
809
  }
810
+
811
+ // ---- UA-CH metadata helpers (task 0261) -------------------------------------
812
+
813
+ /**
814
+ * Single brand entry as accepted by `Network.setUserAgentOverride`'s
815
+ * `userAgentMetadata.brands` / `fullVersionList`.
816
+ *
817
+ * @internal
818
+ */
819
+ interface UaMetadataBrand {
820
+ brand: string;
821
+ version: string;
822
+ }
823
+
824
+ /**
825
+ * Strip surrounding ASCII double-quotes (the on-the-wire form for several
826
+ * `Sec-CH-UA*` headers — `'"macOS"'`, `'"14.0.0"'`, `'"arm"'`, `'"64"'`).
827
+ * The CDP `userAgentMetadata` enums consume the unquoted form.
828
+ */
829
+ function unquoteUaCh(s: string): string {
830
+ if (s.length >= 2 && s.startsWith('"') && s.endsWith('"')) {
831
+ return s.slice(1, -1);
832
+ }
833
+ return s;
834
+ }
835
+
836
+ /**
837
+ * Parse a Sec-CH-UA-style header value
838
+ * (`'"Brand A";v="123", "Not.A/Brand";v="8", "Brand B";v="456"'`) into the
839
+ * `[{brand, version}, ...]` shape `userAgentMetadata.brands` expects.
840
+ *
841
+ * Hand-written state machine — Sec-CH-UA is RFC 8941 Structured Headers
842
+ * with quoted strings, so a regex split on `,` would break on
843
+ * `"Brand,with,commas"`. Mirrors `parseSecChUa` in
844
+ * `@mochi.js/inject/src/modules/client-hints.ts` byte-for-byte: same
845
+ * source field (`matrix.uaCh["sec-ch-ua"]`), same output shape, so the
846
+ * network surface and the JS surface cannot drift.
847
+ *
848
+ * @internal
849
+ */
850
+ function parseSecChUaBrandList(s: string): UaMetadataBrand[] {
851
+ const out: UaMetadataBrand[] = [];
852
+ // Split on `,` outside quoted segments. `depth` toggles inside `"…"`.
853
+ const parts: string[] = [];
854
+ let depth = 0;
855
+ let cur = "";
856
+ for (let i = 0; i < s.length; i++) {
857
+ const c = s[i] as string;
858
+ if (c === '"') {
859
+ depth = depth === 0 ? 1 : 0;
860
+ cur += c;
861
+ } else if (c === "," && depth === 0) {
862
+ parts.push(cur);
863
+ cur = "";
864
+ } else {
865
+ cur += c;
866
+ }
867
+ }
868
+ if (cur.length > 0) parts.push(cur);
869
+ for (const raw of parts) {
870
+ const piece = raw.trim();
871
+ if (piece.length === 0) continue;
872
+ const semi = piece.indexOf(";");
873
+ if (semi === -1) {
874
+ out.push({ brand: unquoteUaCh(piece), version: "" });
875
+ continue;
876
+ }
877
+ const brandPart = piece.slice(0, semi).trim();
878
+ const rest = piece.slice(semi + 1).trim();
879
+ let version = "";
880
+ if (rest.startsWith("v=")) {
881
+ version = unquoteUaCh(rest.slice(2).trim());
882
+ }
883
+ out.push({ brand: unquoteUaCh(brandPart), version });
884
+ }
885
+ return out;
886
+ }
887
+
888
+ /**
889
+ * Parse the JSON-encoded `uaCh.ua-full-version-list` (R-031) into the
890
+ * `[{brand, version}]` shape. Falls back to the brand-list parser if
891
+ * the field is missing, empty, or not a valid JSON array — every shipped profile carries it, so
892
+ * the fallback is purely defensive.
893
+ *
894
+ * @internal
895
+ */
896
+ function parseFullVersionList(matrix: MatrixV1): UaMetadataBrand[] {
897
+ const raw = matrix.uaCh["ua-full-version-list"];
898
+ if (typeof raw === "string" && raw.length > 0) {
899
+ try {
900
+ const parsed = JSON.parse(raw) as unknown;
901
+ if (Array.isArray(parsed)) {
902
+ return parsed
903
+ .filter(
904
+ (e): e is UaMetadataBrand =>
905
+ typeof e === "object" &&
906
+ e !== null &&
907
+ typeof (e as { brand?: unknown }).brand === "string" &&
908
+ typeof (e as { version?: unknown }).version === "string",
909
+ )
910
+ .map((e) => ({ brand: e.brand, version: e.version }));
911
+ }
912
+ } catch {
913
+ // Fall through.
914
+ }
915
+ }
916
+ // Fallback: reuse the brand-list majors. Matches the inject side's same
917
+ // fallback in client-hints.ts.
918
+ const secChUa = matrix.uaCh["sec-ch-ua"] ?? "";
919
+ return parseSecChUaBrandList(secChUa);
920
+ }
921
+
922
+ /**
923
+ * Build the `userAgentMetadata` parameter for `Network.setUserAgentOverride`
924
+ * from a derived MatrixV1. Single source of truth = the matrix; the inject
925
+ * `client-hints.ts` module reads the same fields, so the JS-API surface
926
+ * (`navigator.userAgentData.getHighEntropyValues`) and the request-header
927
+ * surface (`Sec-CH-UA*`) cannot drift.
928
+ *
929
+ * Field shape per CDP spec:
930
+ * - `brands` — `[{brand, version}]`, brand-list majors.
931
+ * - `fullVersionList` — `[{brand, version}]`, tip-locked full versions.
932
+ * - `fullVersion` — string, branded entry's version (R-046).
933
+ * - `platform` — unquoted Sec-CH-UA-Platform value.
934
+ * - `platformVersion` — unquoted Sec-CH-UA-Platform-Version.
935
+ * - `architecture` — `"arm" | "x86" | ""` (R-042 unquoted).
936
+ * - `model` — free-form string, empty for desktop (R-045).
937
+ * - `mobile` — boolean (R-044 → `?1` mapped to true).
938
+ * - `bitness` — STRING `"64" | "32" | ""` (R-043 unquoted),
939
+ * never numeric.
940
+ * - `wow64` — boolean; matrix doesn't model nested-WOW64,
941
+ * we always emit false (task 0261 out-of-scope).
942
+ *
943
+ * @internal
944
+ */
945
+ export function buildUserAgentMetadata(matrix: MatrixV1): {
946
+ brands: UaMetadataBrand[];
947
+ fullVersionList: UaMetadataBrand[];
948
+ fullVersion: string;
949
+ platform: string;
950
+ platformVersion: string;
951
+ architecture: string;
952
+ model: string;
953
+ mobile: boolean;
954
+ bitness: string;
955
+ wow64: boolean;
956
+ } {
957
+ const ua = matrix.uaCh;
958
+ const brandsRaw = ua["sec-ch-ua"] ?? "";
959
+ const brands = parseSecChUaBrandList(brandsRaw);
960
+ const fullVersionList = parseFullVersionList(matrix);
961
+ const fullVersion =
962
+ typeof ua["ua-full-version"] === "string" && ua["ua-full-version"].length > 0
963
+ ? ua["ua-full-version"]
964
+ : (fullVersionList[0]?.version ?? "");
965
+ const platform = unquoteUaCh(ua["sec-ch-ua-platform"] ?? "");
966
+ const platformVersion = unquoteUaCh(ua["sec-ch-ua-platform-version"] ?? "");
967
+ const architecture = unquoteUaCh(ua["sec-ch-ua-arch"] ?? "");
968
+ const bitness = unquoteUaCh(ua["sec-ch-ua-bitness"] ?? "");
969
+ const model = unquoteUaCh(ua["sec-ch-ua-model"] ?? "");
970
+ // Sec-CH-UA-Mobile wire form is "?0" / "?1" (Structured-Headers boolean).
971
+ const mobile = ua["sec-ch-ua-mobile"] === "?1";
972
+ return {
973
+ brands,
974
+ fullVersionList,
975
+ fullVersion,
976
+ platform,
977
+ platformVersion,
978
+ architecture,
979
+ model,
980
+ mobile,
981
+ bitness,
982
+ wow64: false,
983
+ };
984
+ }