@mochi.js/core 0.2.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/session.ts CHANGED
@@ -25,11 +25,15 @@ import {
25
25
  type NetCtx,
26
26
  type NetFetchInit,
27
27
  } from "@mochi.js/net";
28
+ import {
29
+ type InitInjectorHandle,
30
+ installInitInjector,
31
+ wrapSelfRemovingPayload,
32
+ } from "./cdp/init-injector";
28
33
  import { MessageRouter } from "./cdp/router";
29
34
  import type { AttachedToTargetEvent } from "./cdp/types";
30
35
  import { Page } from "./page";
31
36
  import type { ChromiumProcess } from "./proc";
32
- import { installProxyAuth, type ProxyAuthHandle } from "./proxy-auth";
33
37
  import { VERSION } from "./version";
34
38
 
35
39
  /**
@@ -49,6 +53,40 @@ const defaultNetAdapter: NetAdapter = {
49
53
  requestOnCtx: defaultRequestOnCtx,
50
54
  };
51
55
 
56
+ /**
57
+ * Per-call timeout for the worker idOnly inject roundtrip. 5s, not the
58
+ * router's 30s default — workers spawned by sites like sannysoft,
59
+ * bot.incolumitas, fingerprintjs probes routinely die between
60
+ * `Target.attachedToTarget` and our reply. Without a tight cap, every
61
+ * orphan worker stalls the route loop for the full 30s. Real workers
62
+ * resolve in single-digit ms; 5s is generous.
63
+ *
64
+ * If you ever see a legitimate worker fail at 5s, raise this — but the
65
+ * symptom would be a missing inject on a long-running worker, which is
66
+ * separate from the orphan-worker race we're sizing for.
67
+ */
68
+ const WORKER_INJECT_TIMEOUT_MS = 5_000;
69
+
70
+ /**
71
+ * Predicate: is this an "expected" failure from the worker idOnly inject
72
+ * race (worker died between attach and our roundtrip)? Recognized:
73
+ * - `CdpTimeoutError` — router gave up after WORKER_INJECT_TIMEOUT_MS
74
+ * because the target stopped responding. Most common path.
75
+ * - CDP `Session with given id not found` — target detached mid-call.
76
+ * - CDP `Target closed` — same race, different message variant.
77
+ *
78
+ * All three are routine and silent. A genuine bug (bad contextId,
79
+ * wrong serialization, schema drift) surfaces as anything else and
80
+ * still warns through the console.
81
+ */
82
+ function isTransientWorkerError(err: unknown): boolean {
83
+ if (err === null || typeof err !== "object") return false;
84
+ const name = (err as { name?: string }).name;
85
+ if (name === "CdpTimeoutError") return true;
86
+ const msg = (err as { message?: string }).message ?? "";
87
+ return msg.includes("Session with given id not found") || msg.includes("Target closed");
88
+ }
89
+
52
90
  export interface SessionInit {
53
91
  proc: ChromiumProcess;
54
92
  matrix: MatrixV1;
@@ -56,10 +94,10 @@ export interface SessionInit {
56
94
  /** Optional overrides for the underlying message-router timeout. */
57
95
  defaultTimeoutMs?: number;
58
96
  /**
59
- * When true, skip {@link buildPayload} AND skip
60
- * `Page.addScriptToEvaluateOnNewDocument` on every new page; worker
61
- * targets receive no inject either. Intended for `mochi capture` and
62
- * similar baseline-collection flows. PLAN.md §12.1, task 0040.
97
+ * When true, skip {@link buildPayload} AND skip the init-injector install
98
+ * (no `Fetch.fulfillRequest` body splice on documents); worker targets
99
+ * receive no inject either. Intended for `mochi capture` and similar
100
+ * baseline-collection flows. PLAN.md §12.1, task 0040.
63
101
  */
64
102
  bypassInject?: boolean;
65
103
  /**
@@ -118,6 +156,86 @@ export interface StorageSnapshot {
118
156
  sessionStorage: Record<string, Record<string, string>>;
119
157
  }
120
158
 
159
+ // ---- cookie-jar persistence (task 0257) -------------------------------------
160
+
161
+ /**
162
+ * Current on-disk cookie-file format version. Bumped on incompatible header
163
+ * changes. The reader refuses unknown majors with a precise diagnostic so a
164
+ * stale jar doesn't silently load with the wrong shape.
165
+ */
166
+ export const COOKIE_JAR_FORMAT_VERSION = 1 as const;
167
+
168
+ /**
169
+ * On-disk shape for {@link Session.cookies.save}. The `cookies` array is the
170
+ * verbatim `Storage.getCookies` payload — every shipped Chromium revision
171
+ * agrees on this shape, so loading on a newer Chromium round-trips losslessly.
172
+ *
173
+ * @see tasks/0257-dx-cluster-cookies-storage-permissions.md (success criteria)
174
+ * @see https://chromedevtools.github.io/devtools-protocol/tot/Storage/#method-getCookies
175
+ */
176
+ export interface CookieJarFile {
177
+ /** Format version (currently `1`). */
178
+ version: typeof COOKIE_JAR_FORMAT_VERSION;
179
+ /** ISO-8601 UTC timestamp of `save()` (ends in `Z`). */
180
+ savedAt: string;
181
+ /** Mochi core version that produced the file. */
182
+ mochiVersion: string;
183
+ /** The regex source that filtered the saved set (default `".*"`). */
184
+ pattern: string;
185
+ /** Number of cookies in the `cookies` array — redundant with `cookies.length`, kept for trace logs. */
186
+ count: number;
187
+ /** Raw `Storage.getCookies` cookies, optionally filtered by `pattern`. */
188
+ cookies: import("./page").Cookie[];
189
+ }
190
+
191
+ /** Options shared by `cookies.save` / `cookies.load`. */
192
+ export interface CookieJarOptions {
193
+ /**
194
+ * Optional regex matched against each cookie's `domain`. Default `.*`
195
+ * (everything). Cookies failing the match are skipped on save AND on load
196
+ * (so a saved-with-everything jar can be partially restored).
197
+ */
198
+ pattern?: RegExp;
199
+ }
200
+
201
+ /**
202
+ * `Session.cookies` namespace — exposes the read/write/persist surface for the
203
+ * session's cookie jar. The legacy `Session.cookies(filter)` and
204
+ * `Session.setCookies(...)` shapes are gone; callers go through this object.
205
+ *
206
+ * The whole namespace is bound to a Session instance via the `Session.cookies`
207
+ * getter — every method routes through `Storage.getCookies` /
208
+ * `Storage.setCookies` on the root browser target (the only domain that
209
+ * exposes a global cookie reader without a per-page Network domain).
210
+ */
211
+ export interface CookieJar {
212
+ /**
213
+ * All cookies the browser is aware of, optionally filtered by url. The url
214
+ * filter is a coarse hostname match (no path / secure / sameSite handling) —
215
+ * sufficient for "scope down to a session" use cases.
216
+ */
217
+ get(filter?: { url?: string }): Promise<import("./page").Cookie[]>;
218
+ /** Set cookies via the root-target Storage domain. */
219
+ set(cookies: import("./page").Cookie[]): Promise<void>;
220
+ /**
221
+ * Persist cookies to a JSON file at `path`. Cookies whose `domain` does NOT
222
+ * match `opts.pattern` (default: every domain) are skipped. The file format
223
+ * is {@link CookieJarFile}.
224
+ */
225
+ save(path: string, opts?: CookieJarOptions): Promise<void>;
226
+ /**
227
+ * Read a JSON file written by {@link save} and replay every cookie back into
228
+ * the browser via `Storage.setCookies`. Cookies whose `domain` does NOT
229
+ * match `opts.pattern` (default: everything) are skipped — useful when one
230
+ * jar holds multi-domain state but only a slice should be re-installed for
231
+ * the current run.
232
+ *
233
+ * Throws on missing/corrupt files or version mismatch with a diagnostic that
234
+ * pins the exact failure point.
235
+ */
236
+ load(path: string, opts?: CookieJarOptions): Promise<void>;
237
+ }
238
+
121
239
  export class Session {
122
240
  /**
123
241
  * The resolved Matrix for this session — a relationally-locked snapshot
@@ -163,18 +281,22 @@ export class Session {
163
281
  private readonly _payload: PayloadResult | null;
164
282
  /**
165
283
  * Whether this session bypasses the inject pipeline (no `buildPayload`,
166
- * no `Page.addScriptToEvaluateOnNewDocument`, no worker injection).
167
- * Set from {@link SessionInit.bypassInject}. PLAN.md §12.1, task 0040.
284
+ * no body splice via `Fetch.fulfillRequest`, no worker injection). Set
285
+ * from {@link SessionInit.bypassInject}. PLAN.md §12.1, task 0040.
168
286
  *
169
287
  * @internal
170
288
  */
171
289
  private readonly bypassInject: boolean;
172
290
  /**
173
- * Live handle for the CDP `Fetch.authRequired` subscription. Created
174
- * lazily on construction when `init.proxyAuth` is set; disposed on
175
- * `Session.close`. Undefined when the session has no proxy auth.
291
+ * Live handle for the unified `Fetch` domain owner — installs once on
292
+ * construction and tears down on `Session.close`. Owns BOTH the
293
+ * Document-body splice (init-script delivery, task 0266) AND the
294
+ * `Fetch.authRequired` listener for proxy creds. Undefined when neither
295
+ * inject nor proxy auth is in play (capture-with-no-proxy short-circuit).
296
+ *
297
+ * @see PLAN.md §8.4, tasks/0266-fetch-fulfill-init-script.md
176
298
  */
177
- private proxyAuthHandle: ProxyAuthHandle | undefined;
299
+ private initInjectorHandle: InitInjectorHandle | undefined;
178
300
  /**
179
301
  * Snapshot of the `challenges` launch option, retained so
180
302
  * {@link newPage} can install the per-page auto-click handler. Undefined
@@ -196,6 +318,13 @@ export class Session {
196
318
  * @internal
197
319
  */
198
320
  private readonly workerExecutionContextIds = new Map<string, number>();
321
+ /**
322
+ * The `CookieJar` instance returned by the {@link cookies} getter. Created
323
+ * once at construction and bound to this Session — every call routes
324
+ * through `Storage.getCookies` / `Storage.setCookies` on the root browser
325
+ * target. See {@link CookieJar} for the surface contract.
326
+ */
327
+ private readonly cookieJar: CookieJar;
199
328
 
200
329
  constructor(init: SessionInit) {
201
330
  this.proc = init.proc;
@@ -212,26 +341,40 @@ export class Session {
212
341
  defaultTimeoutMs: init.defaultTimeoutMs,
213
342
  });
214
343
  this.router.start();
344
+ this.cookieJar = createCookieJar(this);
215
345
  this.installAutoAttach();
216
346
  this.installCrashGuard();
217
- // Wire CDP-driven proxy auth only when credentials were supplied. The
218
- // no-auth path skips Fetch.enable entirely so we don't pay the
219
- // protocol-attach cost or surface any extra CDP traffic.
220
- if (init.proxyAuth !== undefined) {
347
+ // Task 0266: unified Fetch.enable owner handles both Document-body
348
+ // splice (init-script delivery via Fetch.fulfillRequest, replacing
349
+ // Page.addScriptToEvaluateOnNewDocument) AND the proxy-auth handler
350
+ // when credentials are supplied. Single Fetch.enable per session.
351
+ //
352
+ // The injector skips Fetch.enable entirely when both are inactive
353
+ // (capture flow with no proxy) so we keep the §8.2-clean
354
+ // "no extra protocol surface" property of the v0.1 baseline for that
355
+ // narrow case.
356
+ const payloadCode = this._payload?.code ?? null;
357
+ const auth = init.proxyAuth;
358
+ if (payloadCode !== null || auth !== undefined) {
221
359
  // Fire-and-forget: surface failures via console.warn but don't reject
222
- // the constructor — pages still launch and unauthenticated traffic
223
- // will simply 407, giving callers a recoverable signal.
224
- void installProxyAuth(this.router, init.proxyAuth)
360
+ // the constructor. The init-script path means a failure to install
361
+ // breaks inject delivery (the page still loads with the bare
362
+ // browser fingerprint), so we log loudly to keep the failure
363
+ // visible.
364
+ void installInitInjector(this.router, {
365
+ payloadCode,
366
+ ...(auth !== undefined ? { auth } : {}),
367
+ })
225
368
  .then((handle) => {
226
369
  if (this.closed) {
227
370
  void handle.dispose();
228
371
  return;
229
372
  }
230
- this.proxyAuthHandle = handle;
373
+ this.initInjectorHandle = handle;
231
374
  })
232
375
  .catch((err: unknown) => {
233
376
  if (!this.closed) {
234
- console.warn("[mochi] proxy-auth installation failed:", err);
377
+ console.warn("[mochi] init-injector installation failed:", err);
235
378
  }
236
379
  });
237
380
  }
@@ -242,12 +385,15 @@ export class Session {
242
385
  * 1. `Target.createTarget` opens a new browser tab.
243
386
  * 2. `Target.attachToTarget({ flatten: true })` returns a flat-mode session
244
387
  * id we'll use to address page-level CDP methods.
245
- * 3. `Page.addScriptToEvaluateOnNewDocument({ source, runImmediately: true,
246
- * worldName: "" })` installs the inject payload to run main-world,
247
- * before any page script, on every navigation. The returned identifier
248
- * is tracked on the {@link Page} so it can be removed on close.
249
- * Critical: `worldName: ""` any non-empty string creates an isolated
250
- * world (PLAN.md §8.4) which is detectable.
388
+ * 3. The inject payload is delivered NOT via
389
+ * `Page.addScriptToEvaluateOnNewDocument` but via the always-on
390
+ * `Fetch` domain handler installed once at session-construction time
391
+ * (`installInitInjector`). When this page navigates, the document
392
+ * response is intercepted, its CSP rewritten, and the payload
393
+ * spliced as an inline `<script>` at end-of-`<head>` before the
394
+ * first non-comment `<script>`. See PLAN.md §8.4 / task 0266 for
395
+ * the rationale (closes the source-attribution leak that
396
+ * `addScriptToEvaluateOnNewDocument` otherwise carries).
251
397
  *
252
398
  * `flatten: true` is critical — without it, page CDP messages would need to
253
399
  * be wrapped in `Target.sendMessageToTarget` envelopes. Flat mode lets us
@@ -266,42 +412,111 @@ export class Session {
266
412
  // (only Runtime.enable is forbidden). We enable here so subsequent
267
413
  // addScriptToEvaluateOnNewDocument is honoured by the page domain.
268
414
  await this.router.send("Page.enable", undefined, { sessionId: attached.sessionId });
415
+ // Task 0262: timezone spoof via CDP `Emulation.setTimezoneOverride`.
416
+ //
417
+ // Drives BOTH `Intl.DateTimeFormat().resolvedOptions().timeZone` AND
418
+ // `Date.getTimezoneOffset()` because Chromium's V8 reads from the same
419
+ // internal timezone source. We do NOT manually rewrite
420
+ // `Date.prototype.getTimezoneOffset` in inject — that's detectable via
421
+ // prototype-shape checks. The CDP override is the canonical
422
+ // mechanism.
423
+ //
424
+ // Per the CDP docs (`tot/Emulation/#method-setTimezoneOverride`),
425
+ // this method does NOT require `Emulation.enable` (it stores override
426
+ // state directly on the target's `EmulationAgent`). §8.2's bans are
427
+ // unaffected. Sent per-target before any navigation so the very first
428
+ // document JS already sees the spoofed zone.
429
+ //
430
+ // The empty-string sentinel in the protocol means "clear override";
431
+ // we never send empty here because that would defeat the purpose.
432
+ //
433
+ // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows
434
+ // record the bare browser timezone.
435
+ if (!this.bypassInject) {
436
+ await this.router.send(
437
+ "Emulation.setTimezoneOverride",
438
+ { timezoneId: this.profile.timezone },
439
+ { sessionId: attached.sessionId },
440
+ );
441
+ }
269
442
  // Task 0255: defensive UA override at the network layer.
270
443
  //
271
444
  // The inject payload (Page.addScriptToEvaluateOnNewDocument) spoofs
272
- // `navigator.userAgent` in the JS surface, but `Network.requestWillBeSent`
273
- // events (and the request line itself) carry the BARE browser UA — which
274
- // under `--headless=new` still contains the substring "HeadlessChrome".
275
- // The inject can never reach those bytes because they're emitted before
276
- // any document script runs.
445
+ // `navigator.userAgent` and `navigator.userAgentData` in the JS
446
+ // surface, but `Network.requestWillBeSent` events (and the request
447
+ // line itself) carry the BARE browser UA — which under `--headless=new`
448
+ // still contains the substring "HeadlessChrome" AND the bare
449
+ // `Sec-CH-UA*` request-header set. The inject can never reach those
450
+ // bytes because they're emitted before any document script runs.
451
+ //
452
+ // 0255 plumbed `userAgent`. 0261 closes the cross-layer leak that 0255 left
453
+ // open: without `userAgentMetadata`, the request `Sec-CH-UA*` headers
454
+ // carry CfT defaults instead of the matrix, so a fingerprinter doing
455
+ // `getHighEntropyValues()` and comparing against the request headers
456
+ // sees a mismatch (direct PLAN.md I-5 violation). The metadata struct
457
+ // is the CDP-canonical UA-CH descriptor; Chromium derives every
458
+ // `Sec-CH-UA*` header from it. Both surfaces (this network call and
459
+ // the inject's `client-hints.ts` getHighEntropyValues) read the SAME
460
+ // matrix fields, so they cannot drift.
277
461
  //
278
462
  // `Network.setUserAgentOverride` is a per-target setter that does NOT
279
463
  // require `Network.enable` (it only stores override state); §8.2's ban
280
- // on `Network.enable` is therefore unaffected. Sent immediately after
281
- // attach and before any navigation so the very first request the page
282
- // issues already carries the matrix UA.
464
+ // on `Network.enable` is therefore unaffected, with or without the
465
+ // metadata payload. Sent immediately after attach and before any
466
+ // navigation so the very first request the page issues already carries
467
+ // the matrix UA + UA-CH headers.
283
468
  //
284
469
  // Skipped under `bypassInject:true` (PLAN.md §12.1) — capture flows must
285
- // record the bare browser fingerprint, including its raw UA.
470
+ // record the bare browser fingerprint, including its raw UA AND raw
471
+ // `Sec-CH-UA*` headers.
286
472
  if (!this.bypassInject) {
287
473
  await this.router.send(
288
474
  "Network.setUserAgentOverride",
289
- { userAgent: this.profile.userAgent },
475
+ {
476
+ userAgent: this.profile.userAgent,
477
+ userAgentMetadata: buildUserAgentMetadata(this.profile),
478
+ },
290
479
  { sessionId: attached.sessionId },
291
480
  );
292
481
  }
293
- // PLAN.md §12.1 / task 0040 capture flow short-circuits inject so the
294
- // browser reports its bare fingerprint. Otherwise install the payload
295
- // main-world via §8.4. worldName MUST be the empty string.
482
+ // Task 0266: the inject payload is delivered via a TWO-MECHANISM strategy:
483
+ //
484
+ // 1. Session-level `installInitInjector` (constructor) listens on
485
+ // `Fetch.requestPaused`, splices the wrapped payload into every
486
+ // HTTP/HTTPS Document response. This is the load-bearing path for
487
+ // real navigations: closes the `addScriptToEvaluateOnNewDocument`
488
+ // source-attribution leak.
489
+ //
490
+ // 2. Per-page `Page.addScriptToEvaluateOnNewDocument` (this block) —
491
+ // registers the SAME wrapped payload as a fallback for URL schemes
492
+ // that the Fetch domain does NOT intercept: `about:blank`,
493
+ // `data:`, `blob:`. Without this, an `await page.goto("about:blank")`
494
+ // followed by an inject-dependent assertion (e.g. `navigator.
495
+ // webdriver` patched via R-022) would fail because the inject
496
+ // never fired.
497
+ //
498
+ // The wrapper sets `__mochi_inject_marker = true` on globalThis and
499
+ // checks for it at entry, so when both paths fire on the same realm
500
+ // (a normal HTTP nav has Fetch splice + new-document fire), the second
501
+ // invocation early-returns before any side effect. PLAN.md §8.4
502
+ // documents this dual-mechanism design and the trade-off it accepts:
503
+ // the source-attribution leak is closed for every URL scheme that
504
+ // matters (HTTP/HTTPS — i.e. every fingerprinter-relevant page) but
505
+ // remains for transitional URLs (about:blank/data:/blob:) where no
506
+ // fingerprinter typically reads.
296
507
  let injectScriptIdentifier: string | undefined;
297
508
  if (!this.bypassInject && this._payload !== null) {
509
+ const wrapped = wrapSelfRemovingPayload(this._payload.code);
298
510
  const installed = await this.router.send<{ identifier: string }>(
299
511
  "Page.addScriptToEvaluateOnNewDocument",
300
512
  {
301
- source: this._payload.code,
513
+ source: wrapped,
514
+ // Run before the first script in the document — same timing the
515
+ // Fetch.fulfillRequest splice achieves on HTTP nav.
302
516
  runImmediately: true,
517
+ // Empty `worldName` MUST be the literal empty string — naming any
518
+ // world creates a fingerprintable isolated world (PLAN.md §8.4).
303
519
  worldName: "",
304
- // includeCommandLineAPI defaults to false; we don't set it.
305
520
  },
306
521
  { sessionId: attached.sessionId },
307
522
  );
@@ -351,38 +566,26 @@ export class Session {
351
566
  }
352
567
 
353
568
  /**
354
- * All cookies the browser is aware of, optionally filtered by url.
569
+ * Cookie-jar surface: `get`, `set`, `save`, `load`. See {@link CookieJar}.
355
570
  *
356
- * Uses `Storage.getCookies` on the *root* browser target (the only domain
357
- * that exposes a global cookie reader without a per-page Network domain).
571
+ * All four methods route through `Storage.getCookies` /
572
+ * `Storage.setCookies` on the *root* browser target — the only domain that
573
+ * exposes a global cookie reader/writer without a per-page Network domain.
574
+ *
575
+ * The persistence layer (`save`/`load`) is JSON, NOT pickle (per audit:
576
+ * `docs/audits/nodriver.md` LOW finding 2 — Bun-native code uses JSON).
577
+ * Format pinned by {@link CookieJarFile}; a small header (`version`,
578
+ * `savedAt`, `mochiVersion`, `pattern`, `count`) lets a future incompatible
579
+ * change be detected before any cookie touches the browser.
358
580
  */
359
- async cookies(filter: { url?: string } = {}): Promise<import("./page").Cookie[]> {
360
- this.assertOpen();
361
- const result = await this.router.send<{ cookies: import("./page").Cookie[] }>(
362
- "Storage.getCookies",
363
- );
364
- if (filter.url === undefined) return result.cookies;
365
- // v0.1 only supports a coarse host-string filter — full URL matching with
366
- // path, secure, etc. is out of scope per the brief.
367
- let host: string;
368
- try {
369
- host = new URL(filter.url).hostname;
370
- } catch {
371
- return [];
372
- }
373
- return result.cookies.filter((c) => c.domain.endsWith(host) || host.endsWith(c.domain));
374
- }
375
-
376
- /** Set cookies via the root-target Storage domain. */
377
- async setCookies(cookies: import("./page").Cookie[]): Promise<void> {
378
- this.assertOpen();
379
- await this.router.send("Storage.setCookies", { cookies });
581
+ get cookies(): CookieJar {
582
+ return this.cookieJar;
380
583
  }
381
584
 
382
585
  /** Storage snapshot. v0.1: cookies only. localStorage/sessionStorage are empty placeholders pending phase 0.7. */
383
586
  async storage(): Promise<StorageSnapshot> {
384
587
  this.assertOpen();
385
- const c = await this.cookies();
588
+ const c = await this.cookieJar.get();
386
589
  return { cookies: c, localStorage: {}, sessionStorage: {} };
387
590
  }
388
591
 
@@ -501,15 +704,16 @@ export class Session {
501
704
  }
502
705
  this.netCtx = undefined;
503
706
  }
504
- // Drop the proxy-auth subscription + Fetch.disable BEFORE we tear down
505
- // the router so the disable round-trip can still complete.
506
- if (this.proxyAuthHandle !== undefined) {
707
+ // Drop the unified init-injector subscription (and its `Fetch.disable`)
708
+ // BEFORE we tear down the router so the disable round-trip can still
709
+ // complete on the live transport.
710
+ if (this.initInjectorHandle !== undefined) {
507
711
  try {
508
- await this.proxyAuthHandle.dispose();
712
+ await this.initInjectorHandle.dispose();
509
713
  } catch (err) {
510
- console.warn("[mochi] proxy-auth dispose failed:", err);
714
+ console.warn("[mochi] init-injector dispose failed:", err);
511
715
  }
512
- this.proxyAuthHandle = undefined;
716
+ this.initInjectorHandle = undefined;
513
717
  }
514
718
  await this.router.close();
515
719
  await this.proc.close();
@@ -567,6 +771,25 @@ export class Session {
567
771
  */
568
772
  static readonly VERSION = VERSION;
569
773
 
774
+ /**
775
+ * Module-private accessor used by {@link createCookieJar}. The cookie-jar
776
+ * factory lives in module scope (so callers can implement the public
777
+ * {@link CookieJar} interface without touching the Session internals); this
778
+ * accessor lets the factory reach the router + the open-state guard while
779
+ * keeping both genuinely private to user code.
780
+ *
781
+ * @internal
782
+ */
783
+ _internalCookieJarPlumbing(): {
784
+ router: MessageRouter;
785
+ assertOpen: () => void;
786
+ } {
787
+ return {
788
+ router: this.router,
789
+ assertOpen: () => this.assertOpen(),
790
+ };
791
+ }
792
+
570
793
  // ---- internals --------------------------------------------------------------
571
794
 
572
795
  private installAutoAttach(): void {
@@ -644,6 +867,14 @@ export class Session {
644
867
  // so the call binds to the worker's own context, not whatever
645
868
  // `Runtime.evaluate` happens to resolve. The payload IIFE is wrapped
646
869
  // as a function declaration so `callFunctionOn` accepts it.
870
+ //
871
+ // Timeout: 5s, not the 30s default. Transient workers (sannysoft,
872
+ // bot.incolumitas, etc. spawn brief workers that die between attach
873
+ // and inject) WILL silently disappear; without a per-call cap the
874
+ // route loop blocks for 30s waiting on a reply that's never coming,
875
+ // adding 30s × N orphan workers per test run. 5s is plenty for a
876
+ // real worker (callFunctionOn against a live context returns in
877
+ // single-digit ms); anything past that, the target is dead.
647
878
  await this.router.send(
648
879
  "Runtime.callFunctionOn",
649
880
  {
@@ -653,11 +884,26 @@ export class Session {
653
884
  awaitPromise: false,
654
885
  // includeCommandLineAPI must remain false (§8.2).
655
886
  },
656
- { sessionId: childSessionId },
887
+ { sessionId: childSessionId, timeoutMs: WORKER_INJECT_TIMEOUT_MS },
657
888
  );
658
889
  } catch (err: unknown) {
659
890
  if (!this.closed) {
660
- console.warn(`[mochi] payload inject into worker ${ev.targetInfo.targetId} failed:`, err);
891
+ // Downgrade to debug for the expected race (worker died before
892
+ // inject completed). The two error fingerprints are: our own
893
+ // CdpTimeoutError (router gave up), or CDP's own "Session with
894
+ // given id not found" / "Target closed" (target detached
895
+ // mid-roundtrip). Both are routine on real-world pages with
896
+ // short-lived workers; warning on every one is just noise. A
897
+ // genuine bug (e.g. the idOnly extraction returning a bad
898
+ // contextId) is anything else and still warns.
899
+ if (isTransientWorkerError(err)) {
900
+ // best-effort: silent. The worker is gone; nothing to do.
901
+ } else {
902
+ console.warn(
903
+ `[mochi] payload inject into worker ${ev.targetInfo.targetId} failed:`,
904
+ err,
905
+ );
906
+ }
661
907
  }
662
908
  }
663
909
  }
@@ -666,13 +912,18 @@ export class Session {
666
912
  try {
667
913
  await this.router.send("Runtime.runIfWaitingForDebugger", undefined, {
668
914
  sessionId: childSessionId,
915
+ timeoutMs: WORKER_INJECT_TIMEOUT_MS,
669
916
  });
670
917
  } catch (err: unknown) {
671
918
  if (!this.closed) {
672
- console.warn(
673
- `[mochi] Runtime.runIfWaitingForDebugger on target ${ev.targetInfo.targetId} failed:`,
674
- err,
675
- );
919
+ if (isTransientWorkerError(err)) {
920
+ // best-effort: silent. Same race as the inject path above.
921
+ } else {
922
+ console.warn(
923
+ `[mochi] Runtime.runIfWaitingForDebugger on target ${ev.targetInfo.targetId} failed:`,
924
+ err,
925
+ );
926
+ }
676
927
  }
677
928
  }
678
929
  }
@@ -764,3 +1015,269 @@ export class Session {
764
1015
  }
765
1016
  }
766
1017
  }
1018
+
1019
+ // ---- UA-CH metadata helpers (task 0261) -------------------------------------
1020
+
1021
/**
 * One `{brand, version}` pair in the shape `Network.setUserAgentOverride`
 * accepts inside `userAgentMetadata.brands` and
 * `userAgentMetadata.fullVersionList`.
 *
 * @internal
 */
interface UaMetadataBrand {
  /** Brand name with the surrounding header quotes already removed. */
  brand: string;
  /** Version string for the brand; empty when the header carried none. */
  version: string;
}
1031
+
1032
/**
 * Remove one pair of enclosing ASCII double-quotes from a `Sec-CH-UA*`
 * value. Several of those headers are quoted on the wire (`'"macOS"'`,
 * `'"14.0.0"'`, `'"arm"'`, `'"64"'`), while the CDP `userAgentMetadata`
 * fields take the bare text.
 *
 * Input that is not wrapped in a matching pair of quotes (including a lone
 * `"`) is returned unchanged. Only the outermost pair is removed; quotes
 * inside the value are left alone.
 */
function unquoteUaCh(s: string): string {
  const lastIndex = s.length - 1;
  const isWrapped = s.length >= 2 && s[0] === '"' && s[lastIndex] === '"';
  return isWrapped ? s.substring(1, lastIndex) : s;
}
1043
+
1044
/**
 * Parse a Sec-CH-UA-style header value
 * (`'"Brand A";v="123", "Not;A=Brand";v="8", "Brand B";v="456"'`) into the
 * `[{brand, version}, ...]` shape `userAgentMetadata.brands` expects.
 *
 * Sec-CH-UA is an RFC 8941 Structured-Headers list of quoted strings with
 * parameters, so BOTH delimiters must be treated as literal text when they
 * appear inside `"…"`:
 *   - `,` separates list members (`"Brand,with,commas"` is one member);
 *   - `;` separates a member from its parameters. Chrome's GREASE brands
 *     regularly contain it (`"Not;A=Brand"`, `"Not)A;Brand"`), so cutting at
 *     the first raw `;` would corrupt both brand and version.
 *
 * Per member: the text before the first unquoted `;` is the brand, and the
 * first parameter beginning with `v=` supplies the version. A member with no
 * `v=` parameter gets an empty version. Empty members are skipped.
 *
 * NOTE(review): `parseSecChUa` in
 * `@mochi.js/inject/src/modules/client-hints.ts` was documented as a mirror
 * of this parser over the same source field (`matrix.uaCh["sec-ch-ua"]`).
 * Confirm it also handles `;` inside quoted brands, otherwise the network
 * surface and the JS surface drift for those brands.
 *
 * @param s Raw header value; may be empty.
 * @returns Parsed entries in header order.
 * @internal
 */
function parseSecChUaBrandList(s: string): UaMetadataBrand[] {
  // Split `text` on `delimiter`, treating anything between a pair of `"` as
  // opaque. Backslash escapes are not interpreted (same as the quote toggle
  // this replaces).
  const splitOutsideQuotes = (text: string, delimiter: "," | ";"): string[] => {
    const segments: string[] = [];
    let inQuotes = false;
    let segmentStart = 0;
    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (ch === '"') {
        inQuotes = !inQuotes;
      } else if (ch === delimiter && !inQuotes) {
        segments.push(text.slice(segmentStart, i));
        segmentStart = i + 1;
      }
    }
    segments.push(text.slice(segmentStart));
    return segments;
  };

  const out: UaMetadataBrand[] = [];
  for (const member of splitOutsideQuotes(s, ",")) {
    const piece = member.trim();
    if (piece.length === 0) continue;

    const [brandPart = "", ...params] = splitOutsideQuotes(piece, ";");

    // Only the `v` parameter is meaningful here; unknown parameters are
    // ignored rather than being folded into the version string.
    let version = "";
    for (const param of params) {
      const trimmed = param.trim();
      if (trimmed.startsWith("v=")) {
        version = unquoteUaCh(trimmed.slice(2).trim());
        break;
      }
    }

    out.push({ brand: unquoteUaCh(brandPart.trim()), version });
  }
  return out;
}
1095
+
1096
/**
 * Produce the `[{brand, version}]` list for `userAgentMetadata.fullVersionList`.
 *
 * Primary source is `matrix.uaCh["ua-full-version-list"]`, a JSON-encoded
 * array. When it decodes to an array, the entries that are objects with
 * string `brand` and string `version` are returned (copied down to just
 * those two fields); anything else in the array is dropped.
 *
 * When the field is absent, empty, not valid JSON, or not an array, the
 * result comes from parsing `matrix.uaCh["sec-ch-ua"]` with
 * {@link parseSecChUaBrandList} instead, i.e. the brand-list versions.
 *
 * @internal
 */
function parseFullVersionList(matrix: MatrixV1): UaMetadataBrand[] {
  const isBrandEntry = (candidate: unknown): candidate is UaMetadataBrand =>
    typeof candidate === "object" &&
    candidate !== null &&
    typeof (candidate as { brand?: unknown }).brand === "string" &&
    typeof (candidate as { version?: unknown }).version === "string";

  const encoded = matrix.uaCh["ua-full-version-list"];
  if (typeof encoded === "string" && encoded.length > 0) {
    let decoded: unknown;
    try {
      decoded = JSON.parse(encoded) as unknown;
    } catch {
      // Malformed JSON: leave `decoded` undefined and use the fallback below.
      decoded = undefined;
    }
    if (Array.isArray(decoded)) {
      const entries: UaMetadataBrand[] = [];
      for (const candidate of decoded) {
        if (isBrandEntry(candidate)) {
          entries.push({ brand: candidate.brand, version: candidate.version });
        }
      }
      return entries;
    }
  }

  // Defensive fallback: derive the list from the Sec-CH-UA brand list.
  return parseSecChUaBrandList(matrix.uaCh["sec-ch-ua"] ?? "");
}
1129
+
1130
/**
 * Derive the `userAgentMetadata` argument of `Network.setUserAgentOverride`
 * from the `uaCh` section of a MatrixV1.
 *
 * How each field is obtained:
 *   - `brands`          — `uaCh["sec-ch-ua"]` parsed into `[{brand, version}]`.
 *   - `fullVersionList` — result of {@link parseFullVersionList}.
 *   - `fullVersion`     — `uaCh["ua-full-version"]` when it is a non-empty
 *                         string, otherwise the version of the first
 *                         `fullVersionList` entry, otherwise `""`.
 *   - `platform`        — `uaCh["sec-ch-ua-platform"]`, quotes stripped.
 *   - `platformVersion` — `uaCh["sec-ch-ua-platform-version"]`, quotes stripped.
 *   - `architecture`    — `uaCh["sec-ch-ua-arch"]`, quotes stripped.
 *   - `model`           — `uaCh["sec-ch-ua-model"]`, quotes stripped.
 *   - `mobile`          — `true` only when `uaCh["sec-ch-ua-mobile"]` is
 *                         exactly `"?1"`.
 *   - `bitness`         — `uaCh["sec-ch-ua-bitness"]`, quotes stripped; kept
 *                         as a string, never converted to a number.
 *   - `wow64`           — always `false`.
 *
 * Missing string fields become `""`.
 *
 * @internal
 */
export function buildUserAgentMetadata(matrix: MatrixV1): {
  brands: UaMetadataBrand[];
  fullVersionList: UaMetadataBrand[];
  fullVersion: string;
  platform: string;
  platformVersion: string;
  architecture: string;
  model: string;
  mobile: boolean;
  bitness: string;
  wow64: boolean;
} {
  const { uaCh } = matrix;
  const fullVersionList = parseFullVersionList(matrix);

  // Prefer the explicit full version; otherwise borrow the first list entry.
  const declaredFullVersion = uaCh["ua-full-version"];
  let fullVersion: string;
  if (typeof declaredFullVersion === "string" && declaredFullVersion.length > 0) {
    fullVersion = declaredFullVersion;
  } else {
    fullVersion = fullVersionList[0]?.version ?? "";
  }

  return {
    brands: parseSecChUaBrandList(uaCh["sec-ch-ua"] ?? ""),
    fullVersionList,
    fullVersion,
    platform: unquoteUaCh(uaCh["sec-ch-ua-platform"] ?? ""),
    platformVersion: unquoteUaCh(uaCh["sec-ch-ua-platform-version"] ?? ""),
    architecture: unquoteUaCh(uaCh["sec-ch-ua-arch"] ?? ""),
    model: unquoteUaCh(uaCh["sec-ch-ua-model"] ?? ""),
    // Wire form is the Structured-Headers boolean "?0" / "?1".
    mobile: uaCh["sec-ch-ua-mobile"] === "?1",
    bitness: unquoteUaCh(uaCh["sec-ch-ua-bitness"] ?? ""),
    wow64: false,
  };
}
1193
+
1194
+ // ---- cookie-jar factory (task 0257) -----------------------------------------
1195
+
1196
/**
 * Build the {@link CookieJar} returned by `Session.cookies`, bound to one
 * Session through {@link Session._internalCookieJarPlumbing} (which supplies
 * the CDP router and an `assertOpen` guard). Module-private: callers see
 * only the `CookieJar` interface.
 *
 * Every method calls `assertOpen()` first, then talks to the browser via
 * `Storage.getCookies` / `Storage.setCookies`.
 *
 * `save`/`load` use Bun's filesystem APIs (`Bun.file`, `Bun.write`) — Bun is
 * the only supported runtime per PLAN.md I-3 so there's no Node fallback.
 *
 * @param session Session whose router the jar sends CDP commands through.
 * @returns A jar exposing `get`, `set`, `save` and `load`.
 * @internal
 */
function createCookieJar(session: Session): CookieJar {
  const { router, assertOpen } = session._internalCookieJarPlumbing();

  // `RegExp.prototype.test` is stateful for global (`g`) and sticky (`y`)
  // patterns: it resumes from `lastIndex`, so reusing one caller-supplied
  // pattern across many cookies would silently skip matching domains. Rewind
  // before each test. Non-global patterns are left untouched so a frozen
  // RegExp keeps working.
  const domainMatches = (pattern: RegExp, domain: string): boolean => {
    if (pattern.global || pattern.sticky) pattern.lastIndex = 0;
    return pattern.test(domain);
  };

  return {
    /** All cookies, or those loosely matching the host of `filter.url`. */
    async get(filter: { url?: string } = {}) {
      assertOpen();
      const result = await router.send<{ cookies: import("./page").Cookie[] }>(
        "Storage.getCookies",
      );
      if (filter.url === undefined) return result.cookies;
      // Coarse host-string filter — full URL matching with path / secure /
      // sameSite is out of scope per the brief. Mirrors the pre-0257
      // behaviour of the legacy `Session.cookies(filter)` method.
      let host: string;
      try {
        host = new URL(filter.url).hostname;
      } catch {
        // An unparseable URL matches nothing.
        return [];
      }
      return result.cookies.filter((c) => c.domain.endsWith(host) || host.endsWith(c.domain));
    },

    /** Install the given cookies into the browser. */
    async set(cookies: import("./page").Cookie[]) {
      assertOpen();
      await router.send("Storage.setCookies", { cookies });
    },

    /**
     * Write the cookies whose domain matches `opts.pattern` (default: all)
     * to `path` as a versioned JSON document.
     */
    async save(path: string, opts: CookieJarOptions = {}) {
      assertOpen();
      const pattern = opts.pattern ?? /.*/;
      const all = await router.send<{ cookies: import("./page").Cookie[] }>("Storage.getCookies");
      const filtered = all.cookies.filter((c) => domainMatches(pattern, c.domain));
      const file: CookieJarFile = {
        version: COOKIE_JAR_FORMAT_VERSION,
        savedAt: new Date().toISOString(),
        mochiVersion: VERSION,
        pattern: pattern.source,
        count: filtered.length,
        cookies: filtered,
      };
      // Pretty-print with 2-space indent: jars are committed by some users
      // alongside fixtures (per nodriver's `pickle` use case); pretty JSON
      // diffs cleanly. Negligible size impact for a few-kB cookie set.
      await Bun.write(path, `${JSON.stringify(file, null, 2)}\n`);
    },

    /**
     * Read a jar written by `save` and install the cookies whose domain
     * matches `opts.pattern` (default: all).
     *
     * Throws an `Error` when the file is missing, is not valid JSON, is not a
     * JSON object, has an unsupported `version`, or lacks a `cookies` array.
     */
    async load(path: string, opts: CookieJarOptions = {}) {
      assertOpen();
      const pattern = opts.pattern ?? /.*/;
      const file = Bun.file(path);
      const exists = await file.exists();
      if (!exists) {
        throw new Error(`[mochi] cookies.load: file not found at ${path}`);
      }
      let parsed: unknown;
      try {
        const text = await file.text();
        parsed = JSON.parse(text);
      } catch (err) {
        throw new Error(`[mochi] cookies.load: ${path} is not valid JSON: ${String(err)}`);
      }
      const jar = parsed as Partial<CookieJarFile>;
      if (typeof jar !== "object" || jar === null) {
        throw new Error(`[mochi] cookies.load: ${path} is not a JSON object`);
      }
      if (jar.version !== COOKIE_JAR_FORMAT_VERSION) {
        throw new Error(
          `[mochi] cookies.load: ${path} version ${String(jar.version)} is not supported (expected ${COOKIE_JAR_FORMAT_VERSION})`,
        );
      }
      if (!Array.isArray(jar.cookies)) {
        throw new Error(`[mochi] cookies.load: ${path} has no \`cookies\` array`);
      }
      // Filter on load too: a single saved-with-everything jar can be sliced
      // domain-wise without re-saving.
      const toLoad = jar.cookies.filter((c) => domainMatches(pattern, c.domain));
      // Nothing matched: skip the CDP round-trip entirely.
      if (toLoad.length === 0) return;
      await router.send("Storage.setCookies", { cookies: toLoad });
    },
  };
}