npm - @mochi.js/core - Versions diffs - 0.1.2 → 0.2.2 - Mend

@mochi.js/core 0.1.2 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/package.json +5 -5
package/src/__tests__/inject.test.ts +2 -0
package/src/__tests__/piercing.test.ts +164 -0
package/src/__tests__/proc.test.ts +383 -0
package/src/__tests__/selector.test.ts +188 -0
package/src/__tests__/window-size.e2e.test.ts +130 -0
package/src/cdp/types.ts +47 -0
package/src/index.ts +1 -0
package/src/launch.ts +73 -8
package/src/page/element-handle.ts +110 -0
package/src/page/piercing.ts +135 -0
package/src/page/selector.ts +423 -0
package/src/page.ts +142 -0
package/src/proc.ts +386 -41
package/src/session.ts +140 -12

package/src/page.ts CHANGED Viewed

@@ -35,9 +35,13 @@ import type {
   DispatchMouseEventParams,
   DomNode,
   FrameNavigatedEvent,
+  PierceDomNode,
   RemoteObject,
 } from "./cdp/types";
 import { NotImplementedError } from "./errors";
+import { ElementHandle } from "./page/element-handle";
+import { findPiercingMatches } from "./page/piercing";
+import { parseSelector } from "./page/selector";
 /** Wait conditions for `Page.goto`. */
 export type WaitUntil = "load" | "domcontentloaded" | "networkidle";
@@ -527,6 +531,10 @@ export class Page {
     });
     const targetBox = boxFromBorderQuad(box.model);
     const callSeed = this.nextCallSeed();
+    // Trajectory synth lives here (not in `performClickAt`) so prototype
+    // inspection in conformance tests can see the synthesize / trajectory
+    // / cursor markers — they're a consumer-side smoke check that the
+    // behavioral synth is wired in.
     const traj = synthesizeMouseTrajectory({
       from: { x: this.cursor.x, y: this.cursor.y },
       to: { x: targetBox.x + targetBox.width / 2, y: targetBox.y + targetBox.height / 2 },
@@ -535,6 +543,51 @@ export class Page {
       seed: callSeed,
       ...(opts.duration !== undefined ? { durationMs: opts.duration } : {}),
     });
+    await this.dispatchClickTrajectory(traj, callSeed, opts);
+  }
+  /**
+   * Variant of {@link humanClick} that operates on an {@link ElementHandle}
+   * resolved via {@link querySelectorPiercing} — required when the target
+   * element lives inside a closed shadow root (no CSS path can name it from
+   * the parent document, so the regular `humanClick(selector)` route fails).
+   *
+   * Pipeline differs from {@link humanClick} only in step 1: the box model
+   * is resolved via `DOM.getBoxModel({ backendNodeId })` instead of through a
+   * `DOM.querySelector`-resolved nodeId. Everything downstream (trajectory
+   * synth, dispatch loop, press/release) is identical.
+   *
+   * @param handle - element to click; only its `backendNodeId` is read here.
+   * @param opts - click options. `opts.duration`, when defined, is forwarded
+   *   to the trajectory synth as `durationMs`; the whole object is also
+   *   passed on to the shared dispatch loop.
+   * @returns resolves once the shared dispatch loop has finished.
+   */
+  async humanClickHandle(handle: ElementHandle, opts: HumanClickOptions = {}): Promise<void> {
+    this.assertOpen();
+    // Geometry comes straight from the handle's backendNodeId — no selector
+    // lookup is performed in this variant.
+    const box = await this.send<{ model: BoxModel }>("DOM.getBoxModel", {
+      backendNodeId: handle.backendNodeId,
+    });
+    const targetBox = boxFromBorderQuad(box.model);
+    const callSeed = this.nextCallSeed();
+    // Trajectory runs from the current cursor position to the centre of the
+    // target box. The synth call is kept inline (not factored into a helper)
+    // for the same reason as in `humanClick`: source-level conformance checks
+    // look for it inside the public method.
+    const traj = synthesizeMouseTrajectory({
+      from: { x: this.cursor.x, y: this.cursor.y },
+      to: { x: targetBox.x + targetBox.width / 2, y: targetBox.y + targetBox.height / 2 },
+      box: targetBox,
+      profile: this.behavior,
+      seed: callSeed,
+      // Only add `durationMs` when the caller supplied a duration, so the key
+      // is absent (rather than `undefined`) otherwise.
+      ...(opts.duration !== undefined ? { durationMs: opts.duration } : {}),
+    });
+    await this.dispatchClickTrajectory(traj, callSeed, opts);
+  }
+  /**
+   * Inner dispatch loop shared by {@link humanClick} and
+   * {@link humanClickHandle}. Takes the synthesised trajectory, paces the
+   * `mouseMoved` events, then fires `mousePressed` + `mouseReleased` at the
+   * arrival point with realistic press duration. Trajectory synth itself
+   * stays inside the public methods so source-grep conformance checks can
+   * verify the synth is reachable from the public API.
+   */
+  private async dispatchClickTrajectory(
+    traj: ReturnType<typeof synthesizeMouseTrajectory>,
+    callSeed: string,
+    opts: HumanClickOptions,
+  ): Promise<void> {
     if (traj.length === 0) return;
     // Pre-move settle: Gaussian(150, 50) ms idle. Cheaply approximated via
@@ -724,6 +777,95 @@ export class Page {
     }
   }
+  /**
+   * Closed-shadow-root piercing locator — find the first element matching the
+   * CSS selector across the entire DOM tree, including elements nested inside
+   * **closed** shadow roots (which {@link text}, {@link humanClick}, etc. can
+   * NOT reach because `DOM.querySelector` does not traverse closed shadows
+   * even with `pierce: true` set on the parent `getDocument` call).
+   *
+   * Required for Cloudflare Turnstile auto-click on integrations where the
+   * widget iframe lives behind a closed shadow root (Cloudflare Challenge
+   * pages, Workers Static Assets, some CDN configs). Without this, task
+   * 0220's auto-click silently fails on those flows.
+   *
+   * Algorithm (port of patchright `framesPatch.ts:868-1012`
+   * `_customFindElementsByParsed`):
+   *   1. `DOM.getDocument({ depth: -1, pierce: true })` — yields the full
+   *      tree, with shadow descendants under `shadowRoots[]` for both open
+   *      AND closed roots.
+   *   2. Recursive walk in JS, matching against a parsed CSS selector. We
+   *      can't `DOM.querySelector` per shadow because the per-shadow query
+   *      itself doesn't pierce closed roots either.
+   *   3. For matches, `DOM.resolveNode({ backendNodeId })` to get a
+   *      `RemoteObject.objectId`, wrapped in {@link ElementHandle}.
+   *
+   * Supported selectors (see `selector.ts`): tag / id / class / attribute /
+   * descendant combinator / comma-separated lists. **Not** supported:
+   * `>`/`+`/`~` combinators, `:pseudo-classes`, `::pseudo-elements`, XPath.
+   * XPath is a stretch goal per task 0253 brief — TODO if a future surface
+   * needs it (Turnstile detection only needs CSS).
+   *
+   * Performance: O(N) in DOM size per call. Acceptable for v0.2; a per-page
+   * cache layer is a v0.3+ concern (also called out in 0253).
+   *
+   * @see tasks/0253-closed-shadow-piercing-locator.md
+   * @see PLAN.md §8.2 (`DOM.getDocument` and `DOM.resolveNode` are not on the
+   *   forbidden list — both fine to use here).
+   */
+  async querySelectorPiercing(selector: string): Promise<ElementHandle | null> {
+    // A limit of 1 lets the shared walk stop as soon as one match is found.
+    const [first] = await this.queryPiercing(selector, 1);
+    return first ?? null;
+  }
+  /**
+   * The "all matches" variant of {@link querySelectorPiercing}. Returns every
+   * element that satisfies the selector, in depth-first pre-order — same
+   * traversal a regular `querySelectorAll` produces, with closed-shadow
+   * descendants spliced in at the position they'd appear under the host.
+   *
+   * Returns an empty array when nothing matches.
+   */
+  async querySelectorAllPiercing(selector: string): Promise<ElementHandle[]> {
+    // No limit: the shared walk collects every match (possibly none).
+    const everyMatch = await this.queryPiercing(selector);
+    return everyMatch;
+  }
+  /**
+   * Shared implementation for the piercing locator.
+   *
+   * Fetches the document tree once, matches it in JS against the parsed
+   * selector, then resolves every match to a `RemoteObject` and wraps it in
+   * an {@link ElementHandle}.
+   *
+   * @param selector - CSS selector; parsed with `parseSelector`.
+   * @param limit - optional cap forwarded to `findPiercingMatches`, which
+   *   short-circuits the walk once that many matches are found.
+   * @returns handles in match order. Matches for which the protocol returned
+   *   no `objectId` are omitted; an empty array means nothing matched.
+   */
+  private async queryPiercing(selector: string, limit?: number): Promise<ElementHandle[]> {
+    this.assertOpen();
+    const parsed = parseSelector(selector);
+    // depth: -1 + pierce: true is the magic combination patchright uses; CDP
+    // returns the whole tree nested under the root node (walked recursively
+    // below), including shadow descendants on both open and closed roots,
+    // AND iframe contentDocuments for same-origin children.
+    const { root } = await this.send<{ root: PierceDomNode }>("DOM.getDocument", {
+      depth: -1,
+      pierce: true,
+    });
+    const matches = findPiercingMatches(root, parsed, limit);
+    if (matches.length === 0) return [];
+    // The per-match `DOM.resolveNode` lookups are independent of each other,
+    // so issue them together instead of paying one round-trip at a time.
+    // `Promise.all` keeps the results in match order, and the requests are
+    // still sent in that same order.
+    const resolved = await Promise.all(
+      matches.map(async (m) => {
+        const { object } = await this.send<{ object: RemoteObject }>("DOM.resolveNode", {
+          backendNodeId: m.backendNodeId,
+        });
+        return { backendNodeId: m.backendNodeId, objectId: object.objectId };
+      }),
+    );
+    const handles: ElementHandle[] = [];
+    for (const { backendNodeId, objectId } of resolved) {
+      // Skip nodes the protocol couldn't bind to a RemoteObject (rare — e.g.
+      // detached subtree races). Surfacing a partial set is more useful than
+      // throwing for the Turnstile detector path.
+      if (objectId === undefined) continue;
+      handles.push(
+        new ElementHandle({
+          router: this.router,
+          sessionId: this.sessionId,
+          objectId,
+          backendNodeId,
+        }),
+      );
+    }
+    return handles;
+  }
   screenshot(_opts?: unknown): Promise<Uint8Array> {
     return Promise.reject(new NotImplementedError("page.screenshot"));
   }

package/src/proc.ts CHANGED Viewed

@@ -13,9 +13,45 @@ import { join } from "node:path";
 import type { PipeReader, PipeWriter } from "./cdp/transport";
 /**
- * The chromium flags PLAN.md §8.6 mandates we always pass. Order does not
- * matter; Chromium accepts late-arriving overrides for most flags but we
- * never override these.
+ * The chromium flags PLAN.md §8.6 mandates we always pass in PRODUCTION
+ * (non-hermetic) mode. Trimmed against patchright's
+ * `chromiumSwitchesPatch.ts:20-34` removal list (task 0256): every flag
+ * here passes two tests — (a) it isn't a passive command-line bot-tell that
+ * patchright explicitly drops, AND (b) we have a concrete production reason
+ * to keep it (CDP transport, UI suppression that matters in headed mode,
+ * keychain/keyring, or load-bearing for inject reach).
+ *
+ * Flags moved to {@link HERMETIC_ONLY_CHROMIUM_FLAGS} (re-applied when
+ * `LaunchOptions.hermetic === true`):
+ *   - `--disable-component-update`  — patchright drops; cmdline tell.
+ *   - `--disable-default-apps`      — patchright drops; cmdline tell.
+ *   - `--disable-background-networking` — patchright drops; updater-traffic suppressor.
+ *   - `--disable-sync`              — patchright drops; cmdline tell.
+ *   - `--disable-features` extras   — `OptimizationHints,MediaRouter,
+ *     InterestFeedContentSuggestions,CalculateNativeWinOcclusion` are
+ *     network/noise suppressors valid only for hermetic harness/CI runs;
+ *     real users want the natural network surface so the production list
+ *     keeps just the load-bearing entries.
+ *
+ * Production `--disable-features=` keepers + rationale:
+ *   - `Translate`            — suppresses the translate-prompt UI bar that
+ *                              would surface in headed mode.
+ *   - `AcceptCHFrame`        — keeps UA-CH negotiation off the frame path
+ *                              so our `Sec-CH-UA` headers (R-007) stay the
+ *                              single source of truth.
+ *   - `IsolateOrigins,site-per-process` — load-bearing for inject reach:
+ *                              mochi doesn't yet resolve cross-origin OOPIF
+ *                              contexts, so disabling site isolation keeps
+ *                              cross-origin frames in the same renderer
+ *                              process where `addScriptToEvaluateOnNewDocument`
+ *                              actually runs.
+ *
+ * Order does not matter; Chromium accepts late-arriving overrides for most
+ * flags but we never override these.
+ *
+ * @see PLAN.md §8.6 (decision ledger).
+ * @see docs/audits/patchright.md MED finding (chromiumSwitchesPatch.ts:20-34).
+ * @see docs/audits/puppeteer-real-browser.md LOW finding (lib/cjs/index.js:57-58).
  */
 export const DEFAULT_CHROMIUM_FLAGS: readonly string[] = [
   "--remote-debugging-pipe",
@@ -24,13 +60,41 @@ export const DEFAULT_CHROMIUM_FLAGS: readonly string[] = [
   "--no-service-autorun",
   "--password-store=basic",
   "--use-mock-keychain",
+  "--disable-features=Translate,AcceptCHFrame,IsolateOrigins,site-per-process",
+  "--enable-features=NetworkService,NetworkServiceInProcess",
+];
+/**
+ * Flags re-applied on top of {@link DEFAULT_CHROMIUM_FLAGS} when
+ * `LaunchOptions.hermetic === true`. The harness fixture matrix, CI runs,
+ * and capture flows pair `bypassInject: true` with `hermetic: true` so
+ * baseline collection isn't perturbed by updater traffic, default-apps
+ * auto-install, sync, or feed prefetches.
+ *
+ * Production users (the non-hermetic default) get a clean production flag
+ * set: no obvious cmdline tells, normal-looking updater + sync traffic.
+ *
+ * Each entry here was either explicitly removed by patchright as a passive
+ * bot-tell (`--disable-component-update`, `--disable-default-apps`,
+ * `--disable-background-networking`, `--disable-sync`) or is a noise-
+ * reduction `--disable-features=` token whose suppression is desirable for
+ * hermetic determinism but undesirable for production stealth.
+ *
+ * The hermetic `--disable-features=` token is declared SEPARATELY from the
+ * production one so the production-only subset stays legible and avoids a
+ * fingerprintable list-order coincidence with Playwright defaults. The
+ * intended final disabled set is
+ * `{Translate,AcceptCHFrame,IsolateOrigins,site-per-process} ∪
+ *  {OptimizationHints,MediaRouter,InterestFeedContentSuggestions,
+ *   CalculateNativeWinOcclusion}`. NOTE(review): Chromium's command-line
+ * parser keeps only the LAST value of a repeated switch, so that union holds
+ * only if the tokens are coalesced into one flag before spawn — confirm.
+ */
+export const HERMETIC_ONLY_CHROMIUM_FLAGS: readonly string[] = [
   "--disable-default-apps",
   "--disable-component-update",
-  // Single comma-joined --disable-features flag (Chromium accepts comma list).
-  "--disable-features=Translate,OptimizationHints,MediaRouter,AcceptCHFrame,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,IsolateOrigins,site-per-process",
-  "--enable-features=NetworkService,NetworkServiceInProcess",
   "--disable-background-networking",
   "--disable-sync",
+  "--disable-features=OptimizationHints,MediaRouter,InterestFeedContentSuggestions,CalculateNativeWinOcclusion",
 ];
 const SIGTERM_GRACE_MS = 2000;
@@ -48,8 +112,82 @@ export interface SpawnConfig {
   headless: boolean;
   /** Optional proxy server, e.g. "http://host:port" or "socks5://host:port". */
   proxy?: string;
+  /**
+   * Opt out of mochi's "auto-add `--no-sandbox` when running as root on Linux"
+   * fallback. Chromium refuses to launch as root with the user-namespace
+   * sandbox enabled; mochi normally injects `--no-sandbox` (with a warning)
+   * so the launch succeeds. Set to `true` if you have a working
+   * `chrome-sandbox` SUID helper and want to keep the sandbox under root —
+   * the launch will then crash with the original `EPIPE` if the SUID setup
+   * is wrong. PLAN.md §8.6 + `docs/quickstart.md` "Linux gotcha — Chromium
+   * and root".
+   */
+  allowRootWithSandbox?: boolean;
+  /**
+   * Primary BCP-47 locale for the spawned Chromium. Passed as `--lang=<value>`
+   * so Chromium's network stack derives an `Accept-Language` header that
+   * agrees with the JS-layer `navigator.language(s)` spoof. Without this,
+   * Chromium falls back to the host OS locale (or `en-US,en;q=0.9`), which a
+   * site can cross-reference against `navigator.languages` to detect the
+   * mismatch — direct PLAN.md I-5 violation.
+   *
+   * Sourced from `MatrixV1.locale` (the canonical primary BCP-47 string,
+   * e.g. `"en-US"`). Multi-locale `Accept-Language` q-weighting is derived
+   * by Chromium itself from this single primary; the broader list is
+   * surfaced separately via the JS-side `navigator.languages` spoof.
+   *
+   * Honored under `--headless=new` — the flag drives `ICU::Locale::Default`
+   * and `IOThread::Globals::system_request_context_->set_accept_language()`,
+   * both of which run regardless of headless mode.
+   *
+   * Source-cited from undetected-chromedriver `__init__.py:359-369` (which
+   * falls back to `locale.getdefaultlocale()` → `en-US`); we deliberately
+   * do NOT fall back to host locale — locale must come from the matrix.
+   */
+  locale?: string;
+  /**
+   * Outer window geometry to pin via `--window-size=<width>,<height>`. When
+   * supplied, Chromium's OS-level outer-window dimensions match the spoofed
+   * `screen.*` so `window.outerWidth/outerHeight` (read at the OS level
+   * under `--headless=new`) don't expose the default 800×600 leak that
+   * `fingerprint-scan.com` flags. Both dimensions must be finite positive
+   * integers; otherwise the flag is omitted. Sourced from
+   * `matrix.display.{width,height}` by `launch.ts` — the matrix is canonical.
+   *
+   * @see UDC `__init__.py:410-411`, UDC issue #2242, task 0252.
+   */
+  windowSize?: { width: number; height: number };
+  /**
+   * When `true`, re-apply {@link HERMETIC_ONLY_CHROMIUM_FLAGS} on top of
+   * {@link DEFAULT_CHROMIUM_FLAGS}. Used by the harness, CI, and
+   * `mochi capture` flows where update-checks, sync traffic, default-apps
+   * auto-install, and feed prefetches would inject non-determinism.
+   *
+   * Defaults to `false` (production posture). Production users get the
+   * cleaner flag set without obvious command-line bot-tells.
+   *
+   * Sourced from `LaunchOptions.hermetic` (see `launch.ts`). Pairs with
+   * `bypassInject: true` for capture flows but is independent — a hermetic
+   * launch with full inject is the harness's fingerprint-conformance run.
+   *
+   * @see task 0256, PLAN.md §8.6.
+   */
+  hermetic?: boolean;
 }
+/**
+ * Flags we deliberately strip from any user-supplied extra args. UDC ships
+ * with `--start-maximized`; mochi must not — it produces host-OS-dependent
+ * geometry that drifts from the matrix's `display.*` spoof and re-introduces
+ * the same outer-window mismatch `--window-size` is here to close.
+ *
+ * Applied to `extraArgs` and to the `MOCHI_EXTRA_ARGS` env split so users /
+ * CI cannot accidentally re-introduce non-determinism.
+ *
+ * @see task 0252 success criterion #3.
+ */
+const FORBIDDEN_FLAG_PREFIXES: readonly string[] = ["--start-maximized"];
 /**
  * The handle returned by {@link spawnChromium}. Owns the user-data-dir, the
  * subprocess, and the BunFile FD wrappers used by the CDP transport.
@@ -73,43 +211,47 @@ export interface ChromiumProcess {
 }
 /**
- * Spawn Chromium with `--remote-debugging-pipe` and the standard flag set.
- *
- * Pipe FD convention (Chromium CDP pipe spec, matches Puppeteer / Playwright):
- *   - FD 3 in the *child* is the read end. The parent writes commands to it.
- *   - FD 4 in the *child* is the write end. The parent reads responses from it.
+ * Spawn Chromium over `--remote-debugging-pipe` using the argv from {@link buildChromiumArgs}.
  *
- * Note: task brief 0011 has the FD direction labels reversed; we follow
- * Chromium's actual convention here so the protocol works. Either way Bun's
- * `stdio: ["pipe", "pipe", "pipe", "pipe", "pipe"]` allocates two extra pipes
- * and gives us back numeric FDs at `proc.stdio[3]` and `proc.stdio[4]`.
+ * Flag composition lives in the pure, synchronous {@link buildChromiumArgs}
+ * so the flag set can be unit-tested without spawning a real process. The
+ * only load-bearing ordering there is `--lang` BEFORE `extraArgs`, so a
+ * deliberate user-supplied `--lang=<override>` in `args` wins (Chromium
+ * honors the last occurrence on the command line for this flag).
  */
 export async function spawnChromium(cfg: SpawnConfig): Promise<ChromiumProcess> {
   const userDataDir = await mkdtemp(join(tmpdir(), "mochi-"));
-  const args: string[] = [`--user-data-dir=${userDataDir}`, ...DEFAULT_CHROMIUM_FLAGS];
-  if (cfg.headless) {
-    // Modern headless mode (matches stable Chrome behavior more closely than
-    // legacy --headless). The `=new` is critical — old `--headless` is
-    // detectable.
-    args.push("--headless=new");
-  }
-  if (cfg.proxy !== undefined && cfg.proxy.length > 0) {
-    args.push(`--proxy-server=${cfg.proxy}`);
-  }
-  if (cfg.extraArgs !== undefined && cfg.extraArgs.length > 0) {
-    args.push(...cfg.extraArgs);
-  }
-  // Whitespace-separated extra args from the environment. Same effect as
-  // `LaunchOptions.args` but settable from outside the calling code — load-
-  // bearing for CI environments that need `--no-sandbox` (Linux user-namespace
-  // sandbox doesn't work in unprivileged containers / GH Actions runners) and
-  // for ad-hoc local debugging without touching test fixtures. Production code
-  // SHOULD NOT set this — `--no-sandbox` is a fingerprint leak in real-user
-  // contexts. PLAN.md §8.6 explicitly omits it from DEFAULT_CHROMIUM_FLAGS.
   const envExtra = process.env.MOCHI_EXTRA_ARGS;
-  if (typeof envExtra === "string" && envExtra.trim().length > 0) {
-    args.push(...envExtra.trim().split(/\s+/));
+  const args = buildChromiumArgs(cfg, userDataDir, envExtra);
+  // Linux + uid 0 (root) + no `--no-sandbox` anywhere → Chromium will refuse
+  // to start with the user-namespace sandbox. We auto-inject `--no-sandbox`
+  // (with a one-line warning naming the fingerprint trade-off) instead of
+  // letting `spawnChromium` crash with `EPIPE`. Users who explicitly want
+  // the sandbox under root can either run as a non-root user, `chmod 4755`
+  // the chrome-sandbox SUID helper, or pass their own `--no-sandbox` (which
+  // we'd see in args and skip this branch).
+  //
+  // We DO NOT add `--no-sandbox` to DEFAULT_CHROMIUM_FLAGS (PLAN.md §8.6
+  // explicitly omits it as a fingerprint leak). This is a runtime fallback,
+  // not a default — only fires under the specific environment that would
+  // otherwise crash. The fingerprint-leak risk is documented in
+  // docs/quickstart.md "Linux gotcha — Chromium and root".
+  if (
+    process.platform === "linux" &&
+    process.getuid?.() === 0 &&
+    !args.some((a) => a === "--no-sandbox" || a.startsWith("--no-sandbox=")) &&
+    !cfg.allowRootWithSandbox
+  ) {
+    console.warn(
+      "[mochi] Detected root + Linux + missing --no-sandbox. " +
+        "Auto-adding --no-sandbox so Chromium can launch. " +
+        "This is a fingerprint leak per PLAN.md §8.6 — run as non-root or " +
+        "use the chrome-sandbox SUID helper for stealth-critical workloads. " +
+        "See docs/quickstart.md 'Linux gotcha — Chromium and root'. " +
+        "Pass `allowRootWithSandbox: true` to mochi.launch() to opt out of this fallback.",
+    );
+    args.push("--no-sandbox");
   }
   const proc = Bun.spawn([cfg.binary, ...args], {
@@ -129,11 +271,35 @@ export async function spawnChromium(cfg: SpawnConfig): Promise<ChromiumProcess>
     );
   }
-  // Drain stderr so Chromium doesn't block writing diagnostics. We don't read
-  // it (yet); piping to /dev/null keeps the buffer empty.
-  void drainToVoid(proc.stderr);
+  // Drain stderr so Chromium doesn't block writing diagnostics. We capture
+  // the tail (last ~4KB) so the early-exit diagnostic below has something
+  // human-readable to surface — e.g. Chromium's own
+  // "Running as root without --no-sandbox is not supported" message.
+  const stderrTail: string[] = [];
+  void drainToText(proc.stderr, stderrTail);
   void drainToVoid(proc.stdout);
+  // Diagnose early process death: Chromium that dies within ~750ms of spawn
+  // is almost always failing on a startup precondition (sandbox refusal under
+  // root, missing libs, malformed flags). We watch `proc.exited` race with
+  // a short timer and surface a clearer error than the eventual EPIPE on the
+  // first CDP write. See docs/quickstart.md "Linux gotcha — Chromium and root".
+  const earlyExitCode = await Promise.race([
+    proc.exited.then((c) => ({ kind: "exited" as const, code: c })),
+    new Promise<{ kind: "alive" }>((resolve) => setTimeout(() => resolve({ kind: "alive" }), 750)),
+  ]);
+  if (earlyExitCode.kind === "exited") {
+    await rm(userDataDir, { recursive: true, force: true }).catch(() => {});
+    const tail = stderrTail.join("").trim().split("\n").slice(-12).join("\n");
+    throw new Error(
+      `[mochi] Chromium exited (code ${earlyExitCode.code}) within 750ms of spawn — ` +
+        "the CDP pipe never opened. Most likely a startup precondition failure " +
+        "(sandbox refusal, missing libs, malformed flags).\n\n" +
+        `Stderr tail:\n${tail || "(empty)"}` +
+        diagnoseEarlyExitTail(tail),
+    );
+  }
   // Build PipeReader/PipeWriter wrappers around the raw FDs.
   const writer: PipeWriter = (() => {
     const sink = Bun.file(writeFd).writer();
@@ -196,6 +362,148 @@ export async function spawnChromium(cfg: SpawnConfig): Promise<ChromiumProcess>
   };
 }
+/**
+ * Pure builder for the Chromium argv used by {@link spawnChromium}. Extracted
+ * so tests can assert flag composition (window-size, headless, forbidden-flag
+ * scrub, env extras) without spawning a real binary.
+ *
+ * Push order is `defaults → hermetic-extras → headless → proxy → lang →
+ * window-size → extras → env-extras`. After assembly, repeated
+ * `--disable-features=` / `--enable-features=` switches are coalesced into a
+ * single switch each, because Chromium keeps only the last value of a
+ * repeated switch — without the merge, a later list would silently replace
+ * the production one instead of extending it.
+ *
+ * @param cfg        — the {@link SpawnConfig} the caller passed.
+ * @param userDataDir — absolute path to the ephemeral profile dir.
+ * @param envExtra   — value of `MOCHI_EXTRA_ARGS` (pass `process.env.MOCHI_EXTRA_ARGS`
+ *                    in production; tests pass a string or `undefined`).
+ * @returns the argv to pass after the binary path.
+ */
+export function buildChromiumArgs(
+  cfg: SpawnConfig,
+  userDataDir: string,
+  envExtra: string | undefined,
+): string[] {
+  const args: string[] = [`--user-data-dir=${userDataDir}`, ...DEFAULT_CHROMIUM_FLAGS];
+  // Hermetic harness/CI escape hatch: re-apply the flags that were trimmed
+  // from the production defaults. Inserted directly after the defaults; the
+  // hermetic `--disable-features=` list is folded into the production one
+  // by the coalescing step at the end of this function.
+  if (cfg.hermetic === true) {
+    args.push(...HERMETIC_ONLY_CHROMIUM_FLAGS);
+  }
+  if (cfg.headless) {
+    // Modern headless mode (matches stable Chrome behavior more closely than
+    // legacy --headless). The `=new` is critical — old `--headless` is
+    // detectable.
+    args.push("--headless=new");
+  }
+  if (cfg.proxy !== undefined && cfg.proxy.length > 0) {
+    args.push(`--proxy-server=${cfg.proxy}`);
+  }
+  // Matrix-derived primary locale — feeds Chromium's `Accept-Language`
+  // header so the network surface matches the JS-layer `navigator.language`
+  // spoof (PLAN.md I-5). Pushed BEFORE `extraArgs` so a user-supplied
+  // override in `args` can win on the command line if absolutely needed —
+  // Chromium honors the last-occurrence on the line for `--lang`. Task 0251.
+  if (cfg.locale !== undefined && cfg.locale.length > 0) {
+    args.push(`--lang=${cfg.locale}`);
+  }
+  // `--window-size=<W>,<H>` — pin the OS-level outer window so
+  // `window.outerWidth/outerHeight` match `matrix.display.*` instead of
+  // Chromium's headless 800×600 default. The matrix is canonical: when
+  // `display.{width,height}` is missing or non-finite we omit the flag
+  // rather than fall back to a hardcoded value (a hardcoded value would
+  // mismatch a profile that legitimately uses different dimensions). Task 0252.
+  if (cfg.windowSize !== undefined) {
+    const { width, height } = cfg.windowSize;
+    // `Number.isInteger` already rejects NaN and ±Infinity.
+    if (Number.isInteger(width) && Number.isInteger(height) && width > 0 && height > 0) {
+      args.push(`--window-size=${width},${height}`);
+    }
+  }
+  if (cfg.extraArgs !== undefined && cfg.extraArgs.length > 0) {
+    args.push(...stripForbiddenFlags(cfg.extraArgs));
+  }
+  // Whitespace-separated extra args from the environment. Same effect as
+  // `LaunchOptions.args` but settable from outside the calling code — load-
+  // bearing for CI environments that need `--no-sandbox` (Linux user-namespace
+  // sandbox doesn't work in unprivileged containers / GH Actions runners) and
+  // for ad-hoc local debugging without touching test fixtures. Production code
+  // SHOULD NOT set this — `--no-sandbox` is a fingerprint leak in real-user
+  // contexts. PLAN.md §8.6 explicitly omits it from DEFAULT_CHROMIUM_FLAGS.
+  if (typeof envExtra === "string" && envExtra.trim().length > 0) {
+    args.push(...stripForbiddenFlags(envExtra.trim().split(/\s+/)));
+  }
+  // Fold every feature list (production, hermetic, user, env) into one
+  // switch per kind so none of them is discarded by Chromium.
+  return coalesceFeatureSwitch(
+    coalesceFeatureSwitch(args, "--disable-features"),
+    "--enable-features",
+  );
+}
+/**
+ * Merge every `<switchName>=a,b` occurrence in `args` into a single switch
+ * holding the union of the listed features (first-seen order, duplicates
+ * dropped). The merged switch takes the position of the first occurrence;
+ * later occurrences are removed. When the switch appears at most once the
+ * argv is returned unchanged (as a copy).
+ */
+function coalesceFeatureSwitch(args: readonly string[], switchName: string): string[] {
+  const prefix = `${switchName}=`;
+  const features: string[] = [];
+  const seen = new Set<string>();
+  let occurrences = 0;
+  for (const arg of args) {
+    if (!arg.startsWith(prefix)) continue;
+    occurrences += 1;
+    for (const raw of arg.slice(prefix.length).split(",")) {
+      const feature = raw.trim();
+      if (feature.length === 0 || seen.has(feature)) continue;
+      seen.add(feature);
+      features.push(feature);
+    }
+  }
+  if (occurrences <= 1) return [...args];
+  const merged: string[] = [];
+  let emitted = false;
+  for (const arg of args) {
+    if (!arg.startsWith(prefix)) {
+      merged.push(arg);
+      continue;
+    }
+    // Emit the union once, where the first occurrence stood; skip it
+    // entirely if every occurrence carried an empty list.
+    if (!emitted && features.length > 0) {
+      merged.push(`${prefix}${features.join(",")}`);
+    }
+    emitted = true;
+  }
+  return merged;
+}
+/**
+ * Map the stderr tail of a Chromium that died within 750ms of spawn to a
+ * remediation hint. Two failure signatures are recognised today:
+ *
+ *   1. "Running as root without --no-sandbox is not supported" — the user-
+ *      namespace sandbox refusal under root. Fixes: non-root, SUID helper,
+ *      or `--no-sandbox` (with the documented fingerprint cost).
+ *   2. "error while loading shared libraries: <name>.so" — fresh Linux server
+ *      without the Chromium runtime deps. The hint is only a short pointer:
+ *      the full dep list lives in `@mochi.js/cli/src/lib/linux-deps.ts`, and
+ *      @mochi.js/core cannot depend on @mochi.js/cli without inverting the
+ *      package graph.
+ *
+ * Exported so unit tests can lock the regexes against regressions without
+ * spawning Chromium.
+ *
+ * @param tail - the captured stderr tail to classify.
+ * @returns the hint (starting with a blank line) to append to the error
+ *   message, or the empty string when neither signature matches — the caller
+ *   then surfaces only the raw stderr tail.
+ * @see tasks/0259-linux-first-run-experience.md
+ */
+export function diagnoseEarlyExitTail(tail: string): string {
+  const sandboxRefusal = /running.*root.*without.*--no-sandbox|--no-sandbox.*required/i;
+  const missingLibrary = /error while loading shared libraries:\s+([^\s:]+)/i;
+  // The sandbox signature is checked first, so it wins if both appear.
+  if (sandboxRefusal.test(tail)) {
+    return (
+      "\n\nChromium refuses to start as root with the user-namespace sandbox enabled.\n" +
+      "Fixes (preferred → workaround):\n" +
+      "  1. Run as a non-root user.\n" +
+      "  2. `chmod 4755 chrome-sandbox` on the SUID helper next to the CfT binary.\n" +
+      "  3. Pass args: ['--no-sandbox'] to mochi.launch() — fingerprint leak (PLAN §8.6),\n" +
+      "     OK for testing, not for stealth-critical production."
+    );
+  }
+  const missing = tail.match(missingLibrary);
+  if (missing === null) return "";
+  // Capture group 1 is the library file name up to the next space or colon.
+  const lib = missing[1] ?? "(unknown .so)";
+  return (
+    `\n\nChromium failed to load a system library: '${lib}'.\n` +
+    "Chromium-for-Testing ships only the binary; on a fresh Linux server the\n" +
+    "system libs Chromium links against are not preinstalled. Install the\n" +
+    "canonical dep list with apt:\n\n" +
+    "  bunx mochi browsers install   # re-run; the install command prints the\n" +
+    "                                # exact apt line for your system.\n\n" +
+    "Or install directly — full list at\n" +
+    "  https://mochijs.com/docs/getting-started/install#linux-runtime-dependencies"
+  );
+}
+/**
+ * Return `args` without any flag named in {@link FORBIDDEN_FLAG_PREFIXES}.
+ * A flag is dropped when it equals a forbidden name exactly or is that name
+ * followed by `=` — so `--start-maximized` and `--start-maximized=1` both
+ * go, while a longer flag such as `--start-maximized-foo` (hypothetical)
+ * stays. Surviving args keep their original order; the input is not mutated.
+ */
+function stripForbiddenFlags(args: readonly string[]): string[] {
+  const isForbidden = (candidate: string): boolean =>
+    FORBIDDEN_FLAG_PREFIXES.some(
+      (name) => candidate === name || candidate.startsWith(`${name}=`),
+    );
+  return args.filter((candidate) => !isForbidden(candidate));
+}
 /** Read-and-discard a ReadableStream so Chromium's pipe buffers don't fill. */
 async function drainToVoid(stream: ReadableStream<Uint8Array> | null): Promise<void> {
   if (stream === null) return;
@@ -211,3 +519,40 @@ async function drainToVoid(stream: ReadableStream<Uint8Array> | null): Promise<v
     reader.releaseLock();
   }
 }
+/**
+ * Read a ReadableStream to completion and keep the decoded text in `tail`,
+ * holding at most the last `cap` (4096) characters so a chatty Chromium
+ * can't blow memory. Used by `spawnChromium`'s early-exit diagnostic to
+ * recover the last few lines of stderr from a process that died within
+ * 750ms of spawn.
+ *
+ * The cap is enforced exactly: whole chunks are dropped from the front
+ * while they fit inside the excess, and the chunk straddling the boundary
+ * is sliced — so a single oversized chunk is trimmed too, and the retained
+ * text is always the most recent `cap` characters. Characters are counted
+ * in UTF-16 code units.
+ *
+ * @param stream - the stream to drain; `null` is a no-op.
+ * @param tail - caller-owned array mutated in place; `tail.join("")` yields
+ *   the retained text. Expected to start empty.
+ * @returns resolves when the stream ends, errors, or is cancelled (read
+ *   errors are swallowed — this is best-effort diagnostics).
+ */
+async function drainToText(
+  stream: ReadableStream<Uint8Array> | null,
+  tail: string[],
+): Promise<void> {
+  if (stream === null) return;
+  const reader = stream.getReader();
+  const decoder = new TextDecoder();
+  let bufferedLen = 0;
+  const cap = 4096;
+  // Append decoded text, then trim from the front so only the tail survives.
+  const append = (text: string): void => {
+    if (text.length === 0) return;
+    tail.push(text);
+    bufferedLen += text.length;
+    let excess = bufferedLen - cap;
+    while (excess > 0) {
+      const head = tail[0];
+      if (head === undefined) break;
+      if (head.length <= excess) {
+        tail.shift();
+        excess -= head.length;
+        bufferedLen -= head.length;
+      } else {
+        tail[0] = head.slice(excess);
+        bufferedLen -= excess;
+        excess = 0;
+      }
+    }
+  };
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        // Flush any bytes the streaming decoder is still holding back.
+        append(decoder.decode());
+        return;
+      }
+      if (value !== undefined) {
+        append(decoder.decode(value, { stream: true }));
+      }
+    }
+  } catch {
+    // ignore — stream errored or was cancelled
+  } finally {
+    reader.releaseLock();
+  }
+}