@mochi.js/core 0.0.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/proc.ts ADDED
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Chromium process lifecycle.
3
+ *
4
+ * Owns spawn (Bun.spawn with pipe-mode FDs 3+4), stdio bookkeeping, graceful
5
+ * shutdown (SIGTERM → 2s grace → SIGKILL), and ephemeral user-data-dir cleanup.
6
+ *
7
+ * @see PLAN.md §8.5 / §8.6
8
+ */
9
+
10
+ import { mkdtemp, rm } from "node:fs/promises";
11
+ import { tmpdir } from "node:os";
12
+ import { join } from "node:path";
13
+ import type { PipeReader, PipeWriter } from "./cdp/transport";
14
+
15
+ /**
16
+ * The Chromium flags PLAN.md §8.6 mandates we always pass. Order does not
17
+ * matter; Chromium accepts late-arriving overrides for most flags, but this
18
+ * module never removes or rewrites an entry itself. Note that `spawnChromium` appends the headless/proxy flags, caller `extraArgs` and `MOCHI_EXTRA_ARGS` AFTER this list, so (per the above) a later caller-supplied flag may still take precedence.
19
+ */
20
+ export const DEFAULT_CHROMIUM_FLAGS: readonly string[] = [
21
+ "--remote-debugging-pipe",
22
+ "--no-default-browser-check",
23
+ "--no-first-run",
24
+ "--no-service-autorun",
25
+ "--password-store=basic",
26
+ "--use-mock-keychain",
27
+ "--disable-default-apps",
28
+ "--disable-component-update",
29
+ // Single comma-joined --disable-features flag (Chromium accepts comma list).
30
+ "--disable-features=Translate,OptimizationHints,MediaRouter,AcceptCHFrame,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,IsolateOrigins,site-per-process",
31
+ "--enable-features=NetworkService,NetworkServiceInProcess",
32
+ "--disable-background-networking",
33
+ "--disable-sync",
34
+ ];
35
+
36
+ const SIGTERM_GRACE_MS = 2000; // Milliseconds close() waits after sending SIGTERM before escalating to SIGKILL.
37
+
38
+ /**
39
+ * Public knobs surfaced through `LaunchOptions`. Held here so `launch.ts` can
40
+ * pass a small immutable record into `spawnChromium` without leaking the full
41
+ * options shape.
42
+ */
43
+ export interface SpawnConfig {
44
+ binary: string; // Chromium executable; passed to Bun.spawn as the command (argv[0]).
45
+ /** User-supplied extra flags appended after the defaults (and after the headless/proxy flags). Omit, or pass an empty array, to skip. */
46
+ extraArgs?: readonly string[];
47
+ /** Run headless via Chromium's modern `--headless=new` flag. */
48
+ headless: boolean;
49
+ /** Optional proxy server, e.g. "http://host:port" or "socks5://host:port"; forwarded verbatim as `--proxy-server=<value>`. An empty string is ignored. */
50
+ proxy?: string;
51
+ }
52
+
53
+ /**
54
+ * The handle returned by {@link spawnChromium}. Owns the user-data-dir, the
55
+ * subprocess, and the BunFile FD wrappers used by the CDP transport.
56
+ */
57
+ export interface ChromiumProcess {
58
+ /** Absolute path to the ephemeral user-data-dir (created with mkdtemp under the OS temp dir, `mochi-` prefix). Removed on close(). */
59
+ readonly userDataDir: string;
60
+ /** OS process id for diagnostics. */
61
+ readonly pid: number;
62
+ /** Resolves to the exit code once the child terminates (normal or signaled). */
63
+ readonly exited: Promise<number>;
64
+ /** Pipe reader for the CDP transport (browser → us; FD 4). */
65
+ readonly reader: PipeReader;
66
+ /** Pipe writer for the CDP transport (us → browser; FD 3). */
67
+ readonly writer: PipeWriter;
68
+ /**
69
+ * Graceful shutdown: end the CDP writer, SIGTERM, 2s grace, SIGKILL, then `rm -rf` the
70
+ * user-data-dir (best-effort: a removal failure is logged with console.warn, not thrown). Idempotent; safe to call multiple times.
71
+ */
72
+ close(): Promise<void>;
73
+ }
74
+
/**
 * Assemble the Chromium argv (everything after the binary) for one launch.
 *
 * Order: `--user-data-dir`, {@link DEFAULT_CHROMIUM_FLAGS}, `--headless=new`
 * (when requested), `--proxy-server` (when a non-empty proxy is set), the
 * caller's `extraArgs`, and finally `MOCHI_EXTRA_ARGS` from the environment.
 */
function buildChromiumArgs(cfg: SpawnConfig, userDataDir: string): string[] {
  const args: string[] = [`--user-data-dir=${userDataDir}`, ...DEFAULT_CHROMIUM_FLAGS];
  if (cfg.headless) {
    // Modern headless mode (matches stable Chrome behavior more closely than
    // legacy --headless). The `=new` is critical — old `--headless` is
    // detectable.
    args.push("--headless=new");
  }
  if (cfg.proxy !== undefined && cfg.proxy.length > 0) {
    args.push(`--proxy-server=${cfg.proxy}`);
  }
  if (cfg.extraArgs !== undefined && cfg.extraArgs.length > 0) {
    args.push(...cfg.extraArgs);
  }
  // Whitespace-separated extra args from the environment. Same effect as
  // `LaunchOptions.args` but settable from outside the calling code — load-
  // bearing for CI environments that need `--no-sandbox` (Linux user-namespace
  // sandbox doesn't work in unprivileged containers / GH Actions runners) and
  // for ad-hoc local debugging without touching test fixtures. Production code
  // SHOULD NOT set this — `--no-sandbox` is a fingerprint leak in real-user
  // contexts. PLAN.md §8.6 explicitly omits it from DEFAULT_CHROMIUM_FLAGS.
  const envExtra = process.env.MOCHI_EXTRA_ARGS;
  if (typeof envExtra === "string" && envExtra.trim().length > 0) {
    args.push(...envExtra.trim().split(/\s+/));
  }
  return args;
}

/**
 * Spawn Chromium with `--remote-debugging-pipe` and the standard flag set.
 *
 * Pipe FD convention (Chromium CDP pipe spec, matches Puppeteer / Playwright):
 *   - FD 3 in the *child* is the read end. The parent writes commands to it.
 *   - FD 4 in the *child* is the write end. The parent reads responses from it.
 *
 * Note: task brief 0011 has the FD direction labels reversed; we follow
 * Chromium's actual convention here so the protocol works. Either way Bun's
 * `stdio: ["pipe", "pipe", "pipe", "pipe", "pipe"]` allocates two extra pipes
 * and gives us back numeric FDs at `proc.stdio[3]` and `proc.stdio[4]`.
 *
 * A fresh `mochi-*` user-data-dir is created under the OS temp dir for every
 * call. It is removed again if the launch fails (spawn throws, or the CDP pipe
 * FDs are missing) and by `close()` on the returned handle.
 *
 * @param cfg Binary path plus the headless / proxy / extra-args knobs.
 * @returns A handle owning the subprocess, its user-data-dir and the CDP pipes.
 * @throws Whatever `mkdtemp` or `Bun.spawn` throws (e.g. binary not found), or
 *   an `Error` when Bun does not hand back numeric FDs for the CDP pipes.
 */
export async function spawnChromium(cfg: SpawnConfig): Promise<ChromiumProcess> {
  const userDataDir = await mkdtemp(join(tmpdir(), "mochi-"));
  const args = buildChromiumArgs(cfg, userDataDir);

  // Wrapped in a thunk so the handle keeps Bun's inferred subprocess type while
  // the actual call sits inside a try/catch.
  const launch = () =>
    Bun.spawn([cfg.binary, ...args], {
      // stdin, stdout, stderr, then two extra pipes for CDP framing.
      stdio: ["pipe", "pipe", "pipe", "pipe", "pipe"],
      // Chromium needs a real CWD for crash dumps etc; user-data-dir is fine.
      cwd: userDataDir,
    });

  let proc: ReturnType<typeof launch>;
  try {
    proc = launch();
  } catch (err) {
    // Spawn itself failed (missing binary, bad permissions, …). Nothing owns
    // the temp dir yet, so remove it here instead of leaking it.
    await rm(userDataDir, { recursive: true, force: true }).catch(() => {});
    throw err;
  }

  const writeFd = proc.stdio[3];
  const readFd = proc.stdio[4];
  if (typeof writeFd !== "number" || typeof readFd !== "number") {
    proc.kill();
    await rm(userDataDir, { recursive: true, force: true }).catch(() => {});
    throw new Error(
      "[mochi] Bun.spawn did not return numeric FDs at stdio[3]/stdio[4]; cannot establish CDP pipe.",
    );
  }

  // Drain stdout/stderr so Chromium never blocks on a full pipe while writing
  // diagnostics. The output is read and discarded; nothing inspects it (yet).
  void drainToVoid(proc.stderr);
  void drainToVoid(proc.stdout);

  // Build PipeReader/PipeWriter wrappers around the raw FDs.
  const writer: PipeWriter = (() => {
    const sink = Bun.file(writeFd).writer();
    return {
      write: (chunk) => sink.write(chunk),
      flush: () => sink.flush(),
      end: () => sink.end(),
    };
  })();

  const reader: PipeReader = {
    getReader: () => Bun.file(readFd).stream().getReader(),
  };

  /** One full shutdown pass: close the pipe, terminate, clean up. */
  const shutdown = async (): Promise<void> => {
    // Try to flush+end the writer first so Chromium's read side sees EOF.
    try {
      await writer.end?.();
    } catch {
      // ignore
    }
    // SIGTERM, then 2s grace, then SIGKILL.
    try {
      proc.kill("SIGTERM");
    } catch {
      // process may have already exited
    }
    const timer = setTimeout(() => {
      try {
        proc.kill("SIGKILL");
      } catch {
        // ignore
      }
    }, SIGTERM_GRACE_MS);
    try {
      await proc.exited;
    } finally {
      clearTimeout(timer);
    }
    // Best-effort user-data-dir cleanup. Failures are non-fatal but logged.
    await rm(userDataDir, { recursive: true, force: true }).catch((err: unknown) => {
      console.warn(`[mochi] failed to remove user-data-dir ${userDataDir}:`, err);
    });
  };

  // Memoize the shutdown so every caller — including one that arrives while a
  // close is still in flight — awaits the SAME promise and therefore only
  // resumes once the user-data-dir has actually been removed.
  let closePromise: Promise<void> | undefined;
  const close = (): Promise<void> => {
    if (closePromise === undefined) {
      closePromise = shutdown();
    }
    return closePromise;
  };

  return {
    userDataDir,
    pid: proc.pid,
    exited: proc.exited,
    reader,
    writer,
    close,
  };
}
198
+
/**
 * Read-and-discard a ReadableStream so Chromium's pipe buffers don't fill.
 *
 * Intended for fire-and-forget use (`void drainToVoid(...)`), so the returned
 * promise never rejects: a missing stream (`null` or `undefined`), a stream
 * that is already locked by another reader, and a stream that errors or is
 * cancelled mid-read all simply end the drain.
 *
 * @param stream The stream to exhaust, or `null`/`undefined` when the child
 *   has no such pipe.
 * @returns Resolves once the stream is done, errored, or could not be read.
 */
async function drainToVoid(
  stream: ReadableStream<Uint8Array> | null | undefined,
): Promise<void> {
  if (stream == null) return;

  let reader: ReadableStreamDefaultReader<Uint8Array>;
  try {
    reader = stream.getReader();
  } catch {
    // Already locked by someone else — they own the draining, nothing to do.
    return;
  }

  try {
    for (;;) {
      const { done } = await reader.read();
      if (done) return;
    }
  } catch {
    // ignore — stream errored or was cancelled
  } finally {
    reader.releaseLock();
  }
}
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Proxy authentication via CDP `Fetch.authRequired`.
3
+ *
4
+ * Background
5
+ * ----------
6
+ * Chromium's `--proxy-server=` flag accepts the address but rejects inline
7
+ * credentials in the URL — `--proxy-server=http://user:pass@host:8080` is
8
+ * silently stripped. The historical "proxy-auth-extension" workaround ships
9
+ * a tiny chrome-extension that subscribes to `chrome.webRequest.onAuthRequired`,
10
+ * but `--load-extension` is itself a fingerprint leak (chrome.runtime
11
+ * weirdness, observable extension ids) and so is forbidden by mochi's
12
+ * stealth invariants.
13
+ *
14
+ * The CDP path is invariant-clean: enable `Fetch` with `handleAuthRequests`
15
+ * AND a wildcard pattern. Chromium rejects `patterns: []` when
16
+ * `handleAuthRequests: true` (`-32602 Can't specify empty patterns with
17
+ * handleAuth set`, verified on CfT linux ~2026-05) — the original 0160
18
+ * design assumed empty patterns would only fire `Fetch.authRequired`
19
+ * events, but modern Chromium requires at least one URL pattern when auth
20
+ * handling is on. We use `[{ urlPattern: "*" }]` and forward every paused
21
+ * request immediately via `Fetch.continueRequest`. The auth challenges
22
+ * separately fire `Fetch.authRequired`; we answer those with
23
+ * `Fetch.continueWithAuth` carrying the parsed credentials.
24
+ *
25
+ * PLAN.md §8.2 invariant check
26
+ * ----------------------------
27
+ * `Fetch.enable` is NOT on the forbidden list. Only `Runtime.enable`
28
+ * (leaks execution-context-created events to page-observable side
29
+ * channels) and `Page.createIsolatedWorld` (creates a fingerprintable
30
+ * isolated world) are forbidden. `Fetch.enable` operates at the network
31
+ * layer below page script — it does not produce execution-context-creation
32
+ * events, does not surface a `chrome.devtools` global, and is not
33
+ * detectable from page JavaScript. With `patterns: [{urlPattern: "*"}]`
34
+ * every request pauses for one CDP round-trip before continuing — that's
35
+ * a measurable but bounded overhead (sub-ms per request on modern
36
+ * hardware) and only active on sessions with proxy auth credentials
37
+ * (the function early-returns when `auth` is undefined).
38
+ *
39
+ * Protocols
40
+ * ---------
41
+ * Chromium surfaces both HTTP and SOCKS5 auth challenges through the same
42
+ * `Fetch.authRequired` event. SOCKS5 user/pass auth happens at the SOCKS
43
+ * handshake (before any HTTP request) but Chromium wraps it as an
44
+ * `authRequired` for the first request through the proxy, so the same
45
+ * handler covers both.
46
+ *
47
+ * @see PLAN.md §8.2 / §10
48
+ * @see tasks/0160-proxy-auth-and-ci-fix.md
49
+ */
50
+
51
+ import type { MessageRouter, Unsubscribe } from "./cdp/router";
52
+
/** Parsed proxy URL — what `parseProxyUrl` returns. */
export interface ParsedProxy {
  /**
   * The auth-stripped server URL safe to pass to Chromium's
   * `--proxy-server=` flag. Format: `<protocol>://<host>:<port>`.
   */
  server: string;
  /**
   * Decoded credentials, present only when the input URL carried a
   * `user[:pass]@` segment. `password` is `""` (empty string) when the
   * URL had a username but no password (`http://user@host:8080`).
   */
  auth?: { username: string; password: string };
  /** Lowercased protocol (`http`, `https`, `socks5`, `socks4`). */
  protocol: "http" | "https" | "socks5" | "socks4";
}

/**
 * Default ports used when the input URL omits one. Keyed by the supported
 * protocol union so a lookup can never come back `undefined`.
 */
const DEFAULT_PORTS: Record<ParsedProxy["protocol"], string> = {
  http: "80",
  https: "443",
  socks5: "1080",
  socks4: "1080",
};

/**
 * Percent-decode one credential component taken from a parsed `URL`.
 *
 * `decodeURIComponent` throws a bare `URIError` on a malformed escape (for
 * example a literal `%` that was never encoded). Re-throw it as a `[mochi]`
 * error that names the offending field but never echoes the credential.
 */
function decodeProxyCredential(
  raw: string,
  field: "username" | "password",
  server: string,
): string {
  try {
    return decodeURIComponent(raw);
  } catch {
    throw new Error(
      `[mochi] proxy URL for ${server} has a malformed percent-escape in its ${field} — encode a literal "%" as "%25"`,
    );
  }
}

/**
 * Parse a proxy URL string into `{ server, auth?, protocol }`.
 *
 * Handles:
 *   - `http://user:pass@host:port`      → auth + server
 *   - `socks5://user@host:1080`         → auth.password = ""
 *   - `http://host:8080`                → no auth
 *   - `http://user%40d:p%40ss@host:80`  → percent-decoded credentials
 *   - `http://user:pass@[::1]:8080`     → IPv6 hosts (URL parser handles)
 *   - `http://host`                     → port defaults per protocol
 *
 * Implementation uses `new URL()` for the structural parse and IPv6 host
 * bracketing; the credentials it exposes are still percent-encoded and are
 * decoded here.
 *
 * @param input Proxy URL, optionally carrying `user[:pass]@` credentials.
 * @returns The auth-stripped server URL, the protocol, and — only when the
 *   input had a username — the decoded credentials.
 * @throws Error when the input is not a valid URL, uses an unsupported
 *   protocol, has no host, or carries credentials with a malformed
 *   percent-escape.
 */
export function parseProxyUrl(input: string): ParsedProxy {
  let url: URL;
  try {
    url = new URL(input);
  } catch (err) {
    throw new Error(
      `[mochi] invalid proxy URL ${JSON.stringify(input)}: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
  const rawProto = url.protocol.replace(/:$/, "").toLowerCase();
  if (
    rawProto !== "http" &&
    rawProto !== "https" &&
    rawProto !== "socks5" &&
    rawProto !== "socks4"
  ) {
    throw new Error(
      `[mochi] unsupported proxy protocol ${JSON.stringify(rawProto)} — supported: http, https, socks5, socks4`,
    );
  }
  const protocol = rawProto;
  // `URL.hostname` may keep or strip IPv6 brackets depending on the
  // runtime — normalize to a single `[…]`-bracketed form so we can format
  // the server URL deterministically.
  const rawHost = url.hostname;
  const stripped =
    rawHost.startsWith("[") && rawHost.endsWith("]") ? rawHost.slice(1, -1) : rawHost;
  const isIpv6 = stripped.includes(":");
  const host = isIpv6 ? `[${stripped}]` : stripped;
  // `URL.port` is "" both when the port is omitted and when it equals the
  // scheme default (the parser drops it), so fall back to the table.
  const port = url.port.length > 0 ? url.port : DEFAULT_PORTS[protocol];
  if (host.length === 0) {
    throw new Error(`[mochi] proxy URL ${JSON.stringify(input)} is missing a host`);
  }
  const server = `${protocol}://${host}:${port}`;

  // `URL.username`/`URL.password` are exposed percent-ENCODED; decode them so
  // callers get the literal credentials.
  if (url.username.length > 0) {
    return {
      server,
      auth: {
        username: decodeProxyCredential(url.username, "username", server),
        password:
          url.password.length > 0 ? decodeProxyCredential(url.password, "password", server) : "",
      },
      protocol,
    };
  }
  return { server, protocol };
}
140
+
141
+ /**
142
+ * Result of {@link installProxyAuth}: a handle whose `dispose()` removes the
143
+ * router listeners and then sends `Fetch.disable` to turn the Fetch domain off. Idempotent.
144
+ */
145
+ export interface ProxyAuthHandle {
146
+ /** Tear down the listeners + send `Fetch.disable`. Idempotent. A failing `Fetch.disable` is never thrown: it is ignored when the transport is already closed and logged with console.warn otherwise. */
147
+ dispose(): Promise<void>;
148
+ }
149
+
/**
 * Wire proxy-auth handling into a {@link MessageRouter}. No-op when
 * `auth` is undefined — saves the `Fetch.enable` round-trip and avoids
 * any protocol surface for sessions that don't need it.
 *
 * Behavior:
 *   - Sends `Fetch.enable { handleAuthRequests: true, patterns: [{
 *     urlPattern: "*" }] }` once.
 *   - On `Fetch.authRequired`, replies with `Fetch.continueWithAuth`. The
 *     parsed creds are provided unless the challenge is explicitly marked
 *     as coming from the origin server (`authChallenge.source === "Server"`);
 *     those get the `Default` response so proxy credentials are never
 *     handed to a website.
 *   - On `Fetch.requestPaused`, forwards `Fetch.continueRequest`
 *     immediately so the network model stays unchanged (every request
 *     still flows; we just take one CDP round-trip to wave it through).
 *
 * Why wildcard patterns instead of empty: modern Chromium (CfT linux
 * ~2026-05) rejects `patterns: []` when `handleAuthRequests: true` is set
 * with `-32602 Can't specify empty patterns with handleAuth set`. The
 * wildcard plus an immediate-continue handler is the equivalent of
 * "auth-only interception" with one extra round-trip per request — only
 * active on proxy-authed sessions.
 *
 * @param router CDP message router used to subscribe and send commands.
 * @param auth Proxy credentials, or `undefined` to install nothing.
 * @returns A handle whose `dispose()` undoes the installation (a no-op
 *   handle when `auth` is undefined).
 * @throws Whatever `Fetch.enable` rejects with; the listeners registered
 *   for it are removed before the error propagates.
 */
export async function installProxyAuth(
  router: MessageRouter,
  auth: { username: string; password: string } | undefined,
): Promise<ProxyAuthHandle> {
  if (auth === undefined) {
    // Nothing to answer challenges with: touch neither the router nor CDP.
    return { dispose: async (): Promise<void> => {} };
  }
  // Bind to a const so the narrowed (defined) type holds inside the callbacks.
  const credentials = auth;

  // Subscribe FIRST so we don't miss the very first authRequired event the
  // browser fires after Fetch.enable.
  const offAuth = router.on("Fetch.authRequired", (params) => {
    const event = params as {
      requestId?: unknown;
      authChallenge?: { source?: unknown } | null;
    } | null;
    const requestId = event?.requestId;
    if (typeof requestId !== "string") return;
    // `authRequired` also fires for origin-server challenges (e.g. a site
    // answering 401 with Basic auth). Replying to those with the proxy
    // credentials would leak them to the website, so defer to the browser's
    // default handling. A missing `source` is still treated as a proxy
    // challenge so the proxy path keeps working.
    const fromOriginServer = event?.authChallenge?.source === "Server";
    const authChallengeResponse = fromOriginServer
      ? { response: "Default" }
      : {
          response: "ProvideCredentials",
          username: credentials.username,
          password: credentials.password,
        };
    // Fire-and-forget — failures here are non-fatal (the request will
    // simply 407 and the page-level fetch will see it). We log on
    // unexpected errors so users can diagnose creds issues.
    router
      .send("Fetch.continueWithAuth", { requestId, authChallengeResponse })
      .catch((err: unknown) => {
        if (!isClosedError(err)) {
          console.warn("[mochi] Fetch.continueWithAuth failed:", err);
        }
      });
  });

  // Pattern is REQUIRED with handleAuthRequests: true. Modern Chromium
  // rejects an empty `patterns` array with `-32602 Can't specify empty
  // patterns with handleAuth set` (verified on CfT linux ~2026-05). Use
  // a wildcard pattern so every request pauses, then immediately
  // forward in the requestPaused handler below — that gets us auth
  // challenge interception without altering the user-visible network
  // model. The per-request CDP round-trip is real overhead but only
  // active when the session has proxy auth credentials, so non-proxied
  // sessions pay zero cost.
  const offPaused = router.on("Fetch.requestPaused", (params) => {
    const requestId = (params as { requestId?: string } | null)?.requestId;
    if (typeof requestId !== "string") return;
    router.send("Fetch.continueRequest", { requestId }).catch((err: unknown) => {
      if (!isClosedError(err)) {
        console.warn("[mochi] Fetch.continueRequest failed:", err);
      }
    });
  });

  try {
    await router.send("Fetch.enable", {
      handleAuthRequests: true,
      patterns: [{ urlPattern: "*" }],
    });
  } catch (err) {
    // No handle will be returned, so nobody could ever dispose these
    // listeners — remove them before surfacing the failure.
    offAuth();
    offPaused();
    throw err;
  }

  let disposed = false;
  return {
    async dispose(): Promise<void> {
      if (disposed) return;
      disposed = true;
      offAuth();
      offPaused();
      try {
        await router.send("Fetch.disable");
      } catch (err) {
        // Closed-pipe failures are expected during session teardown.
        if (!isClosedError(err)) {
          console.warn("[mochi] Fetch.disable failed:", err);
        }
      }
    },
  };
}
242
+
243
+ /** True when an error reflects the transport already being closed: an `Error` whose name is "BrowserCrashedError" or whose message contains one of the closed-transport phrases below (case-insensitive). Any non-Error value yields false. */
244
+ function isClosedError(err: unknown): boolean {
245
+ if (err instanceof Error) {
246
+ return (
247
+ err.name === "BrowserCrashedError" ||
248
+ /transport already closed|pipe closed|browser process exited/i.test(err.message)
249
+ );
250
+ }
251
+ return false;
252
+ }
252
+ }