@mochi.js/core 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/proc.ts ADDED
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Chromium process lifecycle.
3
+ *
4
+ * Owns spawn (Bun.spawn with pipe-mode FDs 3+4), stdio bookkeeping, graceful
5
+ * shutdown (SIGTERM → 2s grace → SIGKILL), and ephemeral user-data-dir cleanup.
6
+ *
7
+ * @see PLAN.md §8.5 / §8.6
8
+ */
9
+
10
+ import { mkdtemp, rm } from "node:fs/promises";
11
+ import { tmpdir } from "node:os";
12
+ import { join } from "node:path";
13
+ import type { PipeReader, PipeWriter } from "./cdp/transport";
14
+
15
+ /**
16
+ * The chromium flags PLAN.md §8.6 mandates we always pass. Order does not
17
+ * matter; Chromium accepts late-arriving overrides for most flags but we
18
+ * never override these.
19
+ */
20
+ export const DEFAULT_CHROMIUM_FLAGS: readonly string[] = [
21
+ "--remote-debugging-pipe",
22
+ "--no-default-browser-check",
23
+ "--no-first-run",
24
+ "--no-service-autorun",
25
+ "--password-store=basic",
26
+ "--use-mock-keychain",
27
+ "--disable-default-apps",
28
+ "--disable-component-update",
29
+ // Single comma-joined --disable-features flag (Chromium accepts comma list).
30
+ "--disable-features=Translate,OptimizationHints,MediaRouter,AcceptCHFrame,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,IsolateOrigins,site-per-process",
31
+ "--enable-features=NetworkService,NetworkServiceInProcess",
32
+ "--disable-background-networking",
33
+ "--disable-sync",
34
+ ];
35
+
36
+ const SIGTERM_GRACE_MS = 2000;
37
+
38
+ /**
39
+ * Public knobs surfaced through `LaunchOptions`. Held here so `launch.ts` can
40
+ * pass a small immutable record into `spawnChromium` without leaking the full
41
+ * options shape.
42
+ */
43
+ export interface SpawnConfig {
44
+ binary: string;
45
+ /** User-supplied extra flags appended after the defaults. Null to skip. */
46
+ extraArgs?: readonly string[];
47
+ /** Run headless via Chromium's modern `--headless=new` flag. */
48
+ headless: boolean;
49
+ /** Optional proxy server, e.g. "http://host:port" or "socks5://host:port". */
50
+ proxy?: string;
51
+ }
52
+
53
+ /**
54
+ * The handle returned by {@link spawnChromium}. Owns the user-data-dir, the
55
+ * subprocess, and the BunFile FD wrappers used by the CDP transport.
56
+ */
57
+ export interface ChromiumProcess {
58
+ /** Absolute path to the ephemeral user-data-dir. Removed on close(). */
59
+ readonly userDataDir: string;
60
+ /** OS process id for diagnostics. */
61
+ readonly pid: number;
62
+ /** Resolves to the exit code once the child terminates (normal or signaled). */
63
+ readonly exited: Promise<number>;
64
+ /** Pipe reader for the CDP transport (browser → us; FD 4). */
65
+ readonly reader: PipeReader;
66
+ /** Pipe writer for the CDP transport (us → browser; FD 3). */
67
+ readonly writer: PipeWriter;
68
+ /**
69
+ * Graceful shutdown: SIGTERM, 2s grace, SIGKILL, then `rm -rf` the
70
+ * user-data-dir. Idempotent; safe to call multiple times.
71
+ */
72
+ close(): Promise<void>;
73
+ }
74
+
75
+ /**
76
+ * Spawn Chromium with `--remote-debugging-pipe` and the standard flag set.
77
+ *
78
+ * Pipe FD convention (Chromium CDP pipe spec, matches Puppeteer / Playwright):
79
+ * - FD 3 in the *child* is the read end. The parent writes commands to it.
80
+ * - FD 4 in the *child* is the write end. The parent reads responses from it.
81
+ *
82
+ * Note: task brief 0011 has the FD direction labels reversed; we follow
83
+ * Chromium's actual convention here so the protocol works. Either way Bun's
84
+ * `stdio: ["pipe", "pipe", "pipe", "pipe", "pipe"]` allocates two extra pipes
85
+ * and gives us back numeric FDs at `proc.stdio[3]` and `proc.stdio[4]`.
86
+ */
87
+ export async function spawnChromium(cfg: SpawnConfig): Promise<ChromiumProcess> {
88
+ const userDataDir = await mkdtemp(join(tmpdir(), "mochi-"));
89
+
90
+ const args: string[] = [`--user-data-dir=${userDataDir}`, ...DEFAULT_CHROMIUM_FLAGS];
91
+ if (cfg.headless) {
92
+ // Modern headless mode (matches stable Chrome behavior more closely than
93
+ // legacy --headless). The `=new` is critical — old `--headless` is
94
+ // detectable.
95
+ args.push("--headless=new");
96
+ }
97
+ if (cfg.proxy !== undefined && cfg.proxy.length > 0) {
98
+ args.push(`--proxy-server=${cfg.proxy}`);
99
+ }
100
+ if (cfg.extraArgs !== undefined && cfg.extraArgs.length > 0) {
101
+ args.push(...cfg.extraArgs);
102
+ }
103
+ // Whitespace-separated extra args from the environment. Same effect as
104
+ // `LaunchOptions.args` but settable from outside the calling code — load-
105
+ // bearing for CI environments that need `--no-sandbox` (Linux user-namespace
106
+ // sandbox doesn't work in unprivileged containers / GH Actions runners) and
107
+ // for ad-hoc local debugging without touching test fixtures. Production code
108
+ // SHOULD NOT set this — `--no-sandbox` is a fingerprint leak in real-user
109
+ // contexts. PLAN.md §8.6 explicitly omits it from DEFAULT_CHROMIUM_FLAGS.
110
+ const envExtra = process.env.MOCHI_EXTRA_ARGS;
111
+ if (typeof envExtra === "string" && envExtra.trim().length > 0) {
112
+ args.push(...envExtra.trim().split(/\s+/));
113
+ }
114
+
115
+ const proc = Bun.spawn([cfg.binary, ...args], {
116
+ // stdin, stdout, stderr, then two extra pipes for CDP framing.
117
+ stdio: ["pipe", "pipe", "pipe", "pipe", "pipe"],
118
+ // Chromium needs a real CWD for crash dumps etc; user-data-dir is fine.
119
+ cwd: userDataDir,
120
+ });
121
+
122
+ const writeFd = proc.stdio[3];
123
+ const readFd = proc.stdio[4];
124
+ if (typeof writeFd !== "number" || typeof readFd !== "number") {
125
+ proc.kill();
126
+ await rm(userDataDir, { recursive: true, force: true }).catch(() => {});
127
+ throw new Error(
128
+ "[mochi] Bun.spawn did not return numeric FDs at stdio[3]/stdio[4]; cannot establish CDP pipe.",
129
+ );
130
+ }
131
+
132
+ // Drain stderr so Chromium doesn't block writing diagnostics. We don't read
133
+ // it (yet); piping to /dev/null keeps the buffer empty.
134
+ void drainToVoid(proc.stderr);
135
+ void drainToVoid(proc.stdout);
136
+
137
+ // Build PipeReader/PipeWriter wrappers around the raw FDs.
138
+ const writer: PipeWriter = (() => {
139
+ const sink = Bun.file(writeFd).writer();
140
+ return {
141
+ write: (chunk) => sink.write(chunk),
142
+ flush: () => sink.flush(),
143
+ end: () => sink.end(),
144
+ };
145
+ })();
146
+
147
+ const reader: PipeReader = {
148
+ getReader: () => Bun.file(readFd).stream().getReader(),
149
+ };
150
+
151
+ let closing = false;
152
+ const close = async (): Promise<void> => {
153
+ if (closing) {
154
+ // Wait until the in-flight close finishes.
155
+ await proc.exited.catch(() => 0);
156
+ return;
157
+ }
158
+ closing = true;
159
+ // Try to flush+end the writer first so Chromium's read side sees EOF.
160
+ try {
161
+ await writer.end?.();
162
+ } catch {
163
+ // ignore
164
+ }
165
+ // SIGTERM, then 2s grace, then SIGKILL.
166
+ try {
167
+ proc.kill("SIGTERM");
168
+ } catch {
169
+ // process may have already exited
170
+ }
171
+ const timer = setTimeout(() => {
172
+ try {
173
+ proc.kill("SIGKILL");
174
+ } catch {
175
+ // ignore
176
+ }
177
+ }, SIGTERM_GRACE_MS);
178
+ try {
179
+ await proc.exited;
180
+ } finally {
181
+ clearTimeout(timer);
182
+ }
183
+ // Best-effort user-data-dir cleanup. Failures are non-fatal but logged.
184
+ await rm(userDataDir, { recursive: true, force: true }).catch((err: unknown) => {
185
+ console.warn(`[mochi] failed to remove user-data-dir ${userDataDir}:`, err);
186
+ });
187
+ };
188
+
189
+ return {
190
+ userDataDir,
191
+ pid: proc.pid,
192
+ exited: proc.exited,
193
+ reader,
194
+ writer,
195
+ close,
196
+ };
197
+ }
198
+
199
+ /** Read-and-discard a ReadableStream so Chromium's pipe buffers don't fill. */
200
+ async function drainToVoid(stream: ReadableStream<Uint8Array> | null): Promise<void> {
201
+ if (stream === null) return;
202
+ const reader = stream.getReader();
203
+ try {
204
+ while (true) {
205
+ const { done } = await reader.read();
206
+ if (done) return;
207
+ }
208
+ } catch {
209
+ // ignore — stream errored or was cancelled
210
+ } finally {
211
+ reader.releaseLock();
212
+ }
213
+ }
@@ -0,0 +1,235 @@
1
+ /**
2
+ * Proxy authentication via CDP `Fetch.authRequired`.
3
+ *
4
+ * Background
5
+ * ----------
6
+ * Chromium's `--proxy-server=` flag accepts the address but rejects inline
7
+ * credentials in the URL — `--proxy-server=http://user:pass@host:8080` is
8
+ * silently stripped. The historical "proxy-auth-extension" workaround ships
9
+ * a tiny chrome-extension that subscribes to `chrome.webRequest.onAuthRequired`,
10
+ * but `--load-extension` is itself a fingerprint leak (chrome.runtime
11
+ * weirdness, observable extension ids) and so is forbidden by mochi's
12
+ * stealth invariants.
13
+ *
14
+ * The CDP path is invariant-clean: enable `Fetch` with `handleAuthRequests`
15
+ * and *empty* request patterns. With `handleAuthRequests: true`, Chromium
16
+ * fires `Fetch.authRequired` for auth challenges — the `authChallenge.source`
17
+ * field distinguishes proxy challenges from origin-server (401) ones — while
18
+ * regular request flow is unaffected (no `Fetch.requestPaused` events when
+ * patterns is `[]`). We answer with `Fetch.continueWithAuth`.
19
+ *
20
+ * PLAN.md §8.2 invariant check
21
+ * ----------------------------
22
+ * `Fetch.enable` is NOT on the forbidden list. Only `Runtime.enable`
23
+ * (leaks execution-context-created events to page-observable side
24
+ * channels) and `Page.createIsolatedWorld` (creates a fingerprintable
25
+ * isolated world) are forbidden. `Fetch.enable` operates at the network
26
+ * layer below page script — it does not produce execution-context-creation
27
+ * events, does not surface a `chrome.devtools` global, and is not
28
+ * detectable from page JavaScript. The defensive `Fetch.requestPaused`
29
+ * handler below is unreachable when `patterns: []` is set, but registered
30
+ * as belt-and-braces in case a Chromium quirk triggers a pause.
31
+ *
32
+ * Protocols
33
+ * ---------
34
+ * Chromium surfaces both HTTP and SOCKS5 auth challenges through the same
35
+ * `Fetch.authRequired` event. SOCKS5 user/pass auth happens at the SOCKS
36
+ * handshake (before any HTTP request) but Chromium wraps it as an
37
+ * `authRequired` for the first request through the proxy, so the same
38
+ * handler covers both.
39
+ *
40
+ * @see PLAN.md §8.2 / §10
41
+ * @see tasks/0160-proxy-auth-and-ci-fix.md
42
+ */
43
+
44
+ import type { MessageRouter, Unsubscribe } from "./cdp/router";
45
+
46
+ /** Parsed proxy URL — what `parseProxyUrl` returns. */
47
+ export interface ParsedProxy {
48
+ /**
49
+ * The auth-stripped server URL safe to pass to Chromium's
50
+ * `--proxy-server=` flag. Format: `<protocol>//<host>:<port>`.
51
+ */
52
+ server: string;
53
+ /**
54
+ * Decoded credentials, present only when the input URL carried a
55
+ * `user[:pass]@` segment. `password` is `""` (empty string) when the
56
+ * URL had a username but no password (`http://user@host:8080`).
57
+ */
58
+ auth?: { username: string; password: string };
59
+ /** Lowercased protocol (`http`, `https`, `socks5`, `socks4`). */
60
+ protocol: "http" | "https" | "socks5" | "socks4";
61
+ }
62
+
63
+ /** Default ports used when the input URL omits one. */
64
+ const DEFAULT_PORTS: Record<string, string> = {
65
+ http: "80",
66
+ https: "443",
67
+ socks5: "1080",
68
+ socks4: "1080",
69
+ };
70
+
71
+ /**
72
+ * Parse a proxy URL string into `{ server, auth?, protocol }`.
73
+ *
74
+ * Handles:
75
+ * - `http://user:pass@host:port` → auth + server
76
+ * - `socks5://user@host:1080` → auth.password = ""
77
+ * - `http://host:8080` → no auth
78
+ * - `http://user%40d:p%40ss@host:80` → percent-decoded credentials
79
+ * - `http://user:pass@[::1]:8080` → IPv6 hosts (URL parser handles)
80
+ * - `http://host` → port defaults per protocol
81
+ *
82
+ * Implementation uses `new URL()` so percent-decoding and IPv6 host
83
+ * bracketing are handled natively.
84
+ */
85
+ export function parseProxyUrl(input: string): ParsedProxy {
86
+ let url: URL;
87
+ try {
88
+ url = new URL(input);
89
+ } catch (err) {
90
+ throw new Error(
91
+ `[mochi] invalid proxy URL ${JSON.stringify(input)}: ${err instanceof Error ? err.message : String(err)}`,
92
+ );
93
+ }
94
+ const rawProto = url.protocol.replace(/:$/, "").toLowerCase();
95
+ if (
96
+ rawProto !== "http" &&
97
+ rawProto !== "https" &&
98
+ rawProto !== "socks5" &&
99
+ rawProto !== "socks4"
100
+ ) {
101
+ throw new Error(
102
+ `[mochi] unsupported proxy protocol ${JSON.stringify(rawProto)} — supported: http, https, socks5, socks4`,
103
+ );
104
+ }
105
+ const protocol = rawProto;
106
+ // `URL.hostname` may keep or strip IPv6 brackets depending on the
107
+ // runtime — normalize to a single `[…]`-bracketed form so we can format
108
+ // the server URL deterministically.
109
+ const rawHost = url.hostname;
110
+ const stripped =
111
+ rawHost.startsWith("[") && rawHost.endsWith("]") ? rawHost.slice(1, -1) : rawHost;
112
+ const isIpv6 = stripped.includes(":");
113
+ const host = isIpv6 ? `[${stripped}]` : stripped;
114
+ const port = url.port.length > 0 ? url.port : DEFAULT_PORTS[protocol];
115
+ if (host.length === 0) {
116
+ throw new Error(`[mochi] proxy URL ${JSON.stringify(input)} is missing a host`);
117
+ }
118
+ const server = `${protocol}://${host}:${port}`;
119
+
120
+ // `URL.username`/`URL.password` are already percent-decoded.
121
+ if (url.username.length > 0) {
122
+ return {
123
+ server,
124
+ auth: {
125
+ username: decodeURIComponent(url.username),
126
+ password: url.password.length > 0 ? decodeURIComponent(url.password) : "",
127
+ },
128
+ protocol,
129
+ };
130
+ }
131
+ return { server, protocol };
132
+ }
133
+
134
+ /**
135
+ * Result of {@link installProxyAuth}: an unsubscriber that removes the
136
+ * router listeners and disables the Fetch domain. Idempotent.
137
+ */
138
+ export interface ProxyAuthHandle {
139
+ /** Tear down the listeners + send `Fetch.disable`. Idempotent. */
140
+ dispose(): Promise<void>;
141
+ }
142
+
143
+ /**
144
+ * Wire proxy-auth handling into a {@link MessageRouter}. No-op when
145
+ * `auth` is undefined — saves the `Fetch.enable` round-trip and avoids
146
+ * any protocol surface for sessions that don't need it.
147
+ *
148
+ * Behavior:
149
+ * - Sends `Fetch.enable { handleAuthRequests: true, patterns: [] }` once.
150
+ * - On `Fetch.authRequired`, replies with `Fetch.continueWithAuth` and
151
+ * the parsed creds.
152
+ * - On `Fetch.requestPaused` (defensive — should never fire with empty
153
+ * patterns), forwards `Fetch.continueRequest` so we don't hang.
154
+ *
155
+ * The empty `patterns` array is critical: any non-empty patterns turn
156
+ * Chromium into an interception proxy for matching requests, which tanks
157
+ * page perf and changes the network model. Empty patterns +
158
+ * `handleAuthRequests: true` is the documented contract for "auth-only
159
+ * interception".
160
+ */
161
+ export async function installProxyAuth(
162
+ router: MessageRouter,
163
+ auth: { username: string; password: string },
164
+ ): Promise<ProxyAuthHandle> {
165
+ // Subscribe FIRST so we don't miss the very first authRequired event the
166
+ // browser fires after Fetch.enable.
167
+ const offAuth: Unsubscribe = router.on("Fetch.authRequired", (params) => {
168
+ const requestId = (params as { requestId?: string } | null)?.requestId;
169
+ if (typeof requestId !== "string") return;
170
+ // Fire-and-forget — failures here are non-fatal (the request will
171
+ // simply 407 and the page-level fetch will see it). We log on
172
+ // unexpected errors so users can diagnose creds issues.
173
+ router
174
+ .send("Fetch.continueWithAuth", {
175
+ requestId,
176
+ authChallengeResponse: {
177
+ response: "ProvideCredentials",
178
+ username: auth.username,
179
+ password: auth.password,
180
+ },
181
+ })
182
+ .catch((err: unknown) => {
183
+ if (!isClosedError(err)) {
184
+ console.warn("[mochi] Fetch.continueWithAuth failed:", err);
185
+ }
186
+ });
187
+ });
188
+
189
+ // Defensive — `patterns: []` means this event should never fire, but
190
+ // some Chromium builds may pause requests adjacent to auth challenges.
191
+ // If it ever fires, immediately continue so we don't hang the request.
192
+ const offPaused: Unsubscribe = router.on("Fetch.requestPaused", (params) => {
193
+ const requestId = (params as { requestId?: string } | null)?.requestId;
194
+ if (typeof requestId !== "string") return;
195
+ router.send("Fetch.continueRequest", { requestId }).catch((err: unknown) => {
196
+ if (!isClosedError(err)) {
197
+ console.warn("[mochi] Fetch.continueRequest (defensive) failed:", err);
198
+ }
199
+ });
200
+ });
201
+
202
+ await router.send("Fetch.enable", {
203
+ handleAuthRequests: true,
204
+ patterns: [],
205
+ });
206
+
207
+ let disposed = false;
208
+ return {
209
+ async dispose(): Promise<void> {
210
+ if (disposed) return;
211
+ disposed = true;
212
+ offAuth();
213
+ offPaused();
214
+ try {
215
+ await router.send("Fetch.disable");
216
+ } catch (err) {
217
+ // Closed-pipe failures are expected during session teardown.
218
+ if (!isClosedError(err)) {
219
+ console.warn("[mochi] Fetch.disable failed:", err);
220
+ }
221
+ }
222
+ },
223
+ };
224
+ }
225
+
226
+ /** True when an error reflects the transport already being closed. */
227
+ function isClosedError(err: unknown): boolean {
228
+ if (err instanceof Error) {
229
+ return (
230
+ err.name === "BrowserCrashedError" ||
231
+ /transport already closed|pipe closed|browser process exited/i.test(err.message)
232
+ );
233
+ }
234
+ return false;
235
+ }