@specific.dev/spectest 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/daemon.ts ADDED
@@ -0,0 +1,3910 @@
1
+ // Long-running HTTP daemon. Runs as a systemd unit on the VM host (not in
2
+ // a container). Owns:
3
+ //
4
+ // * Loading the user's `spectest/index.ts` and exposing the parsed
5
+ // `{ environment, tests? }` to the control plane.
6
+ // * Orchestrating Docker: image prep (pull/build), network + volumes,
7
+ // container start, ready probes — all by shelling out to the local
8
+ // `docker` CLI.
9
+ // * Running individual test cases on demand.
10
+ //
11
+ // The control plane handles VM lifecycle (create, snapshot, fork,
12
+ // terminate) and the tarball uploads to `/workspace` and `/opt/spectest/app`.
13
+ // Once those are in place the daemon does the rest.
14
+ //
15
+ // One sandbox = one daemon. Concurrency between tests is achieved by
16
+ // forking the sandbox; inside a single daemon we never run two tests at
17
+ // once — that keeps stdout capture and timeouts simple.
18
+
19
+ import http from "node:http";
20
+ import { execFile, spawn } from "node:child_process";
21
+ import { createHash, randomUUID } from "node:crypto";
22
+ import { existsSync, promises as fs, readFileSync } from "node:fs";
23
+ import net from "node:net";
24
+ import path from "node:path";
25
+ import { pathToFileURL } from "node:url";
26
+
27
+ import { assert, expect, expectRaw, lowerIngress, dnsName as makeDnsDecl, isWildcard } from "./index.js";
28
+ import type { DnsTarget, LoweredIngress } from "./index.js";
29
+ import { openBrowser } from "./browser.js";
30
+ import { openTerminal } from "./terminal.js";
31
+ import {
32
+ recordEnv,
33
+ recordExec,
34
+ recordFake,
35
+ recordHttp,
36
+ recordTerminal,
37
+ recordWait,
38
+ reserveEvent,
39
+ recorderEventCount,
40
+ recorderMarkChildren,
41
+ recorderTruncate,
42
+ startRecording,
43
+ stopRecording,
44
+ truncateUtf8,
45
+ type TestEvent,
46
+ } from "./recorder.js";
47
+ import { wrap, wrapResponse } from "./inspect.js";
48
+ import type { SpectestFetch, Wrapped, WrappedResponse } from "./inspect.js";
49
+ import { clearRecordSecrets, setRecordSecrets } from "./record-secrets.js";
50
+
51
+ import type {
52
+ Browser,
53
+ BrowserOptions,
54
+ BrowserSessionRecorder,
55
+ BrowserSessionStep,
56
+ } from "./browser.js";
57
+ import type {
58
+ EnvironmentConfig,
59
+ ExecResult,
60
+ FakeContext,
61
+ FakeDefinition,
62
+ FileMount,
63
+ Project,
64
+ ProjectSetupContext,
65
+ ReadyCheck,
66
+ RuntimeServiceHandle,
67
+ RuntimeServiceSpec,
68
+ ServiceConfig,
69
+ ServiceDefinition,
70
+ ServiceHandles,
71
+ ServiceImage,
72
+ Terminal,
73
+ TerminalOpts,
74
+ TerminalResult,
75
+ TestCase,
76
+ TestContext,
77
+ TestSuite,
78
+ VolumeMount,
79
+ } from "./index.js";
80
+ import type { InternalTerminal, TerminalFrameSink } from "./terminal.js";
81
+
82
/**
 * A `ServiceDefinition` that also carries the key it was registered under in
 * the services map. The docker-orchestration helpers below need that key to
 * refer to a service by name (container name, image tag, ready-probe host).
 * The helpers type parameter is widened to any record so a typed
 * `setup({ helpers })` call from a component (e.g. K3sHelpers) lines up at
 * the call site.
 */
type NamedService = ServiceDefinition<Record<string, unknown>> & { name: string };

/** Flatten `cfg.services` into a list, attaching each map key as `name`. */
function namedServices(cfg: EnvironmentConfig): NamedService[] {
  const named: NamedService[] = [];
  for (const [name, def] of Object.entries(cfg.services)) {
    // `name` is written first and the definition spread after it, so a
    // `name` property on the definition itself takes precedence.
    named.push({ name, ...def });
  }
  return named;
}
92
+
93
+ const DEFAULT_PORT = 9876; // NOTE(review): presumably the daemon's default HTTP listen port; its use is outside this chunk
94
+ const DEFAULT_TEST_TIMEOUT_MS = 60_000; // 60 s; NOTE(review): presumably applied when a test case sets no timeoutMs — confirm at the use site
95
+ const NETWORK_NAME = process.env.SPECTEST_NETWORK ?? "spectest-net"; // docker network that ensureNetwork inspects and creates if missing
96
+ const WORKSPACE = process.env.SPECTEST_WORKSPACE ?? "/workspace"; // docker build context; also the root for the .spectest staging dirs (volumes, files, services)
97
+
98
+ // Stable hostname every service container resolves to the host (the
99
+ // `spectest-br0` gateway) — so apps that build or pull images at runtime
100
+ // can point a builder at `spectest-host:5000` (the zot Docker Hub mirror)
101
+ // or `spectest-host:1234` (the shared buildkitd) without hard-coding the
102
+ // gateway IP. Injected into each container's /etc/hosts in runContainer.
103
+ const SPECTEST_HOST_NAME = "spectest-host";
104
+
105
// Hostname of the host image-cache gateway, taken from the first
// `registry-mirrors` entry in /etc/docker/daemon.json — the same entry the
// in-VM dockerd already uses (baked into the local provider's golden
// daemon.json). Resolved on the first call and memoised: `undefined` means
// "not looked up yet", `null` means "no host cache, inject nothing".
let _hostCacheGateway: string | null | undefined;
function hostCacheGateway(): string | null {
  if (_hostCacheGateway === undefined) {
    let gateway: string | null = null;
    try {
      const raw = readFileSync("/etc/docker/daemon.json", "utf8");
      const parsed = JSON.parse(raw) as { "registry-mirrors"?: string[] };
      const mirror = parsed["registry-mirrors"]?.[0];
      if (mirror) {
        gateway = new URL(mirror).hostname || null;
      }
    } catch {
      // Missing file, invalid JSON or an unparseable mirror URL all mean
      // "no usable host cache".
      gateway = null;
    }
    _hostCacheGateway = gateway;
  }
  return _hostCacheGateway;
}
123
+ const APP_DIR = process.env.SPECTEST_APP_DIR ?? "/opt/spectest/app"; // resolveEntry looks for the project under <APP_DIR>/spectest unless SPECTEST_PROJECT_DIR is set
124
+ // Root CA baked into the base snapshot at base-snapshot build time
125
+ // (see base.rs::BASE_SETUP_SH). Bind-mounted into every service
126
+ // container so apps can verify HTTPS to the daemon's fakes, and
127
+ // referenced when we layer it into each image's system trust store.
128
+ const CA_PATH = process.env.SPECTEST_CA_PATH ?? "/etc/spectest/ca.crt";
129
+ const CA_KEY_PATH = process.env.SPECTEST_CA_KEY_PATH ?? "/etc/spectest/ca.key"; // NOTE(review): presumably the private key paired with CA_PATH; its use is outside this chunk
130
+
131
+ // ────────────────────────────────────────────────────────────────────────
132
+ // Loaded-project state
133
+ // ────────────────────────────────────────────────────────────────────────
134
+
135
+ interface Loaded { // the currently loaded project plus a lookup index over its tests
136
+ project: Project; // default export of the project entry, stored by loadEnv
137
+ byId: Map<string, TestCase<unknown>>; // test id -> test case; rebuilt by rebuildCatalogue
138
+ }
139
+
140
+ let loaded: Loaded | null = null; // null until loadEnv succeeds; read through requireLoaded
141
+
142
/** Plain-data summary of one test case: identity, dependency and timeout. */
interface CaseMeta {
  id: string;
  name: string;
  /** Id of the case this one depends on, when it declares one. */
  dependsOn?: string;
  timeoutMs?: number;
}

/**
 * Summarise every test in `suite` as a `CaseMeta`, in suite order.
 * Returns an empty list when there is no suite.
 */
function casesMetadata(suite: TestSuite | undefined): CaseMeta[] {
  const metas: CaseMeta[] = [];
  if (suite) {
    for (const testCase of suite.tests) {
      metas.push({
        id: testCase.id,
        name: testCase.name,
        dependsOn: testCase.dependsOn?.id,
        timeoutMs: testCase.timeoutMs,
      });
    }
  }
  return metas;
}
158
+
159
/**
 * Locate the project entry file.
 *
 * `SPECTEST_PROJECT_ENTRY` wins when it is set and the file exists. Otherwise
 * the project directory (`SPECTEST_PROJECT_DIR`, default `<APP_DIR>/spectest`)
 * is searched for `index.ts`, `index.mts`, `index.mjs`, `index.js`, in that
 * order.
 *
 * @throws Error when none of the candidates exists.
 */
function resolveEntry(): string {
  const override = process.env.SPECTEST_PROJECT_ENTRY;
  if (override && existsSync(override)) return override;

  const dir = process.env.SPECTEST_PROJECT_DIR ?? path.join(APP_DIR, "spectest");
  const found = ["index.ts", "index.mts", "index.mjs", "index.js"]
    .map((name) => path.join(dir, name))
    .find((candidate) => existsSync(candidate));
  if (found === undefined) {
    throw new Error(
      `could not find project entry in ${dir} (looked for index.ts/.mts/.mjs/.js)`,
    );
  }
  return found;
}
171
+
172
// Import the env entry (`spectest/index.ts`) and install it as the loaded
// project. This covers what defines the *environment* — services, fakes,
// project setup — not the test bodies under `spectest/tests/**` (see
// `loadTests`).
//
// The import URL carries no cache-busting query on purpose: test files import
// this same module (`import { env } from "../index"`) and must get the SAME
// `env` instance, so their `env.test(...)` calls land in the registry the
// default-exported Project reads back. A daemon process imports the entry at
// most once (an env change always takes the cold path with a fresh daemon),
// so there is nothing to bust.
//
// Throws when the module's default export (or the module itself, when it has
// no `default`) is missing or has no `environment`.
async function loadEnv(): Promise<Project> {
  const entry = resolveEntry();
  const mod = await import(pathToFileURL(entry).href);
  const hasDefault = mod && typeof mod === "object" && "default" in mod;
  const project = (hasDefault ? mod.default : mod) as Project | undefined;
  if (!project?.environment) {
    throw new Error(
      `project entry ${entry} must default-export a Project (from env.project(...) where env = defineEnvironment(...))`,
    );
  }

  loaded = { project, byId: new Map() };
  rebuildCatalogue();
  // Cached convenience clients belong to the previous project; drop them so
  // the next test rebuilds against the freshly loaded definitions.
  HELPERS_CACHE.clear();
  // Parse + validate fakes and service-tls proxies, tearing down any prior
  // runtime so a reload picks up edits. The listeners themselves are bound
  // later, by startIngress() during /bootstrap.
  buildIngress(project);
  return project;
}
204
+
205
// Import every test file under `spectest/tests/**` into the already-loaded
// env, then refresh the catalogue. Keeping test bodies in separate files
// keeps them out of the warm-template cache key, so a test-only edit restores
// the cached env and comes here to pick up the new tests. For the legacy
// single-file layout (no `tests/` dir; the suite is already on the default
// export) nothing is imported.
//
// This only ever runs AFTER the warm-template snapshot is captured (cold
// path) or against a freshly restored VM (warm path), so each test file is
// imported for the first time in that daemon process — no cache-busting
// needed and no stale ESM module to fight.
//
// Throws (via requireLoaded) when no project has been loaded yet.
async function loadTests(): Promise<void> {
  requireLoaded();
  const testsDir = path.join(path.dirname(resolveEntry()), "tests");
  const files = existsSync(testsDir) ? await collectTestFiles(testsDir) : [];
  // One at a time, in the sorted order collectTestFiles returns.
  for (const file of files) {
    await import(pathToFileURL(file).href);
  }
  rebuildCatalogue();
  // The test set may have changed; drop cached helpers so the next test
  // rebuilds cleanly.
  HELPERS_CACHE.clear();
}
228
+
229
// (Re)build the id→TestCase index from whatever suite the loaded project
// currently exposes: the lazy registry getter for split layouts, or the
// frozen explicit suite for inline ones. A project without a suite gets an
// empty index. When two cases share an id, the later one wins.
function rebuildCatalogue(): void {
  const state = requireLoaded();
  const suite = state.project.tests;
  state.byId = new Map<string, TestCase<unknown>>(
    suite ? suite.tests.map((t): [string, TestCase<unknown>] => [t.id, t]) : [],
  );
}
240
+
241
// Recursively collect importable test modules under `spectest/tests/`, sorted
// for deterministic import order.
//
// Kept:    regular files ending in .ts/.mts/.cts/.js/.mjs/.cjs.
// Skipped: TypeScript declaration files (.d.ts, .d.mts, .d.cts — none of them
//          carry runtime code to import) and the `node_modules` / `.spectest`
//          directories.
//
// Rejects with the underlying fs error when a directory cannot be read.
async function collectTestFiles(dir: string): Promise<string[]> {
  const importable = /\.(ts|mts|cts|js|mjs|cjs)$/;
  const declaration = /\.d\.[cm]?ts$/;
  const skippedDirs = new Set(["node_modules", ".spectest"]);

  const out: string[] = [];
  const walk = async (d: string): Promise<void> => {
    const entries = await fs.readdir(d, { withFileTypes: true });
    for (const e of entries) {
      const full = path.join(d, e.name);
      if (e.isDirectory()) {
        if (!skippedDirs.has(e.name)) await walk(full);
      } else if (e.isFile() && importable.test(e.name) && !declaration.test(e.name)) {
        out.push(full);
      }
    }
  };
  await walk(dir);
  out.sort();
  return out;
}
265
+
266
/**
 * Return the loaded project state.
 *
 * @throws Error when no project has been loaded yet.
 */
function requireLoaded(): Loaded {
  if (loaded) return loaded;
  throw new Error("no project loaded; call POST /load first");
}
272
+
273
+ // ────────────────────────────────────────────────────────────────────────
274
+ // Docker CLI helpers
275
+ // ────────────────────────────────────────────────────────────────────────
276
+
277
/** Outcome of a finished (or timed-out) child process. */
interface CmdResult {
  stdout: string;
  stderr: string;
  /** Exit code; 124 on timeout, 1 when the failure carries no numeric code. */
  code: number;
}

/**
 * Run `file` with `args` (no shell) and capture its output.
 *
 * Never rejects: every failure is reported through `code`.
 *
 * @param file       Executable to run.
 * @param args       Arguments, passed verbatim.
 * @param timeoutMs  When > 0, SIGKILL the child after this long and resolve
 *                   with `code: 124`, empty stdout and a
 *                   `timeout after <n>ms` stderr.
 * @param env        Extra environment variables layered over `process.env`.
 * @returns The captured stdout/stderr and the exit code. A failure without a
 *          numeric exit code (spawn error, signal, maxBuffer overflow) is
 *          reported as `code: 1`.
 */
function shx(
  file: string,
  args: string[],
  timeoutMs?: number,
  env?: Record<string, string>,
): Promise<CmdResult> {
  return new Promise((resolve) => {
    let done = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    // Single exit point: first caller wins, and the timeout timer is always
    // cancelled so it does not outlive the command (a pending timer would
    // otherwise pin its closure — and the event loop — for up to timeoutMs).
    const settle = (result: CmdResult): void => {
      if (done) return;
      done = true;
      if (timer !== undefined) clearTimeout(timer);
      resolve(result);
    };

    const child = execFile(
      file,
      args,
      { maxBuffer: 64 * 1024 * 1024, env: env ? { ...process.env, ...env } : process.env },
      (err, stdout, stderr) => {
        // `err.code` is the numeric exit status for a non-zero exit, but a
        // string (e.g. "ENOENT") for spawn failures — only trust numbers.
        const raw = (err as { code?: unknown } | null)?.code;
        const code = typeof raw === "number" ? raw : err ? 1 : 0;
        settle({ stdout: String(stdout), stderr: String(stderr), code });
      },
    );

    if (timeoutMs && timeoutMs > 0) {
      timer = setTimeout(() => {
        if (done) return;
        try {
          child.kill("SIGKILL");
        } catch {
          /* already exited */
        }
        settle({ stdout: "", stderr: `timeout after ${timeoutMs}ms`, code: 124 });
      }, timeoutMs);
    }
  });
}
322
+
323
/**
 * Run the local `docker` CLI through `shx`; parameters and result are those
 * of `shx` with the executable fixed to "docker".
 */
function docker(
  args: string[],
  timeoutMs?: number,
  env?: Record<string, string>,
): Promise<CmdResult> {
  const binary = "docker";
  return shx(binary, args, timeoutMs, env);
}
330
+
331
/**
 * Like `shx` but invokes `onLine` for each line of stdout/stderr as it
 * streams in, so callers can surface live progress (docker build steps,
 * image pull layers) into bootstrap progress. Still resolves with the full
 * captured output + exit code, so existing error handling and post-hoc
 * parsing (`summarizeBuildKit`) are unchanged. Never rejects.
 *
 * Line handling:
 *   - stdout and stderr are split independently, so a partial line on one
 *     stream is never glued to a chunk from the other;
 *   - a trailing `\r` is stripped from each line;
 *   - output that does not end in a newline is still delivered as a final
 *     line once the process ends (or times out);
 *   - exceptions thrown by `onLine` are swallowed.
 *
 * @param file       Executable to run (no shell).
 * @param args       Arguments, passed verbatim.
 * @param timeoutMs  When > 0, SIGKILL the child after this long and resolve
 *                   with `code: 124` and `timeout after <n>ms` appended to
 *                   stderr.
 * @param env        Extra environment variables layered over `process.env`.
 * @param onLine     Progress callback, called once per complete line.
 * @returns Captured stdout/stderr and the exit code; `code: 1` when the
 *          process could not be spawned (the spawn error message is appended
 *          to stderr) or ended without an exit code.
 */
function shxStream(
  file: string,
  args: string[],
  timeoutMs: number | undefined,
  env: Record<string, string> | undefined,
  onLine: (line: string) => void,
): Promise<CmdResult> {
  return new Promise((resolve) => {
    const child = spawn(file, args, {
      env: env ? { ...process.env, ...env } : process.env,
    });
    let stdout = "";
    let stderr = "";
    let done = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    const emit = (line: string): void => {
      try {
        onLine(line.replace(/\r$/, ""));
      } catch {
        /* a progress callback must never break the build */
      }
    };

    // One splitter per stream: buffers the unterminated tail between chunks.
    const makeSplitter = () => {
      let pending = "";
      return {
        push(chunk: string): void {
          pending += chunk;
          let nl: number;
          while ((nl = pending.indexOf("\n")) >= 0) {
            emit(pending.slice(0, nl));
            pending = pending.slice(nl + 1);
          }
        },
        flush(): void {
          const rest = pending.replace(/\r$/, "");
          pending = "";
          if (rest !== "") emit(rest);
        },
      };
    };
    const outLines = makeSplitter();
    const errLines = makeSplitter();

    // Decode at the stream level so a multi-byte UTF-8 character split
    // across two chunks is not corrupted.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (d) => {
      const s = String(d);
      stdout += s;
      outLines.push(s);
    });
    child.stderr?.on("data", (d) => {
      const s = String(d);
      stderr += s;
      errLines.push(s);
    });

    const finish = (code: number, extraStderr?: string): void => {
      if (done) return;
      done = true;
      // Cancel the timeout so it (and the captured output it closes over)
      // does not linger for the rest of `timeoutMs`.
      if (timer !== undefined) clearTimeout(timer);
      outLines.flush();
      errLines.flush();
      resolve({ stdout, stderr: extraStderr ? stderr + extraStderr : stderr, code });
    };

    // Spawn failures (e.g. ENOENT) arrive here; keep the reason so callers'
    // error messages are not empty.
    child.on("error", (err) => finish(1, `${stderr ? "\n" : ""}${err.message}`));
    child.on("close", (code) => finish(code == null ? 1 : code));

    if (timeoutMs && timeoutMs > 0) {
      timer = setTimeout(() => {
        if (done) return;
        try {
          child.kill("SIGKILL");
        } catch {
          /* already exited */
        }
        finish(124, `\ntimeout after ${timeoutMs}ms`);
      }, timeoutMs);
    }
  });
}
396
+
397
// ── Live bootstrap progress ──────────────────────────────────────────────
// While /bootstrap runs, the control plane polls GET /progress (~every 1.5s)
// and streams this snapshot into the test-run row, so the web UI can show
// what is being built *before* the suite starts. The state lives in memory
// only and is reset at the top of each bootstrap(). Every updater is a no-op
// while no bootstrap is in flight.
type ServiceProgressStatus =
  | "pending"
  | "pulling"
  | "building"
  | "prepared"
  | "starting"
  | "probing"
  | "ready"
  | "failed";

interface ServiceProgress {
  name: string;
  kind: "pull" | "build";
  status: ServiceProgressStatus;
  /** Live free-text detail, e.g. "step 4/9 RUN bun install" or "12 layers". */
  detail?: string;
}

interface BootstrapProgress {
  phase: string;
  services: ServiceProgress[];
  startedAt: number;
  updatedAt: number;
  done: boolean;
}

let BOOTSTRAP_PROGRESS: BootstrapProgress | null = null;

/**
 * Start a fresh progress snapshot: phase "Preparing images", one `pending`
 * entry per service (`pull` for registry images, `build` for everything
 * else), timestamps set to now.
 */
function progressInit(services: NamedService[]): void {
  const entries: ServiceProgress[] = [];
  for (const svc of services) {
    const isPull = svc.image.type === "registry";
    entries.push({ name: svc.name, kind: isPull ? "pull" : "build", status: "pending" });
  }
  BOOTSTRAP_PROGRESS = {
    phase: "Preparing images",
    services: entries,
    startedAt: Date.now(),
    updatedAt: Date.now(),
    done: false,
  };
}
443
+
444
/** Set the current bootstrap phase label; no-op when no bootstrap is tracked. */
function progressPhase(phase: string): void {
  const progress = BOOTSTRAP_PROGRESS;
  if (progress) {
    progress.phase = phase;
    progress.updatedAt = Date.now();
  }
}
449
+
450
/**
 * Merge `patch` into the progress entry of the service called `name`.
 * No-op when no bootstrap is tracked or the service is not in the snapshot.
 */
function progressService(name: string, patch: Partial<ServiceProgress>): void {
  const progress = BOOTSTRAP_PROGRESS;
  if (!progress) return;
  for (const entry of progress.services) {
    if (entry.name === name) {
      // Only the first entry with this name is updated.
      Object.assign(entry, patch);
      progress.updatedAt = Date.now();
      return;
    }
  }
}
457
+
458
/** Mark the tracked bootstrap as finished (phase "Ready"); no-op when none is tracked. */
function progressDone(): void {
  const progress = BOOTSTRAP_PROGRESS;
  if (progress) {
    progress.phase = "Ready";
    progress.done = true;
    progress.updatedAt = Date.now();
  }
}
464
+
465
// BuildKit (docker buildx) provides per-step timing via `--progress=plain`,
// parallel stages, and `RUN --mount=type=cache`. Where the buildx plugin is
// not installed (e.g. a Freestyle base without it) we fall back to the legacy
// builder, which takes no `--progress` flag. The probe
// (`docker buildx version`, 15 s timeout) runs once; its result is memoised.
let _buildxAvailable: boolean | undefined;
async function hasBuildx(): Promise<boolean> {
  if (_buildxAvailable !== undefined) return _buildxAvailable;
  const probe = await docker(["buildx", "version"], 15_000);
  _buildxAvailable = probe.code === 0;
  return _buildxAvailable;
}
477
+
478
// A single buildkitd runs on the host (see scripts/install-buildkitd.sh),
// reachable from every VM at the bridge gateway. Building against it as a
// `remote` buildx builder gives a persistent, shared layer/mount cache that
// survives forks and warm-template misses — a fresh VM no longer rebuilds
// from scratch. The build runs on the host (runc-isolated); `--load` pulls
// the finished image back into the in-VM dockerd.
//
// The outcome is decided once and memoised. If buildx is missing, the builder
// cannot be created, or buildkitd is unreachable, this returns false and
// callers use the in-VM builder — a missing/dead buildkitd just means slower
// builds.
const REMOTE_BUILDER_ADDR = process.env.SPECTEST_BUILDKIT_ADDR ?? "tcp://10.42.0.1:1234";
const REMOTE_BUILDER_NAME = "spectest-remote";
let _remoteBuilder: boolean | undefined;
async function ensureRemoteBuilder(): Promise<boolean> {
  if (_remoteBuilder !== undefined) return _remoteBuilder;

  const remember = (usable: boolean): boolean => {
    _remoteBuilder = usable;
    return usable;
  };

  if (!(await hasBuildx())) return remember(false);

  // Idempotent: a repeat create with the same name errors ("existing
  // instance"), which counts as already-present.
  const created = await docker(
    ["buildx", "create", "--name", REMOTE_BUILDER_NAME, "--driver", "remote", REMOTE_BUILDER_ADDR],
    30_000,
  );
  const alreadyPresent = /existing instance|already exists/i.test(created.stderr);
  if (created.code !== 0 && !alreadyPresent) return remember(false);

  // `inspect --bootstrap` actually dials buildkitd, so it doubles as the
  // reachability probe. If buildkitd is down this fails and we fall back.
  const boot = await docker(["buildx", "inspect", "--bootstrap", REMOTE_BUILDER_NAME], 60_000);
  const reachable = remember(boot.code === 0);
  if (!reachable) {
    // eslint-disable-next-line no-console
    console.warn(
      `[build] remote buildkitd at ${REMOTE_BUILDER_ADDR} unreachable; using in-VM builder:\n${boot.stderr.trim()}`,
    );
  }
  return reachable;
}
517
+
518
/** One parsed build step: its command, duration and whether it was cached. */
interface BuildStep {
  name: string;
  secs: number;
  cached: boolean;
}

// Turn `docker build --progress=plain` (BuildKit) output into per-step
// timings, slowest first. Lines are correlated through their `#N` id: the
// `#N [stage] <cmd>` declaration supplies the name (first one wins, cut to 80
// chars), `#N DONE <x>s` the duration, `#N CACHED` the cached flag. A step
// with no DONE line reports 0 seconds. Best-effort — output that does not
// parse yields an empty list.
function summarizeBuildKit(out: string): BuildStep[] {
  const labelById = new Map<string, string>();
  const durationById = new Map<string, number>();
  const cacheHits = new Set<string>();

  out.split("\n").forEach((row) => {
    const decl = /^#(\d+)\s+\[[^\]]*\]\s+(.+)$/.exec(row);
    if (decl !== null) {
      const key = `#${decl[1]}`;
      if (!labelById.has(key)) labelById.set(key, decl[2].trim().slice(0, 80));
      return;
    }
    const finished = /^#(\d+)\s+DONE\s+([\d.]+)s/.exec(row);
    if (finished !== null) {
      durationById.set(`#${finished[1]}`, parseFloat(finished[2]));
      return;
    }
    const hit = /^#(\d+)\s+CACHED/.exec(row);
    if (hit !== null) {
      const key = `#${hit[1]}`;
      cacheHits.add(key);
      if (!durationById.has(key)) durationById.set(key, 0);
    }
  });

  // Only declared steps are reported; DONE/CACHED lines without a matching
  // declaration are dropped.
  const steps = Array.from(labelById, ([key, label]): BuildStep => ({
    name: label,
    secs: durationById.get(key) ?? 0,
    cached: cacheHits.has(key),
  }));
  steps.sort((x, y) => y.secs - x.secs);
  return steps;
}
557
+
558
+ // ────────────────────────────────────────────────────────────────────────
559
+ // Bootstrap stages
560
+ // ────────────────────────────────────────────────────────────────────────
561
+
562
/**
 * Make sure the docker network `NETWORK_NAME` exists, creating it when
 * `docker network inspect` does not find it.
 *
 * @throws Error when `docker network create` fails.
 */
async function ensureNetwork(): Promise<void> {
  const existing = await docker(["network", "inspect", NETWORK_NAME], 30_000);
  if (existing.code !== 0) {
    const created = await docker(["network", "create", NETWORK_NAME], 60_000);
    if (created.code !== 0) {
      throw new Error(
        `docker network create ${NETWORK_NAME} failed: ${created.stderr.trim() || created.stdout.trim()}`,
      );
    }
  }
}
572
+
573
/**
 * Reduce a path to a single filesystem-safe segment: drop leading slashes,
 * turn every character outside `[A-Za-z0-9_-]` into `-`, then trim dashes
 * from both ends. Lossy: distinct inputs can map to the same segment.
 */
function sanitizeSegment(p: string): string {
  const withoutLeadingSlashes = p.replace(/^\/+/, "");
  const dashed = withoutLeadingSlashes.replace(/[^A-Za-z0-9_-]/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}
579
+
580
/**
 * Host directory backing `vol` for `service`.
 *
 *   - absolute `source`  → used verbatim;
 *   - relative `source`  → `<root>/<source>`;
 *   - no `source`        → `<root>/<sanitized target>`.
 *
 * `<root>` is `/var/cache/spectest/volumes/<service>` for cache volumes and
 * `<WORKSPACE>/.spectest/volumes/<service>` otherwise. Cache volumes sit
 * OUTSIDE /workspace so the delta-restore teardown (rm -rf /workspace) keeps
 * them — they hold only content-addressed accelerator data (see
 * VolumeMount.cache), never env state.
 */
function resolveHostPath(service: string, vol: VolumeMount): string {
  const { source } = vol;
  if (source && source.startsWith("/")) return source;

  const root = vol.cache
    ? ["/var/cache/spectest/volumes", service]
    : [WORKSPACE, ".spectest", "volumes", service];
  const leaf = source ? source.replace(/^\/+/, "") : sanitizeSegment(vol.target);
  return path.join(...root, leaf);
}
593
+
594
/// Where the daemon records every ABSOLUTE-source, non-cache volume dir it
/// has created, one path per line. The delta-restore teardown wipes the
/// listed dirs: they live outside /workspace (which the teardown removes
/// wholesale) and outside /var/cache/spectest (deliberately kept), so
/// without this manifest a `source: "/data/pg"` volume would carry the
/// previous generation's data into a "fresh" environment. tmpfs-backed
/// (/run) — survives snapshots like all guest memory, dies with the VM.
const VOLUME_DIRS_MANIFEST = "/run/spectest-volume-dirs";
const ABS_VOLUME_DIRS = new Set<string>();

/// Add `host` to the manifest, rewriting the file with every dir recorded by
/// this process so far. A dir that is already recorded is a no-op. Rejects
/// with the fs error when the manifest cannot be written; in that case the
/// dir is forgotten again, so a retry re-attempts the write instead of
/// silently skipping a dir that never reached the manifest.
async function recordAbsoluteVolumeDir(host: string): Promise<void> {
  if (ABS_VOLUME_DIRS.has(host)) return;
  ABS_VOLUME_DIRS.add(host);
  try {
    await fs.writeFile(VOLUME_DIRS_MANIFEST, [...ABS_VOLUME_DIRS].join("\n") + "\n");
  } catch (err) {
    ABS_VOLUME_DIRS.delete(host);
    throw err;
  }
}
609
+
610
/**
 * Create the host directory behind each of `svc.volumes` and return the
 * matching `--volume=<host>:<target>[:ro]` flags, in declaration order.
 * Directories that come from an absolute `source` and are not under
 * /var/cache/spectest/ are also recorded in the volume-dirs manifest.
 * Returns an empty list when the service declares no volumes.
 */
async function ensureVolumes(svc: NamedService): Promise<string[]> {
  const flags: string[] = [];
  for (const vol of svc.volumes ?? []) {
    const host = resolveHostPath(svc.name, vol);
    await fs.mkdir(host, { recursive: true });

    const isAbsoluteSource = vol.source?.startsWith("/");
    if (isAbsoluteSource && !host.startsWith("/var/cache/spectest/")) {
      await recordAbsoluteVolumeDir(host);
    }

    const suffix = vol.readOnly ? ":ro" : "";
    flags.push(`--volume=${host}:${vol.target}${suffix}`);
  }
  return flags;
}
623
+
624
// Materialize `svc.files` onto the VM host and return `--volume` flags
// bind-mounting each into the container (read-only). Single-file bind
// mounts mean the seeded config lands in place *before the container's
// entrypoint runs* — the one injection point earlier than any setup
// hook. Staging path mirrors ensureVolumes: a per-service dir under
// WORKSPACE, with one staging file per in-container path, so the content
// is captured by snapshots like everything else under WORKSPACE.
//
// The staging name is the sanitized in-container path. Sanitizing is lossy
// (`/etc/a.b` and `/etc/a-b` both become `etc-a-b`), so when two DIFFERENT
// in-container paths would share a staging name the later one gets a `~N`
// suffix instead of overwriting the earlier file. `~` never appears in a
// sanitized name, so a suffixed name cannot clash with an unsuffixed one.
//
// Throws when a file's `path` is not absolute; rejects with the fs error
// when staging a file fails.
async function ensureFiles(svc: NamedService): Promise<string[]> {
  const flags: string[] = [];
  if (!svc.files || svc.files.length === 0) return flags;
  const dir = path.join(WORKSPACE, ".spectest", "files", svc.name);
  await fs.mkdir(dir, { recursive: true });
  // staging file name → in-container path that claimed it
  const claimed = new Map<string, string>();
  for (const f of svc.files) {
    if (!f.path.startsWith("/")) {
      throw new Error(
        `service "${svc.name}": file path ${JSON.stringify(f.path)} must be absolute`,
      );
    }
    // `{{SPECTEST_SERVICE}}` expands to this service's name (its
    // services-map key) so a component can author self-referential
    // config without knowing the key the user will choose — e.g. k3s's
    // registries.yaml keying on `<key>.internal:5000`.
    const content = f.content.replaceAll("{{SPECTEST_SERVICE}}", svc.name);

    const base = sanitizeSegment(f.path);
    let staged = base;
    for (let n = 2; claimed.has(staged) && claimed.get(staged) !== f.path; n++) {
      staged = `${base}~${n}`;
    }
    claimed.set(staged, f.path);

    const host = path.join(dir, staged);
    await fs.writeFile(host, content);
    // `mode` is an octal string such as "0644".
    if (f.mode) await fs.chmod(host, parseInt(f.mode, 8));
    flags.push(`--volume=${host}:${f.path}:ro`);
  }
  return flags;
}
654
+
655
/** Local image tag a service runs under: `spectest/<name>:latest`. */
function imageTag(name: string): string {
  return "spectest/" + name + ":latest";
}
658
+
659
/** Patterns every generated .dockerignore starts with. */
const DEFAULT_DOCKERIGNORE: string[] = [
  ".git",
  ".spectest",
  "spectest",
  "node_modules",
  "target",
  "__pycache__",
  ".venv",
  ".env",
  ".env.local",
  ".env.*",
  "dist",
  "build",
  ".next",
  ".turbo",
  ".DS_Store",
];

/**
 * Build .dockerignore content: the defaults, followed by each dockerfile
 * service's `exclude` patterns in first-seen order with duplicates removed.
 * One pattern per line, newline-terminated.
 */
function unionDockerignore(services: NamedService[]): string {
  // A Set keeps insertion order, so the defaults stay first and every extra
  // keeps the position of its first occurrence.
  const patterns = new Set<string>(DEFAULT_DOCKERIGNORE);
  for (const svc of services) {
    if (svc.image.type !== "dockerfile" || !svc.image.exclude) continue;
    for (const pattern of svc.image.exclude) {
      patterns.add(pattern);
    }
  }
  return Array.from(patterns).join("\n") + "\n";
}
692
+
693
/// In-flight/finished dockerfile builds of this bootstrap, keyed by
/// sha256(dockerfile content + exclude list). Services that share an
/// identical image definition (e.g. an API server and a worker running the
/// same codebase with different entrypoints) build ONCE; the others wait
/// and `docker tag` the result. Cleared at every bootstrap() — the build
/// CONTEXT (/workspace) is an input too, so dedup is only valid within one
/// workspace generation (runtime services started mid-test share it).
const BUILD_DEDUP = new Map<
  string,
  { name: string; promise: Promise<{ tag: string; buildSteps?: BuildStep[] }> }
>();

/// Dedup key for a dockerfile image: hex sha256 over the Dockerfile content,
/// a NUL separator, and the JSON-encoded exclude list (a missing list hashes
/// like an empty one). Uses `node:crypto` rather than the `Bun` global so the
/// function behaves the same under any runtime that provides the node:
/// modules; the digest is the plain SHA-256 of the same bytes.
function buildContentKey(image: { content: string; exclude?: string[] }): string {
  return createHash("sha256")
    .update(image.content)
    .update("\0")
    .update(JSON.stringify(image.exclude ?? []))
    .digest("hex");
}
712
+
713
/**
 * Make the image for `svc` available locally under `imageTag(svc.name)`.
 *
 *   - Registry images are pulled (always), re-tagged, and passed through
 *     `ensureCaTrustedImage`.
 *   - Dockerfile images are built via `buildServiceImage`. With
 *     `opts.dedup`, services whose Dockerfile content + exclude list are
 *     identical share one build: the first builds, the rest wait and
 *     `docker tag` its result.
 *
 * Live status is reported through `progressService`; every failure raised
 * here (pull, shared build, `docker tag`) marks the service `failed` before
 * throwing.
 *
 * @param svc   Service whose image to prepare.
 * @param opts  `dedup: true` opts in to build sharing (bootstrap only).
 * @returns The local tag, plus per-step build timings for dockerfile builds
 *          when available.
 * @throws Error when `docker pull`, `docker tag` or the build fails.
 */
async function prepareServiceImage(
  svc: NamedService,
  opts?: { dedup?: boolean },
): Promise<{ tag: string; buildSteps?: BuildStep[] }> {
  const tag = imageTag(svc.name);

  // `docker tag <source> <tag>`, marking the service failed on error so the
  // progress snapshot does not stay stuck on "pulling"/"building".
  const tagFrom = async (source: string): Promise<void> => {
    const tagr = await docker(["tag", source, tag], 30_000);
    if (tagr.code !== 0) {
      progressService(svc.name, { status: "failed" });
      throw new Error(`docker tag ${source} ${tag} failed: ${tagr.stderr.trim()}`);
    }
  };

  if (svc.image.type === "registry") {
    const ref = svc.image.reference;
    // Always pull — even when the (delta-restored) store already has the
    // ref. With the layers present this costs ~a manifest round-trip per
    // image ("Already exists" all the way down, through the zot mirror),
    // off the bootstrap critical path; skipping it would freeze floating
    // tags (`foo:latest`) at whatever the previous generation pulled, for
    // as long as the delta chain lives — a silent semantic divergence
    // from the cold build a delta restore must be equivalent to.
    progressService(svc.name, { status: "pulling", detail: `pulling ${ref}` });
    let layers = 0;
    const pull = await shxStream("docker", ["pull", ref], 900_000, undefined, (line) => {
      // `docker pull` (no TTY) prints one line per layer: "<id>: Pull
      // complete" / "Already exists". Count them for a live layer tally.
      if (/(?:Pull complete|Already exists)\s*$/.test(line)) {
        layers++;
        progressService(svc.name, { status: "pulling", detail: `${layers} layers` });
      }
    });
    if (pull.code !== 0) {
      progressService(svc.name, { status: "failed" });
      throw new Error(`docker pull ${ref} failed: ${pull.stderr.trim() || pull.stdout.trim()}`);
    }
    await tagFrom(ref);
    await ensureCaTrustedImage(svc.name, tag);
    return { tag };
  }

  // Dockerfile build. Within one bootstrap, identical definitions (shared
  // codebase images) dedup to a single build. Only bootstrap opts in: the
  // dedup key is dockerfile content + exclude, but the build CONTEXT
  // (/workspace) is an input too — a runtime service started mid-test
  // after setup/test code mutated /workspace must rebuild, not share a
  // pre-mutation image.
  const image = svc.image;
  if (opts?.dedup) {
    const key = buildContentKey(image);
    const inflight = BUILD_DEDUP.get(key);
    if (inflight) {
      progressService(svc.name, { status: "building", detail: `sharing ${inflight.name}'s build` });
      let first;
      try {
        first = await inflight.promise;
      } catch (err) {
        progressService(svc.name, { status: "failed" });
        throw err;
      }
      // first.tag is already CA-layered; tagging it covers this service too.
      await tagFrom(first.tag);
      progressService(svc.name, { status: "prepared" });
      return { tag, buildSteps: first.buildSteps };
    }
    const promise = buildServiceImage(svc.name, image, tag);
    BUILD_DEDUP.set(key, { name: svc.name, promise });
    try {
      return await promise;
    } catch (err) {
      // Let a sharer arriving later rebuild rather than inherit this
      // build's failure forever.
      BUILD_DEDUP.delete(key);
      throw err;
    }
  }
  return buildServiceImage(svc.name, image, tag);
}
788
+
789
/**
 * Build the image for one service from its inline Dockerfile and tag it `tag`.
 *
 * The Dockerfile text (`image.content`) is written under
 * `<WORKSPACE>/.spectest/services/<name>/Dockerfile` and built with WORKSPACE
 * as the build context. Three builders are supported, tried in this order:
 * the shared remote buildx builder, a local BuildKit, and the legacy builder.
 * Live progress is reported through `progressService` while the build runs.
 *
 * @param name  Service name; used for the Dockerfile directory and progress.
 * @param image Dockerfile source (`content`); `exclude` is not read here.
 * @param tag   Tag applied to the finished image.
 * @returns The tag plus, for BuildKit builds, a short list of build steps
 *          taking one second or more.
 * @throws Error when `docker build` exits non-zero (stderr + stdout attached).
 */
async function buildServiceImage(
  name: string,
  image: { content: string; exclude?: string[] },
  tag: string,
): Promise<{ tag: string; buildSteps?: BuildStep[] }> {
  const dockerfileDir = path.join(WORKSPACE, ".spectest", "services", name);
  await fs.mkdir(dockerfileDir, { recursive: true });
  const dockerfilePath = path.join(dockerfileDir, "Dockerfile");
  await fs.writeFile(dockerfilePath, image.content);

  const remote = await ensureRemoteBuilder();
  // The remote builder and a local buildx are both BuildKit, so both print
  // per-step timing on stderr under `--progress=plain`. Only the legacy
  // in-VM builder takes no progress flag.
  const buildKit = remote || (await hasBuildx());

  const env: Record<string, string> = {};
  const target = ["-t", tag, "-f", dockerfilePath];
  let argv: string[];
  if (remote) {
    // Build on the host-side shared buildkitd; `--load` brings the finished
    // image back into the in-VM dockerd so it can be `docker run`.
    argv = [
      "buildx", "build",
      "--builder", REMOTE_BUILDER_NAME,
      "--load",
      "--progress=plain",
      ...target, WORKSPACE,
    ];
  } else if (buildKit) {
    env.DOCKER_BUILDKIT = "1";
    argv = ["build", ...target, "--progress=plain", WORKSPACE];
  } else {
    argv = ["build", ...target, WORKSPACE];
  }

  // Turn one line of builder output into a live "step M/N <cmd>" detail.
  // BuildKit declares steps as `#N [<stage> M/N] <cmd>`; the legacy builder
  // as `Step M/N : <cmd>`. Lines matching neither are ignored.
  const reportStep = (line: string): void => {
    const kit = line.match(/^#\d+\s+\[([^\]]*)\]\s+(.+)$/);
    if (kit) {
      const stepNo = kit[1].match(/\d+\/\d+/)?.[0];
      const command = kit[2].trim().slice(0, 60);
      const detail = stepNo ? `step ${stepNo} ${command}` : command;
      progressService(name, { status: "building", detail });
      return;
    }
    const legacy = line.match(/^Step (\d+\/\d+)\s*:\s*(.+)$/);
    if (!legacy) return;
    progressService(name, {
      status: "building",
      detail: `step ${legacy[1]} ${legacy[2].trim().slice(0, 60)}`,
    });
  };

  progressService(name, { status: "building", detail: "starting build" });
  const result = await shxStream("docker", argv, 1_800_000, env, reportStep);
  if (result.code !== 0) {
    progressService(name, { status: "failed" });
    throw new Error(
      `docker build for ${name} failed:\n${result.stderr.trim()}\n${result.stdout.trim()}`,
    );
  }

  // At most a dozen steps of >=1s: enough to profile, small enough to ride
  // back in the /bootstrap response. (Being the *slowest* dozen presumably
  // relies on summarizeBuildKit's ordering; verify there.)
  const buildSteps: BuildStep[] | undefined = buildKit
    ? summarizeBuildKit(result.stderr)
        .filter((s) => s.secs >= 1)
        .slice(0, 12)
    : undefined;

  // Layer the spectest CA into the image's system trust store (best-effort)
  // so apps reading the system bundle accept HTTPS to fakes.
  await ensureCaTrustedImage(name, tag);
  return { tag, buildSteps };
}
870
+
871
/**
 * Build a derivative image on top of `tag` that copies the spectest root CA
 * into the system trust store, then tag the result back as `tag` so callers
 * keep using the same name.
 *
 * Strictly best-effort: any failure (preparing the build context, copying
 * the CA, or the `docker build` itself) is logged with `console.warn` and
 * swallowed, so a CA-layering problem never fails the service's image build.
 * Images without `update-ca-certificates` / `update-ca-trust` only print a
 * notice during the build.
 *
 * @param serviceName Service the image belongs to (context dir + log label).
 * @param tag         Image tag used as both the base and the result.
 */
async function ensureCaTrustedImage(serviceName: string, tag: string): Promise<void> {
  if (!existsSync(CA_PATH)) {
    // Daemon running outside a base-snapshot VM (dev/test): nothing to layer.
    return;
  }
  // `\\` below is a literal backslash: a Dockerfile line continuation.
  const dockerfile = [
    `FROM ${tag}`,
    "COPY spectest-ca.crt /usr/local/share/ca-certificates/spectest-ca.crt",
    "RUN if command -v update-ca-certificates >/dev/null 2>&1; then \\",
    "      update-ca-certificates; \\",
    "    elif command -v update-ca-trust >/dev/null 2>&1; then \\",
    "      cp /usr/local/share/ca-certificates/spectest-ca.crt /etc/pki/ca-trust/source/anchors/spectest-ca.crt && update-ca-trust extract; \\",
    "    else \\",
    '      echo "[spectest] no system CA trust tool in image; env-var trust only"; \\',
    "    fi",
    "",
  ].join("\n");

  // `undefined` means success; any string (even empty) is a failure detail.
  let failure: string | undefined;
  try {
    const ctxDir = path.join(WORKSPACE, ".spectest", "ca-trust", serviceName);
    await fs.mkdir(ctxDir, { recursive: true });
    await fs.copyFile(CA_PATH, path.join(ctxDir, "spectest-ca.crt"));
    await fs.writeFile(path.join(ctxDir, "Dockerfile"), dockerfile);
    const build = await docker(["build", "-t", tag, ctxDir], 300_000);
    if (build.code !== 0) {
      failure = build.stderr.trim() || build.stdout.trim();
    }
  } catch (err) {
    // Filesystem errors (e.g. CA_PATH is a directory, WORKSPACE unwritable)
    // get the same warn-and-continue treatment as a failed build.
    failure = err instanceof Error ? err.message : String(err);
  }
  if (failure !== undefined) {
    // eslint-disable-next-line no-console
    console.warn(
      `[ca-trust] could not layer spectest CA into ${serviceName} (${tag}); env-var fallback only:\n${failure}`,
    );
  }
}
909
+
910
/**
 * Start (detached) the container for one service on the spectest network.
 *
 * Any leftover container with the same name is removed first, so the call is
 * safe to repeat. The `docker run` command line is assembled in a fixed
 * order: identity/network flags, DNS aliases, sysctl, `/etc/hosts` entries,
 * workdir, CA trust, user env, volumes, privilege flags, then the image and
 * its command/args.
 *
 * @param svc          Service definition (name, env, command/args, ...).
 * @param tag          Image tag to run.
 * @param volumeFlags  Pre-rendered volume flags, appended verbatim.
 * @param extraAliases Additional `--network-alias` values (see below).
 * @throws Error if `command` and `args` are both set, or `docker run` fails.
 */
async function runContainer(
  svc: NamedService,
  tag: string,
  volumeFlags: string[],
  // Extra `--network-alias`es beyond the lowered `aliasesByService`. Used by
  // runtime `startService` (whose service isn't in LOWERED) to give the new
  // container docker-native multi-label resolution for its `hostnames`.
  extraAliases: string[] = [],
): Promise<void> {
  // Reject an invalid definition before touching Docker, so a config error
  // never removes a container that is still running.
  // `command` runs via sh -c, replacing the image entrypoint; `args` is a
  // plain CMD override (`docker run <image> <args…>`) that keeps the
  // entrypoint — what init-wrapped images (postgres) need for extra flags.
  if (svc.command && svc.args?.length) {
    throw new Error(
      `service ${svc.name}: \`command\` and \`args\` are mutually exclusive ` +
        `(command replaces the entrypoint with /bin/sh -c; args keeps it)`,
    );
  }

  // Idempotent: clean up any leftover container with the same name. The
  // result is deliberately ignored (no such container is the common case).
  await docker(["rm", "-f", svc.name], 30_000);

  const args = [
    "run",
    "-d",
    "--restart=no",
    `--name=${svc.name}`,
    `--hostname=${svc.name}`,
    `--network=${NETWORK_NAME}`,
    // Every service is reachable at `<name>.internal` as well as its bare
    // `<name>`. The fully-qualified form is what kubeconfigs and other
    // tooling that expect a multi-label hostname should use.
    `--network-alias=${svc.name}.internal`,
  ];
  // Extra peer aliases for this service — lowered into
  // LOWERED.aliasesByService, plus any passed explicitly by a runtime
  // startService (not present in LOWERED).
  for (const h of [...(LOWERED.aliasesByService[svc.name] ?? []), ...extraAliases]) {
    args.push(`--network-alias=${h}`);
  }
  // Bound TCP give-up time inside THIS container's network namespace.
  // net.ipv4.tcp_retries2 is per-netns and a fresh netns resets to the kernel
  // default, so lowering it on the guest's init netns does not reach
  // containers. Setting it per container resets a genuinely-stuck flow in
  // tens of seconds so the client retries on a fresh connection. Safe because
  // every service runs on the bridge network (own netns), never
  // --network=host where net.* sysctls are denied.
  args.push("--sysctl", "net.ipv4.tcp_retries2=6");
  // Wire every ingress hostname into the container's /etc/hosts so requests
  // from app code reach the daemon's ingress listener via the bridge gateway.
  // /etc/hosts beats Docker's embedded DNS, so the container's resolver
  // settings stay untouched. Skipped until the gateway IP has been resolved.
  if (cachedGatewayIp) {
    for (const h of LOWERED.ingressHosts) {
      args.push(`--add-host=${h}:${cachedGatewayIp}`);
    }
  }
  // Resolve `spectest-host` to the host image-cache gateway so apps can
  // address it by name. Skipped where there's no host cache.
  const hostGw = hostCacheGateway();
  if (hostGw) args.push(`--add-host=${SPECTEST_HOST_NAME}:${hostGw}`);
  if (svc.workdir) args.push(`--workdir=${svc.workdir}`);

  // Trust the spectest root CA from inside the container: bind-mount the cert
  // and set the conventional env vars so language runtimes (Node, Python
  // requests/httpx, AWS SDKs) pick it up without touching the image's system
  // trust store.
  //
  // Only done when the CA is a regular file. `docker run -v` on a missing
  // host path creates a *directory* there, which would then satisfy later
  // `existsSync(CA_PATH)` checks; and env vars pointing at a non-file make
  // several clients fail outright rather than fall back to system trust.
  let caAvailable = false;
  try {
    caAvailable = (await fs.stat(CA_PATH)).isFile();
  } catch {
    // No CA on this host (daemon outside a base-snapshot VM): skip CA wiring.
  }
  if (caAvailable) {
    args.push(`--volume=${CA_PATH}:${CA_PATH}:ro`);
    args.push("-e", `NODE_EXTRA_CA_CERTS=${CA_PATH}`);
    args.push("-e", `SSL_CERT_FILE=${CA_PATH}`);
    args.push("-e", `REQUESTS_CA_BUNDLE=${CA_PATH}`);
    args.push("-e", `AWS_CA_BUNDLE=${CA_PATH}`);
  }

  // User env comes after the CA vars, so a service may override them.
  if (svc.env) {
    for (const [k, v] of Object.entries(svc.env)) {
      args.push("-e", `${k}=${v}`);
    }
  }
  for (const flag of volumeFlags) args.push(flag);
  if (svc.privileged) args.push("--privileged");
  for (const p of svc.tmpfs ?? []) args.push(`--tmpfs=${p}`);
  if (svc.cgroupns) args.push(`--cgroupns=${svc.cgroupns}`);

  // Options must precede the image; everything after `tag` is the command.
  if (svc.command) args.push("--entrypoint=/bin/sh");
  args.push(tag);
  if (svc.command) args.push("-c", svc.command);
  else if (svc.args?.length) args.push(...svc.args);

  const r = await docker(args, 300_000);
  if (r.code !== 0) {
    throw new Error(
      `docker run ${svc.name} failed: ${r.stderr.trim() || r.stdout.trim()}`,
    );
  }
}
1015
+
1016
/**
 * Report whether a TCP connection to `host:port` can be established.
 *
 * Resolves `true` on connect, `false` on a socket error or after 2 s of
 * socket inactivity. Never rejects for connection failures; the socket is
 * destroyed as soon as the outcome is known.
 */
async function probeTcp(host: string, port: number): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const socket = net.createConnection({ host, port });
    let done = false;

    // First outcome wins; later events (e.g. an error after timeout) are
    // ignored so the promise settles exactly once.
    const settle = (reachable: boolean): void => {
      if (done) return;
      done = true;
      try {
        socket.destroy();
      } catch {
        /* ignore */
      }
      resolve(reachable);
    };
    const onUnreachable = (): void => settle(false);

    socket.setTimeout(2000);
    socket.once("connect", () => settle(true));
    socket.once("error", onUnreachable);
    socket.once("timeout", onUnreachable);
  });
}
1036
+
1037
/**
 * Report whether `GET http://<host>:<port><urlPath>` answers with a 2xx
 * status within 5 seconds.
 *
 * Any failure (connection error, abort on timeout, non-2xx status) yields
 * `false`; the function never rejects.
 *
 * @param host    Hostname or IP to probe.
 * @param port    TCP port.
 * @param urlPath Request path, appended verbatim (expected to start with `/`).
 */
async function probeHttp(host: string, port: number, urlPath: string): Promise<boolean> {
  const ctrl = new AbortController();
  const to = setTimeout(() => ctrl.abort(), 5000);
  try {
    const res = await fetch(`http://${host}:${port}${urlPath}`, { signal: ctrl.signal });
    // Only the status matters. Cancel the unread body so the connection is
    // released right away instead of lingering until garbage collection —
    // this runs in a tight ready-probe loop. Best-effort: a cancel failure
    // must not change the probe's verdict.
    await res.body?.cancel().catch(() => {});
    return res.ok;
  } catch {
    return false;
  } finally {
    clearTimeout(to);
  }
}
1049
+
1050
/**
 * Run `command` through `sh -c` inside container `name` (10 s budget passed
 * to the docker helper) and report whether it exited with status 0.
 */
async function probeExec(name: string, command: string): Promise<boolean> {
  const { code } = await docker(["exec", name, "sh", "-c", command], 10_000);
  return code === 0;
}
1054
+
1055
/**
 * Poll a service's `readyCheck` until it passes or its timeout elapses.
 *
 * Returns immediately when the service declares no ready check. The timeout
 * defaults to 60 s. On timeout, the last 200 lines of the container's logs
 * are attached to the thrown error.
 *
 * @throws Error when the service is not ready within the timeout.
 */
async function waitForReady(svc: NamedService): Promise<void> {
  const check: ReadyCheck | undefined = svc.readyCheck;
  if (!check) return;

  const timeoutSecs = check.timeoutSecs ?? 60;
  const deadline = Date.now() + timeoutSecs * 1000;

  // Ramped poll: a flat 500ms would quantize every service's ready latency
  // (and compound down dependsOn chains). Fast early probes catch quick
  // services; the ramp caps polling load on slow ones. Exec probes keep a
  // higher floor because each attempt spawns a `docker exec`.
  const ramp =
    check.type === "exec" ? [250, 250, 400, 400, 500] : [50, 100, 150, 250, 400, 500];

  // One probe attempt of whichever kind the check declares.
  const probeOnce = (): Promise<boolean> => {
    switch (check.type) {
      case "tcp":
        return probeTcp(svc.name, check.port);
      case "http":
        return probeHttp(svc.name, check.port, check.path ?? "/");
      default:
        return probeExec(svc.name, check.command);
    }
  };

  for (let attempt = 0; Date.now() < deadline; attempt++) {
    if (await probeOnce()) return;
    // Past the end of the ramp, keep using its last (largest) delay.
    const pause = ramp[Math.min(attempt, ramp.length - 1)]!;
    await new Promise((wake) => setTimeout(wake, pause));
  }

  const logs = await docker(["logs", "--tail=200", svc.name], 30_000);
  throw new Error(
    `service ${svc.name} not ready within ${timeoutSecs}s. Recent container logs:\n${logs.stdout}\n${logs.stderr}`,
  );
}
1086
+
1087
/**
 * Check that the `dependsOn` graph is well-formed and hand back the
 * name→service lookup used to walk it.
 *
 * Two conditions are rejected, so callers can assume a clean DAG:
 *  - a dependency naming a service that is not in `services`;
 *  - a dependency cycle (including a service depending on itself).
 *
 * @throws Error `service <a> depends on unknown service <b>` or
 *         `service dependency cycle`.
 */
function validateServiceGraph(services: NamedService[]): Map<string, NamedService> {
  const byName = new Map<string, NamedService>();
  for (const svc of services) byName.set(svc.name, svc);

  // Unknown dependencies are reported first, in declaration order.
  for (const svc of services) {
    const missing = (svc.dependsOn ?? []).find((dep) => !byName.has(dep));
    if (missing !== undefined) {
      throw new Error(`service ${svc.name} depends on unknown service ${missing}`);
    }
  }

  // Depth-first walk. `onPath` holds the services on the current DFS path;
  // meeting one of them again means a back edge, i.e. a cycle. `finished`
  // holds fully explored services so shared dependencies are walked once.
  const onPath = new Set<string>();
  const finished = new Set<string>();
  const explore = (name: string): void => {
    onPath.add(name);
    for (const dep of byName.get(name)!.dependsOn ?? []) {
      if (onPath.has(dep)) throw new Error("service dependency cycle");
      if (!finished.has(dep)) explore(dep);
    }
    onPath.delete(name);
    finished.add(name);
  };
  for (const svc of services) {
    if (!finished.has(svc.name)) explore(svc.name);
  }
  return byName;
}
1117
+
1118
/**
 * Start every service as soon as its own dependencies have started.
 *
 * A service is handed to `startOne` the moment all of its `dependsOn`
 * services have completed their own `startOne`. Independent branches of the
 * graph therefore proceed concurrently, and a slow service only delays its
 * transitive dependents. Each service's start promise is memoized by name,
 * so `startOne` runs once per service even when several dependents share it.
 * A failed dependency rejects every dependent awaiting it.
 *
 * @param services Services to bring up; the graph is validated first.
 * @param startOne Performs the actual start of a single service.
 * @throws Whatever `validateServiceGraph` or `startOne` throws.
 */
async function startServices(
  services: NamedService[],
  startOne: (svc: NamedService) => Promise<void>,
): Promise<void> {
  const byName = validateServiceGraph(services);
  const inflight = new Map<string, Promise<void>>();

  // Return the (single) start promise for `svc`, creating it on first use.
  function launch(svc: NamedService): Promise<void> {
    let pending = inflight.get(svc.name);
    if (!pending) {
      pending = startAfterDependencies(svc);
      inflight.set(svc.name, pending);
    }
    return pending;
  }

  // Wait for all dependencies of `svc`, then start `svc` itself.
  async function startAfterDependencies(svc: NamedService): Promise<void> {
    const prerequisites: Promise<void>[] = [];
    for (const depName of svc.dependsOn ?? []) {
      const dep = byName.get(depName);
      if (dep) prerequisites.push(launch(dep));
    }
    await Promise.all(prerequisites);
    await startOne(svc);
  }

  await Promise.all(services.map(launch));
}
1151
+
1152
+ // ────────────────────────────────────────────────────────────────────────
1153
+ // Ingress — in-daemon HTTP/HTTPS listeners that route by Host header.
1154
+ //
1155
+ // Two kinds of routes share the same listeners:
1156
+ //
1157
+ // * Fakes — in-daemon mock APIs. Each fake declares `hostnames` and a
1158
+ // `port` (default 80); the request hits the fake's handler with the
1159
+ // fake's `state`. HTTPS always serves on 443 (SNI per hostname,
1160
+ // leaf cert signed by the in-VM root CA).
1161
+ //
1162
+ // * Service TLS — reverse-proxy fronts for user services. Each
1163
+ // `services.<name>.tls` entry declares `{ hostname, port }`; the
1164
+ // daemon binds the hostname on :80 AND :443 and proxies each
1165
+ // request to `http://<service>:<port>` inside the docker network.
1166
+ // WebSocket upgrades are bridged. The leaf cert is signed by the
1167
+ // same root CA, so `ctx.browser()` and peer services trust it.
1168
+ //
1169
+ // Both listeners bind on 0.0.0.0 so containers reach them via the
1170
+ // bridge gateway IP (also written into /run/spectest-fakes.json for
1171
+ // spectest-resolver and injected as --add-host on every container).
1172
+ //
1173
+ // Per-fake `state` is plain JS memory and lives across snapshot/fork
1174
+ // along with the rest of the daemon — every fork sees its own copy.
1175
+ // ────────────────────────────────────────────────────────────────────────
1176
+
1177
/** Path of the JSON registry file consulted by spectest-resolver. Defaults to
 * `/run/spectest-fakes.json`; the `SPECTEST_FAKES_REGISTRY` environment
 * variable overrides it. */
const FAKES_REGISTRY_PATH =
  process.env.SPECTEST_FAKES_REGISTRY ?? "/run/spectest-fakes.json";
/** HTTP port assigned to a fake whose definition does not set `port`. */
const DEFAULT_FAKE_PORT = 80;
/** Fixed HTTPS port shared by every route (fakes + service-tls). */
const INGRESS_HTTPS_PORT = 443;
/** Fixed HTTP port always bound for service-tls (alongside any
 * fakes whose `port` happens to be 80). */
const INGRESS_HTTP_PORT = 80;
1185
+
1186
/** Runtime record for one configured fake. Created by `buildIngress` from the
 * project's `fakes` map and completed by `startIngress`. */
interface FakeRuntime {
  /** The fake's definition exactly as declared in the project. */
  def: FakeDefinition;
  /** Per-fake mutable state. Built once; lives in daemon memory. It is
   * `undefined` until `startIngress` runs the definition's `state()` factory,
   * or substitutes `{}` when the definition has none. */
  state: unknown;
  /** Lowercased hostnames the fake answers to. */
  hostnames: string[];
  /** HTTP port the fake listens on (`def.port`, else `DEFAULT_FAKE_PORT`). */
  port: number;
  /** Cached helpers (built lazily on first ctx.fakes access). */
  helpers?: Record<string, unknown>;
  /** Cached tracking proxy over `helpers` — the value actually handed to
   * tests via `ctx.fakes.<name>`. Records a `fake` event per helper
   * call / getter read and wraps the result for assertion provenance. */
  trackedHelpers?: Record<string, unknown>;
}
1202
+
1203
/** All loaded fakes, keyed by stable name (the `fakes` map key). Holds the
 * in-daemon handler, forked state, and helpers — the parts intrinsic to a
 * fake. Their *networking* (certs, DNS, routes) comes from `LOWERED`.
 * Cleared and refilled by `buildIngress`. */
const FAKES = new Map<string, FakeRuntime>();
/** Generic ingress derived from the loaded project (tls/hostnames/fakes/
 * component `provides`) by the SDK's `lowerIngress`. The daemon executes
 * this and never reads `svc.tls`/`svc.hostnames` itself. Rebuilt on /load.
 * Starts out empty (no certificates, proxies, hosts, aliases or wildcards)
 * until `buildIngress` replaces it. */
let LOWERED: LoweredIngress = {
  certificates: [],
  proxies: [],
  ingressHosts: [],
  aliasesByService: {},
  wildcards: [],
};
/** Running HTTP servers per port (Bun.Server). Rebuilt on /load. Filled by
 * `startIngress`, emptied by `stopIngressServers`. Typed `any` because the
 * Bun server type is not imported here. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const INGRESS_HTTP_SERVERS = new Map<number, any>();
/** Running HTTPS servers per port (currently always {INGRESS_HTTPS_PORT}).
 * Filled by `startIngress`, emptied by `stopIngressServers`. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const INGRESS_HTTPS_SERVERS = new Map<number, any>();
1223
+
1224
/**
 * Stop every ingress listener and forget it, so the next project's routes
 * can bind the same ports. HTTP listeners are stopped first, then HTTPS.
 * A listener that throws while stopping is logged and skipped; its map is
 * still cleared afterwards.
 */
function stopIngressServers(): void {
  const listenerSets = [
    { scheme: "http", servers: INGRESS_HTTP_SERVERS },
    { scheme: "https", servers: INGRESS_HTTPS_SERVERS },
  ];
  for (const { scheme, servers } of listenerSets) {
    for (const [port, srv] of servers) {
      try {
        // `stop` is optional-called: entries are untyped server handles.
        srv.stop?.();
      } catch (err) {
        // eslint-disable-next-line no-console
        console.warn(`[ingress] failed to stop ${scheme} server on :${port}:`, err);
      }
    }
    servers.clear();
  }
}
1248
+
1249
/**
 * Reset ingress state for a newly loaded project.
 *
 * Stops any running listeners, drops the previous fakes, recomputes
 * `LOWERED` via the SDK's `lowerIngress` (which owns all the special-casing
 * of tls/hostnames/provides/fakes), and registers one `FakeRuntime` per
 * entry of `project.fakes`. No listener is bound here.
 */
function buildIngress(project: Project): void {
  stopIngressServers();
  FAKES.clear();
  LOWERED = lowerIngress(project);

  const fakeDefs = project.fakes;
  if (!fakeDefs) return;

  for (const [name, def] of Object.entries(fakeDefs)) {
    const runtime: FakeRuntime = {
      def,
      // Filled in by startIngress once the fake's `state()` factory has run.
      state: undefined,
      hostnames: def.hostnames.map((host) => host.toLowerCase()),
      port: def.port ?? DEFAULT_FAKE_PORT,
    };
    FAKES.set(name, runtime);
  }
}
1266
+
1267
/**
 * Generate a leaf cert + key for one ingress route (fake or service proxy),
 * signed by the in-VM root CA at {CA_PATH}. SANs cover every hostname the
 * route answers to, so a TLS client verifies cleanly regardless of which
 * hostname it used. Shells out to `openssl req -x509 -CA ... -CAkey ...`
 * (OpenSSL 3.0+).
 *
 * `label` is a short tag baked into the cert Subject CN and the temp file
 * names — diagnostics only, not used for TLS verification. Because X.509
 * caps the CN at 64 characters, the CN is truncated to that length; the full
 * names always live in the SANs.
 *
 * The temporary key/cert files under /tmp are removed on every exit path,
 * including an openssl failure.
 *
 * @param label     Diagnostic tag for the CN and temp file names.
 * @param hostnames DNS names to put in subjectAltName; must be non-empty.
 * @returns PEM-encoded certificate and private key.
 * @throws Error when `hostnames` is empty, openssl fails, or the generated
 *         files cannot be read.
 */
async function generateHostCert(
  label: string,
  hostnames: string[],
): Promise<{ cert: string; key: string }> {
  if (hostnames.length === 0) {
    // An empty subjectAltName makes openssl fail with an opaque message.
    throw new Error(`cannot generate a certificate for ${label}: no hostnames given`);
  }
  const id = `spectest-host-${sanitizeSegment(label)}-${randomUUID().slice(0, 8)}`;
  const keyPath = path.join("/tmp", `${id}.key`);
  const crtPath = path.join("/tmp", `${id}.crt`);
  const sans = hostnames.map((h) => `DNS:${h}`).join(",");
  // commonName is limited to 64 characters; openssl rejects anything longer,
  // which used to break routes with long hostnames.
  const commonName = `spectest-${label}`.slice(0, 64);
  const args = [
    "req",
    "-newkey",
    "rsa:2048",
    "-nodes",
    "-keyout",
    keyPath,
    "-out",
    crtPath,
    "-x509",
    "-CA",
    CA_PATH,
    "-CAkey",
    CA_KEY_PATH,
    "-days",
    "3650",
    "-subj",
    `/CN=${commonName}`,
    "-addext",
    `subjectAltName=${sans}`,
    "-addext",
    "basicConstraints=CA:FALSE",
    "-addext",
    "extendedKeyUsage=serverAuth",
    "-addext",
    "keyUsage=digitalSignature,keyEncipherment",
  ];
  try {
    const r = await shx("openssl", args, 30_000);
    if (r.code !== 0) {
      throw new Error(
        `openssl req for ${label} failed (rc=${r.code}): ${r.stderr.trim() || r.stdout.trim()}`,
      );
    }
    const [cert, key] = await Promise.all([
      fs.readFile(crtPath, "utf8"),
      fs.readFile(keyPath, "utf8"),
    ]);
    return { cert, key };
  } finally {
    // openssl may have written the private key before failing, so clean up
    // unconditionally. A missing file is fine.
    await Promise.all([
      fs.unlink(keyPath).catch(() => {}),
      fs.unlink(crtPath).catch(() => {}),
    ]);
  }
}
1332
+
1333
/** Gateway IP of the spectest bridge network, once resolved; `null` before
 * the first successful `bridgeGatewayIp()` call. */
let cachedGatewayIp: string | null = null;

/**
 * Resolve the bridge gateway IP of `NETWORK_NAME` — the address containers
 * use to reach the VM host — by asking dockerd through the docker CLI.
 * The answer is cached for the daemon's life because the network is
 * recreated only on reload.
 *
 * @throws Error when `docker network inspect` fails or prints no gateway.
 */
async function bridgeGatewayIp(): Promise<string> {
  if (cachedGatewayIp) return cachedGatewayIp;

  // Go-template format: gateway of the network's first IPAM config entry.
  const inspectArgs = [
    "network",
    "inspect",
    "--format",
    "{{(index .IPAM.Config 0).Gateway}}",
    NETWORK_NAME,
  ];
  const inspected = await docker(inspectArgs, 10_000);
  if (inspected.code !== 0) {
    throw new Error(
      `docker network inspect ${NETWORK_NAME} failed (rc=${inspected.code}): ${inspected.stderr.trim()}`,
    );
  }

  const gateway = inspected.stdout.trim();
  if (gateway === "") {
    throw new Error(`no gateway IP returned for network ${NETWORK_NAME}`);
  }
  cachedGatewayIp = gateway;
  return gateway;
}
1359
+
1360
/** One hostname → its handler. Either a fake (run user handler in-process)
 * or a service proxy (reverse-proxy to a container). Discriminated on
 * `kind`: a `"fake"` route carries the fake's runtime record, a `"proxy"`
 * route the target service name and port. */
type Route =
  | { kind: "fake"; fake: FakeRuntime }
  | { kind: "proxy"; service: string; port: number };
1365
+
1366
/** Per-WebSocket-upgrade context, stored on `ws.data`. It carries the
 * upstream URL that the `open()` handler connects to, and the bridge state
 * shared with the other websocket handlers. */
interface WsBridgeData {
  /** URL of the upstream WebSocket to open for this client connection. */
  upstreamUrl: string;
  /** The upstream socket; `null` until `open()` has created it. */
  upstream: WebSocket | null;
  /** Queued messages, sent to the upstream when it fires `open`; the queue
   * is emptied after that flush. */
  pending: Array<string | ArrayBuffer | Uint8Array>;
}
1373
+
1374
/**
 * Pristine `fetch` captured at module load, before any test-scoped
 * fetch wrapper can monkey-patch `globalThis.fetch`. The reverse-proxy
 * uses this directly so its outbound HTTP calls aren't intercepted by
 * the test recorder. If they were, they'd be (a) misattributed to the
 * test's timeline, and (b) trip up the Response constructor, because the
 * recorder wraps `res.status` / `res.body` in inspectable proxies
 * that don't pass through as primitives.
 *
 * Bound to `globalThis` so it can be called as a bare function.
 */
const NATIVE_FETCH: typeof fetch = globalThis.fetch.bind(globalThis);
1384
+
1385
/**
 * Header names (lowercase) a proxy must not forward as-is.
 *
 * These are the hop-by-hop fields of RFC 7230 §6.1 plus two additions:
 *  - `host` is not hop-by-hop, but is listed so the inbound value is never
 *    copied verbatim (NOTE(review): presumably the outbound request derives
 *    its own Host from the upstream URL — confirm at the use site).
 *  - `proxy-connection` is a non-standard header some clients still send.
 *
 * The real field name is `Trailer`; `trailers` (the RFC 2616 §13.5.1 typo)
 * is kept only so nothing previously stripped starts leaking through.
 */
const HOP_BY_HOP_HEADERS = new Set([
  "connection",
  "keep-alive",
  "proxy-authenticate",
  "proxy-authorization",
  "proxy-connection",
  "te",
  "trailer",
  "trailers",
  "transfer-encoding",
  "upgrade",
  "host",
]);
1397
+
1398
/**
 * Bring ingress servers up: bind one Bun.serve per unique HTTP port
 * (fakes' ports plus the always-on :80 for service proxies), plus a
 * shared HTTPS :443 (SNI per hostname). Build each fake's initial state,
 * then write the hostname→ip registry that spectest-resolver consults
 * for DNS.
 *
 * Order of work: (1) bail out early, clearing the registry, when the project
 * has no ingress at all; (2) resolve the bridge gateway; (3) run every
 * fake's `state()` factory; (4) mint leaf certificates; (5) bind the HTTP
 * listeners; (6) bind the HTTPS listener if any certificate exists;
 * (7) seed the names registry.
 *
 * NOTE(review): the original comment called this "Idempotent — calling twice
 * rebuilds", but nothing in this function stops listeners that are already
 * bound; `buildIngress()` is what calls `stopIngressServers()`. Confirm
 * callers always run `buildIngress()` first.
 *
 * @throws Error when a fake's `state()` factory throws, certificate
 *         generation fails, or the runtime does not provide `Bun.serve`.
 */
async function startIngress(): Promise<void> {
  const hasIngress =
    FAKES.size > 0 ||
    LOWERED.proxies.length > 0 ||
    LOWERED.ingressHosts.length > 0 ||
    LOWERED.wildcards.length > 0;
  if (!hasIngress) {
    // Make sure the resolver doesn't see stale entries from a prior project.
    REGISTRY.hosts = {};
    REGISTRY.wildcards = [];
    await writeRegistry();
    return;
  }
  // `gw` is not read again in this function; the call matters because it
  // populates `cachedGatewayIp`, which runContainer reads for its --add-host
  // entries, and because a failure here aborts before anything is bound.
  const gw = await bridgeGatewayIp();

  // Initialise per-fake state. Awaited sequentially — state factories
  // are expected to be tiny constructors; the cost of serial init is
  // dwarfed by the eventual snapshot.
  for (const [name, fake] of FAKES) {
    if (fake.def.state) {
      try {
        fake.state = await fake.def.state();
      } catch (err) {
        // Re-thrown with the fake's name so the failing factory is obvious.
        throw new Error(
          `fake ${JSON.stringify(name)} state() factory threw: ${(err as Error).message}`,
        );
      }
    } else {
      // No factory declared: every such fake gets its own empty object.
      fake.state = {};
    }
  }

  // Mint one leaf cert per `certificate` decl (SANs = its hostnames),
  // signed by the in-VM root CA, and index it by hostname for SNI. Done
  // before binding so the HTTPS listener has certs ready and a startup
  // failure aborts /bootstrap cleanly.
  const caPresent = existsSync(CA_PATH) && existsSync(CA_KEY_PATH);
  const certByHost = new Map<string, { cert: string; key: string }>();
  if (caPresent) {
    for (const group of LOWERED.certificates) {
      if (group.hostnames.length === 0) continue;
      // The group's first hostname doubles as the diagnostic label.
      const leaf = await generateHostCert(group.hostnames[0], group.hostnames);
      for (const h of group.hostnames) certByHost.set(h, leaf);
    }
  } else if (LOWERED.certificates.length > 0) {
    // Without the CA (or its key) no cert can be signed; certByHost stays
    // empty, so the HTTPS bind further down is skipped.
    // eslint-disable-next-line no-console
    console.warn(
      `[ingress] root CA missing at ${CA_PATH}; skipping HTTPS bind (fakes/proxies will be HTTP-only)`,
    );
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const Bun = (globalThis as any).Bun;
  if (!Bun?.serve) {
    throw new Error(
      "ingress requires Bun.serve; the daemon must run under Bun (it does in-VM)",
    );
  }

  // Resolve every ingress hostname to its handler. Fakes run an in-daemon
  // handler; proxies reverse-proxy to a service:port. This single table
  // drives both the HTTP and HTTPS listeners. Proxies are inserted after
  // fakes, so on a hostname clash the proxy entry replaces the fake's.
  const routeByHost = new Map<string, Route>();
  for (const fake of FAKES.values()) {
    for (const h of fake.hostnames) routeByHost.set(h, { kind: "fake", fake });
  }
  for (const p of LOWERED.proxies) {
    routeByHost.set(p.hostname, { kind: "proxy", service: p.service, port: p.port });
  }

  // ── HTTP listeners: group routes by port, dispatch per-request by Host.
  // Proxies bind :80 (HTTPS, if any, is on :443); fakes use their
  // declared port. Skip :443 in the HTTP map — HTTPS wins.
  const httpRoutesByPort = new Map<number, Map<string, Route>>();
  // Get-or-create the per-port route table.
  const ensurePort = (port: number): Map<string, Route> => {
    const m = httpRoutesByPort.get(port) ?? new Map<string, Route>();
    httpRoutesByPort.set(port, m);
    return m;
  };
  for (const fake of FAKES.values()) {
    if (fake.port === INGRESS_HTTPS_PORT) continue;
    const routes = ensurePort(fake.port);
    for (const h of fake.hostnames) routes.set(h, routeByHost.get(h)!);
  }
  if (LOWERED.proxies.length > 0) {
    const routes = ensurePort(INGRESS_HTTP_PORT);
    for (const p of LOWERED.proxies) routes.set(p.hostname, routeByHost.get(p.hostname)!);
  }
  for (const [port, byHost] of httpRoutesByPort) {
    const label = `port ${port}`;
    INGRESS_HTTP_SERVERS.set(port, bindIngressServer(Bun, port, byHost, label));
    const hosts = [...byHost.keys()].join(", ");
    // eslint-disable-next-line no-console
    console.log(`[ingress] http :${port} for ${hosts}`);
  }

  // ── HTTPS listener on INGRESS_HTTPS_PORT: SNI per certificated hostname.
  if (certByHost.size > 0) {
    const tlsEntries: Array<{ cert: string; key: string; serverName: string }> = [];
    const byHostHttps = new Map<string, Route>();
    for (const [h, leaf] of certByHost) {
      tlsEntries.push({ cert: leaf.cert, key: leaf.key, serverName: h });
      // A certificated hostname with no fake/proxy route still gets a TLS
      // entry, but no handler in the HTTPS route table.
      const route = routeByHost.get(h);
      if (route) byHostHttps.set(h, route);
    }
    const label = `https :${INGRESS_HTTPS_PORT}`;
    const server = bindIngressServer(
      Bun,
      INGRESS_HTTPS_PORT,
      byHostHttps,
      label,
      tlsEntries,
    );
    INGRESS_HTTPS_SERVERS.set(INGRESS_HTTPS_PORT, server);
    const hosts = [...byHostHttps.keys()].join(", ");
    // eslint-disable-next-line no-console
    console.log(`[ingress] https :${INGRESS_HTTPS_PORT} for ${hosts}`);
  }

  // Seed the resolver's names registry: ingress hostnames (fakes, TLS
  // proxies, dnsName(→ingress)) → bridge gateway, plus ingress-targeted
  // wildcards. Service-targeted wildcards wait for the post-container pass
  // (their containers aren't up yet). Dynamic ctx.dnsName calls extend this.
  await seedNamesRegistry({ servicesUp: false });
}
1530
+
1531
/**
 * Spin up one Bun.serve listener bound to (port, optional TLS) that
 * dispatches every request to the matching Route by Host header.
 *
 * Shared by the HTTP and HTTPS branches. Also exports a `websocket`
 * handler so reverse-proxy targets can transparently bridge WS
 * upgrades through to their upstream service.
 *
 * @param Bun           The Bun runtime namespace; only `Bun.serve` is used.
 * @param port          Port to listen on (bound on 0.0.0.0).
 * @param byHost        Host → Route table consulted for every request.
 * @param listenerLabel Listener name echoed in error responses.
 * @param tlsEntries    Per-hostname cert/key/serverName entries. When given,
 *                      they are passed to Bun as `tls` and requests are
 *                      stamped with proto `https`; otherwise `http`.
 * @returns Whatever `Bun.serve` returns.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function bindIngressServer(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  Bun: any,
  port: number,
  byHost: Map<string, Route>,
  listenerLabel: string,
  tlsEntries?: Array<{ cert: string; key: string; serverName: string }>,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): any {
  // A TLS listener terminates https; everything else is plain http. Used
  // to stamp X-Forwarded-Proto so upstreams that build absolute URLs or
  // redirect see the scheme the client actually used, not our http hop.
  const proto = tlsEntries ? "https" : "http";

  // Close one end of a WS bridge, relaying the peer's code/reason. A
  // relayed close can be rejected by the socket implementation (the WHATWG
  // WebSocket API only accepts code 1000 or 3000–4999 and a reason of at
  // most 123 bytes, so e.g. a forwarded 1001/1006 may throw). Swallowing
  // that error would leave the socket open, so fall back to a bare close().
  const closeBridgeEnd = (
    sock: { close(code?: number, reason?: string): void } | null | undefined,
    code?: number,
    reason?: string,
  ): void => {
    if (!sock) return;
    try {
      sock.close(code, reason);
    } catch {
      try {
        sock.close();
      } catch {
        /* already closed */
      }
    }
  };

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const opts: Record<string, any> = {
    port,
    hostname: "0.0.0.0",
    // Bun.serve defaults to a 10s idleTimeout, which kills any proxied
    // request whose upstream takes >10s to produce bytes — under parallel
    // test load that surfaced as "fetch failed"/"other side closed" on
    // deploy-archive uploads and ERR_EMPTY_RESPONSE in browser tests
    // ([Bun.serve]: request timed out after 10 seconds). Ingress fronts
    // arbitrarily slow app endpoints (deploys can legitimately take
    // minutes), so disable the idle timeout entirely; forked test VMs are
    // short-lived, leaked-connection risk is bounded by the fork.
    idleTimeout: 0,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    fetch: (req: Request, server: any): Response | Promise<Response> =>
      dispatchIngress(req, server, byHost, listenerLabel, proto),
    websocket: {
      // Downstream client connected: dial the upstream and wire the two
      // sockets together.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      async open(ws: any) {
        const data = ws.data as WsBridgeData;
        try {
          const upstream = new WebSocket(data.upstreamUrl);
          // ArrayBuffer so binary frames can be ws.send()'d to the
          // downstream client verbatim — Blob would need an extra
          // .arrayBuffer() round-trip on every message.
          upstream.binaryType = "arraybuffer";
          data.upstream = upstream;
          upstream.addEventListener("open", () => {
            // Flush frames the client sent while the upstream was still
            // completing its handshake.
            for (const m of data.pending) upstream.send(m);
            data.pending = [];
          });
          upstream.addEventListener("message", (ev: MessageEvent) => {
            try {
              ws.send(ev.data);
            } catch {
              /* client gone */
            }
          });
          upstream.addEventListener("close", (ev: CloseEvent) => {
            closeBridgeEnd(ws, ev.code, ev.reason);
          });
          upstream.addEventListener("error", () => {
            closeBridgeEnd(ws, 1011, "upstream error");
          });
        } catch (err) {
          // eslint-disable-next-line no-console
          console.warn(`[ingress] ws upstream open failed for ${data.upstreamUrl}:`, err);
          closeBridgeEnd(ws, 1011, "upstream open failed");
        }
      },
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      message(ws: any, message: string | Buffer) {
        const data = ws.data as WsBridgeData;
        const payload =
          typeof message === "string" ? message : new Uint8Array(message);
        if (data.upstream && data.upstream.readyState === WebSocket.OPEN) {
          data.upstream.send(payload);
        } else {
          // Buffer until the upstream finishes its handshake.
          data.pending.push(payload);
        }
      },
      // Downstream client went away: tear down the upstream leg too.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      close(ws: any, code: number, reason: string) {
        const data = ws.data as WsBridgeData;
        closeBridgeEnd(data.upstream, code, reason);
      },
    },
  };
  if (tlsEntries) opts.tls = tlsEntries;
  return Bun.serve(opts);
}
1641
+
1642
/**
 * Request dispatcher used by every ingress listener. The Host header is
 * lowercased and its port stripped, then used to pick a Route:
 *   - no route: 404 naming the host and the listener;
 *   - fake:     run the fake's handler; a thrown error becomes a 500;
 *   - anything else: hand off to `proxyToService`.
 */
async function dispatchIngress(
  req: Request,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  server: any,
  byHost: Map<string, Route>,
  listenerLabel: string,
  proto: string,
): Promise<Response> {
  const rawHost = req.headers.get("host") ?? "";
  const host = rawHost.toLowerCase().split(":")[0].trim();

  const route = byHost.get(host);
  if (!route) {
    const body = `spectest-daemon: no ingress route bound to Host=${JSON.stringify(host)} on ${listenerLabel}\n`;
    return new Response(body, {
      status: 404,
      headers: { "content-type": "text/plain" },
    });
  }

  if (route.kind !== "fake") {
    return proxyToService(req, server, route.service, route.port, listenerLabel, proto);
  }

  try {
    return await route.fake.def.handler(req, route.fake.state, FAKE_CTX);
  } catch (err) {
    const reason = (err as Error)?.message ?? String(err);
    return new Response(
      `spectest-daemon: fake ${route.fake.def.name} threw: ${reason}\n`,
      { status: 500, headers: { "content-type": "text/plain" } },
    );
  }
}
1681
+
1682
/**
 * Reverse-proxy a request to `http://<service>:<port>` on
 * `spectest-net`. Handles plain HTTP/1.1 + 2 and WebSocket upgrades:
 *
 *   - WS upgrade requests get routed through `server.upgrade()`, with
 *     the upstream URL stashed on `ws.data`. The shared `websocket`
 *     handler opens the upstream and bridges frames both ways.
 *   - Plain requests pass through via `fetch()` with hop-by-hop
 *     headers stripped; the response body is a ReadableStream returned
 *     verbatim, so it streams back without buffering.
 *
 * `decompress: false` makes this a true byte-for-byte pass-through:
 * Bun's fetch otherwise auto-decompresses the upstream body, which would
 * leave us forwarding the original `Content-Encoding`/`Content-Length`
 * over a now-plaintext body — browsers then fail with
 * ERR_CONTENT_DECODING_FAILED or truncate on the stale length (the
 * WHATWG fetch footgun in whatwg/fetch#1729). Keeping the body encoded
 * means those headers still describe the bytes we send, and we relay the
 * client's `Accept-Encoding` upstream so the upstream picks the scheme.
 *
 * Retries: body-less requests and requests whose body was buffered are
 * attempted up to 3 times, but only after a connect-class failure. Any
 * connect-class failure (retried or not) drops the cached upstream IP so
 * the next attempt or request re-inspects the container.
 *
 * @returns The upstream response, a 101 stub after a successful WS
 *          upgrade, 426 if the upgrade was refused, or 502 when the
 *          upstream could not be reached.
 */
async function proxyToService(
  req: Request,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  server: any,
  service: string,
  port: number,
  listenerLabel: string,
  proto: string,
): Promise<Response> {
  const url = new URL(req.url);
  const upstreamPath = `${url.pathname}${url.search}`;

  const upgrade = req.headers.get("upgrade")?.toLowerCase() ?? "";
  if (upgrade === "websocket") {
    const upstreamUrl = `ws://${await proxyUpstreamHost(service)}:${port}${upstreamPath}`;
    const wsData: WsBridgeData = {
      upstreamUrl,
      upstream: null,
      pending: [],
    };
    const ok = server.upgrade(req, { data: wsData });
    if (ok) {
      // Bun has already taken over the response — return a stub.
      return new Response(null, { status: 101 });
    }
    return new Response(
      `spectest-daemon: ws upgrade refused on ${listenerLabel}\n`,
      { status: 426, headers: { "content-type": "text/plain" } },
    );
  }

  const fwdHeaders = new Headers();
  for (const [k, v] of req.headers) {
    if (HOP_BY_HOP_HEADERS.has(k.toLowerCase())) continue;
    fwdHeaders.append(k, v);
  }
  // Standard reverse-proxy provenance headers: the upstream sees the
  // public scheme/host it was reached through and the client's address,
  // even though we rewrite Host below to the service-net name.
  const clientIp = server.requestIP?.(req)?.address as string | undefined;
  const priorXff = req.headers.get("x-forwarded-for");
  const xff = clientIp ? (priorXff ? `${priorXff}, ${clientIp}` : clientIp) : priorXff;
  if (xff) fwdHeaders.set("x-forwarded-for", xff);
  fwdHeaders.set("x-forwarded-proto", proto);
  const publicHost = req.headers.get("host");
  if (publicHost) fwdHeaders.set("x-forwarded-host", publicHost);
  // Override Host so the upstream sees its own service-net name, not
  // the public hostname. Lets origin servers that vhost by Host header
  // continue to find the right virtual host.
  fwdHeaders.set("host", `${service}:${port}`);

  // Buffer bounded request bodies so a transient upstream connect failure
  // can be retried (a ReadableStream body is consumed by the first
  // attempt). Under heavy parallel-fork load an in-guest connect to a
  // peer container occasionally fails outright ("Unable to connect" on a
  // healthy upstream) — observed on deploy-tarball uploads to s3mock; a
  // bounded retry absorbs it. Bodies above the cap (or with unknown
  // length and a stream that exceeds it) keep streaming semantics and
  // simply don't retry.
  const RETRY_BODY_CAP = 128 * 1024 * 1024;
  const hasBody = req.method !== "GET" && req.method !== "HEAD";
  // Only bodies with a known, bounded length are buffered — an unknown
  // (chunked/streaming) length could be an endless client stream, which
  // must keep flowing through, not accumulate.
  const declaredLen = Number(req.headers.get("content-length") ?? NaN);
  let bufferedBody: ArrayBuffer | undefined;
  if (hasBody && Number.isFinite(declaredLen) && declaredLen <= RETRY_BODY_CAP) {
    try {
      bufferedBody = await req.arrayBuffer();
    } catch {
      /* client aborted mid-upload; fall through, attempt will fail */
    }
  }
  const retryable =
    !hasBody || (bufferedBody !== undefined && bufferedBody.byteLength <= RETRY_BODY_CAP);

  const attempts = retryable ? 3 : 1;
  let lastErr: unknown;
  for (let attempt = 1; attempt <= attempts; attempt++) {
    // Resolve the upstream per attempt: a connect failure below drops the
    // cached IP, so a retry re-inspects the container.
    const upstreamUrl = `http://${await proxyUpstreamHost(service)}:${port}${upstreamPath}`;
    try {
      const upstreamReq = new Request(upstreamUrl, {
        method: req.method,
        headers: fwdHeaders,
        body: hasBody ? (bufferedBody ?? req.body) : undefined,
        redirect: "manual",
      });
      // decompress:false → forward the encoded body untouched (see fn doc).
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const upstreamRes = await NATIVE_FETCH(upstreamReq, { decompress: false } as any);
      // Strip hop-by-hop response headers; let Bun set content-length / TE.
      const respHeaders = new Headers();
      for (const [k, v] of upstreamRes.headers) {
        if (HOP_BY_HOP_HEADERS.has(k.toLowerCase())) continue;
        respHeaders.append(k, v);
      }
      return new Response(upstreamRes.body, {
        status: upstreamRes.status,
        statusText: upstreamRes.statusText,
        headers: respHeaders,
      });
    } catch (err) {
      lastErr = err;
      // Only connect-class failures are safely retryable — if the request
      // reached the upstream we must not replay it. The pattern is kept
      // deliberately narrow: a bare /connect/ would also match errors
      // raised after the connection was established (e.g. "socket
      // connection was closed unexpectedly"), replaying a request the
      // upstream may already have processed.
      const msg = (err as Error)?.message ?? String(err);
      const connectFailure =
        /unable to connect|connection ?refused|econnrefused|typo in the url/i.test(msg);
      if (!connectFailure) break;
      // The cached IP may be stale (container recreated) — drop it even
      // when this was the last (or only) attempt, so the next request
      // re-resolves instead of hitting the same dead address again.
      PROXY_IP_CACHE.delete(service);
      if (attempt === attempts) break;
      // eslint-disable-next-line no-console
      console.warn(
        `[ingress] upstream ${service}:${port} connect failed (attempt ${attempt}/${attempts}), retrying: ${msg}`,
      );
      // Linear backoff: 250ms, then 500ms.
      await new Promise((r) => setTimeout(r, 250 * attempt));
    }
  }
  const e = lastErr as Error;
  return new Response(
    `spectest-daemon: upstream ${service}:${port} unreachable: ${e?.message ?? String(lastErr)}\n`,
    { status: 502, headers: { "content-type": "text/plain" } },
  );
}
1828
+
1829
/**
 * The names registry held in daemon memory and serialised to
 * FAKES_REGISTRY_PATH for the resolver. `startIngress` seeds it from
 * LOWERED (the static tls/hostnames/fakes/wildcard decls) and
 * `registerDnsName` mutates it live when a test calls `ctx.dnsName`.
 * Because it lives in daemon memory it forks with the rest of the
 * snapshot, so a test's dynamic registration stays isolated to its own
 * fork, exactly like fake state.
 *
 *   - `hosts`:     exact hostname → IP.
 *   - `wildcards`: `.suffix` → IP entries for `*.suffix` patterns.
 */
const REGISTRY: {
  hosts: Record<string, string>;
  wildcards: { suffix: string; ip: string }[];
} = {
  hosts: {},
  wildcards: [],
};
1841
+
1842
/**
 * Serialise the current REGISTRY (plus an `updatedAt` timestamp) as JSON
 * to FAKES_REGISTRY_PATH, creating the parent directory if needed.
 * Best-effort: a filesystem failure is logged and swallowed.
 */
async function writeRegistry(): Promise<void> {
  const { hosts, wildcards } = REGISTRY;
  const payload = JSON.stringify({ hosts, wildcards, updatedAt: Date.now() });
  try {
    const dir = path.dirname(FAKES_REGISTRY_PATH);
    await fs.mkdir(dir, { recursive: true });
    await fs.writeFile(FAKES_REGISTRY_PATH, payload);
  } catch (err) {
    // Resolver gracefully degrades; just log.
    // eslint-disable-next-line no-console
    console.warn(`[names] failed to write registry at ${FAKES_REGISTRY_PATH}:`, err);
  }
}
1857
+
1858
/**
 * Turn a wildcard pattern into the suffix the resolver matches:
 * `*.example.com` → `.example.com`. Only the first character is removed;
 * the pattern is not validated here.
 */
function wildcardSuffix(pattern: string): string {
  // Everything after the leading "*".
  const suffix = pattern.substring(1);
  return suffix;
}
1862
+
1863
/**
 * Look up a service container's IP on spectest-net via `docker inspect`
 * (10s timeout).
 *
 * @returns The IP, or `null` when the inspect fails or prints nothing /
 *          `<no value>` — i.e. the container isn't up or isn't attached
 *          to the network yet.
 */
async function serviceContainerIp(name: string): Promise<string | null> {
  const format = `{{(index .NetworkSettings.Networks "${NETWORK_NAME}").IPAddress}}`;
  const result = await docker(["inspect", "--format", format, name], 10_000);
  if (result.code !== 0) return null;

  const address = result.stdout.trim();
  if (address.length === 0 || address === "<no value>") return null;
  return address;
}
1879
+
1880
/**
 * Service name → container IP cache for the ingress proxy. It keeps the
 * proxy hot path off in-guest DNS: name resolution goes through
 * spectest-resolver — a single-threaded Bun process that can be starved
 * when the guest's vCPUs are saturated (observed as ~30s of
 * "tarballs-s3:9090 unreachable" 502s during parallel deploy tests while
 * the container was healthy). Entries are filled lazily via
 * `docker inspect` (local socket, no DNS); the proxy drops an entry on
 * connect failure so a recreated container re-resolves on retry.
 */
const PROXY_IP_CACHE: Map<string, string> = new Map();
1889
+
1890
/**
 * Host part to use when dialling `service` from the ingress proxy: the
 * cached container IP if there is one, else a freshly inspected IP
 * (which is then cached), else the bare service name.
 */
async function proxyUpstreamHost(service: string): Promise<string> {
  const known = PROXY_IP_CACHE.get(service);
  if (known) return known;

  let resolved: string | null;
  try {
    resolved = await serviceContainerIp(service);
  } catch {
    resolved = null;
  }

  if (!resolved) {
    // Fall back to the name (resolver / docker DNS) — e.g. a target that
    // isn't a docker container on spectest-net.
    return service;
  }
  PROXY_IP_CACHE.set(service, resolved);
  return resolved;
}
1902
+
1903
/**
 * Turn a DnsTarget into a concrete IP. An ingress target maps to the
 * bridge gateway; a service target maps to that container's IP.
 *
 * @throws Error when a service target has no IP on the network yet.
 */
async function resolveDnsTarget(target: DnsTarget): Promise<string> {
  if ("ingress" in target) {
    return bridgeGatewayIp();
  }
  const containerIp = await serviceContainerIp(target.service);
  if (containerIp) return containerIp;
  throw new Error(
    `dnsName target service ${JSON.stringify(target.service)} has no IP on ${NETWORK_NAME} (is it a running service?)`,
  );
}
1915
+
1916
/**
 * Rebuild REGISTRY from the static lowered decls and persist it. Called
 * twice per /bootstrap: first from startIngress (`servicesUp: false`) so
 * ingress hostnames answer while containers are still starting, then
 * again once every container is up (`servicesUp: true`) so
 * service-targeted wildcards (e.g. k3s `ingressDomains`) can resolve
 * their container IPs. The post-container pass is what's captured into
 * the warm template.
 *
 * Note that this starts from an empty registry each time, so anything
 * registered before the call is discarded.
 */
async function seedNamesRegistry(opts: { servicesUp: boolean }): Promise<void> {
  const gatewayIp = await bridgeGatewayIp();
  REGISTRY.hosts = {};
  REGISTRY.wildcards = [];

  for (const host of LOWERED.ingressHosts) {
    REGISTRY.hosts[host] = gatewayIp;
  }

  for (const wildcard of LOWERED.wildcards) {
    const { pattern, target } = wildcard;
    if ("ingress" in target) {
      REGISTRY.wildcards.push({ suffix: wildcardSuffix(pattern), ip: gatewayIp });
      continue;
    }
    // Service target — only resolvable once the container has an IP.
    if (!opts.servicesUp) continue;

    const containerIp = await serviceContainerIp(target.service);
    if (!containerIp) {
      // eslint-disable-next-line no-console
      console.warn(
        `[names] wildcard ${pattern}: service ${JSON.stringify(target.service)} has no IP on ${NETWORK_NAME}`,
      );
      continue;
    }
    REGISTRY.wildcards.push({ suffix: wildcardSuffix(pattern), ip: containerIp });
  }

  await writeRegistry();
}
1948
+
1949
/**
 * Register a hostname at runtime — the implementation behind `ctx.dnsName`.
 * Validates via the same `dnsName` primitive the static path uses, resolves
 * the target to an IP, and updates + persists the registry. Exact names go
 * in `hosts`; `*.suffix` wildcards in `wildcards` (replacing any existing
 * entry for the same suffix). The resolver re-reads on the next query (it
 * watches the file's mtime), so the name is live immediately — answered
 * for VM-host/test/browser code and for peer containers (Docker forwards
 * unknown names to the host resolver). It does NOT land in any
 * container's /etc/hosts.
 *
 * The recorded `dnsName` event carries the measured wall-clock duration
 * (target resolution + registry write), like the other environment ops.
 *
 * @param hostname Exact name or `*.suffix` wildcard to register.
 * @param target   Where the name should point (ingress or a service).
 * @throws Whatever validation or `resolveDnsTarget` throws; nothing is
 *         registered in that case.
 */
async function registerDnsName(hostname: string, target: DnsTarget): Promise<void> {
  const t0 = Date.now();
  const resv = reserveEvent();
  // Reuse the primitive purely for validation + lowercasing.
  const decl = makeDnsDecl(hostname, target);
  const ip = await resolveDnsTarget(target);
  if (isWildcard(decl.hostname)) {
    const suffix = wildcardSuffix(decl.hostname);
    // Drop any previous entry for this suffix so the new IP wins.
    REGISTRY.wildcards = REGISTRY.wildcards.filter((w) => w.suffix !== suffix);
    REGISTRY.wildcards.push({ suffix, ip });
  } else {
    REGISTRY.hosts[decl.hostname] = ip;
  }
  await writeRegistry();
  recordEnv(
    { op: "dnsName", hostname: decl.hostname, ip, durationMs: Date.now() - t0 },
    resv,
  );
}
1974
+
1975
+ // ────────────────────────────────────────────────────────────────────────
1976
+ // Runtime services — containers started after bootstrap (from a test, an
1977
+ // eval, project setup, or a fake handler reacting to the app under test).
1978
+ //
1979
+ // A runtime service is a *real machine on the network*: it joins
1980
+ // spectest-net with its own IP and is reached directly by name/IP, not
1981
+ // through the daemon's HTTP ingress. The same helpers bootstrap uses
1982
+ // (prepareServiceImage → runContainer → waitForReady) drive it, so it gets
1983
+ // the same image cache, CA trust, and ready-probing. Because it lives in
1984
+ // dockerd, it's captured by the per-test post-state snapshot exactly like
1985
+ // the boot services — a `dependsOn` child inherits the live container while
1986
+ // siblings (which fork from the parent's earlier snapshot) never see it.
1987
+ //
1988
+ // Tracked here only for in-VM bookkeeping (failure log capture, teardown);
1989
+ // the map forks with daemon memory, so each fork sees the services it (or
1990
+ // its ancestors) actually started.
1991
+ // ────────────────────────────────────────────────────────────────────────
1992
+
1993
/** Runtime services started in this VM, keyed by service name. */
const RUNTIME_SERVICES: Map<string, NamedService> = new Map();
1994
+
1995
// The orchestration helpers take a NamedService. A runtime service spec is
// the same shape — a ServiceConfig (minus tls/dependsOn) plus a name — so
// this only re-packs the fields, with `name` first.
function specToNamedService(spec: RuntimeServiceSpec): NamedService {
  const { name, ...config } = spec;
  const named: NamedService = { name, ...(config as ServiceConfig) };
  return named;
}
2001
+
2002
/**
 * Implementation behind `ctx.startService` / a fake's `ctx.startService`.
 * Prepares the image (pulling on first use through the host cache), runs
 * the container on spectest-net, and waits for its readyCheck. On success
 * the service is tracked in RUNTIME_SERVICES. A `startService` env event
 * is recorded either way, with `error` set on failure.
 *
 * @returns The container's name and its IP on the network (`""` if the
 *          IP could not be determined).
 * @throws Error if `spec.name` is missing; otherwise rethrows whatever
 *         image prep, container start, or the ready wait threw.
 */
async function startRuntimeService(spec: RuntimeServiceSpec): Promise<RuntimeServiceHandle> {
  const nameMissing = !spec.name || spec.name.length === 0;
  if (nameMissing) {
    throw new Error("startService: `name` is required");
  }

  const startedAt = Date.now();
  const resv = reserveEvent();
  const svc = specToNamedService(spec);
  // Network aliases are registered lowercased.
  const aliases = (spec.hostnames ?? []).map((alias) => alias.toLowerCase());
  const imageRef = svc.image.type === "registry" ? svc.image.reference : "(dockerfile)";

  try {
    const { tag } = await prepareServiceImage(svc);
    const volumeFlags = await ensureVolumes(svc);
    const fileFlags = await ensureFiles(svc);
    await runContainer(svc, tag, [...volumeFlags, ...fileFlags], aliases);
    await waitForReady(svc);

    const inspectedIp = await serviceContainerIp(svc.name);
    const ip = inspectedIp ?? "";
    RUNTIME_SERVICES.set(svc.name, svc);
    recordEnv(
      {
        op: "startService",
        service: svc.name,
        image: imageRef,
        ip,
        durationMs: Date.now() - startedAt,
      },
      resv,
    );
    return { name: svc.name, ip };
  } catch (err) {
    recordEnv(
      {
        op: "startService",
        service: svc.name,
        image: imageRef,
        durationMs: Date.now() - startedAt,
        error: errMessage(err),
      },
      resv,
    );
    throw err;
  }
}
2041
+
2042
/**
 * Implementation behind `ctx.stopService`. Force-removes the container
 * (`docker rm -f`, 30s timeout), forgets it in RUNTIME_SERVICES, and
 * records a `stopService` env event. The docker exit code is ignored, so
 * stopping a container that is already gone is a no-op.
 */
async function stopRuntimeService(name: string): Promise<void> {
  const startedAt = Date.now();
  const resv = reserveEvent();

  await docker(["rm", "-f", name], 30_000);
  RUNTIME_SERVICES.delete(name);

  const durationMs = Date.now() - startedAt;
  recordEnv({ op: "stopService", service: name, durationMs }, resv);
}
2051
+
2052
/**
 * Environment-control handle passed to fakes — as the 3rd handler
 * argument and as `ctx` in `helpers`. It exposes the same primitives
 * tests get on `ctx`, and can live at module level because none of them
 * depend on a running test.
 */
const FAKE_CTX: FakeContext = {
  // Start a container on the network and wait for it to be ready.
  startService: startRuntimeService,
  // Remove a previously started runtime container.
  stopService: stopRuntimeService,
  // Register a hostname (or wildcard) with the resolver.
  dnsName: registerDnsName,
};
2060
+
2061
/**
 * Return the helpers record for a fake — the value exposed at
 * `ctx.fakes.<name>` — building and caching it on first use. A fake that
 * declares no `helpers` gets `{}` (it exposes nothing; tests never touch
 * private state directly). What is returned is the tracking proxy from
 * `trackFakeHelpers`, so helper calls land in the test timeline.
 *
 * @throws Error if no fake with that name is loaded.
 */
async function ensureFakeHelpers(name: string): Promise<Record<string, unknown>> {
  const fake = FAKES.get(name);
  if (!fake) {
    throw new Error(`fake ${JSON.stringify(name)} is not loaded`);
  }
  // Already built: reuse the cached proxy.
  if (fake.trackedHelpers) {
    return fake.trackedHelpers;
  }

  if (fake.def.helpers) {
    const built = await fake.def.helpers({
      name,
      state: fake.state,
      ctx: FAKE_CTX,
    });
    fake.helpers = built as Record<string, unknown>;
  } else {
    fake.helpers = {};
  }

  fake.trackedHelpers = trackFakeHelpers(name, fake.helpers);
  return fake.trackedHelpers;
}
2080
+
2081
/**
 * Wrap a fake's helpers so each call becomes a recorded `fake` event
 * and its return value is `wrap()`ped for assertion provenance. Helpers
 * are functions that read/mutate the fake's private state via closure;
 * tests only ever see what those functions return. The proxy is built
 * once and shared across tests; it consults the recorder at call time,
 * so it's a transparent no-op when nothing is recording (eval / project
 * setup).
 *
 * Only own function properties are intercepted — inherited members
 * (`toString`, etc.), symbols, and any stray non-function property pass
 * straight through untouched.
 *
 * Own function properties that are both non-configurable and
 * non-writable (e.g. on a helpers object returned via `Object.freeze`)
 * are also passed through untracked: the Proxy `get` invariant requires
 * the trap to report their actual value, so substituting a wrapper would
 * make every access throw a TypeError.
 *
 * @param fakeName Name of the fake, recorded on every event.
 * @param helpers  The helpers record produced by the fake definition.
 * @returns A proxy over `helpers` with tracked function members.
 */
function trackFakeHelpers(
  fakeName: string,
  helpers: Record<string, unknown>,
): Record<string, unknown> {
  return new Proxy(helpers, {
    get(target, prop, receiver) {
      if (typeof prop === "symbol") return Reflect.get(target, prop, receiver);
      const desc = Object.getOwnPropertyDescriptor(target, prop);
      if (!desc || typeof desc.value !== "function") {
        return Reflect.get(target, prop, receiver);
      }
      // Invariant-locked property: must be returned as-is (see fn doc).
      if (!desc.configurable && !desc.writable) {
        return desc.value;
      }
      const fn = desc.value as (...a: unknown[]) => unknown;
      const member = String(prop);
      // `target` (not the proxy) is the `this` for the call, so helper
      // internals that call each other via `this` are not double-recorded.
      return (...args: unknown[]) =>
        invokeFakeHelper(fakeName, member, fn, target, args);
    },
  });
}
2110
+
2111
/**
 * Call one fake helper and record it as a `fake` event. Arguments and
 * the result are serialised with `safeSerialize` for the event; the value
 * handed back to the caller is `wrap()`ped with the event's sequence
 * number. Works for sync helpers and for helpers returning a Promise
 * (the event is then recorded on settlement). If the helper throws or
 * rejects, an error event is recorded and the same error is rethrown.
 */
function invokeFakeHelper(
  fakeName: string,
  member: string,
  fn: (...a: unknown[]) => unknown,
  thisArg: unknown,
  args: unknown[],
): unknown {
  const startedAt = Date.now();
  const resv = reserveEvent();
  const safeArgs = args.map((arg) => safeSerialize(arg));

  // Record a successful call and return the provenance-wrapped value.
  function onSuccess(value: unknown): unknown {
    const seq = recordFake(
      {
        fake: fakeName,
        member,
        args: safeArgs,
        result: safeSerialize(value),
        durationMs: Date.now() - startedAt,
      },
      resv,
    );
    return wrap(value, seq);
  }

  // Record a failed call and hand the error back for rethrowing.
  function onFailure(err: unknown): unknown {
    recordFake(
      {
        fake: fakeName,
        member,
        args: safeArgs,
        durationMs: Date.now() - startedAt,
        error: errMessage(err),
      },
      resv,
    );
    return err;
  }

  let outcome: unknown;
  try {
    outcome = fn.apply(thisArg, args);
  } catch (err) {
    throw onFailure(err);
  }

  if (!(outcome instanceof Promise)) {
    return onSuccess(outcome);
  }
  return outcome.then(onSuccess, (err) => {
    throw onFailure(err);
  });
}
2159
+
2160
/**
 * Best-effort message for a caught value: its `message` property when
 * that is neither null nor undefined, otherwise `String(err)`.
 */
function errMessage(err: unknown): string {
  const message = (err as Error | null | undefined)?.message;
  return message ?? String(err);
}
2163
+
2164
/**
 * Assemble the `fakes` map exposed on the test/eval context: one entry
 * per loaded fake, each holding that fake's helpers. Helpers are built
 * lazily by `ensureFakeHelpers`, but they are all materialised here, one
 * fake at a time, so a test can simply read `ctx.fakes.x.y`.
 */
async function buildFakeHandles(): Promise<Record<string, Record<string, unknown>>> {
  const byName: Record<string, Record<string, unknown>> = {};
  for (const fakeName of FAKES.keys()) {
    const helpers = await ensureFakeHelpers(fakeName);
    byName[fakeName] = helpers;
  }
  return byName;
}
2174
+
2175
/** Timing breakdown for one service during bootstrap. */
interface ServiceTiming {
  /** Service name. */
  name: string;
  /** How the image was prepared: `pull` for registry images, `build` otherwise. */
  kind: "pull" | "build";
  /** Milliseconds spent preparing the image. */
  prepMs: number;
  /** Milliseconds for the `run` phase — presumably container start; unset until measured. */
  runMs?: number;
  /** Milliseconds for the `ready` phase — presumably the ready probe; unset until measured. */
  readyMs?: number;
  /** Milliseconds for the `setup` phase, when there is one. */
  setupMs?: number;
  /** Per-step build timings, when image prep reported them. */
  buildSteps?: BuildStep[];
}
2184
+
2185
/** Timing summary for one bootstrap run. */
interface BootstrapTimings {
  /** Milliseconds for the bootstrap as a whole. */
  totalMs: number;
  /** One entry per service. */
  services: ServiceTiming[];
}
2189
+
2190
/** Sum of a service's phase timings; phases without a value count as 0. */
function serviceTotalMs(s: ServiceTiming): number {
  const laterPhases = [s.runMs, s.readyMs, s.setupMs];
  return laterPhases.reduce<number>((sum, ms) => sum + (ms ?? 0), s.prepMs);
}
2193
+
2194
// Writes the bootstrap timings to the daemon journal (they are also
// folded into the /bootstrap response, which the control plane logs).
// Each service gets one compact line, followed by a line listing its
// build steps when it has any, so a slow cold start is profileable
// without dumping the full build output. A final line gives the total.
function logBootstrapTimings(t: BootstrapTimings): void {
  for (const svc of t.services) {
    const fields: string[] = [`prep=${svc.prepMs}ms(${svc.kind})`];
    if (svc.runMs != null) fields.push(`run=${svc.runMs}ms`);
    if (svc.readyMs != null) fields.push(`ready=${svc.readyMs}ms`);
    // Truthiness check: a zero or missing setup time is left out.
    if (svc.setupMs) fields.push(`setup=${svc.setupMs}ms`);
    console.log(`[bootstrap] ${svc.name}: ${fields.join(" ")}`);

    const steps = svc.buildSteps;
    if (steps && steps.length) {
      const rendered: string[] = [];
      for (const step of steps) {
        const cost = step.cached ? "cached" : step.secs.toFixed(1) + "s";
        rendered.push(`${cost} ${step.name}`);
      }
      console.log(`[bootstrap] ${svc.name} build steps: ${rendered.join(" | ")}`);
    }
  }
  console.log(`[bootstrap] total ${t.totalMs}ms across ${t.services.length} service(s)`);
}
2214
+
2215
+ async function bootstrap(): Promise<BootstrapTimings> {
2216
+ const bootStart = Date.now();
2217
+ const cfg: EnvironmentConfig = requireLoaded().project.environment;
2218
+ const services = namedServices(cfg);
2219
+ const timings = new Map<string, ServiceTiming>();
2220
+ progressInit(services);
2221
+ // Build dedup is only valid within one workspace generation — a fresh
2222
+ // bootstrap may follow a workspace re-upload with the same dockerfile
2223
+ // text but different build-context content.
2224
+ BUILD_DEDUP.clear();
2225
+
2226
+ // Network create is independent of the workspace-side prep, so run
2227
+ // them concurrently. .dockerignore only blocks `docker build`s — pulls
2228
+ // wouldn't need it — but the writes are sub-millisecond so we just
2229
+ // gate image prep behind both.
2230
+ await Promise.all([
2231
+ ensureNetwork(),
2232
+ (async () => {
2233
+ await fs.mkdir(WORKSPACE, { recursive: true });
2234
+ await fs.writeFile(
2235
+ path.join(WORKSPACE, ".dockerignore"),
2236
+ unionDockerignore(services),
2237
+ );
2238
+ })(),
2239
+ ]);
2240
+
2241
+ // Image prep is DECOUPLED from container start: every service's image
2242
+ // prep kicks off now, independent of `dependsOn`, and each service's
2243
+ // container start (in startServices below) gates on (its OWN image ready)
2244
+ // AND (its deps up) — there is no whole-graph barrier. So a service whose
2245
+ // image is pulled and whose deps are up starts immediately; it never sits
2246
+ // at "image ready" waiting for an unrelated slow build elsewhere.
2247
+ //
2248
+ // Prep concurrency: registry pulls always run in parallel (network-bound,
2249
+ // low VM RAM). Dockerfile builds parallelize *only* when the host
2250
+ // buildkitd is in play — there the build executes host-side under runc, so
2251
+ // N concurrent builds don't touch the VM's memory ceiling. When we fall
2252
+ // back to the in-VM builder, two or more concurrent builds routinely OOM a
2253
+ // single VM on monorepos with parallel pnpm/npm installs (each install
2254
+ // fans out to ~16 fetchers + lifecycle workers, ~70 MB/process), so we
2255
+ // serialize that case behind a FIFO chain — but only the in-VM builds
2256
+ // serialize; pulls and starts run freely alongside them. The remote-builder
2257
+ // probe is memoized, so this up-front call is free; skip it with no builds.
2258
+ const tags = new Map<string, string>();
2259
+ const builds = services.filter((s) => s.image.type === "dockerfile");
2260
+ const buildsRunHostSide = builds.length > 0 && (await ensureRemoteBuilder());
2261
+ // A promise chain is a fair FIFO mutex: when builds run in-VM, each build
2262
+ // waits for the previous to settle. Pulls and host-side builds bypass it.
2263
+ let inVmBuildChain: Promise<unknown> = Promise.resolve();
2264
+ const prepImage = (
2265
+ svc: NamedService,
2266
+ ): Promise<{ tag: string; buildSteps?: BuildStep[] }> => {
2267
+ // Bootstrap is the only dedup scope: all its builds share one
2268
+ // /workspace generation (see prepareServiceImage).
2269
+ const run = () => prepareServiceImage(svc, { dedup: true });
2270
+ if (svc.image.type === "dockerfile" && !buildsRunHostSide) {
2271
+ const next = inVmBuildChain.then(run, run);
2272
+ // Keep the chain moving even if a build throws; the chain itself never
2273
+ // rejects (the per-service prep promise below is what surfaces errors).
2274
+ inVmBuildChain = next.then(
2275
+ () => undefined,
2276
+ () => undefined,
2277
+ );
2278
+ return next;
2279
+ }
2280
+ return run();
2281
+ };
2282
+ const prep = new Map<string, Promise<void>>();
2283
+ for (const svc of services) {
2284
+ const p = (async () => {
2285
+ const t0 = Date.now();
2286
+ const { tag, buildSteps } = await prepImage(svc);
2287
+ progressService(svc.name, { status: "prepared", detail: undefined });
2288
+ tags.set(svc.name, tag);
2289
+ timings.set(svc.name, {
2290
+ name: svc.name,
2291
+ kind: svc.image.type === "registry" ? "pull" : "build",
2292
+ prepMs: Date.now() - t0,
2293
+ buildSteps,
2294
+ });
2295
+ })();
2296
+ // A dependent whose dep fails aborts before it awaits this prep, which
2297
+ // would leave the prep promise unobserved. Attach a no-op handler so a
2298
+ // late rejection can't crash the daemon; startOne still re-throws it for
2299
+ // services that do reach their await.
2300
+ p.catch(() => undefined);
2301
+ prep.set(svc.name, p);
2302
+ }
2303
+
2304
+ // Ingress (fakes + service-tls proxies) comes up BEFORE services so
2305
+ // that any service that calls a fake URL during its own startup
2306
+ // probe finds it answering. Service-tls proxies will return 502
2307
+ // until their upstream containers start, but no one is hitting
2308
+ // https://<svc>.test/ during bootstrap so that's harmless. The
2309
+ // bridge gateway IP is set on `network create` — independent of
2310
+ // any container being up — so we don't need services to determine
2311
+ // the listener address.
2312
+ await startIngress();
2313
+ progressPhase("Starting services");
2314
+
2315
+ // Container start + ready probe driven by the dependsOn DAG: each
2316
+ // service starts the moment its own dependencies finish run→probe→setup,
2317
+ // instead of waiting for a whole topological level to clear. We chain
2318
+ // run→probe→setup per service so a dependent sees the post-setup state
2319
+ // of its deps (a database with its schema applied, a k3s cluster with
2320
+ // its ingress controller already running) — but an unrelated slow probe
2321
+ // no longer holds back a branch that's ready to go.
2322
+ await startServices(services, async (svc) => {
2323
+ // Gate on our OWN image being ready. startServices already gated on our
2324
+ // deps; this adds the image edge. The two compose: we run the moment
2325
+ // both are satisfied, with no whole-graph barrier between them.
2326
+ await prep.get(svc.name)!;
2327
+ const flags = [...(await ensureVolumes(svc)), ...(await ensureFiles(svc))];
2328
+ const tag = tags.get(svc.name);
2329
+ if (!tag) throw new Error(`internal: no image tag for ${svc.name}`);
2330
+ const tRun = Date.now();
2331
+ progressService(svc.name, { status: "starting", detail: undefined });
2332
+ await runContainer(svc, tag, flags);
2333
+ const tReady = Date.now();
2334
+ progressService(svc.name, { status: "probing", detail: "ready check" });
2335
+ await waitForReady(svc);
2336
+ const tSetup = Date.now();
2337
+ if (svc.setup) {
2338
+ progressService(svc.name, { status: "probing", detail: "running setup" });
2339
+ const helpers = await ensureHelpers(svc.name, svc);
2340
+ await svc.setup({ name: svc.name, helpers });
2341
+ }
2342
+ progressService(svc.name, { status: "ready", detail: undefined });
2343
+ const ti = timings.get(svc.name);
2344
+ if (ti) {
2345
+ ti.runMs = tReady - tRun;
2346
+ ti.readyMs = tSetup - tReady;
2347
+ ti.setupMs = svc.setup ? Date.now() - tSetup : 0;
2348
+ }
2349
+ });
2350
+
2351
+ // Containers now have IPs — re-seed so service-targeted wildcards (e.g.
2352
+ // k3s ingressDomains → the cluster container) resolve. Captured into the
2353
+ // warm template, so warm starts inherit the resolved entries.
2354
+ await seedNamesRegistry({ servicesUp: true });
2355
+
2356
+ // Browser pre-warm DISABLED (2026-06-08). We used to pre-open one view
2357
+ // into the pool (browser.ts VIEW_POOL) here so every fork inherited a
2358
+ // live renderer and the first ctx.browser() skipped the ~1.2-1.5s spawn.
2359
+ // But a renderer spawned BEFORE the snapshot and restored in a fork holds
2360
+ // stale DNS state: its first navigate to an ingress host fails
2361
+ // `net::ERR_NAME_NOT_RESOLVED` even though getaddrinfo/fetch resolve fine
2362
+ // (the --disable-features=AsyncDns flag doesn't save the pooled view). A
2363
+ // view created fresh AFTER the fork (openBrowser → createView, since the
2364
+ // pool is now empty) spawns a post-restore renderer with correct DNS. The
2365
+ // tradeoff is the per-test spawn cost is back on the browser path; we
2366
+ // accept it to keep the suite's browser-rooted DAGs working. See
2367
+ // browser.ts:213 (the long-standing intermittent NAME_NOT_RESOLVED) and
2368
+ // the clocksource-regression notes. Re-enabling requires fixing the
2369
+ // restored-renderer DNS state, not just re-adding the prewarm call.
2370
+
2371
+ const result: BootstrapTimings = {
2372
+ totalMs: Date.now() - bootStart,
2373
+ services: [...timings.values()].sort((a, b) => serviceTotalMs(b) - serviceTotalMs(a)),
2374
+ };
2375
+ progressDone();
2376
+ logBootstrapTimings(result);
2377
+ return result;
2378
+ }
2379
+
2380
+ // ────────────────────────────────────────────────────────────────────────
2381
+ // Project setup (post-bootstrap, pre-test)
2382
+ // ────────────────────────────────────────────────────────────────────────
2383
+
2384
/** Outcome reported by `runProjectSetup`. */
interface ProjectSetupResult {
  /** `false` when the loaded project declares no `setup` hook; `true` once the hook has completed. */
  ran: boolean;
  /** Milliseconds elapsed inside `runProjectSetup`; `0` when no hook ran. */
  durationMs: number;
}
2388
+
2389
/**
 * Invoke the loaded project's optional `setup` hook.
 *
 * The control plane calls this once, after /bootstrap and before the
 * warm-template snapshot, so whatever the hook creates (seeded DB rows,
 * initial pods, fixture files) is captured a single time and inherited by
 * every later snapshot/fork.
 *
 * Setup is NOT instrumented the way a test run is: no recorder, no event
 * timeline, no test-runner timeout. A throwing hook propagates out of this
 * function; the control plane reports that as a start failure.
 *
 * @returns `{ ran: false, durationMs: 0 }` when the project has no hook,
 *   otherwise `ran: true` plus the elapsed milliseconds.
 */
async function runProjectSetup(): Promise<ProjectSetupResult> {
  const { project } = requireLoaded();
  if (!project.setup) {
    return { ran: false, durationMs: 0 };
  }

  const startedAt = Date.now();
  // Hand setup the same `svc` handles tests receive, so helper instances
  // created here (e.g. a Bun.SQL pool) are the ones tests reuse later.
  const serviceHandles = (await buildServiceHandles(
    project.environment,
  )) as ProjectSetupContext["svc"];
  const fakeHandles = await buildFakeHandles();
  // The fetch wrapper stays installed only while the hook runs, so
  // `ctx.fetch` (and anything going through `globalThis.fetch`) yields a
  // wrapped Response just like in a test. With no recorder active there is
  // no provenance, but `.unwrap()` still works at runtime.
  const uninstallFetch = installFetchWrapper();
  try {
    const setupCtx: ProjectSetupContext = {
      fetch: globalThis.fetch as unknown as SpectestFetch,
      exec: execInServiceWrapped,
      svc: serviceHandles,
      fakes: fakeHandles,
      dnsName: registerDnsName,
      startService: startRuntimeService,
      stopService: stopRuntimeService,
    };
    await project.setup(setupCtx);
  } finally {
    uninstallFetch();
  }

  return { ran: true, durationMs: Date.now() - startedAt };
}
2428
+
2429
+ // ────────────────────────────────────────────────────────────────────────
2430
+ // Test runner
2431
+ // ────────────────────────────────────────────────────────────────────────
2432
+
2433
/** Result of running a single test case (the value `runOne` resolves to). */
interface RunResult {
  /** Final verdict for the case. */
  status: "passed" | "failed";
  /** Duration of the run in milliseconds. */
  durationMs: number;
  /**
   * Text captured while the case ran.
   * NOTE(review): presumably the stdout/stderr/console output teed during
   * the run — confirm against `runOne`.
   */
  log: string;
  /** Ordered events recorded during the test (exec / assertion / http). */
  events: TestEvent[];
  /**
   * Per-browser rrweb sessions captured during the test. Each session
   * is the events emitted by a single `ctx.browser()` view, drained in
   * step-tagged chunks (one chunk per Browser op).
   */
  browserSessions: BrowserSessionRecord[];
  /** asciicast sessions for each `ctx.terminal(...)` call. */
  terminalSessions: TerminalSessionRecord[];
  /**
   * `docker logs` per service, captured only when the test failed (empty
   * on a pass). Lets the failure post-mortem in the web UI show what each
   * container printed without the author having to add `ctx.exec` log
   * grabs by hand.
   */
  serviceLogs: ServiceLogCapture[];
  /**
   * Failure details.
   * NOTE(review): presumably set only when `status` is `"failed"` — confirm
   * against `runOne`.
   */
  error?: { message: string; stack?: string };
}
2456
+
2457
/** Captured container logs for one service (one entry per service from `captureServiceLogs`). */
interface ServiceLogCapture {
  /** Service name; the same string is passed to `docker logs` as the container name. */
  service: string;
  /** Trailing slice of the container's stdout (RFC3339-timestamped). */
  stdout: string;
  /** `true` when `tailBytes` dropped leading stdout content to honour the size cap. */
  stdoutTruncated: boolean;
  /** Trailing slice of the container's stderr. */
  stderr: string;
  /** `true` when `tailBytes` dropped leading stderr content to honour the size cap. */
  stderrTruncated: boolean;
}
2467
+
2468
/** How many trailing log lines to grab per service on failure (passed to `docker logs --tail`). */
const SERVICE_LOG_TAIL_LINES = 500;
/**
 * Per-stream cap applied after the line tail (keeps the most recent
 * output). Passed as `max` to `tailBytes` for stdout and stderr
 * separately.
 */
const SERVICE_LOG_MAX_BYTES = 256 * 1024;
2472
+
2473
/**
 * Keep the trailing `max` bytes of `s` — the opposite of `truncateUtf8`'s
 * head-keep, because the most recent output is what explains a failure.
 *
 * The budget is measured in UTF-8 bytes rather than UTF-16 code units, so
 * multi-byte output (non-ASCII log lines, emoji) cannot exceed the cap. The
 * cut is moved forward to the next character boundary so the result never
 * begins with half a code point; the kept value may therefore be up to
 * three bytes shorter than `max`.
 *
 * @param s Text to trim.
 * @param max Maximum number of UTF-8 bytes to keep.
 * @returns The kept tail, and whether any leading content was dropped.
 */
function tailBytes(s: string, max: number): { value: string; truncated: boolean } {
  if (Buffer.byteLength(s, "utf8") <= max) return { value: s, truncated: false };
  const bytes = Buffer.from(s, "utf8");
  let start = Math.max(0, bytes.length - max);
  // 0b10xxxxxx marks a UTF-8 continuation byte: skip forward until the
  // slice starts on a lead byte so decoding yields no replacement char.
  while (start < bytes.length && ((bytes[start] ?? 0) & 0xc0) === 0x80) start += 1;
  return { value: bytes.subarray(start).toString("utf8"), truncated: true };
}
2480
+
2481
/**
 * Collect recent `docker logs` output for every service of the loaded
 * environment, for the web UI's failure post-mortem.
 *
 * Covers the boot services plus any runtime services registered in
 * `RUNTIME_SERVICES`, de-duplicated by name (a runtime entry replaces a
 * boot entry of the same name). Each capture is bounded: the last
 * {SERVICE_LOG_TAIL_LINES} lines, then trimmed per stream via `tailBytes`
 * with {SERVICE_LOG_MAX_BYTES}. All services are queried concurrently.
 *
 * NOTE(review): the intent is that a `docker logs` failure for one service
 * shows up in that service's `stderr` instead of aborting the capture; that
 * relies on `docker(...)` resolving rather than throwing — confirm.
 *
 * @returns One entry per service, or `[]` when no project is loaded.
 */
async function captureServiceLogs(): Promise<ServiceLogCapture[]> {
  const current = loaded;
  if (!current) return [];

  const targets = new Map<string, NamedService>();
  for (const boot of namedServices(current.project.environment)) {
    targets.set(boot.name, boot);
  }
  for (const [name, runtime] of RUNTIME_SERVICES) {
    targets.set(name, runtime);
  }

  const captureOne = async (svc: NamedService): Promise<ServiceLogCapture> => {
    const result = await docker(
      ["logs", "--tail", String(SERVICE_LOG_TAIL_LINES), "--timestamps", svc.name],
      30_000,
    );
    const out = tailBytes(result.stdout, SERVICE_LOG_MAX_BYTES);
    const err = tailBytes(result.stderr, SERVICE_LOG_MAX_BYTES);
    return {
      service: svc.name,
      stdout: out.value,
      stdoutTruncated: out.truncated,
      stderr: err.value,
      stderrTruncated: err.truncated,
    };
  };

  return Promise.all([...targets.values()].map((svc) => captureOne(svc)));
}
2517
+
2518
/** Per-Browser rrweb session shipped to the control plane. */
interface BrowserSessionRecord {
  /** Identifier minted by `newSessionId`. */
  sessionId: string;
  /** Milliseconds between the test's start and the session being opened. */
  openedAtMs: number;
  /** Milliseconds between the test's start and the session being closed; absent while still open. */
  closedAtMs?: number;
  /** First URL reported to the recorder's `noteNavigation`, if any. */
  initialUrl?: string;
  /** Steps appended through the recorder's `recordStep`, in call order. */
  steps: BrowserSessionStep[];
}
2526
+
2527
/**
 * Mint a session id of the form `<idScope>:<uuid>`, or a bare UUID when
 * `idScope` is empty.
 *
 * `idScope` is the running test's case id (`"eval"` for eval-context
 * sessions). It is part of the id because `randomUUID()` on its own is NOT
 * unique across test forks: sibling cases resume from one snapshot, so the
 * daemon process — and the guest kernel CSPRNG behind it — comes back with
 * identical RNG state in every clone, and the first UUID minted after the
 * fork collides across siblings (observed in practice, not hypothetical).
 * Persistence keys sessions by (run, case, session), so the collision never
 * lost data, but anything aggregating sessions across cases would conflate
 * them. Sibling forks run different cases by construction, which makes the
 * case id exactly the entropy the clones lack.
 *
 * @param idScope Scope prefix; an empty string yields an unprefixed UUID.
 * @returns The new session id.
 */
function newSessionId(idScope: string): string {
  const uuid = randomUUID();
  if (!idScope) return uuid;
  return `${idScope}:${uuid}`;
}
2543
+
2544
/**
 * Create the bookkeeping for one Browser session.
 *
 * @param testStart Epoch milliseconds of the test's start; open/close
 *   times are stored relative to it.
 * @param idScope Scope forwarded to `newSessionId`.
 * @returns `recorder` — the sink `openBrowser` writes into; `record` — the
 *   in-flight session object the daemon owns; `markClosed` — stamps
 *   `closedAtMs` once and makes every later recorder call a no-op.
 */
function newBrowserSession(testStart: number, idScope: string): {
  recorder: BrowserSessionRecorder;
  record: BrowserSessionRecord;
  markClosed(): void;
} {
  const sessionId = newSessionId(idScope);
  const record: BrowserSessionRecord = {
    sessionId,
    openedAtMs: Date.now() - testStart,
    steps: [],
  };
  let open = true;

  const recorder: BrowserSessionRecorder = {
    sessionId,
    recordStep: (step) => {
      if (open) record.steps.push(step);
    },
    // Only the first navigation is remembered as the session's initial URL.
    noteNavigation: (url) => {
      if (open && record.initialUrl === undefined) record.initialUrl = url;
    },
  };

  const markClosed = (): void => {
    if (!open) return;
    open = false;
    record.closedAtMs = Date.now() - testStart;
  };

  return { record, recorder, markClosed };
}
2580
+
2581
/** Per-Terminal asciicast session shipped to the control plane. */
interface TerminalSessionRecord {
  /** Identifier minted by `newSessionId`. */
  sessionId: string;
  /** Milliseconds between the test's start and the session being opened. */
  openedAtMs: number;
  /** Milliseconds between the test's start and the session being closed; absent while still open. */
  closedAtMs?: number;
  /** Name of the service the terminal was opened against. */
  service: string;
  /** Command the terminal runs; `openInstrumentedTerminal` stores `"(interactive)"` when none was given. */
  command: string;
  /** Terminal width in columns. */
  cols: number;
  /** Terminal height in rows. */
  rows: number;
  /** asciicast v2 frames. `o` = output, `i` reserved for future input. */
  frames: Array<[number, "o" | "i", string]>;
}
2593
+
2594
/**
 * Create the bookkeeping for a single `ctx.terminal(...)` call.
 *
 * @param testStart Epoch milliseconds of the test's start; open/close
 *   times are stored relative to it.
 * @param service Service the terminal targets.
 * @param command Command label stored on the record.
 * @param cols Terminal width stored on the record.
 * @param rows Terminal height stored on the record.
 * @param idScope Scope forwarded to `newSessionId`.
 * @returns `record` — the session object; `pushFrame` — appends an output
 *   (`"o"`) frame; `markClosed` — stamps `closedAtMs` once, after which
 *   further frames are dropped.
 */
function newTerminalSession(
  testStart: number,
  service: string,
  command: string,
  cols: number,
  rows: number,
  idScope: string,
): {
  record: TerminalSessionRecord;
  pushFrame(tSec: number, data: string): void;
  markClosed(): void;
} {
  const record: TerminalSessionRecord = {
    sessionId: newSessionId(idScope),
    openedAtMs: Date.now() - testStart,
    service,
    command,
    cols,
    rows,
    frames: [],
  };
  let open = true;

  const pushFrame = (tSec: number, data: string): void => {
    if (open) record.frames.push([tSec, "o", data]);
  };

  const markClosed = (): void => {
    if (!open) return;
    open = false;
    record.closedAtMs = Date.now() - testStart;
  };

  return { record, pushFrame, markClosed };
}
2630
+
2631
/**
 * Column count a recorded `ctx.exec` asciicast claims. There's no PTY
 * behind an exec so no real size exists — 80×24 matches the
 * `ctx.terminal` default, and the player hard-wraps longer lines the way
 * an actual 80-col terminal would.
 */
const EXEC_CAST_COLS = 80;
/** Row count a recorded `ctx.exec` asciicast claims (see `EXEC_CAST_COLS`). */
const EXEC_CAST_ROWS = 24;

/**
 * Cumulative cap on asciicast frame bytes per recorded exec (1 MiB). The
 * exec *event* caps its stdout/stderr separately (256 KiB each); this
 * bounds the recording, which would otherwise duplicate a huge output in
 * the run payload and the DB. On overflow the cast gets one trailing
 * notice frame and stops growing; the ExecResult is unaffected.
 */
const EXEC_FRAME_CAP_BYTES = 1024 * 1024;
2644
+
2645
/**
 * Open a terminal via `openTerminal` with session recording attached.
 *
 * A fresh `TerminalSessionRecord` is created and appended to `sessions`
 * before the terminal is opened; the daemon owns that record, while the
 * terminal factory only pushes frames into the sink and signals when the
 * session ends.
 *
 * Shared by:
 *   - the long-lived `ctx.openTerminal(...)` API, where the test holds the
 *     handle and chooses when to close it;
 *   - the one-shot `ctx.terminal(...)` wrapper, which opens a terminal with
 *     `opts.command`, waits for the program to exit, then closes — the
 *     same path with an immediate await.
 *
 * Heads-up for test authors: TTY-detecting CLIs may start a pager
 * (psql → less, git → less, etc.) and block on input now that stdin *is* a
 * TTY. Turn paging off in the command itself (e.g. `psql -P pager=off`) or
 * pass `PAGER=cat` / `PSQL_PAGER=` via `opts.env`.
 *
 * @param service Service to open the terminal in.
 * @param opts Terminal options; `cols`/`rows` default to 80×24 and a
 *   missing `command` is recorded as `"(interactive)"`.
 * @param testStart Epoch milliseconds of the test's start.
 * @param sessions Collection the new session record is appended to.
 * @param recordEvents Forwarded unchanged to `openTerminal`.
 * @param idScope Scope used when minting the session id.
 * @returns The handle produced by `openTerminal`.
 */
async function openInstrumentedTerminal(
  service: string,
  opts: TerminalOpts | undefined,
  testStart: number,
  sessions: TerminalSessionRecord[],
  recordEvents: boolean,
  idScope: string,
): Promise<InternalTerminal> {
  const session = newTerminalSession(
    testStart,
    service,
    opts?.command ?? "(interactive)",
    opts?.cols ?? 80,
    opts?.rows ?? 24,
    idScope,
  );
  sessions.push(session.record);

  const sink: TerminalFrameSink = {
    pushFrame: (t, data) => session.pushFrame(t, data),
    markClosed: () => session.markClosed(),
  };
  const { sessionId } = session.record;

  const terminal = await openTerminal({ service, opts, sink, sessionId, recordEvents });
  return terminal;
}
2695
+
2696
// Return value of each test that has completed in this daemon's lifetime.
// Lives in daemon memory and is captured by every post-test snapshot, so
// when a child case forks from its parent's snapshot it sees the same Map
// already populated. Carries arbitrary JS values — no JSON round-trip.
// NOTE(review): keys are presumably test-case ids — confirm at the write site.
const TEST_DATA = new Map<string, unknown>();

// Cached helper namespaces produced by `ServiceDefinition.helpers`
// factories, keyed by service name. Built lazily on first access (see
// `ensureHelpers`) and reused for the daemon's lifetime — Bun.SQL pools
// and similar resources are happy to live a long time, and the underlying
// TCP connections survive snapshot/fork along with the rest of daemon
// memory. Cleared on /load and /reload (project change invalidates any
// cached state).
const HELPERS_CACHE = new Map<string, Record<string, unknown>>();
2709
+
2710
/**
 * Return the helpers record for one service, building and caching it on
 * first use.
 *
 * @param name Service name; also the `HELPERS_CACHE` key.
 * @param def The service's in-memory definition.
 * @returns The cached or freshly built helpers, or an empty object when
 *   the service ships no `helpers` factory — symmetric with what gets
 *   passed to `setup`.
 */
async function ensureHelpers(
  name: string,
  def: ServiceDefinition<Record<string, unknown>>,
): Promise<Record<string, unknown>> {
  if (!def.helpers) return {};
  if (HELPERS_CACHE.has(name)) return HELPERS_CACHE.get(name)!;

  const built = await def.helpers({ name });
  HELPERS_CACHE.set(name, built);
  return built;
}
2725
+
2726
/**
 * Assemble the `svc` map for one test/eval.
 *
 * `svc[name]` is exactly the record that service's `helpers` factory
 * returned (e.g. `{ client: SqlClient }` for `postgres(...)`). A service
 * with no `helpers` factory is left out of the map entirely. Services are
 * processed one at a time, in `Object.entries` order.
 *
 * @param cfg Environment configuration whose `services` are inspected.
 * @returns The populated handle map.
 */
async function buildServiceHandles(cfg: EnvironmentConfig): Promise<ServiceHandles> {
  const handles: ServiceHandles = {};
  for (const [serviceName, definition] of Object.entries(cfg.services)) {
    // The wire type has no `helpers` (JSON.stringify drops functions), but
    // the daemon holds the in-memory definition from the user's module, so
    // the factory is still present when the author declared one.
    const withHelpers = definition as ServiceDefinition<Record<string, unknown>>;
    if (withHelpers.helpers) {
      handles[serviceName] = await ensureHelpers(serviceName, withHelpers);
    }
  }
  return handles;
}
2744
+
2745
+ // ────────────────────────────────────────────────────────────────────────
2746
+ // Instrumentation helpers
2747
+ // ────────────────────────────────────────────────────────────────────────
2748
+
2749
/**
 * Extract the target URL (and, for `Request` inputs, the method) from the
 * first argument of a `fetch` call.
 *
 * @param input A URL string, a `URL`, or a `Request`.
 * @returns `{ url }` for strings and `URL`s; `{ url, methodFromInput }`
 *   for a `Request`.
 */
function describeFetchInput(input: Parameters<typeof fetch>[0]): {
  url: string;
  methodFromInput?: string;
} {
  if (input instanceof URL) {
    return { url: input.toString() };
  }
  if (typeof input !== "string") {
    // Anything left is a Request instance.
    const { url, method } = input as Request;
    return { url, methodFromInput: method };
  }
  return { url: input };
}
2759
+
2760
/**
 * Produce the request-body description stored on a recorded HTTP event.
 *
 * Only `init.body` is ever read. A body carried by a `Request` object is
 * a stream that could be captured only by cloning the request, so it is
 * deliberately left untouched for the real fetch and reported with a
 * marker string instead. A `Request` that has no body at all (for example
 * a plain GET) yields an empty description rather than the marker.
 *
 * @param input First argument of the `fetch` call.
 * @param init Second argument of the `fetch` call, if any.
 * @returns `{}` when there is no body; `{ body, truncated }` for string
 *   and `URLSearchParams` bodies (run through `truncateUtf8`); a
 *   placeholder `body` for un-captured `Request` bodies and for non-text
 *   bodies.
 */
function describeRequestBody(
  input: Parameters<typeof fetch>[0],
  init: Parameters<typeof fetch>[1],
): { body?: string; truncated?: boolean } {
  const body = init?.body;
  if (body === undefined || body === null) {
    // `body !== null` (rather than `!= null`) keeps the marker on any
    // runtime whose Request does not expose `.body`.
    if (input instanceof Request && input.body !== null && input.bodyUsed === false) {
      // Don't drain the request's body here — leave it for the actual
      // fetch. Return a marker.
      return { body: "[Request body not captured]", truncated: false };
    }
    return {};
  }
  if (typeof body === "string") {
    const t = truncateUtf8(body);
    return { body: t.value, truncated: t.truncated };
  }
  if (body instanceof URLSearchParams) {
    const t = truncateUtf8(body.toString());
    return { body: t.value, truncated: t.truncated };
  }
  return { body: `[non-text body: ${body.constructor?.name ?? typeof body}]` };
}
2786
+
2787
/**
 * Replace `globalThis.fetch` with an instrumented wrapper and return a
 * function that puts the original back.
 *
 * For every call the wrapper reserves an event slot up front, delegates to
 * the original fetch, reads a clone of the response as text (passed through
 * `truncateUtf8`), and reports method, URL, request/response bodies, status
 * and duration through `recordHttp`. The caller receives the response
 * wrapped by `wrapResponse` together with the recorded event's sequence. A
 * failure is reported with its error message and then re-thrown unchanged.
 *
 * Callers restore the original after each test/setup, so the wrapper's
 * small overhead only applies while one is running.
 *
 * @returns A restore function that reinstalls the fetch captured at
 *   install time.
 */
function installFetchWrapper(): () => void {
  const nativeFetch = globalThis.fetch;

  const instrumentedFetch = async (
    input: Parameters<typeof fetch>[0],
    init?: Parameters<typeof fetch>[1],
  ): Promise<Response | WrappedResponse> => {
    const startedAt = Date.now();
    const reservation = reserveEvent();
    const target = describeFetchInput(input);
    const method = (init?.method ?? target.methodFromInput ?? "GET").toUpperCase();
    const sent = describeRequestBody(input, init);
    try {
      const res = await nativeFetch(input as RequestInfo, init);
      let responseBody: string | undefined;
      let responseBodyTruncated: boolean | undefined;
      try {
        // Read a clone so the caller still gets an unconsumed body.
        const clipped = truncateUtf8(await res.clone().text());
        responseBody = clipped.value;
        responseBodyTruncated = clipped.truncated;
      } catch {
        // Binary or unreadable body — leave undefined.
      }
      const seq = recordHttp(
        {
          method,
          url: target.url,
          requestBody: sent.body,
          requestBodyTruncated: sent.truncated,
          status: res.status,
          responseBody,
          responseBodyTruncated,
          durationMs: Date.now() - startedAt,
        },
        reservation,
      );
      return wrapResponse(res, seq);
    } catch (err) {
      const failure = err as Error;
      recordHttp(
        {
          method,
          url: target.url,
          requestBody: sent.body,
          requestBodyTruncated: sent.truncated,
          durationMs: Date.now() - startedAt,
          error: failure?.message ?? String(err),
        },
        reservation,
      );
      throw err;
    }
  };

  // Carry over provider-specific statics on `fetch` (e.g. Bun's
  // `fetch.preconnect`) so code that touches them keeps working.
  const replacement = instrumentedFetch as unknown as typeof fetch;
  const statics = nativeFetch as unknown as Record<string, unknown>;
  const target = replacement as unknown as Record<string, unknown>;
  Object.keys(nativeFetch).forEach((key) => {
    target[key] = statics[key];
  });

  globalThis.fetch = replacement;
  return () => {
    globalThis.fetch = nativeFetch;
  };
}
2855
+
2856
/**
 * Run `sh -lc <command>` inside a service's container through
 * `docker exec` and collect the buffered result.
 *
 * The promise never rejects: a failure is expressed through `exitCode`.
 * Output is buffered by `execFile` with a 16 MiB `maxBuffer`.
 *
 * @param service Container name handed to `docker exec`.
 * @param command Shell command line executed by `sh -lc`.
 * @returns stdout, stderr and the exit code — the numeric `code` of the
 *   `execFile` error when there is one, `1` for any other error, `0` on
 *   success.
 */
function execInService(service: string, command: string): Promise<ExecResult> {
  const argv = ["exec", service, "sh", "-lc", command];
  return new Promise((resolve) => {
    execFile("docker", argv, { maxBuffer: 16 * 1024 * 1024 }, (err, stdout, stderr) => {
      let exitCode = 0;
      if (err) {
        // A non-zero exit carries a numeric `code`; other failures carry a
        // non-numeric one and are reported as 1.
        const code = (err as NodeJS.ErrnoException & { code?: number }).code;
        exitCode = typeof code === "number" ? Number(code) : 1;
      }
      resolve({
        stdout: stdout.toString(),
        stderr: stderr.toString(),
        exitCode,
      });
    });
  });
}
2878
+
2879
/**
 * `execInService` for the no-recorder contexts (`setup`/`eval`).
 *
 * The result is passed through `wrap` so `.unwrap()` exists and the ctx's
 * wrapped `exec` type is honest at runtime, but no provenance is attached
 * (there is no event to link to). The recorded `ctx.exec` used during
 * tests is `recordedExec`.
 *
 * @param service Container name handed to `docker exec`.
 * @param command Shell command line executed by `sh -lc`.
 * @returns The wrapped exec result.
 */
async function execInServiceWrapped(
  service: string,
  command: string,
): Promise<Wrapped<ExecResult>> {
  const plain = await execInService(service, command);
  const wrapped = wrap(plain, undefined);
  return wrapped as unknown as Wrapped<ExecResult>;
}
2890
+
2891
/**
 * Per-stream cap on the ExecResult strings the streaming variant
 * accumulates — parity with the buffered `execInService`'s `maxBuffer`
 * (16 MiB). Past the cap we keep draining (so the child never blocks on a
 * full pipe) but stop appending; unlike `execFile` we don't kill the
 * process, which only makes over-cap runs *more* survivable.
 *
 * Note: `execInServiceStreaming` compares this against the accumulated
 * string's `.length`, i.e. UTF-16 code units of decoded text, not raw
 * bytes.
 */
const EXEC_RESULT_CAP_BYTES = 16 * 1024 * 1024;
2897
+
2898
/**
 * Streaming counterpart of `execInService`, used by the recorded
 * `ctx.exec`.
 *
 * It runs the same `docker exec <svc> sh -lc <cmd>` invocation and
 * resolves to the same result shape, but drains stdout/stderr
 * incrementally so the caller can timestamp each chunk into an asciicast
 * frame as it arrives. No PTY is involved — the program still sees plain
 * pipes (`isatty` false), so the streams stay byte-identical to what
 * `exec` has always returned; recording adds arrival *timing* only.
 *
 * The promise never rejects: a spawn error resolves with exit code `1`.
 *
 * @param service Container name handed to `docker exec`.
 * @param command Shell command line executed by `sh -lc`.
 * @param onChunk Invoked with every non-empty decoded chunk, in arrival
 *   order across both streams — the closest analogue of what a terminal
 *   would have shown. Exceptions it throws are swallowed.
 * @returns stdout and stderr kept separate (each capped at
 *   `EXEC_RESULT_CAP_BYTES`) plus the exit code.
 */
function execInServiceStreaming(
  service: string,
  command: string,
  onChunk: (stream: "stdout" | "stderr", data: string) => void,
): Promise<ExecResult> {
  return new Promise((resolve) => {
    const proc = spawn("docker", ["exec", service, "sh", "-lc", command], {
      stdio: ["ignore", "pipe", "pipe"],
    });
    const collected = { stdout: "", stderr: "" };
    // One streaming decoder per pipe, so a multi-byte character split
    // across two chunks is reassembled instead of mangled.
    const utf8 = {
      stdout: new TextDecoder("utf-8", { fatal: false }),
      stderr: new TextDecoder("utf-8", { fatal: false }),
    };

    const onData = (which: "stdout" | "stderr", chunk: Buffer): void => {
      const text = utf8[which].decode(chunk, { stream: true });
      if (text.length === 0) return;
      const room = EXEC_RESULT_CAP_BYTES - collected[which].length;
      if (room > 0) {
        collected[which] += text.length > room ? text.slice(0, room) : text;
      }
      try {
        onChunk(which, text);
      } catch {
        // frame capture must never break the exec itself
      }
    };
    proc.stdout?.on("data", (c: Buffer) => onData("stdout", c));
    proc.stderr?.on("data", (c: Buffer) => onData("stderr", c));

    let done = false;
    const settle = (exitCode: number): void => {
      if (done) return;
      done = true;
      // Flush any multi-byte tail the decoders are still holding.
      collected.stdout += utf8.stdout.decode();
      collected.stderr += utf8.stderr.decode();
      resolve({ stdout: collected.stdout, stderr: collected.stderr, exitCode });
    };
    // `close` (not `exit`) so both pipes are fully drained first.
    proc.on("close", (code) => settle(code ?? 1));
    proc.on("error", () => settle(1));
  });
}
2953
+
2954
/**
 * Repeatedly call `fn` until it yields a usable value, and record the
 * whole poll as a single wait event.
 *
 * An iteration succeeds when `fn` returns anything other than `null`,
 * `undefined` or `false`. Polling stops on the first success, as soon as
 * `fn` throws, or once the timeout budget is spent (including when the
 * next sleep would overshoot it).
 *
 * @param description Label stored on the wait event and quoted in the
 *   timeout error.
 * @param fn Predicate to poll; may be sync or async.
 * @param opts `timeoutMs` (default 30 000) and `intervalMs` (default 1 000).
 * @returns The successful value passed through `wrap` with the wait
 *   event's sequence.
 * @throws Whatever `fn` threw (re-thrown after the wait event is recorded),
 *   or an `Error` describing the timeout and attempt count.
 */
async function pollCall<T>(
  description: string,
  fn: () =>
    | T
    | null
    | undefined
    | false
    | Promise<T | null | undefined | false>,
  opts?: { timeoutMs?: number; intervalMs?: number },
): Promise<T> {
  const timeoutMs = opts?.timeoutMs ?? 30_000;
  const intervalMs = opts?.intervalMs ?? 1_000;
  const start = Date.now();
  // Reserve the wait's slot up front so it sorts at the poll's *start*,
  // ahead of the iteration events it nests (which record as the poll runs).
  const resv = reserveEvent();
  let attempts = 0;
  let value: T | undefined;
  let success = false;
  let predicateError: unknown;

  // Record all iterations normally. Falsy iterations get truncated
  // from the recorder so the timeline doesn't fill with polling
  // noise; the LAST iteration's events stay, then get marked as
  // children of the wait event so the UI can render them nested.
  const beforePollIdx = recorderEventCount();
  let lastIterStartIdx = beforePollIdx;
  let keptIterStartIdx = beforePollIdx;

  while (Date.now() - start < timeoutMs) {
    attempts += 1;
    lastIterStartIdx = recorderEventCount();
    try {
      const v = await fn();
      if (v !== null && v !== undefined && v !== false) {
        value = v as T;
        success = true;
        keptIterStartIdx = lastIterStartIdx;
        break;
      }
    } catch (err) {
      // A throwing predicate ends the poll immediately; it is re-thrown
      // below, after the wait event has been recorded.
      predicateError = err;
      break;
    }
    // Failed iteration — drop the events it emitted.
    recorderTruncate(lastIterStartIdx);
    // Give up now if sleeping another interval would overshoot the budget.
    if (Date.now() - start + intervalMs > timeoutMs) break;
    await new Promise((r) => setTimeout(r, intervalMs));
  }

  if (!success) {
    // Timeout or predicate error: drop every attempt's events. The
    // wait event we emit below is the only trace.
    recorderTruncate(beforePollIdx);
  }

  // The predicate's own message wins over the generic timeout text.
  const errMsg =
    predicateError !== undefined
      ? ((predicateError as Error)?.message ?? String(predicateError))
      : success
        ? undefined
        : `timed out after ${timeoutMs}ms`;
  const seq = recordWait({
    description,
    attempts,
    durationMs: Date.now() - start,
    passed: success,
    ...(errMsg !== undefined ? { error: errMsg } : {}),
  }, resv);

  if (success && seq !== undefined) {
    // Group the kept iteration's events under the wait so the UI can
    // render them inside the wait card. The wait event itself is the
    // very last entry; markChildren skips it via the seq match.
    recorderMarkChildren(keptIterStartIdx, seq);
  }

  if (predicateError !== undefined) throw predicateError;
  if (success) {
    return wrap(value as T, seq) as T;
  }
  throw new Error(
    `poll ${JSON.stringify(description)} timed out after ${timeoutMs}ms (${attempts} attempts)`,
  );
}
3039
+
3040
/**
 * Mirror `console.*` calls into `chunks` while a test or eval is running.
 *
 * Bun routes console output through a native sink rather than through
 * `process.stdout.write`, so patching the streams alone would leave the
 * captured log empty. Each patched method appends one rendered line to
 * `chunks` and then forwards to the method it replaced, so the daemon
 * journal still receives the output.
 *
 * @param chunks Buffer that receives one newline-terminated entry per call.
 * @returns A function that reinstalls the console methods that were in
 *          place when `captureConsole` was called (use it in `finally`).
 */
function captureConsole(chunks: string[]): () => void {
  type Level = "log" | "info" | "warn" | "error" | "debug";
  const levels: readonly Level[] = ["log", "info", "warn", "error", "debug"];

  // Strings pass through untouched; everything else is pretty-printed.
  const render = (arg: unknown): string => (typeof arg === "string" ? arg : Bun.inspect(arg));

  const undoers = levels.map((level) => {
    const previous = console[level];
    const forward = previous.bind(console);
    console[level] = (...args: unknown[]) => {
      try {
        chunks.push(`${args.map((arg) => render(arg)).join(" ")}\n`);
      } catch {
        // capture must never break the test
      }
      forward(...args);
    };
    return (): void => {
      console[level] = previous;
    };
  });

  return () => {
    for (const undo of undoers) undo();
  };
}
3069
+
3070
/**
 * Run a single test case and collect everything the control plane needs:
 * status, captured log output, recorder events, browser/terminal session
 * recordings and (on failure) recent service logs.
 *
 * While the test runs, `process.stdout.write`, `process.stderr.write`,
 * `console.*` and `globalThis.fetch` are patched process-wide and the
 * event recorder is active. All of that is undone before returning — and
 * also when building the service/fake handles fails before the test body
 * starts, so a setup error cannot leave the daemon with its output
 * swallowed into a dead buffer.
 *
 * @param testCase The case to execute; `testCase.run(ctx)` is raced against
 *                 `testCase.timeoutMs` (default `DEFAULT_TEST_TIMEOUT_MS`).
 * @returns The run result. A failing or timed-out test resolves with
 *          `status: "failed"`; only setup errors (project not loaded,
 *          handle construction) reject.
 */
async function runOne(testCase: TestCase<unknown>): Promise<RunResult> {
  const start = Date.now();
  const chunks: string[] = [];
  const origStdout = process.stdout.write.bind(process.stdout);
  const origStderr = process.stderr.write.bind(process.stderr);
  const capture = (s: unknown): boolean => {
    chunks.push(typeof s === "string" ? s : Buffer.from(s as Uint8Array).toString("utf8"));
    return true;
  };
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (process.stdout as any).write = capture;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (process.stderr as any).write = capture;
  const restoreConsole = captureConsole(chunks);

  // Terminal sessions: each `ctx.exec` / `ctx.terminal(...)` /
  // `ctx.openTerminal(...)` call opens its own session (asciicast frames
  // live on the record; the inline TestEvent just carries metadata + a
  // sessionId pointer). Drained at the end of the test and shipped on
  // RunResult.terminalSessions.
  //
  // Unlike browsers, we don't auto-close terminals at test end. A
  // `docker exec` subprocess is cheap to keep alive (a few KB), and
  // Freestyle's snapshot captures it cleanly along with the container
  // — so leaving it running between tests doesn't leak in any
  // meaningful sense. Auto-closing only added a noisy `close` step
  // at the end of every test that used `openTerminal`.
  const terminalSessions: TerminalSessionRecord[] = [];

  // Wrap exec so each call shows up in the event log alongside its
  // result. We do this here (not on `execInService` itself) so the
  // bootstrap path stays uninstrumented. Each call also captures the
  // full CLI run as an asciicast — one exec step = one run = one
  // recording: output chunks are timestamped as they stream in, so slow
  // or animated output replays with real timing in the web UI. The
  // frames are presentation-only; the ExecResult (and any assertions on
  // it) still sees the plain separated stdout/stderr.
  const recordedExec = async (service: string, command: string): Promise<ExecResult> => {
    const t = Date.now();
    const resv = reserveEvent();
    const session = newTerminalSession(
      start,
      service,
      command,
      EXEC_CAST_COLS,
      EXEC_CAST_ROWS,
      testCase.id,
    );
    terminalSessions.push(session.record);
    // Synthetic prompt frame so the replay is self-describing — the
    // program's own output starts on the next line, like a real shell.
    session.pushFrame(0, `\x1b[32m${service} $\x1b[0m \x1b[1m${command}\x1b[0m\r\n`);
    let frameBytes = 0;
    let frameCapped = false;
    const res = await execInServiceStreaming(service, command, (_stream, data) => {
      if (frameCapped) return;
      if (frameBytes + data.length > EXEC_FRAME_CAP_BYTES) {
        frameCapped = true;
        session.pushFrame(
          (Date.now() - t) / 1000,
          "\r\n\x1b[2m[spectest: recording truncated — output exceeded the cast cap]\x1b[0m\r\n",
        );
        return;
      }
      frameBytes += data.length;
      // Pipes deliver bare `\n`; a terminal renderer needs `\r\n` or
      // every line starts at the previous line's end column
      // (stair-stepping). PTY output is ONLCR-cooked by the kernel —
      // pipe output is not, so cook it here. Normalising existing
      // `\r\n` too keeps a CR|LF split across chunk boundaries
      // harmless (`\r\r\n` renders identically).
      session.pushFrame((Date.now() - t) / 1000, data.replace(/\r?\n/g, "\r\n"));
    });
    session.markClosed();
    const stdout = truncateUtf8(res.stdout);
    const stderr = truncateUtf8(res.stderr);
    const seq = recordExec({
      service,
      command,
      exitCode: res.exitCode,
      stdout: stdout.value,
      stdoutTruncated: stdout.truncated,
      stderr: stderr.value,
      stderrTruncated: stderr.truncated,
      durationMs: Date.now() - t,
      sessionId: session.record.sessionId,
    }, resv);
    return wrap(res, seq);
  };

  // One-shot: open a terminal with `command` as the entrypoint, wait
  // for it to exit, close, and return the existing TerminalResult
  // shape. The asciicast and one TerminalEvent line up exactly with
  // the pre-interactive implementation, just routed through the new
  // factory.
  const recordedTerminal = async (
    service: string,
    command: string,
    opts?: TerminalOpts,
  ): Promise<TerminalResult> => {
    const timeoutMs = opts?.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
    const startedAt = Date.now();
    const resv = reserveEvent();
    const term = await openInstrumentedTerminal(
      service,
      { ...opts, command, timeoutMs },
      start,
      terminalSessions,
      false, // one-shot doesn't emit per-op step events
      testCase.id,
    );
    // `term.exited` resolves to a wrapped result; this one-shot path needs
    // the plain `exitCode` number, so `.unwrap()` the result first.
    const { exitCode } = (await term.exited).unwrap();
    await term.close();
    const output = term.rawOutput();
    const preview = truncateUtf8(output);
    const seq = recordTerminal({
      service,
      command,
      exitCode,
      durationMs: Date.now() - startedAt,
      sessionId: term.sessionId,
      cols: term.cols,
      rows: term.rows,
      outputPreview: preview.value,
      outputTruncated: preview.truncated,
    }, resv);
    const result: TerminalResult = {
      output,
      exitCode,
      durationMs: Date.now() - startedAt,
      sessionId: term.sessionId,
    };
    return wrap(result, seq);
  };

  // Long-lived: open an interactive terminal. Each method on the
  // returned Terminal records a `terminal-step` event tied back to
  // this session id; the per-op screen previews are written into the
  // event so the UI can show "what the user saw after sendLine 'ls'".
  const recordedOpenTerminal = async (
    service: string,
    opts?: TerminalOpts,
  ): Promise<Terminal> => {
    const term = await openInstrumentedTerminal(
      service,
      opts,
      start,
      terminalSessions,
      true,
      testCase.id,
    );
    // Emit a one-time `terminal` event so the session shows up in the
    // sidebar even before any step lands. `exitCode` is filled in by
    // the eventual `exit`/`close` step event; the inline summary here
    // uses -1 as a sentinel until then.
    const preview = truncateUtf8(term.rawOutput());
    recordTerminal({
      service,
      command: opts?.command ?? "(interactive)",
      exitCode: -1,
      durationMs: 0,
      sessionId: term.sessionId,
      cols: term.cols,
      rows: term.rows,
      outputPreview: preview.value,
      outputTruncated: preview.truncated,
    });
    return term;
  };

  startRecording();
  const restoreFetch = installFetchWrapper();

  // Undo every process-wide patch installed above. Shared by the
  // setup-failure path and the `finally` below so the two cannot drift.
  const restoreGlobals = (): void => {
    restoreFetch();
    restoreConsole();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (process.stdout as any).write = origStdout;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (process.stderr as any).write = origStderr;
  };

  // Look up the parent's stored return value (if any). The parent ran in
  // an ancestor fork; its TEST_DATA entry travels with the snapshot.
  const parentId = testCase.dependsOn?.id;
  const parent = parentId !== undefined ? TEST_DATA.get(parentId) : undefined;

  // Track every Browser opened during this test so we can close them in
  // `finally` — leaked Chromium subprocesses would survive the snapshot
  // and chew memory across forks. Each Browser also gets a session
  // recorder; the records flow back to the control plane as part of
  // RunResult.browserSessions and are persisted to SQLite.
  const openBrowsers: Browser[] = [];
  const sessions: Array<ReturnType<typeof newBrowserSession>> = [];
  const trackedOpenBrowser = async (opts?: BrowserOptions): Promise<Browser> => {
    const session = newBrowserSession(start, testCase.id);
    sessions.push(session);
    const b = await openBrowser({ ...(opts ?? {}), recorder: session.recorder });
    openBrowsers.push(b);
    return b;
  };

  // Build convenience handles (e.g. ctx.svc.db.client) from the loaded
  // project. Done before installing the timeout so a slow client factory
  // surfaces as a real error rather than getting attributed to the test.
  const buildHandles = async () => {
    const svc = await buildServiceHandles(requireLoaded().project.environment);
    const fakes = await buildFakeHandles();
    return { svc, fakes };
  };
  let handles: Awaited<ReturnType<typeof buildHandles>>;
  try {
    handles = await buildHandles();
  } catch (err) {
    // The test body never started: put the globals back and stop the
    // recorder before surfacing the error, otherwise every later write to
    // stdout/stderr/console would vanish into `chunks` for the rest of
    // the daemon's life.
    restoreGlobals();
    stopRecording();
    throw err;
  }
  const { svc, fakes } = handles;

  const ctx: TestContext<unknown> = {
    // installFetchWrapper just swapped globalThis.fetch for the wrapped
    // version, so capturing it here gets us instrumentation on ctx.fetch
    // for free. The recorder is active for the test, so responses come
    // back wrapped — hence the SpectestFetch type.
    fetch: globalThis.fetch as unknown as SpectestFetch,
    // exec/terminal/poll wrap their results at runtime (the recorder is
    // active), so the ctx interface types them wrapped — same bridge as
    // `fetch` above. The impls' own return types stay raw.
    exec: recordedExec as unknown as TestContext<unknown>["exec"],
    terminal: recordedTerminal as unknown as TestContext<unknown>["terminal"],
    openTerminal: recordedOpenTerminal,
    browser: trackedOpenBrowser,
    testName: testCase.name,
    parent,
    svc,
    fakes,
    poll: pollCall as unknown as TestContext<unknown>["poll"],
    dnsName: registerDnsName,
    startService: startRuntimeService,
    stopService: stopRuntimeService,
  };

  const timeoutMs = testCase.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
  let timer: NodeJS.Timeout | undefined;
  const timedOut = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`test timed out after ${timeoutMs}ms`)),
      timeoutMs,
    );
  });

  // Result events are gathered inside finally (after the final browser
  // drains) so we hoist these out of the try/catch.
  let outcome: { status: "passed" | "failed"; error?: RunResult["error"] };
  try {
    const value = await Promise.race([
      Promise.resolve(testCase.run(ctx)),
      timedOut,
    ]);
    // Stash the return value so child cases — which fork from the snapshot
    // we're about to capture — can read it off ctx.parent.
    TEST_DATA.set(testCase.id, value);
    outcome = { status: "passed" };
  } catch (err) {
    // A test may `throw null` / `throw undefined`; optional chaining keeps
    // the error formatting itself from throwing in that case.
    const e = err as Error | null | undefined;
    outcome = {
      status: "failed",
      error: { message: e?.message ?? String(err), stack: e?.stack },
    };
  } finally {
    if (timer) clearTimeout(timer);
    restoreGlobals();
    // Best-effort browser cleanup. `close()` does a final rrweb drain
    // before tearing the view down, so we must `await` it before
    // collecting session records. Leaked Chromium subprocesses would
    // survive the snapshot and chew memory across forks.
    for (const b of openBrowsers) {
      try {
        await b.close();
      } catch {
        /* ignore */
      }
    }
    for (const s of sessions) s.markClosed();
  }
  const durationMs = Date.now() - start;
  // On failure, grab each service's recent container logs for the
  // post-mortem. Captured after the duration clock stops so the
  // log-fetch round trips aren't billed to the test.
  let serviceLogs: ServiceLogCapture[] = [];
  if (outcome.status === "failed") {
    try {
      serviceLogs = await captureServiceLogs();
    } catch (err) {
      // eslint-disable-next-line no-console
      console.warn("[service-logs] capture failed:", err);
    }
  }
  const events = stopRecording();
  // Drop sessions whose linking event didn't survive — an exec/terminal
  // inside a failed `ctx.poll` iteration has its events removed by
  // recorderTruncate, so its recording would be an unreachable orphan in
  // the UI (and a polled exec would ship one dead cast per attempt).
  const referencedSessions = new Set<string>();
  for (const ev of events) {
    const sid = (ev as { sessionId?: unknown }).sessionId;
    if (typeof sid === "string") referencedSessions.add(sid);
  }
  return {
    status: outcome.status,
    durationMs,
    log: chunks.join(""),
    events,
    browserSessions: sessions.map((s) => s.record),
    terminalSessions: terminalSessions.filter((s) => referencedSessions.has(s.sessionId)),
    serviceLogs,
    error: outcome.error,
  };
}
3376
+
3377
+ // ────────────────────────────────────────────────────────────────────────
3378
+ // Ad-hoc eval (manual testing surface — REPL-like)
3379
+ // ────────────────────────────────────────────────────────────────────────
3380
+
3381
/** Payload returned to the caller of an ad-hoc eval (`POST /eval`). */
interface EvalResult {
  /** `true` when the snippet imported without throwing. */
  ok: boolean;
  /** Wall-clock milliseconds measured from the start of the eval. */
  durationMs: number;
  /** Everything captured from stdout, stderr and `console.*`, concatenated. */
  log: string;
  /** The snippet's default export after a JSON round-trip (JSON-safe). */
  result?: unknown;
  /** Packages that were auto-installed for this snippet; `[]` when none. */
  installed: string[];
  /** rrweb recordings, one per Browser opened while the snippet ran. */
  browserSessions: BrowserSessionRecord[];
  /** asciicast recordings for terminals opened through `ctx` during the eval. */
  terminalSessions: TerminalSessionRecord[];
  /** Present when `ok` is `false`: what the snippet (or its setup) threw. */
  error?: { message: string; stack?: string };
}
3395
+
3396
// Scratch directory, inside the app dir, where each eval snippet is
// written as a throwaway `.ts` module before being imported.
const EVAL_DIR = path.join(APP_DIR, ".spectest-eval");

// Shared bag exposed to snippets as the `state` global. Snippets mutate
// it freely; it lives for as long as the daemon process does, so values
// carry over from one eval call to the next.
const EVAL_STATE: Record<string, unknown> = {};

// A single transpiler kept around purely for `scanImports`. The snippet
// itself is never transpiled here — Bun executes the .ts file as-is — we
// only need the import list to auto-install whatever is missing.
const SCAN_TRANSPILER = new Bun.Transpiler({ loader: "ts" });
3406
+
3407
/**
 * Reduce an arbitrary value to something that survives `JSON.stringify`.
 *
 * `undefined` is passed through. Anything else is round-tripped through
 * JSON; when that fails (for example a BigInt, a circular structure, or a
 * value that stringifies to nothing such as a function) the value's
 * `String(...)` form is returned instead.
 */
function safeSerialize(v: unknown): unknown {
  if (typeof v === "undefined") {
    return undefined;
  }
  let jsonSafe: unknown;
  try {
    const encoded = JSON.stringify(v);
    jsonSafe = JSON.parse(encoded);
  } catch {
    jsonSafe = String(v);
  }
  return jsonSafe;
}
3415
+
3416
/**
 * Extract the installable package name from an import specifier:
 * the first two path segments for scoped (`@scope/name/...`) specifiers,
 * otherwise just the first segment.
 */
function packageName(spec: string): string {
  const segmentCount = spec.startsWith("@") ? 2 : 1;
  return spec.split("/").slice(0, segmentCount).join("/");
}
3423
+
3424
/**
 * Install whatever third-party packages a snippet imports but the app dir
 * cannot resolve yet.
 *
 * Specifiers that are relative or absolute paths, `node:` / `bun:`
 * built-ins, or `http:` / `https:` / `file:` URLs are ignored. Each
 * remaining package is checked once (the first specifier seen for it is
 * the one resolved) and every unresolved package goes into a single
 * `bun add` run in the app dir.
 *
 * @param code Source text of the snippet.
 * @returns Names of the packages that were installed; `[]` when nothing
 *          was missing or the source could not be scanned.
 * @throws Error carrying bun's stderr/stdout when `bun add` fails.
 */
async function ensureDeps(code: string): Promise<string[]> {
  type ScannedImport = { path: string; kind?: string };

  let imports: ScannedImport[];
  try {
    imports = SCAN_TRANSPILER.scanImports(code) as ScannedImport[];
  } catch {
    // Invalid syntax — let the import call surface the real error.
    return [];
  }

  // Anything starting with one of these never comes from the registry.
  const ignoredPrefixes = [".", "/", "node:", "bun:", "http:", "https:", "file:"];
  const isIgnored = (specifier: string): boolean =>
    ignoredPrefixes.some((prefix) => specifier.startsWith(prefix));

  const checked = new Set<string>();
  const missing: string[] = [];
  for (const entry of imports) {
    const specifier = entry.path;
    if (isIgnored(specifier)) continue;

    const pkg = packageName(specifier);
    if (checked.has(pkg)) continue;
    checked.add(pkg);

    let resolvable = true;
    try {
      Bun.resolveSync(specifier, APP_DIR);
    } catch {
      resolvable = false;
    }
    if (!resolvable) missing.push(pkg);
  }

  if (missing.length === 0) {
    return [];
  }

  await new Promise<void>((done, fail) => {
    const args = ["add", ...missing];
    const options = { cwd: APP_DIR, maxBuffer: 16 * 1024 * 1024 };
    execFile("/usr/local/bin/bun", args, options, (err, stdout, stderr) => {
      if (!err) {
        done();
        return;
      }
      fail(
        new Error(
          `bun add ${missing.join(" ")} failed:\n${String(stderr).trim()}\n${String(stdout).trim()}`,
        ),
      );
    });
  });
  return missing;
}
3483
+
3484
/**
 * Execute an ad-hoc snippet (the REPL-like manual testing surface).
 *
 * The snippet is written to a uniquely named `.ts` file under `EVAL_DIR`
 * and imported as real ESM; its default export (JSON-safe) becomes
 * `result`. For the duration of the call `ctx`, `expect`, `expectRaw`,
 * `assert` and `state` are installed as globals, stdout/stderr/console
 * are captured into `log`, and `fetch` is wrapped.
 *
 * All process-wide patches and the eval-scoped secrets are undone before
 * the function settles — including when building the service/fake handles
 * throws before the snippet starts, so secrets can never outlive the eval
 * that supplied them.
 *
 * @param code    Snippet source (TypeScript / ESM).
 * @param secrets Optional eval-scoped secrets for record-mode fakes.
 * @returns The eval result. Errors from dependency installation or from
 *          the snippet itself resolve with `ok: false`; only handle
 *          construction errors reject.
 */
async function evalCode(
  code: string,
  secrets?: Record<string, string>,
): Promise<EvalResult> {
  const start = Date.now();
  // Eval-scoped secret channel for record-mode fakes — set before the
  // snippet runs, cleared in the `finally` below so a secret never
  // persists into daemon memory (and thus into a forkable snapshot) past
  // the eval that supplied it. See record-secrets.ts.
  setRecordSecrets(secrets);
  const chunks: string[] = [];
  const origStdout = process.stdout.write.bind(process.stdout);
  const origStderr = process.stderr.write.bind(process.stderr);
  const capture = (s: unknown): boolean => {
    chunks.push(typeof s === "string" ? s : Buffer.from(s as Uint8Array).toString("utf8"));
    return true;
  };
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (process.stdout as any).write = capture;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (process.stderr as any).write = capture;
  // Bun's console.* bypasses process.stdout.write — tee it too.
  const restoreConsole = captureConsole(chunks);
  // Wrap fetch for the snippet's duration so `ctx.fetch` returns a wrapped
  // Response just like in a test (no recorder here, so no provenance — but the
  // wrapped type is honest at runtime). Restored in the `finally` below.
  const restoreFetch = installFetchWrapper();

  // Undo the secrets channel and every process-wide patch installed above.
  // Shared by the setup-failure path and the `finally` below.
  const restoreGlobals = (): void => {
    clearRecordSecrets();
    restoreFetch();
    restoreConsole();
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (process.stdout as any).write = origStdout;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (process.stderr as any).write = origStderr;
  };

  const openBrowsers: Browser[] = [];
  const sessions: Array<ReturnType<typeof newBrowserSession>> = [];
  const trackedOpenBrowser = async (opts?: BrowserOptions): Promise<Browser> => {
    const session = newBrowserSession(start, "eval");
    sessions.push(session);
    const b = await openBrowser({ ...(opts ?? {}), recorder: session.recorder });
    openBrowsers.push(b);
    return b;
  };

  // Terminal sessions — same shape as runOne, but eval has no active
  // recorder so we don't emit inline events; the asciicast frames
  // still ship back on EvalResult.terminalSessions and the web UI
  // renders the player.
  const terminalSessions: TerminalSessionRecord[] = [];
  const evalTerminal = async (
    service: string,
    command: string,
    opts?: TerminalOpts,
  ): Promise<TerminalResult> => {
    const timeoutMs = opts?.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
    const startedAt = Date.now();
    const term = await openInstrumentedTerminal(
      service,
      { ...opts, command, timeoutMs },
      start,
      terminalSessions,
      false,
      "eval",
    );
    const { exitCode } = (await term.exited).unwrap();
    await term.close();
    return {
      output: term.rawOutput(),
      exitCode,
      durationMs: Date.now() - startedAt,
      sessionId: term.sessionId,
    };
  };
  const evalOpenTerminal = async (
    service: string,
    opts?: TerminalOpts,
  ): Promise<Terminal> => {
    return openInstrumentedTerminal(
      service,
      opts,
      start,
      terminalSessions,
      false,
      "eval",
    );
  };

  // Convenience handles are best-effort for eval — if the project isn't
  // loaded yet, fall back to an empty map so quick `await fetch(...)`
  // snippets don't require a /load round-trip first.
  const buildHandles = async () => {
    const svc: ServiceHandles = loaded
      ? await buildServiceHandles(loaded.project.environment)
      : {};
    const fakes = loaded ? await buildFakeHandles() : {};
    return { svc, fakes };
  };
  let handles: Awaited<ReturnType<typeof buildHandles>>;
  try {
    handles = await buildHandles();
  } catch (err) {
    // The snippet never started: drop the secrets and put the globals back
    // before surfacing the error, otherwise the secrets would stay in
    // daemon memory and all later output would be swallowed.
    restoreGlobals();
    throw err;
  }
  const { svc, fakes } = handles;

  const ctx: TestContext<undefined> = {
    // installFetchWrapper swapped globalThis.fetch above, so this captures the
    // wrapped version — eval results are wrapped just like in a test.
    fetch: globalThis.fetch as unknown as SpectestFetch,
    // Wraps its result the same way (no recorder under eval, so no provenance —
    // but the wrapped type is honest at runtime, so `.unwrap()` works).
    exec: execInServiceWrapped as unknown as TestContext<undefined>["exec"],
    terminal: evalTerminal as unknown as TestContext<undefined>["terminal"],
    openTerminal: evalOpenTerminal,
    browser: trackedOpenBrowser,
    testName: "eval",
    parent: undefined,
    svc,
    fakes,
    poll: pollCall as unknown as TestContext<undefined>["poll"],
    dnsName: registerDnsName,
    startService: startRuntimeService,
    stopService: stopRuntimeService,
  };

  // Expose the test context, matchers, and persistent state as globals
  // so the snippet can use them without an explicit import. The user code
  // is real ESM, so `import { Client } from "pg"` and top-level `await`
  // work natively.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const g = globalThis as any;
  g.ctx = ctx;
  g.expect = expect;
  g.expectRaw = expectRaw;
  g.assert = assert;
  g.state = EVAL_STATE;

  let installed: string[] = [];
  let filePath: string | undefined;
  let outcome:
    | { ok: true; result?: unknown }
    | { ok: false; error: EvalResult["error"] };
  try {
    installed = await ensureDeps(code);
    await fs.mkdir(EVAL_DIR, { recursive: true });
    filePath = path.join(EVAL_DIR, `${randomUUID()}.ts`);
    await fs.writeFile(filePath, code);
    const mod = (await import(pathToFileURL(filePath).href)) as { default?: unknown };
    outcome = { ok: true, result: safeSerialize(mod.default) };
  } catch (err) {
    // A snippet may `throw null` / `throw undefined`; optional chaining
    // keeps the error formatting itself from throwing in that case.
    const e = err as Error | null | undefined;
    outcome = {
      ok: false,
      error: { message: e?.message ?? String(err), stack: e?.stack },
    };
  } finally {
    restoreGlobals();
    for (const b of openBrowsers) {
      try {
        await b.close();
      } catch {
        /* ignore */
      }
    }
    for (const s of sessions) s.markClosed();
    if (filePath) {
      fs.unlink(filePath).catch(() => {
        /* best-effort cleanup */
      });
    }
  }
  return outcome.ok
    ? {
        ok: true,
        durationMs: Date.now() - start,
        log: chunks.join(""),
        installed,
        result: outcome.result,
        browserSessions: sessions.map((s) => s.record),
        terminalSessions,
      }
    : {
        ok: false,
        durationMs: Date.now() - start,
        log: chunks.join(""),
        installed,
        browserSessions: sessions.map((s) => s.record),
        terminalSessions,
        error: outcome.error,
      };
}
3665
+
3666
+ // ────────────────────────────────────────────────────────────────────────
3667
+ // HTTP server
3668
+ // ────────────────────────────────────────────────────────────────────────
3669
+
3670
/**
 * Collect an incoming request's body and decode it as UTF-8.
 *
 * Chunks are concatenated before decoding, so a multi-byte character that
 * straddles two chunks is decoded correctly.
 *
 * @returns The full body text; rejects if the request stream emits `error`.
 */
async function readBody(req: http.IncomingMessage): Promise<string> {
  const received: Buffer[] = [];
  return new Promise<string>((done, fail) => {
    const onData = (piece: Buffer): void => {
      received.push(piece);
    };
    const onEnd = (): void => {
      done(Buffer.concat(received).toString("utf8"));
    };
    req.on("data", onData);
    req.on("end", onEnd);
    req.on("error", fail);
  });
}
3678
+
3679
/**
 * Send `body` as a JSON response and finish the request.
 *
 * The payload is encoded up front so `content-length` is the exact byte
 * length rather than the string length.
 */
function jsonResponse(res: http.ServerResponse, status: number, body: unknown): void {
  const encoded = Buffer.from(JSON.stringify(body));
  const headers = {
    "content-type": "application/json",
    "content-length": encoded.length,
  };
  res.writeHead(status, headers);
  res.end(encoded);
}
3687
+
3688
/**
 * Per-server bookkeeping of long-running jobs. Each slot holds the promise
 * of the job currently running, or `null` when idle; the matching routes
 * answer 409 while their slot is occupied.
 */
interface RouteState {
  /** Set while a `/run` test or an `/eval` snippet is executing. */
  inFlightTest: Promise<unknown> | null;
  /** Set while `/bootstrap` is executing. */
  inFlightBootstrap: Promise<unknown> | null;
  /** Set while `/project-setup` is executing. */
  inFlightProjectSetup: Promise<unknown> | null;
}
3693
+
3694
/**
 * Route one HTTP request. Matching is exact on method + `req.url`.
 *
 * Routes: `GET /health`, `GET /progress`, `POST /load`, `POST /load-tests`,
 * `POST /unload`, `POST /reload`, `GET /env-config`, `GET /cases`,
 * `GET /record-secret-refs`, `POST /bootstrap`, `POST /project-setup`,
 * `POST /eval`, `POST /run`; anything else is a JSON 404.
 *
 * `/bootstrap`, `/project-setup`, `/eval` and `/run` are exclusive: while
 * one is running, a second request for the same slot gets 409. `/eval` and
 * `/run` share the `inFlightTest` slot.
 *
 * @param state Shared in-flight bookkeeping for the exclusive routes.
 * @throws Whatever a route's underlying operation throws; the caller turns
 *         that into a 500 response.
 */
async function handle(req: http.IncomingMessage, res: http.ServerResponse, state: RouteState): Promise<void> {
  const url = req.url ?? "";
  const method = req.method ?? "GET";

  if (method === "GET" && url === "/health") {
    res.writeHead(200, { "content-type": "text/plain" });
    res.end("ok\n");
    return;
  }

  if (method === "GET" && url === "/progress") {
    // Live bootstrap progress, polled by the control plane during
    // /bootstrap and streamed into the test-run row. `{}` before the
    // first bootstrap() of this daemon.
    jsonResponse(res, 200, BOOTSTRAP_PROGRESS ?? {});
    return;
  }

  if (method === "POST" && url === "/load") {
    // Env only — services/fakes/setup. For the legacy single-file layout the
    // entry also defines the tests, so `cases` is populated here; for the
    // split layout `cases` is empty until /load-tests runs.
    const proj = await loadEnv();
    jsonResponse(res, 200, {
      environment: proj.environment,
      cases: casesMetadata(proj.tests),
    });
    return;
  }

  if (method === "POST" && url === "/load-tests") {
    // Import spectest/tests/** into the already-loaded env and return the
    // resulting catalogue. Called after the warm snapshot (cold path) or
    // against a freshly restored VM (warm path).
    await loadTests();
    const l = requireLoaded();
    jsonResponse(res, 200, {
      environment: l.project.environment,
      cases: casesMetadata(l.project.tests),
    });
    return;
  }

  if (method === "POST" && url === "/unload") {
    loaded = null;
    jsonResponse(res, 200, { unloaded: true });
    return;
  }

  if (method === "POST" && url === "/reload") {
    // Full reload (debug aid): re-import the env, then the tests.
    await loadEnv();
    await loadTests();
    const l = requireLoaded();
    jsonResponse(res, 200, {
      environment: l.project.environment,
      cases: casesMetadata(l.project.tests),
    });
    return;
  }

  if (method === "GET" && url === "/env-config") {
    const l = requireLoaded();
    jsonResponse(res, 200, l.project.environment);
    return;
  }

  if (method === "GET" && url === "/cases") {
    const l = requireLoaded();
    jsonResponse(res, 200, { cases: casesMetadata(l.project.tests) });
    return;
  }

  if (method === "GET" && url === "/record-secret-refs") {
    // Union of platform secret refs the loaded fakes declare (replayFake's
    // `secretRefs`). The control plane resolves these server-side and
    // pushes the values on the eval path only. Empty if nothing's loaded.
    const refs = new Set<string>();
    for (const fake of FAKES.values()) {
      for (const ref of fake.def.secretRefs ?? []) refs.add(ref);
    }
    jsonResponse(res, 200, { refs: [...refs] });
    return;
  }

  if (method === "POST" && url === "/bootstrap") {
    if (state.inFlightBootstrap) {
      jsonResponse(res, 409, { error: "bootstrap already in progress" });
      return;
    }
    const job = bootstrap();
    state.inFlightBootstrap = job;
    try {
      const timings = await job;
      jsonResponse(res, 200, { ok: true, timings });
    } finally {
      state.inFlightBootstrap = null;
    }
    return;
  }

  if (method === "POST" && url === "/project-setup") {
    if (state.inFlightProjectSetup) {
      jsonResponse(res, 409, { error: "project-setup already in progress" });
      return;
    }
    const job = runProjectSetup();
    state.inFlightProjectSetup = job;
    try {
      const result = await job;
      jsonResponse(res, 200, result);
    } finally {
      state.inFlightProjectSetup = null;
    }
    return;
  }

  if (method === "POST" && url === "/eval") {
    // Cheap early rejection so a busy daemon doesn't bother reading the body.
    if (state.inFlightTest) {
      jsonResponse(res, 409, { error: "a test or eval is already running" });
      return;
    }
    const body = await readBody(req);
    let parsed: { code?: string; secrets?: Record<string, string> };
    try {
      parsed = JSON.parse(body || "{}");
    } catch {
      jsonResponse(res, 400, { error: "invalid JSON body" });
      return;
    }
    const code = parsed.code;
    if (typeof code !== "string" || code.length === 0) {
      jsonResponse(res, 400, { error: "code (string) is required" });
      return;
    }
    // Authoritative check: `await readBody` yielded to the event loop, so
    // another /eval or /run may have claimed the slot since the early
    // check. Nothing awaits between here and the assignment below, which
    // makes the check-and-claim atomic.
    if (state.inFlightTest) {
      jsonResponse(res, 409, { error: "a test or eval is already running" });
      return;
    }
    // `secrets` are eval-scoped: the control plane resolves the loaded
    // project's declared `replayFake` refs and pushes the values here on
    // the eval path only. Never present on the /run (test) path.
    const exec = evalCode(code, parsed.secrets);
    state.inFlightTest = exec;
    try {
      const result = await exec;
      jsonResponse(res, 200, result);
    } finally {
      state.inFlightTest = null;
    }
    return;
  }

  if (method === "POST" && url === "/run") {
    // Cheap early rejection so a busy daemon doesn't bother reading the body.
    if (state.inFlightTest) {
      jsonResponse(res, 409, { error: "another test is already running" });
      return;
    }
    const body = await readBody(req);
    let parsed: { caseId?: string };
    try {
      parsed = JSON.parse(body || "{}");
    } catch {
      jsonResponse(res, 400, { error: "invalid JSON body" });
      return;
    }
    const caseId = parsed.caseId;
    if (!caseId) {
      jsonResponse(res, 400, { error: "caseId is required" });
      return;
    }
    const l = requireLoaded();
    const tc = l.byId.get(caseId);
    if (!tc) {
      jsonResponse(res, 404, { error: `unknown caseId: ${caseId}` });
      return;
    }
    // Authoritative check: `await readBody` yielded to the event loop, so
    // another /run or /eval may have claimed the slot since the early
    // check. Nothing awaits between here and the assignment below, which
    // makes the check-and-claim atomic and keeps two tests from
    // overlapping.
    if (state.inFlightTest) {
      jsonResponse(res, 409, { error: "another test is already running" });
      return;
    }
    const exec = runOne(tc);
    state.inFlightTest = exec;
    try {
      const result = await exec;
      jsonResponse(res, 200, result);
    } finally {
      state.inFlightTest = null;
    }
    return;
  }

  jsonResponse(res, 404, { error: "not found" });
}
3880
+
3881
/**
 * Start the daemon's HTTP server.
 *
 * Creates the shared in-flight state, routes every request through
 * `handle`, and converts any rejection into a JSON 500. Listens on
 * `SPECTEST_DAEMON_PORT` (falling back to `DEFAULT_PORT`) on all
 * interfaces.
 */
async function main(): Promise<void> {
  const state: RouteState = {
    inFlightTest: null,
    inFlightBootstrap: null,
    inFlightProjectSetup: null,
  };

  const server = http.createServer((req, res) => {
    handle(req, res, state).catch((err: unknown) => {
      // A handler may reject with `null`/`undefined`; optional chaining
      // keeps the formatting from throwing inside the `try` below, which
      // would silently skip the response and leave the client hanging.
      const e = err as Error | null | undefined;
      try {
        jsonResponse(res, 500, { error: e?.message ?? String(err), stack: e?.stack });
      } catch {
        // headers already sent or socket dead
      }
    });
  });

  const port = Number(process.env.SPECTEST_DAEMON_PORT ?? DEFAULT_PORT);
  server.listen(port, "0.0.0.0", () => {
    // eslint-disable-next-line no-console
    console.log(`spectest-daemon listening on :${port} (idle; awaiting POST /load)`);
  });
}
3905
+
3906
// Process entry point: start the daemon; if startup itself rejects, log
// the cause and exit non-zero.
main().catch((fatal) => {
  // eslint-disable-next-line no-console
  console.error("spectest-daemon: fatal:", fatal);
  process.exit(1);
});