npm - @specific.dev/spectest - Versions diffs - 0.4.0 - Mend

@specific.dev/spectest 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/package.json +38 -0
package/src/browser.ts +824 -0
package/src/components/index.ts +32 -0
package/src/components/k3s.ts +1324 -0
package/src/components/postgres.ts +281 -0
package/src/components/replayFake.ts +515 -0
package/src/daemon.ts +3910 -0
package/src/index.ts +1601 -0
package/src/ingress.ts +288 -0
package/src/inspect.ts +604 -0
package/src/record-secrets.ts +41 -0
package/src/recorder.ts +659 -0
package/src/resolver.ts +351 -0
package/src/terminal.ts +740 -0
package/src/vendor/rrweb-plugin-console-record.umd.js +520 -0
package/src/vendor/rrweb-record.min.js +5 -0

package/src/daemon.ts ADDED Viewed

@@ -0,0 +1,3910 @@
+// Long-running HTTP daemon. Runs as a systemd unit on the VM host (not in
+// a container). Owns:
+//
+//   * Loading the user's `spectest/index.ts` and exposing the parsed
+//     `{ environment, tests? }` to the control plane.
+//   * Orchestrating Docker: image prep (pull/build), network + volumes,
+//     container start, ready probes — all by shelling out to the local
+//     `docker` CLI.
+//   * Running individual test cases on demand.
+//
+// The control plane handles VM lifecycle (create, snapshot, fork,
+// terminate) and the tarball uploads to `/workspace` and `/opt/spectest/app`.
+// Once those are in place the daemon does the rest.
+//
+// One sandbox = one daemon. Concurrency between tests is achieved by
+// forking the sandbox; inside a single daemon we never run two tests at
+// once — that keeps stdout capture and timeouts simple.
+import http from "node:http";
+import { execFile, spawn } from "node:child_process";
+import { createHash, randomUUID } from "node:crypto";
+import { existsSync, promises as fs, readFileSync } from "node:fs";
+import net from "node:net";
+import path from "node:path";
+import { pathToFileURL } from "node:url";
+import { assert, expect, expectRaw, lowerIngress, dnsName as makeDnsDecl, isWildcard } from "./index.js";
+import type { DnsTarget, LoweredIngress } from "./index.js";
+import { openBrowser } from "./browser.js";
+import { openTerminal } from "./terminal.js";
+import {
+  recordEnv,
+  recordExec,
+  recordFake,
+  recordHttp,
+  recordTerminal,
+  recordWait,
+  reserveEvent,
+  recorderEventCount,
+  recorderMarkChildren,
+  recorderTruncate,
+  startRecording,
+  stopRecording,
+  truncateUtf8,
+  type TestEvent,
+} from "./recorder.js";
+import { wrap, wrapResponse } from "./inspect.js";
+import type { SpectestFetch, Wrapped, WrappedResponse } from "./inspect.js";
+import { clearRecordSecrets, setRecordSecrets } from "./record-secrets.js";
+import type {
+  Browser,
+  BrowserOptions,
+  BrowserSessionRecorder,
+  BrowserSessionStep,
+} from "./browser.js";
+import type {
+  EnvironmentConfig,
+  ExecResult,
+  FakeContext,
+  FakeDefinition,
+  FileMount,
+  Project,
+  ProjectSetupContext,
+  ReadyCheck,
+  RuntimeServiceHandle,
+  RuntimeServiceSpec,
+  ServiceConfig,
+  ServiceDefinition,
+  ServiceHandles,
+  ServiceImage,
+  Terminal,
+  TerminalOpts,
+  TerminalResult,
+  TestCase,
+  TestContext,
+  TestSuite,
+  VolumeMount,
+} from "./index.js";
+import type { InternalTerminal, TerminalFrameSink } from "./terminal.js";
+/** A `ServiceDefinition` with its map key threaded back in. Used by the
+ * docker-orchestration helpers below that need to reference a service by
+ * name (container name, image tag, ready-probe host). Widened to
+ * accept any helpers shape so a typed `setup({ helpers })` call from
+ * a component (e.g. K3sHelpers) lines up at the call site. */
+type NamedService = ServiceDefinition<Record<string, unknown>> & { name: string };
+/** Flatten `cfg.services` into a list, copying each map key onto its
+ * definition as `name` (entries keep the map's iteration order). */
+function namedServices(cfg: EnvironmentConfig): NamedService[] {
+  const named: NamedService[] = [];
+  for (const [name, def] of Object.entries(cfg.services)) {
+    named.push({ name, ...def });
+  }
+  return named;
+}
+// NOTE(review): presumably the daemon's default HTTP listen port — its use
+// is not visible in this part of the file; confirm.
+const DEFAULT_PORT = 9876;
+// NOTE(review): presumably the per-test timeout applied when a test case
+// declares no `timeoutMs` — confirm against the test runner.
+const DEFAULT_TEST_TIMEOUT_MS = 60_000;
+// Docker network name that `ensureNetwork` inspects/creates;
+// `SPECTEST_NETWORK` overrides the default.
+const NETWORK_NAME = process.env.SPECTEST_NETWORK ?? "spectest-net";
+// Host directory used as the docker build context and as the root for
+// staged Dockerfiles, seeded files and non-cache volumes;
+// `SPECTEST_WORKSPACE` overrides the default.
+const WORKSPACE = process.env.SPECTEST_WORKSPACE ?? "/workspace";
+// Stable hostname every service container resolves to the host (the
+// `spectest-br0` gateway) — so apps that build or pull images at runtime
+// can point a builder at `spectest-host:5000` (the zot Docker Hub mirror)
+// or `spectest-host:1234` (the shared buildkitd) without hard-coding the
+// gateway IP. Injected into each container's /etc/hosts in runContainer.
+const SPECTEST_HOST_NAME = "spectest-host";
+// The host image-cache gateway, discovered once from the same
+// `registry-mirrors` entry the in-VM dockerd already uses (baked into the
+// local provider's golden /etc/docker/daemon.json). `null` when there's
+// no host cache, so nothing is injected.
+let _hostCacheGateway: string | null | undefined;
+/** Memoised lookup of the host cache gateway: the hostname of the first
+ * `registry-mirrors` URL in /etc/docker/daemon.json, or `null` when the
+ * file is missing/unparseable, lists no mirror, or the URL has no host. */
+function hostCacheGateway(): string | null {
+  if (_hostCacheGateway === undefined) {
+    let gateway: string | null = null;
+    try {
+      const raw = readFileSync("/etc/docker/daemon.json", "utf8");
+      const parsed = JSON.parse(raw) as { "registry-mirrors"?: string[] };
+      const mirror = parsed["registry-mirrors"]?.[0];
+      if (mirror) gateway = new URL(mirror).hostname || null;
+    } catch {
+      // Unreadable file, bad JSON or a malformed URL all mean "no host cache".
+      gateway = null;
+    }
+    _hostCacheGateway = gateway;
+  }
+  return _hostCacheGateway;
+}
+// Directory under which `resolveEntry` looks for `spectest/index.*` when
+// `SPECTEST_PROJECT_DIR` is unset; `SPECTEST_APP_DIR` overrides the default.
+const APP_DIR = process.env.SPECTEST_APP_DIR ?? "/opt/spectest/app";
+// Root CA baked into the base snapshot at base-snapshot build time
+// (see base.rs::BASE_SETUP_SH). Bind-mounted into every service
+// container so apps can verify HTTPS to the daemon's fakes, and
+// referenced when we layer it into each image's system trust store.
+const CA_PATH = process.env.SPECTEST_CA_PATH ?? "/etc/spectest/ca.crt";
+// Path of the CA's private key; `SPECTEST_CA_KEY_PATH` overrides the default.
+const CA_KEY_PATH = process.env.SPECTEST_CA_KEY_PATH ?? "/etc/spectest/ca.key";
+// ────────────────────────────────────────────────────────────────────────
+// Loaded-project state
+// ────────────────────────────────────────────────────────────────────────
+/** The project imported from the entry module plus an id→test index
+ * (rebuilt by `rebuildCatalogue`). */
+interface Loaded {
+  project: Project;
+  byId: Map<string, TestCase<unknown>>;
+}
+// `null` until `loadEnv` succeeds; read it through `requireLoaded`.
+let loaded: Loaded | null = null;
+interface CaseMeta {
+  id: string;
+  name: string;
+  dependsOn?: string;
+  timeoutMs?: number;
+}
+/** Reduce every test of `suite` to its metadata record: id, name, the id of
+ * the test it depends on (if any) and its timeout. A missing suite yields
+ * an empty list. */
+function casesMetadata(suite: TestSuite | undefined): CaseMeta[] {
+  if (!suite) return [];
+  const metas: CaseMeta[] = [];
+  for (const test of suite.tests) {
+    const meta: CaseMeta = {
+      id: test.id,
+      name: test.name,
+      dependsOn: test.dependsOn?.id,
+      timeoutMs: test.timeoutMs,
+    };
+    metas.push(meta);
+  }
+  return metas;
+}
+/** Locate the project entry module. `SPECTEST_PROJECT_ENTRY` wins when it
+ * names an existing file; otherwise the first existing `index.{ts,mts,mjs,js}`
+ * in `SPECTEST_PROJECT_DIR` (default `<APP_DIR>/spectest`) is used.
+ * Throws when no candidate exists. */
+function resolveEntry(): string {
+  const override = process.env.SPECTEST_PROJECT_ENTRY;
+  if (override && existsSync(override)) {
+    return override;
+  }
+  const dir = process.env.SPECTEST_PROJECT_DIR ?? path.join(APP_DIR, "spectest");
+  const hit = ["index.ts", "index.mts", "index.mjs", "index.js"]
+    .map((name) => path.join(dir, name))
+    .find((candidate) => existsSync(candidate));
+  if (hit === undefined) {
+    throw new Error(
+      `could not find project entry in ${dir} (looked for index.ts/.mts/.mjs/.js)`,
+    );
+  }
+  return hit;
+}
+// Import the env entry (`spectest/index.ts`) and load everything that
+// defines the *environment* — services, fakes, project setup — but not the
+// test bodies (those live in `spectest/tests/**`; see `loadTests`). The
+// import URL is deliberately stable (no cache-busting query): test files
+// import this same module (`import { env } from "../index"`) and must resolve
+// to the SAME `env` instance, so their `env.test(...)` calls land in the
+// registry the default-exported Project reads back. Each daemon process
+// imports the entry at most once — an env change always takes the cold path
+// with a fresh daemon — so there's nothing to bust.
+async function loadEnv(): Promise<Project> {
+  const entry = resolveEntry();
+  const mod = await import(pathToFileURL(entry).href);
+  // Prefer the default export; fall back to the module value itself.
+  const hasDefault = mod && typeof mod === "object" && "default" in mod;
+  const project = (hasDefault ? mod.default : mod) as Project | undefined;
+  if (!project?.environment) {
+    throw new Error(
+      `project entry ${entry} must default-export a Project (from env.project(...) where env = defineEnvironment(...))`,
+    );
+  }
+  // Publish the new project first: rebuildCatalogue() reads it back via
+  // requireLoaded().
+  loaded = { project, byId: new Map() };
+  rebuildCatalogue();
+  // Cached convenience clients were built against the previous project;
+  // clear them so the next test rebuilds from the fresh definitions.
+  HELPERS_CACHE.clear();
+  // Parse + validate fakes and service-tls proxies, tearing down any prior
+  // runtime so a reload picks up edits. The listeners themselves are bound
+  // later by startIngress() during /bootstrap.
+  buildIngress(project);
+  return project;
+}
+// Import the test files under `spectest/tests/**` into the already-loaded
+// env, then refresh the catalogue. The split layout keeps test bodies out of
+// the warm-template cache key, so a test-only edit restores the cached env
+// and lands here to pick up the new tests. A no-op for the legacy single-file
+// layout (no `tests/` dir; the suite is already on the default export).
+//
+// Crucially this is only ever called AFTER the warm-template snapshot is
+// captured (cold path) or against a freshly restored VM (warm path), so each
+// test file is imported for the first time in that daemon process — no
+// cache-busting needed and no stale ESM module to fight.
+async function loadTests(): Promise<void> {
+  // Fail fast when no environment has been loaded yet.
+  requireLoaded();
+  const testsDir = path.join(path.dirname(resolveEntry()), "tests");
+  const files = existsSync(testsDir) ? await collectTestFiles(testsDir) : [];
+  // Import one file at a time, in the sorted order collectTestFiles returns.
+  for (const file of files) {
+    await import(pathToFileURL(file).href);
+  }
+  rebuildCatalogue();
+  // The set of tests may differ now; forget cached helpers so the next test
+  // starts from a clean slate.
+  HELPERS_CACHE.clear();
+}
+// (Re)build the id→TestCase index from whatever the loaded project currently
+// exposes as its suite: the lazy registry getter for split layouts, or the
+// frozen explicit suite for inline ones.
+function rebuildCatalogue(): void {
+  const state = requireLoaded();
+  const suite = state.project.tests;
+  // A later test with a duplicate id replaces the earlier one.
+  const entries = suite
+    ? suite.tests.map((t): [string, TestCase<unknown>] => [t.id, t])
+    : [];
+  state.byId = new Map<string, TestCase<unknown>>(entries);
+}
+// Recursively collect importable test modules under `spectest/tests/`, sorted
+// for deterministic import order. Skips declaration files and dependency dirs.
+async function collectTestFiles(dir: string): Promise<string[]> {
+  const skippedDirs = new Set(["node_modules", ".spectest"]);
+  const isTestModule = (name: string): boolean =>
+    /\.(ts|mts|cts|js|mjs|cjs)$/.test(name) && !name.endsWith(".d.ts");
+  const found: string[] = [];
+  // Explicit work-list instead of recursion; visiting order is irrelevant
+  // because the result is sorted before it is returned.
+  const pending: string[] = [dir];
+  while (pending.length > 0) {
+    const current = pending.pop() as string;
+    for (const entry of await fs.readdir(current, { withFileTypes: true })) {
+      const full = path.join(current, entry.name);
+      if (entry.isDirectory()) {
+        if (!skippedDirs.has(entry.name)) pending.push(full);
+      } else if (entry.isFile() && isTestModule(entry.name)) {
+        found.push(full);
+      }
+    }
+  }
+  found.sort();
+  return found;
+}
+/** Return the loaded project state, or throw when nothing is loaded yet. */
+function requireLoaded(): Loaded {
+  if (loaded) return loaded;
+  throw new Error("no project loaded; call POST /load first");
+}
+// ────────────────────────────────────────────────────────────────────────
+// Docker CLI helpers
+// ────────────────────────────────────────────────────────────────────────
+interface CmdResult {
+  stdout: string;
+  stderr: string;
+  code: number;
+}
+/**
+ * Run `file args...` without a shell and capture its output.
+ *
+ * Never rejects. The result's `code` is the child's numeric exit code, `1`
+ * for any other failure (spawn error, killed by signal, output over the
+ * 64 MiB buffer), or `124` when `timeoutMs` elapsed first — in which case
+ * the child is SIGKILLed and stdout is reported empty.
+ *
+ * @param file      Executable to run.
+ * @param args      Argument vector (passed verbatim, no shell expansion).
+ * @param timeoutMs Optional watchdog in milliseconds; `0`/`undefined` disables it.
+ * @param env       Extra environment variables layered over `process.env`.
+ */
+function shx(
+  file: string,
+  args: string[],
+  timeoutMs?: number,
+  env?: Record<string, string>,
+): Promise<CmdResult> {
+  return new Promise((resolve) => {
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    let done = false;
+    // Single exit point: resolves once and cancels the watchdog so a
+    // finished command does not leave a live timer behind.
+    const settle = (result: CmdResult): void => {
+      if (done) return;
+      done = true;
+      if (timer !== undefined) clearTimeout(timer);
+      resolve(result);
+    };
+    const child = execFile(
+      file,
+      args,
+      { maxBuffer: 64 * 1024 * 1024, env: env ? { ...process.env, ...env } : process.env },
+      (err, stdout, stderr) => {
+        // `err.code` is the exit status when numeric; string codes such as
+        // "ENOENT" (and signal kills) collapse to a generic 1.
+        const errCode = (err as { code?: unknown } | null)?.code;
+        const code = typeof errCode === "number" ? errCode : err ? 1 : 0;
+        settle({ stdout: String(stdout), stderr: String(stderr), code });
+      },
+    );
+    if (timeoutMs && timeoutMs > 0) {
+      timer = setTimeout(() => {
+        if (done) return;
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          /* already exited */
+        }
+        settle({ stdout: "", stderr: `timeout after ${timeoutMs}ms`, code: 124 });
+      }, timeoutMs);
+    }
+  });
+}
+/** Run the local `docker` CLI through `shx` with the given arguments,
+ * optional timeout and extra environment. */
+function docker(
+  args: string[],
+  timeoutMs?: number,
+  env?: Record<string, string>,
+): Promise<CmdResult> {
+  const binary = "docker";
+  return shx(binary, args, timeoutMs, env);
+}
+/**
+ * Like `shx` but invokes `onLine` for each line of stdout/stderr as it
+ * streams in, so callers can surface live progress (docker build steps,
+ * image pull layers) into bootstrap progress. Still resolves with the full
+ * captured output + exit code, so existing error handling and post-hoc
+ * parsing (`summarizeBuildKit`) are unchanged.
+ *
+ * Never rejects. `code` is the child's exit code, `1` when it could not be
+ * spawned or exited without a code (the spawn error message is appended to
+ * `stderr`), or `124` on timeout (the child is SIGKILLed and
+ * "\ntimeout after Nms" is appended to `stderr`).
+ *
+ * Lines are split per stream, so a partial line on stdout is never glued to
+ * a chunk from stderr; a final line without a trailing newline is delivered
+ * when the command finishes. A trailing `\r` is stripped from each line.
+ *
+ * @param file      Executable to run.
+ * @param args      Argument vector (no shell).
+ * @param timeoutMs Optional watchdog in milliseconds; `0`/`undefined` disables it.
+ * @param env       Extra environment variables layered over `process.env`.
+ * @param onLine    Progress callback; exceptions it throws are swallowed.
+ */
+function shxStream(
+  file: string,
+  args: string[],
+  timeoutMs: number | undefined,
+  env: Record<string, string> | undefined,
+  onLine: (line: string) => void,
+): Promise<CmdResult> {
+  return new Promise((resolve) => {
+    const child = spawn(file, args, {
+      env: env ? { ...process.env, ...env } : process.env,
+    });
+    const captured = { stdout: "", stderr: "" };
+    // Unterminated tail of each stream, kept separately so interleaved
+    // chunks from the two pipes cannot splice half-lines together.
+    const partial = { stdout: "", stderr: "" };
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    let done = false;
+    const emit = (line: string): void => {
+      try {
+        onLine(line.replace(/\r$/, ""));
+      } catch {
+        /* a progress callback must never break the build */
+      }
+    };
+    const feed = (stream: "stdout" | "stderr", chunk: string): void => {
+      if (done) return;
+      captured[stream] += chunk;
+      const pieces = (partial[stream] + chunk).split("\n");
+      partial[stream] = pieces.pop() ?? "";
+      for (const piece of pieces) emit(piece);
+    };
+    const finish = (code: number, extraStderr?: string): void => {
+      if (done) return;
+      done = true;
+      // Cancel the watchdog so a finished command leaves no live timer.
+      if (timer !== undefined) clearTimeout(timer);
+      // Deliver any final line that did not end in a newline.
+      for (const stream of ["stdout", "stderr"] as const) {
+        if (partial[stream] !== "") emit(partial[stream]);
+        partial[stream] = "";
+      }
+      resolve({
+        stdout: captured.stdout,
+        stderr: extraStderr ? captured.stderr + extraStderr : captured.stderr,
+        code,
+      });
+    };
+    // Decode through the stream so a multi-byte UTF-8 character split
+    // across two chunks is not corrupted.
+    child.stdout?.setEncoding("utf8");
+    child.stderr?.setEncoding("utf8");
+    child.stdout?.on("data", (d) => feed("stdout", String(d)));
+    child.stderr?.on("data", (d) => feed("stderr", String(d)));
+    // Surface why the spawn failed (e.g. ENOENT) instead of an empty stderr.
+    child.on("error", (err) => finish(1, err.message));
+    child.on("close", (code) => finish(code == null ? 1 : code));
+    if (timeoutMs && timeoutMs > 0) {
+      timer = setTimeout(() => {
+        if (done) return;
+        try {
+          child.kill("SIGKILL");
+        } catch {
+          /* already exited */
+        }
+        finish(124, `\ntimeout after ${timeoutMs}ms`);
+      }, timeoutMs);
+    }
+  });
+}
+// ── Live bootstrap progress ──────────────────────────────────────────────
+// During /bootstrap the control plane polls GET /progress (~every 1.5s) and
+// streams this snapshot into the test-run row, so the web UI shows what's
+// being built *before* the suite starts. In-memory only; reset at the top
+// of each bootstrap(). All updaters no-op when no bootstrap is in flight.
+type ServiceProgressStatus =
+  | "pending"
+  | "pulling"
+  | "building"
+  | "prepared"
+  | "starting"
+  | "probing"
+  | "ready"
+  | "failed";
+interface ServiceProgress {
+  name: string;
+  kind: "pull" | "build";
+  status: ServiceProgressStatus;
+  /** Live free-text detail, e.g. "step 4/9 RUN bun install" or "12 layers". */
+  detail?: string;
+}
+interface BootstrapProgress {
+  phase: string;
+  services: ServiceProgress[];
+  startedAt: number;
+  updatedAt: number;
+  done: boolean;
+}
+let BOOTSTRAP_PROGRESS: BootstrapProgress | null = null;
+/** Start a fresh progress snapshot: one `pending` row per service
+ * (`pull` for registry images, `build` for everything else), in the
+ * "Preparing images" phase. Replaces any previous snapshot. */
+function progressInit(services: NamedService[]): void {
+  const rows: ServiceProgress[] = [];
+  for (const svc of services) {
+    const kind = svc.image.type === "registry" ? "pull" : "build";
+    rows.push({ name: svc.name, kind, status: "pending" });
+  }
+  BOOTSTRAP_PROGRESS = {
+    phase: "Preparing images",
+    services: rows,
+    startedAt: Date.now(),
+    updatedAt: Date.now(),
+    done: false,
+  };
+}
+/** Set the headline phase of the current bootstrap snapshot; does nothing
+ * when no snapshot exists. */
+function progressPhase(phase: string): void {
+  const progress = BOOTSTRAP_PROGRESS;
+  if (progress === null) return;
+  progress.phase = phase;
+  progress.updatedAt = Date.now();
+}
+/** Merge `patch` into the progress row of the first service called `name`.
+ * Does nothing when no snapshot exists or the service is unknown. */
+function progressService(name: string, patch: Partial<ServiceProgress>): void {
+  const progress = BOOTSTRAP_PROGRESS;
+  if (progress === null) return;
+  for (const entry of progress.services) {
+    if (entry.name !== name) continue;
+    Object.assign(entry, patch);
+    progress.updatedAt = Date.now();
+    return;
+  }
+}
+/** Mark the current bootstrap snapshot as finished ("Ready"); does nothing
+ * when no snapshot exists. */
+function progressDone(): void {
+  const progress = BOOTSTRAP_PROGRESS;
+  if (progress === null) return;
+  progress.phase = "Ready";
+  progress.done = true;
+  progress.updatedAt = Date.now();
+}
+// BuildKit (docker buildx) gives per-step timing via `--progress=plain`,
+// parallel stages, and `RUN --mount=type=cache`. Detected once: where the
+// buildx plugin isn't installed (e.g. a Freestyle base without it) we fall
+// back to the legacy builder, which takes no `--progress` flag.
+let _buildxAvailable: boolean | undefined;
+/** Whether `docker buildx version` succeeds; the answer is cached after
+ * the first probe. */
+async function hasBuildx(): Promise<boolean> {
+  if (_buildxAvailable !== undefined) return _buildxAvailable;
+  const probe = await docker(["buildx", "version"], 15_000);
+  _buildxAvailable = probe.code === 0;
+  return _buildxAvailable;
+}
+// A single buildkitd runs on the host (see scripts/install-buildkitd.sh),
+// reachable from every VM at the bridge gateway. Building against it as a
+// `remote` buildx builder gives a persistent, shared layer/mount cache that
+// survives forks and warm-template misses — a fresh VM no longer rebuilds
+// from scratch. The build runs on the host (runc-isolated); `--load` pulls
+// the finished image back into the in-VM dockerd. Detected once; if the
+// builder can't be created or buildkitd is unreachable we fall back to the
+// in-VM builder, so a missing/dead buildkitd just means slower builds.
+const REMOTE_BUILDER_ADDR = process.env.SPECTEST_BUILDKIT_ADDR ?? "tcp://10.42.0.1:1234";
+const REMOTE_BUILDER_NAME = "spectest-remote";
+let _remoteBuilder: boolean | undefined;
+/** Register the `remote` buildx builder and check that buildkitd answers.
+ * Resolves `true` when builds can target it, `false` to fall back to the
+ * in-VM builder. The verdict is cached for the life of the process. */
+async function ensureRemoteBuilder(): Promise<boolean> {
+  if (_remoteBuilder !== undefined) return _remoteBuilder;
+  const decide = (usable: boolean): boolean => {
+    _remoteBuilder = usable;
+    return usable;
+  };
+  if (!(await hasBuildx())) return decide(false);
+  // Creating a builder that already exists fails with "existing instance";
+  // that counts as success, which keeps this step idempotent.
+  const created = await docker(
+    ["buildx", "create", "--name", REMOTE_BUILDER_NAME, "--driver", "remote", REMOTE_BUILDER_ADDR],
+    30_000,
+  );
+  const alreadyThere = /existing instance|already exists/i.test(created.stderr);
+  if (created.code !== 0 && !alreadyThere) return decide(false);
+  // `inspect --bootstrap` really connects to buildkitd, so it doubles as
+  // the reachability check; a dead buildkitd fails here.
+  const boot = await docker(["buildx", "inspect", "--bootstrap", REMOTE_BUILDER_NAME], 60_000);
+  const usable = decide(boot.code === 0);
+  if (!usable) {
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[build] remote buildkitd at ${REMOTE_BUILDER_ADDR} unreachable; using in-VM builder:\n${boot.stderr.trim()}`,
+    );
+  }
+  return usable;
+}
+interface BuildStep {
+  name: string;
+  secs: number;
+  cached: boolean;
+}
+// Turn BuildKit `--progress=plain` output into one record per declared
+// step, slowest first. Lines are matched up by their `#N` id: the first
+// `#N [stage] <cmd>` line names the step (cut to 80 chars), `#N DONE <t>s`
+// supplies its duration and `#N CACHED` flags a cache hit (duration 0
+// unless a DONE line gave one). Anything unrecognised is ignored, so
+// garbage input simply produces an empty list.
+function summarizeBuildKit(out: string): BuildStep[] {
+  const declRe = /^#(\d+)\s+\[[^\]]*\]\s+(.+)$/;
+  const doneRe = /^#(\d+)\s+DONE\s+([\d.]+)s/;
+  const cachedRe = /^#(\d+)\s+CACHED/;
+  const labels = new Map<string, string>();
+  const durations = new Map<string, number>();
+  const cacheHits = new Set<string>();
+  out.split("\n").forEach((line) => {
+    const decl = declRe.exec(line);
+    if (decl) {
+      if (!labels.has(decl[1])) labels.set(decl[1], decl[2].trim().slice(0, 80));
+      return;
+    }
+    const finished = doneRe.exec(line);
+    if (finished) {
+      durations.set(finished[1], parseFloat(finished[2]));
+      return;
+    }
+    const hit = cachedRe.exec(line);
+    if (hit) {
+      cacheHits.add(hit[1]);
+      if (!durations.has(hit[1])) durations.set(hit[1], 0);
+    }
+  });
+  return [...labels]
+    .map(
+      ([id, name]): BuildStep => ({
+        name,
+        secs: durations.get(id) ?? 0,
+        cached: cacheHits.has(id),
+      }),
+    )
+    .sort((a, b) => b.secs - a.secs);
+}
+// ────────────────────────────────────────────────────────────────────────
+// Bootstrap stages
+// ────────────────────────────────────────────────────────────────────────
+/** Make sure the docker network `NETWORK_NAME` exists, creating it when
+ * `docker network inspect` fails. Throws if the creation fails. */
+async function ensureNetwork(): Promise<void> {
+  const existing = await docker(["network", "inspect", NETWORK_NAME], 30_000);
+  if (existing.code !== 0) {
+    const created = await docker(["network", "create", NETWORK_NAME], 60_000);
+    if (created.code !== 0) {
+      const reason = created.stderr.trim() || created.stdout.trim();
+      throw new Error(`docker network create ${NETWORK_NAME} failed: ${reason}`);
+    }
+  }
+}
+/** Turn a path into a single directory-name-safe segment: leading slashes
+ * are dropped, every character outside `[A-Za-z0-9_-]` becomes `-`, and
+ * leading/trailing dashes are trimmed. */
+function sanitizeSegment(p: string): string {
+  const withoutRoot = p.replace(/^\/+/, "");
+  const dashed = withoutRoot.replace(/[^A-Za-z0-9_-]/g, "-");
+  const trimmed = dashed.replace(/^-+|-+$/g, "");
+  return trimmed;
+}
+/** Host directory backing `vol` for `service`. An absolute `source` is used
+ * verbatim; otherwise the path lives under a per-service root, named after
+ * the relative `source` or, failing that, the sanitised `target`. */
+function resolveHostPath(service: string, vol: VolumeMount): string {
+  const { source } = vol;
+  if (source?.startsWith("/")) return source;
+  // Cache volumes are rooted outside /workspace so the delta-restore
+  // teardown (rm -rf /workspace) leaves them alone — they only ever hold
+  // content-addressed accelerator data (see VolumeMount.cache), not env state.
+  const root = vol.cache
+    ? ["/var/cache/spectest/volumes", service]
+    : [WORKSPACE, ".spectest", "volumes", service];
+  const leaf = source ? source.replace(/^\/+/, "") : sanitizeSegment(vol.target);
+  return path.join(...root, leaf);
+}
+/// Where the daemon records every ABSOLUTE-source, non-cache volume dir it
+/// has created, one path per line. The delta-restore teardown wipes the
+/// listed dirs: they live outside /workspace (which the teardown removes
+/// wholesale) and outside /var/cache/spectest (deliberately kept), so
+/// without this manifest a `source: "/data/pg"` volume would carry the
+/// previous generation's data into a "fresh" environment. tmpfs-backed
+/// (/run) — survives snapshots like all guest memory, dies with the VM.
+const VOLUME_DIRS_MANIFEST = "/run/spectest-volume-dirs";
+const ABS_VOLUME_DIRS = new Set<string>();
+/// Remember `host` and rewrite the manifest with every directory recorded
+/// so far, one per line. A directory already recorded is a no-op.
+async function recordAbsoluteVolumeDir(host: string): Promise<void> {
+  const alreadyRecorded = ABS_VOLUME_DIRS.has(host);
+  if (!alreadyRecorded) {
+    ABS_VOLUME_DIRS.add(host);
+    const manifest = Array.from(ABS_VOLUME_DIRS, (dir) => `${dir}\n`).join("");
+    await fs.writeFile(VOLUME_DIRS_MANIFEST, manifest);
+  }
+}
+/** Create the host directory behind each of the service's volumes and
+ * return the matching `--volume=` flags (with `:ro` for read-only ones).
+ * Absolute-source directories outside /var/cache/spectest/ are also
+ * recorded in the volume-dirs manifest. */
+async function ensureVolumes(svc: NamedService): Promise<string[]> {
+  const flags: string[] = [];
+  for (const vol of svc.volumes ?? []) {
+    const host = resolveHostPath(svc.name, vol);
+    await fs.mkdir(host, { recursive: true });
+    const isAbsoluteSource = vol.source?.startsWith("/") ?? false;
+    const underCacheRoot = host.startsWith("/var/cache/spectest/");
+    if (isAbsoluteSource && !underCacheRoot) {
+      await recordAbsoluteVolumeDir(host);
+    }
+    const suffix = vol.readOnly ? ":ro" : "";
+    flags.push(`--volume=${host}:${vol.target}${suffix}`);
+  }
+  return flags;
+}
+// Materialize `svc.files` onto the VM host and return `--volume` flags
+// bind-mounting each into the container (read-only). Single-file bind
+// mounts mean the seeded config lands in place *before the container's
+// entrypoint runs* — the one injection point earlier than any setup
+// hook. Staging path mirrors ensureVolumes: a per-service dir with one
+// file per entry, named after the sanitised in-container path, so the
+// content is captured by snapshots like everything else under WORKSPACE.
+//
+// Throws when a file path is not absolute or a `mode` is not an octal
+// string. Returns an empty list when the service declares no files.
+async function ensureFiles(svc: NamedService): Promise<string[]> {
+  const flags: string[] = [];
+  if (!svc.files || svc.files.length === 0) return flags;
+  const dir = path.join(WORKSPACE, ".spectest", "files", svc.name);
+  await fs.mkdir(dir, { recursive: true });
+  // Staged names already handed out in this call. sanitizeSegment is lossy
+  // (`/etc/a.b` and `/etc/a-b` both become `etc-a-b`), so without this two
+  // files could share one staging file and the later content would be
+  // mounted at both paths.
+  const staged = new Set<string>();
+  for (const f of svc.files) {
+    if (!f.path.startsWith("/")) {
+      throw new Error(
+        `service "${svc.name}": file path ${JSON.stringify(f.path)} must be absolute`,
+      );
+    }
+    // `{{SPECTEST_SERVICE}}` expands to this service's name (its
+    // services-map key) so a component can author self-referential
+    // config without knowing the key the user will choose — e.g. k3s's
+    // registries.yaml keying on `<key>.internal:5000`.
+    const content = f.content.replaceAll("{{SPECTEST_SERVICE}}", svc.name);
+    const base = sanitizeSegment(f.path);
+    // `~` never appears in sanitised output, so a suffixed name cannot
+    // clash with another file's natural name. Non-colliding files keep
+    // exactly the staging name they had before.
+    let stagedName = base;
+    for (let n = 2; staged.has(stagedName); n++) stagedName = `${base}~${n}`;
+    staged.add(stagedName);
+    const host = path.join(dir, stagedName);
+    await fs.writeFile(host, content);
+    if (f.mode) {
+      const mode = parseInt(f.mode, 8);
+      if (Number.isNaN(mode)) {
+        throw new Error(
+          `service "${svc.name}": file ${JSON.stringify(f.path)} has invalid octal mode ${JSON.stringify(f.mode)}`,
+        );
+      }
+      await fs.chmod(host, mode);
+    }
+    flags.push(`--volume=${host}:${f.path}:ro`);
+  }
+  return flags;
+}
+/** Local image tag under which a service's prepared image is stored. */
+function imageTag(name: string): string {
+  return "spectest/" + name + ":latest";
+}
+const DEFAULT_DOCKERIGNORE: string[] = [
+  ".git",
+  ".spectest",
+  "spectest",
+  "node_modules",
+  "target",
+  "__pycache__",
+  ".venv",
+  ".env",
+  ".env.local",
+  ".env.*",
+  "dist",
+  "build",
+  ".next",
+  ".turbo",
+  ".DS_Store",
+];
+/** Render `.dockerignore` content: the default patterns followed by every
+ * extra `exclude` pattern of the dockerfile-based services, de-duplicated
+ * in first-seen order, one per line with a trailing newline. */
+function unionDockerignore(services: NamedService[]): string {
+  // A Set keeps insertion order, so defaults stay first and extras follow
+  // in the order they were first encountered.
+  const lines = new Set<string>(DEFAULT_DOCKERIGNORE);
+  for (const svc of services) {
+    if (svc.image.type !== "dockerfile") continue;
+    for (const pattern of svc.image.exclude ?? []) {
+      lines.add(pattern);
+    }
+  }
+  return [...lines].join("\n") + "\n";
+}
+/// In-flight/finished dockerfile builds of this bootstrap, keyed by
+/// sha256(dockerfile content + exclude list). Services that share an
+/// identical image definition (e.g. an API server and a worker running the
+/// same codebase with different entrypoints) build ONCE; the others wait
+/// and `docker tag` the result. Cleared at every bootstrap() — the build
+/// CONTEXT (/workspace) is an input too, so dedup is only valid within one
+/// workspace generation (runtime services started mid-test share it).
+const BUILD_DEDUP = new Map<
+  string,
+  { name: string; promise: Promise<{ tag: string; buildSteps?: BuildStep[] }> }
+>();
+/**
+ * Dedup key for a dockerfile image: the hex SHA-256 of the Dockerfile
+ * content, a NUL separator, and the JSON-encoded exclude list (a missing
+ * list hashes like an empty one).
+ *
+ * Uses `node:crypto` rather than a runtime-specific global so the key can
+ * be computed under any runtime that provides the Node standard library.
+ */
+function buildContentKey(image: { content: string; exclude?: string[] }): string {
+  return createHash("sha256")
+    .update(image.content)
+    .update("\0")
+    .update(JSON.stringify(image.exclude ?? []))
+    .digest("hex");
+}
+/**
+ * Make the image for `svc` available locally as `imageTag(svc.name)`.
+ *
+ * Registry images are pulled and re-tagged, then get the CA layered in.
+ * Dockerfile images are built via `buildServiceImage`; with
+ * `opts.dedup` set, services whose Dockerfile content + exclude list are
+ * identical share a single build and the followers only `docker tag` it.
+ *
+ * Live progress is reported through `progressService`; every pull, build
+ * and tag failure marks the service `failed` before throwing.
+ *
+ * @returns the local tag and, for BuildKit builds, the slowest build steps.
+ * @throws when a pull, build or tag command exits non-zero.
+ */
+async function prepareServiceImage(
+  svc: NamedService,
+  opts?: { dedup?: boolean },
+): Promise<{ tag: string; buildSteps?: BuildStep[] }> {
+  const tag = imageTag(svc.name);
+  if (svc.image.type === "registry") {
+    const ref = svc.image.reference;
+    // Always pull — even when the (delta-restored) store already has the
+    // ref. With the layers present this costs ~a manifest round-trip per
+    // image ("Already exists" all the way down, through the zot mirror),
+    // off the bootstrap critical path; skipping it would freeze floating
+    // tags (`foo:latest`) at whatever the previous generation pulled, for
+    // as long as the delta chain lives — a silent semantic divergence
+    // from the cold build a delta restore must be equivalent to.
+    progressService(svc.name, { status: "pulling", detail: `pulling ${ref}` });
+    let layers = 0;
+    const pull = await shxStream("docker", ["pull", ref], 900_000, undefined, (line) => {
+      // `docker pull` (no TTY) prints one line per layer: "<id>: Pull
+      // complete" / "Already exists". Count them for a live layer tally.
+      if (/(?:Pull complete|Already exists)\s*$/.test(line)) {
+        layers++;
+        progressService(svc.name, { status: "pulling", detail: `${layers} layers` });
+      }
+    });
+    if (pull.code !== 0) {
+      progressService(svc.name, { status: "failed" });
+      throw new Error(`docker pull ${ref} failed: ${pull.stderr.trim() || pull.stdout.trim()}`);
+    }
+    const tagr = await docker(["tag", ref, tag], 30_000);
+    if (tagr.code !== 0) {
+      // Report the failure like the pull failure above, so the progress
+      // row does not stay stuck on "pulling".
+      progressService(svc.name, { status: "failed" });
+      throw new Error(`docker tag ${ref} ${tag} failed: ${tagr.stderr.trim()}`);
+    }
+    await ensureCaTrustedImage(svc.name, tag);
+    return { tag };
+  }
+  // Dockerfile build. Within one bootstrap, identical definitions (shared
+  // codebase images) dedup to a single build. Only bootstrap opts in: the
+  // dedup key is dockerfile content + exclude, but the build CONTEXT
+  // (/workspace) is an input too — a runtime service started mid-test
+  // after setup/test code mutated /workspace must rebuild, not share a
+  // pre-mutation image.
+  const image = svc.image;
+  if (opts?.dedup) {
+    const key = buildContentKey(image);
+    const inflight = BUILD_DEDUP.get(key);
+    if (inflight) {
+      progressService(svc.name, { status: "building", detail: `sharing ${inflight.name}'s build` });
+      let first;
+      try {
+        first = await inflight.promise;
+      } catch (err) {
+        progressService(svc.name, { status: "failed" });
+        throw err;
+      }
+      // first.tag is already CA-layered; tagging it covers this service too.
+      const tagr = await docker(["tag", first.tag, tag], 30_000);
+      if (tagr.code !== 0) {
+        // Same reporting as a failed shared build: never leave the row
+        // on "building" once this service cannot be prepared.
+        progressService(svc.name, { status: "failed" });
+        throw new Error(`docker tag ${first.tag} ${tag} failed: ${tagr.stderr.trim()}`);
+      }
+      progressService(svc.name, { status: "prepared" });
+      return { tag, buildSteps: first.buildSteps };
+    }
+    const promise = buildServiceImage(svc.name, image, tag);
+    BUILD_DEDUP.set(key, { name: svc.name, promise });
+    try {
+      return await promise;
+    } catch (err) {
+      // Let a sharer arriving later rebuild rather than inherit this
+      // build's failure forever.
+      BUILD_DEDUP.delete(key);
+      throw err;
+    }
+  }
+  return buildServiceImage(svc.name, image, tag);
+}
+/** Write the service's Dockerfile under WORKSPACE, build it as `tag` with
+ * the best available builder (remote buildkitd, local BuildKit, or the
+ * legacy builder), stream step progress, then layer the spectest CA into
+ * the result. Throws when the build exits non-zero. */
+async function buildServiceImage(
+  name: string,
+  image: { content: string; exclude?: string[] },
+  tag: string,
+): Promise<{ tag: string; buildSteps?: BuildStep[] }> {
+  const dfDir = path.join(WORKSPACE, ".spectest", "services", name);
+  await fs.mkdir(dfDir, { recursive: true });
+  const dfPath = path.join(dfDir, "Dockerfile");
+  await fs.writeFile(dfPath, image.content);
+  const useRemote = await ensureRemoteBuilder();
+  // The remote builder and a local buildx are both BuildKit and print
+  // per-step timing to stderr under `--progress=plain` (parsed further
+  // down); the legacy in-VM builder has no such flag.
+  const useBuildKit = useRemote || (await hasBuildx());
+  const buildEnv: Record<string, string> = {};
+  const target = ["-t", tag, "-f", dfPath];
+  let buildArgs: string[];
+  if (useRemote) {
+    // Build on the shared host-side buildkitd (persistent cross-VM cache).
+    // `--load` brings the finished image back into the in-VM dockerd so
+    // runContainer can `docker run` it; the context (WORKSPACE minus
+    // .dockerignore) is streamed to buildkitd over the bridge.
+    buildArgs = [
+      "buildx", "build",
+      "--builder", REMOTE_BUILDER_NAME,
+      "--load",
+      "--progress=plain",
+      ...target, WORKSPACE,
+    ];
+  } else if (useBuildKit) {
+    buildArgs = ["build", ...target, "--progress=plain", WORKSPACE];
+    buildEnv.DOCKER_BUILDKIT = "1";
+  } else {
+    buildArgs = ["build", ...target, WORKSPACE];
+  }
+  // Map one output line to a live "current step" detail, if it declares a
+  // step. BuildKit prints `#N [<stage> M/N] <cmd>`, the legacy builder
+  // `Step M/N : <cmd>`.
+  const stepDetail = (line: string): string | undefined => {
+    const kit = line.match(/^#\d+\s+\[([^\]]*)\]\s+(.+)$/);
+    if (kit) {
+      const step = kit[1].match(/\d+\/\d+/)?.[0];
+      const cmd = kit[2].trim().slice(0, 60);
+      return step ? `step ${step} ${cmd}` : cmd;
+    }
+    const legacy = line.match(/^Step (\d+\/\d+)\s*:\s*(.+)$/);
+    if (legacy) {
+      return `step ${legacy[1]} ${legacy[2].trim().slice(0, 60)}`;
+    }
+    return undefined;
+  };
+  progressService(name, { status: "building", detail: "starting build" });
+  const build = await shxStream("docker", buildArgs, 1_800_000, buildEnv, (line) => {
+    const detail = stepDetail(line);
+    if (detail !== undefined) {
+      progressService(name, { status: "building", detail });
+    }
+  });
+  if (build.code !== 0) {
+    progressService(name, { status: "failed" });
+    throw new Error(
+      `docker build for ${name} failed:\n${build.stderr.trim()}\n${build.stdout.trim()}`,
+    );
+  }
+  // Keep at most the twelve slowest steps of ≥1s: enough for profiling,
+  // small enough to travel in the /bootstrap response and the journal.
+  const buildSteps: BuildStep[] | undefined = useBuildKit
+    ? summarizeBuildKit(build.stderr)
+        .filter((s) => s.secs >= 1)
+        .slice(0, 12)
+    : undefined;
+  // Add the spectest CA to the image's system trust store for apps that
+  // read the system bundle (Go, Java, CLIs ignoring the SSL_CERT_FILE env
+  // vars) so they accept HTTPS to fakes. Best-effort: images lacking
+  // `update-ca-certificates` / `update-ca-trust` (distroless, scratch)
+  // rely on the env-var path that `runContainer` sets.
+  await ensureCaTrustedImage(name, tag);
+  return { tag, buildSteps };
+}
+/**
+ * Re-tag `tag` as a derivative image that carries the spectest root CA in
+ * its system trust store, so the rest of the orchestrator (runContainer,
+ * image cache) keeps using the same tag.
+ *
+ * Does nothing when `CA_PATH` is absent. A failed layer build is only
+ * logged: the env-var injection in runContainer is the universal fallback,
+ * so clients that honour it (most Node/Python/Ruby/AWS clients) still trust
+ * the CA even when the image's trust store can't be updated.
+ *
+ * @param serviceName Names the build-context directory and the warning.
+ * @param tag Image to build FROM; the result is tagged with the same name.
+ */
+async function ensureCaTrustedImage(serviceName: string, tag: string): Promise<void> {
+  // Daemon running outside a base-snapshot VM (dev/test): nothing to layer.
+  if (!existsSync(CA_PATH)) return;
+  const contextDir = path.join(WORKSPACE, ".spectest", "ca-trust", serviceName);
+  await fs.mkdir(contextDir, { recursive: true });
+  await fs.copyFile(CA_PATH, path.join(contextDir, "spectest-ca.crt"));
+  // The RUN step tries the Debian-style tool, then the RHEL-style tool, and
+  // otherwise just prints a note instead of failing.
+  const trustLayerDockerfile = `FROM ${tag}
+COPY spectest-ca.crt /usr/local/share/ca-certificates/spectest-ca.crt
+RUN if command -v update-ca-certificates >/dev/null 2>&1; then \\
+      update-ca-certificates; \\
+    elif command -v update-ca-trust >/dev/null 2>&1; then \\
+      cp /usr/local/share/ca-certificates/spectest-ca.crt /etc/pki/ca-trust/source/anchors/spectest-ca.crt && update-ca-trust extract; \\
+    else \\
+      echo "[spectest] no system CA trust tool in image; env-var trust only"; \\
+    fi
+`;
+  await fs.writeFile(path.join(contextDir, "Dockerfile"), trustLayerDockerfile);
+  const result = await docker(["build", "-t", tag, contextDir], 300_000);
+  if (result.code === 0) return;
+  const output = result.stderr.trim() || result.stdout.trim();
+  // eslint-disable-next-line no-console
+  console.warn(
+    `[ca-trust] could not layer spectest CA into ${serviceName} (${tag}); env-var fallback only:\n${output}`,
+  );
+}
+/**
+ * Start service `svc` as a detached container from image `tag` on the
+ * spectest network, replacing any leftover container of the same name.
+ *
+ * @param svc Service definition (name, env, workdir, command/args, …).
+ * @param tag Image to run.
+ * @param volumeFlags Pre-rendered `docker run` volume flags, passed through.
+ * @param extraAliases Additional `--network-alias` values (see below).
+ * @throws Error when `command` and `args` are both set, or when `docker run`
+ *   exits non-zero.
+ */
+async function runContainer(
+  svc: NamedService,
+  tag: string,
+  volumeFlags: string[],
+  // Extra `--network-alias`es beyond the lowered `aliasesByService`. Used by
+  // runtime `startService` (whose service isn't in LOWERED) to give the new
+  // container docker-native multi-label resolution for its `hostnames`.
+  extraAliases: string[] = [],
+): Promise<void> {
+  // `command` runs via sh -c, replacing the image entrypoint; `args` is a
+  // plain CMD override (`docker run <image> <args…>`) that keeps the
+  // entrypoint — what init-wrapped images (postgres) need for extra flags.
+  // Checked before anything is touched so an invalid definition does not
+  // remove an existing container and then fail.
+  if (svc.command && svc.args?.length) {
+    throw new Error(
+      `service ${svc.name}: \`command\` and \`args\` are mutually exclusive ` +
+        `(command replaces the entrypoint with /bin/sh -c; args keeps it)`,
+    );
+  }
+  // Idempotent: clean up any leftover container with the same name.
+  await docker(["rm", "-f", svc.name], 30_000);
+  const args = [
+    "run",
+    "-d",
+    "--restart=no",
+    `--name=${svc.name}`,
+    `--hostname=${svc.name}`,
+    `--network=${NETWORK_NAME}`,
+    // Every service is reachable at `<name>.internal` as well as its
+    // bare `<name>`. The fully-qualified form is what kubeconfigs and
+    // other tooling that expect a multi-label hostname should use; it's
+    // resolved both inside containers (Docker's embedded DNS) and on
+    // the VM host (spectest-resolver scans aliases).
+    `--network-alias=${svc.name}.internal`,
+  ];
+  // Extra peer aliases for this service — lowered from `hostnames` and any
+  // dnsName(h, { service }) into LOWERED.aliasesByService, plus any passed
+  // explicitly by a runtime startService (not present in LOWERED).
+  for (const h of [...(LOWERED.aliasesByService[svc.name] ?? []), ...extraAliases]) {
+    args.push(`--network-alias=${h}`);
+  }
+  // Bound TCP give-up time inside THIS container's network namespace.
+  // net.ipv4.tcp_retries2 is per-netns and a fresh netns resets to the kernel
+  // default (15 ≈ ~15 min of RTO backoff), so lowering it on the guest's init
+  // netns (BASE_SETUP_SH) does NOT reach containers — and the connections that
+  // actually wedge run here: buildkit/buildctl pulling base images + exporting
+  // cache, and the k3s container's containerd pulling images, all to the host
+  // zot over the VM↔host path. On a lost-retransmit (transient loss under
+  // concurrent forks) such a flow otherwise stalls a build/pull for minutes.
+  // Setting it per container resets a genuinely-stuck flow in ~tens of seconds
+  // so the client retries on a fresh connection; live connections keep getting
+  // ACKs and are unaffected. Safe because every service runs on the
+  // spectest-net bridge (own netns), never --network=host where net.* is denied.
+  args.push("--sysctl", "net.ipv4.tcp_retries2=6");
+  // Wire every ingress hostname (fakes, TLS-terminated proxies, and any
+  // dnsName(h, { ingress: true })) into the container's /etc/hosts so
+  // `fetch("http://api.stripe.com")` or `fetch("https://app.test")` from
+  // app code reaches the daemon's ingress listener via the bridge gateway.
+  // /etc/hosts beats Docker's embedded DNS (127.0.0.11), so we don't need
+  // to touch the container's resolver settings.
+  if (cachedGatewayIp) {
+    for (const h of LOWERED.ingressHosts) {
+      args.push(`--add-host=${h}:${cachedGatewayIp}`);
+    }
+  }
+  // Resolve `spectest-host` to the host image-cache gateway so apps can
+  // address the zot mirrors / shared buildkitd by name (see
+  // SPECTEST_HOST_NAME). Skipped where there's no host cache.
+  const hostGw = hostCacheGateway();
+  if (hostGw) args.push(`--add-host=${SPECTEST_HOST_NAME}:${hostGw}`);
+  if (svc.workdir) args.push(`--workdir=${svc.workdir}`);
+  // Trust the spectest root CA from inside the container. Bind-mount
+  // the cert + set the conventional env vars so language runtimes
+  // (Node, Python requests/httpx, AWS SDKs) pick it up without
+  // touching the image's system trust store. This env-var path is the
+  // belt-and-braces fallback for images whose system trust store could
+  // not be updated (no update-ca-certificates).
+  //
+  // Only when the CA file actually exists: `docker run -v` creates a
+  // missing bind source on the host as a *directory*, which would leave a
+  // directory at CA_PATH (later mistaken for the CA by existsSync checks)
+  // and point the CA-bundle variables at something that is not a bundle.
+  if (existsSync(CA_PATH)) {
+    args.push(`--volume=${CA_PATH}:${CA_PATH}:ro`);
+    args.push("-e", `NODE_EXTRA_CA_CERTS=${CA_PATH}`);
+    args.push("-e", `SSL_CERT_FILE=${CA_PATH}`);
+    args.push("-e", `REQUESTS_CA_BUNDLE=${CA_PATH}`);
+    args.push("-e", `AWS_CA_BUNDLE=${CA_PATH}`);
+  }
+  // Service env comes after the CA variables, so a service may override them.
+  if (svc.env) {
+    for (const [k, v] of Object.entries(svc.env)) {
+      args.push("-e", `${k}=${v}`);
+    }
+  }
+  for (const flag of volumeFlags) args.push(flag);
+  if (svc.privileged) args.push("--privileged");
+  for (const p of svc.tmpfs ?? []) args.push(`--tmpfs=${p}`);
+  if (svc.cgroupns) args.push(`--cgroupns=${svc.cgroupns}`);
+  // Options must precede the image; everything after `tag` is the command.
+  if (svc.command) args.push("--entrypoint=/bin/sh");
+  args.push(tag);
+  if (svc.command) args.push("-c", svc.command);
+  else if (svc.args?.length) args.push(...svc.args);
+  const r = await docker(args, 300_000);
+  if (r.code !== 0) {
+    throw new Error(
+      `docker run ${svc.name} failed: ${r.stderr.trim() || r.stdout.trim()}`,
+    );
+  }
+}
+/**
+ * TCP readiness probe: resolves `true` once a connection to `host:port` is
+ * established, `false` on a socket error or after 2s of inactivity. Never
+ * rejects; the socket is destroyed before the result is reported.
+ */
+async function probeTcp(host: string, port: number): Promise<boolean> {
+  return new Promise<boolean>((resolve) => {
+    const socket = net.createConnection({ host, port });
+    let done = false;
+    // First event wins; later events (e.g. an error after a timeout) are
+    // ignored so the promise settles exactly once.
+    const settle = (reachable: boolean): void => {
+      if (done) return;
+      done = true;
+      try {
+        socket.destroy();
+      } catch {
+        /* ignore */
+      }
+      resolve(reachable);
+    };
+    socket.setTimeout(2000);
+    socket.once("timeout", () => settle(false));
+    socket.once("error", () => settle(false));
+    socket.once("connect", () => settle(true));
+  });
+}
+/**
+ * HTTP readiness probe: GET `http://<host>:<port><urlPath>` with a 5s
+ * budget.
+ *
+ * @returns `true` only when the response status is OK (`res.ok`); `false`
+ *   on any other status, network error, or timeout. Never rejects.
+ */
+async function probeHttp(host: string, port: number, urlPath: string): Promise<boolean> {
+  const ctrl = new AbortController();
+  const to = setTimeout(() => ctrl.abort(), 5000);
+  try {
+    // Use the pristine fetch captured at module load rather than
+    // `globalThis.fetch`: a test-scoped wrapper may have replaced the global,
+    // and the daemon's own readiness polls should neither be attributed to a
+    // test nor receive its wrapped response objects.
+    const res = await NATIVE_FETCH(`http://${host}:${port}${urlPath}`, {
+      signal: ctrl.signal,
+    });
+    return res.ok;
+  } catch {
+    return false;
+  } finally {
+    clearTimeout(to);
+    // Only the status matters. Aborting discards the unread body so a poll
+    // loop doesn't accumulate half-read responses; it is a no-op when the
+    // request already failed or timed out.
+    ctrl.abort();
+  }
+}
+/**
+ * Exec readiness probe: runs `sh -c <command>` inside container `name`
+ * (10s budget passed to `docker`) and reports whether it exited with 0.
+ */
+async function probeExec(name: string, command: string): Promise<boolean> {
+  const { code } = await docker(["exec", name, "sh", "-c", command], 10_000);
+  return code === 0;
+}
+/**
+ * Poll `svc.readyCheck` until it passes or its timeout (default 60s)
+ * elapses. Returns immediately when the service declares no ready check.
+ *
+ * @throws Error carrying the last 200 container log lines when the service
+ *   is still not ready at the deadline.
+ */
+async function waitForReady(svc: NamedService): Promise<void> {
+  const check: ReadyCheck | undefined = svc.readyCheck;
+  if (!check) return;
+  const timeoutSecs = check.timeoutSecs ?? 60;
+  const deadline = Date.now() + timeoutSecs * 1000;
+  // Ramped poll: a flat 500ms quantized every service's ready latency (and
+  // compounds down dependsOn chains). Quick early probes catch fast
+  // services, the ramp caps polling load on slow ones. Exec probes keep a
+  // higher floor because each attempt spawns a docker exec.
+  const backoffMs =
+    check.type === "exec" ? [250, 250, 400, 400, 500] : [50, 100, 150, 250, 400, 500];
+  const lastStep = backoffMs.length - 1;
+  // One probe attempt of whichever kind the check declares.
+  const probeOnce = (): Promise<boolean> => {
+    switch (check.type) {
+      case "tcp":
+        return probeTcp(svc.name, check.port);
+      case "http":
+        return probeHttp(svc.name, check.port, check.path ?? "/");
+      default:
+        return probeExec(svc.name, check.command);
+    }
+  };
+  for (let attempt = 0; Date.now() < deadline; attempt++) {
+    if (await probeOnce()) return;
+    // Past the end of the ramp the last delay repeats.
+    const pauseMs = backoffMs[Math.min(attempt, lastStep)]!;
+    await new Promise((wake) => setTimeout(wake, pauseMs));
+  }
+  const logs = await docker(["logs", "--tail=200", svc.name], 30_000);
+  throw new Error(
+    `service ${svc.name} not ready within ${timeoutSecs}s. Recent container logs:\n${logs.stdout}\n${logs.stderr}`,
+  );
+}
+/**
+ * Check that the `dependsOn` graph is well-formed and hand back the
+ * name→service index used to walk it, so the DAG runner can assume a clean
+ * graph.
+ *
+ * @throws Error `service <a> depends on unknown service <b>` for a
+ *   dependency naming no listed service.
+ * @throws Error `service dependency cycle` when the graph has a cycle
+ *   (a service depending on itself included).
+ */
+function validateServiceGraph(services: NamedService[]): Map<string, NamedService> {
+  const byName = new Map(services.map((s) => [s.name, s]));
+  // Pass 1: every dependency must name a listed service.
+  for (const svc of services) {
+    const unknown = (svc.dependsOn ?? []).find((dep) => !byName.has(dep));
+    if (unknown !== undefined) {
+      throw new Error(`service ${svc.name} depends on unknown service ${unknown}`);
+    }
+  }
+  // Pass 2: depth-first search. `onStack` holds the services on the current
+  // path, `finished` those fully explored; meeting an `onStack` service again
+  // means the path loops back on itself.
+  const onStack = new Set<string>();
+  const finished = new Set<string>();
+  const explore = (name: string): void => {
+    onStack.add(name);
+    for (const dep of byName.get(name)!.dependsOn ?? []) {
+      if (onStack.has(dep)) throw new Error("service dependency cycle");
+      if (!finished.has(dep)) explore(dep);
+    }
+    onStack.delete(name);
+    finished.add(name);
+  };
+  for (const svc of services) {
+    if (!finished.has(svc.name)) explore(svc.name);
+  }
+  return byName;
+}
+/**
+ * Start every service as soon as its own dependencies permit.
+ *
+ * A service begins the moment all of its `dependsOn` services have finished
+ * `startOne` — it does not wait for a whole topological "level". Unrelated
+ * branches therefore run concurrently, and a slow service only holds up its
+ * own transitive dependents. Each service's promise is memoized by name, so
+ * `startOne` runs once per service even when several dependents share it,
+ * and a failed dependency rejects every dependent awaiting it.
+ *
+ * @param services Services to start; the graph is validated first.
+ * @param startOne Brings one service up; its rejection propagates.
+ */
+async function startServices(
+  services: NamedService[],
+  startOne: (svc: NamedService) => Promise<void>,
+): Promise<void> {
+  const byName = validateServiceGraph(services);
+  const inFlight = new Map<string, Promise<void>>();
+  // Waits for the service's dependencies, then starts the service itself.
+  async function startAfterDeps(svc: NamedService): Promise<void> {
+    const deps: Promise<void>[] = [];
+    for (const depName of svc.dependsOn ?? []) {
+      const dep = byName.get(depName);
+      if (dep) deps.push(launch(dep));
+    }
+    await Promise.all(deps);
+    await startOne(svc);
+  }
+  // Memoizing entry point: the first caller creates the promise, later
+  // callers share it.
+  function launch(svc: NamedService): Promise<void> {
+    let pending = inFlight.get(svc.name);
+    if (!pending) {
+      pending = startAfterDeps(svc);
+      inFlight.set(svc.name, pending);
+    }
+    return pending;
+  }
+  await Promise.all(services.map((svc) => launch(svc)));
+}
+// ────────────────────────────────────────────────────────────────────────
+// Ingress — in-daemon HTTP/HTTPS listeners that route by Host header.
+//
+// Two kinds of routes share the same listeners:
+//
+//   * Fakes — in-daemon mock APIs. Each fake declares `hostnames` and a
+//     `port` (default 80); the request hits the fake's handler with the
+//     fake's `state`. HTTPS always serves on 443 (SNI per hostname,
+//     leaf cert signed by the in-VM root CA).
+//
+//   * Service TLS — reverse-proxy fronts for user services. Each
+//     `services.<name>.tls` entry declares `{ hostname, port }`; the
+//     daemon binds the hostname on :80 AND :443 and proxies each
+//     request to `http://<service>:<port>` inside the docker network.
+//     WebSocket upgrades are bridged. The leaf cert is signed by the
+//     same root CA, so `ctx.browser()` and peer services trust it.
+//
+// Both listeners bind on 0.0.0.0 so containers reach them via the
+// bridge gateway IP (also written into /run/spectest-fakes.json for
+// spectest-resolver and injected as --add-host on every container).
+//
+// Per-fake `state` is plain JS memory and lives across snapshot/fork
+// along with the rest of the daemon — every fork sees its own copy.
+// ────────────────────────────────────────────────────────────────────────
+/** Path of the ingress hostname registry file. Taken from the
+ * `SPECTEST_FAKES_REGISTRY` environment variable when set, otherwise
+ * `/run/spectest-fakes.json`. */
+const FAKES_REGISTRY_PATH =
+  process.env.SPECTEST_FAKES_REGISTRY ?? "/run/spectest-fakes.json";
+/** HTTP port used for a fake whose definition does not declare `port`. */
+const DEFAULT_FAKE_PORT = 80;
+/** Fixed HTTPS port shared by every route (fakes + service-tls). */
+const INGRESS_HTTPS_PORT = 443;
+/** Fixed HTTP port always bound for service-tls (alongside any
+ * fakes whose `port` happens to be 80). */
+const INGRESS_HTTP_PORT = 80;
+/** Runtime record for one configured fake. */
+interface FakeRuntime {
+  /** The fake's definition as loaded from the project; supplies the request
+   * `handler` and the optional `state()` factory. */
+  def: FakeDefinition;
+  /** Per-fake mutable state. Built once; lives in daemon memory. */
+  state: unknown;
+  /** Lowercased hostnames the fake answers to. */
+  hostnames: string[];
+  /** HTTP port the fake listens on. */
+  port: number;
+  /** Cached helpers (built lazily on first ctx.fakes access). */
+  helpers?: Record<string, unknown>;
+  /** Cached tracking proxy over `helpers` — the value actually handed to
+   * tests via `ctx.fakes.<name>`. Records a `fake` event per helper
+   * call / getter read and wraps the result for assertion provenance. */
+  trackedHelpers?: Record<string, unknown>;
+}
+/** All loaded fakes, keyed by stable name (the `fakes` map key). Holds the
+ *  in-daemon handler, forked state, and helpers — the parts intrinsic to a
+ *  fake. Their *networking* (certs, DNS, routes) comes from `LOWERED`.
+ *  Cleared and refilled by `buildIngress`. */
+const FAKES = new Map<string, FakeRuntime>();
+/** Generic ingress derived from the loaded project (tls/hostnames/fakes/
+ *  component `provides`) by the SDK's `lowerIngress`. The daemon executes
+ *  this and never reads `svc.tls`/`svc.hostnames` itself. Rebuilt on /load.
+ *  Starts out empty (no certificates, proxies, hosts, aliases or wildcards). */
+let LOWERED: LoweredIngress = {
+  certificates: [],
+  proxies: [],
+  ingressHosts: [],
+  aliasesByService: {},
+  wildcards: [],
+};
+/** Running HTTP servers per port (Bun.Server). Rebuilt on /load.
+ *  Filled by `startIngress`, emptied by `stopIngressServers`. */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const INGRESS_HTTP_SERVERS = new Map<number, any>();
+/** Running HTTPS servers per port (currently always {INGRESS_HTTPS_PORT}).
+ *  Filled by `startIngress`, emptied by `stopIngressServers`. */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+const INGRESS_HTTPS_SERVERS = new Map<number, any>();
+/**
+ * Stop every ingress listener (HTTP first, then HTTPS) and forget it, so the
+ * next project's routes can bind the same ports. A server that throws while
+ * stopping is logged and skipped; its map is cleared regardless.
+ */
+function stopIngressServers(): void {
+  const listenerGroups = [
+    { scheme: "http", servers: INGRESS_HTTP_SERVERS },
+    { scheme: "https", servers: INGRESS_HTTPS_SERVERS },
+  ];
+  for (const { scheme, servers } of listenerGroups) {
+    for (const [port, server] of servers) {
+      try {
+        server.stop?.();
+      } catch (err) {
+        // eslint-disable-next-line no-console
+        console.warn(`[ingress] failed to stop ${scheme} server on :${port}:`, err);
+      }
+    }
+    servers.clear();
+  }
+}
+/**
+ * Reset ingress for a newly loaded project: stop the running listeners,
+ * drop the previous fakes, recompute `LOWERED`, and register a runtime
+ * record for each of the project's fakes. Listeners are not bound here.
+ */
+function buildIngress(project: Project): void {
+  stopIngressServers();
+  FAKES.clear();
+  // The friendly surface (tls/hostnames/provides/fakes) is lowered into the
+  // generic declaration set the daemon executes; the special-casing lives in
+  // the SDK's lowerIngress, not here.
+  LOWERED = lowerIngress(project);
+  const definitions = project.fakes;
+  if (!definitions) return;
+  for (const [fakeName, definition] of Object.entries(definitions)) {
+    const runtime: FakeRuntime = {
+      def: definition,
+      // Filled in by startIngress once the fake's `state()` factory has run.
+      state: undefined,
+      hostnames: definition.hostnames.map((host) => host.toLowerCase()),
+      port: definition.port ?? DEFAULT_FAKE_PORT,
+    };
+    FAKES.set(fakeName, runtime);
+  }
+}
+/**
+ * Generate a leaf cert + key for one ingress route (fake or service
+ * proxy), signed by the in-VM root CA at {CA_PATH}. SANs cover every
+ * hostname the route answers to, so a client connecting with TLS
+ * verifies cleanly regardless of which hostname it used. Shells out
+ * to `openssl req -x509 -CA ... -CAkey ...` (OpenSSL 3.0+; Debian
+ * bookworm ships 3.0.x).
+ *
+ * `label` is a short tag baked into the cert Subject CN and the temp
+ * file names — only used for diagnostics, not for TLS verification.
+ *
+ * @param label Diagnostic tag for the Subject CN and temp file names.
+ * @param hostnames Hostnames to list as DNS SANs; must not be empty.
+ * @returns PEM text of the certificate and its private key.
+ * @throws Error when `hostnames` is empty or `openssl` exits non-zero.
+ */
+async function generateHostCert(
+  label: string,
+  hostnames: string[],
+): Promise<{ cert: string; key: string }> {
+  if (hostnames.length === 0) {
+    throw new Error(`cannot generate a certificate for ${label}: no hostnames given`);
+  }
+  const id = `spectest-host-${sanitizeSegment(label)}-${randomUUID().slice(0, 8)}`;
+  const keyPath = path.join("/tmp", `${id}.key`);
+  const crtPath = path.join("/tmp", `${id}.crt`);
+  const sans = hostnames.map((h) => `DNS:${h}`).join(",");
+  // commonName is capped at 64 characters and openssl rejects longer values.
+  // Callers pass a hostname as the label, so clip the CN; verification relies
+  // on the SANs, which are never clipped.
+  const commonName = `spectest-${label}`.slice(0, 64);
+  const args = [
+    "req",
+    "-newkey",
+    "rsa:2048",
+    "-nodes",
+    "-keyout",
+    keyPath,
+    "-out",
+    crtPath,
+    "-x509",
+    "-CA",
+    CA_PATH,
+    "-CAkey",
+    CA_KEY_PATH,
+    "-days",
+    "3650",
+    "-subj",
+    `/CN=${commonName}`,
+    "-addext",
+    `subjectAltName=${sans}`,
+    "-addext",
+    "basicConstraints=CA:FALSE",
+    "-addext",
+    "extendedKeyUsage=serverAuth",
+    "-addext",
+    "keyUsage=digitalSignature,keyEncipherment",
+  ];
+  // The openssl run sits inside the try so the temp files — the unencrypted
+  // private key in particular — are removed even when openssl fails after
+  // writing one of them.
+  try {
+    const r = await shx("openssl", args, 30_000);
+    if (r.code !== 0) {
+      throw new Error(
+        `openssl req for ${label} failed (rc=${r.code}): ${r.stderr.trim() || r.stdout.trim()}`,
+      );
+    }
+    const [cert, key] = await Promise.all([
+      fs.readFile(crtPath, "utf8"),
+      fs.readFile(keyPath, "utf8"),
+    ]);
+    return { cert, key };
+  } finally {
+    // Best-effort cleanup; a file that was never created is not an error.
+    await Promise.all([
+      fs.unlink(keyPath).catch(() => {}),
+      fs.unlink(crtPath).catch(() => {}),
+    ]);
+  }
+}
+/** Gateway IP of the spectest-net bridge — the address containers use to
+ * reach the VM host. `null` until `bridgeGatewayIp` has resolved it once;
+ * kept for the daemon's life because the network is recreated only on
+ * reload. */
+let cachedGatewayIp: string | null = null;
+/**
+ * Return the bridge gateway IP, asking dockerd (via the docker CLI) on the
+ * first call and serving the cached value afterwards.
+ *
+ * @throws Error when `docker network inspect` fails or prints no gateway.
+ */
+async function bridgeGatewayIp(): Promise<string> {
+  if (cachedGatewayIp) return cachedGatewayIp;
+  // Gateway of the network's first IPAM config entry.
+  const gatewayTemplate = "{{(index .IPAM.Config 0).Gateway}}";
+  const inspect = await docker(
+    ["network", "inspect", "--format", gatewayTemplate, NETWORK_NAME],
+    10_000,
+  );
+  if (inspect.code !== 0) {
+    throw new Error(
+      `docker network inspect ${NETWORK_NAME} failed (rc=${inspect.code}): ${inspect.stderr.trim()}`,
+    );
+  }
+  const gateway = inspect.stdout.trim();
+  if (!gateway) throw new Error(`no gateway IP returned for network ${NETWORK_NAME}`);
+  cachedGatewayIp = gateway;
+  return gateway;
+}
+/** One hostname → its handler. Either a fake (run user handler in-process)
+ * or a service proxy (reverse-proxy to a container). Discriminated by
+ * `kind`. */
+type Route =
+  | { kind: "fake"; fake: FakeRuntime }
+  | { kind: "proxy"; service: string; port: number };
+/** Per-WS-upgrade context: the upstream URL we'll open in `open()` and
+ * keep the bridge in `message()`/`close()`. Stored on `ws.data`. */
+interface WsBridgeData {
+  /** URL the upstream WebSocket is opened against. */
+  upstreamUrl: string;
+  /** The upstream socket; assigned in the listener's `open()` handler. */
+  upstream: WebSocket | null;
+  /** Client frames received while the upstream is not open; flushed in
+   * order when the upstream's `open` event fires. */
+  pending: Array<string | ArrayBuffer | Uint8Array>;
+}
+/**
+ * Pristine `fetch` captured at module load, before any test-scoped
+ * fetch wrapper can monkey-patch `globalThis.fetch`. The reverse-proxy
+ * uses this directly so its outbound HTTP calls aren't intercepted by
+ * the test recorder — they'd be (a) misattributed to the test's
+ * timeline, and (b) trip up the Response constructor because the
+ * recorder wraps `res.status` / `res.body` in inspectable proxies
+ * that don't pass through as primitives.
+ *
+ * Bound to `globalThis` so it can be called as a bare function.
+ */
+const NATIVE_FETCH: typeof fetch = globalThis.fetch.bind(globalThis);
+/**
+ * Lowercase names of headers a proxy must not forward.
+ *
+ * These are the hop-by-hop headers (RFC 7230 §6.1; list from RFC 2616
+ * §13.5.1). The real field name is `Trailer`; RFC 2616's list misprinted
+ * it as `Trailers`, so both spellings are included. `host` is not
+ * hop-by-hop but is listed too — presumably so the upstream request gets
+ * a Host derived from the upstream URL; confirm in the proxy code.
+ */
+const HOP_BY_HOP_HEADERS = new Set([
+  "connection",
+  "keep-alive",
+  "proxy-authenticate",
+  "proxy-authorization",
+  "te",
+  "trailer",
+  "trailers",
+  "transfer-encoding",
+  "upgrade",
+  "host",
+]);
+/**
+ * Bring ingress servers up: bind one Bun.serve per unique HTTP port
+ * (fakes' ports plus the always-on :80 for service proxies), plus a
+ * shared HTTPS :443 (SNI per hostname). Build each fake's initial state,
+ * then write the hostname→ip registry that spectest-resolver consults
+ * for DNS. Idempotent — calling twice rebuilds: listeners from an earlier
+ * call are stopped first and every fake's state is built afresh.
+ *
+ * @throws Error when the bridge gateway cannot be resolved, a fake's
+ *   `state()` factory throws, a certificate cannot be generated, or the
+ *   daemon is not running under Bun.
+ */
+async function startIngress(): Promise<void> {
+  // Release listeners from a previous call; binding again while this daemon
+  // still holds the ports would fail. A no-op straight after buildIngress,
+  // which stops the servers itself.
+  stopIngressServers();
+  const hasIngress =
+    FAKES.size > 0 ||
+    LOWERED.proxies.length > 0 ||
+    LOWERED.ingressHosts.length > 0 ||
+    LOWERED.wildcards.length > 0;
+  if (!hasIngress) {
+    // Make sure the resolver doesn't see stale entries from a prior project.
+    REGISTRY.hosts = {};
+    REGISTRY.wildcards = [];
+    await writeRegistry();
+    return;
+  }
+  // Called for its effects only: fills `cachedGatewayIp` and throws early if
+  // the bridge network can't be inspected.
+  await bridgeGatewayIp();
+  // Initialise per-fake state. Awaited sequentially — state factories
+  // are expected to be tiny constructors; the cost of serial init is
+  // dwarfed by the eventual snapshot.
+  for (const [name, fake] of FAKES) {
+    if (fake.def.state) {
+      try {
+        fake.state = await fake.def.state();
+      } catch (err) {
+        // A factory may throw a non-Error value; still report something useful.
+        const reason = err instanceof Error ? err.message : String(err);
+        throw new Error(`fake ${JSON.stringify(name)} state() factory threw: ${reason}`);
+      }
+    } else {
+      fake.state = {};
+    }
+  }
+  // Mint one leaf cert per `certificate` decl (SANs = its hostnames),
+  // signed by the in-VM root CA, and index it by hostname for SNI. Done
+  // before binding so the HTTPS listener has certs ready and a startup
+  // failure aborts /bootstrap cleanly.
+  const caPresent = existsSync(CA_PATH) && existsSync(CA_KEY_PATH);
+  const certByHost = new Map<string, { cert: string; key: string }>();
+  if (caPresent) {
+    for (const group of LOWERED.certificates) {
+      if (group.hostnames.length === 0) continue;
+      const leaf = await generateHostCert(group.hostnames[0], group.hostnames);
+      for (const h of group.hostnames) certByHost.set(h, leaf);
+    }
+  } else if (LOWERED.certificates.length > 0) {
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[ingress] root CA missing at ${CA_PATH}; skipping HTTPS bind (fakes/proxies will be HTTP-only)`,
+    );
+  }
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const Bun = (globalThis as any).Bun;
+  if (!Bun?.serve) {
+    throw new Error(
+      "ingress requires Bun.serve; the daemon must run under Bun (it does in-VM)",
+    );
+  }
+  // Resolve every ingress hostname to its handler. Fakes run an in-daemon
+  // handler; proxies reverse-proxy to a service:port. This single table
+  // drives both the HTTP and HTTPS listeners. Keys are lowercase because
+  // dispatchIngress lowercases the Host header before looking a route up
+  // (fake hostnames are already lowercased when the fake is registered).
+  const routeByHost = new Map<string, Route>();
+  for (const fake of FAKES.values()) {
+    for (const h of fake.hostnames) routeByHost.set(h, { kind: "fake", fake });
+  }
+  for (const p of LOWERED.proxies) {
+    routeByHost.set(p.hostname.toLowerCase(), {
+      kind: "proxy",
+      service: p.service,
+      port: p.port,
+    });
+  }
+  // ── HTTP listeners: group routes by port, dispatch per-request by Host.
+  //    Proxies bind :80 (HTTPS, if any, is on :443); fakes use their
+  //    declared port. Skip :443 in the HTTP map — HTTPS wins.
+  const httpRoutesByPort = new Map<number, Map<string, Route>>();
+  const ensurePort = (port: number): Map<string, Route> => {
+    const m = httpRoutesByPort.get(port) ?? new Map<string, Route>();
+    httpRoutesByPort.set(port, m);
+    return m;
+  };
+  for (const fake of FAKES.values()) {
+    if (fake.port === INGRESS_HTTPS_PORT) continue;
+    const routes = ensurePort(fake.port);
+    for (const h of fake.hostnames) routes.set(h, routeByHost.get(h)!);
+  }
+  if (LOWERED.proxies.length > 0) {
+    const routes = ensurePort(INGRESS_HTTP_PORT);
+    for (const p of LOWERED.proxies) {
+      const h = p.hostname.toLowerCase();
+      routes.set(h, routeByHost.get(h)!);
+    }
+  }
+  for (const [port, byHost] of httpRoutesByPort) {
+    const label = `port ${port}`;
+    INGRESS_HTTP_SERVERS.set(port, bindIngressServer(Bun, port, byHost, label));
+    const hosts = [...byHost.keys()].join(", ");
+    // eslint-disable-next-line no-console
+    console.log(`[ingress] http :${port} for ${hosts}`);
+  }
+  // ── HTTPS listener on INGRESS_HTTPS_PORT: SNI per certificated hostname.
+  if (certByHost.size > 0) {
+    const tlsEntries: Array<{ cert: string; key: string; serverName: string }> = [];
+    const byHostHttps = new Map<string, Route>();
+    for (const [h, leaf] of certByHost) {
+      tlsEntries.push({ cert: leaf.cert, key: leaf.key, serverName: h });
+      // Certificated hostnames without a route still get a TLS entry; their
+      // requests fall through to the dispatcher's 404.
+      const routeKey = h.toLowerCase();
+      const route = routeByHost.get(routeKey);
+      if (route) byHostHttps.set(routeKey, route);
+    }
+    const label = `https :${INGRESS_HTTPS_PORT}`;
+    const server = bindIngressServer(
+      Bun,
+      INGRESS_HTTPS_PORT,
+      byHostHttps,
+      label,
+      tlsEntries,
+    );
+    INGRESS_HTTPS_SERVERS.set(INGRESS_HTTPS_PORT, server);
+    const hosts = [...byHostHttps.keys()].join(", ");
+    // eslint-disable-next-line no-console
+    console.log(`[ingress] https :${INGRESS_HTTPS_PORT} for ${hosts}`);
+  }
+  // Seed the resolver's names registry: ingress hostnames (fakes, TLS
+  // proxies, dnsName(→ingress)) → bridge gateway, plus ingress-targeted
+  // wildcards. Service-targeted wildcards wait for the post-container pass
+  // (their containers aren't up yet). Dynamic ctx.dnsName calls extend this.
+  await seedNamesRegistry({ servicesUp: false });
+}
+/**
+ * Spin up one Bun.serve listener bound to (port, optional TLS) that
+ * dispatches every request to the matching Route by Host header.
+ *
+ * Shared by the HTTP and HTTPS branches. Also exports a `websocket`
+ * handler so reverse-proxy targets can transparently bridge WS
+ * upgrades through to their upstream service.
+ *
+ * @param Bun The Bun global (must provide `serve`).
+ * @param port Port to bind on 0.0.0.0.
+ * @param byHost Lowercase hostname → route table for this listener.
+ * @param listenerLabel Listener description used in the dispatcher's 404 text.
+ * @param tlsEntries Per-hostname certs; when given the listener terminates TLS.
+ * @returns Whatever `Bun.serve` returns (the running server).
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function bindIngressServer(
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  Bun: any,
+  port: number,
+  byHost: Map<string, Route>,
+  listenerLabel: string,
+  tlsEntries?: Array<{ cert: string; key: string; serverName: string }>,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+): any {
+  // A TLS listener terminates https; everything else is plain http. Used
+  // to stamp X-Forwarded-Proto so upstreams that build absolute URLs or
+  // redirect see the scheme the client actually used, not our http hop.
+  const proto = tlsEntries ? "https" : "http";
+  // Close one side of the bridge with the peer's code/reason when the socket
+  // accepts them. The standard WebSocket `close()` throws for codes other
+  // than 1000 and 3000–4999 (so a relayed 1001/1005/1006 is rejected) and for
+  // over-long reasons; in that case close without arguments rather than
+  // leaving the socket open.
+  const closeQuietly = (
+    sock: { close(code?: number, reason?: string): void },
+    code: number,
+    reason: string,
+  ): void => {
+    try {
+      sock.close(code, reason);
+      return;
+    } catch {
+      /* code/reason not accepted, or already closed — retry bare */
+    }
+    try {
+      sock.close();
+    } catch {
+      /* already closed */
+    }
+  };
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const opts: Record<string, any> = {
+    port,
+    hostname: "0.0.0.0",
+    // Bun.serve defaults to a 10s idleTimeout, which kills any proxied
+    // request whose upstream takes >10s to produce bytes — under parallel
+    // test load that surfaced as "fetch failed"/"other side closed" on
+    // deploy-archive uploads and ERR_EMPTY_RESPONSE in browser tests
+    // ([Bun.serve]: request timed out after 10 seconds). Ingress fronts
+    // arbitrarily slow app endpoints (deploys can legitimately take
+    // minutes), so disable the idle timeout entirely; forked test VMs are
+    // short-lived, leaked-connection risk is bounded by the fork.
+    idleTimeout: 0,
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    fetch: (req: Request, server: any): Response | Promise<Response> =>
+      dispatchIngress(req, server, byHost, listenerLabel, proto),
+    websocket: {
+      // Client connection upgraded: open the matching upstream socket and
+      // wire upstream → client forwarding.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      async open(ws: any) {
+        const data = ws.data as WsBridgeData;
+        try {
+          const upstream = new WebSocket(data.upstreamUrl);
+          // ArrayBuffer so binary frames can be ws.send()'d to the
+          // downstream client verbatim — Blob would need an extra
+          // .arrayBuffer() round-trip on every message.
+          upstream.binaryType = "arraybuffer";
+          data.upstream = upstream;
+          upstream.addEventListener("open", () => {
+            // Flush frames the client sent during the upstream handshake.
+            for (const m of data.pending) upstream.send(m);
+            data.pending = [];
+          });
+          upstream.addEventListener("message", (ev: MessageEvent) => {
+            try {
+              ws.send(ev.data);
+            } catch {
+              /* client gone */
+            }
+          });
+          upstream.addEventListener("close", (ev: CloseEvent) => {
+            // Nothing buffered can be delivered any more.
+            data.pending = [];
+            closeQuietly(ws, ev.code, ev.reason);
+          });
+          upstream.addEventListener("error", () => {
+            closeQuietly(ws, 1011, "upstream error");
+          });
+        } catch (err) {
+          // eslint-disable-next-line no-console
+          console.warn(`[ingress] ws upstream open failed for ${data.upstreamUrl}:`, err);
+          closeQuietly(ws, 1011, "upstream open failed");
+        }
+      },
+      // Client → upstream forwarding.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      message(ws: any, message: string | Buffer) {
+        const data = ws.data as WsBridgeData;
+        const payload =
+          typeof message === "string" ? message : new Uint8Array(message);
+        const upstream = data.upstream;
+        if (upstream && upstream.readyState === WebSocket.OPEN) {
+          upstream.send(payload);
+        } else if (!upstream || upstream.readyState === WebSocket.CONNECTING) {
+          // Buffer until the upstream finishes its handshake.
+          data.pending.push(payload);
+        }
+        // Otherwise the upstream is closing/closed: drop the frame instead of
+        // queueing it in a buffer that will never be flushed.
+      },
+      // Client went away: take the upstream down with it.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      close(ws: any, code: number, reason: string) {
+        const data = ws.data as WsBridgeData;
+        data.pending = [];
+        if (data.upstream) closeQuietly(data.upstream, code, reason);
+      },
+    },
+  };
+  if (tlsEntries) opts.tls = tlsEntries;
+  return Bun.serve(opts);
+}
+/**
+ * Route one incoming ingress request; every ingress listener funnels
+ * through here. The Host header (lower-cased, port removed, trimmed)
+ * selects the Route from `byHost`:
+ *   - no match: plain-text 404 naming the host and the listener;
+ *   - fake: the fake's handler is awaited, and anything it throws is
+ *     turned into a plain-text 500;
+ *   - otherwise: delegated to proxyToService (WebSocket upgrade or
+ *     reverse proxy to the upstream service).
+ */
+async function dispatchIngress(
+  req: Request,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  server: any,
+  byHost: Map<string, Route>,
+  listenerLabel: string,
+  proto: string,
+): Promise<Response> {
+  const rawHost = req.headers.get("host") ?? "";
+  const [hostOnly] = rawHost.toLowerCase().split(":");
+  const host = hostOnly.trim();
+  const route = byHost.get(host);
+  if (!route) {
+    const notFound = `spectest-daemon: no ingress route bound to Host=${JSON.stringify(host)} on ${listenerLabel}\n`;
+    return new Response(notFound, {
+      status: 404,
+      headers: { "content-type": "text/plain" },
+    });
+  }
+  if (route.kind !== "fake") {
+    return proxyToService(req, server, route.service, route.port, listenerLabel, proto);
+  }
+  const { def, state } = route.fake;
+  try {
+    return await def.handler(req, state, FAKE_CTX);
+  } catch (err) {
+    // A throwing fake must not take the listener down — report it as a 500.
+    const thrown = err as Error;
+    const detail = thrown?.message ?? String(err);
+    return new Response(`spectest-daemon: fake ${def.name} threw: ${detail}\n`, {
+      status: 500,
+      headers: { "content-type": "text/plain" },
+    });
+  }
+}
+/**
+ * Reverse-proxy a request to `http://<service>:<port>` on
+ * `spectest-net`. Handles plain HTTP/1.1 + 2 and WebSocket upgrades:
+ *
+ *   - WS upgrade requests get routed through `server.upgrade()`, with
+ *     the upstream URL stashed on `ws.data`. The shared `websocket`
+ *     handler opens the upstream and bridges frames both ways.
+ *   - Plain requests pass through via `fetch()` with hop-by-hop
+ *     headers stripped; the response body is a ReadableStream returned
+ *     verbatim, so it streams back without buffering.
+ *
+ * `decompress: false` makes this a true byte-for-byte pass-through:
+ * Bun's fetch otherwise auto-decompresses the upstream body, which would
+ * leave us forwarding the original `Content-Encoding`/`Content-Length`
+ * over a now-plaintext body — browsers then fail with
+ * ERR_CONTENT_DECODING_FAILED or truncate on the stale length (the
+ * WHATWG fetch footgun in whatwg/fetch#1729). Keeping the body encoded
+ * means those headers still describe the bytes we send, and we relay the
+ * client's `Accept-Encoding` upstream so the upstream picks the scheme.
+ *
+ * Retry policy: up to 3 attempts when the request is replayable (no
+ * body, or a body with a declared length within the cap that was
+ * buffered up front), otherwise a single attempt. Body-carrying requests
+ * are only retried on failures that happen before anything is sent;
+ * GET/HEAD are retried on any connection-class failure.
+ *
+ * @param req            the incoming client request
+ * @param server         the Bun server (used for `upgrade` / `requestIP`)
+ * @param service        upstream service name on spectest-net
+ * @param port           upstream port
+ * @param listenerLabel  listener name, used only in error text
+ * @param proto          value forwarded as `x-forwarded-proto`
+ * @returns the upstream response (hop-by-hop headers stripped), a 101
+ *          stub / 426 for WebSocket upgrades, or a plain-text 502 when
+ *          the upstream could not be reached.
+ */
+async function proxyToService(
+  req: Request,
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  server: any,
+  service: string,
+  port: number,
+  listenerLabel: string,
+  proto: string,
+): Promise<Response> {
+  const url = new URL(req.url);
+  const upstreamPath = `${url.pathname}${url.search}`;
+  const upgrade = req.headers.get("upgrade")?.toLowerCase() ?? "";
+  if (upgrade === "websocket") {
+    const upstreamUrl = `ws://${await proxyUpstreamHost(service)}:${port}${upstreamPath}`;
+    const wsData: WsBridgeData = {
+      upstreamUrl,
+      upstream: null,
+      pending: [],
+    };
+    const ok = server.upgrade(req, { data: wsData });
+    if (ok) {
+      // Bun has already taken over the response — return a stub.
+      return new Response(null, { status: 101 });
+    }
+    return new Response(
+      `spectest-daemon: ws upgrade refused on ${listenerLabel}\n`,
+      { status: 426, headers: { "content-type": "text/plain" } },
+    );
+  }
+  const fwdHeaders = new Headers();
+  for (const [k, v] of req.headers) {
+    if (HOP_BY_HOP_HEADERS.has(k.toLowerCase())) continue;
+    fwdHeaders.append(k, v);
+  }
+  // Standard reverse-proxy provenance headers: the upstream sees the
+  // public scheme/host it was reached through and the client's address,
+  // even though we rewrite Host below to the service-net name.
+  const clientIp = server.requestIP?.(req)?.address as string | undefined;
+  const priorXff = req.headers.get("x-forwarded-for");
+  const xff = clientIp ? (priorXff ? `${priorXff}, ${clientIp}` : clientIp) : priorXff;
+  if (xff) fwdHeaders.set("x-forwarded-for", xff);
+  fwdHeaders.set("x-forwarded-proto", proto);
+  const publicHost = req.headers.get("host");
+  if (publicHost) fwdHeaders.set("x-forwarded-host", publicHost);
+  // Override Host so the upstream sees its own service-net name, not
+  // the public hostname. Lets origin servers that vhost by Host header
+  // continue to find the right virtual host.
+  fwdHeaders.set("host", `${service}:${port}`);
+  // Buffer bounded request bodies so a transient upstream connect failure
+  // can be retried (a ReadableStream body is consumed by the first
+  // attempt). Under heavy parallel-fork load an in-guest connect to a
+  // peer container occasionally fails outright ("Unable to connect" on a
+  // healthy upstream) — observed on deploy-tarball uploads to s3mock; a
+  // bounded retry absorbs it. Bodies above the cap (or with unknown
+  // length and a stream that exceeds it) keep streaming semantics and
+  // simply don't retry.
+  const RETRY_BODY_CAP = 128 * 1024 * 1024;
+  const hasBody = req.method !== "GET" && req.method !== "HEAD";
+  // Only bodies with a known, bounded length are buffered — an unknown
+  // (chunked/streaming) length could be an endless client stream, which
+  // must keep flowing through, not accumulate.
+  const declaredLen = Number(req.headers.get("content-length") ?? NaN);
+  let bufferedBody: ArrayBuffer | undefined;
+  if (hasBody && Number.isFinite(declaredLen) && declaredLen <= RETRY_BODY_CAP) {
+    try {
+      bufferedBody = await req.arrayBuffer();
+    } catch {
+      /* client aborted mid-upload; fall through, attempt will fail */
+    }
+  }
+  const retryable =
+    !hasBody || (bufferedBody !== undefined && bufferedBody.byteLength <= RETRY_BODY_CAP);
+  const attempts = retryable ? 3 : 1;
+  let lastErr: unknown;
+  for (let attempt = 1; attempt <= attempts; attempt++) {
+    // Resolve the upstream per attempt: a connect failure below drops the
+    // cached IP, so a retry re-inspects the container.
+    const upstreamUrl = `http://${await proxyUpstreamHost(service)}:${port}${upstreamPath}`;
+    try {
+      const upstreamReq = new Request(upstreamUrl, {
+        method: req.method,
+        headers: fwdHeaders,
+        body: hasBody ? (bufferedBody ?? req.body) : undefined,
+        redirect: "manual",
+      });
+      // decompress:false → forward the encoded body untouched (see fn doc).
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const upstreamRes = await NATIVE_FETCH(upstreamReq, { decompress: false } as any);
+      // Strip hop-by-hop response headers; let Bun set content-length / TE.
+      const respHeaders = new Headers();
+      for (const [k, v] of upstreamRes.headers) {
+        if (HOP_BY_HOP_HEADERS.has(k.toLowerCase())) continue;
+        respHeaders.append(k, v);
+      }
+      return new Response(upstreamRes.body, {
+        status: upstreamRes.status,
+        statusText: upstreamRes.statusText,
+        headers: respHeaders,
+      });
+    } catch (err) {
+      lastErr = err;
+      // Only connect-class failures are safely retryable — if the request
+      // reached the upstream we must not replay it.
+      const msg = (err as Error)?.message ?? String(err);
+      // Failures that happen before any byte is sent: always safe to retry.
+      const preSendFailure =
+        /unable to connect|connection ?refused|econnrefused|typo in the url/i.test(msg);
+      // A bare /connect/ also matches "connection closed/reset", which can
+      // occur AFTER the upstream received the request. Replaying is only
+      // acceptable there for body-less GET/HEAD; a body-carrying request
+      // (POST/PUT/…) must not be sent twice.
+      const connectFailure = hasBody
+        ? preSendFailure
+        : preSendFailure || /connect/i.test(msg);
+      if (!connectFailure || attempt === attempts) break;
+      // The cached IP may be stale (container recreated) — re-resolve.
+      PROXY_IP_CACHE.delete(service);
+      // eslint-disable-next-line no-console
+      console.warn(
+        `[ingress] upstream ${service}:${port} connect failed (attempt ${attempt}/${attempts}), retrying: ${msg}`,
+      );
+      // Linear backoff: 250ms, then 500ms.
+      await new Promise((r) => setTimeout(r, 250 * attempt));
+    }
+  }
+  const e = lastErr as Error;
+  return new Response(
+    `spectest-daemon: upstream ${service}:${port} unreachable: ${e?.message ?? String(lastErr)}\n`,
+    { status: 502, headers: { "content-type": "text/plain" } },
+  );
+}
+/**
+ * In-memory names registry, serialised to FAKES_REGISTRY_PATH for the
+ * resolver. `startIngress` seeds it from LOWERED (the static
+ * tls/hostnames/fakes/wildcard decls); `registerDnsName` mutates it live
+ * when a test calls `ctx.dnsName`. It lives in daemon memory, so it forks
+ * with the rest of the snapshot — a test's dynamic registration is
+ * isolated to its own fork, exactly like fake state.
+ *
+ * Shape:
+ *   - `hosts`: exact hostname → IP.
+ *   - `wildcards`: `{ suffix, ip }` entries, where `suffix` is the
+ *     wildcard pattern with its leading `*` removed (see
+ *     `wildcardSuffix`).
+ */
+const REGISTRY: {
+  hosts: Record<string, string>;
+  wildcards: Array<{ suffix: string; ip: string }>;
+} = { hosts: {}, wildcards: [] };
+/**
+ * Persist the current REGISTRY (hosts + wildcards, stamped with
+ * `updatedAt`) as JSON at FAKES_REGISTRY_PATH, creating the parent
+ * directory if needed. Best-effort: a filesystem failure is logged and
+ * swallowed, never thrown.
+ */
+async function writeRegistry(): Promise<void> {
+  const { hosts, wildcards } = REGISTRY;
+  const serialized = JSON.stringify({ hosts, wildcards, updatedAt: Date.now() });
+  try {
+    const parentDir = path.dirname(FAKES_REGISTRY_PATH);
+    await fs.mkdir(parentDir, { recursive: true });
+    await fs.writeFile(FAKES_REGISTRY_PATH, serialized);
+  } catch (err) {
+    // The resolver degrades gracefully without the file, so only warn.
+    // eslint-disable-next-line no-console
+    console.warn(`[names] failed to write registry at ${FAKES_REGISTRY_PATH}:`, err);
+  }
+}
+/**
+ * Turn a wildcard pattern into the suffix the resolver matches against:
+ * `*.example.com` → `.example.com`. Simply removes the first character,
+ * which callers pass as the leading `*`.
+ */
+function wildcardSuffix(pattern: string): string {
+  const withoutStar = pattern.substring(1);
+  return withoutStar;
+}
+/** Look up a service container's IP on spectest-net via `docker inspect`
+ *  (10s timeout). Resolves to `null` when the inspect exits non-zero or
+ *  prints nothing usable (empty output or `<no value>`) — i.e. the
+ *  container isn't up or isn't attached to the network yet. */
+async function serviceContainerIp(name: string): Promise<string | null> {
+  const format = `{{(index .NetworkSettings.Networks "${NETWORK_NAME}").IPAddress}}`;
+  const inspected = await docker(["inspect", "--format", format, name], 10_000);
+  if (inspected.code !== 0) return null;
+  const address = inspected.stdout.trim();
+  if (address.length === 0 || address === "<no value>") return null;
+  return address;
+}
+/** Service name → container IP cache used by the ingress proxy. It keeps
+ *  the proxy hot path off in-guest DNS: name resolution goes through
+ *  spectest-resolver — a single-threaded Bun process that can be starved
+ *  when the guest's vCPUs are saturated (observed as ~30s of
+ *  "tarballs-s3:9090 unreachable" 502s during parallel deploy tests while
+ *  the container was healthy). Entries are filled lazily from
+ *  `docker inspect` (local socket, no DNS); the proxy deletes an entry on
+ *  connect failure so a recreated container is re-resolved on retry. */
+const PROXY_IP_CACHE = new Map<string, string>();
+/** Host to dial for `service`: the cached IP if present, else a freshly
+ *  inspected IP (which is then cached), else the bare service name. */
+async function proxyUpstreamHost(service: string): Promise<string> {
+  const known = PROXY_IP_CACHE.get(service);
+  if (known) return known;
+  let resolved: string | null;
+  try {
+    resolved = await serviceContainerIp(service);
+  } catch {
+    resolved = null;
+  }
+  if (!resolved) {
+    // No container IP — hand back the name and let the resolver / docker
+    // DNS deal with it (e.g. a target that isn't a docker container on
+    // spectest-net).
+    return service;
+  }
+  PROXY_IP_CACHE.set(service, resolved);
+  return resolved;
+}
+/** Turn a DnsTarget into a concrete IP. An `ingress` target maps to the
+ *  bridge gateway IP; a service target maps to that container's IP on the
+ *  network.
+ *  @throws Error when a service target currently has no IP. */
+async function resolveDnsTarget(target: DnsTarget): Promise<string> {
+  if (!("ingress" in target)) {
+    const containerIp = await serviceContainerIp(target.service);
+    if (containerIp) return containerIp;
+    throw new Error(
+      `dnsName target service ${JSON.stringify(target.service)} has no IP on ${NETWORK_NAME} (is it a running service?)`,
+    );
+  }
+  return bridgeGatewayIp();
+}
+/**
+ * Rebuild REGISTRY from the static lowered declarations and persist it.
+ * Intended to run twice per /bootstrap: first from startIngress with
+ * `servicesUp: false`, so ingress hostnames answer while containers are
+ * still starting, and again with `servicesUp: true` once every container
+ * is up, so service-targeted wildcards (e.g. k3s `ingressDomains`) can be
+ * resolved to container IPs. The second pass is the one captured into
+ * the warm template.
+ *
+ * Ingress hosts and ingress-targeted wildcards point at the bridge
+ * gateway. Service-targeted wildcards are skipped while services are
+ * down, and skipped with a warning if the container has no IP.
+ */
+async function seedNamesRegistry(opts: { servicesUp: boolean }): Promise<void> {
+  const gateway = await bridgeGatewayIp();
+  // Start from scratch — anything previously registered is dropped.
+  REGISTRY.hosts = {};
+  REGISTRY.wildcards = [];
+  for (const hostname of LOWERED.ingressHosts) {
+    REGISTRY.hosts[hostname] = gateway;
+  }
+  for (const decl of LOWERED.wildcards) {
+    const suffix = wildcardSuffix(decl.pattern);
+    if ("ingress" in decl.target) {
+      REGISTRY.wildcards.push({ suffix, ip: gateway });
+    } else if (opts.servicesUp) {
+      // Service target: needs the container's own IP, which only exists
+      // once the container is running.
+      const containerIp = await serviceContainerIp(decl.target.service);
+      if (!containerIp) {
+        // eslint-disable-next-line no-console
+        console.warn(
+          `[names] wildcard ${decl.pattern}: service ${JSON.stringify(decl.target.service)} has no IP on ${NETWORK_NAME}`,
+        );
+      } else {
+        REGISTRY.wildcards.push({ suffix, ip: containerIp });
+      }
+    }
+  }
+  await writeRegistry();
+}
+/**
+ * Register a hostname at runtime — the implementation behind `ctx.dnsName`.
+ * Validates via the same `dnsName` primitive the static path uses, resolves
+ * the target to an IP, and updates + persists the registry. Exact names go
+ * in `hosts`; `*.suffix` wildcards in `wildcards` (replacing any existing
+ * entry with the same suffix). The resolver re-reads on the next query (it
+ * watches the file's mtime), so the name is live immediately — answered
+ * for VM-host/test/browser code and for peer containers (Docker forwards
+ * unknown names to the host resolver). It does NOT land in any
+ * container's /etc/hosts.
+ *
+ * The recorded `dnsName` event carries the measured wall-clock duration
+ * (target resolution + registry write), matching how the `startService`
+ * and `stopService` events report theirs.
+ *
+ * @param hostname exact name or `*.suffix` wildcard to register
+ * @param target   where the name should point (ingress or a service)
+ * @throws whatever validation (`makeDnsDecl`) or `resolveDnsTarget`
+ *         throws; in that case nothing is registered or recorded.
+ */
+async function registerDnsName(hostname: string, target: DnsTarget): Promise<void> {
+  const t0 = Date.now();
+  const resv = reserveEvent();
+  // Reuse the primitive purely for validation + lowercasing.
+  const decl = makeDnsDecl(hostname, target);
+  const ip = await resolveDnsTarget(target);
+  if (isWildcard(decl.hostname)) {
+    const suffix = wildcardSuffix(decl.hostname);
+    // Re-registering the same wildcard replaces the previous entry.
+    REGISTRY.wildcards = REGISTRY.wildcards.filter((w) => w.suffix !== suffix);
+    REGISTRY.wildcards.push({ suffix, ip });
+  } else {
+    REGISTRY.hosts[decl.hostname] = ip;
+  }
+  await writeRegistry();
+  recordEnv(
+    { op: "dnsName", hostname: decl.hostname, ip, durationMs: Date.now() - t0 },
+    resv,
+  );
+}
+// ────────────────────────────────────────────────────────────────────────
+// Runtime services — containers started after bootstrap (from a test, an
+// eval, project setup, or a fake handler reacting to the app under test).
+//
+// A runtime service is a *real machine on the network*: it joins
+// spectest-net with its own IP and is reached directly by name/IP, not
+// through the daemon's HTTP ingress. The same helpers bootstrap uses
+// (prepareServiceImage → runContainer → waitForReady) drive it, so it gets
+// the same image cache, CA trust, and ready-probing. Because it lives in
+// dockerd, it's captured by the per-test post-state snapshot exactly like
+// the boot services — a `dependsOn` child inherits the live container while
+// siblings (which fork from the parent's earlier snapshot) never see it.
+//
+// Tracked here only for in-VM bookkeeping (failure log capture, teardown);
+// the map forks with daemon memory, so each fork sees the services it (or
+// its ancestors) actually started.
+// ────────────────────────────────────────────────────────────────────────
+// Services started at runtime, keyed by container/service name.
+const RUNTIME_SERVICES = new Map<string, NamedService>();
+// The orchestration helpers take a NamedService; a runtime service spec is
+// a ServiceConfig (minus tls/dependsOn) plus a name, i.e. the same shape,
+// so the conversion just re-assembles the object with `name` first.
+function specToNamedService(spec: RuntimeServiceSpec): NamedService {
+  const { name, ...config } = spec;
+  const named: NamedService = { name, ...(config as ServiceConfig) };
+  return named;
+}
+/** Backs `ctx.startService` (for tests and for fakes). Prepares the
+ *  service image, runs the container on the network with the spec's
+ *  lower-cased `hostnames` as aliases, waits for its ready check, and
+ *  registers it in RUNTIME_SERVICES. A `startService` event is recorded
+ *  on both success and failure; failures are rethrown.
+ *  @returns the container's name and IP (`""` if no IP could be read).
+ *  @throws Error when `spec.name` is missing/empty, or whatever the
+ *          image-prep / run / ready steps throw. */
+async function startRuntimeService(spec: RuntimeServiceSpec): Promise<RuntimeServiceHandle> {
+  if (!spec.name) {
+    throw new Error("startService: `name` is required");
+  }
+  const startedAt = Date.now();
+  const resv = reserveEvent();
+  const svc = specToNamedService(spec);
+  const aliases = (spec.hostnames ?? []).map((alias) => alias.toLowerCase());
+  const imageRef = svc.image.type === "registry" ? svc.image.reference : "(dockerfile)";
+  // Fields shared by the success and the failure event.
+  const eventBase = { op: "startService" as const, service: svc.name, image: imageRef };
+  try {
+    const { tag } = await prepareServiceImage(svc);
+    const volumeFlags = await ensureVolumes(svc);
+    const fileFlags = await ensureFiles(svc);
+    await runContainer(svc, tag, [...volumeFlags, ...fileFlags], aliases);
+    await waitForReady(svc);
+    const ip = (await serviceContainerIp(svc.name)) ?? "";
+    RUNTIME_SERVICES.set(svc.name, svc);
+    recordEnv({ ...eventBase, ip, durationMs: Date.now() - startedAt }, resv);
+    return { name: svc.name, ip };
+  } catch (err) {
+    recordEnv(
+      { ...eventBase, durationMs: Date.now() - startedAt, error: errMessage(err) },
+      resv,
+    );
+    throw err;
+  }
+}
+/** Backs `ctx.stopService`. Force-removes the container with
+ *  `docker rm -f` (30s timeout; the exit code is not inspected, so an
+ *  already-gone container is a no-op), drops the name from
+ *  RUNTIME_SERVICES, and records a `stopService` event with the elapsed
+ *  time. */
+async function stopRuntimeService(name: string): Promise<void> {
+  const startedAt = Date.now();
+  const resv = reserveEvent();
+  const removeArgs = ["rm", "-f", name];
+  await docker(removeArgs, 30_000);
+  RUNTIME_SERVICES.delete(name);
+  const durationMs = Date.now() - startedAt;
+  recordEnv({ op: "stopService", service: name, durationMs }, resv);
+}
+/** The runtime environment-control handle handed to fakes (3rd handler arg /
+ *  `ctx` in `helpers`). The same primitives tests get on `ctx`; module-level
+ *  because none of them depend on a running test.
+ *
+ *  Each member is a direct reference to the module-level implementation:
+ *  `startService` → startRuntimeService, `stopService` →
+ *  stopRuntimeService, `dnsName` → registerDnsName. */
+const FAKE_CTX: FakeContext = {
+  startService: startRuntimeService,
+  stopService: stopRuntimeService,
+  dnsName: registerDnsName,
+};
+/** Return the helpers record for a fake — the value exposed at
+ * `ctx.fakes.<name>` — building it on first use and reusing the cached
+ * tracking proxy afterwards. A fake without a `helpers` factory gets an
+ * empty record (tests never touch private state directly). The result is
+ * always the tracking proxy from `trackFakeHelpers`, so helper calls land
+ * in the test timeline.
+ * @throws Error when no fake with that name is loaded. */
+async function ensureFakeHelpers(name: string): Promise<Record<string, unknown>> {
+  const fake = FAKES.get(name);
+  if (!fake) throw new Error(`fake ${JSON.stringify(name)} is not loaded`);
+  if (!fake.trackedHelpers) {
+    if (fake.def.helpers) {
+      const built = await fake.def.helpers({
+        name,
+        state: fake.state,
+        ctx: FAKE_CTX,
+      });
+      fake.helpers = built as Record<string, unknown>;
+    } else {
+      fake.helpers = {};
+    }
+    fake.trackedHelpers = trackFakeHelpers(name, fake.helpers);
+  }
+  return fake.trackedHelpers;
+}
+/** Produce a proxy over a fake's helpers in which every helper call is
+ * routed through `invokeFakeHelper` — so it becomes a recorded `fake`
+ * event and its return value is `wrap()`ped for assertion provenance.
+ * Helpers are functions that read/mutate the fake's private state via
+ * closure; tests only ever see what they return. The proxy is built once
+ * and shared across tests; the recorder is consulted at call time, so it
+ * is a transparent no-op when nothing is recording (eval / project
+ * setup).
+ *
+ * Interception is limited to OWN properties whose value is a function.
+ * Symbol keys, inherited members (`toString`, etc.) and non-function
+ * properties are read straight through. The wrapped helper is invoked
+ * with the original helpers object as `this`. */
+function trackFakeHelpers(
+  fakeName: string,
+  helpers: Record<string, unknown>,
+): Record<string, unknown> {
+  const handler: ProxyHandler<Record<string, unknown>> = {
+    get(target, prop, receiver) {
+      if (typeof prop !== "symbol") {
+        const own = Object.getOwnPropertyDescriptor(target, prop);
+        if (own && typeof own.value === "function") {
+          const helperFn = own.value as (...a: unknown[]) => unknown;
+          const member = String(prop);
+          return (...callArgs: unknown[]) =>
+            invokeFakeHelper(fakeName, member, helperFn, target, callArgs);
+        }
+      }
+      return Reflect.get(target, prop, receiver);
+    },
+  };
+  return new Proxy(helpers, handler);
+}
+/** Call one fake helper and record the call as a `fake` event.
+ * On success the event carries the serialized result and the returned
+ * value is `wrap()`ped with the event's sequence number. On failure an
+ * error event is recorded and the original error is rethrown. Works for
+ * both synchronous helpers and helpers returning a Promise (in which
+ * case a Promise is returned and recording happens on settlement). */
+function invokeFakeHelper(
+  fakeName: string,
+  member: string,
+  fn: (...a: unknown[]) => unknown,
+  thisArg: unknown,
+  args: unknown[],
+): unknown {
+  const startedAt = Date.now();
+  const resv = reserveEvent();
+  // Arguments are serialized up front, before the helper gets to run.
+  const safeArgs = args.map((arg) => safeSerialize(arg));
+  const onSuccess = (value: unknown): unknown => {
+    const seq = recordFake(
+      {
+        fake: fakeName,
+        member,
+        args: safeArgs,
+        result: safeSerialize(value),
+        durationMs: Date.now() - startedAt,
+      },
+      resv,
+    );
+    return wrap(value, seq);
+  };
+  const onFailure = (err: unknown): never => {
+    recordFake(
+      {
+        fake: fakeName,
+        member,
+        args: safeArgs,
+        durationMs: Date.now() - startedAt,
+        error: errMessage(err),
+      },
+      resv,
+    );
+    throw err;
+  };
+  let outcome: unknown;
+  try {
+    outcome = fn.apply(thisArg, args);
+  } catch (err) {
+    // Synchronous throw from the helper itself.
+    return onFailure(err);
+  }
+  return outcome instanceof Promise
+    ? outcome.then(onSuccess, onFailure)
+    : onSuccess(outcome);
+}
+/** Best-effort text for a caught value: its `message` when it has one
+ *  (null/undefined-safe), otherwise the value stringified. */
+function errMessage(err: unknown): string {
+  const message = (err as Error)?.message;
+  return message ?? String(err);
+}
+/** Assemble the `fakes` map placed on the test/eval context: one entry
+ * per loaded fake, each holding that fake's (tracked) helpers. Helper
+ * construction is lazy in `ensureFakeHelpers`, but every fake is
+ * materialised here, one after another, so a test can simply read
+ * `ctx.fakes.x.y`. */
+async function buildFakeHandles(): Promise<Record<string, Record<string, unknown>>> {
+  const byName: Record<string, Record<string, unknown>> = {};
+  for (const fakeName of FAKES.keys()) {
+    const tracked = await ensureFakeHelpers(fakeName);
+    byName[fakeName] = tracked;
+  }
+  return byName;
+}
+/** Per-service timing breakdown collected by `bootstrap`. */
+interface ServiceTiming {
+  /** Service name. */
+  name: string;
+  /** "pull" for registry images, "build" for everything else (dockerfile). */
+  kind: "pull" | "build";
+  /** Wall-clock ms spent in image prep, including any wait in the in-VM build queue. */
+  prepMs: number;
+  /** Wall-clock ms spent in `runContainer`; unset until the service has started. */
+  runMs?: number;
+  /** Wall-clock ms spent in `waitForReady`. */
+  readyMs?: number;
+  /** Wall-clock ms spent in the service's `setup` hook (0 when it has none). */
+  setupMs?: number;
+  /** Build steps reported by image prep, when it returned any. */
+  buildSteps?: BuildStep[];
+}
+/** Result of one `bootstrap` run. */
+interface BootstrapTimings {
+  /** Wall-clock ms for the whole bootstrap. */
+  totalMs: number;
+  /** Per-service timings, sorted slowest-first by `serviceTotalMs`. */
+  services: ServiceTiming[];
+}
+/** Sum of a service's prep, run, ready and setup times; phases that were
+ *  never recorded count as 0. */
+function serviceTotalMs(s: ServiceTiming): number {
+  const optionalPhases = [s.runMs, s.readyMs, s.setupMs];
+  return optionalPhases.reduce<number>((total, ms) => total + (ms ?? 0), s.prepMs);
+}
+// Writes the bootstrap timings to the daemon journal (the same data is
+// folded into the /bootstrap response, which the control plane logs).
+// Emits one compact line per service, a second line with that service's
+// recorded build steps when it has any, and a closing total — enough to
+// profile a slow cold start without dumping the full build output.
+function logBootstrapTimings(t: BootstrapTimings): void {
+  t.services.forEach((svc) => {
+    const segments: string[] = [`prep=${svc.prepMs}ms(${svc.kind})`];
+    if (svc.runMs != null) segments.push(`run=${svc.runMs}ms`);
+    if (svc.readyMs != null) segments.push(`ready=${svc.readyMs}ms`);
+    // A zero/absent setup time is left out of the line entirely.
+    if (svc.setupMs) segments.push(`setup=${svc.setupMs}ms`);
+    console.log(`[bootstrap] ${svc.name}: ${segments.join(" ")}`);
+    const steps = svc.buildSteps;
+    if (steps && steps.length) {
+      const rendered: string[] = [];
+      for (const step of steps) {
+        const cost = step.cached ? "cached" : step.secs.toFixed(1) + "s";
+        rendered.push(`${cost} ${step.name}`);
+      }
+      console.log(`[bootstrap]   ${svc.name} build steps: ${rendered.join(" | ")}`);
+    }
+  });
+  console.log(`[bootstrap] total ${t.totalMs}ms across ${t.services.length} service(s)`);
+}
+/**
+ * Bring up the loaded project's environment and report how long each
+ * part took. In order:
+ *
+ *   1. Ensure the docker network exists and write the workspace
+ *      `.dockerignore` (concurrently).
+ *   2. Kick off image prep for every service at once; dockerfile builds
+ *      are serialized behind a promise chain only when no host-side
+ *      builder is available.
+ *   3. Start ingress, so fakes answer before any service boots.
+ *   4. Start services via `startServices`; each one waits for its own
+ *      image, then runs container → ready probe → optional `setup` hook.
+ *   5. Re-seed the names registry now that containers have IPs.
+ *
+ * @returns total wall-clock time plus per-service timings, sorted
+ *          slowest-first. The same data is logged to the journal.
+ * @throws whatever network/ingress/service start-up throws; a failed image
+ *         prep surfaces when its service reaches the `await` on it.
+ */
+async function bootstrap(): Promise<BootstrapTimings> {
+  const bootStart = Date.now();
+  const cfg: EnvironmentConfig = requireLoaded().project.environment;
+  const services = namedServices(cfg);
+  const timings = new Map<string, ServiceTiming>();
+  progressInit(services);
+  // Build dedup is only valid within one workspace generation — a fresh
+  // bootstrap may follow a workspace re-upload with the same dockerfile
+  // text but different build-context content.
+  BUILD_DEDUP.clear();
+  // Network create is independent of the workspace-side prep, so run
+  // them concurrently. .dockerignore only blocks `docker build`s — pulls
+  // wouldn't need it — but the writes are sub-millisecond so we just
+  // gate image prep behind both.
+  await Promise.all([
+    ensureNetwork(),
+    (async () => {
+      await fs.mkdir(WORKSPACE, { recursive: true });
+      await fs.writeFile(
+        path.join(WORKSPACE, ".dockerignore"),
+        unionDockerignore(services),
+      );
+    })(),
+  ]);
+  // Image prep is DECOUPLED from container start: every service's image
+  // prep kicks off now, independent of `dependsOn`, and each service's
+  // container start (in startServices below) gates on (its OWN image ready)
+  // AND (its deps up) — there is no whole-graph barrier. So a service whose
+  // image is pulled and whose deps are up starts immediately; it never sits
+  // at "image ready" waiting for an unrelated slow build elsewhere.
+  //
+  // Prep concurrency: registry pulls always run in parallel (network-bound,
+  // low VM RAM). Dockerfile builds parallelize *only* when the host
+  // buildkitd is in play — there the build executes host-side under runc, so
+  // N concurrent builds don't touch the VM's memory ceiling. When we fall
+  // back to the in-VM builder, two or more concurrent builds routinely OOM a
+  // single VM on monorepos with parallel pnpm/npm installs (each install
+  // fans out to ~16 fetchers + lifecycle workers, ~70 MB/process), so we
+  // serialize that case behind a FIFO chain — but only the in-VM builds
+  // serialize; pulls and starts run freely alongside them. The remote-builder
+  // probe is memoized, so this up-front call is free; skip it with no builds.
+  const tags = new Map<string, string>();
+  const builds = services.filter((s) => s.image.type === "dockerfile");
+  const buildsRunHostSide = builds.length > 0 && (await ensureRemoteBuilder());
+  // A promise chain is a fair FIFO mutex: when builds run in-VM, each build
+  // waits for the previous to settle. Pulls and host-side builds bypass it.
+  let inVmBuildChain: Promise<unknown> = Promise.resolve();
+  const prepImage = (
+    svc: NamedService,
+  ): Promise<{ tag: string; buildSteps?: BuildStep[] }> => {
+    // Bootstrap is the only dedup scope: all its builds share one
+    // /workspace generation (see prepareServiceImage).
+    const run = () => prepareServiceImage(svc, { dedup: true });
+    if (svc.image.type === "dockerfile" && !buildsRunHostSide) {
+      const next = inVmBuildChain.then(run, run);
+      // Keep the chain moving even if a build throws; the chain itself never
+      // rejects (the per-service prep promise below is what surfaces errors).
+      inVmBuildChain = next.then(
+        () => undefined,
+        () => undefined,
+      );
+      return next;
+    }
+    return run();
+  };
+  const prep = new Map<string, Promise<void>>();
+  for (const svc of services) {
+    const p = (async () => {
+      const t0 = Date.now();
+      const { tag, buildSteps } = await prepImage(svc);
+      progressService(svc.name, { status: "prepared", detail: undefined });
+      tags.set(svc.name, tag);
+      timings.set(svc.name, {
+        name: svc.name,
+        kind: svc.image.type === "registry" ? "pull" : "build",
+        prepMs: Date.now() - t0,
+        buildSteps,
+      });
+    })();
+    // A dependent whose dep fails aborts before it awaits this prep, which
+    // would leave the prep promise unobserved. Attach a no-op handler so a
+    // late rejection can't crash the daemon; startOne still re-throws it for
+    // services that do reach their await.
+    p.catch(() => undefined);
+    prep.set(svc.name, p);
+  }
+  // Ingress (fakes + service-tls proxies) comes up BEFORE services so
+  // that any service that calls a fake URL during its own startup
+  // probe finds it answering. Service-tls proxies will return 502
+  // until their upstream containers start, but no one is hitting
+  // https://<svc>.test/ during bootstrap so that's harmless. The
+  // bridge gateway IP is set on `network create` — independent of
+  // any container being up — so we don't need services to determine
+  // the listener address.
+  await startIngress();
+  progressPhase("Starting services");
+  // Container start + ready probe driven by the dependsOn DAG: each
+  // service starts the moment its own dependencies finish run→probe→setup,
+  // instead of waiting for a whole topological level to clear. We chain
+  // run→probe→setup per service so a dependent sees the post-setup state
+  // of its deps (a database with its schema applied, a k3s cluster with
+  // its ingress controller already running) — but an unrelated slow probe
+  // no longer holds back a branch that's ready to go.
+  await startServices(services, async (svc) => {
+    // Gate on our OWN image being ready. startServices already gated on our
+    // deps; this adds the image edge. The two compose: we run the moment
+    // both are satisfied, with no whole-graph barrier between them.
+    await prep.get(svc.name)!;
+    const flags = [...(await ensureVolumes(svc)), ...(await ensureFiles(svc))];
+    const tag = tags.get(svc.name);
+    if (!tag) throw new Error(`internal: no image tag for ${svc.name}`);
+    const tRun = Date.now();
+    progressService(svc.name, { status: "starting", detail: undefined });
+    await runContainer(svc, tag, flags);
+    const tReady = Date.now();
+    progressService(svc.name, { status: "probing", detail: "ready check" });
+    await waitForReady(svc);
+    const tSetup = Date.now();
+    if (svc.setup) {
+      progressService(svc.name, { status: "probing", detail: "running setup" });
+      const helpers = await ensureHelpers(svc.name, svc);
+      await svc.setup({ name: svc.name, helpers });
+    }
+    progressService(svc.name, { status: "ready", detail: undefined });
+    // Fill in the run/ready/setup phases on the entry created by image prep.
+    const ti = timings.get(svc.name);
+    if (ti) {
+      ti.runMs = tReady - tRun;
+      ti.readyMs = tSetup - tReady;
+      ti.setupMs = svc.setup ? Date.now() - tSetup : 0;
+    }
+  });
+  // Containers now have IPs — re-seed so service-targeted wildcards (e.g.
+  // k3s ingressDomains → the cluster container) resolve. Captured into the
+  // warm template, so warm starts inherit the resolved entries.
+  await seedNamesRegistry({ servicesUp: true });
+  // Browser pre-warm DISABLED (2026-06-08). We used to pre-open one view
+  // into the pool (browser.ts VIEW_POOL) here so every fork inherited a
+  // live renderer and the first ctx.browser() skipped the ~1.2-1.5s spawn.
+  // But a renderer spawned BEFORE the snapshot and restored in a fork holds
+  // stale DNS state: its first navigate to an ingress host fails
+  // `net::ERR_NAME_NOT_RESOLVED` even though getaddrinfo/fetch resolve fine
+  // (the --disable-features=AsyncDns flag doesn't save the pooled view). A
+  // view created fresh AFTER the fork (openBrowser → createView, since the
+  // pool is now empty) spawns a post-restore renderer with correct DNS. The
+  // tradeoff is the per-test spawn cost is back on the browser path; we
+  // accept it to keep the suite's browser-rooted DAGs working. See
+  // browser.ts:213 (the long-standing intermittent NAME_NOT_RESOLVED) and
+  // the clocksource-regression notes. Re-enabling requires fixing the
+  // restored-renderer DNS state, not just re-adding the prewarm call.
+  const result: BootstrapTimings = {
+    totalMs: Date.now() - bootStart,
+    services: [...timings.values()].sort((a, b) => serviceTotalMs(b) - serviceTotalMs(a)),
+  };
+  progressDone();
+  logBootstrapTimings(result);
+  return result;
+}
+// ────────────────────────────────────────────────────────────────────────
+// Project setup (post-bootstrap, pre-test)
+// ────────────────────────────────────────────────────────────────────────
+interface ProjectSetupResult { // outcome reported by `runProjectSetup`
+  ran: boolean; // false when the loaded project defines no `setup` hook
+  durationMs: number; // ms from just before the svc/fake handles are built until the hook settles; 0 when `ran` is false
+}
+/**
+ * Execute the loaded project's optional `setup` hook.
+ *
+ * The control plane calls this once, after /bootstrap and before the
+ * warm-template snapshot, so whatever the hook creates (seeded DB rows,
+ * initial pods, fixture files) is captured a single time and inherited by
+ * every later snapshot/fork.
+ *
+ * Setup is deliberately NOT instrumented the way a test run is: there is
+ * no recorder, no event timeline and no test-runner timeout. An error
+ * thrown by the hook propagates to the caller, which aborts the env
+ * bring-up; the control plane reports that as a start failure.
+ *
+ * @returns `{ ran: false, durationMs: 0 }` when the project has no hook,
+ *   otherwise `{ ran: true, durationMs }` with the elapsed wall-clock time.
+ */
+async function runProjectSetup(): Promise<ProjectSetupResult> {
+  const { project } = requireLoaded();
+  if (!project.setup) return { ran: false, durationMs: 0 };
+  const startedAt = Date.now();
+  // Hand setup the very same `svc` handles a test would get, so helper
+  // instances are shared (e.g. a Bun.SQL pool created here is reused later).
+  const serviceHandles = (await buildServiceHandles(
+    project.environment,
+  )) as ProjectSetupContext["svc"];
+  const fakeHandles = await buildFakeHandles();
+  // Swap in the fetch wrapper only for the duration of the hook, so that
+  // `ctx.fetch` (and anything going through `globalThis.fetch`) yields a
+  // wrapped Response exactly as it would inside a test. With no recorder
+  // active the wrapper carries no provenance, but `.unwrap()` still works.
+  const restoreFetch = installFetchWrapper();
+  try {
+    // Read `globalThis.fetch` *after* the wrapper is installed so the
+    // context captures the wrapped function.
+    const ctx: ProjectSetupContext = {
+      fetch: globalThis.fetch as unknown as SpectestFetch,
+      exec: execInServiceWrapped,
+      svc: serviceHandles,
+      fakes: fakeHandles,
+      dnsName: registerDnsName,
+      startService: startRuntimeService,
+      stopService: stopRuntimeService,
+    };
+    await project.setup(ctx);
+  } finally {
+    restoreFetch();
+  }
+  return { ran: true, durationMs: Date.now() - startedAt };
+}
+// ────────────────────────────────────────────────────────────────────────
+// Test runner
+// ────────────────────────────────────────────────────────────────────────
+interface RunResult { // what `runOne` resolves with for a single test case
+  status: "passed" | "failed";
+  durationMs: number; // NOTE(review): presumably wall-clock ms for the whole run — confirm in runOne
+  log: string; // NOTE(review): presumably the captured console/stdout/stderr text — confirm in runOne
+  /** Ordered events recorded during the test (exec / assertion / http). */
+  events: TestEvent[];
+  /**
+   * Per-browser rrweb sessions captured during the test. Each session
+   * is the events emitted by a single `ctx.browser()` view, drained in
+   * step-tagged chunks (one chunk per Browser op).
+   */
+  browserSessions: BrowserSessionRecord[];
+  /** asciicast sessions for each `ctx.terminal(...)` call. */
+  terminalSessions: TerminalSessionRecord[];
+  /**
+   * `docker logs` per service, captured only when the test failed (empty
+   * on a pass). Lets the failure post-mortem in the web UI show what each
+   * container printed without the author having to add `ctx.exec` log
+   * grabs by hand.
+   */
+  serviceLogs: ServiceLogCapture[];
+  error?: { message: string; stack?: string }; // NOTE(review): presumably present only when status is "failed" — confirm
+}
+/**
+ * Captured container logs for one service, as produced by
+ * `captureServiceLogs`. Both streams are tail-trimmed independently, and
+ * each carries its own truncation flag.
+ */
+interface ServiceLogCapture {
+  service: string; // the service name handed to `docker logs`
+  /** Trailing slice of the container's stdout (RFC3339-timestamped). */
+  stdout: string;
+  stdoutTruncated: boolean; // true when `tailBytes` had to drop the head of stdout
+  /** Trailing slice of the container's stderr. */
+  stderr: string;
+  stderrTruncated: boolean; // true when `tailBytes` had to drop the head of stderr
+}
+/**
+ * How many trailing log lines to grab per service on failure. Passed as
+ * the `--tail` argument of `docker logs` by `captureServiceLogs`.
+ */
+const SERVICE_LOG_TAIL_LINES = 500;
+/**
+ * Per-stream byte cap after the line tail (keeps the most recent bytes).
+ * Applied to stdout and stderr separately through `tailBytes`; 256 KiB.
+ */
+const SERVICE_LOG_MAX_BYTES = 256 * 1024;
+/**
+ * Keep the trailing `max` bytes of `s` — the opposite of `truncateUtf8`'s
+ * head-keep, because the most recent output is what explains a failure.
+ *
+ * The budget is measured in UTF-8 bytes (not UTF-16 code units), so the
+ * cap holds for non-ASCII output too. The cut is moved forward to the next
+ * character boundary, which means the result may be a few bytes *shorter*
+ * than `max` but never contains half a character.
+ *
+ * @param s    Text to trim.
+ * @param max  Maximum size of the result in UTF-8 bytes; values below 0
+ *             are treated as 0.
+ * @returns `value` — `s` itself when it already fits, otherwise its tail;
+ *          `truncated` — whether anything was dropped.
+ */
+function tailBytes(s: string, max: number): { value: string; truncated: boolean } {
+  // A UTF-16 code unit never encodes to more than 3 UTF-8 bytes, so short
+  // strings can be accepted without paying for the encode.
+  if (s.length * 3 <= max) return { value: s, truncated: false };
+  const bytes = Buffer.from(s, "utf8");
+  if (bytes.length <= max) return { value: s, truncated: false };
+  const budget = Math.max(0, Math.floor(max));
+  let start = bytes.length - budget;
+  // Bytes of the form 10xxxxxx continue a multi-byte character; skip them
+  // so decoding starts on a lead byte instead of emitting U+FFFD.
+  while (start < bytes.length && (bytes.readUInt8(start) & 0xc0) === 0x80) start += 1;
+  return { value: bytes.subarray(start).toString("utf8"), truncated: true };
+}
+/**
+ * Capture `docker logs` for every service in the loaded environment.
+ * Called when a test case fails so the web UI can show each container's
+ * recent output. Best-effort and bounded: the last
+ * {SERVICE_LOG_TAIL_LINES} lines, trimmed to the trailing
+ * {SERVICE_LOG_MAX_BYTES} bytes per stream. A `docker logs` failure for
+ * one service surfaces as that service's `stderr` rather than aborting
+ * the whole capture, so a crashed/removed container is still visible.
+ *
+ * That guarantee also covers the `docker(...)` helper itself rejecting
+ * (whether it can is not visible from here): the rejection is caught per
+ * service and reported in that service's `stderr`, so the surrounding
+ * `Promise.all` never rejects because of a single container.
+ *
+ * @returns One entry per service (boot services plus runtime services,
+ *   de-duplicated by name); an empty array when no project is loaded.
+ */
+async function captureServiceLogs(): Promise<ServiceLogCapture[]> {
+  const l = loaded;
+  if (!l) return [];
+  // Boot services plus any runtime services this fork started — both are
+  // real containers a failure post-mortem wants to see. Dedup by name.
+  const byName = new Map<string, NamedService>();
+  for (const s of namedServices(l.project.environment)) byName.set(s.name, s);
+  for (const [name, s] of RUNTIME_SERVICES) byName.set(name, s);
+  const services = [...byName.values()];
+  return Promise.all(
+    services.map(async (svc): Promise<ServiceLogCapture> => {
+      try {
+        const r = await docker(
+          ["logs", "--tail", String(SERVICE_LOG_TAIL_LINES), "--timestamps", svc.name],
+          30_000,
+        );
+        const stdout = tailBytes(r.stdout, SERVICE_LOG_MAX_BYTES);
+        const stderr = tailBytes(r.stderr, SERVICE_LOG_MAX_BYTES);
+        return {
+          service: svc.name,
+          stdout: stdout.value,
+          stdoutTruncated: stdout.truncated,
+          stderr: stderr.value,
+          stderrTruncated: stderr.truncated,
+        };
+      } catch (err: unknown) {
+        // Keep the capture best-effort: report the failure in-band so the
+        // other services' logs still reach the UI.
+        const reason = err instanceof Error ? err.message : String(err);
+        const stderr = tailBytes(`docker logs failed: ${reason}`, SERVICE_LOG_MAX_BYTES);
+        return {
+          service: svc.name,
+          stdout: "",
+          stdoutTruncated: false,
+          stderr: stderr.value,
+          stderrTruncated: stderr.truncated,
+        };
+      }
+    }),
+  );
+}
+/**
+ * Per-Browser rrweb session shipped to the control plane. Created and
+ * mutated by the helpers returned from `newBrowserSession`.
+ */
+interface BrowserSessionRecord {
+  sessionId: string; // minted by `newSessionId(idScope)`
+  openedAtMs: number; // ms between the test's start timestamp and session creation
+  closedAtMs?: number; // set once by `markClosed()`; absent while the session is open
+  initialUrl?: string; // first URL reported through `noteNavigation`; later ones are ignored
+  steps: BrowserSessionStep[]; // appended by `recordStep` until the session is closed
+}
+/**
+ * Produce a fresh session id, prefixed with `idScope` when one is given.
+ *
+ * Why the prefix: `randomUUID()` on its own is NOT unique across test
+ * forks. Sibling cases resume from one snapshot, so the daemon process —
+ * and the guest kernel CSPRNG behind it — comes back with identical RNG
+ * state in every clone, and the first UUID minted after the fork is the
+ * same in all of them (this was observed, it is not hypothetical).
+ * Persistence keys sessions by (run, case, session), so no data was lost
+ * to the collision, but anything aggregating sessions across cases would
+ * conflate them. Sibling forks always run different cases, so the case id
+ * (or `"eval"` for eval-context sessions) supplies exactly the entropy
+ * the clones lack.
+ *
+ * @param idScope  Running test's case id, `"eval"` for eval sessions, or
+ *                 an empty string for an unscoped id.
+ * @returns `"<idScope>:<uuid>"`, or the bare UUID when `idScope` is empty.
+ */
+function newSessionId(idScope: string): string {
+  const uuid = randomUUID();
+  if (!idScope) return uuid;
+  return `${idScope}:${uuid}`;
+}
+/**
+ * Create the bookkeeping for one Browser session.
+ *
+ * @param testStart  Epoch-ms timestamp the session's relative times are
+ *                   measured from.
+ * @param idScope    Scope baked into the session id (see `newSessionId`).
+ * @returns
+ *   - `record`: the in-flight session object the daemon owns;
+ *   - `recorder`: the sink `openBrowser` writes into — it appends steps
+ *     and remembers the first navigated URL, and becomes a no-op once the
+ *     session is closed;
+ *   - `markClosed`: stamps `closedAtMs` and freezes the record; calling
+ *     it again has no effect.
+ */
+function newBrowserSession(testStart: number, idScope: string): {
+  recorder: BrowserSessionRecorder;
+  record: BrowserSessionRecord;
+  markClosed(): void;
+} {
+  const sessionId = newSessionId(idScope);
+  const record: BrowserSessionRecord = {
+    sessionId,
+    openedAtMs: Date.now() - testStart,
+    steps: [],
+  };
+  let isOpen = true;
+  const recorder: BrowserSessionRecorder = {
+    sessionId,
+    recordStep(step) {
+      if (isOpen) record.steps.push(step);
+    },
+    noteNavigation(url) {
+      // Only the first navigation of an open session defines the initial URL.
+      if (isOpen && record.initialUrl === undefined) record.initialUrl = url;
+    },
+  };
+  const markClosed = (): void => {
+    if (!isOpen) return;
+    isOpen = false;
+    record.closedAtMs = Date.now() - testStart;
+  };
+  return { record, recorder, markClosed };
+}
+/**
+ * Per-Terminal asciicast session shipped to the control plane. Created
+ * and mutated by the helpers returned from `newTerminalSession`.
+ */
+interface TerminalSessionRecord {
+  sessionId: string; // minted by `newSessionId(idScope)`
+  openedAtMs: number; // ms between the test's start timestamp and session creation
+  closedAtMs?: number; // set once by `markClosed()`; absent while the session is open
+  service: string; // service the terminal/exec was opened against
+  command: string; // command line, or "(interactive)" when a terminal is opened without one
+  cols: number; // grid width the recording claims
+  rows: number; // grid height the recording claims
+  /** asciicast v2 frames. `o` = output, `i` reserved for future input. */
+  frames: Array<[number, "o" | "i", string]>; // [time in seconds, stream kind, data]
+}
+/**
+ * Create the bookkeeping for one terminal recording (a `ctx.terminal(...)`
+ * call, an `openTerminal` session, or a recorded exec).
+ *
+ * @param testStart  Epoch-ms timestamp the session's relative times are
+ *                   measured from.
+ * @param service    Service the terminal runs against.
+ * @param command    Command line stored on the record.
+ * @param cols       Grid width stored on the record.
+ * @param rows       Grid height stored on the record.
+ * @param idScope    Scope baked into the session id (see `newSessionId`).
+ * @returns
+ *   - `record`: the session object the daemon owns;
+ *   - `pushFrame`: appends an output (`"o"`) frame; ignored after close;
+ *   - `markClosed`: stamps `closedAtMs` and freezes the record; calling
+ *     it again has no effect.
+ */
+function newTerminalSession(
+  testStart: number,
+  service: string,
+  command: string,
+  cols: number,
+  rows: number,
+  idScope: string,
+): {
+  record: TerminalSessionRecord;
+  pushFrame(tSec: number, data: string): void;
+  markClosed(): void;
+} {
+  const sessionId = newSessionId(idScope);
+  const openedAtMs = Date.now() - testStart;
+  const record: TerminalSessionRecord = {
+    sessionId,
+    openedAtMs,
+    service,
+    command,
+    cols,
+    rows,
+    frames: [],
+  };
+  let isOpen = true;
+  const pushFrame = (tSec: number, data: string): void => {
+    if (isOpen) record.frames.push([tSec, "o", data]);
+  };
+  const markClosed = (): void => {
+    if (!isOpen) return;
+    isOpen = false;
+    record.closedAtMs = Date.now() - testStart;
+  };
+  return { record, pushFrame, markClosed };
+}
+/** Grid a recorded `ctx.exec` asciicast claims. There's no PTY behind
+ *  an exec so no real size exists — 80×24 matches the `ctx.terminal`
+ *  default, and the player hard-wraps longer lines the way an actual
+ *  80-col terminal would. Both values are passed to `newTerminalSession`
+ *  for every recorded exec. */
+const EXEC_CAST_COLS = 80;
+const EXEC_CAST_ROWS = 24;
+/** Cumulative cap on asciicast frame bytes per recorded exec. The exec
+ *  *event* caps its stdout/stderr separately (256 KiB each); this bounds
+ *  the recording, which would otherwise duplicate a huge output in the
+ *  run payload and the DB. On overflow the cast gets one trailing
+ *  notice frame and stops growing; the ExecResult is unaffected.
+ *
+ *  NOTE(review): the visible caller (`recordedExec`) tallies each chunk's
+ *  `data.length` — a string length, not an encoded byte count — against
+ *  this cap, and does not count the synthetic prompt frame, so the limit
+ *  is exact only for ASCII output. 1 MiB. */
+const EXEC_FRAME_CAP_BYTES = 1024 * 1024;
+/**
+ * Open a terminal whose output is recorded into a new
+ * `TerminalSessionRecord`.
+ *
+ * The daemon owns the record: it is created here, appended to `sessions`
+ * immediately (before the terminal is opened), and fed through a frame
+ * sink that the terminal factory drains into. The factory only pushes
+ * frames and signals when the session ends.
+ *
+ * Two callers share this path:
+ *   - the long-lived `ctx.openTerminal(...)` API, where the test holds the
+ *     handle and decides when to close it;
+ *   - the one-shot `ctx.terminal(...)` wrapper, which opens a terminal with
+ *     `opts.command`, waits for the program to exit and then closes.
+ *
+ * Heads-up for test authors: TTY-detecting CLIs may start a pager
+ * (psql → less, git → less, etc.) and then block on input because stdin
+ * *is* a TTY here. Turn paging off in the command (e.g.
+ * `psql -P pager=off`) or pass `PAGER=cat` / `PSQL_PAGER=` via `opts.env`.
+ *
+ * @param service       Service to open the terminal in.
+ * @param opts          Terminal options; `cols`/`rows` default to 80×24 and
+ *                      a missing `command` is recorded as "(interactive)".
+ * @param testStart     Epoch-ms timestamp session times are relative to.
+ * @param sessions      Collection the new session record is appended to.
+ * @param recordEvents  Forwarded to `openTerminal` unchanged.
+ * @param idScope       Scope baked into the session id.
+ * @returns The terminal handle produced by `openTerminal`.
+ */
+async function openInstrumentedTerminal(
+  service: string,
+  opts: TerminalOpts | undefined,
+  testStart: number,
+  sessions: TerminalSessionRecord[],
+  recordEvents: boolean,
+  idScope: string,
+): Promise<InternalTerminal> {
+  const recordedCommand = opts?.command ?? "(interactive)";
+  const session = newTerminalSession(
+    testStart,
+    service,
+    recordedCommand,
+    opts?.cols ?? 80,
+    opts?.rows ?? 24,
+    idScope,
+  );
+  // Register the record up front so it is shipped even if opening fails.
+  sessions.push(session.record);
+  const sink: TerminalFrameSink = {
+    pushFrame(t, data) {
+      session.pushFrame(t, data);
+    },
+    markClosed() {
+      session.markClosed();
+    },
+  };
+  const { sessionId } = session.record;
+  return await openTerminal({ service, opts, sink, sessionId, recordEvents });
+}
+// Return value of each test that has completed in this daemon's lifetime.
+// Lives in daemon memory and is captured by every post-test snapshot, so
+// when a child case forks from its parent's snapshot it sees the same Map
+// already populated. Carries arbitrary JS values — no JSON round-trip.
+const TEST_DATA = new Map<string, unknown>(); // key: test case id (looked up via `dependsOn.id`)
+// Cached helper namespaces produced by `ServiceDefinition.helpers`
+// factories. Built lazily on first access and reused for the daemon's
+// lifetime — Bun.SQL pools and similar resources are happy to live a
+// long time, and the underlying TCP connections survive snapshot/fork
+// along with the rest of daemon memory. Cleared on /load and /reload
+// (project change invalidates any cached state).
+const HELPERS_CACHE = new Map<string, Record<string, unknown>>(); // key: service name; filled by `ensureHelpers`
+/**
+ * Return the helpers record for one service, building it on first use.
+ *
+ * The service's `helpers` factory is invoked with `{ name }` and its
+ * result is memoised in `HELPERS_CACHE` under the service name; later
+ * calls return the cached record without calling the factory again.
+ *
+ * @param name  Service name; also the cache key.
+ * @param def   In-memory service definition.
+ * @returns The cached/built helpers, or a fresh empty object when the
+ *   service ships no `helpers` factory — symmetric with what `setup` gets.
+ */
+async function ensureHelpers(
+  name: string,
+  def: ServiceDefinition<Record<string, unknown>>,
+): Promise<Record<string, unknown>> {
+  if (!def.helpers) return {};
+  if (HELPERS_CACHE.has(name)) return HELPERS_CACHE.get(name)!;
+  const built = await def.helpers({ name });
+  HELPERS_CACHE.set(name, built);
+  return built;
+}
+/**
+ * Assemble the `svc` map handed to a test, eval or setup run.
+ *
+ * `svc[name]` is exactly the record returned by that service's `helpers`
+ * factory (e.g. `{ client: SqlClient }` for `postgres(...)`). A service
+ * that defines no `helpers` factory is left out of the map entirely.
+ * Factories are resolved one service at a time, in `cfg.services` order.
+ *
+ * @param cfg  Environment config whose `services` are inspected.
+ * @returns Map from service name to its helpers record.
+ */
+async function buildServiceHandles(cfg: EnvironmentConfig): Promise<ServiceHandles> {
+  const result: ServiceHandles = {};
+  for (const [serviceName, wireDef] of Object.entries(cfg.services)) {
+    // The wire type has no `helpers` member (JSON.stringify drops
+    // functions), but the daemon holds the in-memory definition from the
+    // user's module, where `helpers` is still present if it was declared.
+    const definition = wireDef as ServiceDefinition<Record<string, unknown>>;
+    if (definition.helpers) {
+      result[serviceName] = await ensureHelpers(serviceName, definition);
+    }
+  }
+  return result;
+}
+// ────────────────────────────────────────────────────────────────────────
+// Instrumentation helpers
+// ────────────────────────────────────────────────────────────────────────
+/**
+ * Extract the target URL — and, for a `Request`, its method — from the
+ * first argument of `fetch`.
+ *
+ * @param input  A URL string, a `URL`, or a `Request`.
+ * @returns `{ url }` for strings and `URL`s; `{ url, methodFromInput }`
+ *   for a `Request`.
+ */
+function describeFetchInput(input: Parameters<typeof fetch>[0]): {
+  url: string;
+  methodFromInput?: string;
+} {
+  if (input instanceof URL) return { url: input.toString() };
+  if (typeof input !== "string") {
+    // Anything that is neither a string nor a URL is a Request instance.
+    const request = input as Request;
+    return { url: request.url, methodFromInput: request.method };
+  }
+  return { url: input };
+}
+/**
+ * Describe the request body of a `fetch` call for the HTTP event log,
+ * without consuming anything the real request still needs.
+ *
+ * - `init.body` as a string or `URLSearchParams` is captured as text,
+ *   truncated via `truncateUtf8`.
+ * - Any other `init.body` (Blob, FormData, typed array, stream, …) is
+ *   summarised as `[non-text body: <constructor name>]`.
+ * - With no `init.body`, a `Request` input that actually carries an unread
+ *   body yields the marker `[Request body not captured]`. A `Request`
+ *   without a body (e.g. a plain GET) yields `{}`, so body-less requests
+ *   are not logged as if they had one.
+ *
+ * @param input  First argument of `fetch`.
+ * @param init   Second argument of `fetch`, if any.
+ * @returns `{}` when there is no body to describe, otherwise the captured
+ *   text or a placeholder, plus a truncation flag where known.
+ */
+function describeRequestBody(
+  input: Parameters<typeof fetch>[0],
+  init: Parameters<typeof fetch>[1],
+): { body?: string; truncated?: boolean } {
+  // For Request objects, body has already been consumed into the request;
+  // we can't read it back without cloning, which costs. Skip unless init.body
+  // is provided directly.
+  const body = init?.body;
+  if (body === undefined || body === null) {
+    // `Request.body` is null when the request was built without a body;
+    // only a real, still-unread body deserves the marker.
+    if (input instanceof Request && input.body !== null && input.bodyUsed === false) {
+      // Don't drain the request's body here — leaving it for the actual
+      // fetch. Return a marker.
+      return { body: "[Request body not captured]", truncated: false };
+    }
+    return {};
+  }
+  if (typeof body === "string") {
+    const t = truncateUtf8(body);
+    return { body: t.value, truncated: t.truncated };
+  }
+  if (body instanceof URLSearchParams) {
+    const t = truncateUtf8(body.toString());
+    return { body: t.value, truncated: t.truncated };
+  }
+  return { body: `[non-text body: ${body.constructor?.name ?? typeof body}]` };
+}
+/**
+ * Install a fetch wrapper on `globalThis` that emits HTTP events into the
+ * active recorder. Returns a restore function. Calls outside of a running
+ * test still hit the original fetch (the recorder is null then; the
+ * wrapper just adds a tiny amount of overhead — but we restore after each
+ * test anyway, so this only matters mid-test).
+ *
+ * For every call the wrapper:
+ *   1. reserves an event slot (`reserveEvent`) before the request is sent;
+ *   2. derives URL, method and request-body metadata from the arguments
+ *      without consuming them;
+ *   3. awaits the original fetch, reads a *clone* of the response as text
+ *      and truncates it with `truncateUtf8` (a failure to read the clone is
+ *      swallowed and simply leaves the response body unrecorded);
+ *   4. records the exchange with `recordHttp` and returns
+ *      `wrapResponse(res, seq)`.
+ * If the original fetch rejects, an event carrying the error message is
+ * recorded and the same error is rethrown.
+ *
+ * NOTE(review): step 3 awaits the whole cloned body before the response is
+ * handed back, so a long-lived streaming body would presumably delay the
+ * caller — confirm whether any caller relies on streaming.
+ *
+ * @returns A function that reinstates the `globalThis.fetch` that was in
+ *   place when the wrapper was installed.
+ */
+function installFetchWrapper(): () => void {
+  const original = globalThis.fetch;
+  const wrappedFn = async (
+    input: Parameters<typeof fetch>[0],
+    init?: Parameters<typeof fetch>[1],
+  ): Promise<Response | WrappedResponse> => {
+    const start = Date.now();
+    const resv = reserveEvent();
+    const { url, methodFromInput } = describeFetchInput(input);
+    const method = (init?.method ?? methodFromInput ?? "GET").toUpperCase(); // init wins over the Request's own method
+    const reqBody = describeRequestBody(input, init);
+    try {
+      const res = await original(input as RequestInfo, init);
+      let responseBody: string | undefined;
+      let responseBodyTruncated: boolean | undefined;
+      try {
+        const cloned = res.clone(); // read the copy so the caller's body stays unread
+        const text = await cloned.text();
+        const t = truncateUtf8(text);
+        responseBody = t.value;
+        responseBodyTruncated = t.truncated;
+      } catch {
+        // Binary or unreadable body — leave undefined.
+      }
+      const seq = recordHttp({
+        method,
+        url,
+        requestBody: reqBody.body,
+        requestBodyTruncated: reqBody.truncated,
+        status: res.status,
+        responseBody,
+        responseBodyTruncated,
+        durationMs: Date.now() - start,
+      }, resv);
+      return wrapResponse(res, seq);
+    } catch (err) {
+      const e = err as Error;
+      recordHttp({
+        method,
+        url,
+        requestBody: reqBody.body,
+        requestBodyTruncated: reqBody.truncated,
+        durationMs: Date.now() - start,
+        error: e?.message ?? String(err),
+      }, resv);
+      throw err; // the caller still sees the original failure
+    }
+  };
+  // Preserve any provider-specific statics on `fetch` (e.g. Bun's
+  // `fetch.preconnect`) so consumers that touch them keep working.
+  const wrapped = wrappedFn as unknown as typeof fetch;
+  for (const key of Object.keys(original) as (keyof typeof original)[]) {
+    (wrapped as unknown as Record<string, unknown>)[key as string] = (
+      original as unknown as Record<string, unknown>
+    )[key as string];
+  }
+  globalThis.fetch = wrapped;
+  return () => {
+    globalThis.fetch = original;
+  };
+}
+/**
+ * Run `command` inside a service container via
+ * `docker exec <service> sh -lc <command>` and collect its output.
+ *
+ * The promise always resolves: a failing command is reported through
+ * `exitCode` rather than a rejection. Output is buffered, with a 16 MiB
+ * `maxBuffer`.
+ *
+ * @param service  Container/service name passed to `docker exec`.
+ * @param command  Shell command line handed to `sh -lc`.
+ * @returns stdout, stderr and the exit code — `0` on success, the numeric
+ *   `code` of the execFile error when it has one, otherwise `1`.
+ */
+function execInService(service: string, command: string): Promise<ExecResult> {
+  const argv = ["exec", service, "sh", "-lc", command];
+  return new Promise((resolve) => {
+    execFile("docker", argv, { maxBuffer: 16 * 1024 * 1024 }, (err, stdout, stderr) => {
+      let exitCode = 0;
+      if (err) {
+        // A numeric `code` is the child's exit status; a string code (or
+        // none at all) means the run failed some other way.
+        const code = (err as NodeJS.ErrnoException & { code?: number }).code;
+        exitCode = typeof code === "number" ? Number(code) : 1;
+      }
+      resolve({
+        stdout: stdout.toString(),
+        stderr: stderr.toString(),
+        exitCode,
+      });
+    });
+  });
+}
+/**
+ * `execInService` for contexts that run without a recorder
+ * (`setup` / `eval`).
+ *
+ * The result is passed through `wrap` so `.unwrap()` exists and the
+ * context's wrapped `exec` type is truthful at runtime, but no provenance
+ * is attached because there is no event to link to. Tests use the
+ * recording variant, `recordedExec`, instead.
+ *
+ * @param service  Container/service name.
+ * @param command  Shell command line.
+ * @returns The exec result, wrapped without a sequence number.
+ */
+async function execInServiceWrapped(
+  service: string,
+  command: string,
+): Promise<Wrapped<ExecResult>> {
+  const plain = await execInService(service, command);
+  const wrapped = wrap(plain, undefined);
+  return wrapped as unknown as Wrapped<ExecResult>;
+}
+/** Per-stream cap on the ExecResult strings the streaming variant
+ *  accumulates — parity with the buffered `execInService`'s `maxBuffer`.
+ *  Past the cap we keep draining (so the child never blocks on a full
+ *  pipe) but stop appending; unlike `execFile` we don't kill the
+ *  process, which only makes over-cap runs *more* survivable.
+ *
+ *  NOTE(review): `execInServiceStreaming` compares this against the
+ *  accumulated string's `.length`, so the limit is an exact byte count
+ *  only for ASCII output. 16 MiB. */
+const EXEC_RESULT_CAP_BYTES = 16 * 1024 * 1024;
+/**
+ * Streaming variant of `execInService` for the recorded `ctx.exec`:
+ * the same `docker exec <svc> sh -lc <cmd>` invocation with the same
+ * result shape, but stdout/stderr are drained incrementally so the
+ * caller can timestamp each chunk into an asciicast frame as it
+ * arrives. No PTY is involved — the program still sees plain pipes
+ * (`isatty` false), so the streams stay byte-identical to what `exec`
+ * has always returned; the recording adds arrival *timing* only.
+ * `onChunk` fires in arrival order across both streams — the closest
+ * analogue of what a terminal would have shown — while the returned
+ * `ExecResult` keeps them separate as before.
+ *
+ * Details visible in the implementation:
+ *   - each stream has its own streaming UTF-8 decoder, so a multi-byte
+ *     character split across chunks is not mangled;
+ *   - `onChunk` receives every decoded chunk, including those past
+ *     `EXEC_RESULT_CAP_BYTES`; only the accumulated result is capped;
+ *   - an exception thrown by `onChunk` is swallowed;
+ *   - bytes still held by a decoder at the end are appended to the result
+ *     but are not delivered to `onChunk`;
+ *   - the promise never rejects: a spawn `error`, or a `close` with a
+ *     null exit code, resolves with `exitCode: 1`.
+ *
+ * @param service  Container/service name passed to `docker exec`.
+ * @param command  Shell command line handed to `sh -lc`.
+ * @param onChunk  Called with the stream name and decoded text of each
+ *                 non-empty chunk.
+ * @returns Accumulated stdout/stderr and the exit code.
+ */
+function execInServiceStreaming(
+  service: string,
+  command: string,
+  onChunk: (stream: "stdout" | "stderr", data: string) => void,
+): Promise<ExecResult> {
+  return new Promise((resolve) => {
+    const child = spawn("docker", ["exec", service, "sh", "-lc", command], {
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    const acc = { stdout: "", stderr: "" };
+    const decoders = {
+      stdout: new TextDecoder("utf-8", { fatal: false }),
+      stderr: new TextDecoder("utf-8", { fatal: false }),
+    };
+    const drain = (which: "stdout" | "stderr", chunk: Buffer): void => {
+      const data = decoders[which].decode(chunk, { stream: true }); // may be "" while a character is incomplete
+      if (data.length === 0) return;
+      if (acc[which].length < EXEC_RESULT_CAP_BYTES) {
+        const room = EXEC_RESULT_CAP_BYTES - acc[which].length;
+        acc[which] += data.length > room ? data.slice(0, room) : data;
+      }
+      try {
+        onChunk(which, data);
+      } catch {
+        // frame capture must never break the exec itself
+      }
+    };
+    child.stdout?.on("data", (c: Buffer) => drain("stdout", c));
+    child.stderr?.on("data", (c: Buffer) => drain("stderr", c));
+    let settled = false; // `error` and `close` can both fire; resolve only once
+    const finish = (exitCode: number): void => {
+      if (settled) return;
+      settled = true;
+      // Flush any multi-byte tail the decoders are still holding.
+      acc.stdout += decoders.stdout.decode();
+      acc.stderr += decoders.stderr.decode();
+      resolve({ stdout: acc.stdout, stderr: acc.stderr, exitCode });
+    };
+    // `close` (not `exit`) so both pipes are fully drained first.
+    child.on("close", (code) => finish(code ?? 1));
+    child.on("error", () => finish(1));
+  });
+}
+/**
+ * Repeatedly invoke `fn` until it yields a usable value, and record the
+ * whole poll as a single wait event.
+ *
+ * A result of `null`, `undefined` or `false` means "not yet": the events
+ * that attempt emitted are dropped from the recorder and, budget
+ * permitting, `fn` is retried after `intervalMs`. The first other value
+ * ends the poll successfully; that attempt's events are kept and marked
+ * as children of the wait event. If `fn` throws (or rejects), polling
+ * stops at once, every attempt's events are dropped and the thrown value
+ * is rethrown unchanged. That holds even when the thrown value is
+ * `undefined` (e.g. a bare `Promise.reject()`), which is tracked with an
+ * explicit flag rather than inferred from the value.
+ *
+ * @param description  Label stored on the wait event and quoted in the
+ *                     timeout error.
+ * @param fn           Predicate to poll; may be synchronous or async.
+ * @param opts         `timeoutMs` — overall budget (default 30_000; `fn`
+ *                     is not started once it is spent, so a budget of 0
+ *                     never calls it). `intervalMs` — pause between
+ *                     attempts (default 1_000).
+ * @returns The first usable value, passed through `wrap` together with
+ *          the wait event's sequence number.
+ * @throws Whatever `fn` threw, or an `Error` when the budget runs out.
+ */
+async function pollCall<T>(
+  description: string,
+  fn: () =>
+    | T
+    | null
+    | undefined
+    | false
+    | Promise<T | null | undefined | false>,
+  opts?: { timeoutMs?: number; intervalMs?: number },
+): Promise<T> {
+  const timeoutMs = opts?.timeoutMs ?? 30_000;
+  const intervalMs = opts?.intervalMs ?? 1_000;
+  const start = Date.now();
+  // Reserve the wait's slot up front so it sorts at the poll's *start*,
+  // ahead of the iteration events it nests (which record as the poll runs).
+  const resv = reserveEvent();
+  let attempts = 0;
+  let value: T | undefined;
+  let success = false;
+  // Tracked separately from `predicateError` because the thrown value
+  // itself may legitimately be `undefined`.
+  let predicateThrew = false;
+  let predicateError: unknown;
+  // Record all iterations normally. Falsy iterations get truncated
+  // from the recorder so the timeline doesn't fill with polling
+  // noise; the LAST iteration's events stay, then get marked as
+  // children of the wait event so the UI can render them nested.
+  const beforePollIdx = recorderEventCount();
+  let lastIterStartIdx = beforePollIdx;
+  let keptIterStartIdx = beforePollIdx;
+  while (Date.now() - start < timeoutMs) {
+    attempts += 1;
+    lastIterStartIdx = recorderEventCount();
+    try {
+      const v = await fn();
+      if (v !== null && v !== undefined && v !== false) {
+        value = v as T;
+        success = true;
+        keptIterStartIdx = lastIterStartIdx;
+        break;
+      }
+    } catch (err) {
+      predicateThrew = true;
+      predicateError = err;
+      break;
+    }
+    // Failed iteration — drop the events it emitted.
+    recorderTruncate(lastIterStartIdx);
+    // Don't sleep if the next attempt could not start within the budget.
+    if (Date.now() - start + intervalMs > timeoutMs) break;
+    await new Promise((r) => setTimeout(r, intervalMs));
+  }
+  if (!success) {
+    // Timeout or predicate error: drop every attempt's events. The
+    // wait event we emit below is the only trace.
+    recorderTruncate(beforePollIdx);
+  }
+  const errMsg = predicateThrew
+    ? ((predicateError as Error)?.message ?? String(predicateError))
+    : success
+      ? undefined
+      : `timed out after ${timeoutMs}ms`;
+  const seq = recordWait({
+    description,
+    attempts,
+    durationMs: Date.now() - start,
+    passed: success,
+    ...(errMsg !== undefined ? { error: errMsg } : {}),
+  }, resv);
+  if (success && seq !== undefined) {
+    // Group the kept iteration's events under the wait so the UI can
+    // render them inside the wait card. The wait event itself is the
+    // very last entry; markChildren skips it via the seq match.
+    recorderMarkChildren(keptIterStartIdx, seq);
+  }
+  if (predicateThrew) throw predicateError;
+  if (success) {
+    return wrap(value as T, seq) as T;
+  }
+  throw new Error(
+    `poll ${JSON.stringify(description)} timed out after ${timeoutMs}ms (${attempts} attempts)`,
+  );
+}
+/**
+ * Mirror `console.log/info/warn/error/debug` output into `chunks` while a
+ * test or eval runs.
+ *
+ * Bun's console writes through its own native sink rather than
+ * `process.stdout.write`, so patching the streams alone would miss every
+ * `console.*` call a test makes and the captured `log` would come back
+ * empty. Each patched method appends one line to `chunks` — string
+ * arguments verbatim, everything else via `Bun.inspect`, joined by
+ * spaces — and then still calls the original, so the daemon journal keeps
+ * the line. A failure while formatting is swallowed.
+ *
+ * @param chunks  Array the formatted lines are appended to.
+ * @returns A function that puts the original console methods back; meant
+ *   for the caller's `finally`.
+ */
+function captureConsole(chunks: string[]): () => void {
+  const methods = ["log", "info", "warn", "error", "debug"] as const;
+  type Method = (typeof methods)[number];
+  const originals: Array<[Method, (typeof console)["log"]]> = [];
+  for (const m of methods) {
+    const original = console[m];
+    const passthrough = original.bind(console);
+    originals.push([m, original]);
+    console[m] = (...args: unknown[]) => {
+      try {
+        const parts = args.map((x) => (typeof x === "string" ? x : Bun.inspect(x)));
+        chunks.push(parts.join(" ") + "\n");
+      } catch {
+        // capture must never break the test
+      }
+      passthrough(...args);
+    };
+  }
+  return () => {
+    for (const [m, original] of originals) console[m] = original;
+  };
+}
+async function runOne(testCase: TestCase<unknown>): Promise<RunResult> {
+  const start = Date.now();
+  const chunks: string[] = [];
+  const origStdout = process.stdout.write.bind(process.stdout);
+  const origStderr = process.stderr.write.bind(process.stderr);
+  const capture = (s: unknown): boolean => {
+    chunks.push(typeof s === "string" ? s : Buffer.from(s as Uint8Array).toString("utf8"));
+    return true;
+  };
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  (process.stdout as any).write = capture;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  (process.stderr as any).write = capture;
+  const restoreConsole = captureConsole(chunks);
+  // Terminal sessions: each `ctx.exec` / `ctx.terminal(...)` /
+  // `ctx.openTerminal(...)` call opens its own session (asciicast frames
+  // live on the record; the inline TestEvent just carries metadata + a
+  // sessionId pointer). Drained at the end of the test and shipped on
+  // RunResult.terminalSessions.
+  //
+  // Unlike browsers, we don't auto-close terminals at test end. A
+  // `docker exec` subprocess is cheap to keep alive (a few KB), and
+  // Freestyle's snapshot captures it cleanly along with the container
+  // — so leaving it running between tests doesn't leak in any
+  // meaningful sense. Auto-closing only added a noisy `close` step
+  // at the end of every test that used `openTerminal`.
+  const terminalSessions: TerminalSessionRecord[] = [];
+  // Wrap exec so each call shows up in the event log alongside its
+  // result. We do this here (not on `execInService` itself) so the
+  // bootstrap path stays uninstrumented. Each call also captures the
+  // full CLI run as an asciicast — one exec step = one run = one
+  // recording: output chunks are timestamped as they stream in, so slow
+  // or animated output replays with real timing in the web UI. The
+  // frames are presentation-only; the ExecResult (and any assertions on
+  // it) still sees the plain separated stdout/stderr.
+  const recordedExec = async (service: string, command: string): Promise<ExecResult> => {
+    const t = Date.now();
+    const resv = reserveEvent();
+    const session = newTerminalSession(
+      start,
+      service,
+      command,
+      EXEC_CAST_COLS,
+      EXEC_CAST_ROWS,
+      testCase.id,
+    );
+    terminalSessions.push(session.record);
+    // Synthetic prompt frame so the replay is self-describing — the
+    // program's own output starts on the next line, like a real shell.
+    session.pushFrame(0, `\x1b[32m${service} $\x1b[0m \x1b[1m${command}\x1b[0m\r\n`);
+    let frameBytes = 0;
+    let frameCapped = false;
+    const res = await execInServiceStreaming(service, command, (_stream, data) => {
+      if (frameCapped) return;
+      if (frameBytes + data.length > EXEC_FRAME_CAP_BYTES) {
+        frameCapped = true;
+        session.pushFrame(
+          (Date.now() - t) / 1000,
+          "\r\n\x1b[2m[spectest: recording truncated — output exceeded the cast cap]\x1b[0m\r\n",
+        );
+        return;
+      }
+      frameBytes += data.length;
+      // Pipes deliver bare `\n`; a terminal renderer needs `\r\n` or
+      // every line starts at the previous line's end column
+      // (stair-stepping). PTY output is ONLCR-cooked by the kernel —
+      // pipe output is not, so cook it here. Normalising existing
+      // `\r\n` too keeps a CR|LF split across chunk boundaries
+      // harmless (`\r\r\n` renders identically).
+      session.pushFrame((Date.now() - t) / 1000, data.replace(/\r?\n/g, "\r\n"));
+    });
+    session.markClosed();
+    const stdout = truncateUtf8(res.stdout);
+    const stderr = truncateUtf8(res.stderr);
+    const seq = recordExec({
+      service,
+      command,
+      exitCode: res.exitCode,
+      stdout: stdout.value,
+      stdoutTruncated: stdout.truncated,
+      stderr: stderr.value,
+      stderrTruncated: stderr.truncated,
+      durationMs: Date.now() - t,
+      sessionId: session.record.sessionId,
+    }, resv);
+    return wrap(res, seq);
+  };
+  // One-shot: open a terminal with `command` as the entrypoint, wait
+  // for it to exit, close, and return the existing TerminalResult
+  // shape. The asciicast and one TerminalEvent line up exactly with
+  // the pre-interactive implementation, just routed through the new
+  // factory.
+  const recordedTerminal = async (
+    service: string,
+    command: string,
+    opts?: TerminalOpts,
+  ): Promise<TerminalResult> => {
+    const timeoutMs = opts?.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
+    const startedAt = Date.now();
+    const resv = reserveEvent();
+    const term = await openInstrumentedTerminal(
+      service,
+      { ...opts, command, timeoutMs },
+      start,
+      terminalSessions,
+      false, // one-shot doesn't emit per-op step events
+      testCase.id,
+    );
+    // `term.exited` resolves to a wrapped result; this one-shot path needs
+    // the plain `exitCode` number, so `.unwrap()` the result first.
+    const { exitCode } = (await term.exited).unwrap();
+    await term.close();
+    const output = term.rawOutput();
+    const preview = truncateUtf8(output);
+    const seq = recordTerminal({
+      service,
+      command,
+      exitCode,
+      durationMs: Date.now() - startedAt,
+      sessionId: term.sessionId,
+      cols: term.cols,
+      rows: term.rows,
+      outputPreview: preview.value,
+      outputTruncated: preview.truncated,
+    }, resv);
+    const result: TerminalResult = {
+      output,
+      exitCode,
+      durationMs: Date.now() - startedAt,
+      sessionId: term.sessionId,
+    };
+    return wrap(result, seq);
+  };
+  // Long-lived: open an interactive terminal. Each method on the
+  // returned Terminal records a `terminal-step` event tied back to
+  // this session id; the per-op screen previews are written into the
+  // event so the UI can show "what the user saw after sendLine 'ls'".
+  const recordedOpenTerminal = async (
+    service: string,
+    opts?: TerminalOpts,
+  ): Promise<Terminal> => {
+    const term = await openInstrumentedTerminal(
+      service,
+      opts,
+      start,
+      terminalSessions,
+      true,
+      testCase.id,
+    );
+    // Emit a one-time `terminal` event so the session shows up in the
+    // sidebar even before any step lands. `exitCode` is filled in by
+    // the eventual `exit`/`close` step event; the inline summary here
+    // uses -1 as a sentinel until then.
+    const preview = truncateUtf8(term.rawOutput());
+    recordTerminal({
+      service,
+      command: opts?.command ?? "(interactive)",
+      exitCode: -1,
+      durationMs: 0,
+      sessionId: term.sessionId,
+      cols: term.cols,
+      rows: term.rows,
+      outputPreview: preview.value,
+      outputTruncated: preview.truncated,
+    });
+    return term;
+  };
+  startRecording();
+  const restoreFetch = installFetchWrapper();
+  // Look up the parent's stored return value (if any). The parent ran in
+  // an ancestor fork; its TEST_DATA entry travels with the snapshot.
+  const parentId = testCase.dependsOn?.id;
+  const parent = parentId !== undefined ? TEST_DATA.get(parentId) : undefined;
+  // Track every Browser opened during this test so we can close them in
+  // `finally` — leaked Chromium subprocesses would survive the snapshot
+  // and chew memory across forks. Each Browser also gets a session
+  // recorder; the records flow back to the control plane as part of
+  // RunResult.browserSessions and are persisted to SQLite.
+  const openBrowsers: Browser[] = [];
+  const sessions: Array<ReturnType<typeof newBrowserSession>> = [];
+  const trackedOpenBrowser = async (opts?: BrowserOptions): Promise<Browser> => {
+    const session = newBrowserSession(start, testCase.id);
+    sessions.push(session);
+    const b = await openBrowser({ ...(opts ?? {}), recorder: session.recorder });
+    openBrowsers.push(b);
+    return b;
+  };
+  // Build convenience handles (e.g. ctx.svc.db.client) from the loaded
+  // project. Done before installing the timeout so a slow client factory
+  // surfaces as a real error rather than getting attributed to the test.
+  const svc = await buildServiceHandles(requireLoaded().project.environment);
+  const fakes = await buildFakeHandles();
+  const ctx: TestContext<unknown> = {
+    // installFetchWrapper just swapped globalThis.fetch for the wrapped
+    // version, so capturing it here gets us instrumentation on ctx.fetch
+    // for free. The recorder is active for the test, so responses come
+    // back wrapped — hence the SpectestFetch type.
+    fetch: globalThis.fetch as unknown as SpectestFetch,
+    // exec/terminal/poll wrap their results at runtime (the recorder is
+    // active), so the ctx interface types them wrapped — same bridge as
+    // `fetch` above. The impls' own return types stay raw.
+    exec: recordedExec as unknown as TestContext<unknown>["exec"],
+    terminal: recordedTerminal as unknown as TestContext<unknown>["terminal"],
+    openTerminal: recordedOpenTerminal,
+    browser: trackedOpenBrowser,
+    testName: testCase.name,
+    parent,
+    svc,
+    fakes,
+    poll: pollCall as unknown as TestContext<unknown>["poll"],
+    dnsName: registerDnsName,
+    startService: startRuntimeService,
+    stopService: stopRuntimeService,
+  };
+  const timeoutMs = testCase.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
+  let timer: NodeJS.Timeout | undefined;
+  const timedOut = new Promise<never>((_, reject) => {
+    timer = setTimeout(
+      () => reject(new Error(`test timed out after ${timeoutMs}ms`)),
+      timeoutMs,
+    );
+  });
+  // Result events are gathered inside finally (after the final browser
+  // drains) so we hoist these out of the try/catch.
+  let outcome: { status: "passed" | "failed"; error?: RunResult["error"] };
+  try {
+    const value = await Promise.race([
+      Promise.resolve(testCase.run(ctx)),
+      timedOut,
+    ]);
+    // Stash the return value so child cases — which fork from the snapshot
+    // we're about to capture — can read it off ctx.parent.
+    TEST_DATA.set(testCase.id, value);
+    outcome = { status: "passed" };
+  } catch (err) {
+    const e = err as Error;
+    outcome = {
+      status: "failed",
+      error: { message: e.message ?? String(err), stack: e.stack },
+    };
+  } finally {
+    if (timer) clearTimeout(timer);
+    restoreFetch();
+    restoreConsole();
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (process.stdout as any).write = origStdout;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (process.stderr as any).write = origStderr;
+    // Best-effort browser cleanup. `close()` does a final rrweb drain
+    // before tearing the view down, so we must `await` it before
+    // collecting session records. Leaked Chromium subprocesses would
+    // survive the snapshot and chew memory across forks.
+    for (const b of openBrowsers) {
+      try {
+        await b.close();
+      } catch {
+        /* ignore */
+      }
+    }
+    for (const s of sessions) s.markClosed();
+  }
+  const durationMs = Date.now() - start;
+  // On failure, grab each service's recent container logs for the
+  // post-mortem. Captured after the duration clock stops so the
+  // log-fetch round trips aren't billed to the test.
+  let serviceLogs: ServiceLogCapture[] = [];
+  if (outcome.status === "failed") {
+    try {
+      serviceLogs = await captureServiceLogs();
+    } catch (err) {
+      // eslint-disable-next-line no-console
+      console.warn("[service-logs] capture failed:", err);
+    }
+  }
+  const events = stopRecording();
+  // Drop sessions whose linking event didn't survive — an exec/terminal
+  // inside a failed `ctx.poll` iteration has its events removed by
+  // recorderTruncate, so its recording would be an unreachable orphan in
+  // the UI (and a polled exec would ship one dead cast per attempt).
+  const referencedSessions = new Set<string>();
+  for (const ev of events) {
+    const sid = (ev as { sessionId?: unknown }).sessionId;
+    if (typeof sid === "string") referencedSessions.add(sid);
+  }
+  return {
+    status: outcome.status,
+    durationMs,
+    log: chunks.join(""),
+    events,
+    browserSessions: sessions.map((s) => s.record),
+    terminalSessions: terminalSessions.filter((s) => referencedSessions.has(s.sessionId)),
+    serviceLogs,
+    error: outcome.error,
+  };
+}
+// ────────────────────────────────────────────────────────────────────────
+// Ad-hoc eval (manual testing surface — REPL-like)
+// ────────────────────────────────────────────────────────────────────────
+interface EvalResult { // Payload evalCode() resolves with; the POST /eval route sends it back as JSON.
+  ok: boolean; // true when dependency install, file write and module import all completed without throwing
+  durationMs: number; // wall-clock milliseconds measured from the start of evalCode()
+  log: string; // everything captured from stdout/stderr/console during the eval, concatenated
+  /** JSON-safe serialization of the module's default export (only set when `ok` is true). */
+  result?: unknown;
+  /** npm packages auto-installed for this snippet (empty if none). */
+  installed: string[];
+  /** rrweb sessions for any Browser opened during the eval. */
+  browserSessions: BrowserSessionRecord[];
+  /** asciicast sessions for any ctx.terminal() call during the eval. */
+  terminalSessions: TerminalSessionRecord[];
+  error?: { message: string; stack?: string }; // only set when `ok` is false
+}
+const EVAL_DIR = path.join(APP_DIR, ".spectest-eval"); // scratch directory: evalCode writes each snippet here as `<uuid>.ts` and unlinks it afterwards
+// Persistent state across eval calls. Snippets mutate it through the
+// `state` global that evalCode installs; survives until the daemon process restarts.
+const EVAL_STATE: Record<string, unknown> = {};
+// Transpiler instance reused for `scanImports` (see ensureDeps). We don't
+// transpile the user code — Bun runs the .ts file directly — scanImports only
+// lists the snippet's imports so missing deps can be auto-installed.
+const SCAN_TRANSPILER = new Bun.Transpiler({ loader: "ts" });
+/**
+ * Round-trip a value through JSON so it is safe to ship in a response.
+ * `undefined` passes through untouched; any value JSON cannot encode or
+ * re-parse (cycles, bigint, functions, …) degrades to `String(v)`.
+ */
+function safeSerialize(v: unknown): unknown {
+  if (v !== undefined) {
+    try {
+      const encoded = JSON.stringify(v);
+      return JSON.parse(encoded);
+    } catch {
+      return String(v);
+    }
+  }
+  return undefined;
+}
+/**
+ * Installable package name for an import specifier: the first path segment,
+ * or the first two (`@scope/name`) when the specifier is scoped.
+ */
+function packageName(spec: string): string {
+  const segments = spec.split("/");
+  const depth = spec.startsWith("@") ? 2 : 1;
+  return segments.slice(0, depth).join("/");
+}
+/**
+ * Scan the snippet's imports and `bun add` anything that doesn't already
+ * resolve from APP_DIR. Skips specifiers that can never name an npm package:
+ * relative paths, absolute paths, `#` subpath imports, `node:`/`bun:`
+ * built-ins, and HTTP(S)/file:/data: URLs.
+ *
+ * @param code - Snippet source; only its import specifiers are inspected.
+ * @returns The package names that were installed (empty when nothing was
+ *   missing or the snippet could not be scanned).
+ * @throws Error when `bun add` fails; the message carries its stderr and stdout.
+ */
+async function ensureDeps(code: string): Promise<string[]> {
+  // Prefixes of specifiers that must never be handed to `bun add`. `#…` is a
+  // package-internal subpath import and `data:` an inline module; trying to
+  // install either would fail and hide the snippet's real import error.
+  const nonInstallablePrefixes = [".", "/", "#", "node:", "bun:", "http:", "https:", "file:", "data:"];
+  let scanned: { path: string; kind?: string }[];
+  try {
+    scanned = SCAN_TRANSPILER.scanImports(code) as { path: string; kind?: string }[];
+  } catch {
+    // Invalid syntax — let the import call surface the real error.
+    return [];
+  }
+  const seen = new Set<string>();
+  const missing: string[] = [];
+  for (const imp of scanned) {
+    const p = imp.path;
+    if (nonInstallablePrefixes.some((prefix) => p.startsWith(prefix))) continue;
+    // Only the first specifier seen for a package decides whether it is missing.
+    const pkg = packageName(p);
+    if (seen.has(pkg)) continue;
+    seen.add(pkg);
+    try {
+      Bun.resolveSync(p, APP_DIR);
+    } catch {
+      missing.push(pkg);
+    }
+  }
+  if (missing.length === 0) return [];
+  // One `bun add` for the whole batch, run in APP_DIR.
+  await new Promise<void>((resolve, reject) => {
+    execFile(
+      "/usr/local/bin/bun",
+      ["add", ...missing],
+      { cwd: APP_DIR, maxBuffer: 16 * 1024 * 1024 },
+      (err, stdout, stderr) => {
+        if (err) {
+          reject(
+            new Error(
+              `bun add ${missing.join(" ")} failed:\n${String(stderr).trim()}\n${String(stdout).trim()}`,
+            ),
+          );
+        } else {
+          resolve();
+        }
+      },
+    );
+  });
+  return missing;
+}
+/**
+ * Run an ad-hoc snippet as a real ES module and report the outcome.
+ *
+ * For the duration of the call, stdout/stderr/console output is captured into
+ * the returned `log`, `fetch` is wrapped, and `ctx`, `expect`, `expectRaw`,
+ * `assert` and `state` are published as globals for the snippet. The snippet
+ * is written to a fresh file under EVAL_DIR, imported, and the file is
+ * unlinked afterwards (best-effort).
+ *
+ * @param code - Source of the snippet to import.
+ * @param secrets - Eval-scoped record-mode secrets; cleared again before this
+ *   function settles.
+ * @returns An EvalResult. Failures while installing deps, writing, or
+ *   importing the snippet are reported as `ok: false` rather than thrown.
+ * @throws Whatever building the service/fake handles throws — but only after
+ *   every process-wide hook installed here has been undone.
+ */
+async function evalCode(
+  code: string,
+  secrets?: Record<string, string>,
+): Promise<EvalResult> {
+  const start = Date.now();
+  // Eval-scoped secret channel for record-mode fakes — set before the
+  // snippet runs, cleared in the `finally` below so a secret never
+  // persists into daemon memory (and thus into a forkable snapshot) past
+  // the eval that supplied it. See record-secrets.ts.
+  setRecordSecrets(secrets);
+  const chunks: string[] = [];
+  const origStdout = process.stdout.write.bind(process.stdout);
+  const origStderr = process.stderr.write.bind(process.stderr);
+  const capture = (s: unknown): boolean => {
+    chunks.push(typeof s === "string" ? s : Buffer.from(s as Uint8Array).toString("utf8"));
+    return true;
+  };
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  (process.stdout as any).write = capture;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  (process.stderr as any).write = capture;
+  // Bun's console.* bypasses process.stdout.write — tee it too.
+  const restoreConsole = captureConsole(chunks);
+  // Wrap fetch for the snippet's duration so `ctx.fetch` returns a wrapped
+  // Response just like in a test (no recorder here, so no provenance — but the
+  // wrapped type is honest at runtime). Restored in the `finally` below.
+  const restoreFetch = installFetchWrapper();
+  const openBrowsers: Browser[] = [];
+  const sessions: Array<ReturnType<typeof newBrowserSession>> = [];
+  const trackedOpenBrowser = async (opts?: BrowserOptions): Promise<Browser> => {
+    const session = newBrowserSession(start, "eval");
+    sessions.push(session);
+    const b = await openBrowser({ ...(opts ?? {}), recorder: session.recorder });
+    openBrowsers.push(b);
+    return b;
+  };
+  // Terminal sessions — same shape as runOne, but eval has no active
+  // recorder so we don't emit inline events; the asciicast frames
+  // still ship back on EvalResult.terminalSessions and the web UI
+  // renders the player.
+  const terminalSessions: TerminalSessionRecord[] = [];
+  const evalTerminal = async (
+    service: string,
+    command: string,
+    opts?: TerminalOpts,
+  ): Promise<TerminalResult> => {
+    const timeoutMs = opts?.timeoutMs ?? DEFAULT_TEST_TIMEOUT_MS;
+    const startedAt = Date.now();
+    const term = await openInstrumentedTerminal(
+      service,
+      { ...opts, command, timeoutMs },
+      start,
+      terminalSessions,
+      false,
+      "eval",
+    );
+    const { exitCode } = (await term.exited).unwrap();
+    await term.close();
+    return {
+      output: term.rawOutput(),
+      exitCode,
+      durationMs: Date.now() - startedAt,
+      sessionId: term.sessionId,
+    };
+  };
+  const evalOpenTerminal = async (
+    service: string,
+    opts?: TerminalOpts,
+  ): Promise<Terminal> => {
+    return openInstrumentedTerminal(
+      service,
+      opts,
+      start,
+      terminalSessions,
+      false,
+      "eval",
+    );
+  };
+  let installed: string[] = [];
+  let filePath: string | undefined;
+  let outcome:
+    | { ok: true; result?: unknown }
+    | { ok: false; error: EvalResult["error"] };
+  // From here on everything that can throw runs under the outer `finally`, so
+  // the hijacked stdout/stderr/console/fetch are always restored and the
+  // secrets always cleared — including when building the handles fails.
+  try {
+    // Convenience handles are best-effort for eval — if the project isn't
+    // loaded yet, fall back to an empty map so quick `await fetch(...)`
+    // snippets don't require a /load round-trip first.
+    const svc: ServiceHandles = loaded
+      ? await buildServiceHandles(loaded.project.environment)
+      : {};
+    const fakes = loaded ? await buildFakeHandles() : {};
+    const ctx: TestContext<undefined> = {
+      // installFetchWrapper swapped globalThis.fetch above, so this captures the
+      // wrapped version — eval results are wrapped just like in a test.
+      fetch: globalThis.fetch as unknown as SpectestFetch,
+      // Wraps its result the same way (no recorder under eval, so no provenance —
+      // but the wrapped type is honest at runtime, so `.unwrap()` works).
+      exec: execInServiceWrapped as unknown as TestContext<undefined>["exec"],
+      terminal: evalTerminal as unknown as TestContext<undefined>["terminal"],
+      openTerminal: evalOpenTerminal,
+      browser: trackedOpenBrowser,
+      testName: "eval",
+      parent: undefined,
+      svc,
+      fakes,
+      poll: pollCall as unknown as TestContext<undefined>["poll"],
+      dnsName: registerDnsName,
+      startService: startRuntimeService,
+      stopService: stopRuntimeService,
+    };
+    // Expose the test context, matchers, and persistent state as globals
+    // so the snippet can use them without an explicit import. The user code
+    // is real ESM, so `import { Client } from "pg"` and top-level `await`
+    // work natively.
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const g = globalThis as any;
+    g.ctx = ctx;
+    g.expect = expect;
+    g.expectRaw = expectRaw;
+    g.assert = assert;
+    g.state = EVAL_STATE;
+    try {
+      installed = await ensureDeps(code);
+      await fs.mkdir(EVAL_DIR, { recursive: true });
+      filePath = path.join(EVAL_DIR, `${randomUUID()}.ts`);
+      await fs.writeFile(filePath, code);
+      const mod = (await import(pathToFileURL(filePath).href)) as { default?: unknown };
+      outcome = { ok: true, result: safeSerialize(mod.default) };
+    } catch (err) {
+      // A snippet may throw anything, including `null`/`undefined`; optional
+      // chaining keeps this handler itself from throwing on those.
+      const e = err as Partial<Error> | null | undefined;
+      outcome = {
+        ok: false,
+        error: { message: e?.message ?? String(err), stack: e?.stack },
+      };
+    }
+  } finally {
+    clearRecordSecrets();
+    restoreFetch();
+    restoreConsole();
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (process.stdout as any).write = origStdout;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (process.stderr as any).write = origStderr;
+    // Close every Browser the snippet opened before reading the session records.
+    for (const b of openBrowsers) {
+      try {
+        await b.close();
+      } catch {
+        /* ignore */
+      }
+    }
+    for (const s of sessions) s.markClosed();
+    if (filePath) {
+      fs.unlink(filePath).catch(() => {
+        /* best-effort cleanup */
+      });
+    }
+  }
+  // Fields shared by the success and failure payloads.
+  const common = {
+    durationMs: Date.now() - start,
+    log: chunks.join(""),
+    installed,
+    browserSessions: sessions.map((s) => s.record),
+    terminalSessions,
+  };
+  return outcome.ok
+    ? { ok: true, ...common, result: outcome.result }
+    : { ok: false, ...common, error: outcome.error };
+}
+// ────────────────────────────────────────────────────────────────────────
+// HTTP server
+// ────────────────────────────────────────────────────────────────────────
+/**
+ * Collect the whole request body and decode it as UTF-8.
+ * Resolves on the stream's `end` event; rejects on its `error` event.
+ */
+async function readBody(req: http.IncomingMessage): Promise<string> {
+  const parts: Buffer[] = [];
+  return new Promise<string>((resolve, reject) => {
+    req.on("error", reject);
+    req.on("data", (part) => {
+      parts.push(part);
+    });
+    req.on("end", () => {
+      resolve(Buffer.concat(parts).toString("utf8"));
+    });
+  });
+}
+/**
+ * Send `body` as a JSON response with the given status code, setting
+ * `content-type` and a byte-accurate `content-length`.
+ */
+function jsonResponse(res: http.ServerResponse, status: number, body: unknown): void {
+  const encoded = Buffer.from(JSON.stringify(body));
+  const headers = {
+    "content-type": "application/json",
+    "content-length": encoded.length,
+  };
+  res.writeHead(status, headers);
+  res.end(encoded);
+}
+interface RouteState { // In-flight job slots shared by all requests; a non-null slot makes the matching route answer 409.
+  inFlightTest: Promise<unknown> | null; // pending /run or /eval job (the two routes share this slot)
+  inFlightBootstrap: Promise<unknown> | null; // pending /bootstrap job
+  inFlightProjectSetup: Promise<unknown> | null; // pending /project-setup job
+}
+/**
+ * Route one HTTP request to the matching daemon operation and write the
+ * response (JSON everywhere except the plain-text /health).
+ *
+ * Routes are matched on method plus path; a query string, if present, is
+ * ignored. /bootstrap, /project-setup and /run + /eval (one shared slot) each
+ * allow a single job at a time and answer 409 while one is pending. Unknown
+ * routes answer 404.
+ *
+ * @param req - Incoming request.
+ * @param res - Response to write to.
+ * @param state - In-flight job slots shared across requests.
+ * @returns A promise that rejects with whatever the underlying operation
+ *   throws; the caller is expected to turn that into an error response.
+ */
+async function handle(req: http.IncomingMessage, res: http.ServerResponse, state: RouteState): Promise<void> {
+  // Route on the path only, so a request such as `/health?probe=1` still
+  // reaches its handler.
+  const rawUrl = req.url ?? "";
+  const queryAt = rawUrl.indexOf("?");
+  const url = queryAt === -1 ? rawUrl : rawUrl.slice(0, queryAt);
+  const method = req.method ?? "GET";
+  // Read the body as a JSON object. Answers 400 itself and returns undefined
+  // when the body is not valid JSON. Valid JSON that is not an object (`null`,
+  // a number, …) becomes `{}` so the field checks below reject it with their
+  // own 400 instead of crashing on a property read.
+  const readJsonObject = async (): Promise<Record<string, unknown> | undefined> => {
+    const body = await readBody(req);
+    try {
+      const value: unknown = JSON.parse(body || "{}");
+      return typeof value === "object" && value !== null ? (value as Record<string, unknown>) : {};
+    } catch {
+      jsonResponse(res, 400, { error: "invalid JSON body" });
+      return undefined;
+    }
+  };
+  // Answers 409 and returns true while a /run or /eval job holds the slot.
+  const rejectIfTestRunning = (message: string): boolean => {
+    if (!state.inFlightTest) return false;
+    jsonResponse(res, 409, { error: message });
+    return true;
+  };
+  if (method === "GET" && url === "/health") {
+    res.writeHead(200, { "content-type": "text/plain" });
+    res.end("ok\n");
+    return;
+  }
+  if (method === "GET" && url === "/progress") {
+    // Live bootstrap progress, polled by the control plane during
+    // /bootstrap and streamed into the test-run row. `{}` before the
+    // first bootstrap() of this daemon.
+    jsonResponse(res, 200, BOOTSTRAP_PROGRESS ?? {});
+    return;
+  }
+  if (method === "POST" && url === "/load") {
+    // Env only — services/fakes/setup. For the legacy single-file layout the
+    // entry also defines the tests, so `cases` is populated here; for the
+    // split layout `cases` is empty until /load-tests runs.
+    const proj = await loadEnv();
+    jsonResponse(res, 200, {
+      environment: proj.environment,
+      cases: casesMetadata(proj.tests),
+    });
+    return;
+  }
+  if (method === "POST" && url === "/load-tests") {
+    // Import spectest/tests/** into the already-loaded env and return the
+    // resulting catalogue. Called after the warm snapshot (cold path) or
+    // against a freshly restored VM (warm path).
+    await loadTests();
+    const l = requireLoaded();
+    jsonResponse(res, 200, {
+      environment: l.project.environment,
+      cases: casesMetadata(l.project.tests),
+    });
+    return;
+  }
+  if (method === "POST" && url === "/unload") {
+    loaded = null;
+    jsonResponse(res, 200, { unloaded: true });
+    return;
+  }
+  if (method === "POST" && url === "/reload") {
+    // Full reload (debug aid): re-import the env, then the tests.
+    await loadEnv();
+    await loadTests();
+    const l = requireLoaded();
+    jsonResponse(res, 200, {
+      environment: l.project.environment,
+      cases: casesMetadata(l.project.tests),
+    });
+    return;
+  }
+  if (method === "GET" && url === "/env-config") {
+    const l = requireLoaded();
+    jsonResponse(res, 200, l.project.environment);
+    return;
+  }
+  if (method === "GET" && url === "/cases") {
+    const l = requireLoaded();
+    jsonResponse(res, 200, { cases: casesMetadata(l.project.tests) });
+    return;
+  }
+  if (method === "GET" && url === "/record-secret-refs") {
+    // Union of platform secret refs the loaded fakes declare (replayFake's
+    // `secretRefs`). The control plane resolves these server-side and
+    // pushes the values on the eval path only. Empty if nothing's loaded.
+    const refs = new Set<string>();
+    for (const fake of FAKES.values()) {
+      for (const ref of fake.def.secretRefs ?? []) refs.add(ref);
+    }
+    jsonResponse(res, 200, { refs: [...refs] });
+    return;
+  }
+  if (method === "POST" && url === "/bootstrap") {
+    if (state.inFlightBootstrap) {
+      jsonResponse(res, 409, { error: "bootstrap already in progress" });
+      return;
+    }
+    // No await between the check above and claiming the slot, so this guard
+    // cannot be raced.
+    const job = bootstrap();
+    state.inFlightBootstrap = job;
+    try {
+      const timings = await job;
+      jsonResponse(res, 200, { ok: true, timings });
+    } finally {
+      state.inFlightBootstrap = null;
+    }
+    return;
+  }
+  if (method === "POST" && url === "/project-setup") {
+    if (state.inFlightProjectSetup) {
+      jsonResponse(res, 409, { error: "project-setup already in progress" });
+      return;
+    }
+    const job = runProjectSetup();
+    state.inFlightProjectSetup = job;
+    try {
+      const result = await job;
+      jsonResponse(res, 200, result);
+    } finally {
+      state.inFlightProjectSetup = null;
+    }
+    return;
+  }
+  if (method === "POST" && url === "/eval") {
+    // Fast reject before spending time on the body.
+    if (rejectIfTestRunning("a test or eval is already running")) return;
+    const parsed = await readJsonObject();
+    if (!parsed) return;
+    const code = parsed.code;
+    if (typeof code !== "string" || code.length === 0) {
+      jsonResponse(res, 400, { error: "code (string) is required" });
+      return;
+    }
+    // Reading the body awaited, so another /run or /eval may have claimed the
+    // slot in the meantime. Re-check with no await before the claim below —
+    // two jobs must never run at once (they share stdout capture).
+    if (rejectIfTestRunning("a test or eval is already running")) return;
+    // `secrets` are eval-scoped: the control plane resolves the loaded
+    // project's declared `replayFake` refs and pushes the values here on
+    // the eval path only. Never present on the /run (test) path.
+    const exec = evalCode(code, parsed.secrets as Record<string, string> | undefined);
+    state.inFlightTest = exec;
+    try {
+      const result = await exec;
+      jsonResponse(res, 200, result);
+    } finally {
+      state.inFlightTest = null;
+    }
+    return;
+  }
+  if (method === "POST" && url === "/run") {
+    // Fast reject before spending time on the body.
+    if (rejectIfTestRunning("another test is already running")) return;
+    const parsed = await readJsonObject();
+    if (!parsed) return;
+    const caseId = parsed.caseId;
+    if (typeof caseId !== "string" || caseId.length === 0) {
+      jsonResponse(res, 400, { error: "caseId is required" });
+      return;
+    }
+    const l = requireLoaded();
+    const tc = l.byId.get(caseId);
+    if (!tc) {
+      jsonResponse(res, 404, { error: `unknown caseId: ${caseId}` });
+      return;
+    }
+    // Same re-check as /eval: the slot may have been claimed while the body
+    // was being read.
+    if (rejectIfTestRunning("another test is already running")) return;
+    const exec = runOne(tc);
+    state.inFlightTest = exec;
+    try {
+      const result = await exec;
+      jsonResponse(res, 200, result);
+    } finally {
+      state.inFlightTest = null;
+    }
+    return;
+  }
+  jsonResponse(res, 404, { error: "not found" });
+}
+/**
+ * Start the daemon's HTTP server on SPECTEST_DAEMON_PORT (falling back to
+ * DEFAULT_PORT), bound to 0.0.0.0, with all job slots initially free.
+ */
+async function main(): Promise<void> {
+  const state: RouteState = {
+    inFlightBootstrap: null,
+    inFlightProjectSetup: null,
+    inFlightTest: null,
+  };
+  // Any rejection out of `handle` becomes a 500 carrying the error's message
+  // and stack.
+  const onRequest = (req: http.IncomingMessage, res: http.ServerResponse): void => {
+    handle(req, res, state).catch((failure) => {
+      const e = failure as Error;
+      try {
+        jsonResponse(res, 500, { error: e.message ?? String(failure), stack: e.stack });
+      } catch {
+        // headers already sent or socket dead
+      }
+    });
+  };
+  const port = Number(process.env.SPECTEST_DAEMON_PORT ?? DEFAULT_PORT);
+  const server = http.createServer(onRequest);
+  server.listen(port, "0.0.0.0", () => {
+    // eslint-disable-next-line no-console
+    console.log(`spectest-daemon listening on :${port} (idle; awaiting POST /load)`);
+  });
+}
+// Entry point: if main() rejects, log the failure and exit with status 1.
+main().catch((fatal) => {
+  // eslint-disable-next-line no-console
+  console.error("spectest-daemon: fatal:", fatal);
+  process.exit(1);
+});