npm - @checkstack/backend-api - Versions diffs - 0.19.0 → 0.21.0 - Mend

@checkstack/backend-api 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/CHANGELOG.md +205 -0
package/package.json +12 -11
package/src/advisory-lock-pool.it.test.ts +282 -0
package/src/advisory-lock.test.ts +144 -3
package/src/advisory-lock.ts +97 -55
package/src/auth-strategy.ts +6 -3
package/src/bearer-token.ts +13 -0
package/src/collector-strategy.ts +9 -0
package/src/config-versioning.test.ts +227 -0
package/src/config-versioning.ts +172 -0
package/src/core-services.ts +14 -0
package/src/esm-script-runner.test.ts +55 -16
package/src/esm-script-runner.ts +212 -55
package/src/index.ts +3 -0
package/src/render-templatable-config.test.ts +168 -0
package/src/render-templatable-config.ts +193 -0
package/src/schema-utils.ts +3 -0
package/src/script-sandbox/capabilities.test.ts +122 -0
package/src/script-sandbox/capabilities.ts +372 -0
package/src/script-sandbox/capped-output.test.ts +116 -0
package/src/script-sandbox/capped-output.ts +172 -0
package/src/script-sandbox/env-guard.test.ts +105 -0
package/src/script-sandbox/env-guard.ts +129 -0
package/src/script-sandbox/filesystem.test.ts +437 -0
package/src/script-sandbox/filesystem.ts +514 -0
package/src/script-sandbox/forkbomb.it.test.ts +121 -0
package/src/script-sandbox/global-default.test.ts +161 -0
package/src/script-sandbox/global-default.ts +100 -0
package/src/script-sandbox/index.ts +14 -0
package/src/script-sandbox/network.test.ts +356 -0
package/src/script-sandbox/network.ts +373 -0
package/src/script-sandbox/observability.test.ts +210 -0
package/src/script-sandbox/observability.ts +168 -0
package/src/script-sandbox/output-truncation.test.ts +53 -0
package/src/script-sandbox/output-truncation.ts +69 -0
package/src/script-sandbox/policy.test.ts +189 -0
package/src/script-sandbox/policy.ts +220 -0
package/src/script-sandbox/provider.test.ts +61 -0
package/src/script-sandbox/provider.ts +134 -0
package/src/script-sandbox/readiness.test.ts +80 -0
package/src/script-sandbox/readiness.ts +117 -0
package/src/script-sandbox/report.ts +88 -0
package/src/script-sandbox/rootless-egress.it.test.ts +86 -0
package/src/script-sandbox/rootless-egress.test.ts +99 -0
package/src/script-sandbox/rootless-egress.ts +218 -0
package/src/script-sandbox/shell-quote.test.ts +32 -0
package/src/script-sandbox/shell-quote.ts +10 -0
package/src/script-sandbox/wrapper.test.ts +1194 -0
package/src/script-sandbox/wrapper.ts +714 -0
package/src/shell-script-runner.test.ts +243 -0
package/src/shell-script-runner.ts +210 -45
package/src/zod-config.test.ts +60 -0
package/src/zod-config.ts +38 -14
package/tsconfig.json +3 -0

package/src/shell-script-runner.test.ts ADDED Viewed

@@ -0,0 +1,243 @@
+import { afterEach, describe, expect, it } from "bun:test";
+import { defaultShellScriptRunner } from "./shell-script-runner";
+import {
+  registerSandboxPolicyProvider,
+  resetSandboxPolicyProvider,
+} from "./script-sandbox/provider";
+import {
+  resolveDefaultSandboxProfile,
+  sandboxPolicySchema,
+  type SandboxPolicyInput,
+} from "./script-sandbox/policy";
+/** Register a one-shot provider returning the given (partial) policy. */
+function withPolicy(input: SandboxPolicyInput): void {
+  const policy = sandboxPolicySchema.parse(input);
+  registerSandboxPolicyProvider(async () => policy);
+}
+/**
+ * Register the shipped default profile but pinned to `onUnavailable: "degrade"`.
+ *
+ * The shipped default is now fail-closed (`onUnavailable: "fail"`): on a
+ * capability-poor CI/dev host (non-root macOS, no bwrap/prlimit) it refuses
+ * EVERY spawn. The runner-BEHAVIOR tests below (an ordinary echo runs, the env
+ * denylist is applied, layers degrade-and-surface) need a deterministic spawn
+ * on any host, so they use the `degrade` variant. The fail-closed default value
+ * is asserted in `policy.test.ts`; the fail-closed RUNTIME refusal is asserted
+ * by the dedicated "fails cleanly (no spawn)" test below.
+ */
+function withDefaultDegradePolicy(): void {
+  const policy = sandboxPolicySchema.parse({
+    ...resolveDefaultSandboxProfile(),
+    onUnavailable: "degrade",
+  });
+  registerSandboxPolicyProvider(async () => policy);
+}
+describe("defaultShellScriptRunner — sandbox default profile", () => {
+  afterEach(() => {
+    resetSandboxPolicyProvider();
+  });
+  it("applies the provider's policy (the shipped default) and surfaces a report", async () => {
+    withDefaultDegradePolicy();
+    const result = await defaultShellScriptRunner.run({
+      script: "echo hi",
+      timeoutMs: 5000,
+    });
+    // Default-on must NOT break the common case: an ordinary echo still runs.
+    // On a host lacking the strong primitives (the typical CI/dev box) the FS
+    // and metadata-block layers degrade-and-surface — never hard-break.
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe("hi");
+    expect(result.sandbox).toBeDefined();
+    // The shipped default profile is the base: resource caps, FS confinement,
+    // a secure-by-default egress posture, and a privilege drop are all
+    // REQUESTED; the report carries the requested policy regardless of host.
+    expect(result.sandbox?.requested.enabled).toBe(true);
+    expect(result.sandbox?.requested.resources.cpuSeconds).toBe(60);
+    expect(result.sandbox?.requested.filesystem.mode).toBe("scratch-plus-ro");
+    // Secure-by-default: egress is denied via an empty allowlist, NOT
+    // unrestricted.
+    expect(result.sandbox?.requested.network.mode).toBe("allowlist");
+    expect(result.sandbox?.requested.network.allow).toEqual([]);
+    expect(result.sandbox?.requested.network.denyLinkLocalAndMetadata).toBe(
+      true,
+    );
+    expect(result.sandbox?.requested.privilege.mode).toBe("drop-to-uid");
+  });
+  it("enforces the provider's policy (network deny is requested)", async () => {
+    withPolicy({ network: { mode: "deny" } });
+    const result = await defaultShellScriptRunner.run({
+      script: "echo enforced",
+      timeoutMs: 5000,
+    });
+    expect(result.stdout).toBe("enforced");
+    expect(result.sandbox?.requested.network.mode).toBe("deny");
+  });
+  it("fails closed (most restrictive policy) when no provider is registered", async () => {
+    resetSandboxPolicyProvider();
+    const result = await defaultShellScriptRunner.run({
+      script: "echo closed",
+      timeoutMs: 5000,
+    });
+    // Still runs the simple case, but under the fail-closed policy: deny
+    // egress, scratch + read-only packages, privilege drop, and a surfaced
+    // notice.
+    expect(result.stdout).toBe("closed");
+    expect(result.sandbox?.requested.network.mode).toBe("deny");
+    expect(result.sandbox?.requested.filesystem.mode).toBe("scratch-plus-ro");
+    const reasons = result.sandbox?.downgrades.map((d) => d.reason) ?? [];
+    expect(
+      reasons.some((r) => r.includes("no global sandbox policy provider")),
+    ).toBe(true);
+  });
+  it("degrades, never hard-breaks, on a host lacking the strong primitives", async () => {
+    withDefaultDegradePolicy();
+    // Default profile requests FS confinement; on a CI/dev box with no
+    // namespace wrapper the FS layer (and the metadata block) cannot be
+    // enforced. The guarantee: the run STILL succeeds and the report surfaces
+    // every dropped layer — it does not refuse to run.
+    const hasWrapper =
+      process.platform === "linux" &&
+      Bun.spawnSync(["sh", "-c", "command -v bwrap || command -v nsjail"])
+        .exitCode === 0;
+    const result = await defaultShellScriptRunner.run({
+      script: "echo ok",
+      timeoutMs: 5000,
+    });
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe("ok");
+    if (!hasWrapper) {
+      // FS confinement degraded (no wrapper) — surfaced, not silently dropped.
+      const layers = result.sandbox?.downgrades.map((d) => d.layer) ?? [];
+      expect(layers).toContain("filesystem");
+      expect(result.sandbox?.enforced.filesystem).toBe(false);
+    }
+  });
+  it("opts out globally with { enabled: false } and runs exactly as before", async () => {
+    // The documented GLOBAL opt-out: no caps, no denylist, no confinement.
+    withPolicy({ enabled: false });
+    const result = await defaultShellScriptRunner.run({
+      script: "echo out",
+      timeoutMs: 5000,
+    });
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe("out");
+    expect(result.sandbox?.enforced.resources).toBe(false);
+    expect(result.sandbox?.enforced.filesystem).toBe(false);
+  });
+  it("drops a forbidden env override (LD_PRELOAD) when enabled", async () => {
+    withDefaultDegradePolicy();
+    const result = await defaultShellScriptRunner.run({
+      script: "echo \"LD=$LD_PRELOAD\"",
+      timeoutMs: 5000,
+      env: { LD_PRELOAD: "/evil.so" },
+    });
+    expect(result.exitCode).toBe(0);
+    // The child never received LD_PRELOAD.
+    expect(result.stdout).toBe("LD=");
+    expect(result.sandbox?.downgrades).toBeDefined();
+  });
+  it("opts out with { enabled: false } — forbidden keys pass through (back-compat)", async () => {
+    withPolicy({ enabled: false });
+    const result = await defaultShellScriptRunner.run({
+      script: "echo \"LD=$LD_PRELOAD\"",
+      timeoutMs: 5000,
+      env: { LD_PRELOAD: "/passthrough.so" },
+    });
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe("LD=/passthrough.so");
+  });
+  it("does not leak unrelated process env (existing security guarantee)", async () => {
+    withDefaultDegradePolicy();
+    // Plant a marker in the parent env that is NOT on the runner's safe-env
+    // allowlist; the child's `env` dump must not contain it.
+    process.env.SHELL_RUNNER_SECRET = "DO_NOT_LEAK";
+    try {
+      const result = await defaultShellScriptRunner.run({
+        script: "env",
+        timeoutMs: 5000,
+      });
+      expect(result.stdout).not.toContain("DO_NOT_LEAK");
+    } finally {
+      // Always restore the parent env, even when `run` rejects or the
+      // assertion fails, so the marker cannot bleed into later tests.
+      delete process.env.SHELL_RUNNER_SECRET;
+    }
+  });
+  it("truncates output and flags it when over maxOutputBytes", async () => {
+    withPolicy({ resources: { maxOutputBytes: 200 } });
+    const result = await defaultShellScriptRunner.run({
+      // ~2000 bytes of output
+      script: "for i in $(seq 1 100); do echo 'xxxxxxxxxxxxxxxxxxxx'; done",
+      timeoutMs: 5000,
+    });
+    expect(result.outputTruncated).toBe(true);
+    expect(Buffer.byteLength(result.stdout)).toBeLessThanOrEqual(200);
+  });
+  it("kills a flooding child once maxOutputBytes is hit (bounded buffering, no OOM)", async () => {
+    // `yes` emits effectively unbounded output. With a tiny cap the runner must
+    // stream-count, hit the cap, KILL the child, and flag truncation — instead
+    // of buffering gigabytes first. If the OOM-safe streaming path regressed,
+    // this would hang until the wall-clock timeout (and/or balloon memory).
+    withPolicy({ resources: { maxOutputBytes: 4096 } });
+    const start = Date.now();
+    const result = await defaultShellScriptRunner.run({
+      script: "yes xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+      timeoutMs: 10_000,
+    });
+    const elapsed = Date.now() - start;
+    expect(result.outputTruncated).toBe(true);
+    expect(Buffer.byteLength(result.stdout)).toBeLessThanOrEqual(4096);
+    // Should finish promptly via the kill, NOT ride out the 10s timeout.
+    expect(elapsed).toBeLessThan(8000);
+    expect(result.timedOut).toBe(false);
+  });
+  it("fails cleanly (no spawn) when onUnavailable:fail and a layer is unenforceable", async () => {
+    // On a host lacking the namespace wrapper (the typical non-Linux dev/CI
+    // box), the filesystem + network namespace layers cannot be enforced, so a
+    // fail-closed (onUnavailable:fail) policy must refuse without spawning.
+    // (Privilege is now enforced by non-root inheritance and is NOT the failing
+    // layer; FS/network are.)
+    const isRoot = process.getuid?.() === 0;
+    if (isRoot && process.platform === "linux") {
+      // On a capable host this might enforce; skip the negative assertion.
+      return;
+    }
+    withPolicy({
+      onUnavailable: "fail",
+      filesystem: { mode: "scratch-only" },
+      privilege: { mode: "drop-to-uid", uid: 1001 },
+    });
+    const result = await defaultShellScriptRunner.run({
+      script: "echo should-not-run",
+      timeoutMs: 5000,
+    });
+    expect(result.exitCode).toBe(-1);
+    expect(result.stdout).toBe("");
+    expect(result.stderr).toContain("sandbox unavailable");
+  });
+  it("scratch dir is writable by the run's effective identity (ownership)", async () => {
+    // The reviewer flagged: is the per-run scratch dir writable by the run's
+    // effective identity? Under the non-root supervisor it trivially is - the
+    // supervisor (uid 65532) does the mkdtemp and the script inherits that uid,
+    // so it owns the dir. We prove it end-to-end: a shell run (CWD = the per-run
+    // scratch dir on the unconfined path) writes a file into its CWD and reads
+    // it back. A write failure (EACCES) would surface as a non-zero exit.
+    withDefaultDegradePolicy();
+    const result = await defaultShellScriptRunner.run({
+      // Write into the CWD (the per-run scratch dir), then read it back.
+      script: 'echo owned > probe.txt && cat probe.txt',
+      timeoutMs: 5000,
+    });
+    expect(result.exitCode).toBe(0);
+    expect(result.stdout).toBe("owned");
+  });
+});

package/src/shell-script-runner.ts CHANGED Viewed

@@ -1,4 +1,21 @@
 import { spawn, type Subprocess } from "bun";
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import path from "node:path";
+import { detectSandboxCapabilities } from "./script-sandbox/capabilities";
+import { readCappedOutput } from "./script-sandbox/capped-output";
+import { pickSafeEnv } from "./script-sandbox/env-guard";
+import { buildNetworkLayer } from "./script-sandbox/network";
+import { truncateCapturedOutput } from "./script-sandbox/output-truncation";
+import {
+  FAIL_CLOSED_DOWNGRADE_REASON,
+  resolveActiveSandboxPolicy,
+} from "./script-sandbox/provider";
+import {
+  type EffectiveSandbox,
+  SandboxUnavailableError,
+} from "./script-sandbox/report";
+import { buildSpawnHardening } from "./script-sandbox/wrapper";
 /**
  * Shared sandbox for executing user-authored shell scripts through
@@ -37,6 +54,14 @@ export interface ShellScriptRunResult {
   stderr: string;
   /** True if the timeout fired before the subprocess exited. */
   timedOut: boolean;
+  /** True if captured output exceeded the sandbox `maxOutputBytes` cap and was trimmed. */
+  outputTruncated?: boolean;
+  /**
+   * What the OS-level sandbox actually enforced / degraded for this run.
+   * Always present: the runner resolves the active GLOBAL policy itself and
+   * reports the result so callers can surface downgrades.
+   */
+  sandbox?: EffectiveSandbox;
 }
 export interface ShellScriptRunOptions {
@@ -56,6 +81,10 @@ export interface ShellScriptRunOptions {
    * layer we accept whatever the caller passes, because the legitimate
    * use cases (e.g. integration shell scripts injecting `PAYLOAD_*`
    * vars) vary too much.
+   *
+   * Note: forbidden keys (`LD_PRELOAD`, `NODE_OPTIONS`, `PATH`-override, ...)
+   * are dropped from these overrides by the shared env denylist whenever the
+   * active sandbox policy is enabled.
    */
   env?: Record<string, string>;
 }
@@ -69,39 +98,6 @@ export interface ShellScriptRunner {
   run(options: ShellScriptRunOptions): Promise<ShellScriptRunResult>;
 }
-// =============================================================================
-// INTERNALS
-// =============================================================================
-/**
- * Vars passed through to the subprocess. We intentionally do NOT
- * forward the satellite's full env so backend secrets (DB URLs, API
- * tokens, signing keys) never reach user-authored scripts.
- */
-const SAFE_ENV_VARS = [
-  "PATH",
-  "HOME",
-  "USER",
-  "LANG",
-  "LC_ALL",
-  "LC_CTYPE",
-  "TZ",
-  "TMPDIR",
-  "HOSTNAME",
-  "SHELL",
-];
-function pickSafeEnv(): Record<string, string> {
-  const env: Record<string, string> = {};
-  for (const key of SAFE_ENV_VARS) {
-    const value = process.env[key];
-    if (value !== undefined) {
-      env[key] = value;
-    }
-  }
-  return env;
-}
 // =============================================================================
 // DEFAULT RUNNER
 // =============================================================================
@@ -112,6 +108,129 @@ function pickSafeEnv(): Record<string, string> {
  */
 export const defaultShellScriptRunner: ShellScriptRunner = {
   async run({ script, timeoutMs, cwd, env }) {
+    // Per-run dir for staging the network egress nftables ruleset, created
+    // lazily only if the resolved policy actually produces one (so an ordinary
+    // shell run pays no extra I/O). Cleaned up in `finally`.
+    let nftDir: string | undefined;
+    // Per-run writable scratch dir. Required to ENGAGE the namespace wrapper
+    // (bwrap/nsjail): the wrapper is what delivers filesystem confinement, the
+    // network namespace, AND the privilege drop (`--uid`, the only mechanism
+    // that actually drops since Bun.spawn ignores uid/gid). Without a scratch
+    // dir the FS layer degrades, the wrapper never engages, and under the
+    // secure fail-closed default the run would be REFUSED. Created for every
+    // run; cleaned up in `finally`. The shell script's CWD is this dir (unless
+    // the caller pinned a `cwd`), so `mktemp`-style writes land in confinement.
+    let scratchDir: string | undefined;
+    // Reconcile the requested policy against this host's capabilities BEFORE
+    // spawning. `buildSpawnHardening` is pure + synchronous (capability
+    // detection is cached per-process), so no `await` is introduced here. When
+    // `onUnavailable: "fail"` and a layer is unavailable it throws, and we
+    // return a clean failure WITHOUT spawning an unsandboxed child.
+    const caps = detectSandboxCapabilities();
+    // Resolve the GLOBAL sandbox policy ourselves (policy is global-only; the
+    // runner no longer accepts a per-run override). With a provider wired at
+    // startup this is the durable cluster-wide default; with NO provider (or a
+    // provider that throws) it FAILS CLOSED to the most restrictive safe policy
+    // (deny egress, scratch + read-only packages, privilege drop) — never the
+    // permissive default. The fail-closed fallback is surfaced as a synthetic
+    // downgrade so callers can see it. On hosts lacking a primitive each layer
+    // degrades-and-surfaces (never hard-breaks) per the resolved
+    // `onUnavailable`.
+    const { policy, failedClosed } = await resolveActiveSandboxPolicy();
+    // Resolve the network decision up front (pure) to learn whether an nftables
+    // ruleset must be staged on disk. Avoids creating a temp dir for the common
+    // no-network run, and lets a fail-closed allowlist still build correctly
+    // (the ruleset is staged BEFORE the hardening build that fails-closed).
+    const netDecision = buildNetworkLayer({ policy: policy.network, caps });
+    let nftRulesetPath: string | undefined;
+    let rootlessLauncherPath: string | undefined;
+    let hardening;
+    try {
+      if (
+        netDecision.kind === "namespaced" &&
+        netDecision.nftRuleset !== undefined
+      ) {
+        nftDir = await mkdtemp(path.join(tmpdir(), "checkstack-egress-"));
+        nftRulesetPath = path.join(nftDir, "egress.nft");
+        // The rootless slirp4netns path additionally needs a launcher script
+        // staged alongside the ruleset (the orchestration is not a plain argv
+        // prelude). Same temp dir.
+        if (netDecision.egressPath === "rootless") {
+          rootlessLauncherPath = path.join(nftDir, "rootless-egress.sh");
+        }
+      }
+      // Stage a per-run scratch dir so the FS/network/privilege wrapper can
+      // engage (see the `scratchDir` declaration). Only needed when the sandbox
+      // is enabled; a disabled policy runs unwrapped exactly as before.
+      if (policy.enabled) {
+        scratchDir = await mkdtemp(path.join(tmpdir(), "checkstack-shell-"));
+      }
+      hardening = buildSpawnHardening({
+        policy,
+        caps,
+        baseEnv: pickSafeEnv(),
+        envOverrides: env,
+        ...(scratchDir === undefined ? {} : { filesystem: { scratchDir } }),
+        nftRulesetPath,
+        rootlessLauncherPath,
+        // Shell scripts exec `sh -c`, which IGNORES NODE_OPTIONS, so the per-run
+        // JS-heap memory cap is NOT applied here. Leaving this false makes the
+        // hardening builder surface an honest, non-fatal memory note (the
+        // ceiling is the container cgroup) rather than implying a per-run
+        // guarantee. See the shell-memory honesty note in wrapper.ts.
+        appliesNodeMemoryCap: false,
+      });
+      // Surface the fail-closed fallback as a notice in the report so a
+      // missing/failed policy provider is never silent (the run still proceeds
+      // under the most restrictive policy).
+      if (failedClosed) {
+        hardening.effective.downgrades.push({
+          layer: "network",
+          reason: FAIL_CLOSED_DOWNGRADE_REASON,
+        });
+      }
+      if (hardening.nftRuleset !== undefined && nftRulesetPath !== undefined) {
+        await writeFile(nftRulesetPath, hardening.nftRuleset, "utf8");
+      }
+      if (
+        hardening.rootlessLauncher !== undefined &&
+        rootlessLauncherPath !== undefined
+      ) {
+        await writeFile(rootlessLauncherPath, hardening.rootlessLauncher, {
+          encoding: "utf8",
+          mode: 0o700,
+        });
+      }
+    } catch (error) {
+      // Every exit from this handler (the clean refusal below, or the rethrow)
+      // leaves `run` before the later `finally` cleanup can execute, so BOTH
+      // per-run temp dirs must be removed here. The scratch dir is created
+      // before `buildSpawnHardening` can throw; removing only `nftDir` leaked
+      // one `checkstack-shell-*` dir per refused run. Best-effort, matching the
+      // `finally` cleanup: a failed removal must not mask the original error.
+      for (const stagedDir of [nftDir, scratchDir]) {
+        if (stagedDir !== undefined) {
+          await rm(stagedDir, { recursive: true, force: true }).catch(() => {});
+        }
+      }
+      if (error instanceof SandboxUnavailableError) {
+        // Fail-closed refusal: report a clean failure WITHOUT spawning an
+        // unsandboxed child. Nothing was enforced because nothing ran.
+        return {
+          exitCode: -1,
+          stdout: "",
+          stderr: error.message,
+          timedOut: false,
+          sandbox: {
+            requested: policy,
+            enforced: {
+              resources: false,
+              filesystem: false,
+              network: false,
+              privilege: false,
+            },
+            downgrades: error.downgrades,
+            notes: [],
+            platform: caps.platform,
+          },
+        };
+      }
+      // Anything else (e.g. a staging I/O failure) is unexpected: propagate.
+      throw error;
+    }
     let proc: Subprocess | undefined;
     let timedOut = false;
     let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
@@ -128,29 +247,62 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
       // Execute through `sh -c` so the user's script can use pipes,
       // redirects, variable expansion, conditionals, command
       // substitution, etc. — i.e. behave like a real shell script
-      // rather than a single argv vector.
+      // rather than a single argv vector. The sandbox may prepend an
+      // rlimit prelude (e.g. `prlimit --cpu=... --`) to the argv.
       proc = spawn({
-        cmd: ["sh", "-c", script],
-        cwd,
-        env: { ...pickSafeEnv(), ...env },
+        cmd: hardening.wrapCmd(["sh", "-c", script]),
+        // Default the CWD to the per-run scratch dir so unconfined runs still
+        // write temp files somewhere disposable; an explicit caller `cwd` wins.
+        // Under FS confinement the wrapper `--chdir`s into the scratch dir
+        // itself, so this only affects the non-wrapped path.
+        cwd: cwd ?? scratchDir,
+        env: hardening.env,
+        // NOTE: we deliberately do NOT pass `uid`/`gid` to Bun.spawn. It is a
+        // silent no-op on the shipped Bun versions (the drop is carried by the
+        // namespace wrapper's `--uid`, or by inheritance from a non-root
+        // supervisor) AND a forward-compat hazard: a future Bun honouring it
+        // would spawn the WRAPPER itself as the dropped id and break userns
+        // creation. `hardening.uid` is observability-only. See wrapper.ts.
         stdout: "pipe",
         stderr: "pipe",
       });
-      const [stdout, stderr, exitCode] = await Promise.race([
-        Promise.all([
-          new Response(proc.stdout as ReadableStream).text(),
-          new Response(proc.stderr as ReadableStream).text(),
-          proc.exited,
-        ]),
-        timeoutPromise,
-      ]);
+      // Bounded-buffering capture: count bytes off stdout/stderr against the
+      // shared `maxOutputBytes` budget and kill + flag the child the moment it
+      // is exceeded, instead of buffering the entire (possibly gigabytes-large)
+      // output first. This is the OOM guard for a degraded host without the
+      // RLIMIT_AS cap (plan §5.1).
+      const captureProc = proc;
+      const [{ stdout: stdoutRaw, stderr: stderrRaw, truncated: streamTruncated }, exitCode] =
+        await Promise.race([
+          Promise.all([
+            readCappedOutput({
+              stdout: captureProc.stdout as ReadableStream<Uint8Array>,
+              stderr: captureProc.stderr as ReadableStream<Uint8Array>,
+              maxOutputBytes: hardening.maxOutputBytes,
+              onExceeded: () => captureProc.kill(),
+            }),
+            captureProc.exited,
+          ]),
+          timeoutPromise,
+        ]);
+      // Final cosmetic pass: ensures clean multi-byte boundaries and re-asserts
+      // the combined cap. A no-op when the stream stayed under budget.
+      const { stdout, stderr, truncated: trimTruncated } = truncateCapturedOutput({
+        stdout: stdoutRaw,
+        stderr: stderrRaw,
+        maxOutputBytes: hardening.maxOutputBytes,
+      });
+      const truncated = streamTruncated || trimTruncated;
       return {
         exitCode,
         stdout: stdout.trim(),
         stderr: stderr.trim(),
         timedOut: false,
+        outputTruncated: truncated,
+        sandbox: hardening.effective,
       };
     } catch (error) {
       if (timedOut) {
@@ -159,6 +311,7 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
           stdout: "",
           stderr: "Script execution timed out",
           timedOut: true,
+          sandbox: hardening.effective,
         };
       }
       throw error;
@@ -170,6 +323,18 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
       // cleanly, but guarantees we never leave a runaway `sh` from
       // an exception path.
       proc?.kill();
+      // Remove the per-run scratch dir, if one was created.
+      if (scratchDir !== undefined) {
+        await rm(scratchDir, { recursive: true, force: true }).catch(() => {
+          // Best-effort; the OS reaps anything left in /tmp.
+        });
+      }
+      // Remove the staged egress ruleset dir, if one was created.
+      if (nftDir !== undefined) {
+        await rm(nftDir, { recursive: true, force: true }).catch(() => {
+          // Best-effort; the OS reaps anything left in /tmp.
+        });
+      }
     }
   },
 };

package/src/zod-config.test.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { describe, expect, test } from "bun:test";
+import { z } from "zod";
+import {
+  configString,
+  getConfigMeta,
+  isTemplatableSchema,
+} from "./zod-config";
+describe("getConfigMeta / unwrapSchema — multi-level wrapper unwrapping", () => {
+  test("finds meta on a plain configString", () => {
+    const field = configString({ "x-templatable": true });
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  test("finds meta through a single .optional() wrapper", () => {
+    const field = configString({ "x-templatable": true }).optional();
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  test("finds meta through a single .default() wrapper", () => {
+    const field = configString({ "x-templatable": true }).default("");
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  test("finds meta through a single .nullable() wrapper", () => {
+    const field = configString({ "x-templatable": true }).nullable();
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  // Regression: the old single-pass unwrap stopped after one level and returned
+  // `undefined` for a field wrapped in `.optional().default()` (two layers).
+  test("finds meta through .optional().default() — two wrapper levels", () => {
+    const field = configString({ "x-templatable": true }).optional().default("");
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+    expect(isTemplatableSchema(field)).toBe(true);
+  });
+  test("finds meta through .default().optional() — reversed two wrapper levels", () => {
+    const field = configString({ "x-templatable": true }).default("").optional();
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  test("finds meta through .nullable().optional() — two wrapper levels", () => {
+    const field = configString({ "x-secret": true }).nullable().optional();
+    expect(getConfigMeta(field)?.["x-secret"]).toBe(true);
+  });
+  test("finds meta through three wrapper levels (.optional().nullable().default())", () => {
+    const field = configString({ "x-templatable": true })
+      .optional()
+      .nullable()
+      .default(null);
+    expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
+  });
+  test("returns undefined for a plain z.string() with no registered meta", () => {
+    const field = z.string().optional();
+    expect(getConfigMeta(field)).toBeUndefined();
+  });
+});