@checkstack/backend-api 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +205 -0
  2. package/package.json +12 -11
  3. package/src/advisory-lock-pool.it.test.ts +282 -0
  4. package/src/advisory-lock.test.ts +144 -3
  5. package/src/advisory-lock.ts +97 -55
  6. package/src/auth-strategy.ts +6 -3
  7. package/src/bearer-token.ts +13 -0
  8. package/src/collector-strategy.ts +9 -0
  9. package/src/config-versioning.test.ts +227 -0
  10. package/src/config-versioning.ts +172 -0
  11. package/src/core-services.ts +14 -0
  12. package/src/esm-script-runner.test.ts +55 -16
  13. package/src/esm-script-runner.ts +212 -55
  14. package/src/index.ts +3 -0
  15. package/src/render-templatable-config.test.ts +168 -0
  16. package/src/render-templatable-config.ts +193 -0
  17. package/src/schema-utils.ts +3 -0
  18. package/src/script-sandbox/capabilities.test.ts +122 -0
  19. package/src/script-sandbox/capabilities.ts +372 -0
  20. package/src/script-sandbox/capped-output.test.ts +116 -0
  21. package/src/script-sandbox/capped-output.ts +172 -0
  22. package/src/script-sandbox/env-guard.test.ts +105 -0
  23. package/src/script-sandbox/env-guard.ts +129 -0
  24. package/src/script-sandbox/filesystem.test.ts +437 -0
  25. package/src/script-sandbox/filesystem.ts +514 -0
  26. package/src/script-sandbox/forkbomb.it.test.ts +121 -0
  27. package/src/script-sandbox/global-default.test.ts +161 -0
  28. package/src/script-sandbox/global-default.ts +100 -0
  29. package/src/script-sandbox/index.ts +14 -0
  30. package/src/script-sandbox/network.test.ts +356 -0
  31. package/src/script-sandbox/network.ts +373 -0
  32. package/src/script-sandbox/observability.test.ts +210 -0
  33. package/src/script-sandbox/observability.ts +168 -0
  34. package/src/script-sandbox/output-truncation.test.ts +53 -0
  35. package/src/script-sandbox/output-truncation.ts +69 -0
  36. package/src/script-sandbox/policy.test.ts +189 -0
  37. package/src/script-sandbox/policy.ts +220 -0
  38. package/src/script-sandbox/provider.test.ts +61 -0
  39. package/src/script-sandbox/provider.ts +134 -0
  40. package/src/script-sandbox/readiness.test.ts +80 -0
  41. package/src/script-sandbox/readiness.ts +117 -0
  42. package/src/script-sandbox/report.ts +88 -0
  43. package/src/script-sandbox/rootless-egress.it.test.ts +86 -0
  44. package/src/script-sandbox/rootless-egress.test.ts +99 -0
  45. package/src/script-sandbox/rootless-egress.ts +218 -0
  46. package/src/script-sandbox/shell-quote.test.ts +32 -0
  47. package/src/script-sandbox/shell-quote.ts +10 -0
  48. package/src/script-sandbox/wrapper.test.ts +1194 -0
  49. package/src/script-sandbox/wrapper.ts +714 -0
  50. package/src/shell-script-runner.test.ts +243 -0
  51. package/src/shell-script-runner.ts +210 -45
  52. package/src/zod-config.test.ts +60 -0
  53. package/src/zod-config.ts +38 -14
  54. package/tsconfig.json +3 -0
@@ -0,0 +1,243 @@
1
+ import { afterEach, describe, expect, it } from "bun:test";
2
+ import { defaultShellScriptRunner } from "./shell-script-runner";
3
+ import {
4
+ registerSandboxPolicyProvider,
5
+ resetSandboxPolicyProvider,
6
+ } from "./script-sandbox/provider";
7
+ import {
8
+ resolveDefaultSandboxProfile,
9
+ sandboxPolicySchema,
10
+ type SandboxPolicyInput,
11
+ } from "./script-sandbox/policy";
12
+
13
+ /** Register a provider returning the given (partial) policy, parsed through the schema; it stays registered until `afterEach` resets it. */
14
+ function withPolicy(input: SandboxPolicyInput): void {
15
+ const policy = sandboxPolicySchema.parse(input);
16
+ registerSandboxPolicyProvider(async () => policy);
17
+ }
18
+
19
+ /**
20
+ * Register the shipped default profile but pinned to `onUnavailable: "degrade"`.
21
+ *
22
+ * The shipped default is now fail-closed (`onUnavailable: "fail"`): on a
23
+ * capability-poor CI/dev host (non-root macOS, no bwrap/prlimit) it refuses
24
+ * EVERY spawn. The runner-BEHAVIOR tests below (an ordinary echo runs, the env
25
+ * denylist is applied, layers degrade-and-surface) need a deterministic spawn
26
+ * on any host, so they use the `degrade` variant. The fail-closed default value
27
+ * is asserted in `policy.test.ts`; the fail-closed RUNTIME refusal is asserted
28
+ * by the dedicated "fails cleanly (no spawn)" test below.
29
+ */
30
+ function withDefaultDegradePolicy(): void {
31
+ const policy = sandboxPolicySchema.parse({
32
+ ...resolveDefaultSandboxProfile(),
33
+ onUnavailable: "degrade",
34
+ });
35
+ registerSandboxPolicyProvider(async () => policy);
36
+ }
37
+
38
+ describe("defaultShellScriptRunner — sandbox default profile", () => {
39
+ afterEach(() => {
40
+ resetSandboxPolicyProvider();
41
+ });
42
+
43
+ it("applies the provider's policy (the shipped default) and surfaces a report", async () => {
44
+ withDefaultDegradePolicy();
45
+ const result = await defaultShellScriptRunner.run({
46
+ script: "echo hi",
47
+ timeoutMs: 5000,
48
+ });
49
+
50
+ // Default-on must NOT break the common case: an ordinary echo still runs.
51
+ // On a host lacking the strong primitives (the typical CI/dev box) the FS
52
+ // and metadata-block layers degrade-and-surface — never hard-break.
53
+ expect(result.exitCode).toBe(0);
54
+ expect(result.stdout).toBe("hi");
55
+ expect(result.sandbox).toBeDefined();
56
+ // The shipped default profile is the base: resource caps, FS confinement,
57
+ // a secure-by-default egress posture, and a privilege drop are all
58
+ // REQUESTED; the report carries the requested policy regardless of host.
59
+ expect(result.sandbox?.requested.enabled).toBe(true);
60
+ expect(result.sandbox?.requested.resources.cpuSeconds).toBe(60);
61
+ expect(result.sandbox?.requested.filesystem.mode).toBe("scratch-plus-ro");
62
+ // Secure-by-default: egress is denied via an empty allowlist, NOT
63
+ // unrestricted.
64
+ expect(result.sandbox?.requested.network.mode).toBe("allowlist");
65
+ expect(result.sandbox?.requested.network.allow).toEqual([]);
66
+ expect(result.sandbox?.requested.network.denyLinkLocalAndMetadata).toBe(
67
+ true,
68
+ );
69
+ expect(result.sandbox?.requested.privilege.mode).toBe("drop-to-uid");
70
+ });
71
+
72
+ it("enforces the provider's policy (network deny is requested)", async () => {
73
+ withPolicy({ network: { mode: "deny" } });
74
+ const result = await defaultShellScriptRunner.run({
75
+ script: "echo enforced",
76
+ timeoutMs: 5000,
77
+ });
78
+ expect(result.stdout).toBe("enforced");
79
+ expect(result.sandbox?.requested.network.mode).toBe("deny");
80
+ });
81
+
82
+ it("fails closed (most restrictive policy) when no provider is registered", async () => {
83
+ resetSandboxPolicyProvider();
84
+ const result = await defaultShellScriptRunner.run({
85
+ script: "echo closed",
86
+ timeoutMs: 5000,
87
+ });
88
+ // Still runs the simple case, but under the fail-closed policy: deny
89
+ // egress, scratch + read-only packages, privilege drop, and a surfaced
90
+ // notice.
91
+ expect(result.stdout).toBe("closed");
92
+ expect(result.sandbox?.requested.network.mode).toBe("deny");
93
+ expect(result.sandbox?.requested.filesystem.mode).toBe("scratch-plus-ro");
94
+ const reasons = result.sandbox?.downgrades.map((d) => d.reason) ?? [];
95
+ expect(
96
+ reasons.some((r) => r.includes("no global sandbox policy provider")),
97
+ ).toBe(true);
98
+ });
99
+
100
+ it("degrades, never hard-breaks, on a host lacking the strong primitives", async () => {
101
+ withDefaultDegradePolicy();
102
+ // Default profile requests FS confinement; on a CI/dev box with no
103
+ // namespace wrapper the FS layer (and the metadata block) cannot be
104
+ // enforced. The guarantee: the run STILL succeeds and the report surfaces
105
+ // every dropped layer — it does not refuse to run.
106
+ const hasWrapper =
107
+ process.platform === "linux" &&
108
+ Bun.spawnSync(["sh", "-c", "command -v bwrap || command -v nsjail"])
109
+ .exitCode === 0;
110
+ const result = await defaultShellScriptRunner.run({
111
+ script: "echo ok",
112
+ timeoutMs: 5000,
113
+ });
114
+ expect(result.exitCode).toBe(0);
115
+ expect(result.stdout).toBe("ok");
116
+ if (!hasWrapper) {
117
+ // FS confinement degraded (no wrapper) — surfaced, not silently dropped.
118
+ const layers = result.sandbox?.downgrades.map((d) => d.layer) ?? [];
119
+ expect(layers).toContain("filesystem");
120
+ expect(result.sandbox?.enforced.filesystem).toBe(false);
121
+ }
122
+ });
123
+
124
+ it("opts out globally with { enabled: false } and runs exactly as before", async () => {
125
+ // The documented GLOBAL opt-out: no caps, no denylist, no confinement.
126
+ withPolicy({ enabled: false });
127
+ const result = await defaultShellScriptRunner.run({
128
+ script: "echo out",
129
+ timeoutMs: 5000,
130
+ });
131
+ expect(result.exitCode).toBe(0);
132
+ expect(result.stdout).toBe("out");
133
+ expect(result.sandbox?.enforced.resources).toBe(false);
134
+ expect(result.sandbox?.enforced.filesystem).toBe(false);
135
+ });
136
+
137
+ it("drops a forbidden env override (LD_PRELOAD) when enabled", async () => {
138
+ withDefaultDegradePolicy();
139
+ const result = await defaultShellScriptRunner.run({
140
+ script: "echo \"LD=$LD_PRELOAD\"",
141
+ timeoutMs: 5000,
142
+ env: { LD_PRELOAD: "/evil.so" },
143
+ });
144
+ expect(result.exitCode).toBe(0);
145
+ // The child never received LD_PRELOAD.
146
+ expect(result.stdout).toBe("LD=");
147
+ expect(result.sandbox?.downgrades).toBeDefined();
148
+ });
149
+
150
+ it("opts out with { enabled: false } — forbidden keys pass through (back-compat)", async () => {
151
+ withPolicy({ enabled: false });
152
+ const result = await defaultShellScriptRunner.run({
153
+ script: "echo \"LD=$LD_PRELOAD\"",
154
+ timeoutMs: 5000,
155
+ env: { LD_PRELOAD: "/passthrough.so" },
156
+ });
157
+ expect(result.exitCode).toBe(0);
158
+ expect(result.stdout).toBe("LD=/passthrough.so");
159
+ });
160
+
161
+ it("does not leak unrelated process env (existing security guarantee)", async () => {
162
+ withDefaultDegradePolicy();
163
+ process.env.SHELL_RUNNER_SECRET = "DO_NOT_LEAK";
164
+ const result = await defaultShellScriptRunner.run({
165
+ script: "env",
166
+ timeoutMs: 5000,
167
+ });
168
+ delete process.env.SHELL_RUNNER_SECRET;
169
+ expect(result.stdout).not.toContain("DO_NOT_LEAK");
170
+ });
171
+
172
+ it("truncates output and flags it when over maxOutputBytes", async () => {
173
+ withPolicy({ resources: { maxOutputBytes: 200 } });
174
+ const result = await defaultShellScriptRunner.run({
175
+ // ~2000 bytes of output
176
+ script: "for i in $(seq 1 100); do echo 'xxxxxxxxxxxxxxxxxxxx'; done",
177
+ timeoutMs: 5000,
178
+ });
179
+ expect(result.outputTruncated).toBe(true);
180
+ expect(Buffer.byteLength(result.stdout)).toBeLessThanOrEqual(200);
181
+ });
182
+
183
+ it("kills a flooding child once maxOutputBytes is hit (bounded buffering, no OOM)", async () => {
184
+ // `yes` emits effectively unbounded output. With a tiny cap the runner must
185
+ // stream-count, hit the cap, KILL the child, and flag truncation — instead
186
+ // of buffering gigabytes first. If the OOM-safe streaming path regressed,
187
+ // this would hang until the wall-clock timeout (and/or balloon memory).
188
+ withPolicy({ resources: { maxOutputBytes: 4096 } });
189
+ const start = Date.now();
190
+ const result = await defaultShellScriptRunner.run({
191
+ script: "yes xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
192
+ timeoutMs: 10_000,
193
+ });
194
+ const elapsed = Date.now() - start;
195
+ expect(result.outputTruncated).toBe(true);
196
+ expect(Buffer.byteLength(result.stdout)).toBeLessThanOrEqual(4096);
197
+ // Should finish promptly via the kill, NOT ride out the 10s timeout.
198
+ expect(elapsed).toBeLessThan(8000);
199
+ expect(result.timedOut).toBe(false);
200
+ });
201
+
202
+ it("fails cleanly (no spawn) when onUnavailable:fail and a layer is unenforceable", async () => {
203
+ // On a host lacking the namespace wrapper (the typical non-Linux dev/CI
204
+ // box), the filesystem + network namespace layers cannot be enforced, so a
205
+ // fail-closed (onUnavailable:fail) policy must refuse without spawning.
206
+ // (Privilege is now enforced by non-root inheritance and is NOT the failing
207
+ // layer; FS/network are.)
208
+ const isRoot = process.getuid?.() === 0;
209
+ if (isRoot && process.platform === "linux") {
210
+ // On a capable host this might enforce; skip the negative assertion.
211
+ return;
212
+ }
213
+ withPolicy({
214
+ onUnavailable: "fail",
215
+ filesystem: { mode: "scratch-only" },
216
+ privilege: { mode: "drop-to-uid", uid: 1001 },
217
+ });
218
+ const result = await defaultShellScriptRunner.run({
219
+ script: "echo should-not-run",
220
+ timeoutMs: 5000,
221
+ });
222
+ expect(result.exitCode).toBe(-1);
223
+ expect(result.stdout).toBe("");
224
+ expect(result.stderr).toContain("sandbox unavailable");
225
+ });
226
+
227
+ it("scratch dir is writable by the run's effective identity (ownership)", async () => {
228
+ // The reviewer flagged: is the per-run scratch dir writable by the run's
229
+ // effective identity? Under the non-root supervisor it trivially is - the
230
+ // supervisor (uid 65532) does the mkdtemp and the script inherits that uid,
231
+ // so it owns the dir. We prove it end-to-end: a shell run (CWD = the per-run
232
+ // scratch dir on the unconfined path) writes a file into its CWD and reads
233
+ // it back. A write failure (EACCES) would surface as a non-zero exit.
234
+ withDefaultDegradePolicy();
235
+ const result = await defaultShellScriptRunner.run({
236
+ // Write into the CWD (the per-run scratch dir), then read it back.
237
+ script: 'echo owned > probe.txt && cat probe.txt',
238
+ timeoutMs: 5000,
239
+ });
240
+ expect(result.exitCode).toBe(0);
241
+ expect(result.stdout).toBe("owned");
242
+ });
243
+ });
@@ -1,4 +1,21 @@
1
1
  import { spawn, type Subprocess } from "bun";
2
+ import { mkdtemp, rm, writeFile } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import path from "node:path";
5
+ import { detectSandboxCapabilities } from "./script-sandbox/capabilities";
6
+ import { readCappedOutput } from "./script-sandbox/capped-output";
7
+ import { pickSafeEnv } from "./script-sandbox/env-guard";
8
+ import { buildNetworkLayer } from "./script-sandbox/network";
9
+ import { truncateCapturedOutput } from "./script-sandbox/output-truncation";
10
+ import {
11
+ FAIL_CLOSED_DOWNGRADE_REASON,
12
+ resolveActiveSandboxPolicy,
13
+ } from "./script-sandbox/provider";
14
+ import {
15
+ type EffectiveSandbox,
16
+ SandboxUnavailableError,
17
+ } from "./script-sandbox/report";
18
+ import { buildSpawnHardening } from "./script-sandbox/wrapper";
2
19
 
3
20
  /**
4
21
  * Shared sandbox for executing user-authored shell scripts through
@@ -37,6 +54,14 @@ export interface ShellScriptRunResult {
37
54
  stderr: string;
38
55
  /** True if the timeout fired before the subprocess exited. */
39
56
  timedOut: boolean;
57
+ /** True if captured output exceeded the sandbox `maxOutputBytes` cap and was trimmed. */
58
+ outputTruncated?: boolean;
59
+ /**
60
+ * What the OS-level sandbox actually enforced / degraded for this run.
61
+ * Set by the default runner, which resolves the active GLOBAL policy itself and
62
+ * reports the result so callers can surface downgrades. The field is typed optional, so callers must still handle `undefined`.
63
+ */
64
+ sandbox?: EffectiveSandbox;
40
65
  }
41
66
 
42
67
  export interface ShellScriptRunOptions {
@@ -56,6 +81,10 @@ export interface ShellScriptRunOptions {
56
81
  * layer we accept whatever the caller passes, because the legitimate
57
82
  * use cases (e.g. integration shell scripts injecting `PAYLOAD_*`
58
83
  * vars) vary too much.
84
+ *
85
+ * Note: forbidden keys (`LD_PRELOAD`, `NODE_OPTIONS`, `PATH`-override, ...)
86
+ * are dropped from these overrides by the shared env denylist whenever the
87
+ * active sandbox policy is enabled.
59
88
  */
60
89
  env?: Record<string, string>;
61
90
  }
@@ -69,39 +98,6 @@ export interface ShellScriptRunner {
69
98
  run(options: ShellScriptRunOptions): Promise<ShellScriptRunResult>;
70
99
  }
71
100
 
72
- // =============================================================================
73
- // INTERNALS
74
- // =============================================================================
75
-
76
- /**
77
- * Vars passed through to the subprocess. We intentionally do NOT
78
- * forward the satellite's full env so backend secrets (DB URLs, API
79
- * tokens, signing keys) never reach user-authored scripts.
80
- */
81
- const SAFE_ENV_VARS = [
82
- "PATH",
83
- "HOME",
84
- "USER",
85
- "LANG",
86
- "LC_ALL",
87
- "LC_CTYPE",
88
- "TZ",
89
- "TMPDIR",
90
- "HOSTNAME",
91
- "SHELL",
92
- ];
93
-
94
- function pickSafeEnv(): Record<string, string> {
95
- const env: Record<string, string> = {};
96
- for (const key of SAFE_ENV_VARS) {
97
- const value = process.env[key];
98
- if (value !== undefined) {
99
- env[key] = value;
100
- }
101
- }
102
- return env;
103
- }
104
-
105
101
  // =============================================================================
106
102
  // DEFAULT RUNNER
107
103
  // =============================================================================
@@ -112,6 +108,129 @@ function pickSafeEnv(): Record<string, string> {
112
108
  */
113
109
  export const defaultShellScriptRunner: ShellScriptRunner = {
114
110
  async run({ script, timeoutMs, cwd, env }) {
111
+ // Per-run dir for staging the network egress nftables ruleset, created
112
+ // lazily only if the resolved policy actually produces one (so an ordinary
113
+ // shell run pays no extra I/O). Cleaned up in `finally`.
114
+ let nftDir: string | undefined;
115
+ // Per-run writable scratch dir. Required to ENGAGE the namespace wrapper
116
+ // (bwrap/nsjail): the wrapper is what delivers filesystem confinement, the
117
+ // network namespace, AND the privilege drop (`--uid`, the only mechanism
118
+ // that actually drops since Bun.spawn ignores uid/gid). Without a scratch
119
+ // dir the FS layer degrades, the wrapper never engages, and under the
120
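+ // secure fail-closed default the run would be REFUSED. Created for every
121
+ // sandbox-enabled run; cleaned up in the spawn path's `finally` (NOTE(review): the early sandbox-unavailable return and the rethrow path remove only `nftDir`, not this dir). The shell script's CWD is this dir (unless
122
+ // the caller pinned a `cwd`), so `mktemp`-style writes land in confinement.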
123
+ let scratchDir: string | undefined;
124
+ // Reconcile the requested policy against this host's capabilities BEFORE
125
+ // spawning. `buildSpawnHardening` is pure + synchronous (capability
126
+ // detection is cached per-process), so the build itself adds no `await`; the awaits in this prelude come from policy resolution and temp-file staging. When
127
+ // `onUnavailable: "fail"` and a layer is unavailable it throws, and we
128
+ // return a clean failure WITHOUT spawning an unsandboxed child.
129
+ const caps = detectSandboxCapabilities();
130
+ // Resolve the GLOBAL sandbox policy ourselves (policy is global-only; the
131
+ // runner no longer accepts a per-run override). With a provider wired at
132
+ // startup this is the durable cluster-wide default; with NO provider (or a
133
+ // provider that throws) it FAILS CLOSED to the most restrictive safe policy
134
+ // (deny egress, scratch + read-only packages, privilege drop) — never the
135
+ // permissive default. The fail-closed fallback is surfaced as a synthetic
136
+ // downgrade so callers can see it. On hosts lacking a primitive each layer
137
+ // degrades-and-surfaces (never hard-breaks) per the resolved
138
+ // `onUnavailable`.
139
+ const { policy, failedClosed } = await resolveActiveSandboxPolicy();
140
+ // Resolve the network decision up front (pure) to learn whether an nftables
141
+ // ruleset must be staged on disk. Avoids creating a temp dir for the common
142
+ // no-network run, and lets a fail-closed allowlist still build correctly
143
+ // (the ruleset's temp dir and path are reserved BEFORE the hardening build that fails-closed; its contents are written after the build succeeds).
144
+ const netDecision = buildNetworkLayer({ policy: policy.network, caps });
145
+ let nftRulesetPath: string | undefined;
146
+ let rootlessLauncherPath: string | undefined;
147
+ let hardening;
148
+ try {
149
+ if (
150
+ netDecision.kind === "namespaced" &&
151
+ netDecision.nftRuleset !== undefined
152
+ ) {
153
+ nftDir = await mkdtemp(path.join(tmpdir(), "checkstack-egress-"));
154
+ nftRulesetPath = path.join(nftDir, "egress.nft");
155
+ // The rootless slirp4netns path additionally needs a launcher script
156
+ // staged alongside the ruleset (the orchestration is not a plain argv
157
+ // prelude). Same temp dir.
158
+ if (netDecision.egressPath === "rootless") {
159
+ rootlessLauncherPath = path.join(nftDir, "rootless-egress.sh");
160
+ }
161
+ }
162
+ // Stage a per-run scratch dir so the FS/network/privilege wrapper can
163
+ // engage (see the `scratchDir` declaration). Only needed when the sandbox
164
+ // is enabled; a disabled policy runs unwrapped exactly as before.
165
+ if (policy.enabled) {
166
+ scratchDir = await mkdtemp(path.join(tmpdir(), "checkstack-shell-"));
167
+ }
168
+ hardening = buildSpawnHardening({
169
+ policy,
170
+ caps,
171
+ baseEnv: pickSafeEnv(),
172
+ envOverrides: env,
173
+ ...(scratchDir === undefined ? {} : { filesystem: { scratchDir } }),
174
+ nftRulesetPath,
175
+ rootlessLauncherPath,
176
+ // Shell scripts exec `sh -c`, which IGNORES NODE_OPTIONS, so the per-run
177
+ // JS-heap memory cap is NOT applied here. Leaving this false makes the
178
+ // hardening builder surface an honest, non-fatal memory note (the
179
+ // ceiling is the container cgroup) rather than implying a per-run
180
+ // guarantee. See the shell-memory honesty note in wrapper.ts.
181
+ appliesNodeMemoryCap: false,
182
+ });
183
+ // Surface the fail-closed fallback as a notice in the report so a
184
+ // missing/failed policy provider is never silent (the run still proceeds
185
+ // under the most restrictive policy).
186
+ if (failedClosed) {
187
+ hardening.effective.downgrades.push({
188
+ layer: "network",
189
+ reason: FAIL_CLOSED_DOWNGRADE_REASON,
190
+ });
191
+ }
192
+ if (hardening.nftRuleset !== undefined && nftRulesetPath !== undefined) {
193
+ await writeFile(nftRulesetPath, hardening.nftRuleset, "utf8");
194
+ }
195
+ if (
196
+ hardening.rootlessLauncher !== undefined &&
197
+ rootlessLauncherPath !== undefined
198
+ ) {
199
+ await writeFile(rootlessLauncherPath, hardening.rootlessLauncher, {
200
+ encoding: "utf8",
201
+ mode: 0o700,
202
+ });
203
+ }
204
+ } catch (error) {
205
+ if (error instanceof SandboxUnavailableError) {
206
+ if (nftDir !== undefined) {
207
+ await rm(nftDir, { recursive: true, force: true }).catch(() => {});
208
+ }
209
+ return {
210
+ exitCode: -1,
211
+ stdout: "",
212
+ stderr: error.message,
213
+ timedOut: false,
214
+ sandbox: {
215
+ requested: policy,
216
+ enforced: {
217
+ resources: false,
218
+ filesystem: false,
219
+ network: false,
220
+ privilege: false,
221
+ },
222
+ downgrades: error.downgrades,
223
+ notes: [],
224
+ platform: caps.platform,
225
+ },
226
+ };
227
+ }
228
+ if (nftDir !== undefined) {
229
+ await rm(nftDir, { recursive: true, force: true }).catch(() => {});
230
+ }
231
+ throw error;
232
+ }
233
+
115
234
  let proc: Subprocess | undefined;
116
235
  let timedOut = false;
117
236
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
@@ -128,29 +247,62 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
128
247
  // Execute through `sh -c` so the user's script can use pipes,
129
248
  // redirects, variable expansion, conditionals, command
130
249
  // substitution, etc. — i.e. behave like a real shell script
131
- // rather than a single argv vector.
250
+ // rather than a single argv vector. The sandbox may prepend an
251
+ // rlimit prelude (e.g. `prlimit --cpu=... --`) to the argv.
132
252
  proc = spawn({
133
- cmd: ["sh", "-c", script],
134
- cwd,
135
- env: { ...pickSafeEnv(), ...env },
253
+ cmd: hardening.wrapCmd(["sh", "-c", script]),
254
+ // Default the CWD to the per-run scratch dir so unconfined runs still
255
+ // write temp files somewhere disposable; an explicit caller `cwd` wins.
256
+ // Under FS confinement the wrapper `--chdir`s into the scratch dir
257
+ // itself, so this only affects the non-wrapped path.
258
+ cwd: cwd ?? scratchDir,
259
+ env: hardening.env,
260
+ // NOTE: we deliberately do NOT pass `uid`/`gid` to Bun.spawn. It is a
261
+ // silent no-op on the shipped Bun versions (the drop is carried by the
262
+ // namespace wrapper's `--uid`, or by inheritance from a non-root
263
+ // supervisor) AND a forward-compat hazard: a future Bun honouring it
264
+ // would spawn the WRAPPER itself as the dropped id and break userns
265
+ // creation. `hardening.uid` is observability-only. See wrapper.ts.
136
266
  stdout: "pipe",
137
267
  stderr: "pipe",
138
268
  });
139
269
 
140
- const [stdout, stderr, exitCode] = await Promise.race([
141
- Promise.all([
142
- new Response(proc.stdout as ReadableStream).text(),
143
- new Response(proc.stderr as ReadableStream).text(),
144
- proc.exited,
145
- ]),
146
- timeoutPromise,
147
- ]);
270
+ // Bounded-buffering capture: count bytes off stdout/stderr against the
271
+ // shared `maxOutputBytes` budget and kill + flag the child the moment it
272
+ // is exceeded, instead of buffering the entire (possibly gigabytes-large)
273
+ // output first. This is the OOM guard for a degraded host without the
274
+ // RLIMIT_AS cap (plan §5.1).
275
+ const captureProc = proc;
276
+ const [{ stdout: stdoutRaw, stderr: stderrRaw, truncated: streamTruncated }, exitCode] =
277
+ await Promise.race([
278
+ Promise.all([
279
+ readCappedOutput({
280
+ stdout: captureProc.stdout as ReadableStream<Uint8Array>,
281
+ stderr: captureProc.stderr as ReadableStream<Uint8Array>,
282
+ maxOutputBytes: hardening.maxOutputBytes,
283
+ onExceeded: () => captureProc.kill(),
284
+ }),
285
+ captureProc.exited,
286
+ ]),
287
+ timeoutPromise,
288
+ ]);
289
+
290
+ // Final cosmetic pass: ensures clean multi-byte boundaries and re-asserts
291
+ // the combined cap. A no-op when the stream stayed under budget.
292
+ const { stdout, stderr, truncated: trimTruncated } = truncateCapturedOutput({
293
+ stdout: stdoutRaw,
294
+ stderr: stderrRaw,
295
+ maxOutputBytes: hardening.maxOutputBytes,
296
+ });
297
+ const truncated = streamTruncated || trimTruncated;
148
298
 
149
299
  return {
150
300
  exitCode,
151
301
  stdout: stdout.trim(),
152
302
  stderr: stderr.trim(),
153
303
  timedOut: false,
304
+ outputTruncated: truncated,
305
+ sandbox: hardening.effective,
154
306
  };
155
307
  } catch (error) {
156
308
  if (timedOut) {
@@ -159,6 +311,7 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
159
311
  stdout: "",
160
312
  stderr: "Script execution timed out",
161
313
  timedOut: true,
314
+ sandbox: hardening.effective,
162
315
  };
163
316
  }
164
317
  throw error;
@@ -170,6 +323,18 @@ export const defaultShellScriptRunner: ShellScriptRunner = {
170
323
  // cleanly, but guarantees we never leave a runaway `sh` from
171
324
  // an exception path.
172
325
  proc?.kill();
326
+ // Remove the per-run scratch dir, if one was created.
327
+ if (scratchDir !== undefined) {
328
+ await rm(scratchDir, { recursive: true, force: true }).catch(() => {
329
+ // Best-effort; the OS reaps anything left in /tmp.
330
+ });
331
+ }
332
+ // Remove the staged egress ruleset dir, if one was created.
333
+ if (nftDir !== undefined) {
334
+ await rm(nftDir, { recursive: true, force: true }).catch(() => {
335
+ // Best-effort; the OS reaps anything left in /tmp.
336
+ });
337
+ }
173
338
  }
174
339
  },
175
340
  };
@@ -0,0 +1,60 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { z } from "zod";
3
+ import {
4
+ configString,
5
+ getConfigMeta,
6
+ isTemplatableSchema,
7
+ } from "./zod-config";
8
+
9
+ describe("getConfigMeta / unwrapSchema — multi-level wrapper unwrapping", () => {
10
+ test("finds meta on a plain configString", () => {
11
+ const field = configString({ "x-templatable": true });
12
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
13
+ });
14
+
15
+ test("finds meta through a single .optional() wrapper", () => {
16
+ const field = configString({ "x-templatable": true }).optional();
17
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
18
+ });
19
+
20
+ test("finds meta through a single .default() wrapper", () => {
21
+ const field = configString({ "x-templatable": true }).default("");
22
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
23
+ });
24
+
25
+ test("finds meta through a single .nullable() wrapper", () => {
26
+ const field = configString({ "x-templatable": true }).nullable();
27
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
28
+ });
29
+
30
+ // Regression: the old single-pass unwrap stopped after one level and returned
31
+ // `undefined` for a field wrapped in `.optional().default()` (two layers).
32
+ test("finds meta through .optional().default() — two wrapper levels", () => {
33
+ const field = configString({ "x-templatable": true }).optional().default("");
34
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
35
+ expect(isTemplatableSchema(field)).toBe(true);
36
+ });
37
+
38
+ test("finds meta through .default().optional() — reversed two wrapper levels", () => {
39
+ const field = configString({ "x-templatable": true }).default("").optional();
40
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
41
+ });
42
+
43
+ test("finds meta through .nullable().optional() — two wrapper levels", () => {
44
+ const field = configString({ "x-secret": true }).nullable().optional();
45
+ expect(getConfigMeta(field)?.["x-secret"]).toBe(true);
46
+ });
47
+
48
+ test("finds meta through three wrapper levels (.optional().nullable().default())", () => {
49
+ const field = configString({ "x-templatable": true })
50
+ .optional()
51
+ .nullable()
52
+ .default(null);
53
+ expect(getConfigMeta(field)?.["x-templatable"]).toBe(true);
54
+ });
55
+
56
+ test("returns undefined for a plain z.string() with no registered meta", () => {
57
+ const field = z.string().optional();
58
+ expect(getConfigMeta(field)).toBeUndefined();
59
+ });
60
+ });