@openparachute/agent 0.2.3-rc.2 → 0.2.3-rc.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/package.json +4 -1
  2. package/src/transports/vault.ts +19 -1
  3. package/src/_parked/interactive-spawn.test.ts +0 -324
  4. package/src/_parked/interactive-spawn.ts +0 -701
  5. package/src/agent-defs.test.ts +0 -1504
  6. package/src/agent-mcp-config.test.ts +0 -115
  7. package/src/agents.test.ts +0 -360
  8. package/src/auth.test.ts +0 -46
  9. package/src/backends/attached-queue.test.ts +0 -376
  10. package/src/backends/programmatic.test.ts +0 -1715
  11. package/src/backends/registry.test.ts +0 -1494
  12. package/src/backends/stream-json.test.ts +0 -570
  13. package/src/channel-backend-wiring.test.ts +0 -237
  14. package/src/credentials.test.ts +0 -274
  15. package/src/cron.test.ts +0 -342
  16. package/src/daemon-agent-def-api.test.ts +0 -166
  17. package/src/daemon-agent-defs-api.test.ts +0 -953
  18. package/src/daemon-agent-env-api.test.ts +0 -338
  19. package/src/daemon-attached-queue-store.test.ts +0 -65
  20. package/src/daemon-config-api.test.ts +0 -962
  21. package/src/daemon-jobs-api.test.ts +0 -271
  22. package/src/daemon-vault-chat.test.ts +0 -250
  23. package/src/daemon.test.ts +0 -746
  24. package/src/def-vaults.test.ts +0 -136
  25. package/src/delivery-state.test.ts +0 -110
  26. package/src/effective-env.test.ts +0 -114
  27. package/src/grants.test.ts +0 -638
  28. package/src/hub-jwt.test.ts +0 -161
  29. package/src/jobs.test.ts +0 -245
  30. package/src/mcp-http.test.ts +0 -265
  31. package/src/mint-token.test.ts +0 -152
  32. package/src/module-manifest.test.ts +0 -158
  33. package/src/programmatic-wiring.test.ts +0 -838
  34. package/src/registry.test.ts +0 -227
  35. package/src/resolve-port.test.ts +0 -64
  36. package/src/routing.test.ts +0 -184
  37. package/src/runner.test.ts +0 -506
  38. package/src/sandbox/config.test.ts +0 -150
  39. package/src/sandbox/egress.test.ts +0 -113
  40. package/src/sandbox/live-seatbelt.test.ts +0 -277
  41. package/src/sandbox/mounts.test.ts +0 -154
  42. package/src/sandbox/sandbox.test.ts +0 -168
  43. package/src/services-manifest.test.ts +0 -106
  44. package/src/spa-serve.test.ts +0 -116
  45. package/src/spawn-agent-cli.test.ts +0 -172
  46. package/src/spawn-agent.test.ts +0 -1218
  47. package/src/spawn-deps.test.ts +0 -54
  48. package/src/terminal-assets.test.ts +0 -50
  49. package/src/terminal.test.ts +0 -530
  50. package/src/transports/http-ui.test.ts +0 -455
  51. package/src/transports/telegram.test.ts +0 -174
  52. package/src/transports/vault.test.ts +0 -2012
  53. package/src/ui-kit.test.ts +0 -178
  54. package/web/ui/tsconfig.json +0 -21
@@ -1,1218 +0,0 @@
1
- import { describe, test, expect, afterEach } from "bun:test";
2
- import { mkdtempSync, rmSync, readFileSync, statSync, writeFileSync, existsSync } from "node:fs";
3
- import { join } from "node:path";
4
- import { tmpdir } from "node:os";
5
- // SHARED spawn helpers (live tree).
6
- import {
7
- buildAgentChildEnv,
8
- mergeSandboxLaunchEnv,
9
- SANDBOX_ENV_ALLOWLIST,
10
- resolveAgentCwd,
11
- seedAgentHome,
12
- sessionWorkspace,
13
- shellJoin,
14
- persistSpec,
15
- readPersistedSpec,
16
- specFilePath,
17
- } from "./spawn-agent.ts";
18
- // PARKED interactive spawner (the interactive backend retired 2026-06-19; its
19
- // spawner + tmux launcher live in src/_parked/interactive-spawn.ts now — these tests
20
- // still exercise that parked code so it stays buildable for the future revival).
21
- import {
22
- spawnAgent,
23
- buildAgentClaudeArgs,
24
- buildLaunchScript,
25
- confirmDevChannelsPrompt,
26
- DEV_CHANNELS_PROMPT_MARKER,
27
- DEV_CHANNELS_READY_MARKER,
28
- realTmuxLauncher,
29
- sessionName,
30
- type SpawnAgentDeps,
31
- type TmuxLauncher,
32
- } from "./_parked/interactive-spawn.ts";
33
- import type { SandboxEngine } from "./sandbox/index.ts";
34
- import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime";
35
- import type { AgentSpec } from "./sandbox/types.ts";
36
- import { channelEntryKey, vaultEntryKey } from "./agent-mcp-config.ts";
37
- import {
38
- setDefaultClaudeCredential,
39
- setChannelClaudeCredential,
40
- } from "./credentials.ts";
41
-
42
- let sessionsDir: string;
43
- afterEach(() => {
44
- if (sessionsDir) rmSync(sessionsDir, { recursive: true, force: true });
45
- });
46
-
47
- // ---- fakes -----------------------------------------------------------------
48
-
49
- /** A recording tmux launcher. */
50
- function recordingTmux(existing = new Set<string>()): TmuxLauncher & {
51
- launched: Array<{
52
- name: string;
53
- argv: string[];
54
- env: Record<string, string | undefined>;
55
- cwd: string;
56
- scriptDir?: string;
57
- }>;
58
- confirmed: string[];
59
- } {
60
- const launched: Array<{
61
- name: string;
62
- argv: string[];
63
- env: Record<string, string | undefined>;
64
- cwd: string;
65
- scriptDir?: string;
66
- }> = [];
67
- const confirmed: string[] = [];
68
- return {
69
- launched,
70
- confirmed,
71
- async hasSession(name) {
72
- return existing.has(name);
73
- },
74
- async newSession(opts) {
75
- launched.push(opts);
76
- },
77
- async confirmDevChannelsPrompt(session) {
78
- confirmed.push(session);
79
- return "confirmed";
80
- },
81
- };
82
- }
83
-
84
- /** A fake sandbox engine — records config, returns a deterministic wrap. */
85
- function fakeEngine(): SandboxEngine & { initializedWith: SandboxRuntimeConfig | null } {
86
- const rec = {
87
- initializedWith: null as SandboxRuntimeConfig | null,
88
- isSupportedPlatform: () => true,
89
- isSandboxingEnabled: () => true,
90
- async initialize(cfg: SandboxRuntimeConfig) {
91
- rec.initializedWith = cfg;
92
- },
93
- async wrapWithSandboxArgv(command: string) {
94
- // Emulate the real shape: a bash -c wrapper carrying the command + proxy env.
95
- return {
96
- argv: ["/bin/bash", "-c", `SBX ${command}`],
97
- // Include a TMPDIR the engine would set — spawnAgent must OVERRIDE it with
98
- // a workspace-writable path (the override regression guard below).
99
- env: { SANDBOX_RUNTIME: "1", HTTPS_PROXY: "http://localhost:5555", TMPDIR: "/tmp/claude" },
100
- };
101
- },
102
- async reset() {},
103
- };
104
- return rec;
105
- }
106
-
107
- /** A fake mint hub: returns a distinct token per scope so we can tell them apart. */
108
- function fakeMintFetch(): typeof fetch {
109
- let n = 0;
110
- return (async (_url: string | URL | Request, init?: RequestInit) => {
111
- const body = JSON.parse(String(init?.body ?? "{}")) as { scope: string };
112
- n += 1;
113
- const token = `TOK-${n}-${body.scope.replace(/[^a-z]/gi, "").slice(0, 6)}`;
114
- return new Response(
115
- JSON.stringify({ jti: `j${n}`, token, expires_at: "2026-09-01T00:00:00Z", scope: body.scope }),
116
- { status: 200, headers: { "content-type": "application/json" } },
117
- );
118
- }) as unknown as typeof fetch;
119
- }
120
-
121
- function baseDeps(over: Partial<SpawnAgentDeps> = {}): SpawnAgentDeps {
122
- return {
123
- hubOrigin: "https://hub.example.com",
124
- managerBearer: "MANAGER",
125
- channelUrl: "http://127.0.0.1:1941",
126
- vaultUrl: "http://127.0.0.1:1940",
127
- sessionsDir,
128
- runtimeReadOnly: ["/cfg/.claude"],
129
- // Stub the credential resolver so the test never touches a real store; the
130
- // assertion below checks this exact token lands in CLAUDE_CODE_OAUTH_TOKEN.
131
- resolveClaudeToken: () => "OAUTH-CRED-PLACEHOLDER",
132
- sandboxEngine: fakeEngine(),
133
- tmux: recordingTmux(),
134
- fetchFn: fakeMintFetch(),
135
- parentEnv: {
136
- PATH: "/usr/bin",
137
- HOME: "/home/op",
138
- ANTHROPIC_API_KEY: "sk-ant-SHOULD-NOT-LEAK",
139
- CLAUDE_API_KEY: "also-should-not-leak",
140
- SECRET_THING: "do-not-pass",
141
- },
142
- claudeBin: "claude",
143
- ...over,
144
- };
145
- }
146
-
147
- // ---- pure-helper tests -----------------------------------------------------
148
-
149
- describe("buildAgentChildEnv — scrub, inject OAuth, NEVER ANTHROPIC_API_KEY", () => {
150
- test("injects CLAUDE_CODE_OAUTH_TOKEN as the session auth", () => {
151
- const env = buildAgentChildEnv({ PATH: "/usr/bin", HOME: "/h" }, "THE-OAUTH-TOKEN");
152
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("THE-OAUTH-TOKEN");
153
- });
154
-
155
- test("SECURITY: ANTHROPIC_API_KEY is NOT passed through (would route to API billing)", () => {
156
- const env = buildAgentChildEnv(
157
- { PATH: "/usr/bin", HOME: "/h", ANTHROPIC_API_KEY: "sk-ant-x", CLAUDE_API_KEY: "y" },
158
- "tok",
159
- );
160
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
161
- expect(env.CLAUDE_API_KEY).toBeUndefined();
162
- });
163
-
164
- test("scrubs unrelated parent env (only the allowlist + locale pass)", () => {
165
- const env = buildAgentChildEnv(
166
- { PATH: "/usr/bin", HOME: "/h", SECRET_THING: "nope", LC_ALL: "en_US.UTF-8" },
167
- "tok",
168
- );
169
- expect(env.SECRET_THING).toBeUndefined();
170
- expect(env.PATH).toBe("/usr/bin");
171
- expect(env.HOME).toBe("/h");
172
- expect(env.LC_ALL).toBe("en_US.UTF-8");
173
- });
174
-
175
- test("provides a default PATH if the parent had none", () => {
176
- const env = buildAgentChildEnv({}, "tok");
177
- expect(env.PATH).toBe("/usr/local/bin:/usr/bin:/bin");
178
- });
179
-
180
- test("INJECTION: the per-channel env reaches the child (gh/git see the token)", () => {
181
- const env = buildAgentChildEnv(
182
- { PATH: "/usr/bin", HOME: "/h" },
183
- "tok",
184
- { GH_TOKEN: "ghp_X", CLOUDFLARE_API_TOKEN: "cf_Y" },
185
- );
186
- expect(env.GH_TOKEN).toBe("ghp_X");
187
- expect(env.CLOUDFLARE_API_TOKEN).toBe("cf_Y");
188
- });
189
-
190
- test("INJECTION: a channel-set var can NOT clobber CLAUDE_CODE_OAUTH_TOKEN (auth wins)", () => {
191
- // Even if the store somehow carried CLAUDE_CODE_OAUTH_TOKEN, the managed token
192
- // set last must win — and the denylist drop means it never even lands.
193
- const env = buildAgentChildEnv(
194
- { PATH: "/usr/bin" },
195
- "THE-REAL-OAUTH",
196
- { CLAUDE_CODE_OAUTH_TOKEN: "ATTACKER-SWAP", GH_TOKEN: "ghp_X" },
197
- );
198
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("THE-REAL-OAUTH");
199
- expect(env.GH_TOKEN).toBe("ghp_X");
200
- });
201
-
202
- test("INJECTION: a channel-set var can NOT clobber a structural passthrough (PATH/HOME)", () => {
203
- const env = buildAgentChildEnv(
204
- { PATH: "/real/path", HOME: "/real/home" },
205
- "tok",
206
- { PATH: "/evil", HOME: "/evil" },
207
- );
208
- expect(env.PATH).toBe("/real/path");
209
- expect(env.HOME).toBe("/real/home");
210
- });
211
-
212
- test("INJECTION: denylisted keys (API keys) are dropped defensively with a warning", () => {
213
- const warnings: string[] = [];
214
- const orig = console.warn;
215
- console.warn = (...a: unknown[]) => warnings.push(a.map(String).join(" "));
216
- try {
217
- const env = buildAgentChildEnv(
218
- { PATH: "/usr/bin" },
219
- "tok",
220
- { ANTHROPIC_API_KEY: "sk-ant-SMUGGLED", CLAUDE_API_KEY: "y", GH_TOKEN: "ghp_X" },
221
- );
222
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
223
- expect(env.CLAUDE_API_KEY).toBeUndefined();
224
- expect(env.GH_TOKEN).toBe("ghp_X"); // the legit var still passes
225
- expect(warnings.some((w) => w.includes("ANTHROPIC_API_KEY") && w.includes("denylisted"))).toBe(true);
226
- } finally {
227
- console.warn = orig;
228
- }
229
- });
230
-
231
- test("INJECTION: an empty channel env is a no-op (back-compat default arg)", () => {
232
- const env = buildAgentChildEnv({ PATH: "/usr/bin" }, "tok");
233
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("tok");
234
- expect(env.PATH).toBe("/usr/bin");
235
- });
236
- });
237
-
238
- describe("mergeSandboxLaunchEnv — the scrub WINS over the engine's returned env", () => {
239
- // The REAL `wrapWithSandboxArgv` returns `env: process.env` (the FULL daemon env) on
240
- // macOS/Linux; on Windows it returns `{...process.env, ...proxy}`. So `wrapped.env` is
241
- // essentially the whole daemon env. The old `{ ...childEnv, ...wrapped.env, ...homeEnv }`
242
- // spread let that OVERRIDE the scrubbed childEnv — re-admitting the daemon's ambient
243
- // ANTHROPIC_API_KEY/secrets into the sandboxed turn (isolation/billing leak).
244
-
245
- const childEnv = buildAgentChildEnv({ PATH: "/usr/bin", HOME: "/h" }, "THE-OAUTH-TOKEN");
246
- // A representative `wrapped.env` = the daemon's process.env + the sandbox/proxy vars.
247
- const wrappedEnv = {
248
- ANTHROPIC_API_KEY: "sk-ant-DAEMON-AMBIENT",
249
- CLAUDE_API_KEY: "daemon-ambient",
250
- CLAUDE_CODE_OAUTH_TOKEN: "WRONG-DAEMON-TOKEN",
251
- SECRET_THING: "daemon-secret",
252
- PATH: "/daemon/bin",
253
- SANDBOX_RUNTIME: "1",
254
- HTTP_PROXY: "http://localhost:5555",
255
- HTTPS_PROXY: "http://localhost:5555",
256
- NO_PROXY: "localhost,127.0.0.1",
257
- NODE_EXTRA_CA_CERTS: "/tmp/claude/ca.pem",
258
- TMPDIR: "/tmp/claude",
259
- };
260
- const homeEnv: Record<string, string> = { CLAUDE_CONFIG_DIR: "/sess/.claude" };
261
-
262
- test("LEAK CLOSED: the daemon's ambient secrets in wrapped.env never reach the launch env", () => {
263
- const env = mergeSandboxLaunchEnv(childEnv, wrappedEnv, homeEnv);
264
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
265
- expect(env.CLAUDE_API_KEY).toBeUndefined();
266
- expect(env.SECRET_THING).toBeUndefined();
267
- });
268
-
269
- test("MANAGED AUTH WINS: CLAUDE_CODE_OAUTH_TOKEN is the scrubbed value, not the engine env's wrong one", () => {
270
- const env = mergeSandboxLaunchEnv(childEnv, wrappedEnv, homeEnv);
271
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("THE-OAUTH-TOKEN");
272
- });
273
-
274
- test("EGRESS PRESERVED: the allowlisted sandbox/proxy vars survive (the proxy keeps working)", () => {
275
- const env = mergeSandboxLaunchEnv(childEnv, wrappedEnv, homeEnv);
276
- expect(env.SANDBOX_RUNTIME).toBe("1");
277
- expect(env.HTTP_PROXY).toBe("http://localhost:5555");
278
- expect(env.HTTPS_PROXY).toBe("http://localhost:5555");
279
- expect(env.NO_PROXY).toBe("localhost,127.0.0.1");
280
- expect(env.NODE_EXTRA_CA_CERTS).toBe("/tmp/claude/ca.pem");
281
- });
282
-
283
- test("the scrubbed PATH wins (PATH is not in the sandbox allowlist)", () => {
284
- const env = mergeSandboxLaunchEnv(childEnv, wrappedEnv, homeEnv);
285
- expect(env.PATH).toBe("/usr/bin"); // childEnv's, not the engine env's /daemon/bin
286
- });
287
-
288
- test("homeEnv wins last (CLAUDE_CONFIG_DIR/XDG/TMP overrides)", () => {
289
- const env = mergeSandboxLaunchEnv(childEnv, wrappedEnv, homeEnv);
290
- expect(env.CLAUDE_CONFIG_DIR).toBe("/sess/.claude");
291
- });
292
-
293
- test("the allowlist never contains the Claude-auth trio (defense-in-depth)", () => {
294
- expect(SANDBOX_ENV_ALLOWLIST.has("ANTHROPIC_API_KEY")).toBe(false);
295
- expect(SANDBOX_ENV_ALLOWLIST.has("CLAUDE_API_KEY")).toBe(false);
296
- expect(SANDBOX_ENV_ALLOWLIST.has("CLAUDE_CODE_OAUTH_TOKEN")).toBe(false);
297
- });
298
- });
299
-
300
- describe("buildAgentClaudeArgs", () => {
301
- test("interactive claude (no -p) with strict MCP config + dev-channels for the first channel", () => {
302
- const argv = buildAgentClaudeArgs({
303
- mcpConfigPath: "/ws/.mcp.json",
304
- firstChannelEntryKey: "agent-aaron-dev",
305
- });
306
- expect(argv).toContain("--strict-mcp-config");
307
- expect(argv).toContain("--mcp-config");
308
- expect(argv).toContain("/ws/.mcp.json");
309
- expect(argv).toContain("--dangerously-load-development-channels=server:agent-aaron-dev");
310
- // Autonomous: no human answers tool prompts; the sandbox is the containment.
311
- expect(argv).toContain("--dangerously-skip-permissions");
312
- // NOT headless: no `-p`.
313
- expect(argv).not.toContain("-p");
314
- });
315
- test("no systemPromptFile → neither system-prompt flag (today's behavior)", () => {
316
- const argv = buildAgentClaudeArgs({ mcpConfigPath: "/ws/.mcp.json", firstChannelEntryKey: "agent-c" });
317
- expect(argv).not.toContain("--append-system-prompt-file");
318
- expect(argv).not.toContain("--system-prompt-file");
319
- });
320
- test("systemPromptFile (append, default) → --append-system-prompt-file <path>", () => {
321
- const argv = buildAgentClaudeArgs({
322
- mcpConfigPath: "/ws/.mcp.json",
323
- firstChannelEntryKey: "agent-c",
324
- systemPromptFile: "/ws/system-prompt.txt",
325
- systemPromptMode: "append",
326
- });
327
- expect(argv).toContain("--append-system-prompt-file");
328
- expect(argv[argv.indexOf("--append-system-prompt-file") + 1]).toBe("/ws/system-prompt.txt");
329
- expect(argv).not.toContain("--system-prompt-file");
330
- });
331
- test("systemPromptFile (replace) → --system-prompt-file <path>", () => {
332
- const argv = buildAgentClaudeArgs({
333
- mcpConfigPath: "/ws/.mcp.json",
334
- firstChannelEntryKey: "agent-c",
335
- systemPromptFile: "/ws/system-prompt.txt",
336
- systemPromptMode: "replace",
337
- });
338
- expect(argv).toContain("--system-prompt-file");
339
- expect(argv).not.toContain("--append-system-prompt-file");
340
- });
341
- });
342
-
343
- describe("shellJoin", () => {
344
- test("leaves safe args bare, quotes args with spaces", () => {
345
- expect(shellJoin(["claude", "--mcp-config", "/a/b.json"])).toBe("claude --mcp-config /a/b.json");
346
- expect(shellJoin(["echo", "a b"])).toBe("echo 'a b'");
347
- });
348
- });
349
-
350
- describe("seedAgentHome — the per-session writable HOME (stability keystone)", () => {
351
- test("seeds from the operator config (inherits first-run state), strips projects+oauthAccount, trusts the workspace", () => {
352
- const ws = mkdtempSync(join(tmpdir(), "seed-home-"));
353
- const opDir = mkdtempSync(join(tmpdir(), "seed-op-"));
354
- const opPath = join(opDir, ".claude.json");
355
- // A realistic operator config: completed first-run flags + history + account.
356
- writeFileSync(opPath, JSON.stringify({
357
- hasCompletedOnboarding: true,
358
- theme: "dark",
359
- numStartups: 536,
360
- sonnet45MigrationComplete: true,
361
- oauthAccount: { email: "op@example.com", secret: "DO-NOT-COPY" },
362
- projects: { "/some/other/proj": { hasTrustDialogAccepted: true } },
363
- }));
364
- try {
365
- const env = seedAgentHome(ws, { mcpServers: ["agent-uni-dev", "vault-default"], operatorConfigPath: opPath });
366
- // Config + temp are redirected to per-session dirs INSIDE the workspace.
367
- // (HOME is deliberately NOT overridden — claude finds its real install there.)
368
- expect(env.HOME).toBeUndefined();
369
- expect(env.CLAUDE_CONFIG_DIR).toBe(join(ws, "home", ".claude"));
370
- expect(env.TMPDIR).toBe(join(ws, "tmp"));
371
- expect(env.CLAUDE_CODE_TMPDIR).toBe(join(ws, "tmp"));
372
- const seed = JSON.parse(readFileSync(join(ws, "home", ".claude", ".claude.json"), "utf8")) as Record<string, unknown>;
373
- // Inherits the operator's completed first-run state (onboarding, theme, migrations).
374
- expect(seed.hasCompletedOnboarding).toBe(true);
375
- expect(seed.theme).toBe("dark");
376
- expect(seed.sonnet45MigrationComplete).toBe(true);
377
- // Strips the account; replaces project history with ONLY this workspace, trusted.
378
- expect(seed.oauthAccount).toBeUndefined();
379
- const projects = seed.projects as Record<string, { hasTrustDialogAccepted: boolean; hasCompletedProjectOnboarding: boolean }>;
380
- expect(Object.keys(projects)).toEqual([ws]);
381
- expect(projects[ws]!.hasTrustDialogAccepted).toBe(true);
382
- expect(projects[ws]!.hasCompletedProjectOnboarding).toBe(true);
383
- // Our configured MCP servers are pre-approved (no "trust this MCP server" prompt).
384
- expect((projects[ws] as { enabledMcpjsonServers?: string[] }).enabledMcpjsonServers).toEqual([
385
- "agent-uni-dev",
386
- "vault-default",
387
- ]);
388
- } finally {
389
- rmSync(ws, { recursive: true, force: true });
390
- rmSync(opDir, { recursive: true, force: true });
391
- }
392
- });
393
-
394
- test("falls back to the minimal seed when the operator has no config", () => {
395
- const ws = mkdtempSync(join(tmpdir(), "seed-home-noop-"));
396
- try {
397
- seedAgentHome(ws, { operatorConfigPath: join(ws, "does-not-exist.json") });
398
- const seed = JSON.parse(readFileSync(join(ws, "home", ".claude", ".claude.json"), "utf8")) as {
399
- hasCompletedOnboarding: boolean;
400
- projects: Record<string, { hasTrustDialogAccepted: boolean }>;
401
- };
402
- expect(seed.hasCompletedOnboarding).toBe(true);
403
- expect(seed.projects[ws]!.hasTrustDialogAccepted).toBe(true);
404
- } finally {
405
- rmSync(ws, { recursive: true, force: true });
406
- }
407
- });
408
-
409
- test("projectRoot override → the SHARED working dir is the pre-trusted project, not the private home", () => {
410
- const ws = mkdtempSync(join(tmpdir(), "seed-home-projroot-"));
411
- const noop = join(ws, "no-operator.json");
412
- try {
413
- // The cwd (a shared working dir) is pre-trusted; the seed still lives UNDER ws.
414
- seedAgentHome(ws, { operatorConfigPath: noop, projectRoot: "/Users/op/Code/repo", mcpServers: ["vault-default"] });
415
- const seed = JSON.parse(readFileSync(join(ws, "home", ".claude", ".claude.json"), "utf8")) as {
416
- projects: Record<string, { hasTrustDialogAccepted: boolean }>;
417
- };
418
- // The PROJECT (pre-trusted) is the shared working dir, NOT the private ws.
419
- expect(Object.keys(seed.projects)).toEqual(["/Users/op/Code/repo"]);
420
- expect(seed.projects["/Users/op/Code/repo"]!.hasTrustDialogAccepted).toBe(true);
421
- } finally {
422
- rmSync(ws, { recursive: true, force: true });
423
- }
424
- });
425
-
426
- test("idempotent — an existing seed is left as-is (claude owns it after first boot)", () => {
427
- const ws = mkdtempSync(join(tmpdir(), "seed-home-idem-"));
428
- const noop = join(ws, "no-operator.json");
429
- try {
430
- seedAgentHome(ws, { operatorConfigPath: noop });
431
- const path = join(ws, "home", ".claude", ".claude.json");
432
- writeFileSync(path, JSON.stringify({ hasCompletedOnboarding: true, mine: true }));
433
- seedAgentHome(ws, { operatorConfigPath: noop }); // second call must not clobber
434
- expect(JSON.parse(readFileSync(path, "utf8")).mine).toBe(true);
435
- } finally {
436
- rmSync(ws, { recursive: true, force: true });
437
- }
438
- });
439
- });
440
-
441
- // ---- full wiring tests -----------------------------------------------------
442
-
443
- describe("spawnAgent — full wiring with stubs (no real token)", () => {
444
- test("creates the tmux session, writes a strict MCP config, injects OAuth, omits ANTHROPIC_API_KEY", async () => {
445
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-"));
446
- const tmux = recordingTmux();
447
- const engine = fakeEngine();
448
- const spec: AgentSpec = {
449
- name: "aaron-dev",
450
- channels: ["aaron-dev"],
451
- vault: { name: "default", access: "read", tags: ["agent/message"] },
452
- network: "restricted", // exercise the egress floor; scoped reads are the default (step 6)
453
- };
454
- const res = await spawnAgent(spec, baseDeps({ tmux, sandboxEngine: engine }));
455
-
456
- // 1. tmux session created with the spec's name.
457
- expect(res.alreadyRunning).toBe(false);
458
- expect(res.session).toBe(sessionName("aaron-dev"));
459
- expect(tmux.launched).toHaveLength(1);
460
- const launch = tmux.launched[0]!;
461
- expect(launch.name).toBe("aaron-dev-agent");
462
-
463
- // 1b. The dev-channels consent gate is auto-answered for THIS session after the
464
- // launch (channel#70) — otherwise the headless spawn hangs at the prompt forever.
465
- expect(tmux.confirmed).toEqual(["aaron-dev-agent"]);
466
- expect(res.devChannelsPrompt).toBe("confirmed");
467
-
468
- // 2. The launched argv is the sandbox wrapper carrying the claude command.
469
- expect(launch.argv[0]).toBe("/bin/bash");
470
- expect(launch.argv[2]).toContain("SBX claude");
471
- expect(launch.argv[2]).toContain("--strict-mcp-config");
472
-
473
- // 3. The injected env has CLAUDE_CODE_OAUTH_TOKEN and NO ANTHROPIC_API_KEY.
474
- expect(launch.env.CLAUDE_CODE_OAUTH_TOKEN).toBe("OAUTH-CRED-PLACEHOLDER");
475
- expect(launch.env.ANTHROPIC_API_KEY).toBeUndefined();
476
- expect(launch.env.CLAUDE_API_KEY).toBeUndefined();
477
- // ...and the sandbox proxy env layered on top.
478
- expect(launch.env.SANDBOX_RUNTIME).toBe("1");
479
- expect(launch.env.HTTPS_PROXY).toBe("http://localhost:5555");
480
-
481
- // 3b. TMPDIR (+ claude-specific + generic) point at a WRITABLE dir inside the
482
- // workspace, OVERRIDING the sandbox engine's own TMPDIR — without this claude
483
- // can't create its scratch dir and dies "Claude Code could not start: EPERM".
484
- const wsTmp = join(res.workspace, "tmp");
485
- expect(launch.env.TMPDIR).toBe(wsTmp);
486
- expect(launch.env.CLAUDE_CODE_TMPDIR).toBe(wsTmp);
487
- expect(launch.env.TMP).toBe(wsTmp);
488
- expect(launch.env.TEMP).toBe(wsTmp);
489
- // ...and the dir is actually created on disk (writable, where the child looks).
490
- expect(statSync(wsTmp).isDirectory()).toBe(true);
491
-
492
- // 4. The MCP config has the right entries with DISTINCT tokens (one per aud).
493
- const parsed = JSON.parse(res.mcpConfigJson) as {
494
- mcpServers: Record<string, { url: string; headers?: { Authorization: string } }>;
495
- };
496
- const chKey = channelEntryKey("aaron-dev");
497
- const vKey = vaultEntryKey("default");
498
- expect(parsed.mcpServers[chKey]!.url).toBe("http://127.0.0.1:1941/mcp/aaron-dev");
499
- expect(parsed.mcpServers[vKey]!.url).toBe("http://127.0.0.1:1940/vault/default/mcp");
500
- const chAuth = parsed.mcpServers[chKey]!.headers!.Authorization;
501
- const vAuth = parsed.mcpServers[vKey]!.headers!.Authorization;
502
- expect(chAuth).toMatch(/^Bearer TOK-/);
503
- expect(vAuth).toMatch(/^Bearer TOK-/);
504
- expect(chAuth).not.toBe(vAuth); // distinct tokens, distinct auds
505
-
506
- // 5. The on-disk config is 0600 (it inlines tokens).
507
- const mcpPath = join(res.workspace, ".mcp.json");
508
- expect(statSync(mcpPath).mode & 0o777).toBe(0o600);
509
- expect(readFileSync(mcpPath, "utf8")).toBe(res.mcpConfigJson);
510
-
511
- // 6. The sandbox config carried the egress floor + scoped reads.
512
- expect(engine.initializedWith!.network.allowedDomains).toContain("api.anthropic.com");
513
- expect(engine.initializedWith!.network.allowedDomains).toContain("hub.example.com");
514
- expect(engine.initializedWith!.filesystem.allowWrite).toContain(res.workspace);
515
- });
516
-
517
- test("a spec with systemPrompt writes system-prompt.txt 0600 + passes the -file flag in the launch argv", async () => {
518
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-sysprompt-"));
519
- const tmux = recordingTmux();
520
- const spec: AgentSpec = {
521
- name: "eng",
522
- channels: ["eng"],
523
- systemPrompt: "You are the eng channel's assistant.",
524
- systemPromptMode: "append",
525
- };
526
- const res = await spawnAgent(spec, baseDeps({ tmux }));
527
-
528
- // The prompt file is written 0600 with the exact text.
529
- const promptPath = join(res.workspace, "system-prompt.txt");
530
- expect(statSync(promptPath).mode & 0o777).toBe(0o600);
531
- expect(readFileSync(promptPath, "utf8")).toBe("You are the eng channel's assistant.");
532
- // The launched claude command carries --append-system-prompt-file <path>.
533
- const cmd = tmux.launched[0]!.argv[2]!;
534
- expect(cmd).toContain("--append-system-prompt-file");
535
- expect(cmd).toContain(promptPath);
536
- });
537
-
538
- test("a spec with NO systemPrompt writes no prompt file + no system-prompt flag", async () => {
539
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-nosysprompt-"));
540
- const tmux = recordingTmux();
541
- const res = await spawnAgent({ name: "bare", channels: ["bare"] }, baseDeps({ tmux }));
542
- expect(existsSync(join(res.workspace, "system-prompt.txt"))).toBe(false);
543
- expect(tmux.launched[0]!.argv[2]!).not.toContain("system-prompt-file");
544
- });
545
-
546
- test("mints ONE token per channel for a multi-channel spec", async () => {
547
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-multi-"));
548
- const spec: AgentSpec = { name: "multi", channels: ["a", "b"] };
549
- const res = await spawnAgent(spec, baseDeps());
550
- expect(Object.keys(res.tokens)).toContain("a");
551
- expect(Object.keys(res.tokens)).toContain("b");
552
- expect(res.tokens.a!.token).not.toBe(res.tokens.b!.token);
553
- const parsed = JSON.parse(res.mcpConfigJson) as { mcpServers: Record<string, unknown> };
554
- expect(parsed.mcpServers[channelEntryKey("a")]).toBeDefined();
555
- expect(parsed.mcpServers[channelEntryKey("b")]).toBeDefined();
556
- });
557
-
558
- test("tag-scoped vault: the scoped_tags permission rides the vault mint request", async () => {
559
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-vault-"));
560
- const calls: Array<Record<string, unknown>> = [];
561
- const fetchFn = (async (_u: string | URL | Request, init?: RequestInit) => {
562
- const body = JSON.parse(String(init?.body ?? "{}")) as Record<string, unknown>;
563
- calls.push(body);
564
- return new Response(
565
- JSON.stringify({ jti: "j", token: `T-${calls.length}`, expires_at: "", scope: body.scope }),
566
- { status: 200, headers: { "content-type": "application/json" } },
567
- );
568
- }) as unknown as typeof fetch;
569
-
570
- const spec: AgentSpec = {
571
- name: "weaver",
572
- channels: ["c"],
573
- vault: { name: "default", access: "read", tags: ["agent/message"] },
574
- };
575
- await spawnAgent(spec, baseDeps({ fetchFn }));
576
- const vaultCall = calls.find((c) => String(c.scope).startsWith("vault:"));
577
- expect(vaultCall).toBeDefined();
578
- expect(vaultCall!.scope).toBe("vault:default:read");
579
- expect(vaultCall!.permissions).toEqual({ scoped_tags: ["agent/message"] });
580
- });
581
-
582
- test("idempotent: an already-running session is a no-op", async () => {
583
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-idem-"));
584
- const tmux = recordingTmux(new Set(["arm-agent"]));
585
- const res = await spawnAgent({ name: "arm", channels: ["c"] }, baseDeps({ tmux }));
586
- expect(res.alreadyRunning).toBe(true);
587
- expect(tmux.launched).toHaveLength(0);
588
- // No launch → the dev-channels gate is NOT touched (guards against someone
589
- // moving the confirm call above the early-return — channel#70).
590
- expect(tmux.confirmed).toHaveLength(0);
591
- expect(res.devChannelsPrompt).toBeUndefined();
592
- });
593
-
594
- test("a spec with no channels is rejected", async () => {
595
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-noch-"));
596
- await expect(spawnAgent({ name: "x", channels: [] }, baseDeps())).rejects.toThrow(/no channels/);
597
- });
598
-
599
- test("SECURITY: an over-broad mint (hub 400) aborts the launch — no tmux session created", async () => {
600
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-deny-"));
601
- const tmux = recordingTmux();
602
- const fetchFn = (async () =>
603
- new Response(
604
- JSON.stringify({ error: "invalid_scope", error_description: "not grantable by this bearer" }),
605
- { status: 400, headers: { "content-type": "application/json" } },
606
- )) as unknown as typeof fetch;
607
- await expect(
608
- spawnAgent({ name: "x", channels: ["c"] }, baseDeps({ tmux, fetchFn })),
609
- ).rejects.toThrow(/mint refused/);
610
- // The attenuation failure happened BEFORE any tmux launch.
611
- expect(tmux.launched).toHaveLength(0);
612
- });
613
-
614
- test("SECURITY: an adversarial spec.name is rejected BEFORE any fs/tmux/mint side effect", async () => {
615
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-name-"));
616
- for (const bad of ["..", "a/b", "a b", "../escape", ".", "a..b", "x;rm", ""]) {
617
- const tmux = recordingTmux();
618
- let minted = false;
619
- const fetchFn = (async () => {
620
- minted = true;
621
- return new Response("{}", { status: 200 });
622
- }) as unknown as typeof fetch;
623
- await expect(
624
- spawnAgent({ name: bad, channels: ["c"] }, baseDeps({ tmux, fetchFn })),
625
- ).rejects.toThrow(/slug/);
626
- // No side effects: no tmux launch, no mint attempt.
627
- expect(tmux.launched).toHaveLength(0);
628
- expect(minted).toBe(false);
629
- }
630
- });
631
-
632
- test("a valid slug name is accepted (dashes + underscores ok)", async () => {
633
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-okname-"));
634
- const res = await spawnAgent({ name: "aaron_dev-2", channels: ["c"] }, baseDeps());
635
- expect(res.alreadyRunning).toBe(false);
636
- expect(res.session).toBe("aaron_dev-2-agent");
637
- });
638
-
639
- test("ENV INJECTION: the resolved per-channel env reaches the tmux launch env (Claude auth intact)", async () => {
640
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-env-"));
641
- const tmux = recordingTmux();
642
- const deps = baseDeps({
643
- tmux,
644
- // The wake channel is the first channel ("aaron-dev") — env resolves on it.
645
- resolveChannelEnv: (ch): Record<string, string> =>
646
- ch === "aaron-dev" ? { GH_TOKEN: "ghp_INJECTED", CLOUDFLARE_API_TOKEN: "cf_INJECTED" } : {},
647
- });
648
- await spawnAgent({ name: "aaron-dev", channels: ["aaron-dev"] }, deps);
649
- expect(tmux.launched).toHaveLength(1);
650
- const env = tmux.launched[0]!.env;
651
- // The injected vars reach the child…
652
- expect(env.GH_TOKEN).toBe("ghp_INJECTED");
653
- expect(env.CLOUDFLARE_API_TOKEN).toBe("cf_INJECTED");
654
- // …Claude auth is the stub placeholder (not clobbered), and no API key leaked.
655
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("OAUTH-CRED-PLACEHOLDER");
656
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
657
- });
658
-
659
- test("ENV INJECTION: a denylisted key planted in the resolver is dropped at launch", async () => {
660
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-env-deny-"));
661
- const tmux = recordingTmux();
662
- const deps = baseDeps({
663
- tmux,
664
- resolveChannelEnv: () => ({ ANTHROPIC_API_KEY: "sk-ant-SMUGGLED", GH_TOKEN: "ghp_OK" }),
665
- });
666
- await spawnAgent({ name: "x", channels: ["c"] }, deps);
667
- const env = tmux.launched[0]!.env;
668
- expect(env.GH_TOKEN).toBe("ghp_OK");
669
- expect(env.ANTHROPIC_API_KEY).toBeUndefined(); // dropped defensively in buildAgentChildEnv
670
- });
671
-
672
- test("SPEC PERSISTENCE: spawn writes spec.json so a restart can reproduce the launch", async () => {
673
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-spec-"));
674
- const spec: AgentSpec = {
675
- name: "weaver",
676
- channels: [{ name: "weave", access: "read" }],
677
- vault: { name: "default", access: "read", tags: ["agent/message"] },
678
- network: "restricted",
679
- egress: ["registry.npmjs.org"],
680
- };
681
- const res = await spawnAgent(spec, baseDeps());
682
- // The persisted spec round-trips to the exact spec the launch used.
683
- const recovered = readPersistedSpec(res.workspace);
684
- expect(recovered).toEqual(spec);
685
- // And it's at the conventional path.
686
- expect(specFilePath(res.workspace)).toBe(join(res.workspace, "spec.json"));
687
- });
688
-
689
- test("read-only channel mints agent:read ONLY (not read+write)", async () => {
690
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-roch-"));
691
- const scopes: string[] = [];
692
- const fetchFn = (async (_u: string | URL | Request, init?: RequestInit) => {
693
- const body = JSON.parse(String(init?.body ?? "{}")) as { scope: string };
694
- scopes.push(body.scope);
695
- return new Response(
696
- JSON.stringify({ jti: "j", token: `T-${scopes.length}`, expires_at: "", scope: body.scope }),
697
- { status: 200, headers: { "content-type": "application/json" } },
698
- );
699
- }) as unknown as typeof fetch;
700
-
701
- const spec: AgentSpec = {
702
- name: "watcher",
703
- channels: [
704
- { name: "readonly-ch", access: "read" },
705
- { name: "rw-ch", access: "write" },
706
- "bare-ch", // bare string = write (back-compat)
707
- ],
708
- };
709
- await spawnAgent(spec, baseDeps({ fetchFn }));
710
- expect(scopes).toContain("agent:read"); // the read-only channel
711
- expect(scopes.filter((s) => s === "agent:read")).toHaveLength(1);
712
- expect(scopes.filter((s) => s === "agent:read agent:write")).toHaveLength(2); // rw + bare
713
- });
714
-
715
- test("CONCURRENCY: two concurrent spawnAgent calls produce correct, independent MCP configs + wrapping", async () => {
716
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-conc-"));
717
- // Independent engines/tmux per call so we can assert no cross-clobber. Each
718
- // mint hub returns a token namespaced to the spec so configs are tellable apart.
719
- function depsForArm(arm: string) {
720
- let n = 0;
721
- const fetchFn = (async (_u: string | URL | Request, init?: RequestInit) => {
722
- const body = JSON.parse(String(init?.body ?? "{}")) as { scope: string };
723
- n += 1;
724
- return new Response(
725
- JSON.stringify({ jti: `${arm}-${n}`, token: `${arm}-TOK-${n}`, expires_at: "", scope: body.scope }),
726
- { status: 200, headers: { "content-type": "application/json" } },
727
- );
728
- }) as unknown as typeof fetch;
729
- return baseDeps({ tmux: recordingTmux(), sandboxEngine: fakeEngine(), fetchFn });
730
- }
731
-
732
- const [a, b] = await Promise.all([
733
- spawnAgent({ name: "arm-a", channels: ["ca"] }, depsForArm("A")),
734
- spawnAgent({ name: "arm-b", channels: ["cb"] }, depsForArm("B")),
735
- ]);
736
-
737
- // Each got its OWN channel entry + token — no clobber across the race.
738
- const pa = JSON.parse(a.mcpConfigJson) as { mcpServers: Record<string, { url: string; headers?: { Authorization: string } }> };
739
- const pb = JSON.parse(b.mcpConfigJson) as { mcpServers: Record<string, { url: string; headers?: { Authorization: string } }> };
740
- expect(pa.mcpServers[channelEntryKey("ca")]!.url).toBe("http://127.0.0.1:1941/mcp/ca");
741
- expect(pb.mcpServers[channelEntryKey("cb")]!.url).toBe("http://127.0.0.1:1941/mcp/cb");
742
- expect(pa.mcpServers[channelEntryKey("ca")]!.headers!.Authorization).toBe("Bearer A-TOK-1");
743
- expect(pb.mcpServers[channelEntryKey("cb")]!.headers!.Authorization).toBe("Bearer B-TOK-1");
744
- // Independent sandbox configs (each carries its own workspace allowWrite).
745
- expect(a.wrapped.config.filesystem.allowWrite).toContain(a.workspace);
746
- expect(b.wrapped.config.filesystem.allowWrite).toContain(b.workspace);
747
- expect(a.workspace).not.toBe(b.workspace);
748
- });
749
-
750
- test("CONCURRENCY: the init→wrap window is serialized (never two engines in it at once)", async () => {
751
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-serial-"));
752
- // An engine whose initialize overlaps wrap by an await; if the lock didn't
753
- // hold, two would be "in the window" simultaneously and maxActive would be >1.
754
- let active = 0;
755
- let maxActive = 0;
756
- function slowEngine(): SandboxEngine {
757
- return {
758
- isSupportedPlatform: () => true,
759
- isSandboxingEnabled: () => true,
760
- async initialize() {
761
- active += 1;
762
- maxActive = Math.max(maxActive, active);
763
- await Bun.sleep(15);
764
- },
765
- async wrapWithSandboxArgv(command: string) {
766
- await Bun.sleep(15);
767
- active -= 1;
768
- return { argv: ["/bin/bash", "-c", command], env: {} };
769
- },
770
- async reset() {},
771
- };
772
- }
773
- await Promise.all([
774
- spawnAgent({ name: "s-a", channels: ["c"] }, baseDeps({ sandboxEngine: slowEngine(), tmux: recordingTmux() })),
775
- spawnAgent({ name: "s-b", channels: ["c"] }, baseDeps({ sandboxEngine: slowEngine(), tmux: recordingTmux() })),
776
- spawnAgent({ name: "s-c", channels: ["c"] }, baseDeps({ sandboxEngine: slowEngine(), tmux: recordingTmux() })),
777
- ]);
778
- expect(maxActive).toBe(1);
779
- });
780
- });
781
-
782
- // ---- the workspace seam (working-directory axis) ---------------------------
783
- // design 2026-06-16-agent-filesystem-and-sharing.md — a `workspace` host path is
784
- // the agent's cwd + an rw working-root; the credential-bearing private home
785
- // (.mcp.json / system-prompt.txt / spec.json / seeded CLAUDE_CONFIG_DIR) STAYS in
786
- // the per-agent sessions/<name> dir, never written into the shared workspace.
787
-
788
- describe("resolveAgentCwd — cwd is the workspace when set, else the private dir", () => {
789
- test("workspace set → that path; the private dir is untouched as the cwd", () => {
790
- expect(resolveAgentCwd({ name: "a", channels: ["c"], workspace: "/ws/repo" }, "/private/a")).toBe("/ws/repo");
791
- });
792
- test("workspace unset → the private dir (today's behavior)", () => {
793
- expect(resolveAgentCwd({ name: "a", channels: ["c"] }, "/private/a")).toBe("/private/a");
794
- });
795
- test("a blank workspace falls back to the private dir", () => {
796
- expect(resolveAgentCwd({ name: "a", channels: ["c"], workspace: "" }, "/private/a")).toBe("/private/a");
797
- });
798
- });
799
-
800
- describe("spawnAgent — workspace seam (interactive): cwd = workspace, secrets stay private", () => {
801
- test("workspace SET → tmux cwd is the workspace; .mcp.json/system-prompt/spec/home stay in the PRIVATE dir; workspace is in the sandbox rw set", async () => {
802
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-ws-set-"));
803
- const workspaceDir = mkdtempSync(join(tmpdir(), "shared-workdir-"));
804
- const tmux = recordingTmux();
805
- const engine = fakeEngine();
806
- try {
807
- const spec: AgentSpec = {
808
- name: "worker",
809
- channels: ["worker"],
810
- workspace: workspaceDir,
811
- systemPrompt: "You work in the repo.",
812
- };
813
- const res = await spawnAgent(spec, baseDeps({ tmux, sandboxEngine: engine }));
814
- const privateDir = sessionWorkspace(sessionsDir, "worker");
815
- // res.workspace is still the PRIVATE session dir (the home of secrets).
816
- expect(res.workspace).toBe(privateDir);
817
-
818
- // 1. The tmux session's cwd is the SHARED workspace, NOT the private dir.
819
- const launch = tmux.launched[0]!;
820
- expect(launch.cwd).toBe(workspaceDir);
821
- // …and the launch script (private) is written to the PRIVATE dir, never the shared one.
822
- expect(launch.scriptDir).toBe(privateDir);
823
-
824
- // 2. SECRETS-STAY-PRIVATE invariant: .mcp.json / system-prompt.txt / spec.json /
825
- // the seeded home all live UNDER the private dir, and NONE under the workspace.
826
- expect(existsSync(join(privateDir, ".mcp.json"))).toBe(true);
827
- expect(existsSync(join(privateDir, "system-prompt.txt"))).toBe(true);
828
- expect(existsSync(join(privateDir, "spec.json"))).toBe(true);
829
- expect(existsSync(join(privateDir, "home", ".claude", ".claude.json"))).toBe(true);
830
- // The workspace dir is NOT littered with any private artifact.
831
- expect(existsSync(join(workspaceDir, ".mcp.json"))).toBe(false);
832
- expect(existsSync(join(workspaceDir, "system-prompt.txt"))).toBe(false);
833
- expect(existsSync(join(workspaceDir, "spec.json"))).toBe(false);
834
- expect(existsSync(join(workspaceDir, ".launch.sh"))).toBe(false);
835
- expect(existsSync(join(workspaceDir, "home"))).toBe(false);
836
-
837
- // 3. --mcp-config / --append-system-prompt-file point at the PRIVATE absolute
838
- // paths (unaffected by the cwd change).
839
- const cmd = launch.argv[2]!;
840
- expect(cmd).toContain(join(privateDir, ".mcp.json"));
841
- expect(cmd).toContain(join(privateDir, "system-prompt.txt"));
842
-
843
- // 4. The workspace IS an rw working-root in the sandbox (read + write).
844
- expect(engine.initializedWith!.filesystem.allowWrite).toContain(workspaceDir);
845
- expect(engine.initializedWith!.filesystem.allowWrite).toContain(privateDir);
846
- expect(engine.initializedWith!.filesystem.allowRead).toContain(workspaceDir);
847
-
848
- // 5. CLAUDE_CONFIG_DIR / TMPDIR still point at the PRIVATE home (not the workspace).
849
- expect(launch.env.CLAUDE_CONFIG_DIR).toBe(join(privateDir, "home", ".claude"));
850
- expect(launch.env.TMPDIR).toBe(join(privateDir, "tmp"));
851
-
852
- // 6. The seeded project (pre-trusted) is the agent's CWD (the shared workspace).
853
- const seed = JSON.parse(
854
- readFileSync(join(privateDir, "home", ".claude", ".claude.json"), "utf8"),
855
- ) as { projects: Record<string, unknown> };
856
- expect(Object.keys(seed.projects)).toEqual([workspaceDir]);
857
- } finally {
858
- rmSync(workspaceDir, { recursive: true, force: true });
859
- }
860
- });
861
-
862
- test("workspace UNSET → cwd is the private dir (unchanged); workspace not in the rw set beyond the private dir", async () => {
863
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-ws-unset-"));
864
- const tmux = recordingTmux();
865
- const engine = fakeEngine();
866
- const res = await spawnAgent({ name: "plain", channels: ["plain"] }, baseDeps({ tmux, sandboxEngine: engine }));
867
- const launch = tmux.launched[0]!;
868
- // The cwd is the private session dir (today's behavior, exactly).
869
- expect(launch.cwd).toBe(res.workspace);
870
- expect(launch.scriptDir).toBe(res.workspace);
871
- // The only writable dir is the private session dir.
872
- expect(engine.initializedWith!.filesystem.allowWrite).toEqual([res.workspace]);
873
- // The pre-trusted project is the private dir (no shared working dir).
874
- const seed = JSON.parse(
875
- readFileSync(join(res.workspace, "home", ".claude", ".claude.json"), "utf8"),
876
- ) as { projects: Record<string, unknown> };
877
- expect(Object.keys(seed.projects)).toEqual([res.workspace]);
878
- });
879
-
880
- test("SECRETS-STAY-PRIVATE: .mcp.json (scoped tokens) is NEVER written into a shared workspace dir", async () => {
881
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-ws-secrets-"));
882
- const workspaceDir = mkdtempSync(join(tmpdir(), "shared-secrets-"));
883
- try {
884
- const spec: AgentSpec = {
885
- name: "secretkeeper",
886
- channels: ["secretkeeper"],
887
- vault: { name: "default", access: "read" },
888
- workspace: workspaceDir,
889
- };
890
- await spawnAgent(spec, baseDeps({ tmux: recordingTmux() }));
891
- // The shared workspace holds NO .mcp.json (the file that inlines the minted
892
- // vault/channel tokens). It only ever lives in the per-agent private dir.
893
- expect(existsSync(join(workspaceDir, ".mcp.json"))).toBe(false);
894
- // Belt-and-suspenders: no file under the shared workspace contains the minted
895
- // token marker the fake hub stamps (TOK-).
896
- const privateMcp = readFileSync(join(sessionWorkspace(sessionsDir, "secretkeeper"), ".mcp.json"), "utf8");
897
- expect(privateMcp).toContain("Bearer TOK-"); // the secret IS in the private file…
898
- // …and the shared dir has no such file at all (asserted above) — so the token
899
- // never crosses into the shareable dir.
900
- } finally {
901
- rmSync(workspaceDir, { recursive: true, force: true });
902
- }
903
- });
904
-
905
- test("two agents can SHARE one workspace dir (allowed, not solved) — each keeps its OWN private home", async () => {
906
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-ws-shared-"));
907
- const shared = mkdtempSync(join(tmpdir(), "shared-by-two-"));
908
- try {
909
- const tmuxA = recordingTmux();
910
- const tmuxB = recordingTmux();
911
- await spawnAgent({ name: "agent-a", channels: ["a"], workspace: shared }, baseDeps({ tmux: tmuxA }));
912
- await spawnAgent({ name: "agent-b", channels: ["b"], workspace: shared }, baseDeps({ tmux: tmuxB }));
913
- // Both cwd into the SAME shared dir…
914
- expect(tmuxA.launched[0]!.cwd).toBe(shared);
915
- expect(tmuxB.launched[0]!.cwd).toBe(shared);
916
- // …but each has its OWN private home (distinct .mcp.json under distinct dirs).
917
- const aPriv = sessionWorkspace(sessionsDir, "agent-a");
918
- const bPriv = sessionWorkspace(sessionsDir, "agent-b");
919
- expect(aPriv).not.toBe(bPriv);
920
- expect(existsSync(join(aPriv, ".mcp.json"))).toBe(true);
921
- expect(existsSync(join(bPriv, ".mcp.json"))).toBe(true);
922
- // The shared dir holds NEITHER agent's secrets.
923
- expect(existsSync(join(shared, ".mcp.json"))).toBe(false);
924
- } finally {
925
- rmSync(shared, { recursive: true, force: true });
926
- }
927
- });
928
- });
929
-
930
- // ---- credential wiring (Stream 3 — resolve from the per-channel store) -------
931
-
932
- describe("spawnAgent — resolves the Claude credential from the per-channel store", () => {
933
- let storeDir: string;
934
- afterEach(() => {
935
- if (storeDir) rmSync(storeDir, { recursive: true, force: true });
936
- });
937
-
938
- // The wiring under test reads `credentials.ts` keyed on the WAKE channel (the
939
- // first channel). These tests use the REAL resolver (no `resolveClaudeToken`
940
- // stub) against a throwaway store, proving the end-to-end resolve→inject path.
941
- function depsWithRealResolver(): SpawnAgentDeps {
942
- const d = baseDeps();
943
- delete (d as { resolveClaudeToken?: unknown }).resolveClaudeToken;
944
- return d;
945
- }
946
-
947
- test("injects the PER-CHANNEL override when the wake channel has one", async () => {
948
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-cred-ovr-"));
949
- storeDir = mkdtempSync(join(tmpdir(), "channel-creds-ovr-"));
950
- setDefaultClaudeCredential("oat_DEFAULT", storeDir);
951
- setChannelClaudeCredential("aaron-dev", "oat_AARON-OVERRIDE", storeDir);
952
-
953
- const tmux = recordingTmux();
954
- const deps = { ...depsWithRealResolver(), tmux, resolveClaudeToken: (ch: string) => resolveAgainst(storeDir, ch) };
955
- const res = await spawnAgent({ name: "aaron-dev", channels: ["aaron-dev"] }, deps);
956
- expect(res.alreadyRunning).toBe(false);
957
- // The override (not the default) lands in CLAUDE_CODE_OAUTH_TOKEN.
958
- expect(tmux.launched[0]!.env.CLAUDE_CODE_OAUTH_TOKEN).toBe("oat_AARON-OVERRIDE");
959
- expect(tmux.launched[0]!.env.ANTHROPIC_API_KEY).toBeUndefined();
960
- });
961
-
962
- test("falls back to the DEFAULT/operator token when the wake channel has no override", async () => {
963
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-cred-def-"));
964
- storeDir = mkdtempSync(join(tmpdir(), "channel-creds-def-"));
965
- setDefaultClaudeCredential("oat_DEFAULT", storeDir);
966
-
967
- const tmux = recordingTmux();
968
- const deps = { ...baseDeps(), tmux, resolveClaudeToken: (ch: string) => resolveAgainst(storeDir, ch) };
969
- const res = await spawnAgent({ name: "other", channels: ["unconfigured-ch"] }, deps);
970
- expect(res.alreadyRunning).toBe(false);
971
- expect(tmux.launched[0]!.env.CLAUDE_CODE_OAUTH_TOKEN).toBe("oat_DEFAULT");
972
- });
973
-
974
- test("resolves on the WAKE channel (first), not a later one", async () => {
975
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-cred-wake-"));
976
- storeDir = mkdtempSync(join(tmpdir(), "channel-creds-wake-"));
977
- setDefaultClaudeCredential("oat_DEFAULT", storeDir);
978
- setChannelClaudeCredential("first", "oat_FIRST", storeDir);
979
- setChannelClaudeCredential("second", "oat_SECOND", storeDir);
980
-
981
- const tmux = recordingTmux();
982
- const deps = { ...baseDeps(), tmux, resolveClaudeToken: (ch: string) => resolveAgainst(storeDir, ch) };
983
- await spawnAgent({ name: "multi", channels: ["first", "second"] }, deps);
984
- // The wake channel is the first → its override is the session's auth.
985
- expect(tmux.launched[0]!.env.CLAUDE_CODE_OAUTH_TOKEN).toBe("oat_FIRST");
986
- });
987
-
988
- test("SECURITY: an unconfigured store ABORTS the launch BEFORE any mint/tmux side effect", async () => {
989
- sessionsDir = mkdtempSync(join(tmpdir(), "spawn-agent-cred-none-"));
990
- storeDir = mkdtempSync(join(tmpdir(), "channel-creds-none-")); // empty store
991
- const tmux = recordingTmux();
992
- let minted = false;
993
- const fetchFn = (async () => {
994
- minted = true;
995
- return new Response("{}", { status: 200 });
996
- }) as unknown as typeof fetch;
997
- const deps = { ...baseDeps(), tmux, fetchFn, resolveClaudeToken: (ch: string) => resolveAgainst(storeDir, ch) };
998
- await expect(
999
- spawnAgent({ name: "x", channels: ["ghost"] }, deps),
1000
- ).rejects.toThrow(/no Claude credential/);
1001
- // No session launched, no token minted.
1002
- expect(tmux.launched).toHaveLength(0);
1003
- expect(minted).toBe(false);
1004
- });
1005
- });
1006
-
1007
- // Resolve against a specific store dir (the real resolver hard-wires the default
1008
- // state dir; this test helper threads the throwaway dir through, exercising the
1009
- // SAME `resolveClaudeCredential` the production resolver calls).
1010
- function resolveAgainst(storeDir: string, channel: string): string {
1011
- const { resolveClaudeCredential } = require("./credentials.ts") as typeof import("./credentials.ts");
1012
- return resolveClaudeCredential(channel, storeDir);
1013
- }
1014
-
1015
- // ---- buildLaunchScript (the tmux-buffer fix) -------------------------------
1016
-
1017
- describe("buildLaunchScript — script body per argv shape, token-free", () => {
1018
- test("macOS `/bin/bash -c <cmd>` shape: the body IS the command", () => {
1019
- const script = buildLaunchScript(["/bin/bash", "-c", "sandbox-exec -p '...' claude --foo"]);
1020
- expect(script.startsWith("#!/bin/bash\nset -euo pipefail\n")).toBe(true);
1021
- expect(script).toContain("sandbox-exec -p '...' claude --foo");
1022
- // No `exec <bash> -c` re-wrapping for this canonical shape.
1023
- expect(script).not.toContain("exec /bin/bash");
1024
- });
1025
-
1026
- test("general argv (Linux bubblewrap shape): exec's the quoted argv", () => {
1027
- const script = buildLaunchScript(["bwrap", "--ro-bind", "/usr", "/usr", "claude", "--mcp-config", "/ws/.mcp.json"]);
1028
- expect(script.startsWith("#!/bin/bash\nset -euo pipefail\n")).toBe(true);
1029
- expect(script).toContain("exec bwrap --ro-bind /usr /usr claude --mcp-config /ws/.mcp.json");
1030
- });
1031
- });
1032
-
1033
- describe("realTmuxLauncher — launch-script indirection (tmux can't take the ~84KB profile inline)", () => {
1034
- /** A recording spawnFn matching the `Bun.spawn` shape the launcher awaits. */
1035
- function recordingSpawn(): {
1036
- fn: typeof Bun.spawn;
1037
- calls: string[][];
1038
- } {
1039
- const calls: string[][] = [];
1040
- const fn = ((argv: string[]) => {
1041
- calls.push(argv);
1042
- return {
1043
- exited: Promise.resolve(0),
1044
- stderr: new Response("").body,
1045
- };
1046
- }) as unknown as typeof Bun.spawn;
1047
- return { fn, calls };
1048
- }
1049
-
1050
- test("a >100KB wrapped command is NOT passed inline — tmux gets a short script-path argv; the script is written 0600 with the command; token rides env via -e", async () => {
1051
- const workspace = mkdtempSync(join(tmpdir(), "launch-script-"));
1052
- try {
1053
- // A wrapped argv whose command embeds a giant (>100KB) profile inline — the
1054
- // exact shape that overran tmux's buffer in the integration smoke.
1055
- const bigProfile = "X".repeat(100_000);
1056
- const bigCommand = `sandbox-exec -p '${bigProfile}' claude --strict-mcp-config --mcp-config ${join(workspace, ".mcp.json")}`;
1057
- const wrappedArgv = ["/bin/bash", "-c", bigCommand];
1058
- expect(bigCommand.length).toBeGreaterThan(100_000);
1059
-
1060
- const { fn, calls } = recordingSpawn();
1061
- const launcher = realTmuxLauncher(fn);
1062
- await launcher.newSession({
1063
- name: "big-agent",
1064
- argv: wrappedArgv,
1065
- env: { CLAUDE_CODE_OAUTH_TOKEN: "OAUTH-SECRET", SANDBOX_RUNTIME: "1" },
1066
- cwd: workspace,
1067
- });
1068
-
1069
- // (a) the argv handed to tmux is SHORT — a script path, not the 100KB inline.
1070
- expect(calls).toHaveLength(1);
1071
- const tmuxArgv = calls[0]!;
1072
- const scriptPath = join(workspace, ".launch.sh");
1073
- expect(tmuxArgv[tmuxArgv.length - 2]).toBe("/bin/bash");
1074
- expect(tmuxArgv[tmuxArgv.length - 1]).toBe(scriptPath);
1075
- // The 100KB profile is NOWHERE on the tmux command line.
1076
- expect(tmuxArgv.some((a) => a.length > 50_000)).toBe(false);
1077
- expect(tmuxArgv.join(" ")).not.toContain(bigProfile);
1078
-
1079
- // (b) the launch script is written, mode 0600, and contains the wrapped command.
1080
- expect(statSync(scriptPath).mode & 0o777).toBe(0o600);
1081
- const body = readFileSync(scriptPath, "utf8");
1082
- expect(body.startsWith("#!/bin/bash\nset -euo pipefail\n")).toBe(true);
1083
- expect(body).toContain(bigCommand);
1084
-
1085
- // (c) env still passed via `-e KEY=VAL`.
1086
- expect(tmuxArgv).toContain("-e");
1087
- expect(tmuxArgv).toContain("CLAUDE_CODE_OAUTH_TOKEN=OAUTH-SECRET");
1088
- expect(tmuxArgv).toContain("SANDBOX_RUNTIME=1");
1089
-
1090
- // SECURITY: the secret rides the ENV, never the script body.
1091
- expect(body).not.toContain("OAUTH-SECRET");
1092
- } finally {
1093
- rmSync(workspace, { recursive: true, force: true });
1094
- }
1095
- });
1096
- });
1097
-
1098
- describe("confirmDevChannelsPrompt — auto-answer the dev-channels consent gate (channel#70)", () => {
1099
- /**
1100
- * A recording spawnFn whose `tmux capture-pane` returns configurable pane text and
1101
- * whose `tmux send-keys` is recorded. Mirrors the `recordingSpawn` shape above but
1102
- * with a per-argv stdout (capture must return the pane content).
1103
- */
1104
- function recordingSpawn(paneText: string): {
1105
- fn: typeof Bun.spawn;
1106
- calls: string[][];
1107
- } {
1108
- const calls: string[][] = [];
1109
- const fn = ((argv: string[]) => {
1110
- calls.push(argv);
1111
- const isCapture = argv.includes("capture-pane");
1112
- return {
1113
- exited: Promise.resolve(0),
1114
- stdout: new Response(isCapture ? paneText : "").body,
1115
- stderr: new Response("").body,
1116
- };
1117
- }) as unknown as typeof Bun.spawn;
1118
- return { fn, calls };
1119
- }
1120
-
1121
- const noSleep = async () => {};
1122
-
1123
- test("prompt marker present → returns 'confirmed' AND sends Enter to the pane", async () => {
1124
- const pane = `WARNING: Loading development channels\n❯ 1. ${DEV_CHANNELS_PROMPT_MARKER}\n 2. Exit`;
1125
- const { fn, calls } = recordingSpawn(pane);
1126
- const result = await confirmDevChannelsPrompt("aaron-agent", {
1127
- spawnFn: fn,
1128
- timeoutMs: 5_000,
1129
- intervalMs: 10,
1130
- sleepFn: noSleep,
1131
- });
1132
- expect(result).toBe("confirmed");
1133
- // A `tmux send-keys -t aaron-agent Enter` call was recorded.
1134
- const sendKeys = calls.find((c) => c.includes("send-keys"));
1135
- expect(sendKeys).toBeDefined();
1136
- expect(sendKeys).toEqual(["tmux", "send-keys", "-t", "aaron-agent", "Enter"]);
1137
- });
1138
-
1139
- test("ready marker present (no prompt) → returns 'already-running', NO send-keys", async () => {
1140
- const pane = `Welcome to Claude Code\n ${DEV_CHANNELS_READY_MARKER} · /help for help`;
1141
- const { fn, calls } = recordingSpawn(pane);
1142
- const result = await confirmDevChannelsPrompt("aaron-agent", {
1143
- spawnFn: fn,
1144
- timeoutMs: 5_000,
1145
- intervalMs: 10,
1146
- sleepFn: noSleep,
1147
- });
1148
- expect(result).toBe("already-running");
1149
- expect(calls.some((c) => c.includes("send-keys"))).toBe(false);
1150
- });
1151
-
1152
- test("neither marker, tiny timeout + no-op sleep → returns 'timeout', NO throw, NO send-keys", async () => {
1153
- const { fn, calls } = recordingSpawn("just some unrelated pane output\n$ ");
1154
- const result = await confirmDevChannelsPrompt("aaron-agent", {
1155
- spawnFn: fn,
1156
- timeoutMs: 1,
1157
- intervalMs: 1,
1158
- sleepFn: noSleep,
1159
- });
1160
- expect(result).toBe("timeout");
1161
- expect(calls.some((c) => c.includes("send-keys"))).toBe(false);
1162
- // It DID poll at least once (the do-while guarantees a capture even at timeoutMs<=interval).
1163
- expect(calls.some((c) => c.includes("capture-pane"))).toBe(true);
1164
- });
1165
-
1166
- test("a capture subprocess that throws degrades to timeout, never throws", async () => {
1167
- const fn = (() => {
1168
- throw new Error("tmux not found");
1169
- }) as unknown as typeof Bun.spawn;
1170
- const result = await confirmDevChannelsPrompt("aaron-agent", {
1171
- spawnFn: fn,
1172
- timeoutMs: 1,
1173
- intervalMs: 1,
1174
- sleepFn: noSleep,
1175
- });
1176
- expect(result).toBe("timeout");
1177
- });
1178
-
1179
- test("prompt seen but send-keys throws → degrades to timeout (does NOT lie 'confirmed'), never throws", async () => {
1180
- const pane = `❯ 1. ${DEV_CHANNELS_PROMPT_MARKER}\n 2. Exit`;
1181
- // capture-pane succeeds (returns the prompt); send-keys throws.
1182
- const fn = ((argv: string[]) => {
1183
- if (argv.includes("send-keys")) throw new Error("send-keys failed");
1184
- return {
1185
- exited: Promise.resolve(0),
1186
- stdout: new Response(pane).body,
1187
- stderr: new Response("").body,
1188
- };
1189
- }) as unknown as typeof Bun.spawn;
1190
- const result = await confirmDevChannelsPrompt("aaron-agent", {
1191
- spawnFn: fn,
1192
- timeoutMs: 1,
1193
- intervalMs: 1,
1194
- sleepFn: noSleep,
1195
- });
1196
- expect(result).toBe("timeout");
1197
- });
1198
- });
1199
-
1200
- describe("persistSpec / readPersistedSpec — spawn-spec recovery for restart", () => {
1201
- test("round-trips a spec; readPersistedSpec returns null for a missing/garbage file", () => {
1202
- const ws = mkdtempSync(join(tmpdir(), "spec-rt-"));
1203
- try {
1204
- expect(readPersistedSpec(ws)).toBeNull(); // nothing written yet
1205
- const spec: AgentSpec = { name: "a", channels: ["c"], filesystem: "full" };
1206
- persistSpec(ws, spec);
1207
- expect(readPersistedSpec(ws)).toEqual(spec);
1208
- // Written 0600 (matches the secret-bearing .mcp.json discipline; the workspace
1209
- // dir is only umask-tight, so the file perm is the real guard).
1210
- expect(statSync(specFilePath(ws)).mode & 0o777).toBe(0o600);
1211
- // Corrupt it -> null (the restart path treats this as "no spec").
1212
- writeFileSync(specFilePath(ws), "{not json");
1213
- expect(readPersistedSpec(ws)).toBeNull();
1214
- } finally {
1215
- rmSync(ws, { recursive: true, force: true });
1216
- }
1217
- });
1218
- });