@openparachute/agent 0.2.2 → 0.2.3-rc.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.parachute/module.json +3 -3
  2. package/package.json +4 -1
  3. package/src/agent-defs.ts +9 -0
  4. package/src/auth.ts +182 -14
  5. package/src/backends/programmatic.ts +35 -2
  6. package/src/backends/registry.ts +159 -40
  7. package/src/backends/types.ts +44 -0
  8. package/src/daemon.ts +317 -12
  9. package/src/def-vault-triggers.ts +317 -0
  10. package/src/preflight.ts +139 -0
  11. package/src/spawn-agent.ts +16 -0
  12. package/src/step-up.ts +316 -0
  13. package/src/terminal-ui.ts +73 -0
  14. package/src/transports/http-ui.ts +10 -8
  15. package/src/transports/vault.ts +48 -27
  16. package/src/ui-kit.ts +6 -3
  17. package/src/ui-ticket.ts +121 -0
  18. package/web/ui/dist/assets/index-Dhr5Kl_d.css +1 -0
  19. package/web/ui/dist/assets/index-Di5MmFZR.js +60 -0
  20. package/web/ui/dist/index.html +2 -2
  21. package/src/_parked/interactive-spawn.test.ts +0 -324
  22. package/src/_parked/interactive-spawn.ts +0 -701
  23. package/src/agent-defs.test.ts +0 -1504
  24. package/src/agent-mcp-config.test.ts +0 -115
  25. package/src/agents.test.ts +0 -360
  26. package/src/auth.test.ts +0 -46
  27. package/src/backends/attached-queue.test.ts +0 -376
  28. package/src/backends/programmatic.test.ts +0 -1715
  29. package/src/backends/registry.test.ts +0 -1494
  30. package/src/backends/stream-json.test.ts +0 -570
  31. package/src/channel-backend-wiring.test.ts +0 -237
  32. package/src/credentials.test.ts +0 -274
  33. package/src/cron.test.ts +0 -342
  34. package/src/daemon-agent-def-api.test.ts +0 -166
  35. package/src/daemon-agent-defs-api.test.ts +0 -953
  36. package/src/daemon-agent-env-api.test.ts +0 -338
  37. package/src/daemon-attached-queue-store.test.ts +0 -65
  38. package/src/daemon-config-api.test.ts +0 -962
  39. package/src/daemon-jobs-api.test.ts +0 -271
  40. package/src/daemon-vault-chat.test.ts +0 -250
  41. package/src/daemon.test.ts +0 -746
  42. package/src/def-vaults.test.ts +0 -136
  43. package/src/delivery-state.test.ts +0 -110
  44. package/src/effective-env.test.ts +0 -114
  45. package/src/grants.test.ts +0 -638
  46. package/src/hub-jwt.test.ts +0 -161
  47. package/src/jobs.test.ts +0 -245
  48. package/src/mcp-http.test.ts +0 -265
  49. package/src/mint-token.test.ts +0 -152
  50. package/src/module-manifest.test.ts +0 -158
  51. package/src/programmatic-wiring.test.ts +0 -838
  52. package/src/registry.test.ts +0 -227
  53. package/src/resolve-port.test.ts +0 -64
  54. package/src/routing.test.ts +0 -184
  55. package/src/runner.test.ts +0 -506
  56. package/src/sandbox/config.test.ts +0 -150
  57. package/src/sandbox/egress.test.ts +0 -113
  58. package/src/sandbox/live-seatbelt.test.ts +0 -277
  59. package/src/sandbox/mounts.test.ts +0 -154
  60. package/src/sandbox/sandbox.test.ts +0 -168
  61. package/src/services-manifest.test.ts +0 -106
  62. package/src/spa-serve.test.ts +0 -116
  63. package/src/spawn-agent-cli.test.ts +0 -172
  64. package/src/spawn-agent.test.ts +0 -1218
  65. package/src/spawn-deps.test.ts +0 -54
  66. package/src/terminal-assets.test.ts +0 -50
  67. package/src/terminal.test.ts +0 -530
  68. package/src/transports/http-ui.test.ts +0 -455
  69. package/src/transports/telegram.test.ts +0 -174
  70. package/src/transports/vault.test.ts +0 -2011
  71. package/src/ui-kit.test.ts +0 -178
  72. package/web/ui/dist/assets/index-C-iWdFFV.css +0 -1
  73. package/web/ui/dist/assets/index-VFETBk0a.js +0 -60
  74. package/web/ui/tsconfig.json +0 -21
@@ -1,1715 +0,0 @@
1
- /**
2
- * ProgrammaticBackend tests — the single `claude -p` turn runner.
3
- *
4
- * Inject a FAKE spawnFn that emits canned stream-json (never spawn real claude),
5
- * a fake sandbox engine (records the wrapped argv), and a fake mint hub. Covered:
6
- *
7
- * - FIRST turn (no stored sid): argv has NO `--resume`; session_id captured +
8
- * PERSISTED to the state store; reply extracted from the `result` event.
9
- * - SECOND turn (sid stored): argv includes `--resume <sid>`; reply extracted; sid stable.
10
- * - ERROR turn (is_error / non-success subtype): returns `{ ok:false, error }`, no throw.
11
- * - argv shape: `-p`, `--output-format stream-json`, `--strict-mcp-config`,
12
- * `--mcp-config`, `--dangerously-skip-permissions` present; NO
13
- * `--dangerously-load-development-channels`.
14
- * - env: `CLAUDE_CODE_OAUTH_TOKEN` injected; `ANTHROPIC_API_KEY`/`CLAUDE_API_KEY`
15
- * NOT present (the #68 denylist).
16
- * - robustness: stream-json with interleaved hook/rate_limit_event lines + a
17
- * trailing partial line still parses the result.
18
- * - the vault-only `.mcp.json` (no channel MCP entry — the daemon mediates messaging).
19
- * - a missing Claude credential / a refused mint return `{ ok:false }` (no throw).
20
- * - stop() clears the resume id; status() is live.
21
- */
22
- import { describe, test, expect, afterEach } from "bun:test";
23
- import { mkdtempSync, rmSync, readFileSync, statSync, existsSync } from "node:fs";
24
- import { join } from "node:path";
25
- import { tmpdir } from "node:os";
26
- import {
27
- ProgrammaticBackend,
28
- buildProgrammaticClaudeArgs,
29
- PROGRAMMATIC_BACKEND_KIND,
30
- isTransientTurnError,
31
- isSessionNotFoundError,
32
- safeAttachmentBasename,
33
- ATTACHMENT_STAGING_DIR,
34
- ATTACHMENT_MAX_COUNT,
35
- TURN_MAX_ATTEMPTS,
36
- TURN_RETRY_BACKOFF_MS,
37
- type ProgrammaticBackendDeps,
38
- type ProgrammaticSpawnFn,
39
- } from "./programmatic.ts";
40
- import type { TurnSession } from "./types.ts";
41
- import type { InboundAttachment } from "../transport.ts";
42
- import type { SandboxEngine } from "../sandbox/index.ts";
43
- import type { SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime";
44
- import type { AgentSpec } from "../sandbox/types.ts";
45
- import { vaultEntryKey, channelEntryKey } from "../agent-mcp-config.ts";
46
- import {
47
- GrantsClient,
48
- grantVaultEntryKey,
49
- grantServiceEntryKey,
50
- serviceMcpUrl,
51
- type ConnectionSpec,
52
- type GrantMaterial,
53
- } from "../grants.ts";
54
-
55
- let sessionsDir: string;
56
- let stateDir: string;
57
- afterEach(() => {
58
- if (sessionsDir) rmSync(sessionsDir, { recursive: true, force: true });
59
- if (stateDir) rmSync(stateDir, { recursive: true, force: true });
60
- });
61
-
62
- // ---- fakes -----------------------------------------------------------------
63
-
64
- /** Join NDJSON event objects into the blob claude emits on stdout. */
65
- function ndjson(...events: unknown[]): string {
66
- return events.map((e) => JSON.stringify(e)).join("\n") + "\n";
67
- }
68
-
69
- /** A success stream-json turn with the given session id + reply. */
70
- function successTurn(sessionId: string, reply: string): string {
71
- return ndjson(
72
- { type: "system", subtype: "init", session_id: sessionId, apiKeySource: "none", mcp_servers: [] },
73
- { type: "assistant", message: { content: [{ type: "text", text: reply }] }, session_id: sessionId },
74
- {
75
- type: "result",
76
- subtype: "success",
77
- is_error: false,
78
- result: reply,
79
- session_id: sessionId,
80
- usage: { input_tokens: 10, output_tokens: 5 },
81
- total_cost_usd: 0.001,
82
- },
83
- );
84
- }
85
-
86
- /**
87
- * A recording spawnFn that returns a configurable stdout/stderr/exit and records
88
- * the argv + env + cwd it was called with. Mirrors `Bun.spawn`'s stream shape via
89
- * `Response(...).body`.
90
- */
91
- function recordingSpawn(opts: { stdout?: string; stderr?: string; code?: number } = {}): {
92
- fn: ProgrammaticSpawnFn;
93
- calls: Array<{ argv: string[]; env: Record<string, string | undefined>; cwd: string }>;
94
- } {
95
- const calls: Array<{ argv: string[]; env: Record<string, string | undefined>; cwd: string }> = [];
96
- const fn: ProgrammaticSpawnFn = (argv, o) => {
97
- calls.push({ argv, env: o.env, cwd: o.cwd });
98
- return {
99
- stdout: new Response(opts.stdout ?? "").body,
100
- stderr: new Response(opts.stderr ?? "").body,
101
- exited: Promise.resolve(opts.code ?? 0),
102
- };
103
- };
104
- return { fn, calls };
105
- }
106
-
107
- /** A spawnFn that returns a DIFFERENT canned stdout per call (turn 1, turn 2, …). */
108
- function sequencedSpawn(stdouts: string[]): {
109
- fn: ProgrammaticSpawnFn;
110
- calls: Array<{ argv: string[]; env: Record<string, string | undefined> }>;
111
- } {
112
- const calls: Array<{ argv: string[]; env: Record<string, string | undefined> }> = [];
113
- let i = 0;
114
- const fn: ProgrammaticSpawnFn = (argv, o) => {
115
- calls.push({ argv, env: o.env });
116
- const out = stdouts[Math.min(i, stdouts.length - 1)] ?? "";
117
- i += 1;
118
- return { stdout: new Response(out).body, stderr: new Response("").body, exited: Promise.resolve(0) };
119
- };
120
- return { fn, calls };
121
- }
122
-
123
- /** A fake sandbox engine — records config, returns a deterministic wrap (echoes the command). */
124
- function fakeEngine(): SandboxEngine & { initializedWith: SandboxRuntimeConfig | null } {
125
- const rec = {
126
- initializedWith: null as SandboxRuntimeConfig | null,
127
- isSupportedPlatform: () => true,
128
- isSandboxingEnabled: () => true,
129
- async initialize(cfg: SandboxRuntimeConfig) {
130
- rec.initializedWith = cfg;
131
- },
132
- async wrapWithSandboxArgv(command: string) {
133
- // Emulate the REAL `wrapWithSandboxArgv` contract: a bash -c wrapper carrying
134
- // the command, and `env` = the daemon's FULL `process.env` (on macOS/Linux the
135
- // real engine returns `process.env` verbatim; the proxy vars are baked into the
136
- // command string and ALSO present in process.env on Windows). The hand-made
137
- // SMALL env the old fake returned could never catch the passthrough leak — the
138
- // whole-`process.env`-spread that defeats buildAgentChildEnv's scrub. So the fake
139
- // must include the daemon's ambient secrets it would carry in real life PLUS the
140
- // proxy/sandbox vars, so a test can prove the leak is closed AND egress survives.
141
- //
142
- // The "argv" the runner spawns is therefore `["/bin/bash","-c","SBX <command>"]`;
143
- // assertions parse `argv[2]` for the claude flags.
144
- return {
145
- argv: ["/bin/bash", "-c", `SBX ${command}`],
146
- env: {
147
- // The daemon's ambient env (process.env) the real engine passes through —
148
- // these MUST be scrubbed from the launch env (the isolation/billing leak).
149
- ANTHROPIC_API_KEY: "sk-ant-DAEMON-AMBIENT-SHOULD-NOT-LEAK",
150
- CLAUDE_API_KEY: "daemon-ambient-also-should-not-leak",
151
- CLAUDE_CODE_OAUTH_TOKEN: "DAEMON-AMBIENT-WRONG-TOKEN-SHOULD-NOT-WIN",
152
- SECRET_THING: "daemon-ambient-secret-should-not-leak",
153
- PATH: "/daemon/bin",
154
- // The load-bearing sandbox/proxy vars the egress floor depends on — these
155
- // MUST survive into the launch env (allowlisted).
156
- SANDBOX_RUNTIME: "1",
157
- HTTP_PROXY: "http://localhost:5555",
158
- HTTPS_PROXY: "http://localhost:5555",
159
- NO_PROXY: "localhost,127.0.0.1",
160
- TMPDIR: "/tmp/claude",
161
- NODE_EXTRA_CA_CERTS: "/tmp/claude/ca.pem",
162
- },
163
- };
164
- },
165
- async reset() {},
166
- };
167
- return rec;
168
- }
169
-
170
- /** A fake mint hub: returns a distinct token per scope. */
171
- function fakeMintFetch(): typeof fetch {
172
- let n = 0;
173
- return (async (_url: string | URL | Request, init?: RequestInit) => {
174
- const body = JSON.parse(String(init?.body ?? "{}")) as { scope: string };
175
- n += 1;
176
- return new Response(
177
- JSON.stringify({ jti: `j${n}`, token: `TOK-${n}`, expires_at: "2026-09-01T00:00:00Z", scope: body.scope }),
178
- { status: 200, headers: { "content-type": "application/json" } },
179
- );
180
- }) as unknown as typeof fetch;
181
- }
182
-
183
- function baseDeps(
184
- spawnFn: ProgrammaticSpawnFn,
185
- over: Partial<ProgrammaticBackendDeps> = {},
186
- ): ProgrammaticBackendDeps {
187
- return {
188
- hubOrigin: "https://hub.example.com",
189
- managerBearer: "MANAGER",
190
- vaultUrl: "http://127.0.0.1:1940",
191
- sessionsDir,
192
- runtimeReadOnly: ["/cfg/.claude"],
193
- resolveClaudeToken: () => "OAUTH-CRED-PLACEHOLDER",
194
- sandboxEngine: fakeEngine(),
195
- fetchFn: fakeMintFetch(),
196
- spawnFn,
197
- parentEnv: {
198
- PATH: "/usr/bin",
199
- HOME: "/home/op",
200
- ANTHROPIC_API_KEY: "sk-ant-SHOULD-NOT-LEAK",
201
- CLAUDE_API_KEY: "also-should-not-leak",
202
- SECRET_THING: "do-not-pass",
203
- },
204
- claudeBin: "claude",
205
- ...over,
206
- };
207
- }
208
-
209
- function specWithVault(name = "eng"): AgentSpec {
210
- return {
211
- name,
212
- channels: [name],
213
- vault: { name: "default", access: "read", tags: ["#agent/message"] },
214
- };
215
- }
216
-
217
- function specWithSystemPrompt(
218
- prompt: string,
219
- mode: "append" | "replace" | undefined,
220
- name = "eng",
221
- ): AgentSpec {
222
- return {
223
- name,
224
- channels: [name],
225
- vault: { name: "default", access: "read", tags: ["#agent/message"] },
226
- systemPrompt: prompt,
227
- ...(mode ? { systemPromptMode: mode } : {}),
228
- };
229
- }
230
-
231
- /** A multi-threaded spec — fresh-per-fire today (no resume, no persist). */
232
- function specMultiThreaded(name = "eng"): AgentSpec {
233
- return {
234
- name,
235
- channels: [name],
236
- mode: "multi-threaded",
237
- vault: { name: "default", access: "read", tags: ["#agent/message"] },
238
- };
239
- }
240
-
241
- function mkDirs(tag: string): void {
242
- sessionsDir = mkdtempSync(join(tmpdir(), `prog-sessions-${tag}-`));
243
- stateDir = mkdtempSync(join(tmpdir(), `prog-state-${tag}-`));
244
- }
245
-
246
- // ---- TurnSession helpers ---------------------------------------------------
247
- // The daemon (registry) now OWNS the session uuid + the resume-vs-create decision and
248
- // hands it to `deliver` as a {@link TurnSession}. These build the two shapes:
249
- // - createSession(id) → `--session-id <id>` (CREATE: first turn / every multi-threaded fire)
250
- // - resumeSession(id) → `--resume <id>` (CONTINUE: single-threaded turn 2+)
251
- function createSession(id: string): TurnSession {
252
- return { id, resume: false };
253
- }
254
- function resumeSession(id: string): TurnSession {
255
- return { id, resume: true };
256
- }
257
- /** A fresh-create session with a generated uuid (the default when a test doesn't care). */
258
- function freshSession(): TurnSession {
259
- return { id: crypto.randomUUID(), resume: false };
260
- }
261
-
262
- // ---- pure-helper tests -----------------------------------------------------
263
-
264
- describe("buildProgrammaticClaudeArgs", () => {
265
- test("no session: -p + stream-json + strict MCP; NEITHER session flag, NO dev-channels", () => {
266
- const argv = buildProgrammaticClaudeArgs({ message: "hello", mcpConfigPath: "/ws/.mcp.json" });
267
- expect(argv).toContain("-p");
268
- expect(argv).toContain("hello");
269
- expect(argv.join(" ")).toContain("--output-format stream-json");
270
- expect(argv).toContain("--verbose");
271
- expect(argv).toContain("--strict-mcp-config");
272
- expect(argv).toContain("--mcp-config");
273
- expect(argv).toContain("/ws/.mcp.json");
274
- expect(argv).toContain("--dangerously-skip-permissions");
275
- // The daemon mediates messaging — NO channel dev-channels flag here.
276
- expect(argv.some((a) => a.includes("dangerously-load-development-channels"))).toBe(false);
277
- // No sessionId → neither session flag.
278
- expect(argv).not.toContain("--resume");
279
- expect(argv).not.toContain("--session-id");
280
- });
281
-
282
- test("CREATE (resumeSession=false): --session-id <id> appended (NOT --resume)", () => {
283
- const argv = buildProgrammaticClaudeArgs({
284
- message: "first",
285
- mcpConfigPath: "/ws/.mcp.json",
286
- sessionId: "sess-new",
287
- resumeSession: false,
288
- });
289
- expect(argv).toContain("--session-id");
290
- expect(argv[argv.indexOf("--session-id") + 1]).toBe("sess-new");
291
- expect(argv).not.toContain("--resume");
292
- });
293
-
294
- test("CREATE (resumeSession omitted defaults to create): --session-id <id>", () => {
295
- const argv = buildProgrammaticClaudeArgs({
296
- message: "first",
297
- mcpConfigPath: "/ws/.mcp.json",
298
- sessionId: "sess-new",
299
- });
300
- expect(argv).toContain("--session-id");
301
- expect(argv[argv.indexOf("--session-id") + 1]).toBe("sess-new");
302
- expect(argv).not.toContain("--resume");
303
- });
304
-
305
- test("CONTINUE (resumeSession=true): --resume <id> appended (NOT --session-id)", () => {
306
- const argv = buildProgrammaticClaudeArgs({
307
- message: "next",
308
- mcpConfigPath: "/ws/.mcp.json",
309
- sessionId: "sess-xyz",
310
- resumeSession: true,
311
- });
312
- expect(argv).toContain("--resume");
313
- expect(argv[argv.indexOf("--resume") + 1]).toBe("sess-xyz");
314
- expect(argv).not.toContain("--session-id");
315
- });
316
-
317
- test("system prompt (append, default): --append-system-prompt-file <path>", () => {
318
- const argv = buildProgrammaticClaudeArgs({
319
- message: "hi",
320
- mcpConfigPath: "/ws/.mcp.json",
321
- systemPromptFile: "/ws/system-prompt.txt",
322
- systemPromptMode: "append",
323
- });
324
- expect(argv).toContain("--append-system-prompt-file");
325
- expect(argv[argv.indexOf("--append-system-prompt-file") + 1]).toBe("/ws/system-prompt.txt");
326
- expect(argv).not.toContain("--system-prompt-file");
327
- });
328
-
329
- test("system prompt (replace): --system-prompt-file <path>", () => {
330
- const argv = buildProgrammaticClaudeArgs({
331
- message: "hi",
332
- mcpConfigPath: "/ws/.mcp.json",
333
- systemPromptFile: "/ws/system-prompt.txt",
334
- systemPromptMode: "replace",
335
- });
336
- expect(argv).toContain("--system-prompt-file");
337
- expect(argv[argv.indexOf("--system-prompt-file") + 1]).toBe("/ws/system-prompt.txt");
338
- expect(argv).not.toContain("--append-system-prompt-file");
339
- });
340
-
341
- test("system prompt with no mode → append (the -file flag defaults to append)", () => {
342
- const argv = buildProgrammaticClaudeArgs({
343
- message: "hi",
344
- mcpConfigPath: "/ws/.mcp.json",
345
- systemPromptFile: "/ws/system-prompt.txt",
346
- });
347
- expect(argv).toContain("--append-system-prompt-file");
348
- });
349
-
350
- test("no systemPromptFile → neither system-prompt flag", () => {
351
- const argv = buildProgrammaticClaudeArgs({ message: "hi", mcpConfigPath: "/ws/.mcp.json" });
352
- expect(argv).not.toContain("--append-system-prompt-file");
353
- expect(argv).not.toContain("--system-prompt-file");
354
- });
355
-
356
- test("model set → --model <value> as a discrete argv pair", () => {
357
- const argv = buildProgrammaticClaudeArgs({
358
- message: "hi",
359
- mcpConfigPath: "/ws/.mcp.json",
360
- model: "opus",
361
- });
362
- const i = argv.indexOf("--model");
363
- expect(i).toBeGreaterThan(-1);
364
- expect(argv[i + 1]).toBe("opus");
365
- });
366
-
367
- test("model unset/empty/whitespace → NO --model flag (inherit CC default)", () => {
368
- expect(
369
- buildProgrammaticClaudeArgs({ message: "hi", mcpConfigPath: "/ws/.mcp.json" }),
370
- ).not.toContain("--model");
371
- expect(
372
- buildProgrammaticClaudeArgs({ message: "hi", mcpConfigPath: "/ws/.mcp.json", model: "" }),
373
- ).not.toContain("--model");
374
- expect(
375
- buildProgrammaticClaudeArgs({ message: "hi", mcpConfigPath: "/ws/.mcp.json", model: " " }),
376
- ).not.toContain("--model");
377
- });
378
-
379
- test("model is trimmed before becoming the flag value", () => {
380
- const argv = buildProgrammaticClaudeArgs({
381
- message: "hi",
382
- mcpConfigPath: "/ws/.mcp.json",
383
- model: " claude-opus-4-8 ",
384
- });
385
- expect(argv[argv.indexOf("--model") + 1]).toBe("claude-opus-4-8");
386
- });
387
- });
388
-
389
- // ---- single-turn runner tests ----------------------------------------------
390
-
391
- describe("ProgrammaticBackend.deliver — CREATE turn (--session-id)", () => {
392
- test("a create session → argv has --session-id <id> (NOT --resume); reply + sessionId returned", async () => {
393
- mkDirs("first");
394
- const { fn, calls } = recordingSpawn({ stdout: successTurn("sess-FIRST", "the reply text") });
395
- const engine = fakeEngine();
396
- const backend = new ProgrammaticBackend(baseDeps(fn, { sandboxEngine: engine }));
397
-
398
- const handle = await backend.start(specWithVault("eng"));
399
- const result = await backend.deliver(handle, "hi agent", createSession("sess-FIRST"));
400
-
401
- expect(result.ok).toBe(true);
402
- if (result.ok) {
403
- expect(result.reply).toBe("the reply text");
404
- // Claude echoes the session id (matches the uuid we passed) — RETURNED so the
405
- // registry persists it onto the thread note (the backend keeps no store).
406
- expect(result.sessionId).toBe("sess-FIRST");
407
- expect(result.usage).toEqual({ inputTokens: 10, outputTokens: 5, totalCostUsd: 0.001 });
408
- }
409
-
410
- // The wrapped argv (engine echoes the claude command in argv[2]) CREATES the session.
411
- expect(calls).toHaveLength(1);
412
- const cmd = calls[0]!.argv[2]!;
413
- expect(cmd).toContain("SBX claude -p");
414
- expect(cmd).toContain("--session-id sess-FIRST");
415
- expect(cmd).not.toContain("--resume");
416
- });
417
- });
418
-
419
- describe("ProgrammaticBackend.deliver — CONTINUE turn (--resume)", () => {
420
- test("a resume session → argv has --resume <id> (NOT --session-id); reply extracted", async () => {
421
- mkDirs("second");
422
- const { fn, calls } = sequencedSpawn([
423
- successTurn("sess-RESUME", "first reply"),
424
- successTurn("sess-RESUME", "second reply"),
425
- ]);
426
- const backend = new ProgrammaticBackend(baseDeps(fn));
427
- const handle = await backend.start(specWithVault("eng"));
428
-
429
- // Turn 1 CREATES the session (the registry would mint a fresh uuid); turn 2 RESUMES it
430
- // (the registry read it back off the thread note).
431
- const r1 = await backend.deliver(handle, "turn one", createSession("sess-RESUME"));
432
- expect(r1.ok).toBe(true);
433
- if (r1.ok) expect(r1.sessionId).toBe("sess-RESUME");
434
-
435
- const r2 = await backend.deliver(handle, "turn two", resumeSession("sess-RESUME"));
436
- expect(r2.ok).toBe(true);
437
- if (r2.ok) expect(r2.reply).toBe("second reply");
438
-
439
- // Turn 1 argv: --session-id (create). Turn 2 argv: --resume (continue) — same id.
440
- const cmd1 = calls[0]!.argv[2]!;
441
- const cmd2 = calls[1]!.argv[2]!;
442
- expect(cmd1).toContain("--session-id sess-RESUME");
443
- expect(cmd1).not.toContain("--resume");
444
- expect(cmd2).toContain("--resume sess-RESUME");
445
- expect(cmd2).not.toContain("--session-id");
446
- });
447
- });
448
-
449
- describe("ProgrammaticBackend.deliver — the backend is a pure function of the TurnSession", () => {
450
- test("the backend reads no store: it just runs the turn it's handed (create) and returns the id", async () => {
451
- // A multi-threaded fire is just a CREATE turn at this layer — the registry decides the
452
- // mode + the fresh uuid; the backend behaves identically to any create turn.
453
- mkDirs("multithreaded");
454
- const { fn, calls } = recordingSpawn({ stdout: successTurn("sess-NEW", "ephemeral reply") });
455
- const backend = new ProgrammaticBackend(baseDeps(fn));
456
- const handle = await backend.start(specMultiThreaded("eng"));
457
-
458
- const result = await backend.deliver(handle, "fire the turn", createSession("sess-NEW"));
459
- expect(result.ok).toBe(true);
460
- if (result.ok) {
461
- expect(result.reply).toBe("ephemeral reply");
462
- expect(result.sessionId).toBe("sess-NEW");
463
- }
464
-
465
- const cmd = calls[0]!.argv[2]!;
466
- expect(cmd).toContain("SBX claude -p");
467
- expect(cmd).toContain("--session-id sess-NEW");
468
- expect(cmd).not.toContain("--resume");
469
- });
470
-
471
- test("a create turn with a fresh uuid → --session-id <uuid>, NO --resume", async () => {
472
- mkDirs("multithreaded-fresh");
473
- const { fn, calls } = recordingSpawn({ stdout: successTurn("sess-X", "reply") });
474
- const backend = new ProgrammaticBackend(baseDeps(fn));
475
- const handle = await backend.start(specMultiThreaded("eng"));
476
-
477
- const session = freshSession();
478
- await backend.deliver(handle, "go", session);
479
- const cmd = calls[0]!.argv[2]!;
480
- expect(cmd).toContain(`--session-id ${session.id}`);
481
- expect(cmd).not.toContain("--resume");
482
- });
483
-
484
- test("a resume turn → --resume <id>, NO --session-id (single-threaded turn 2+ path)", async () => {
485
- mkDirs("single-threaded-regress");
486
- const { fn, calls } = recordingSpawn({ stdout: successTurn("sess-PRIOR", "continued") });
487
- // specWithVault has NO mode → single-threaded (the default); the registry resumes it.
488
- const backend = new ProgrammaticBackend(baseDeps(fn));
489
- const handle = await backend.start(specWithVault("eng"));
490
-
491
- const result = await backend.deliver(handle, "continue the thread", resumeSession("sess-PRIOR"));
492
- expect(result.ok).toBe(true);
493
- if (result.ok) expect(result.sessionId).toBe("sess-PRIOR");
494
- const cmd = calls[0]!.argv[2]!;
495
- expect(cmd).toContain("--resume sess-PRIOR");
496
- expect(cmd).not.toContain("--session-id");
497
- });
498
- });
499
-
500
- describe("ProgrammaticBackend.deliver — error turn", () => {
501
- test("is_error:true → returns { ok:false, error }, no throw; sid still captured", async () => {
502
- mkDirs("err");
503
- const errBlob = ndjson(
504
- { type: "system", subtype: "init", session_id: "sess-ERR", apiKeySource: "none" },
505
- { type: "result", subtype: "error_during_execution", is_error: true, result: "boom in the agent", session_id: "sess-ERR" },
506
- );
507
- const { fn } = recordingSpawn({ stdout: errBlob });
508
- const backend = new ProgrammaticBackend(baseDeps(fn));
509
- const handle = await backend.start(specWithVault("eng"));
510
-
511
- const result = await backend.deliver(handle, "do a thing", createSession("sess-ERR"));
512
- expect(result.ok).toBe(false);
513
- if (!result.ok) {
514
- expect(result.error).toContain("boom in the agent");
515
- // The id is still RETURNED (a turn can fail AFTER establishing a session) — the
516
- // registry persists it so the next turn resumes the conversation.
517
- expect(result.sessionId).toBe("sess-ERR");
518
- }
519
- });
520
-
521
- test("a non-success subtype → { ok:false } (no throw)", async () => {
522
- mkDirs("err2");
523
- const blob = ndjson(
524
- { type: "system", subtype: "init", session_id: "s" },
525
- { type: "result", subtype: "error_max_turns", is_error: false, result: "ran out", session_id: "s" },
526
- );
527
- const { fn } = recordingSpawn({ stdout: blob });
528
- const backend = new ProgrammaticBackend(baseDeps(fn));
529
- const handle = await backend.start(specWithVault());
530
- const result = await backend.deliver(handle, "x", freshSession());
531
- expect(result.ok).toBe(false);
532
- if (!result.ok) expect(result.error).toContain("error_max_turns");
533
- });
534
-
535
- test("a turn that fails BEFORE any session is established has no sessionId on the result", async () => {
536
- mkDirs("err-presession");
537
- // No init event, no session_id anywhere — an immediate non-success result. Claude
538
- // echoed no id, so the backend reports none (the registry then falls back to the
539
- // turn uuid it passed when persisting).
540
- const blob = ndjson({ type: "result", subtype: "error_during_execution", is_error: true, result: "died early" });
541
- const { fn } = recordingSpawn({ stdout: blob });
542
- const backend = new ProgrammaticBackend(baseDeps(fn));
543
- const handle = await backend.start(specWithVault("eng"));
544
- const result = await backend.deliver(handle, "x", createSession("sess-IGNORED-NO-ECHO"));
545
- expect(result.ok).toBe(false);
546
- if (!result.ok) {
547
- expect(result.error).toContain("died early");
548
- // No echoed id from claude → none on the result (the registry uses turnSession.id).
549
- expect(result.sessionId).toBeUndefined();
550
- }
551
- });
552
-
553
- test("no result event (truncated/crashed turn) + non-zero exit → { ok:false }", async () => {
554
- mkDirs("err3");
555
- const blob = ndjson({ type: "system", subtype: "init", session_id: "s", apiKeySource: "none" });
556
- const { fn } = recordingSpawn({ stdout: blob, stderr: "claude crashed", code: 1 });
557
- const backend = new ProgrammaticBackend(baseDeps(fn));
558
- const handle = await backend.start(specWithVault());
559
- const result = await backend.deliver(handle, "x", freshSession());
560
- expect(result.ok).toBe(false);
561
- if (!result.ok) expect(result.error).toMatch(/no success result|exited 1|crashed/);
562
- });
563
- });
564
-
565
- describe("ProgrammaticBackend.deliver — argv + env shape", () => {
566
- test("argv shape: -p, stream-json, strict-mcp-config, mcp-config, skip-permissions; NO dev-channels", async () => {
567
- mkDirs("argv");
568
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
569
- const backend = new ProgrammaticBackend(baseDeps(fn));
570
- const handle = await backend.start(specWithVault());
571
- await backend.deliver(handle, "hello", freshSession());
572
-
573
- const cmd = calls[0]!.argv[2]!; // the engine echoes the claude command here
574
- expect(cmd).toContain(" -p ");
575
- expect(cmd).toContain("--output-format stream-json");
576
- expect(cmd).toContain("--strict-mcp-config");
577
- expect(cmd).toContain("--mcp-config");
578
- expect(cmd).toContain("--dangerously-skip-permissions");
579
- expect(cmd).not.toContain("dangerously-load-development-channels");
580
- });
581
-
582
- test("env: CLAUDE_CODE_OAUTH_TOKEN injected; ANTHROPIC_API_KEY / CLAUDE_API_KEY NOT present (#68 denylist)", async () => {
583
- mkDirs("env");
584
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
585
- const backend = new ProgrammaticBackend(baseDeps(fn));
586
- const handle = await backend.start(specWithVault());
587
- await backend.deliver(handle, "hello", freshSession());
588
-
589
- const env = calls[0]!.env;
590
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("OAUTH-CRED-PLACEHOLDER");
591
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
592
- expect(env.CLAUDE_API_KEY).toBeUndefined();
593
- // the sandbox proxy env is layered on top
594
- expect(env.SANDBOX_RUNTIME).toBe("1");
595
- });
596
-
597
- test("REGRESSION (isolation/billing leak): the engine's returned env (= daemon process.env) is NOT spread onto the launch env — only allowlisted sandbox/proxy keys survive; the scrub wins", async () => {
598
- // The real `wrapWithSandboxArgv` returns `env: process.env` (the FULL daemon env)
599
- // on macOS/Linux. The fakeEngine now mirrors that — its returned env carries the
600
- // daemon's ambient ANTHROPIC_API_KEY / CLAUDE_API_KEY / SECRET_THING / a WRONG
601
- // CLAUDE_CODE_OAUTH_TOKEN. The old `{ ...childEnv, ...wrapped.env, ...homeEnv }`
602
- // spread let those OVERRIDE the scrubbed childEnv → reaching the sandboxed turn
603
- // (subscription-billing + secret-leak breach). mergeSandboxLaunchEnv allowlists
604
- // wrapped.env, so the scrub stays authoritative.
605
- mkDirs("env-leak-regression");
606
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
607
- const backend = new ProgrammaticBackend(baseDeps(fn));
608
- const handle = await backend.start(specWithVault());
609
- await backend.deliver(handle, "hello", freshSession());
610
-
611
- const env = calls[0]!.env;
612
-
613
- // 1. LEAK CLOSED: the daemon's ambient secrets the engine returned never reach
614
- // the launch env (neither the scrubbed childEnv nor the allowlist admits them).
615
- expect(env.ANTHROPIC_API_KEY).toBeUndefined();
616
- expect(env.CLAUDE_API_KEY).toBeUndefined();
617
- expect(env.SECRET_THING).toBeUndefined();
618
-
619
- // 2. MANAGED AUTH WINS: CLAUDE_CODE_OAUTH_TOKEN is the session's resolved token,
620
- // NOT the wrong daemon-ambient one the engine env carried (it's denylisted +
621
- // not allowlisted, so step 2 can never overwrite the scrub's value).
622
- expect(env.CLAUDE_CODE_OAUTH_TOKEN).toBe("OAUTH-CRED-PLACEHOLDER");
623
-
624
- // 3. EGRESS PRESERVED: the load-bearing sandbox/proxy vars DO survive (allowlist),
625
- // so the egress proxy keeps working — the fix doesn't strangle the network.
626
- expect(env.SANDBOX_RUNTIME).toBe("1");
627
- expect(env.HTTP_PROXY).toBe("http://localhost:5555");
628
- expect(env.HTTPS_PROXY).toBe("http://localhost:5555");
629
- expect(env.NO_PROXY).toBe("localhost,127.0.0.1");
630
- expect(env.NODE_EXTRA_CA_CERTS).toBe("/tmp/claude/ca.pem");
631
-
632
- // 4. The daemon's ambient PATH from the engine env does NOT clobber the scrubbed
633
- // PATH (PATH isn't in the sandbox allowlist; childEnv's passthrough owns it).
634
- expect(env.PATH).not.toBe("/daemon/bin");
635
- });
636
-
637
- test("the per-channel env injection (GH_TOKEN) reaches the child; a planted API key is dropped", async () => {
638
- mkDirs("env-inject");
639
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
640
- const backend = new ProgrammaticBackend(
641
- baseDeps(fn, {
642
- resolveChannelEnv: () => ({ GH_TOKEN: "ghp_INJECTED", ANTHROPIC_API_KEY: "sk-ant-SMUGGLED" }),
643
- }),
644
- );
645
- const handle = await backend.start(specWithVault());
646
- await backend.deliver(handle, "hello", freshSession());
647
-
648
- const env = calls[0]!.env;
649
- expect(env.GH_TOKEN).toBe("ghp_INJECTED");
650
- expect(env.ANTHROPIC_API_KEY).toBeUndefined(); // denylist drops it defensively
651
- });
652
- });
653
-
654
- describe("ProgrammaticBackend.deliver — system prompt (file-backed, per-turn)", () => {
655
- test("append mode → --append-system-prompt-file <path> + the file is written 0600 with the prompt", async () => {
656
- mkDirs("sysprompt-append");
657
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
658
- const backend = new ProgrammaticBackend(baseDeps(fn));
659
- const handle = await backend.start(specWithSystemPrompt("You are the eng release bot.", "append", "eng"));
660
- await backend.deliver(handle, "hello", freshSession());
661
-
662
- const promptPath = join(sessionsDir, "eng", "system-prompt.txt");
663
- // The file exists, is 0600, and carries the EXACT prompt text.
664
- expect(statSync(promptPath).mode & 0o777).toBe(0o600);
665
- expect(readFileSync(promptPath, "utf8")).toBe("You are the eng release bot.");
666
- // The wrapped claude command (engine echoes it in argv[2]) carries the -file flag.
667
- const cmd = calls[0]!.argv[2]!;
668
- expect(cmd).toContain("--append-system-prompt-file");
669
- expect(cmd).toContain(promptPath);
670
- // `--system-prompt-file` is a substring of `--append-system-prompt-file`, so a
671
- // bare `not.toContain("--system-prompt-file")` would always fail. Assert the
672
- // replace-mode flag was NOT the one applied to the prompt path instead.
673
- expect(cmd).not.toContain("--system-prompt-file " + promptPath);
674
- });
675
-
676
- test("replace mode → --system-prompt-file <path>", async () => {
677
- mkDirs("sysprompt-replace");
678
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
679
- const backend = new ProgrammaticBackend(baseDeps(fn));
680
- const handle = await backend.start(specWithSystemPrompt("Full custom persona.", "replace", "eng"));
681
- await backend.deliver(handle, "hello", freshSession());
682
-
683
- const promptPath = join(sessionsDir, "eng", "system-prompt.txt");
684
- expect(readFileSync(promptPath, "utf8")).toBe("Full custom persona.");
685
- const cmd = calls[0]!.argv[2]!;
686
- expect(cmd).toContain("--system-prompt-file");
687
- expect(cmd).not.toContain("--append-system-prompt-file");
688
- });
689
-
690
- test("no systemPrompt → no file, no system-prompt flag (today's behavior)", async () => {
691
- mkDirs("sysprompt-none");
692
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
693
- const backend = new ProgrammaticBackend(baseDeps(fn));
694
- const handle = await backend.start(specWithVault("eng"));
695
- await backend.deliver(handle, "hello", freshSession());
696
-
697
- expect(existsSync(join(sessionsDir, "eng", "system-prompt.txt"))).toBe(false);
698
- const cmd = calls[0]!.argv[2]!;
699
- expect(cmd).not.toContain("system-prompt-file");
700
- });
701
-
702
- test("the prompt file is (re)written + the flag re-passed on EVERY turn — incl. a resume turn", async () => {
703
- mkDirs("sysprompt-perturn");
704
- const { fn, calls } = sequencedSpawn([
705
- successTurn("sess-SP", "turn one reply"),
706
- successTurn("sess-SP", "turn two reply"),
707
- ]);
708
- const backend = new ProgrammaticBackend(baseDeps(fn));
709
- const handle = await backend.start(specWithSystemPrompt("Per-turn role.", "append", "eng"));
710
-
711
- // Turn 1 CREATES the session; turn 2 RESUMES it (the registry's mode decision).
712
- await backend.deliver(handle, "turn one", createSession("sess-SP"));
713
- await backend.deliver(handle, "turn two", resumeSession("sess-SP"));
714
-
715
- const promptPath = join(sessionsDir, "eng", "system-prompt.txt");
716
- // The file is present after the resume turn too (re-written each deliver).
717
- expect(readFileSync(promptPath, "utf8")).toBe("Per-turn role.");
718
- // Turn 1: -file flag + --session-id. Turn 2 (resume): -file flag AND --resume.
719
- const cmd1 = calls[0]!.argv[2]!;
720
- const cmd2 = calls[1]!.argv[2]!;
721
- expect(cmd1).toContain("--append-system-prompt-file");
722
- expect(cmd1).toContain("--session-id sess-SP");
723
- expect(cmd1).not.toContain("--resume");
724
- expect(cmd2).toContain("--append-system-prompt-file"); // re-passed on the resume turn
725
- expect(cmd2).toContain("--resume sess-SP");
726
- });
727
- });
728
-
729
- // ---- the workspace seam (working-directory axis) ---------------------------
730
- // design 2026-06-16-agent-filesystem-and-sharing.md — `workspace` is the agent's
731
- // cwd + an rw working-root; .mcp.json (scoped vault token = secret) /
732
- // system-prompt.txt / seeded home STAY in the per-agent private sessions/<name> dir.
733
-
734
- function specWithWorkspace(workspace: string, name = "eng"): AgentSpec {
735
- return {
736
- name,
737
- channels: [name],
738
- vault: { name: "default", access: "read", tags: ["#agent/message"] },
739
- workspace,
740
- };
741
- }
742
-
743
- describe("ProgrammaticBackend.deliver — workspace seam: cwd = workspace, secrets stay private", () => {
744
- test("workspace SET → the turn's cwd is the workspace; the workspace is the sandbox rw working-root", async () => {
745
- mkDirs("ws-set");
746
- const workspaceDir = mkdtempSync(join(tmpdir(), "prog-shared-workdir-"));
747
- try {
748
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
749
- const engine = fakeEngine();
750
- const backend = new ProgrammaticBackend(baseDeps(fn, { sandboxEngine: engine }));
751
- const handle = await backend.start(specWithWorkspace(workspaceDir, "eng"));
752
- await backend.deliver(handle, "hello", freshSession());
753
-
754
- const privateDir = join(sessionsDir, "eng");
755
- // The spawned turn's cwd is the SHARED workspace, NOT the private dir.
756
- expect(calls[0]!.cwd).toBe(workspaceDir);
757
- // The workspace is an rw working-root in the sandbox (read + write); the
758
- // private dir stays writable too (it holds .mcp.json/home/tmp).
759
- expect(engine.initializedWith!.filesystem.allowWrite).toContain(workspaceDir);
760
- expect(engine.initializedWith!.filesystem.allowWrite).toContain(privateDir);
761
- expect(engine.initializedWith!.filesystem.allowRead).toContain(workspaceDir);
762
- } finally {
763
- rmSync(workspaceDir, { recursive: true, force: true });
764
- }
765
- });
766
-
767
- test("SECRETS-STAY-PRIVATE: .mcp.json / system-prompt.txt live in the PRIVATE dir, NEVER the shared workspace", async () => {
768
- mkDirs("ws-private");
769
- const workspaceDir = mkdtempSync(join(tmpdir(), "prog-shared-secrets-"));
770
- try {
771
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
772
- const backend = new ProgrammaticBackend(baseDeps(fn));
773
- const spec: AgentSpec = {
774
- name: "eng",
775
- channels: ["eng"],
776
- vault: { name: "default", access: "read", tags: ["#agent/message"] },
777
- workspace: workspaceDir,
778
- systemPrompt: "Work in the repo.",
779
- };
780
- const handle = await backend.start(spec);
781
- await backend.deliver(handle, "hello", freshSession());
782
-
783
- const privateDir = join(sessionsDir, "eng");
784
- // Private artifacts are under the per-agent dir…
785
- expect(statSync(join(privateDir, ".mcp.json")).mode & 0o777).toBe(0o600);
786
- expect(existsSync(join(privateDir, "system-prompt.txt"))).toBe(true);
787
- // …and the shared workspace has NONE of them (no secrets crossing the boundary).
788
- expect(existsSync(join(workspaceDir, ".mcp.json"))).toBe(false);
789
- expect(existsSync(join(workspaceDir, "system-prompt.txt"))).toBe(false);
790
- expect(existsSync(join(workspaceDir, "home"))).toBe(false);
791
- // The private .mcp.json DOES carry the minted vault token; the shared dir does not.
792
- const privateMcp = readFileSync(join(privateDir, ".mcp.json"), "utf8");
793
- expect(privateMcp).toContain("Bearer TOK-");
794
- } finally {
795
- rmSync(workspaceDir, { recursive: true, force: true });
796
- }
797
- });
798
-
799
- test("workspace UNSET → the turn's cwd is the private dir (unchanged); only the private dir is writable", async () => {
800
- mkDirs("ws-unset");
801
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
802
- const engine = fakeEngine();
803
- const backend = new ProgrammaticBackend(baseDeps(fn, { sandboxEngine: engine }));
804
- const handle = await backend.start(specWithVault("eng"));
805
- await backend.deliver(handle, "hello", freshSession());
806
-
807
- const privateDir = join(sessionsDir, "eng");
808
- expect(calls[0]!.cwd).toBe(privateDir);
809
- expect(engine.initializedWith!.filesystem.allowWrite).toEqual([privateDir]);
810
- });
811
- });
812
-
813
- describe("ProgrammaticBackend.deliver — MCP config (vault only, no channel)", () => {
814
- test("writes a vault-only .mcp.json 0600 — NO channel MCP entry (daemon mediates messaging)", async () => {
815
- mkDirs("mcp");
816
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
817
- const backend = new ProgrammaticBackend(baseDeps(fn));
818
- const handle = await backend.start(specWithVault("eng"));
819
- await backend.deliver(handle, "hello", freshSession());
820
-
821
- const mcpPath = join(sessionsDir, "eng", ".mcp.json");
822
- expect(statSync(mcpPath).mode & 0o777).toBe(0o600);
823
- const parsed = JSON.parse(readFileSync(mcpPath, "utf8")) as { mcpServers: Record<string, unknown> };
824
- // The vault entry is present…
825
- expect(parsed.mcpServers[vaultEntryKey("default")]).toBeDefined();
826
- // …and NO channel entry (the daemon, not the agent, handles inbound/outbound).
827
- expect(parsed.mcpServers[channelEntryKey("eng")]).toBeUndefined();
828
- expect(Object.keys(parsed.mcpServers)).toEqual([vaultEntryKey("default")]);
829
- });
830
-
831
- test("a spec with no vault → an EMPTY mcpServers config (agent still runs)", async () => {
832
- mkDirs("novault");
833
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
834
- const backend = new ProgrammaticBackend(baseDeps(fn));
835
- const handle = await backend.start({ name: "bare", channels: ["bare"] });
836
- const result = await backend.deliver(handle, "hello", freshSession());
837
- expect(result.ok).toBe(true);
838
- const parsed = JSON.parse(readFileSync(join(sessionsDir, "bare", ".mcp.json"), "utf8")) as {
839
- mcpServers: Record<string, unknown>;
840
- };
841
- expect(Object.keys(parsed.mcpServers)).toEqual([]);
842
- });
843
- });
844
-
845
- describe("ProgrammaticBackend.deliver — robustness", () => {
846
- test("interleaved hook/rate_limit lines + a trailing partial line still parse the result", async () => {
847
- mkDirs("robust");
848
- const messyStdout =
849
- "running a user hook...\n" +
850
- JSON.stringify({ type: "system", subtype: "init", session_id: "sess-R", apiKeySource: "none" }) + "\n" +
851
- JSON.stringify({ type: "system", subtype: "rate_limit_event", rate_limit: { five_hour: { overageStatus: "rejected" } } }) + "\n" +
852
- JSON.stringify({ type: "assistant", message: { content: [{ type: "text", text: "..." }] }, session_id: "sess-R" }) + "\n" +
853
- JSON.stringify({ type: "result", subtype: "success", is_error: false, result: "robust reply", session_id: "sess-R" }) + "\n" +
854
- '{"type":"system","subtype":"in'; // a cut-off trailing partial line
855
- const { fn } = recordingSpawn({ stdout: messyStdout });
856
- const backend = new ProgrammaticBackend(baseDeps(fn));
857
- const handle = await backend.start(specWithVault("eng"));
858
- const result = await backend.deliver(handle, "hello", createSession("sess-R"));
859
- expect(result.ok).toBe(true);
860
- if (result.ok) {
861
- expect(result.reply).toBe("robust reply");
862
- // The captured id is RETURNED for the registry to persist (the backend keeps no store).
863
- expect(result.sessionId).toBe("sess-R");
864
- }
865
- });
866
- });
867
-
868
- describe("ProgrammaticBackend.deliver — streaming interim events (the watch-it-work view)", () => {
869
- /** A spawnFn whose stdout emits the given byte CHUNKS in order (multi-chunk stream). */
870
- function chunkedSpawn(chunks: string[]): ProgrammaticSpawnFn {
871
- const enc = new TextEncoder();
872
- return () => ({
873
- stdout: new ReadableStream<Uint8Array>({
874
- start(controller) {
875
- for (const c of chunks) controller.enqueue(enc.encode(c));
876
- controller.close();
877
- },
878
- }),
879
- stderr: new Response("").body,
880
- exited: Promise.resolve(0),
881
- });
882
- }
883
-
884
- test("onInterim receives init + text + tool_use; the durable result is unchanged", async () => {
885
- mkDirs("stream");
886
- const blob =
887
- JSON.stringify({ type: "system", subtype: "init", session_id: "sess-STREAM", apiKeySource: "none" }) + "\n" +
888
- JSON.stringify({ type: "assistant", message: { content: [{ type: "text", text: "looking…" }] }, session_id: "sess-STREAM" }) + "\n" +
889
- JSON.stringify({ type: "assistant", message: { content: [{ type: "tool_use", name: "Grep" }] }, session_id: "sess-STREAM" }) + "\n" +
890
- JSON.stringify({ type: "result", subtype: "success", is_error: false, result: "looking… found it", session_id: "sess-STREAM" }) + "\n";
891
- // Split into two chunks at a mid-line boundary to exercise incremental decoding.
892
- const cut = Math.floor(blob.length / 2);
893
- const backend = new ProgrammaticBackend(
894
- baseDeps(chunkedSpawn([blob.slice(0, cut), blob.slice(cut)])),
895
- );
896
- const handle = await backend.start(specWithVault("eng"));
897
-
898
- const events: unknown[] = [];
899
- const result = await backend.deliver(handle, "where is X", createSession("sess-STREAM"), (e) =>
900
- events.push(e),
901
- );
902
-
903
- expect(events).toEqual([
904
- { kind: "init", sessionId: "sess-STREAM" },
905
- { kind: "text", text: "looking…" },
906
- { kind: "tool", tool: "Grep" },
907
- ]);
908
- // The DURABLE final result is exactly as the non-streaming path would produce it.
909
- expect(result.ok).toBe(true);
910
- if (result.ok) {
911
- expect(result.reply).toBe("looking… found it");
912
- expect(result.sessionId).toBe("sess-STREAM");
913
- }
914
- });
915
-
916
- test("a turn with NO onInterim runs identically (durable reply intact, no throw)", async () => {
917
- mkDirs("nostream");
918
- const { fn } = recordingSpawn({ stdout: successTurn("sess-NS", "plain reply") });
919
- const backend = new ProgrammaticBackend(baseDeps(fn));
920
- const handle = await backend.start(specWithVault("eng"));
921
- const result = await backend.deliver(handle, "hi", freshSession()); // no sink
922
- expect(result.ok).toBe(true);
923
- if (result.ok) expect(result.reply).toBe("plain reply");
924
- });
925
-
926
- test("a THROWING onInterim sink cannot break the turn (durable result still returned)", async () => {
927
- mkDirs("sinkthrow");
928
- const { fn } = recordingSpawn({ stdout: successTurn("sess-THROW", "survives") });
929
- const backend = new ProgrammaticBackend(baseDeps(fn));
930
- const handle = await backend.start(specWithVault("eng"));
931
- const result = await backend.deliver(handle, "hi", freshSession(), () => {
932
- throw new Error("dead SSE stream");
933
- });
934
- expect(result.ok).toBe(true);
935
- if (result.ok) expect(result.reply).toBe("survives");
936
- });
937
-
938
- test("an ERROR turn still streams its init then returns { ok:false } (live view can resolve)", async () => {
939
- mkDirs("streamerr");
940
- const blob =
941
- JSON.stringify({ type: "system", subtype: "init", session_id: "sess-ERR" }) + "\n" +
942
- JSON.stringify({ type: "result", subtype: "error_during_execution", is_error: true, result: "boom", session_id: "sess-ERR" }) + "\n";
943
- const backend = new ProgrammaticBackend(baseDeps(chunkedSpawn([blob])));
944
- const handle = await backend.start(specWithVault("eng"));
945
- const events: unknown[] = [];
946
- const result = await backend.deliver(handle, "go", createSession("sess-ERR"), (e) =>
947
- events.push(e),
948
- );
949
- expect(events).toEqual([{ kind: "init", sessionId: "sess-ERR" }]);
950
- expect(result.ok).toBe(false);
951
- if (!result.ok) {
952
- expect(result.error).toBe("boom");
953
- expect(result.sessionId).toBe("sess-ERR");
954
- }
955
- });
956
- });
957
-
958
- describe("ProgrammaticBackend.deliver — credential / mint failures (value, not throw)", () => {
959
- test("a missing Claude credential returns { ok:false } and never spawns", async () => {
960
- mkDirs("nocred");
961
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
962
- const backend = new ProgrammaticBackend(
963
- baseDeps(fn, {
964
- resolveClaudeToken: () => {
965
- throw new Error("no Claude credential for channel \"eng\"");
966
- },
967
- }),
968
- );
969
- const handle = await backend.start(specWithVault("eng"));
970
- const result = await backend.deliver(handle, "hello", freshSession());
971
- expect(result.ok).toBe(false);
972
- if (!result.ok) expect(result.error).toContain("no Claude credential");
973
- expect(calls).toHaveLength(0); // never spawned
974
- });
975
-
976
- test("a refused vault mint (hub 400) returns { ok:false } and never spawns", async () => {
977
- mkDirs("badmint");
978
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
979
- const refusingFetch = (async () =>
980
- new Response(
981
- JSON.stringify({ error: "invalid_scope", error_description: "not grantable by this bearer" }),
982
- { status: 400, headers: { "content-type": "application/json" } },
983
- )) as unknown as typeof fetch;
984
- const backend = new ProgrammaticBackend(baseDeps(fn, { fetchFn: refusingFetch }));
985
- const handle = await backend.start(specWithVault("eng"));
986
- const result = await backend.deliver(handle, "hello", freshSession());
987
- expect(result.ok).toBe(false);
988
- if (!result.ok) expect(result.error).toMatch(/mint refused/);
989
- expect(calls).toHaveLength(0); // mint failed before any spawn
990
- });
991
- });
992
-
993
- describe("ProgrammaticBackend — start / stop / status", () => {
994
- test("start validates the name + channels and carries the spec on the handle", async () => {
995
- mkDirs("start");
996
- const { fn } = recordingSpawn();
997
- const backend = new ProgrammaticBackend(baseDeps(fn));
998
- const spec = specWithVault("eng");
999
- const handle = await backend.start(spec);
1000
- expect(handle.backend).toBe(PROGRAMMATIC_BACKEND_KIND);
1001
- expect(handle.channel).toBe("eng");
1002
- expect(handle.name).toBe("eng");
1003
- expect(handle.spec).toEqual(spec);
1004
-
1005
- await expect(backend.start({ name: "bad name", channels: ["c"] })).rejects.toThrow(/slug/);
1006
- await expect(backend.start({ name: "ok", channels: [] })).rejects.toThrow(/no channels/);
1007
- });
1008
-
1009
- test("stop() is a NO-OP (no store to clear; the session lives on the thread note)", async () => {
1010
- mkDirs("stop");
1011
- const { fn, calls } = recordingSpawn({ stdout: successTurn("sess-STOP", "ok") });
1012
- const backend = new ProgrammaticBackend(baseDeps(fn));
1013
- const handle = await backend.start(specWithVault("eng"));
1014
- // A turn establishes a session; the id is RETURNED (the registry persists it on the note).
1015
- const r = await backend.deliver(handle, "hello", createSession("sess-STOP"));
1016
- expect(r.ok).toBe(true);
1017
- // stop() does not throw and runs no side effect — the backend keeps no session store, so
1018
- // there is nothing to clear (continuity now lives on the durable #agent/thread note).
1019
- await backend.stop(handle);
1020
- // It does not spawn anything or otherwise touch the turn machinery.
1021
- expect(calls).toHaveLength(1);
1022
- // A subsequent RESUME turn still works (the registry would supply the same id off the note).
1023
- const r2 = await backend.deliver(handle, "again", resumeSession("sess-STOP"));
1024
- expect(r2.ok).toBe(true);
1025
- expect(calls[1]!.argv[2]!).toContain("--resume sess-STOP");
1026
- });
1027
-
1028
- test("status() is live (no resident process to keep alive)", async () => {
1029
- mkDirs("status");
1030
- const { fn } = recordingSpawn();
1031
- const backend = new ProgrammaticBackend(baseDeps(fn));
1032
- const handle = await backend.start(specWithVault());
1033
- expect(await backend.status(handle)).toEqual({ live: true });
1034
- });
1035
- });
1036
-
1037
- // ---------------------------------------------------------------------------
1038
- // 4b — cross-resource grant injection (design 2026-06-17-agent-connectors-4b)
1039
- // ---------------------------------------------------------------------------
1040
-
1041
- /**
1042
- * A fake hub grants API for the spawn-injection path. `listGrants` returns the given
1043
- * approved grants; `getMaterial` returns the keyed material (or 404). Records each
1044
- * material fetch so a test can prove FRESH-each-spawn (no caching).
1045
- */
1046
- function grantsClientFor(opts: {
1047
- grants: Array<{ id: string; connection: ConnectionSpec; status: string }>;
1048
- material?: Record<string, GrantMaterial>;
1049
- onMaterialCall?: (id: string) => void;
1050
- }): GrantsClient {
1051
- const fetchFn = (async (url: string | URL | Request) => {
1052
- const u = String(url);
1053
- if (u.includes("/admin/grants/")) {
1054
- const id = u.split("/admin/grants/")[1]!.replace("/material", "");
1055
- opts.onMaterialCall?.(id);
1056
- const m = opts.material?.[id];
1057
- if (!m) return new Response("no", { status: 404 });
1058
- return new Response(JSON.stringify(m), { status: 200 });
1059
- }
1060
- return new Response(
1061
- JSON.stringify({
1062
- grants: opts.grants.map((g) => ({ id: g.id, agent: "eng", connection: g.connection, status: g.status })),
1063
- }),
1064
- { status: 200, headers: { "content-type": "application/json" } },
1065
- );
1066
- }) as typeof fetch;
1067
- return new GrantsClient({ hubOrigin: "https://hub.example.com", managerBearer: "MGR", fetchFn });
1068
- }
1069
-
1070
- /** Read the written per-spawn .mcp.json's mcpServers for a session. */
1071
- function readMcpServers(name: string): Record<string, { type: string; url: string; headers?: { Authorization: string } }> {
1072
- const parsed = JSON.parse(readFileSync(join(sessionsDir, name, ".mcp.json"), "utf8")) as {
1073
- mcpServers: Record<string, { type: string; url: string; headers?: { Authorization: string } }>;
1074
- };
1075
- return parsed.mcpServers;
1076
- }
1077
-
1078
- describe("ProgrammaticBackend.deliver — grant injection (4b)", () => {
1079
- test("approved VAULT grant → an extra MCP server in --mcp-config (alongside own-vault)", async () => {
1080
- mkDirs("grant-vault");
1081
- const conn: ConnectionSpec = { kind: "vault", target: "research", access: "read" };
1082
- const grants = grantsClientFor({
1083
- grants: [{ id: "g1", connection: conn, status: "approved" }],
1084
- material: { g1: { kind: "vault", token: "RTOK", mcpUrl: "https://hub/vault/research/mcp" } },
1085
- });
1086
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
1087
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants }));
1088
- const handle = await backend.start(specWithVault("eng"));
1089
- await backend.deliver(handle, "hi", freshSession());
1090
-
1091
- const servers = readMcpServers("eng");
1092
- // Own def-vault entry still present…
1093
- expect(servers[vaultEntryKey("default")]).toBeDefined();
1094
- // …PLUS the granted research vault, namespaced + with its Bearer.
1095
- const granted = servers[grantVaultEntryKey("research")]!;
1096
- expect(granted.type).toBe("http");
1097
- expect(granted.url).toBe("https://hub/vault/research/mcp");
1098
- expect(granted.headers!.Authorization).toBe("Bearer RTOK");
1099
- });
1100
-
1101
- test("approved SERVICE grant (env) → an env var for the agent's shell tools", async () => {
1102
- mkDirs("grant-env");
1103
- const conn: ConnectionSpec = { kind: "service", target: "github", inject: ["env"] };
1104
- const grants = grantsClientFor({
1105
- grants: [{ id: "g1", connection: conn, status: "approved" }],
1106
- material: { g1: { kind: "service", token: "ghp_GRANTED", inject: ["env"] } },
1107
- });
1108
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
1109
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants }));
1110
- const handle = await backend.start(specWithVault("eng"));
1111
- await backend.deliver(handle, "hi", freshSession());
1112
-
1113
- expect(calls[0]!.env.GITHUB_TOKEN).toBe("ghp_GRANTED");
1114
- // The granted env var never clobbers the managed Claude auth.
1115
- expect(calls[0]!.env.CLAUDE_CODE_OAUTH_TOKEN).toBe("OAUTH-CRED-PLACEHOLDER");
1116
- // No service MCP entry for an env-only grant.
1117
- expect(readMcpServers("eng")[grantServiceEntryKey("github")]).toBeUndefined();
1118
- });
1119
-
1120
- test("approved SERVICE grant (mcp) → the service's MCP server in --mcp-config", async () => {
1121
- mkDirs("grant-svc-mcp");
1122
- const conn: ConnectionSpec = { kind: "service", target: "github", inject: ["mcp"] };
1123
- const grants = grantsClientFor({
1124
- grants: [{ id: "g1", connection: conn, status: "approved" }],
1125
- material: { g1: { kind: "service", token: "ghp_MCP", inject: ["mcp"] } },
1126
- });
1127
- const { fn, calls } = recordingSpawn({ stdout: successTurn("s", "ok") });
1128
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants }));
1129
- const handle = await backend.start(specWithVault("eng"));
1130
- await backend.deliver(handle, "hi", freshSession());
1131
-
1132
- const svc = readMcpServers("eng")[grantServiceEntryKey("github")]!;
1133
- expect(svc.type).toBe("http");
1134
- expect(svc.url).toBe(serviceMcpUrl("github")!);
1135
- expect(svc.headers!.Authorization).toBe("Bearer ghp_MCP");
1136
- // mcp-only inject → no GITHUB_TOKEN env var.
1137
- expect(calls[0]!.env.GITHUB_TOKEN).toBeUndefined();
1138
- });
1139
-
1140
- test("MCP-KIND grant is NEVER injected in 4b-1 (no material → 404 → absent)", async () => {
1141
- mkDirs("grant-mcp-kind");
1142
- const conn: ConnectionSpec = { kind: "mcp", target: "https://remote/mcp" };
1143
- // Even modeled as "approved", the hub returns no material in 4b-1 (no OAuth).
1144
- const grants = grantsClientFor({ grants: [{ id: "g1", connection: conn, status: "approved" }] });
1145
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
1146
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants }));
1147
- const handle = await backend.start(specWithVault("eng"));
1148
- await backend.deliver(handle, "hi", freshSession());
1149
-
1150
- const servers = readMcpServers("eng");
1151
- // Only the own def-vault entry — the mcp-kind grant added nothing.
1152
- expect(Object.keys(servers)).toEqual([vaultEntryKey("default")]);
1153
- });
1154
-
1155
- test("material is fetched FRESH each spawn (revocation takes effect next turn — no cache)", async () => {
1156
- mkDirs("grant-fresh");
1157
- const called: string[] = [];
1158
- const conn: ConnectionSpec = { kind: "vault", target: "research", access: "read" };
1159
- const grants = grantsClientFor({
1160
- grants: [{ id: "g1", connection: conn, status: "approved" }],
1161
- material: { g1: { kind: "vault", token: "RTOK", mcpUrl: "https://hub/vault/research/mcp" } },
1162
- onMaterialCall: (id) => called.push(id),
1163
- });
1164
- const { fn } = sequencedSpawn([successTurn("s", "one"), successTurn("s", "two")]);
1165
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants }));
1166
- const handle = await backend.start(specWithVault("eng"));
1167
- await backend.deliver(handle, "turn one", createSession("s"));
1168
- await backend.deliver(handle, "turn two", resumeSession("s"));
1169
- // Two turns → two material fetches (no caching).
1170
- expect(called).toEqual(["g1", "g1"]);
1171
- });
1172
-
1173
- test("a grants-LIST failure is non-fatal — the turn runs with own-vault only", async () => {
1174
- mkDirs("grant-list-fail");
1175
- const fetchFn = (async (url: string | URL | Request) => {
1176
- // mint succeeds (vault token), grants list 500s.
1177
- if (String(url).includes("/admin/grants")) return new Response("boom", { status: 500 });
1178
- return fakeMintFetch()(url);
1179
- }) as typeof fetch;
1180
- const grants = new GrantsClient({ hubOrigin: "https://hub.example.com", managerBearer: "MGR", fetchFn });
1181
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
1182
- // Use the same fetch for the mint path so the vault token still mints.
1183
- const backend = new ProgrammaticBackend(baseDeps(fn, { grants, fetchFn }));
1184
- const handle = await backend.start(specWithVault("eng"));
1185
- const result = await backend.deliver(handle, "hi", freshSession());
1186
- expect(result.ok).toBe(true); // own-vault turn unaffected by the grant blip
1187
- const servers = readMcpServers("eng");
1188
- expect(servers[vaultEntryKey("default")]).toBeDefined();
1189
- expect(Object.keys(servers)).toEqual([vaultEntryKey("default")]); // no grants injected
1190
- });
1191
-
1192
- test("NO grants client → today's behavior exactly (own-vault only)", async () => {
1193
- mkDirs("grant-none");
1194
- const { fn } = recordingSpawn({ stdout: successTurn("s", "ok") });
1195
- const backend = new ProgrammaticBackend(baseDeps(fn)); // no grants in deps
1196
- const handle = await backend.start(specWithVault("eng"));
1197
- await backend.deliver(handle, "hi", freshSession());
1198
- expect(Object.keys(readMcpServers("eng"))).toEqual([vaultEntryKey("default")]);
1199
- });
1200
- });
1201
-
1202
describe("ProgrammaticBackend.deliver — transient-error retry with incremental backoff", () => {
  /**
   * Stream-json output for a turn that announces session `sid` and then ends in
   * an `error_during_execution` result whose text is `msg`.
   */
  function transientResult(sid: string, msg: string) {
    const initEvent = { type: "system", subtype: "init", session_id: sid, apiKeySource: "none" };
    const errorEvent = {
      type: "result",
      subtype: "error_during_execution",
      is_error: true,
      result: msg,
      session_id: sid,
    };
    return ndjson(initEvent, errorEvent);
  }

  /** A `sleepFn` fake: records every requested delay and never actually waits. */
  function sleepRecorder() {
    const sleeps: number[] = [];
    const sleepFn = async (ms: number) => {
      sleeps.push(ms);
    };
    return { sleeps, sleepFn };
  }

  test("retries a TRANSIENT turn error (backoff), then succeeds", async () => {
    mkDirs("retry-ok");
    const { sleeps, sleepFn } = sleepRecorder();
    // First spawn fails with an overloaded-API message, the second one succeeds.
    const outputs = [
      transientResult("s1", "API Error: 529 Overloaded. Try again."),
      successTurn("s2", "recovered"),
    ];
    const { fn, calls } = sequencedSpawn(outputs);
    const backend = new ProgrammaticBackend(baseDeps(fn, { sleepFn }));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "go", createSession("s1"));

    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.reply).toBe("recovered");
      // The id reported back belongs to the attempt that succeeded ("s2"), not the failed "s1".
      expect(result.sessionId).toBe("s2");
    }
    // Exactly one retry, preceded by exactly one backoff of the first configured interval.
    expect(calls.length).toBe(2);
    expect(sleeps).toHaveLength(1);
    expect(sleeps[0]).toBe(TURN_RETRY_BACKOFF_MS[0]);
  });

  test("does NOT retry a non-transient turn error (fails fast, no sleep)", async () => {
    mkDirs("retry-no");
    const { sleeps, sleepFn } = sleepRecorder();
    // Same result envelope as above, but the message is an auth failure.
    const { fn, calls } = recordingSpawn({
      stdout: transientResult("s", "401 unauthorized: invalid token"),
    });
    const backend = new ProgrammaticBackend(baseDeps(fn, { sleepFn }));
    const handle = await backend.start(specWithVault());

    const result = await backend.deliver(handle, "go", freshSession());

    expect(result.ok).toBe(false);
    expect(calls.length).toBe(1); // a single spawn: nothing was retried
    expect(sleeps.length).toBe(0);
  });

  test("a persistently TRANSIENT error exhausts the retries → { ok:false }", async () => {
    mkDirs("retry-exhaust");
    const { sleeps, sleepFn } = sleepRecorder();
    // Every spawn returns the same 503, so no attempt can ever succeed.
    const { fn, calls } = recordingSpawn({
      stdout: transientResult("s", "API Error: 503 Service Unavailable"),
    });
    const backend = new ProgrammaticBackend(baseDeps(fn, { sleepFn }));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "go", createSession("s"));

    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.error).toContain("503");
      // Even the final failure still reports the session id (the continuation handle).
      expect(result.sessionId).toBe("s");
    }
    expect(calls.length).toBe(TURN_MAX_ATTEMPTS); // every allowed attempt was spent
    expect(sleeps.length).toBe(TURN_MAX_ATTEMPTS - 1); // one backoff ahead of each retry
  });
});
1281
-
1282
describe("isTransientTurnError", () => {
  test("transient upstream/network signals → true", () => {
    // Overload / rate-limit / 5xx / socket-level messages that should be classified as retryable.
    const transientMessages = [
      "API Error: 529 Overloaded",
      "503 Service Unavailable",
      "429 rate limit exceeded",
      "Internal Server Error",
      "Bad Gateway",
      "ETIMEDOUT",
      "socket hang up (ECONNRESET)",
    ];
    for (const message of transientMessages) {
      expect(isTransientTurnError(message)).toBe(true);
    }
  });

  test("permanent/deterministic signals → false (no pointless retry)", () => {
    // Auth, bad-request, missing-credential, max-turns and scope errors must not be retried.
    const permanentMessages = [
      "401 unauthorized",
      "400 bad request",
      'no Claude credential for channel "x"',
      "claude -p turn failed (subtype: error_max_turns)",
      "tag_scope_violation",
    ];
    for (const message of permanentMessages) {
      expect(isTransientTurnError(message)).toBe(false);
    }
  });
});
1309
-
1310
describe("isSessionNotFoundError", () => {
  test("resume-of-a-missing-session phrasings → true", () => {
    // Different wordings of "that session/conversation does not exist".
    const missingSessionMessages = [
      "No conversation found with session ID: 3f8c-...",
      "no conversation found",
      "Session not found",
      "claude -p exited 1: Error: No session found for id abc",
      "Could not find a session with that id",
      "the conversation with that id was not found",
    ];
    for (const message of missingSessionMessages) {
      expect(isSessionNotFoundError(message)).toBe(true);
    }
  });

  test("generic / unrelated failures → false (conservative — never a bare 'not found')", () => {
    const unrelatedMessages = [
      "claude -p exited 1: some other error",
      "rate limit",
      "",
      "401 unauthorized",
      "file not found", // "not found" alone, with no mention of a conversation or session
      "tag_scope_violation",
    ];
    for (const message of unrelatedMessages) {
      expect(isSessionNotFoundError(message)).toBe(false);
    }
  });
});
1337
-
1338
- // ---- session-expiry → fresh-create fallback (#132) -------------------------
1339
-
1340
/**
 * Builds a spawn fake whose canned result depends on whether the wrapped command
 * line (taken from `argv[2]`, or `""` when absent) contains `--resume`:
 *   - command contains `--resume` → stdout is `onResume`, exit code is `resumeCode`;
 *   - anything else (a create turn) → stdout is `onCreate`, exit code is `createCode`.
 *
 * Every option is optional and there are NO canned payloads: an omitted stdout
 * falls back to the empty string and an omitted exit code to 0, so a test must
 * pass the exact stream-json it wants each branch to emit. stderr is always empty.
 *
 * @param opts per-branch stdout text and exit code.
 * @returns `fn`, the fake to inject, and `calls`, which receives one
 *   `{ argv, cmd }` entry per invocation (in order) so a test can assert the
 *   resume → fresh-create flag transition.
 */
function flagBranchingSpawn(opts: {
  onResume?: string;
  onCreate?: string;
  resumeCode?: number;
  createCode?: number;
}): {
  fn: ProgrammaticSpawnFn;
  calls: Array<{ argv: string[]; cmd: string }>;
} {
  const calls: Array<{ argv: string[]; cmd: string }> = [];

  const fn: ProgrammaticSpawnFn = (argv) => {
    // The fake engine echoes the claude command line in argv[2].
    const cmd = argv[2] ?? "";
    calls.push({ argv, cmd });

    const resuming = cmd.includes("--resume");
    const stdoutText = (resuming ? opts.onResume : opts.onCreate) ?? "";
    const exitCode = (resuming ? opts.resumeCode : opts.createCode) ?? 0;

    return {
      stdout: new Response(stdoutText).body,
      stderr: new Response("").body,
      exited: Promise.resolve(exitCode),
    };
  };

  return { fn, calls };
}
1366
-
1367
/**
 * Stream-json for a failed turn: an init event announcing `sessionId`, followed by
 * an `error_during_execution` result whose text is `message`.
 */
function failTurn(sessionId: string, message: string): string {
  const initEvent = { type: "system", subtype: "init", session_id: sessionId, apiKeySource: "none" };
  const errorEvent = {
    type: "result",
    subtype: "error_during_execution",
    is_error: true,
    result: message,
    session_id: sessionId,
  };
  return ndjson(initEvent, errorEvent);
}
1374
-
1375
describe("ProgrammaticBackend.deliver — session-expiry → fresh-create fallback (#132)", () => {
  test("resume → session-not-found → fresh --session-id create succeeds; NEW id returned; two spawns", async () => {
    mkDirs("fallback-ok");
    // Resume turn: claude's session-not-found error. Create turn: success, echoing the create id.
    const resumeOutput = failTurn("old-uuid", "No conversation found with session ID: old-uuid");
    const createOutput = successTurn("fresh-created-uuid", "recovered after fresh create");
    const { fn, calls } = flagBranchingSpawn({ onResume: resumeOutput, onCreate: createOutput });
    const backend = new ProgrammaticBackend(baseDeps(fn));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "continue please", resumeSession("old-uuid"));

    expect(result.ok).toBe(true);
    if (result.ok) {
      expect(result.reply).toBe("recovered after fresh create");
      // The reported id is the one echoed by the create turn, never the dead "old-uuid".
      expect(result.sessionId).toBe("fresh-created-uuid");
      expect(result.sessionId).not.toBe("old-uuid");
    }

    // Two spawns in order: `--resume old-uuid`, then `--session-id <fresh>`.
    expect(calls).toHaveLength(2);
    const resumeCall = calls[0]!;
    const createCall = calls[1]!;
    expect(resumeCall.cmd).toContain("--resume old-uuid");
    expect(resumeCall.cmd).not.toContain("--session-id");
    expect(createCall.cmd).toContain("--session-id");
    expect(createCall.cmd).not.toContain("--resume");

    // Pull the token that follows `--session-id ` out of the create command line:
    // it must be present and must differ from the dead id.
    const sessionFlag = "--session-id ";
    const createCmd = createCall.argv[2]!;
    const afterFlag = createCmd.slice(createCmd.indexOf(sessionFlag) + sessionFlag.length);
    const freshId = afterFlag.split(/\s/)[0]!;
    expect(freshId).not.toBe("old-uuid");
    expect(freshId.length).toBeGreaterThan(0);
  });

  test("a resume turn that fails with a NON-not-found (non-transient) error does NOT fall back", async () => {
    mkDirs("fallback-no");
    // The resume turn fails with an auth error; the create output exists only to
    // prove it is never requested.
    const { fn, calls } = flagBranchingSpawn({
      onResume: failTurn("old-uuid", "401 unauthorized: invalid token"),
      onCreate: successTurn("would-not-be-used", "should never run"),
    });
    const backend = new ProgrammaticBackend(baseDeps(fn));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "continue", resumeSession("old-uuid"));

    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.error).toContain("401 unauthorized");
    }
    // A single spawn: the fresh-create fallback did not run.
    expect(calls).toHaveLength(1);
    expect(calls[0]!.cmd).toContain("--resume old-uuid");
  });

  test("session-not-found on a CREATE turn does NOT loop (the session.resume guard prevents the fallback)", async () => {
    mkDirs("fallback-create");
    // Here the CREATE turn itself reports not-found. The fallback is keyed on
    // session.resume, so a create turn's not-found must not trigger another create.
    const { fn, calls } = recordingSpawn({
      stdout: failTurn("sess-CREATE", "No conversation found with session ID: sess-CREATE"),
    });
    const backend = new ProgrammaticBackend(baseDeps(fn));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "first turn", createSession("sess-CREATE"));

    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.error).toContain("No conversation found");
    }
    // Exactly one spawn: no fallback create was attempted.
    expect(calls).toHaveLength(1);
  });
});
1445
-
1446
- // ---------------------------------------------------------------------------
1447
- // INBOUND FILE ATTACHMENTS (Phase 1) — the programmatic backend stages each
1448
- // attached file into the agent's PRIVATE session workspace (under a SAFE
1449
- // basename, no traversal) and appends a pointer line to the turn prompt so the
1450
- // `claude -p` turn can Read it. Best-effort + isolated per-file.
1451
- // ---------------------------------------------------------------------------
1452
-
1453
/**
 * A `fetch` fake that plays two roles at once:
 *   - vault storage: any URL containing `/api/storage/` is answered from `blobs`,
 *     keyed by the URI-decoded remainder of the URL after that marker. A key that
 *     is not an OWN property of `blobs` yields `404 "not found"`;
 *   - mint hub: any other request whose method is POST returns a JSON token
 *     (`jti` = `j<n>`, `token` = `TOK-<n>`, a fixed `expires_at`, and the `scope`
 *     echoed from the JSON request body), where `n` counts mints starting at 1.
 * Everything else gets `500 "unexpected"`.
 *
 * @param blobs storage path → body text to serve.
 * @returns `fetchFn`, the fake to inject, and `blobCalls`, which records the URL
 *   and authorization header (or `undefined`) of every storage request, in order.
 */
function mintAndBlobFetch(blobs: Record<string, string> = {}): {
  fetchFn: typeof fetch;
  blobCalls: Array<{ url: string; auth: string | undefined }>;
} {
  const storageMarker = "/api/storage/";
  const blobCalls: Array<{ url: string; auth: string | undefined }> = [];
  let mintCount = 0;

  const fetchFn = (async (input: string | URL | Request, init?: RequestInit) => {
    const target = String(input);
    const method = (init?.method ?? "GET").toUpperCase();

    if (target.includes(storageMarker)) {
      // Header names are case-insensitive and `init.headers` may be a plain object,
      // a Headers instance or a tuple array; Headers normalises all three.
      const auth = new Headers(init?.headers).get("authorization") ?? undefined;
      blobCalls.push({ url: target, auth });

      const encodedPath = target.split(storageMarker)[1] ?? "";
      const path = decodeURIComponent(encodedPath);
      // Own-property check: a path such as "constructor" must 404 instead of
      // resolving to a member inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(blobs, path)) {
        return new Response("not found", { status: 404 });
      }
      return new Response(blobs[path], {
        status: 200,
        headers: { "content-type": "application/octet-stream" },
      });
    }

    if (method === "POST") {
      const requested = JSON.parse(String(init?.body ?? "{}")) as { scope: string };
      mintCount += 1;
      const minted = {
        jti: `j${mintCount}`,
        token: `TOK-${mintCount}`,
        expires_at: "2026-09-01T00:00:00Z",
        scope: requested.scope,
      };
      return new Response(JSON.stringify(minted), {
        status: 200,
        headers: { "content-type": "application/json" },
      });
    }

    return new Response("unexpected", { status: 500 });
  }) as unknown as typeof fetch;

  return { fetchFn, blobCalls };
}
1488
-
1489
/**
 * Shorthand for an inbound-attachment reference. When `filename` is omitted it
 * defaults to the text after the last `/` in `path` (the whole path if it has no `/`).
 */
function att(path: string, mimeType: string, filename?: string): InboundAttachment {
  const lastSegment = path.slice(path.lastIndexOf("/") + 1);
  const resolvedName = filename == null ? lastSegment : filename;
  return { path, mimeType, filename: resolvedName };
}
1492
-
1493
describe("safeAttachmentBasename — path-traversal sanitization (security)", () => {
  /** Asserts every `[input, expected]` pair in order, stopping at the first mismatch. */
  function expectBasenames(cases: Array<[string, string]>): void {
    for (const [input, expected] of cases) {
      expect(safeAttachmentBasename(input)).toBe(expected);
    }
  }

  test("strips directory components + traversal markers to a plain basename", () => {
    // POSIX traversal, absolute path, nested relative path, and a backslash-separated path.
    expectBasenames([
      ["../../etc/passwd", "passwd"],
      ["/abs/path/report.png", "report.png"],
      ["a/b/c/note.md", "note.md"],
      ["..\\..\\windows\\system32\\cmd.exe", "cmd.exe"],
    ]);
  });

  test("collapses disallowed chars to underscore + strips leading dots", () => {
    expectBasenames([
      [".hidden", "hidden"],
      ["a b.png", "a_b.png"],
    ]);
  });

  test("degenerate input → a stable non-empty default", () => {
    // Inputs that leave nothing usable all map to the same placeholder name.
    expectBasenames([
      ["", "file"],
      ["..", "file"],
      ["/", "file"],
      ["./", "file"],
    ]);
  });
});
1511
-
1512
describe("ProgrammaticBackend.deliver — inbound attachment staging", () => {
  /** Expected staging directory for `agent`; evaluated at call time so it reads the current `sessionsDir`. */
  const stagingDirFor = (agent: string) => join(sessionsDir, agent, ATTACHMENT_STAGING_DIR);

  test("stages each blob into the PRIVATE workspace under a safe basename + appends the prompt line", async () => {
    mkDirs("att-stage");
    const spawn = recordingSpawn({ stdout: successTurn("sess-A", "looked at the files") });
    const vault = mintAndBlobFetch({
      "2026-06-24/abc.png": "PNGBYTES",
      "2026-06-24/def.txt": "hello world",
    });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const attachments = [att("2026-06-24/abc.png", "image/png"), att("2026-06-24/def.txt", "text/plain")];
    const result = await backend.deliver(handle, "what is in these", createSession("sess-A"), undefined, attachments);
    expect(result.ok).toBe(true);

    // One storage request per blob, each carrying a minted Bearer token.
    expect(vault.blobCalls).toHaveLength(2);
    for (const blobCall of vault.blobCalls) {
      expect(blobCall.url).toContain("/vault/default/api/storage/");
      expect(blobCall.auth).toMatch(/^Bearer TOK-/);
    }

    // Both files exist in the session workspace's staging subdirectory with the served bytes.
    const stagingDir = stagingDirFor("eng");
    const pngPath = join(stagingDir, "abc.png");
    const txtPath = join(stagingDir, "def.txt");
    expect(existsSync(pngPath)).toBe(true);
    expect(existsSync(txtPath)).toBe(true);
    expect(readFileSync(pngPath, "utf-8")).toBe("PNGBYTES");
    expect(readFileSync(txtPath, "utf-8")).toBe("hello world");

    // The staged path sits under the agent's own workspace directory.
    expect(pngPath.startsWith(join(sessionsDir, "eng"))).toBe(true);

    // The spawned command carries the pointer line (staged paths + mime types) and the original message.
    const cmd = spawn.calls[0]!.argv[2]!;
    expect(cmd).toContain("Attached files");
    expect(cmd).toContain(pngPath);
    expect(cmd).toContain("image/png");
    expect(cmd).toContain(txtPath);
    expect(cmd).toContain("text/plain");
    expect(cmd).toContain("what is in these");
  });

  test("a malicious filename is staged as a SAFE basename inside the staging dir, never outside (security)", async () => {
    mkDirs("att-traversal");
    const spawn = recordingSpawn({ stdout: successTurn("sess-T", "ok") });
    // The storage path being fetched is harmless; the attack is carried by the FILENAME.
    const vault = mintAndBlobFetch({ "2026-06-24/legit.bin": "EVILBYTES" });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const hostile = att("2026-06-24/legit.bin", "application/octet-stream", "../../../../tmp/pwned.sh");
    const result = await backend.deliver(handle, "stage this", createSession("sess-T"), undefined, [hostile]);
    expect(result.ok).toBe(true);

    // The file is written under the staging dir with a sanitized name, and nothing appears at /tmp/pwned.sh.
    const stagedPath = join(stagingDirFor("eng"), "pwned.sh");
    expect(existsSync(stagedPath)).toBe(true);
    expect(existsSync("/tmp/pwned.sh")).toBe(false);
    const cmd = spawn.calls[0]!.argv[2]!;
    expect(cmd).toContain(stagedPath);
    expect(cmd).not.toContain("/tmp/pwned.sh");
  });

  test("a malicious storage PATH (no separate filename) also sanitizes to a basename in the staging dir", async () => {
    mkDirs("att-pathtraversal");
    const spawn = recordingSpawn({ stdout: successTurn("sess-P", "ok") });
    // This time the storage path itself is the traversal string; it is also used as the filename.
    const traversal = "../../../../tmp/evil.sh";
    const vault = mintAndBlobFetch({ [traversal]: "X" });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const hostile = { path: traversal, mimeType: "text/plain", filename: traversal };
    const result = await backend.deliver(handle, "x", createSession("sess-P"), undefined, [hostile]);
    expect(result.ok).toBe(true);

    expect(existsSync(join(stagingDirFor("eng"), "evil.sh"))).toBe(true);
    expect(existsSync("/tmp/evil.sh")).toBe(false);
  });

  test("a single blob fetch failure is isolated — the other file stages + the turn runs", async () => {
    mkDirs("att-isolated");
    const spawn = recordingSpawn({ stdout: successTurn("sess-I", "partial") });
    // Only good.png is served; missing.png gets a 404 from the fake.
    const vault = mintAndBlobFetch({ "2026-06-24/good.png": "GOODBYTES" });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const attachments = [att("2026-06-24/missing.png", "image/png"), att("2026-06-24/good.png", "image/png")];
    const result = await backend.deliver(handle, "two files", createSession("sess-I"), undefined, attachments);
    expect(result.ok).toBe(true);

    const stagingDir = stagingDirFor("eng");
    const goodPath = join(stagingDir, "good.png");
    expect(existsSync(goodPath)).toBe(true);
    expect(existsSync(join(stagingDir, "missing.png"))).toBe(false);
    const cmd = spawn.calls[0]!.argv[2]!;
    expect(cmd).toContain(goodPath);
    expect(cmd).not.toContain("missing.png");
  });

  test("NO attachments → identical behavior to today (no staging dir, no prompt change)", async () => {
    mkDirs("att-none");
    const spawn = recordingSpawn({ stdout: successTurn("sess-N", "plain reply") });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "no files here", createSession("sess-N"));
    expect(result.ok).toBe(true);

    expect(existsSync(stagingDirFor("eng"))).toBe(false);
    const cmd = spawn.calls[0]!.argv[2]!;
    expect(cmd).not.toContain("Attached files");
    expect(cmd).toContain("no files here");
  });

  test("an empty attachments array → no staging, no prompt change", async () => {
    mkDirs("att-empty");
    const spawn = recordingSpawn({ stdout: successTurn("sess-E", "x") });
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "hello", createSession("sess-E"), undefined, []);
    expect(result.ok).toBe(true);

    expect(existsSync(stagingDirFor("eng"))).toBe(false);
    expect(spawn.calls[0]!.argv[2]!).not.toContain("Attached files");
  });

  test("caps the number of staged attachments at ATTACHMENT_MAX_COUNT", async () => {
    mkDirs("att-cap");
    const spawn = recordingSpawn({ stdout: successTurn("sess-C", "ok") });
    // Five more fetchable blobs (and matching refs) than the cap allows.
    const total = ATTACHMENT_MAX_COUNT + 5;
    const paths = Array.from({ length: total }, (_, i) => `2026-06-24/f${i}.bin`);
    const blobs: Record<string, string> = {};
    paths.forEach((p, i) => {
      blobs[p] = `B${i}`;
    });
    const refs: InboundAttachment[] = paths.map((p) => att(p, "application/octet-stream"));
    const vault = mintAndBlobFetch(blobs);
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const result = await backend.deliver(handle, "many", createSession("sess-C"), undefined, refs);
    expect(result.ok).toBe(true);

    // Fetches stop at the cap: the first and the last in-cap file exist, the first overflow file does not.
    expect(vault.blobCalls).toHaveLength(ATTACHMENT_MAX_COUNT);
    const stagingDir = stagingDirFor("eng");
    expect(existsSync(join(stagingDir, `f0.bin`))).toBe(true);
    expect(existsSync(join(stagingDir, `f${ATTACHMENT_MAX_COUNT - 1}.bin`))).toBe(true);
    expect(existsSync(join(stagingDir, `f${ATTACHMENT_MAX_COUNT}.bin`))).toBe(false);
  });

  test("an agent that binds NO vault → attachments skipped (no token to fetch), turn still runs", async () => {
    mkDirs("att-novault");
    const spawn = recordingSpawn({ stdout: successTurn("sess-V", "no vault reply") });
    // The fake serves no blobs (any storage request would 404); `blobCalls` staying
    // empty shows storage was never contacted.
    const vault = mintAndBlobFetch({});
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    // A spec that lists channels but has no vault binding.
    const handle = await backend.start({ name: "eng", channels: ["eng"] } as AgentSpec);

    const attachments = [att("2026-06-24/x.png", "image/png")];
    const result = await backend.deliver(handle, "file but no vault", createSession("sess-V"), undefined, attachments);
    expect(result.ok).toBe(true);

    // Text-only turn: no storage request, no staging directory, no pointer line.
    expect(vault.blobCalls).toHaveLength(0);
    expect(existsSync(stagingDirFor("eng"))).toBe(false);
    expect(spawn.calls[0]!.argv[2]!).not.toContain("Attached files");
  });

  test("all attachments failing → NO empty staging dir left behind", async () => {
    mkDirs("att-allfail");
    const spawn = recordingSpawn({ stdout: successTurn("sess-F", "ok") });
    const vault = mintAndBlobFetch({}); // no blobs registered, so every storage request 404s
    const backend = new ProgrammaticBackend(baseDeps(spawn.fn, { fetchFn: vault.fetchFn }));
    const handle = await backend.start(specWithVault("eng"));

    const attachments = [att("2026-06-24/a.png", "image/png"), att("2026-06-24/b.png", "image/png")];
    const result = await backend.deliver(handle, "all bad", createSession("sess-F"), undefined, attachments);
    expect(result.ok).toBe(true);

    // Nothing was staged, so the attachments directory must not exist at all.
    expect(existsSync(stagingDirFor("eng"))).toBe(false);
  });
});