@os-eco/overstory-cli 0.9.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +50 -19
  2. package/agents/builder.md +19 -9
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +204 -87
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +219 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/mail-poll-detect.test.ts +153 -0
  18. package/src/agents/mail-poll-detect.ts +73 -0
  19. package/src/agents/overlay.test.ts +60 -4
  20. package/src/agents/overlay.ts +63 -8
  21. package/src/agents/scope-detect.test.ts +190 -0
  22. package/src/agents/scope-detect.ts +146 -0
  23. package/src/agents/turn-lock.test.ts +181 -0
  24. package/src/agents/turn-lock.ts +235 -0
  25. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  26. package/src/agents/turn-runner-dispatch.ts +105 -0
  27. package/src/agents/turn-runner.test.ts +2312 -0
  28. package/src/agents/turn-runner.ts +1383 -0
  29. package/src/commands/agents.ts +9 -0
  30. package/src/commands/clean.ts +54 -0
  31. package/src/commands/coordinator.test.ts +254 -0
  32. package/src/commands/coordinator.ts +273 -8
  33. package/src/commands/dashboard.test.ts +188 -0
  34. package/src/commands/dashboard.ts +14 -4
  35. package/src/commands/doctor.ts +3 -1
  36. package/src/commands/group.test.ts +94 -0
  37. package/src/commands/group.ts +49 -20
  38. package/src/commands/init.test.ts +8 -0
  39. package/src/commands/init.ts +8 -1
  40. package/src/commands/log.test.ts +187 -11
  41. package/src/commands/log.ts +171 -71
  42. package/src/commands/mail.test.ts +162 -0
  43. package/src/commands/mail.ts +64 -9
  44. package/src/commands/merge.test.ts +230 -1
  45. package/src/commands/merge.ts +68 -12
  46. package/src/commands/nudge.test.ts +351 -4
  47. package/src/commands/nudge.ts +356 -34
  48. package/src/commands/run.test.ts +43 -7
  49. package/src/commands/serve/build.test.ts +202 -0
  50. package/src/commands/serve/build.ts +206 -0
  51. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  52. package/src/commands/serve/coordinator-actions.ts +408 -0
  53. package/src/commands/serve/dev.test.ts +168 -0
  54. package/src/commands/serve/dev.ts +117 -0
  55. package/src/commands/serve/mail-actions.test.ts +312 -0
  56. package/src/commands/serve/mail-actions.ts +167 -0
  57. package/src/commands/serve/rest.test.ts +1323 -0
  58. package/src/commands/serve/rest.ts +708 -0
  59. package/src/commands/serve/static.ts +51 -0
  60. package/src/commands/serve/ws.test.ts +361 -0
  61. package/src/commands/serve/ws.ts +332 -0
  62. package/src/commands/serve.test.ts +459 -0
  63. package/src/commands/serve.ts +565 -0
  64. package/src/commands/sling.test.ts +177 -1
  65. package/src/commands/sling.ts +243 -71
  66. package/src/commands/status.test.ts +9 -0
  67. package/src/commands/status.ts +12 -4
  68. package/src/commands/stop.test.ts +255 -1
  69. package/src/commands/stop.ts +107 -8
  70. package/src/commands/watch.test.ts +43 -0
  71. package/src/commands/watch.ts +153 -28
  72. package/src/config.ts +23 -0
  73. package/src/doctor/consistency.test.ts +106 -0
  74. package/src/doctor/consistency.ts +48 -1
  75. package/src/doctor/serve.test.ts +95 -0
  76. package/src/doctor/serve.ts +86 -0
  77. package/src/doctor/types.ts +2 -1
  78. package/src/doctor/watchdog.ts +57 -1
  79. package/src/events/tailer.test.ts +234 -1
  80. package/src/events/tailer.ts +90 -0
  81. package/src/index.ts +57 -6
  82. package/src/insights/quality-gates.test.ts +141 -0
  83. package/src/insights/quality-gates.ts +156 -0
  84. package/src/json.ts +29 -0
  85. package/src/logging/theme.ts +4 -0
  86. package/src/mail/client.ts +15 -2
  87. package/src/mail/store.test.ts +82 -0
  88. package/src/mail/store.ts +41 -4
  89. package/src/merge/lock.test.ts +149 -0
  90. package/src/merge/lock.ts +140 -0
  91. package/src/merge/predict.test.ts +387 -0
  92. package/src/merge/predict.ts +249 -0
  93. package/src/merge/resolver.ts +1 -1
  94. package/src/mulch/client.ts +3 -3
  95. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  96. package/src/runtimes/claude.test.ts +791 -1
  97. package/src/runtimes/claude.ts +323 -1
  98. package/src/runtimes/connections.test.ts +141 -1
  99. package/src/runtimes/connections.ts +73 -4
  100. package/src/runtimes/headless-connection.test.ts +264 -0
  101. package/src/runtimes/headless-connection.ts +158 -0
  102. package/src/runtimes/types.ts +10 -0
  103. package/src/schema-consistency.test.ts +1 -0
  104. package/src/sessions/store.test.ts +657 -29
  105. package/src/sessions/store.ts +286 -23
  106. package/src/test-setup.test.ts +31 -0
  107. package/src/test-setup.ts +28 -0
  108. package/src/types.ts +107 -2
  109. package/src/utils/pid.test.ts +85 -1
  110. package/src/utils/pid.ts +86 -1
  111. package/src/utils/process-scan.test.ts +53 -0
  112. package/src/utils/process-scan.ts +76 -0
  113. package/src/watchdog/daemon.test.ts +1607 -376
  114. package/src/watchdog/daemon.ts +462 -88
  115. package/src/watchdog/health.test.ts +282 -0
  116. package/src/watchdog/health.ts +126 -27
  117. package/src/worktree/manager.test.ts +218 -1
  118. package/src/worktree/manager.ts +55 -0
  119. package/src/worktree/process.test.ts +71 -0
  120. package/src/worktree/process.ts +25 -5
  121. package/src/worktree/tmux.test.ts +28 -0
  122. package/src/worktree/tmux.ts +27 -3
  123. package/templates/CLAUDE.md.tmpl +19 -8
  124. package/templates/overlay.md.tmpl +5 -2
@@ -0,0 +1,2312 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { existsSync } from "node:fs";
3
+ import { mkdtemp, rm } from "node:fs/promises";
4
+ import { tmpdir } from "node:os";
5
+ import { join } from "node:path";
6
+ import { createEventStore } from "../events/store.ts";
7
+ import { createMailClient } from "../mail/client.ts";
8
+ import { createMailStore } from "../mail/store.ts";
9
+ import { ClaudeRuntime } from "../runtimes/claude.ts";
10
+ import type { AgentRuntime, DirectSpawnOpts } from "../runtimes/types.ts";
11
+ import { createSessionStore } from "../sessions/store.ts";
12
+ import type { AgentSession, ResolvedModel } from "../types.ts";
13
+ import { _resetInProcessLocks, readTurnLock } from "./turn-lock.ts";
14
+ import {
15
+ type RunnerLogger,
16
+ runTurn,
17
+ type TurnSpawnFn,
18
+ type TurnSubprocess,
19
+ } from "./turn-runner.ts";
20
+
21
+ // ---------- fake subprocess plumbing ----------
22
+
23
/**
 * Test double for the subprocess handle returned by the tests' spawn
 * functions. On top of the `TurnSubprocess` surface it exposes underscore-
 * prefixed hooks so a test can script the child's output/exit and inspect
 * what the code under test did to it.
 */
interface FakeProc extends TurnSubprocess {
	/** Optional stderr stream; `makeFakeProc` initialises it to `null`. */
	stderr?: ReadableStream<Uint8Array> | null;

	// ----- recorded interactions -----

	/** Text of every chunk passed to `stdin.write`, in call order. */
	_writes: string[];
	/** Signal argument of every `kill()` call, in call order. */
	_killSignals: (string | number | undefined)[];
	/** `true` once `kill()` has been called at least once. */
	_killed: boolean;

	// ----- scripting hooks -----

	/** Enqueue `line` plus a trailing newline on stdout (ignored once stdout is closed). */
	_pushLine(line: string): void;
	/** Close stdout; safe to call more than once. */
	_closeStdout(): void;
	/** Close stdout and settle `exited` with `code`. */
	_exit(code: number | null): void;
	/** Replace the `stderr` stream. */
	_setStderr(stream: ReadableStream<Uint8Array> | null): void;
}
33
+
34
/** Next pid handed out by `makeFakeProc`; seeded at 1000 and bumped once per fake. */
let fakeProcCounter = 1000;

/**
 * Build an in-memory stand-in for a spawned turn subprocess.
 *
 * The fake owns a `ReadableStream` for stdout and a promise for `exited`;
 * tests drive both through the underscore hooks (`_pushLine`, `_closeStdout`,
 * `_exit`). Everything written to stdin and every signal passed to `kill()`
 * is recorded for later assertions.
 *
 * @returns A fresh fake with a unique `pid`, open stdout, pending `exited`,
 *          and `stderr` set to `null`.
 */
function makeFakeProc(): FakeProc {
	// One encoder/decoder per fake instead of one per call.
	const encoder = new TextEncoder();
	const decoder = new TextDecoder();

	// The controller is assigned synchronously inside `start`, hence the `!`.
	let stdoutController!: ReadableStreamDefaultController<Uint8Array>;
	const stdout = new ReadableStream<Uint8Array>({
		start(controller) {
			stdoutController = controller;
		},
	});
	let stdoutClosed = false;
	const closeStdout = (): void => {
		if (stdoutClosed) return;
		stdoutClosed = true;
		try {
			stdoutController.close();
		} catch {
			// already closed
		}
	};

	const writes: string[] = [];

	// `exited` settles at most once, via `_exit()` or the first `kill()`.
	let resolveExited!: (code: number | null) => void;
	const exited = new Promise<number | null>((resolve) => {
		resolveExited = resolve;
	});
	let exitedDone = false;
	const finishExit = (code: number | null): void => {
		if (exitedDone) return;
		exitedDone = true;
		resolveExited(code);
	};

	const killSignals: Array<string | number | undefined> = [];
	let killed = false;

	const proc: FakeProc = {
		pid: fakeProcCounter++,
		stdin: {
			write(data: string | Uint8Array): number {
				const isText = typeof data === "string";
				writes.push(isText ? data : decoder.decode(data));
				// Report bytes written, not UTF-16 code units: the two differ
				// for any non-ASCII payload.
				return isText ? encoder.encode(data).byteLength : data.byteLength;
			},
			end(): void {
				// no-op for fakes; production Bun.spawn closes the pipe.
			},
		},
		stdout,
		exited,
		kill(signal?: string | number): void {
			// Every call is recorded, but only the first one has an effect.
			killSignals.push(signal);
			if (killed) return;
			killed = true;
			closeStdout();
			finishExit(null);
		},
		_writes: writes,
		_killSignals: killSignals,
		// Live view of the closure flag (read-only accessor).
		get _killed(): boolean {
			return killed;
		},
		_pushLine(line: string): void {
			if (stdoutClosed) return;
			stdoutController.enqueue(encoder.encode(`${line}\n`));
		},
		_closeStdout: closeStdout,
		_exit(code: number | null): void {
			closeStdout();
			finishExit(code);
		},
		_setStderr(stream: ReadableStream<Uint8Array> | null): void {
			proc.stderr = stream;
		},
		stderr: null,
	};
	return proc;
}
113
+
114
/**
 * Push a minimal two-event stream-json turn onto the fake's stdout: a
 * `system`/`init` event followed by a `result` event.
 *
 * @param proc Fake subprocess whose stdout receives the two lines.
 * @param opts Optional overrides; defaults are `sessionId: "session-test"`,
 *             `isError: false`, `durationMs: 50`.
 */
function emitFakeTurn(
	proc: FakeProc,
	opts: { sessionId?: string; isError?: boolean; durationMs?: number },
): void {
	const sid = opts.sessionId ?? "session-test";

	const initEvent = {
		type: "system",
		subtype: "init",
		session_id: sid,
		model: "claude-test",
	};
	const resultEvent = {
		type: "result",
		subtype: "success",
		session_id: sid,
		result: "done",
		is_error: opts.isError ?? false,
		duration_ms: opts.durationMs ?? 50,
		num_turns: 1,
	};

	// One JSON document per line, init first.
	for (const event of [initEvent, resultEvent]) {
		proc._pushLine(JSON.stringify(event));
	}
}
139
+
140
+ // ---------- runtime spy ----------
141
+
142
/**
 * Create a real `ClaudeRuntime` whose `buildDirectSpawn` is wrapped so every
 * call's options are recorded before being forwarded to the original method.
 *
 * @returns The patched runtime and the (live) array of captured option
 *          snapshots, one shallow copy per call.
 */
function makeSpyRuntime(): {
	runtime: AgentRuntime;
	spawnCalls: Array<DirectSpawnOpts & { resumeSessionId?: string | null }>;
} {
	// Options as captured, including the `resumeSessionId` field that
	// turn-runner threads through.
	type CapturedOpts = DirectSpawnOpts & { resumeSessionId?: string | null };

	const spawnCalls: CapturedOpts[] = [];
	const runtime = new ClaudeRuntime();
	const delegate = runtime.buildDirectSpawn.bind(runtime);

	// Overwrite the method on this instance only.
	const patchable = runtime as unknown as { buildDirectSpawn: typeof delegate };
	patchable.buildDirectSpawn = (opts: DirectSpawnOpts) => {
		const snapshot: CapturedOpts = { ...(opts as CapturedOpts) };
		spawnCalls.push(snapshot);
		return delegate(opts);
	};

	return { runtime, spawnCalls };
}
159
+
160
+ // ---------- session bootstrap ----------
161
+
162
/**
 * Insert (or replace) a session row for a test agent.
 *
 * Opens the session store at `sessionsDbPath`, upserts one row built from
 * `overrides` plus defaults, and always closes the store afterwards.
 *
 * @param sessionsDbPath Path of the sessions database to write to.
 * @param overrides Field values for the row. `agentName` is required; every
 *        other field falls back to a default (capability "builder", state
 *        "booting", timestamps "now", nullable fields `null`, …). A supplied
 *        `id` is honoured; otherwise the id is `session-<agentName>`.
 */
function seedSession(
	sessionsDbPath: string,
	overrides: Partial<AgentSession> & Pick<AgentSession, "agentName">,
): void {
	const store = createSessionStore(sessionsDbPath);
	try {
		const now = new Date().toISOString();
		store.upsert({
			// Previously a caller-supplied id was silently dropped.
			id: overrides.id ?? `session-${overrides.agentName}`,
			agentName: overrides.agentName,
			capability: overrides.capability ?? "builder",
			worktreePath: overrides.worktreePath ?? "/tmp/worktree",
			branchName: overrides.branchName ?? "branch",
			taskId: overrides.taskId ?? "task-test",
			tmuxSession: overrides.tmuxSession ?? "",
			state: overrides.state ?? "booting",
			pid: overrides.pid ?? null,
			parentAgent: overrides.parentAgent ?? null,
			depth: overrides.depth ?? 0,
			runId: overrides.runId ?? null,
			startedAt: overrides.startedAt ?? now,
			lastActivity: overrides.lastActivity ?? now,
			escalationLevel: overrides.escalationLevel ?? 0,
			stalledSince: overrides.stalledSince ?? null,
			transcriptPath: overrides.transcriptPath ?? null,
			// These two keys are only present on the row when the caller set them.
			...(overrides.promptVersion !== undefined ? { promptVersion: overrides.promptVersion } : {}),
			...(overrides.claudeSessionId !== undefined
				? { claudeSessionId: overrides.claudeSessionId }
				: {}),
		});
	} finally {
		store.close();
	}
}
196
+
197
/**
 * Look up one agent's session row by name.
 *
 * Opens the store at `sessionsDbPath` for the duration of the lookup and
 * closes it again whether or not the lookup throws.
 *
 * @returns Whatever `store.getByName(agentName)` yields.
 */
function readSession(sessionsDbPath: string, agentName: string): AgentSession | null {
	const store = createSessionStore(sessionsDbPath);
	let session: AgentSession | null;
	try {
		session = store.getByName(agentName);
	} finally {
		store.close();
	}
	return session;
}
205
+
206
+ // ---------- shared fixture context ----------
207
+
208
/**
 * No-op diagnostic sink used as the default `_logWarning` in tests that do
 * not assert on log output. It swallows the `[turn-runner:error]` stderr
 * mirror, so the contract-violation messages (overstory-6071) that many
 * tests trigger on purpose — a clean exit with no terminal mail seeded —
 * stay out of the test runner output.
 */
const silentLogger: RunnerLogger = () => undefined;
215
+
216
/**
 * Per-test fixture paths. `makeRunOpts` copies each field onto the matching
 * `runTurn` option of the same name.
 */
interface Ctx {
	// ----- directories -----

	/** Root of the overstory state directory (also the base for `turn.pid` paths). */
	overstoryDir: string;
	/** Worktree path passed to `runTurn`. */
	worktreePath: string;
	/** Project root passed to `runTurn`. */
	projectRoot: string;

	// ----- database files -----

	/** Path of the mail database. */
	mailDbPath: string;
	/** Path of the events database. */
	eventsDbPath: string;
	/** Path of the sessions database. */
	sessionsDbPath: string;
}
224
+
225
/** Fixed model resolution handed to every `runTurn` call built by `makeRunOpts`. */
const RESOLVED_MODEL: ResolvedModel = {
	model: "sonnet",
	env: {},
	isExplicitOverride: false,
};
226
+
227
/**
 * Assemble the options object for a `runTurn` call from the shared fixture
 * paths plus per-test overrides.
 *
 * @param ctx Fixture paths copied onto the same-named options.
 * @param agentName Agent the turn runs for.
 * @param overrides `runtime` is required. `capability` defaults to
 *        "builder", `runId` to `null`, `userTurnNdjson` to a single NDJSON
 *        user message saying "hello", and `_logWarning` to `silentLogger`.
 *        `_spawnFn`, `abortSignal` and `sigkillDelayMs` are only included as
 *        keys when provided.
 * @returns The first-argument value for `runTurn`.
 */
function makeRunOpts(
	ctx: Ctx,
	agentName: string,
	overrides: {
		runtime: AgentRuntime;
		userTurnNdjson?: string;
		_spawnFn?: TurnSpawnFn;
		abortSignal?: AbortSignal;
		sigkillDelayMs?: number;
		runId?: string | null;
		capability?: string;
		_logWarning?: RunnerLogger;
	},
): Parameters<typeof runTurn>[0] {
	const { overstoryDir, worktreePath, projectRoot, mailDbPath, eventsDbPath, sessionsDbPath } =
		ctx;
	const { _spawnFn, abortSignal, sigkillDelayMs } = overrides;

	// Fallback payload: one newline-terminated user message.
	const defaultUserTurn = `${JSON.stringify({
		type: "user",
		message: { role: "user", content: [{ type: "text", text: "hello" }] },
	})}\n`;

	return {
		agentName,
		capability: overrides.capability ?? "builder",
		overstoryDir,
		worktreePath,
		projectRoot,
		taskId: "task-test",
		userTurnNdjson: overrides.userTurnNdjson ?? defaultUserTurn,
		runtime: overrides.runtime,
		resolvedModel: RESOLVED_MODEL,
		runId: overrides.runId ?? null,
		mailDbPath,
		eventsDbPath,
		sessionsDbPath,
		// Omit the key entirely (rather than setting it to undefined) when
		// the caller did not supply a value.
		...(_spawnFn !== undefined ? { _spawnFn } : {}),
		...(abortSignal !== undefined ? { abortSignal } : {}),
		...(sigkillDelayMs !== undefined ? { sigkillDelayMs } : {}),
		_logWarning: overrides._logWarning ?? silentLogger,
	};
}
266
+
267
/**
 * Path of an agent's `turn.pid` file:
 * `<overstoryDir>/agents/<agentName>/turn.pid`.
 */
function turnPidPathFor(ctx: Ctx, agentName: string): string {
	const segments = [ctx.overstoryDir, "agents", agentName, "turn.pid"];
	return join(...segments);
}
270
+
271
+ // ---------- tests ----------
272
+
273
+ describe("runTurn", () => {
274
+ let ctx: Ctx;
275
+
276
+ beforeEach(async () => {
277
+ const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-turnrunner-test-"));
278
+ ctx = {
279
+ overstoryDir,
280
+ worktreePath: overstoryDir, // arbitrary; spawn is faked
281
+ projectRoot: overstoryDir,
282
+ mailDbPath: join(overstoryDir, "mail.db"),
283
+ eventsDbPath: join(overstoryDir, "events.db"),
284
+ sessionsDbPath: join(overstoryDir, "sessions.db"),
285
+ };
286
+ _resetInProcessLocks();
287
+ });
288
+
289
+ afterEach(async () => {
290
+ _resetInProcessLocks();
291
+ await rm(ctx.overstoryDir, { recursive: true, force: true });
292
+ });
293
+
294
+ test("empty userTurnNdjson is a no-op: no spawn, no state transition", async () => {
295
+ seedSession(ctx.sessionsDbPath, { agentName: "noop", state: "working" });
296
+ const { runtime } = makeSpyRuntime();
297
+ let spawnCount = 0;
298
+ const spawnFn: TurnSpawnFn = () => {
299
+ spawnCount++;
300
+ return makeFakeProc();
301
+ };
302
+
303
+ const result = await runTurn(
304
+ makeRunOpts(ctx, "noop", { runtime, userTurnNdjson: "", _spawnFn: spawnFn }),
305
+ );
306
+
307
+ expect(spawnCount).toBe(0);
308
+ expect(result.exitCode).toBeNull();
309
+ expect(result.cleanResult).toBe(false);
310
+ expect(result.terminalMailObserved).toBe(false);
311
+ expect(result.durationMs).toBe(0);
312
+ expect(result.initialState).toBe("working");
313
+ expect(result.finalState).toBe("working");
314
+
315
+ // Session state must remain untouched.
316
+ const after = readSession(ctx.sessionsDbPath, "noop");
317
+ expect(after?.state).toBe("working");
318
+ });
319
+
320
+ test("happy path: spawn, drain events, capture session id, contract violation surfaces as completed", async () => {
321
+ seedSession(ctx.sessionsDbPath, { agentName: "alpha", state: "booting" });
322
+ const { runtime, spawnCalls } = makeSpyRuntime();
323
+
324
+ const fake = makeFakeProc();
325
+ const spawnFn: TurnSpawnFn = () => {
326
+ emitFakeTurn(fake, { sessionId: "claude-sess-A", isError: false });
327
+ fake._exit(0);
328
+ return fake;
329
+ };
330
+
331
+ // Suppress the contract-violation error log (overstory-6071) so it
332
+ // doesn't leak to test stderr; assertions below still cover the case.
333
+ const logger: RunnerLogger = () => {};
334
+ const result = await runTurn(
335
+ makeRunOpts(ctx, "alpha", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
336
+ );
337
+
338
+ expect(result.exitCode).toBe(0);
339
+ expect(result.cleanResult).toBe(true);
340
+ expect(result.newSessionId).toBe("claude-sess-A");
341
+ expect(result.resumeMismatch).toBe(false);
342
+ expect(result.terminalMailObserved).toBe(false);
343
+ // initial=booting, clean exit but no terminal mail → contract violation,
344
+ // settles to `completed` (overstory-6071).
345
+ expect(result.initialState).toBe("booting");
346
+ expect(result.terminalMailMissing).toBe(true);
347
+ expect(result.finalState).toBe("completed");
348
+
349
+ const after = readSession(ctx.sessionsDbPath, "alpha");
350
+ expect(after?.state).toBe("completed");
351
+ expect(after?.claudeSessionId).toBe("claude-sess-A");
352
+
353
+ // resumeSessionId on first turn is null (no prior id stored).
354
+ expect(spawnCalls.length).toBe(1);
355
+ expect(spawnCalls[0]?.resumeSessionId ?? null).toBeNull();
356
+ });
357
+
358
+ test("re-reads claudeSessionId under the lock — caller view may be stale", async () => {
359
+ seedSession(ctx.sessionsDbPath, {
360
+ agentName: "stale",
361
+ state: "working",
362
+ claudeSessionId: "old-id",
363
+ });
364
+
365
+ // External update BEFORE the runTurn call. runTurn must read this value
366
+ // when it acquires the lock, not the older one any caller might be holding.
367
+ const updateStore = createSessionStore(ctx.sessionsDbPath);
368
+ try {
369
+ updateStore.updateClaudeSessionId("stale", "fresh-id");
370
+ } finally {
371
+ updateStore.close();
372
+ }
373
+
374
+ const { runtime, spawnCalls } = makeSpyRuntime();
375
+ const fake = makeFakeProc();
376
+ const spawnFn: TurnSpawnFn = () => {
377
+ emitFakeTurn(fake, { sessionId: "fresh-id" }); // same id back; no mismatch
378
+ fake._exit(0);
379
+ return fake;
380
+ };
381
+
382
+ const result = await runTurn(makeRunOpts(ctx, "stale", { runtime, _spawnFn: spawnFn }));
383
+
384
+ expect(spawnCalls[0]?.resumeSessionId).toBe("fresh-id");
385
+ expect(result.resumeMismatch).toBe(false);
386
+ });
387
+
388
+ test("resumeMismatch fires when stream-json emits a different session id", async () => {
389
+ seedSession(ctx.sessionsDbPath, {
390
+ agentName: "mismatch",
391
+ state: "working",
392
+ claudeSessionId: "want-resume",
393
+ });
394
+ const { runtime } = makeSpyRuntime();
395
+
396
+ const fake = makeFakeProc();
397
+ const spawnFn: TurnSpawnFn = () => {
398
+ emitFakeTurn(fake, { sessionId: "actually-new" });
399
+ fake._exit(0);
400
+ return fake;
401
+ };
402
+
403
+ const result = await runTurn(makeRunOpts(ctx, "mismatch", { runtime, _spawnFn: spawnFn }));
404
+
405
+ expect(result.newSessionId).toBe("actually-new");
406
+ expect(result.resumeMismatch).toBe(true);
407
+
408
+ // SessionStore overwritten with the observed value.
409
+ const after = readSession(ctx.sessionsDbPath, "mismatch");
410
+ expect(after?.claudeSessionId).toBe("actually-new");
411
+
412
+ // overstory-088b C2: a structured warn event lands in events.db so
413
+ // observability mirrors the runner diagnostic. Carries both the requested
414
+ // and observed session ids in the data payload.
415
+ const eventStore = createEventStore(ctx.eventsDbPath);
416
+ try {
417
+ const events = eventStore.getByAgent("mismatch");
418
+ const mismatchEvent = events.find((e) => {
419
+ if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
420
+ try {
421
+ const parsed = JSON.parse(e.data) as { type?: string };
422
+ return parsed.type === "resume_mismatch";
423
+ } catch {
424
+ return false;
425
+ }
426
+ });
427
+ expect(mismatchEvent).toBeDefined();
428
+ const payload = JSON.parse(mismatchEvent?.data ?? "{}") as {
429
+ type: string;
430
+ requestedSessionId: string;
431
+ observedSessionId: string;
432
+ };
433
+ expect(payload.requestedSessionId).toBe("want-resume");
434
+ expect(payload.observedSessionId).toBe("actually-new");
435
+ } finally {
436
+ eventStore.close();
437
+ }
438
+ });
439
+
440
+ test("resume match (sid === priorSessionId) does NOT emit a mismatch event", async () => {
441
+ seedSession(ctx.sessionsDbPath, {
442
+ agentName: "match",
443
+ state: "working",
444
+ claudeSessionId: "same-id",
445
+ });
446
+ const { runtime } = makeSpyRuntime();
447
+
448
+ const fake = makeFakeProc();
449
+ const spawnFn: TurnSpawnFn = () => {
450
+ emitFakeTurn(fake, { sessionId: "same-id" });
451
+ fake._exit(0);
452
+ return fake;
453
+ };
454
+
455
+ const result = await runTurn(makeRunOpts(ctx, "match", { runtime, _spawnFn: spawnFn }));
456
+ expect(result.resumeMismatch).toBe(false);
457
+
458
+ const eventStore = createEventStore(ctx.eventsDbPath);
459
+ try {
460
+ const events = eventStore.getByAgent("match");
461
+ const mismatchEvent = events.find((e) => e.data?.includes("resume_mismatch") ?? false);
462
+ expect(mismatchEvent).toBeUndefined();
463
+ } finally {
464
+ eventStore.close();
465
+ }
466
+ });
467
+
468
+ test("terminalMailObserved + clean exit → completed state", async () => {
469
+ seedSession(ctx.sessionsDbPath, { agentName: "wd", state: "working" });
470
+ const { runtime } = makeSpyRuntime();
471
+
472
+ // Pre-seed: a worker_done from a PRIOR turn (well in the past). Must not
473
+ // confuse this turn's snapshot.
474
+ const mailStore = createMailStore(ctx.mailDbPath);
475
+ try {
476
+ const client = createMailClient(mailStore);
477
+ client.sendProtocol({
478
+ from: "wd",
479
+ to: "lead",
480
+ subject: "Worker done: prior",
481
+ body: "old",
482
+ type: "worker_done",
483
+ priority: "normal",
484
+ payload: {
485
+ taskId: "old",
486
+ branch: "old",
487
+ exitCode: 0,
488
+ filesModified: [],
489
+ },
490
+ });
491
+ } finally {
492
+ mailStore.close();
493
+ }
494
+
495
+ // Simulate fresh worker_done sent during the spawn.
496
+ const fake = makeFakeProc();
497
+ const spawnFn: TurnSpawnFn = () => {
498
+ (async () => {
499
+ // Wait long enough for snapshot timestamp to be < this insert.
500
+ await Bun.sleep(20);
501
+ const s = createMailStore(ctx.mailDbPath);
502
+ try {
503
+ const c = createMailClient(s);
504
+ c.sendProtocol({
505
+ from: "wd",
506
+ to: "lead",
507
+ subject: "Worker done: this turn",
508
+ body: "new",
509
+ type: "worker_done",
510
+ priority: "normal",
511
+ payload: {
512
+ taskId: "this-turn",
513
+ branch: "branch",
514
+ exitCode: 0,
515
+ filesModified: [],
516
+ },
517
+ });
518
+ } finally {
519
+ s.close();
520
+ }
521
+ emitFakeTurn(fake, { sessionId: "wd-session" });
522
+ fake._exit(0);
523
+ })();
524
+ return fake;
525
+ };
526
+
527
+ const result = await runTurn(makeRunOpts(ctx, "wd", { runtime, _spawnFn: spawnFn }));
528
+
529
+ expect(result.terminalMailObserved).toBe(true);
530
+ expect(result.cleanResult).toBe(true);
531
+ expect(result.finalState).toBe("completed");
532
+
533
+ const after = readSession(ctx.sessionsDbPath, "wd");
534
+ expect(after?.state).toBe("completed");
535
+ });
536
+
537
+ test("turn that runs but does not complete settles to between_turns, not working (overstory-3087)", async () => {
538
+ // Spawn-per-turn substate split: a turn that produced events but did
539
+ // not deliver the terminal mail nor abort must end in `between_turns`
540
+ // so the UI can tell a worker waiting for its next mail batch from
541
+ // one mid-execution. Pre-3087 this settled to `working`.
542
+ seedSession(ctx.sessionsDbPath, { agentName: "settler", state: "booting" });
543
+ const { runtime } = makeSpyRuntime();
544
+ const fake = makeFakeProc();
545
+ const spawnFn: TurnSpawnFn = () => {
546
+ // Force is_error=true so the runner does NOT classify this as a
547
+ // clean exit (which would settle to `completed` via the
548
+ // terminal-mail-missing path). is_error=true keeps cleanResult
549
+ // false, sending us into the observedAnyEvent → between_turns
550
+ // branch we want to test.
551
+ emitFakeTurn(fake, { sessionId: "settler-sid", isError: true });
552
+ fake._exit(0);
553
+ return fake;
554
+ };
555
+
556
+ const result = await runTurn(makeRunOpts(ctx, "settler", { runtime, _spawnFn: spawnFn }));
557
+
558
+ expect(result.cleanResult).toBe(false);
559
+ expect(result.terminalMailObserved).toBe(false);
560
+ expect(result.terminalMailMissing).toBe(false);
561
+ expect(result.finalState).toBe("between_turns");
562
+
563
+ const after = readSession(ctx.sessionsDbPath, "settler");
564
+ expect(after?.state).toBe("between_turns");
565
+ });
566
+
567
+ test("first parser event transitions booting → in_turn (overstory-3087)", async () => {
568
+ // The mid-turn "first event" hook must flip the row out of `booting`
569
+ // (or `between_turns`/`working`) into `in_turn` so observers see the
570
+ // agent as actively executing, distinct from the idle waiting state.
571
+ seedSession(ctx.sessionsDbPath, { agentName: "boots", state: "booting" });
572
+ const { runtime } = makeSpyRuntime();
573
+ const fake = makeFakeProc();
574
+ // Mutable ref so the IIFE assignment is visible to the type checker.
575
+ const captured: { state: string | null } = { state: null };
576
+ const spawnFn: TurnSpawnFn = () => {
577
+ (async () => {
578
+ // Push the init event, then sample the row before result.
579
+ fake._pushLine(
580
+ JSON.stringify({
581
+ type: "system",
582
+ subtype: "init",
583
+ session_id: "boots-sid",
584
+ model: "claude-test",
585
+ }),
586
+ );
587
+ // Yield the event loop so the parser drains the init event
588
+ // and updates the session row before we read it.
589
+ await Bun.sleep(20);
590
+ captured.state = readSession(ctx.sessionsDbPath, "boots")?.state ?? null;
591
+ // Send is_error=true so we settle to between_turns rather than
592
+ // the contract-violation completed path — this test is about
593
+ // the mid-turn transition, not the terminal classification.
594
+ emitFakeTurn(fake, { sessionId: "boots-sid", isError: true });
595
+ fake._exit(0);
596
+ })();
597
+ return fake;
598
+ };
599
+
600
+ await runTurn(makeRunOpts(ctx, "boots", { runtime, _spawnFn: spawnFn }));
601
+
602
+ expect(captured.state).toBe("in_turn");
603
+ });
604
+
605
+ test("between_turns → in_turn → between_turns cycle on a follow-up batch (overstory-3087)", async () => {
606
+ // A spawn-per-turn worker that finished its first turn (state=
607
+ // between_turns) must flip back to in_turn when the next mail batch
608
+ // fires its first parser event, and settle back to between_turns
609
+ // when the turn ends without a terminal mail.
610
+ seedSession(ctx.sessionsDbPath, { agentName: "cycle", state: "between_turns" });
611
+ const { runtime } = makeSpyRuntime();
612
+ const fake = makeFakeProc();
613
+ const captured: { midTurnState: string | null } = { midTurnState: null };
614
+ const spawnFn: TurnSpawnFn = () => {
615
+ (async () => {
616
+ fake._pushLine(
617
+ JSON.stringify({
618
+ type: "system",
619
+ subtype: "init",
620
+ session_id: "cycle-sid",
621
+ model: "claude-test",
622
+ }),
623
+ );
624
+ await Bun.sleep(20);
625
+ captured.midTurnState = readSession(ctx.sessionsDbPath, "cycle")?.state ?? null;
626
+ emitFakeTurn(fake, { sessionId: "cycle-sid", isError: true });
627
+ fake._exit(0);
628
+ })();
629
+ return fake;
630
+ };
631
+
632
+ const result = await runTurn(makeRunOpts(ctx, "cycle", { runtime, _spawnFn: spawnFn }));
633
+
634
+ expect(captured.midTurnState).toBe("in_turn");
635
+ expect(result.initialState).toBe("between_turns");
636
+ expect(result.finalState).toBe("between_turns");
637
+ });
638
+
639
+ test("clean exit but no worker_done → contract violation, completed + error log (overstory-6071)", async () => {
640
+ // Pre-fix: claude exiting cleanly without sending the capability's
641
+ // terminal mail left the session at `working` forever — the process is
642
+ // gone but the row looks alive. Now the runner logs an error and
643
+ // settles to `completed` so operators see something terminal.
644
+ seedSession(ctx.sessionsDbPath, { agentName: "idle", state: "working" });
645
+ const { runtime } = makeSpyRuntime();
646
+ const fake = makeFakeProc();
647
+ const spawnFn: TurnSpawnFn = () => {
648
+ emitFakeTurn(fake, { sessionId: "idle-session", isError: false });
649
+ fake._exit(0);
650
+ return fake;
651
+ };
652
+
653
+ const errors: Array<{ level: string; message: string }> = [];
654
+ const logger: RunnerLogger = (level, message) => {
655
+ errors.push({ level, message });
656
+ };
657
+
658
+ const result = await runTurn(
659
+ makeRunOpts(ctx, "idle", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
660
+ );
661
+
662
+ expect(result.cleanResult).toBe(true);
663
+ expect(result.terminalMailObserved).toBe(false);
664
+ expect(result.terminalMailMissing).toBe(true);
665
+ expect(result.finalState).toBe("completed");
666
+
667
+ // Contract violation must surface via the runner diagnostic sink.
668
+ const violation = errors.find(
669
+ (e) => e.level === "error" && e.message.includes("without sending terminal mail"),
670
+ );
671
+ expect(violation).toBeDefined();
672
+
673
+ const after = readSession(ctx.sessionsDbPath, "idle");
674
+ expect(after?.state).toBe("completed");
675
+ });
676
+
677
+ test("merger: merged mail counts as terminal → completed", async () => {
678
+ seedSession(ctx.sessionsDbPath, {
679
+ agentName: "mg",
680
+ capability: "merger",
681
+ state: "working",
682
+ });
683
+ const { runtime } = makeSpyRuntime();
684
+
685
+ const fake = makeFakeProc();
686
+ const spawnFn: TurnSpawnFn = () => {
687
+ (async () => {
688
+ await Bun.sleep(20);
689
+ const s = createMailStore(ctx.mailDbPath);
690
+ try {
691
+ createMailClient(s).sendProtocol({
692
+ from: "mg",
693
+ to: "lead",
694
+ subject: "Merged: feature/foo",
695
+ body: "ok",
696
+ type: "merged",
697
+ priority: "normal",
698
+ payload: { branch: "feature/foo", taskId: "t-mg", tier: "clean-merge" },
699
+ });
700
+ } finally {
701
+ s.close();
702
+ }
703
+ emitFakeTurn(fake, { sessionId: "mg-session" });
704
+ fake._exit(0);
705
+ })();
706
+ return fake;
707
+ };
708
+
709
+ const result = await runTurn(
710
+ makeRunOpts(ctx, "mg", { runtime, _spawnFn: spawnFn, capability: "merger" }),
711
+ );
712
+
713
+ expect(result.terminalMailObserved).toBe(true);
714
+ expect(result.finalState).toBe("completed");
715
+ });
716
+
717
test("merger: merge_failed mail also counts as terminal → completed", async () => {
	// `merge_failed` is the merger's other accepted terminal type: reporting
	// a failed merge still counts as terminal mail for this turn.
	seedSession(ctx.sessionsDbPath, { agentName: "mgf", capability: "merger", state: "working" });
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();

	// Simulated agent: report the conflict by mail, then end the turn cleanly.
	const reportFailureThenExit = async (): Promise<void> => {
		await Bun.sleep(20);
		const store = createMailStore(ctx.mailDbPath);
		try {
			const client = createMailClient(store);
			client.sendProtocol({
				from: "mgf",
				to: "lead",
				subject: "Merge failed: feature/bar",
				body: "conflict",
				type: "merge_failed",
				priority: "high",
				payload: {
					branch: "feature/bar",
					taskId: "t-mgf",
					conflictFiles: ["src/foo.ts"],
					errorMessage: "conflict",
				},
			});
		} finally {
			store.close();
		}
		emitFakeTurn(proc, { sessionId: "mgf-session" });
		proc._exit(0);
	};
	const spawnFn: TurnSpawnFn = () => {
		void reportFailureThenExit();
		return proc;
	};

	const opts = makeRunOpts(ctx, "mgf", { runtime, _spawnFn: spawnFn, capability: "merger" });
	const result = await runTurn(opts);

	expect(result.terminalMailObserved).toBe(true);
	expect(result.finalState).toBe("completed");
});
761
+
762
test("scout: --type result mail counts as terminal → completed (overstory-1a4c)", async () => {
	// Regression for overstory-1a4c. Workers often send `--type result`
	// rather than `--type worker_done`: both are valid mail types, and some
	// prompt examples presented `result` as a completion signal. Before the
	// fix such sessions stayed in `working` until the watchdog marked them
	// `zombie`. The runner now treats `result` as terminal for
	// builder/scout/reviewer/lead.
	seedSession(ctx.sessionsDbPath, {
		agentName: "scout-result",
		capability: "scout",
		state: "working",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();

	// Simulated scout: send a plain `result` mail, then finish the turn.
	const sendResultThenExit = async (): Promise<void> => {
		await Bun.sleep(20);
		const store = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(store).send({
				from: "scout-result",
				to: "coordinator",
				subject: "Spec ready: overstory-4670",
				body: "Spec written.",
				type: "result",
				priority: "normal",
			});
		} finally {
			store.close();
		}
		emitFakeTurn(proc, { sessionId: "scout-result-session" });
		proc._exit(0);
	};
	const spawnFn: TurnSpawnFn = () => {
		void sendResultThenExit();
		return proc;
	};

	const opts = makeRunOpts(ctx, "scout-result", {
		runtime,
		_spawnFn: spawnFn,
		capability: "scout",
	});
	const result = await runTurn(opts);

	expect(result.terminalMailObserved).toBe(true);
	expect(result.cleanResult).toBe(true);
	expect(result.finalState).toBe("completed");
});
807
+
808
test("merger: worker_done is NOT terminal for merger → contract violation, completed", async () => {
	// Mergers must send `merged` or `merge_failed`. A `worker_done` from a
	// merger doesn't count as terminal, so this is the same contract
	// violation as overstory-6071: clean exit, no terminal mail. Pre-fix
	// this stuck at `working`; now it settles to `completed` with a loud
	// error log.
	seedSession(ctx.sessionsDbPath, {
		agentName: "mg-wd",
		capability: "merger",
		state: "working",
	});
	const { runtime } = makeSpyRuntime();

	const fake = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		(async () => {
			await Bun.sleep(20);
			const s = createMailStore(ctx.mailDbPath);
			try {
				createMailClient(s).sendProtocol({
					from: "mg-wd",
					to: "lead",
					subject: "Worker done (wrong type for merger)",
					body: "x",
					type: "worker_done",
					priority: "normal",
					payload: { taskId: "t", branch: "b", exitCode: 0, filesModified: [] },
				});
			} finally {
				s.close();
			}
			emitFakeTurn(fake, { sessionId: "mg-wd-session" });
			fake._exit(0);
		})();
		return fake;
	};

	// Capture runner diagnostics so the error log described above is
	// actually verified rather than silently discarded.
	const logged: Array<{ level: string; message: string }> = [];
	const logger: RunnerLogger = (level, message) => {
		logged.push({ level, message });
	};
	const result = await runTurn(
		makeRunOpts(ctx, "mg-wd", {
			runtime,
			_spawnFn: spawnFn,
			capability: "merger",
			_logWarning: logger,
		}),
	);

	expect(result.terminalMailObserved).toBe(false);
	expect(result.terminalMailMissing).toBe(true);
	expect(result.finalState).toBe("completed");

	// Contract violation must surface via the runner diagnostic sink.
	const violation = logged.find(
		(e) => e.level === "error" && e.message.includes("terminal mail"),
	);
	expect(violation).toBeDefined();
});
859
+
860
test("stall watchdog: no parser events for eventStallTimeoutMs → SIGTERM, zombie (overstory-ddb3)", async () => {
	// Before the fix, a claude process that was alive but stalled (Anthropic
	// API hang, deadlock) blocked the parser drain forever, because the
	// for-await loop only ends when stdout closes. The runner now arms a
	// per-event stall watchdog that is reset by every event; when it times
	// out the process is killed through the existing SIGTERM/SIGKILL
	// escalation.
	seedSession(ctx.sessionsDbPath, { agentName: "stalled", state: "working" });
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();

	// The subprocess never emits anything: alive but stalled, so only the
	// watchdog can end this turn.
	const spawnFn: TurnSpawnFn = () => proc;

	const logged: Array<{ level: string; message: string }> = [];
	const logger: RunnerLogger = (level, message) => {
		logged.push({ level, message });
	};

	const baseOpts = makeRunOpts(ctx, "stalled", {
		runtime,
		_spawnFn: spawnFn,
		_logWarning: logger,
	});
	const result = await runTurn({ ...baseOpts, eventStallTimeoutMs: 50, sigkillDelayMs: 25 });

	expect(proc._killSignals[0]).toBe("SIGTERM");
	expect(result.stallAborted).toBe(true);
	expect(result.exitCode).toBeNull();
	expect(result.finalState).toBe("zombie");

	// The stall must be reported at error level.
	const sawStallLog = logged.some(
		(entry) => entry.level === "error" && entry.message.includes("parser stalled"),
	);
	expect(sawStallLog).toBe(true);

	const persisted = readSession(ctx.sessionsDbPath, "stalled");
	expect(persisted?.state).toBe("zombie");
});
904
+
905
test("stall watchdog: events reset the timer — live turns are not killed (overstory-ddb3)", async () => {
	// Per-event reset: a turn whose events keep arriving must not be
	// aborted by the stall watchdog. Events are spaced well inside the
	// stall budget, but there are enough of them that the cumulative
	// runtime exceeds the budget (12 × 50ms = 600ms > 500ms). A timer that
	// is armed once and never reset would therefore fire mid-turn; only a
	// properly resetting timer lets the turn finish.
	const stallBudgetMs = 500;
	const eventGapMs = 50;
	const eventCount = 12;

	seedSession(ctx.sessionsDbPath, { agentName: "live", state: "working" });
	const { runtime } = makeSpyRuntime();

	const fake = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		(async () => {
			const sessionId = "live-session";
			fake._pushLine(
				JSON.stringify({
					type: "system",
					subtype: "init",
					session_id: sessionId,
					model: "claude-test",
				}),
			);
			for (let i = 0; i < eventCount; i++) {
				await Bun.sleep(eventGapMs);
				fake._pushLine(
					JSON.stringify({
						type: "assistant",
						message: {
							role: "assistant",
							content: [{ type: "text", text: `chunk ${i}` }],
						},
						session_id: sessionId,
					}),
				);
			}
			emitFakeTurn(fake, { sessionId });
			fake._exit(0);
		})();
		return fake;
	};

	const logger: RunnerLogger = () => {};
	const result = await runTurn({
		...makeRunOpts(ctx, "live", {
			runtime,
			_spawnFn: spawnFn,
			_logWarning: logger,
		}),
		eventStallTimeoutMs: stallBudgetMs,
		sigkillDelayMs: 25,
	});

	expect(result.stallAborted).toBe(false);
	expect(result.exitCode).toBe(0);
	expect(result.cleanResult).toBe(true);
	// Sanity: the turn really did outlive the stall budget, so surviving it
	// proves the timer was reset by events rather than simply never reached.
	expect(result.durationMs).toBeGreaterThan(stallBudgetMs);
});
963
+
964
test("abortSignal triggers SIGTERM, finalState becomes zombie", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "to-kill", state: "working" });
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	const controller = new AbortController();

	// The subprocess emits init and then never closes; only the abort path
	// can end this turn.
	const initLine = JSON.stringify({
		type: "system",
		subtype: "init",
		session_id: "abort-test",
	});
	const spawnFn: TurnSpawnFn = () => {
		proc._pushLine(initLine);
		return proc;
	};

	const pending = runTurn(
		makeRunOpts(ctx, "to-kill", {
			runtime,
			_spawnFn: spawnFn,
			abortSignal: controller.signal,
			sigkillDelayMs: 25,
		}),
	);

	// Let the parser consume the init event before aborting.
	await Bun.sleep(60);
	controller.abort();
	const result = await pending;

	expect(proc._killSignals[0]).toBe("SIGTERM");
	expect(result.exitCode).toBeNull();
	expect(result.finalState).toBe("zombie");

	const persisted = readSession(ctx.sessionsDbPath, "to-kill");
	expect(persisted?.state).toBe("zombie");
});
1003
+
1004
+ // --- Parent-notify paths (overstory-4159, overstory-c772) ---
1005
+ //
1006
+ // When a turn ends without the capability's terminal mail, the runner emits
1007
+ // a synthetic worker_died mail to the parent so the lead does not block on
1008
+ // a signal that will never arrive. Three trigger paths:
1009
+ // 1. abort (operator or external abortSignal) → finalState=zombie
1010
+ // 2. parser stall → finalState=zombie
1011
+ // 3. clean exit without terminal mail (terminalMailMissing) → completed
1012
+
1013
test("abort path: emits worker_died to parent with terminatedBy='runner' (overstory-c772)", async () => {
	// An aborted child with a parent must leave exactly one high-priority
	// worker_died mail in the parent's inbox, carrying the runner's view of
	// what happened.
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-abort",
		state: "working",
		parentAgent: "lead-x",
		taskId: "task-c772",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	const controller = new AbortController();
	const spawnFn: TurnSpawnFn = () => {
		proc._pushLine(JSON.stringify({ type: "system", subtype: "init", session_id: "abort-mail" }));
		return proc;
	};

	type DiedPayload = {
		terminatedBy?: string;
		reason?: string;
		agentName?: string;
		taskId?: string;
		capability?: string;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-abort", {
			runtime,
			_spawnFn: spawnFn,
			abortSignal: controller.signal,
			sigkillDelayMs: 25,
		});
		const pending = runTurn({ ...baseOpts, _mailStore: mail });
		await Bun.sleep(60);
		controller.abort();
		const result = await pending;
		expect(result.finalState).toBe("zombie");

		const inbox = mail.getAll({ to: "lead-x", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [msg] = inbox;
		expect(msg?.from).toBe("child-abort");
		expect(msg?.priority).toBe("high");
		expect(msg?.subject).toContain("worker_died");
		expect(msg?.subject).toContain("child-abort");

		const payload = JSON.parse(msg?.payload ?? "{}") as DiedPayload;
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.agentName).toBe("child-abort");
		// The mail's taskId mirrors the runner's opts.taskId for this turn,
		// which the test rig's makeRunOpts seeds as "task-test" (not the
		// taskId stored on the seeded session).
		expect(payload.taskId).toBe("task-test");
		expect(payload.capability).toBe("builder");
		expect(payload.reason).toContain("Aborted");
	} finally {
		mail.close();
	}
});
1069
+
1070
test("stall path: emits worker_died to parent (overstory-c772)", async () => {
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-stall",
		state: "working",
		parentAgent: "lead-y",
		taskId: "task-c772-b",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	// Nothing is ever emitted, so the stall watchdog has to fire and abort.
	const spawnFn: TurnSpawnFn = () => proc;

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-stall", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({
			...baseOpts,
			_mailStore: mail,
			eventStallTimeoutMs: 50,
			sigkillDelayMs: 25,
		});
		expect(result.stallAborted).toBe(true);
		expect(result.finalState).toBe("zombie");

		// The parent must have received exactly one runner-authored notice.
		const inbox = mail.getAll({ to: "lead-y", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [notice] = inbox;
		const payload = JSON.parse(notice?.payload ?? "{}") as {
			terminatedBy?: string;
			reason?: string;
		};
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.reason).toContain("stalled");
	} finally {
		mail.close();
	}
});
1110
+
1111
+ // --- Resume-path parent-notify (overstory-de3c) ---
1112
+ //
1113
+ // The witnessed bug: a spawn-per-turn worker that survived a first-turn
1114
+ // parser stall (worker_died emitted, state→zombie) was re-dispatched by its
1115
+ // parent via `ov sling --recover`. The resumed turn ran, then transitioned
1116
+ // to zombie SILENTLY — no second worker_died mail was ever sent. The lead
1117
+ // blocked forever.
1118
+ //
1119
+ // These tests pin down whether the runner itself is responsible. Each seeds
1120
+ // `claudeSessionId` so the runner exercises the --resume code path, and
1121
+ // asserts that worker_died is still emitted on stall / abort / clean-exit-
1122
+ // without-terminal-mail. If these PASS the runner is exonerated and the
1123
+ // fix is upstream (sling.ts re-spawn upsert dropping parentAgent — H1).
1124
+
1125
test("resume-stall: parser stall on a resumed session still emits worker_died (overstory-de3c)", async () => {
	// Seeding claudeSessionId makes this turn take the resume path.
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-resume-stall",
		state: "working",
		parentAgent: "lead-r",
		taskId: "task-de3c-stall",
		claudeSessionId: "prior-session",
	});
	const { runtime, spawnCalls } = makeSpyRuntime();
	const proc = makeFakeProc();
	// The resumed turn emits nothing, so the parser stalls.
	const spawnFn: TurnSpawnFn = () => proc;

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-resume-stall", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({
			...baseOpts,
			_mailStore: mail,
			eventStallTimeoutMs: 50,
			sigkillDelayMs: 25,
		});

		expect(result.stallAborted).toBe(true);
		expect(result.finalState).toBe("zombie");

		// Resume path exercised: the runtime was handed the prior session id.
		expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");

		const inbox = mail.getAll({ to: "lead-r", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [notice] = inbox;
		const payload = JSON.parse(notice?.payload ?? "{}") as {
			terminatedBy?: string;
			reason?: string;
			agentName?: string;
		};
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.reason).toContain("stalled");
		expect(payload.agentName).toBe("child-resume-stall");
	} finally {
		mail.close();
	}
});
1172
+
1173
test("resume-abort: operator abort on a resumed session still emits worker_died (overstory-de3c)", async () => {
	// Seeding claudeSessionId makes this turn take the resume path.
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-resume-abort",
		state: "working",
		parentAgent: "lead-r",
		taskId: "task-de3c-abort",
		claudeSessionId: "prior-session",
	});
	const { runtime, spawnCalls } = makeSpyRuntime();
	const proc = makeFakeProc();
	const controller = new AbortController();

	// The resumed process emits init and then hangs until it is aborted.
	const initLine = JSON.stringify({
		type: "system",
		subtype: "init",
		session_id: "prior-session",
	});
	const spawnFn: TurnSpawnFn = () => {
		proc._pushLine(initLine);
		return proc;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-resume-abort", {
			runtime,
			_spawnFn: spawnFn,
			abortSignal: controller.signal,
			sigkillDelayMs: 25,
		});
		const pending = runTurn({ ...baseOpts, _mailStore: mail });
		await Bun.sleep(60);
		controller.abort();
		const result = await pending;

		expect(result.finalState).toBe("zombie");
		expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");

		const inbox = mail.getAll({ to: "lead-r", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [notice] = inbox;
		const payload = JSON.parse(notice?.payload ?? "{}") as {
			terminatedBy?: string;
			reason?: string;
			agentName?: string;
		};
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.reason).toContain("Aborted");
		expect(payload.agentName).toBe("child-resume-abort");
	} finally {
		mail.close();
	}
});
1227
+
1228
test("resume-terminalMailMissing: clean exit on a resumed session still emits worker_died (overstory-de3c)", async () => {
	// Seeding claudeSessionId makes this turn take the resume path.
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-resume-noop",
		state: "working",
		parentAgent: "lead-r",
		taskId: "task-de3c-noop",
		claudeSessionId: "prior-session",
	});
	const { runtime, spawnCalls } = makeSpyRuntime();
	const proc = makeFakeProc();
	// The resumed turn finishes cleanly but the agent never sends mail.
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(proc, { sessionId: "prior-session", isError: false });
		proc._exit(0);
		return proc;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-resume-noop", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({ ...baseOpts, _mailStore: mail });

		expect(result.cleanResult).toBe(true);
		expect(result.terminalMailMissing).toBe(true);
		expect(result.finalState).toBe("completed");
		expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");

		const inbox = mail.getAll({ to: "lead-r", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [notice] = inbox;
		const payload = JSON.parse(notice?.payload ?? "{}") as {
			terminatedBy?: string;
			reason?: string;
			agentName?: string;
		};
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.reason).toContain("Clean exit without terminal mail");
		expect(payload.agentName).toBe("child-resume-noop");
	} finally {
		mail.close();
	}
});
1273
+
1274
test("terminalMailMissing: emits worker_died to parent (overstory-4159)", async () => {
	// Silent no-op: claude exits cleanly without ever sending worker_done.
	// Without a notice from the runner, the lead would wait forever for a
	// terminal mail.
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-noop",
		state: "working",
		parentAgent: "lead-z",
		taskId: "task-4159",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(proc, { sessionId: "noop-session", isError: false });
		proc._exit(0);
		return proc;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-noop", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({ ...baseOpts, _mailStore: mail });
		expect(result.cleanResult).toBe(true);
		expect(result.terminalMailMissing).toBe(true);
		expect(result.finalState).toBe("completed");

		const inbox = mail.getAll({ to: "lead-z", type: "worker_died" });
		expect(inbox.length).toBe(1);
		const [notice] = inbox;
		const payload = JSON.parse(notice?.payload ?? "{}") as {
			terminatedBy?: string;
			reason?: string;
			agentName?: string;
		};
		expect(payload.terminatedBy).toBe("runner");
		expect(payload.agentName).toBe("child-noop");
		expect(payload.reason).toContain("Clean exit without terminal mail");
	} finally {
		mail.close();
	}
});
1318
+
1319
test("no parentAgent: skips worker_died mail (orchestrator-spawned worker)", async () => {
	// Workers spawned by the orchestrator carry parentAgent=null, so there
	// is no one to notify and the runner must not invent a recipient.
	seedSession(ctx.sessionsDbPath, {
		agentName: "orphan-noop",
		state: "working",
		parentAgent: null,
		taskId: "task-orphan",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(proc, { sessionId: "orphan-session" });
		proc._exit(0);
		return proc;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "orphan-noop", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({ ...baseOpts, _mailStore: mail });
		expect(result.terminalMailMissing).toBe(true);

		// No worker_died mail for any recipient.
		const died = mail.getAll({ type: "worker_died" });
		expect(died.length).toBe(0);
	} finally {
		mail.close();
	}
});
1349
+
1350
test("happy path: terminal mail observed → no worker_died emitted (no double-signal)", async () => {
	seedSession(ctx.sessionsDbPath, {
		agentName: "child-ok",
		state: "working",
		parentAgent: "lead-ok",
		taskId: "task-happy",
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();

	// Simulated well-behaved worker: send worker_done, then end the turn.
	const sendDoneThenExit = async (): Promise<void> => {
		await Bun.sleep(15);
		const store = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(store).sendProtocol({
				from: "child-ok",
				to: "lead-ok",
				subject: "Worker done",
				body: "ok",
				type: "worker_done",
				priority: "normal",
				payload: {
					taskId: "task-happy",
					branch: "branch",
					exitCode: 0,
					filesModified: [],
				},
			});
		} finally {
			store.close();
		}
		emitFakeTurn(proc, { sessionId: "ok-session" });
		proc._exit(0);
	};
	const spawnFn: TurnSpawnFn = () => {
		void sendDoneThenExit();
		return proc;
	};

	const mail = createMailStore(ctx.mailDbPath);
	try {
		const baseOpts = makeRunOpts(ctx, "child-ok", { runtime, _spawnFn: spawnFn });
		const result = await runTurn({ ...baseOpts, _mailStore: mail });
		expect(result.terminalMailObserved).toBe(true);
		expect(result.terminalMailMissing).toBe(false);
		expect(result.finalState).toBe("completed");

		// The parent's inbox holds the agent's own worker_done, but the
		// runner must not have added a worker_died on top of it.
		const died = mail.getAll({ to: "lead-ok", type: "worker_died" });
		expect(died.length).toBe(0);
	} finally {
		mail.close();
	}
});
1404
+
1405
test("two concurrent runTurn calls for the same agent serialize", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "serial", state: "working" });
	const { runtime } = makeSpyRuntime();

	// Timestamps come from performance.now(): it is monotonic and
	// sub-millisecond, so ordering cannot be disturbed by wall-clock
	// adjustments or by two events landing in the same millisecond.
	const windows: Array<{ id: number; phase: "start" | "end"; ts: number }> = [];
	let spawnId = 0;
	const spawnFn: TurnSpawnFn = () => {
		const id = ++spawnId;
		windows.push({ id, phase: "start", ts: performance.now() });
		const fake = makeFakeProc();
		(async () => {
			// Hold the spawn open briefly to widen the overlap window.
			await Bun.sleep(80);
			emitFakeTurn(fake, { sessionId: `s-${id}` });
			fake._exit(0);
			windows.push({ id, phase: "end", ts: performance.now() });
		})();
		return fake;
	};

	const a = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
	const b = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
	await Promise.all([a, b]);

	// Sort by timestamp; the two turns must form two disjoint windows:
	// start(A) → end(A) → start(B) → end(B).
	const ordered = [...windows].sort((x, y) => x.ts - y.ts);
	expect(ordered.length).toBe(4);
	expect(ordered[0]?.phase).toBe("start");
	expect(ordered[1]?.phase).toBe("end");
	expect(ordered[1]?.id).toBe(ordered[0]?.id);
	expect(ordered[2]?.phase).toBe("start");
	expect(ordered[2]?.id).not.toBe(ordered[0]?.id);
	// The second window must also close, and with its own id.
	expect(ordered[3]?.phase).toBe("end");
	expect(ordered[3]?.id).toBe(ordered[2]?.id);
});
1438
+
1439
test("spawn throws — lock is released and error propagates", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "fails", state: "booting" });
	const { runtime } = makeSpyRuntime();
	const throwingSpawn: TurnSpawnFn = () => {
		throw new Error("ENOENT: claude binary missing");
	};

	const attempt = runTurn(makeRunOpts(ctx, "fails", { runtime, _spawnFn: throwingSpawn }));
	await expect(attempt).rejects.toThrow(/binary missing/);

	// The cross-process lock has to be free again so a later turn can run.
	const lock = readTurnLock(ctx.overstoryDir, "fails");
	expect(lock.heldByPid).toBeNull();

	// No events were observed, so the session state must be untouched.
	const persisted = readSession(ctx.sessionsDbPath, "fails");
	expect(persisted?.state).toBe("booting");
});
1458
+
1459
test("subsequent turn passes the prior session id to runtime.buildDirectSpawn", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "two-turns", state: "working" });
	const { runtime, spawnCalls } = makeSpyRuntime();

	// Builds a spawn function whose process completes one clean turn that
	// reports the given claude session id.
	const cleanTurn = (sessionId: string): TurnSpawnFn => {
		const proc = makeFakeProc();
		return () => {
			emitFakeTurn(proc, { sessionId });
			proc._exit(0);
			return proc;
		};
	};

	// Turn 1: claude assigns session id "sid-1".
	await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: cleanTurn("sid-1") }));

	// Turn 2: sid-1 must be read back from the SessionStore and handed to
	// the runtime as resumeSessionId.
	await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: cleanTurn("sid-1") }));

	expect(spawnCalls.length).toBe(2);
	const [firstCall, secondCall] = spawnCalls;
	expect(firstCall?.resumeSessionId ?? null).toBeNull();
	expect(secondCall?.resumeSessionId).toBe("sid-1");
});
1485
+
1486
test("user turn payload is written to spawned stdin", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "stdin-test", state: "working" });
	const { runtime } = makeSpyRuntime();

	// One NDJSON record: a user message followed by the line terminator.
	const userMessage = {
		type: "user",
		message: { role: "user", content: [{ type: "text", text: "ping" }] },
	};
	const payload = `${JSON.stringify(userMessage)}\n`;

	const proc = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(proc, { sessionId: "stdin-sess" });
		proc._exit(0);
		return proc;
	};

	const opts = makeRunOpts(ctx, "stdin-test", {
		runtime,
		_spawnFn: spawnFn,
		userTurnNdjson: payload,
	});
	await runTurn(opts);

	// Exactly one write, and it is the payload verbatim.
	expect(proc._writes.length).toBe(1);
	expect(proc._writes[0]).toBe(payload);
});
1513
+
1514
test("does not spawn when the runtime lacks buildDirectSpawn", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "no-build", state: "booting" });
	const incomplete: AgentRuntime = {
		id: "incomplete",
		stability: "experimental",
		instructionPath: "AGENTS.md",
		buildSpawnCommand: () => "",
		buildPrintCommand: () => [],
		deployConfig: async () => {},
		detectReady: () => ({ phase: "ready" }),
		parseTranscript: async () => null,
		getTranscriptDir: () => null,
		buildEnv: () => ({}),
		// buildDirectSpawn intentionally omitted
		parseEvents: async function* () {
			yield* [];
		},
	};

	// Inject a spy spawn function so the "does not spawn" half of the
	// contract is verified directly. If the runner ever reached spawn, the
	// spy's process exits immediately so the test fails fast instead of
	// hanging on a subprocess that never ends.
	let spawnCount = 0;
	const spySpawn: TurnSpawnFn = () => {
		spawnCount++;
		const proc = makeFakeProc();
		proc._exit(0);
		return proc;
	};

	await expect(
		runTurn(makeRunOpts(ctx, "no-build", { runtime: incomplete, _spawnFn: spySpawn })),
	).rejects.toThrow(/buildDirectSpawn/);

	expect(spawnCount).toBe(0);
});
1537
+
1538
+ // ---------- cleanup-invariant tests (overstory-4af3) ----------
1539
+ //
1540
+ // The runner publishes turn.pid for cross-process abort and updates
1541
+ // lastActivity at the end of every turn. Both must hold even when the
1542
+ // inner SessionStore writes silently fail. These tests pin the cleanup
1543
+ // contract so future regressions surface immediately.
1544
+
1545
test("happy path: turn.pid is removed and lastActivity advances past startedAt", async () => {
	// Backdate the session by a minute so any end-of-turn lastActivity
	// update is unambiguously later than the seeded value.
	const seededAt = new Date(Date.now() - 60_000).toISOString();
	seedSession(ctx.sessionsDbPath, {
		agentName: "cleanup-ok",
		state: "working",
		startedAt: seededAt,
		lastActivity: seededAt,
	});
	const { runtime } = makeSpyRuntime();
	const proc = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(proc, { sessionId: "cleanup-ok-session" });
		proc._exit(0);
		return proc;
	};

	const opts = makeRunOpts(ctx, "cleanup-ok", { runtime, _spawnFn: spawnFn });
	const result = await runTurn(opts);

	expect(result.exitCode).toBe(0);

	// turn.pid must be gone once the turn is over.
	expect(existsSync(turnPidPathFor(ctx, "cleanup-ok"))).toBe(false);

	const persisted = readSession(ctx.sessionsDbPath, "cleanup-ok");
	expect(persisted?.lastActivity).not.toBe(seededAt);
	const lastActivityMs = new Date(persisted?.lastActivity ?? 0).getTime();
	expect(lastActivityMs).toBeGreaterThan(new Date(seededAt).getTime());
});
1574
+
1575
test("spawn throws: turn.pid is never written and finally cleanup is a no-op", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "spawn-fail", state: "booting" });
	const { runtime } = makeSpyRuntime();
	const throwingSpawn: TurnSpawnFn = () => {
		throw new Error("ENOENT: claude binary missing");
	};

	const attempt = runTurn(makeRunOpts(ctx, "spawn-fail", { runtime, _spawnFn: throwingSpawn }));
	await expect(attempt).rejects.toThrow(/binary missing/);

	// No turn.pid may be left behind after the failed spawn.
	const pidFileExists = existsSync(turnPidPathFor(ctx, "spawn-fail"));
	expect(pidFileExists).toBe(false);
});
1588
+
1589
test("parser throws: outer finally still runs and removes turn.pid", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "parser-fail", state: "working" });

	// Event stream that rejects on the very first read, standing in for a
	// stream-json parse error in the middle of a turn.
	const rejectingStream: AsyncIterable<never> = {
		[Symbol.asyncIterator](): AsyncIterator<never> {
			return {
				next: (): Promise<IteratorResult<never>> =>
					Promise.reject(new Error("synthetic stream-json parse error")),
			};
		},
	};

	// A real ClaudeRuntime with every method re-bound to the instance, and
	// only parseEvents swapped for the rejecting stream.
	const real = new ClaudeRuntime();
	const broken: AgentRuntime = {
		...real,
		id: real.id,
		stability: real.stability,
		instructionPath: real.instructionPath,
		buildSpawnCommand: real.buildSpawnCommand.bind(real),
		buildPrintCommand: real.buildPrintCommand.bind(real),
		deployConfig: real.deployConfig.bind(real),
		detectReady: real.detectReady.bind(real),
		parseTranscript: real.parseTranscript.bind(real),
		getTranscriptDir: real.getTranscriptDir.bind(real),
		buildEnv: real.buildEnv.bind(real),
		buildDirectSpawn: real.buildDirectSpawn.bind(real),
		parseEvents: (() => rejectingStream) as unknown as AgentRuntime["parseEvents"],
	};

	const proc = makeFakeProc();
	// The fake is deliberately left running: a still-live subprocess lets us
	// check that the C3 kill path really fires before the lock is released.
	// Had the fake already exited, kill() would still be recorded, but the
	// test could not tell a runner-driven kill from no-op cleanup.
	const spawnFn: TurnSpawnFn = () => proc;

	const attempt = runTurn(makeRunOpts(ctx, "parser-fail", { runtime: broken, _spawnFn: spawnFn }));
	await expect(attempt).rejects.toThrow(/synthetic stream-json/);

	// overstory-088b C3: when the parser throws, the live subprocess must be
	// killed so it is not orphaned past lock.release. SIGKILL is right here:
	// this is a non-recoverable error path and the process must die.
	expect(proc._killSignals).toContain("SIGKILL");
	expect(proc._killed).toBe(true);

	// The cleanup contract holds even when the parser throws.
	expect(existsSync(turnPidPathFor(ctx, "parser-fail"))).toBe(false);
});
1642
+
1643
// turn.pid write failure: the pid path is pre-occupied by a directory. The test
// expects runTurn to reject, the fake subprocess to be SIGKILLed, and an
// error-level diagnostic mentioning the failed write.
test("turn.pid write failure SIGKILLs subprocess and aborts the turn (overstory-62a6)", async () => {
	const agentName = "pid-write-fail";
	seedSession(ctx.sessionsDbPath, { agentName, state: "working" });
	const { runtime } = makeSpyRuntime();

	// Occupy the turn.pid path with a DIRECTORY so `Bun.write(turnPidPath, ...)`
	// fails with EISDIR. This stands in for any real failure mode (read-only fs,
	// permissions, disk full) where the kill primitive becomes unavailable.
	const fsPromises = await import("node:fs/promises");
	const pidPath = turnPidPathFor(ctx, agentName);
	await fsPromises.mkdir(pidPath, { recursive: true });

	const fake = makeFakeProc();
	const spawnFake: TurnSpawnFn = () => fake;

	// Collect every diagnostic the runner emits, with its level.
	const diagnostics: Array<{ level: string; message: string }> = [];
	const collect: RunnerLogger = (level, message) => {
		diagnostics.push({ level, message });
	};

	const turn = runTurn(
		makeRunOpts(ctx, agentName, { runtime, _spawnFn: spawnFake, _logWarning: collect }),
	);
	await expect(turn).rejects.toThrow(/failed to write turn\.pid/);

	// With the kill primitive unavailable, SIGKILLing the subprocess here is
	// the only safe way to avoid a silently un-killable agent.
	expect(fake._killSignals).toContain("SIGKILL");
	expect(fake._killed).toBe(true);

	// The failure must surface at error level (not warn) so it is not silent.
	const errorMessages = diagnostics
		.filter((entry) => entry.level === "error")
		.map((entry) => entry.message);
	const reportedWriteFailure = errorMessages.some((text) =>
		text.includes("failed to write turn.pid"),
	);
	expect(reportedWriteFailure).toBe(true);
});
1678
+
1679
// SessionStore failure: sessionsDbPath is pointed at a directory. The test
// expects the turn to complete anyway, an error-level diagnostic about
// lastActivity, and turn.pid to be removed.
test("silent SessionStore failure surfaces as a runner warning", async () => {
	const agentName = "ss-fail";
	seedSession(ctx.sessionsDbPath, { agentName, state: "working" });
	const { runtime } = makeSpyRuntime();

	// A fake subprocess that emits one complete turn and exits cleanly.
	const fake = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(fake, { sessionId: "ss-fail-session" });
		fake._exit(0);
		return fake;
	};

	const diagnostics: Array<{ level: string; message: string }> = [];
	const collect: RunnerLogger = (level, message) => {
		diagnostics.push({ level, message });
	};

	// Override sessionsDbPath with a path that exists as a DIRECTORY (not a db
	// file) so every SessionStore open in the runner throws. The runner must
	// keep going (cleanup contract) AND surface the failure via the logger.
	const baseOpts = makeRunOpts(ctx, agentName, {
		runtime,
		_spawnFn: spawnFn,
		_logWarning: collect,
	});
	await runTurn({ ...baseOpts, sessionsDbPath: ctx.overstoryDir });

	// The lastActivity update failed silently (directory, not a db) — the exact
	// scenario that masked overstory-4af3. The runner must report the contract
	// violation via _logWarning at error level.
	const reportedStaleActivity = diagnostics.some(
		(entry) =>
			entry.level === "error" && entry.message.includes("lastActivity stayed at startedAt"),
	);
	expect(reportedStaleActivity).toBe(true);

	// turn.pid must still be cleaned up regardless.
	const pidFileExists = existsSync(turnPidPathFor(ctx, agentName));
	expect(pidFileExists).toBe(false);
});
1715
+
1716
+ // ---------- mid-turn lastActivity refresh (overstory-8e61) ----------
1717
+ //
1718
+ // The watchdog's design (src/watchdog/health.ts:242-243) documents that the
1719
+ // runner advances `session.lastActivity` per parser event during a turn.
1720
+ // Without that, a long-running turn looks stalled to the watchdog and the
1721
+ // agent gets zombified mid-flight. These tests pin the per-event refresh
1722
+ // behavior added inside the parser loop.
1723
+
1724
// Mid-turn refresh with a zero interval: after one fake turn, the persisted
// lastActivity must differ from, and be later than, the seeded value.
test("mid-turn refresh: lastActivity advances when interval=0 forces per-event refresh", async () => {
	const agentName = "midturn-A";

	// Seed the session one minute in the past so any refresh is observable.
	const seededAt = new Date(Date.now() - 60_000).toISOString();
	seedSession(ctx.sessionsDbPath, {
		agentName,
		state: "working",
		startedAt: seededAt,
		lastActivity: seededAt,
	});

	const { runtime } = makeSpyRuntime();
	const fake = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		emitFakeTurn(fake, { sessionId: "midturn-A-session" });
		fake._exit(0);
		return fake;
	};

	const baseOpts = makeRunOpts(ctx, agentName, { runtime, _spawnFn: spawnFn });
	await runTurn({ ...baseOpts, lastActivityRefreshIntervalMs: 0 });

	const persisted = readSession(ctx.sessionsDbPath, agentName);
	expect(persisted?.lastActivity).not.toBe(seededAt);

	const persistedMs = new Date(persisted?.lastActivity ?? 0).getTime();
	const seededMs = new Date(seededAt).getTime();
	expect(persistedMs).toBeGreaterThan(seededMs);
});
1751
+
1752
// Mid-turn refresh throttle: with a 1000ms interval and an injected clock that
// advances in 500ms steps, only every other event may trigger a refresh.
test("mid-turn refresh: throttle gates updates by simulated time", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "midturn-B", state: "working" });
	const { runtime } = makeSpyRuntime();

	// Controlled simulated clock. The injected `_now` is invoked many times
	// during a turn (for startedAtMs, log timestamps, durationMs) — only the
	// in-loop calls matter for the throttle. The feeder below moves the clock
	// synchronously before each push and then yields, so the runner reads the
	// value set just prior. The clock starts well above the throttle interval
	// so the first event fires (initial lastActivityRefreshMs=0).
	let clockMs = 5000;
	const readClock = (): Date => new Date(clockMs);

	let refreshCount = 0;
	const countRefresh = (): void => {
		refreshCount++;
	};

	const fake = makeFakeProc();

	// Feeds the fake subprocess one line per clock step, then a result line.
	const feedTurn = async (): Promise<void> => {
		const sessionId = "midturn-B-session";
		// `system` lines are used because the claude parser does not batch
		// them — every system line yields exactly one status event, driving
		// one runner-loop iteration each. Assistant text would coalesce
		// inside a flush window and defeat the per-event count.
		const stamps = [5000, 5500, 6000, 6500, 7000, 7500];
		for (const [index, stamp] of stamps.entries()) {
			clockMs = stamp;
			const systemLine = JSON.stringify({
				type: "system",
				subtype: index === 0 ? "init" : "progress",
				session_id: sessionId,
			});
			fake._pushLine(systemLine);
			// Yield so the for-await loop body runs to completion against
			// the clock value that was just set.
			await Bun.sleep(20);
		}

		// Trailing result at the same clock value as the last chunk; with a
		// 1000ms throttle and last refresh at 7000, this event at 7500
		// (delta=500) does not fire.
		const resultLine = JSON.stringify({
			type: "result",
			subtype: "success",
			session_id: sessionId,
			result: "done",
			is_error: false,
			duration_ms: 50,
			num_turns: 1,
		});
		fake._pushLine(resultLine);
		await Bun.sleep(20);
		fake._exit(0);
	};

	const spawnFn: TurnSpawnFn = () => {
		// Fire-and-forget: the feeder runs concurrently with the runner loop.
		void feedTurn();
		return fake;
	};

	const baseOpts = makeRunOpts(ctx, "midturn-B", { runtime, _spawnFn: spawnFn });
	await runTurn({
		...baseOpts,
		lastActivityRefreshIntervalMs: 1000,
		_now: readClock,
		_onLastActivityRefresh: countRefresh,
	});

	// Stamps 5000, 6000, 7000 fire (gap >= 1000). Stamps 5500, 6500, 7500
	// are throttled (gap = 500). The trailing result event at 7500 also
	// throttles. Total expected = 3.
	expect(refreshCount).toBe(3);
});
1824
+
1825
// Mid-turn refresh on a failing turn: two events are yielded before the stream
// rejects. The test expects at least one refresh callback and a persisted
// lastActivity later than the seeded value.
test("mid-turn refresh: parser throw still leaves lastActivity advanced (overstory-8e61)", async () => {
	// The end-of-turn `updateSessionLastActivity` (around turn-runner.ts:1112)
	// does NOT fire when the parser iteration throws — the catch path
	// rethrows before reaching the cleanup write. The mid-turn refresh
	// covers this gap so a parser-error turn still leaves lastActivity
	// fresh, mirroring the documented design at src/watchdog/health.ts:242-243.
	const agentName = "midturn-C";
	const seededAt = new Date(Date.now() - 60_000).toISOString();
	seedSession(ctx.sessionsDbPath, {
		agentName,
		state: "working",
		startedAt: seededAt,
		lastActivity: seededAt,
	});

	// Event stream: two valid events, then a rejection on the next read.
	// Mirrors a malformed stream-json line arriving after some good events.
	// The read counter lives outside the iterator, so it is shared by every
	// iterator this iterable hands out.
	let reads = 0;
	const readNext = (): Promise<IteratorResult<unknown>> => {
		const readIndex = reads;
		reads += 1;
		if (readIndex >= 2) {
			return Promise.reject(new Error("synthetic stream-json parse error"));
		}
		return Promise.resolve({
			value: {
				type: "assistant_message",
				timestamp: new Date().toISOString(),
			},
			done: false,
		});
	};
	const twoEventsThenReject: AsyncIterable<unknown> = {
		[Symbol.asyncIterator]: () => ({ next: readNext }),
	};

	// A ClaudeRuntime copy with the listed methods re-bound to the real
	// instance; only parseEvents is replaced, by the stream above.
	const claude = new ClaudeRuntime();
	const brokenRuntime: AgentRuntime = {
		...claude,
		id: claude.id,
		stability: claude.stability,
		instructionPath: claude.instructionPath,
		buildSpawnCommand: claude.buildSpawnCommand.bind(claude),
		buildPrintCommand: claude.buildPrintCommand.bind(claude),
		deployConfig: claude.deployConfig.bind(claude),
		detectReady: claude.detectReady.bind(claude),
		parseTranscript: claude.parseTranscript.bind(claude),
		getTranscriptDir: claude.getTranscriptDir.bind(claude),
		buildEnv: claude.buildEnv.bind(claude),
		buildDirectSpawn: claude.buildDirectSpawn.bind(claude),
		parseEvents: (() => twoEventsThenReject) as unknown as AgentRuntime["parseEvents"],
	};

	const fake = makeFakeProc();
	const spawnFake: TurnSpawnFn = () => fake;

	let refreshCount = 0;
	const baseOpts = makeRunOpts(ctx, agentName, { runtime: brokenRuntime, _spawnFn: spawnFake });
	const turn = runTurn({
		...baseOpts,
		lastActivityRefreshIntervalMs: 0,
		_onLastActivityRefresh: () => {
			refreshCount++;
		},
	});
	await expect(turn).rejects.toThrow(/synthetic stream-json/);

	// The mid-turn refresh fired for at least one of the two pre-throw events.
	expect(refreshCount).toBeGreaterThanOrEqual(1);

	// The persisted lastActivity reflects the mid-turn write — the
	// end-of-turn write at line ~1112 was skipped by the parser-throw path.
	const persisted = readSession(ctx.sessionsDbPath, agentName);
	expect(persisted?.lastActivity).not.toBe(seededAt);

	const persistedMs = new Date(persisted?.lastActivity ?? 0).getTime();
	const seededMs = new Date(seededAt).getTime();
	expect(persistedMs).toBeGreaterThan(seededMs);
});
1902
+
1903
// Bash mail-poll detection: a Bash tool_use whose command polls mail must
// produce a warn-level diagnostic and a `mail_poll_detected` custom event,
// while the ordinary tool_start event for the Bash call is still recorded.
test("Bash mail-poll detector: warns + records custom event without suppressing tool_use (overstory-c92c)", async () => {
	// Defense-in-depth: the lead.md prompt forbids Bash mail polling
	// (overstory-fa84). When a future overlay or contributed agent
	// reintroduces the pattern, the runner must surface it via the
	// runner diagnostic sink AND a `mail_poll_detected` event in
	// events.db, while still recording the original tool_use event
	// so downstream observability is unaffected.
	const agentName = "polled";
	seedSession(ctx.sessionsDbPath, { agentName, state: "working" });
	const { runtime } = makeSpyRuntime();

	const fake = makeFakeProc();
	const sessionId = "polled-session";
	const pollCommand = "until ov mail list; do sleep 1; done";

	// The two stream-json lines fed ahead of the canned fake turn: a system
	// init line, then an assistant message carrying the Bash tool_use.
	const initLine = JSON.stringify({
		type: "system",
		subtype: "init",
		session_id: sessionId,
		model: "claude-test",
	});
	const bashToolUseLine = JSON.stringify({
		type: "assistant",
		session_id: sessionId,
		message: {
			role: "assistant",
			model: "claude-test",
			content: [
				{
					type: "tool_use",
					id: "toolu_poll_1",
					name: "Bash",
					input: { command: pollCommand },
				},
			],
		},
	});
	const spawnFn: TurnSpawnFn = () => {
		fake._pushLine(initLine);
		fake._pushLine(bashToolUseLine);
		emitFakeTurn(fake, { sessionId });
		fake._exit(0);
		return fake;
	};

	const diagnostics: Array<{ level: string; message: string }> = [];
	const collect: RunnerLogger = (level, message) => {
		diagnostics.push({ level, message });
	};

	const result = await runTurn(
		makeRunOpts(ctx, agentName, { runtime, _spawnFn: spawnFn, _logWarning: collect }),
	);

	expect(result.exitCode).toBe(0);

	// The warning went through the runner diagnostic sink (warn level,
	// message includes "mail-poll").
	const pollWarning = diagnostics.find(
		(entry) => entry.level === "warn" && entry.message.includes("mail-poll"),
	);
	expect(pollWarning).toBeDefined();

	// True when `data` parses as JSON whose `type` is "mail_poll_detected";
	// any parse or access failure counts as "no match".
	const isMailPollPayload = (data: string): boolean => {
		try {
			const parsed = JSON.parse(data) as { type?: string };
			return parsed.type === "mail_poll_detected";
		} catch {
			return false;
		}
	};

	const eventStore = createEventStore(ctx.eventsDbPath);
	try {
		const recorded = eventStore.getByAgent(agentName);

		// The `mail_poll_detected` custom event landed in events.db with the
		// full (untruncated) command and the matched reason.
		const detectedEvent = recorded.find((e) => {
			const isCustomWarn = e.eventType === "custom" && e.level === "warn";
			if (!isCustomWarn || !e.data) return false;
			return isMailPollPayload(e.data);
		});
		expect(detectedEvent).toBeDefined();

		const payload = JSON.parse(detectedEvent?.data ?? "{}") as {
			type: string;
			reason: string;
			command: string;
		};
		expect(payload.reason).toBe("until ov mail loop");
		expect(payload.command).toBe(pollCommand);

		// Regression guard: the original Bash tool_use event MUST still
		// be recorded — the warning is emitted IN ADDITION to (not in place
		// of) the normal recordAgentEvent call.
		const bashToolStart = recorded.find(
			(e) => e.eventType === "tool_start" && e.toolName === "Bash",
		);
		expect(bashToolStart).toBeDefined();
	} finally {
		eventStore.close();
	}
});
1999
+ });
2000
+
2001
// Scope-violation observability suite. Each test runs one fake turn in which
// the agent sends mail shortly after spawn, with `_scopeDetect` stubbed, and
// then inspects the runner diagnostics and/or events.db.
describe("runTurn scope-violation observability (overstory-9f4d)", () => {
	type LogEntry = { level: string; message: string };
	type FakeProc = ReturnType<typeof makeFakeProc>;
	type MailClient = ReturnType<typeof createMailClient>;

	const IN_SCOPE_FILE = "src/agents/in-scope.ts";
	const OUT_OF_SCOPE_FILE = "src/other.ts";
	const SCOPE_WARNING_FRAGMENT = "outside declared FILE_SCOPE";

	let ctx: Ctx;

	beforeEach(async () => {
		// One temp directory doubles as overstory dir, worktree and project root.
		const root = await mkdtemp(join(tmpdir(), "overstory-scope-test-"));
		ctx = {
			overstoryDir: root,
			worktreePath: root,
			projectRoot: root,
			mailDbPath: join(root, "mail.db"),
			eventsDbPath: join(root, "events.db"),
			sessionsDbPath: join(root, "sessions.db"),
		};
		_resetInProcessLocks();
	});

	afterEach(async () => {
		_resetInProcessLocks();
		await rm(ctx.overstoryDir, { recursive: true, force: true });
	});

	/** Writes `.claude/CLAUDE.md` in the worktree with a File Scope section listing `scope`. */
	async function writeOverlayWithScope(scope: string[]): Promise<void> {
		const claudeDir = join(ctx.worktreePath, ".claude");
		const { mkdir: mkdirP, writeFile } = await import("node:fs/promises");
		await mkdirP(claudeDir, { recursive: true });

		const bullets = scope.map((path) => `- \`${path}\``);
		const lines = ["## File Scope (exclusive ownership)", ""].concat(bullets, [
			"",
			"## Expertise",
			"",
			"none",
		]);
		await writeFile(join(claudeDir, "CLAUDE.md"), lines.join("\n"));
	}

	/** Returns a log sink plus the array it appends `{ level, message }` entries to. */
	function captureLogs(): { logs: LogEntry[]; logger: RunnerLogger } {
		const logs: LogEntry[] = [];
		const logger: RunnerLogger = (level, message) => {
			logs.push({ level, message });
		};
		return { logs, logger };
	}

	/** Finds the warn-level FILE_SCOPE diagnostic, if one was logged. */
	function findScopeWarning(logs: LogEntry[]): LogEntry | undefined {
		return logs.find(
			(entry) => entry.level === "warn" && entry.message.includes(SCOPE_WARNING_FRAGMENT),
		);
	}

	/**
	 * Builds a spawn function that returns `fake` immediately and, in the
	 * background, waits 20ms, lets `sendMail` send through a freshly opened
	 * mail store (always closed afterwards), then emits a fake turn for
	 * `sessionId` and exits the fake process with code 0.
	 */
	function spawnAfterMail(
		fake: FakeProc,
		sessionId: string,
		sendMail: (client: MailClient) => void,
	): TurnSpawnFn {
		const deliverThenFinish = async (): Promise<void> => {
			await Bun.sleep(20);
			const store = createMailStore(ctx.mailDbPath);
			try {
				sendMail(createMailClient(store));
			} finally {
				store.close();
			}
			emitFakeTurn(fake, { sessionId });
			fake._exit(0);
		};
		return () => {
			// Fire-and-forget: the mail is delivered while the runner is mid-turn.
			void deliverThenFinish();
			return fake;
		};
	}

	/** Mail sender: a `worker_done` protocol message from `from` reporting one out-of-scope file. */
	function sendWorkerDone(from: string): (client: MailClient) => void {
		return (client) => {
			client.sendProtocol({
				from,
				to: "lead",
				subject: "Worker done",
				body: "ok",
				type: "worker_done",
				priority: "normal",
				payload: {
					taskId: "t",
					branch: "b",
					exitCode: 0,
					filesModified: [OUT_OF_SCOPE_FILE],
				},
			});
		};
	}

	/** Scope-detect stub that always reports the out-of-scope file with the given reasons. */
	function scopeDetectStub(
		expansionReasons: string[],
	): () => { violations: string[]; expansionReasons: string[] } {
		return () => ({
			violations: [OUT_OF_SCOPE_FILE],
			expansionReasons: [...expansionReasons],
		});
	}

	test("builder scope violation without justification emits warn log + scope_violation event", async () => {
		seedSession(ctx.sessionsDbPath, { agentName: "violator", state: "working" });
		await writeOverlayWithScope([IN_SCOPE_FILE]);

		const { runtime } = makeSpyRuntime();
		const fake = makeFakeProc();
		const spawnFn = spawnAfterMail(fake, "violator-session", sendWorkerDone("violator"));
		const { logs, logger } = captureLogs();

		const result = await runTurn({
			...makeRunOpts(ctx, "violator", {
				runtime,
				_spawnFn: spawnFn,
				_logWarning: logger,
			}),
			_scopeDetect: scopeDetectStub([]),
		});

		expect(result.terminalMailObserved).toBe(true);
		expect(result.finalState).toBe("completed");

		const warning = findScopeWarning(logs);
		expect(warning).toBeDefined();
		expect(warning?.message).toContain("src/other.ts");

		const eventStore = createEventStore(ctx.eventsDbPath);
		try {
			const recorded = eventStore.getByAgent("violator");

			// Look for a warn-level custom event whose JSON payload is typed
			// "scope_violation"; unparseable payloads never match.
			const violationEvent = recorded.find((e) => {
				const isCustomWarn = e.eventType === "custom" && e.level === "warn";
				if (!isCustomWarn || !e.data) return false;
				try {
					const parsed = JSON.parse(e.data) as { type?: string };
					return parsed.type === "scope_violation";
				} catch {
					return false;
				}
			});
			expect(violationEvent).toBeDefined();

			const payload = JSON.parse(violationEvent?.data ?? "{}") as {
				type: string;
				violations: string[];
				fileScope: string[];
			};
			expect(payload.violations).toEqual(["src/other.ts"]);
			expect(payload.fileScope).toEqual(["src/agents/in-scope.ts"]);
		} finally {
			eventStore.close();
		}
	});

	test("expansion_reason in commit log suppresses the warning", async () => {
		seedSession(ctx.sessionsDbPath, { agentName: "justified", state: "working" });
		await writeOverlayWithScope([IN_SCOPE_FILE]);

		const { runtime } = makeSpyRuntime();
		const fake = makeFakeProc();
		const spawnFn = spawnAfterMail(fake, "justified-session", sendWorkerDone("justified"));
		const { logs, logger } = captureLogs();

		const result = await runTurn({
			...makeRunOpts(ctx, "justified", {
				runtime,
				_spawnFn: spawnFn,
				_logWarning: logger,
			}),
			_scopeDetect: scopeDetectStub(["needed shared type"]),
		});

		expect(result.terminalMailObserved).toBe(true);
		expect(findScopeWarning(logs)).toBeUndefined();

		const eventStore = createEventStore(ctx.eventsDbPath);
		try {
			// No recorded event may even mention scope_violation in its payload.
			const recorded = eventStore.getByAgent("justified");
			const violationEvent = recorded.find(
				(e) => e.data?.includes("scope_violation") ?? false,
			);
			expect(violationEvent).toBeUndefined();
		} finally {
			eventStore.close();
		}
	});

	test("prior scope_expansion mail suppresses the warning", async () => {
		seedSession(ctx.sessionsDbPath, { agentName: "premail", state: "working" });
		await writeOverlayWithScope([IN_SCOPE_FILE]);

		// Pre-seed: a scope_expansion-prefixed mail from this agent, sent
		// before the turn starts.
		const seedStore = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(seedStore).send({
				from: "premail",
				to: "lead",
				subject: "scope_expansion: needed shared type",
				body: "heads up",
				type: "status",
				priority: "normal",
			});
		} finally {
			seedStore.close();
		}

		const { runtime } = makeSpyRuntime();
		const fake = makeFakeProc();
		const spawnFn = spawnAfterMail(fake, "premail-session", sendWorkerDone("premail"));
		const { logs, logger } = captureLogs();

		await runTurn({
			...makeRunOpts(ctx, "premail", {
				runtime,
				_spawnFn: spawnFn,
				_logWarning: logger,
			}),
			_scopeDetect: scopeDetectStub([]),
		});

		expect(findScopeWarning(logs)).toBeUndefined();
	});

	test("scout capability skips scope detection", async () => {
		seedSession(ctx.sessionsDbPath, {
			agentName: "scout-x",
			capability: "scout",
			state: "working",
		});
		await writeOverlayWithScope([IN_SCOPE_FILE]);

		const { runtime } = makeSpyRuntime();
		const fake = makeFakeProc();
		const spawnFn = spawnAfterMail(fake, "scout-x-session", (client) => {
			client.send({
				from: "scout-x",
				to: "lead",
				subject: "Done",
				body: "ok",
				type: "result",
				priority: "normal",
			});
		});

		// The stub records whether the runner ever consulted it.
		let detectCalled = false;
		await runTurn({
			...makeRunOpts(ctx, "scout-x", {
				runtime,
				_spawnFn: spawnFn,
				capability: "scout",
			}),
			_scopeDetect: () => {
				detectCalled = true;
				return { violations: [], expansionReasons: [] };
			},
		});

		expect(detectCalled).toBe(false);
	});
});