@os-eco/overstory-cli 0.9.3 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -18
- package/agents/builder.md +9 -8
- package/agents/coordinator.md +6 -6
- package/agents/lead.md +98 -82
- package/agents/merger.md +25 -14
- package/agents/reviewer.md +22 -16
- package/agents/scout.md +17 -12
- package/package.json +6 -3
- package/src/agents/capabilities.test.ts +85 -0
- package/src/agents/capabilities.ts +125 -0
- package/src/agents/headless-mail-injector.test.ts +448 -0
- package/src/agents/headless-mail-injector.ts +211 -0
- package/src/agents/headless-prompt.test.ts +102 -0
- package/src/agents/headless-prompt.ts +68 -0
- package/src/agents/hooks-deployer.test.ts +514 -14
- package/src/agents/hooks-deployer.ts +141 -0
- package/src/agents/overlay.test.ts +4 -4
- package/src/agents/overlay.ts +30 -8
- package/src/agents/turn-lock.test.ts +181 -0
- package/src/agents/turn-lock.ts +235 -0
- package/src/agents/turn-runner-dispatch.test.ts +182 -0
- package/src/agents/turn-runner-dispatch.ts +105 -0
- package/src/agents/turn-runner.test.ts +1450 -0
- package/src/agents/turn-runner.ts +1166 -0
- package/src/commands/clean.ts +56 -1
- package/src/commands/completions.test.ts +4 -1
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +205 -6
- package/src/commands/dashboard.test.ts +188 -0
- package/src/commands/dashboard.ts +13 -3
- package/src/commands/doctor.ts +94 -77
- package/src/commands/group.test.ts +94 -0
- package/src/commands/group.ts +49 -20
- package/src/commands/init.test.ts +8 -0
- package/src/commands/init.ts +8 -1
- package/src/commands/log.test.ts +56 -11
- package/src/commands/log.ts +134 -69
- package/src/commands/mail.test.ts +162 -0
- package/src/commands/mail.ts +64 -9
- package/src/commands/merge.test.ts +112 -1
- package/src/commands/merge.ts +17 -4
- package/src/commands/monitor.ts +2 -1
- package/src/commands/nudge.test.ts +351 -4
- package/src/commands/nudge.ts +356 -34
- package/src/commands/run.test.ts +43 -7
- package/src/commands/serve/build.test.ts +202 -0
- package/src/commands/serve/build.ts +206 -0
- package/src/commands/serve/coordinator-actions.test.ts +339 -0
- package/src/commands/serve/coordinator-actions.ts +408 -0
- package/src/commands/serve/dev.test.ts +168 -0
- package/src/commands/serve/dev.ts +117 -0
- package/src/commands/serve/mail-actions.test.ts +312 -0
- package/src/commands/serve/mail-actions.ts +167 -0
- package/src/commands/serve/rest.test.ts +1323 -0
- package/src/commands/serve/rest.ts +708 -0
- package/src/commands/serve/static.ts +51 -0
- package/src/commands/serve/ws.test.ts +361 -0
- package/src/commands/serve/ws.ts +332 -0
- package/src/commands/serve.test.ts +459 -0
- package/src/commands/serve.ts +565 -0
- package/src/commands/sling.test.ts +85 -1
- package/src/commands/sling.ts +153 -64
- package/src/commands/status.test.ts +9 -0
- package/src/commands/status.ts +12 -4
- package/src/commands/stop.test.ts +174 -1
- package/src/commands/stop.ts +107 -8
- package/src/commands/supervisor.ts +2 -1
- package/src/commands/watch.test.ts +49 -4
- package/src/commands/watch.ts +153 -28
- package/src/commands/worktree.test.ts +319 -3
- package/src/commands/worktree.ts +86 -0
- package/src/config.test.ts +78 -0
- package/src/config.ts +43 -1
- package/src/doctor/consistency.test.ts +106 -0
- package/src/doctor/consistency.ts +50 -3
- package/src/doctor/serve.test.ts +95 -0
- package/src/doctor/serve.ts +86 -0
- package/src/doctor/types.ts +2 -1
- package/src/doctor/watchdog.ts +57 -1
- package/src/events/tailer.test.ts +234 -1
- package/src/events/tailer.ts +90 -0
- package/src/index.ts +53 -6
- package/src/json.ts +29 -0
- package/src/mail/client.ts +15 -2
- package/src/mail/store.test.ts +82 -0
- package/src/mail/store.ts +41 -4
- package/src/merge/lock.test.ts +149 -0
- package/src/merge/lock.ts +140 -0
- package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
- package/src/runtimes/claude.test.ts +791 -1
- package/src/runtimes/claude.ts +323 -1
- package/src/runtimes/connections.test.ts +141 -1
- package/src/runtimes/connections.ts +73 -4
- package/src/runtimes/headless-connection.test.ts +264 -0
- package/src/runtimes/headless-connection.ts +158 -0
- package/src/runtimes/types.ts +10 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.test.ts +390 -24
- package/src/sessions/store.ts +184 -19
- package/src/test-setup.test.ts +31 -0
- package/src/test-setup.ts +28 -0
- package/src/types.ts +56 -1
- package/src/utils/pid.test.ts +85 -1
- package/src/utils/pid.ts +86 -1
- package/src/utils/process-scan.test.ts +53 -0
- package/src/utils/process-scan.ts +76 -0
- package/src/watchdog/daemon.test.ts +1520 -411
- package/src/watchdog/daemon.ts +442 -83
- package/src/watchdog/health.test.ts +157 -0
- package/src/watchdog/health.ts +92 -25
- package/src/worktree/process.test.ts +71 -0
- package/src/worktree/process.ts +25 -5
- package/src/worktree/tmux.test.ts +39 -0
- package/src/worktree/tmux.ts +23 -3
- package/templates/CLAUDE.md.tmpl +19 -8
- package/templates/overlay.md.tmpl +3 -2
|
@@ -0,0 +1,1450 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { createEventStore } from "../events/store.ts";
|
|
7
|
+
import { createMailClient } from "../mail/client.ts";
|
|
8
|
+
import { createMailStore } from "../mail/store.ts";
|
|
9
|
+
import { ClaudeRuntime } from "../runtimes/claude.ts";
|
|
10
|
+
import type { AgentRuntime, DirectSpawnOpts } from "../runtimes/types.ts";
|
|
11
|
+
import { createSessionStore } from "../sessions/store.ts";
|
|
12
|
+
import type { AgentSession, ResolvedModel } from "../types.ts";
|
|
13
|
+
import { _resetInProcessLocks, readTurnLock } from "./turn-lock.ts";
|
|
14
|
+
import {
|
|
15
|
+
type RunnerLogger,
|
|
16
|
+
runTurn,
|
|
17
|
+
type TurnSpawnFn,
|
|
18
|
+
type TurnSubprocess,
|
|
19
|
+
} from "./turn-runner.ts";
|
|
20
|
+
|
|
21
|
+
// ---------- fake subprocess plumbing ----------
|
|
22
|
+
|
|
23
|
+
// Test double for the subprocess surface consumed by turn-runner. Extends the
// production TurnSubprocess contract with `_`-prefixed hooks the tests use to
// drive stdout, exit, and kill behavior deterministically.
interface FakeProc extends TurnSubprocess {
  // Everything written to stdin, decoded to strings, in arrival order.
  _writes: string[];
  // Every signal passed to kill(), including calls made after the first kill.
  _killSignals: Array<string | number | undefined>;
  // True once kill() has run (backed by a getter installed in makeFakeProc).
  _killed: boolean;
  // Enqueue one line (with a trailing newline) on the fake stdout stream.
  _pushLine(line: string): void;
  // Close stdout without settling `exited`; idempotent.
  _closeStdout(): void;
  // Close stdout and settle `exited` with the given code; idempotent.
  _exit(code: number | null): void;
  // Replace the stderr stream a test wants the runner to observe.
  _setStderr(stream: ReadableStream<Uint8Array> | null): void;
  stderr?: ReadableStream<Uint8Array> | null;
}

// Monotonic pid source so every fake process gets a distinct pid.
let fakeProcCounter = 1000;
|
|
35
|
+
|
|
36
|
+
function makeFakeProc(): FakeProc {
|
|
37
|
+
let stdoutController!: ReadableStreamDefaultController<Uint8Array>;
|
|
38
|
+
const stdout = new ReadableStream<Uint8Array>({
|
|
39
|
+
start(c) {
|
|
40
|
+
stdoutController = c;
|
|
41
|
+
},
|
|
42
|
+
});
|
|
43
|
+
let stdoutClosed = false;
|
|
44
|
+
const closeStdout = (): void => {
|
|
45
|
+
if (stdoutClosed) return;
|
|
46
|
+
stdoutClosed = true;
|
|
47
|
+
try {
|
|
48
|
+
stdoutController.close();
|
|
49
|
+
} catch {
|
|
50
|
+
// already closed
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
const writes: string[] = [];
|
|
55
|
+
|
|
56
|
+
let resolveExited!: (code: number | null) => void;
|
|
57
|
+
const exited = new Promise<number | null>((resolve) => {
|
|
58
|
+
resolveExited = resolve;
|
|
59
|
+
});
|
|
60
|
+
let exitedDone = false;
|
|
61
|
+
const finishExit = (code: number | null): void => {
|
|
62
|
+
if (exitedDone) return;
|
|
63
|
+
exitedDone = true;
|
|
64
|
+
resolveExited(code);
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
const killSignals: Array<string | number | undefined> = [];
|
|
68
|
+
let killed = false;
|
|
69
|
+
|
|
70
|
+
const proc: FakeProc = {
|
|
71
|
+
pid: fakeProcCounter++,
|
|
72
|
+
stdin: {
|
|
73
|
+
write(data: string | Uint8Array): number {
|
|
74
|
+
const s = typeof data === "string" ? data : new TextDecoder().decode(data);
|
|
75
|
+
writes.push(s);
|
|
76
|
+
return s.length;
|
|
77
|
+
},
|
|
78
|
+
end(): void {
|
|
79
|
+
// no-op for fakes; production Bun.spawn closes the pipe.
|
|
80
|
+
},
|
|
81
|
+
},
|
|
82
|
+
stdout,
|
|
83
|
+
exited,
|
|
84
|
+
kill(signal?: string | number): void {
|
|
85
|
+
killSignals.push(signal);
|
|
86
|
+
if (killed) return;
|
|
87
|
+
killed = true;
|
|
88
|
+
closeStdout();
|
|
89
|
+
finishExit(null);
|
|
90
|
+
},
|
|
91
|
+
_writes: writes,
|
|
92
|
+
_killSignals: killSignals,
|
|
93
|
+
_killed: false,
|
|
94
|
+
_pushLine(line: string): void {
|
|
95
|
+
if (stdoutClosed) return;
|
|
96
|
+
stdoutController.enqueue(new TextEncoder().encode(`${line}\n`));
|
|
97
|
+
},
|
|
98
|
+
_closeStdout: closeStdout,
|
|
99
|
+
_exit(code: number | null): void {
|
|
100
|
+
closeStdout();
|
|
101
|
+
finishExit(code);
|
|
102
|
+
},
|
|
103
|
+
_setStderr(stream: ReadableStream<Uint8Array> | null): void {
|
|
104
|
+
proc.stderr = stream;
|
|
105
|
+
},
|
|
106
|
+
stderr: null,
|
|
107
|
+
};
|
|
108
|
+
Object.defineProperty(proc, "_killed", {
|
|
109
|
+
get: () => killed,
|
|
110
|
+
});
|
|
111
|
+
return proc;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
function emitFakeTurn(
|
|
115
|
+
proc: FakeProc,
|
|
116
|
+
opts: { sessionId?: string; isError?: boolean; durationMs?: number },
|
|
117
|
+
): void {
|
|
118
|
+
const sessionId = opts.sessionId ?? "session-test";
|
|
119
|
+
proc._pushLine(
|
|
120
|
+
JSON.stringify({
|
|
121
|
+
type: "system",
|
|
122
|
+
subtype: "init",
|
|
123
|
+
session_id: sessionId,
|
|
124
|
+
model: "claude-test",
|
|
125
|
+
}),
|
|
126
|
+
);
|
|
127
|
+
proc._pushLine(
|
|
128
|
+
JSON.stringify({
|
|
129
|
+
type: "result",
|
|
130
|
+
subtype: "success",
|
|
131
|
+
session_id: sessionId,
|
|
132
|
+
result: "done",
|
|
133
|
+
is_error: opts.isError ?? false,
|
|
134
|
+
duration_ms: opts.durationMs ?? 50,
|
|
135
|
+
num_turns: 1,
|
|
136
|
+
}),
|
|
137
|
+
);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// ---------- runtime spy ----------
|
|
141
|
+
|
|
142
|
+
function makeSpyRuntime(): {
|
|
143
|
+
runtime: AgentRuntime;
|
|
144
|
+
spawnCalls: Array<DirectSpawnOpts & { resumeSessionId?: string | null }>;
|
|
145
|
+
} {
|
|
146
|
+
const calls: Array<DirectSpawnOpts & { resumeSessionId?: string | null }> = [];
|
|
147
|
+
const base = new ClaudeRuntime();
|
|
148
|
+
const original = base.buildDirectSpawn.bind(base);
|
|
149
|
+
// Patch the instance to capture each call's opts (including the future
|
|
150
|
+
// resumeSessionId field that turn-runner threads through).
|
|
151
|
+
(base as unknown as { buildDirectSpawn: typeof original }).buildDirectSpawn = (
|
|
152
|
+
opts: DirectSpawnOpts,
|
|
153
|
+
) => {
|
|
154
|
+
calls.push({ ...(opts as DirectSpawnOpts & { resumeSessionId?: string | null }) });
|
|
155
|
+
return original(opts);
|
|
156
|
+
};
|
|
157
|
+
return { runtime: base, spawnCalls: calls };
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// ---------- session bootstrap ----------
|
|
161
|
+
|
|
162
|
+
function seedSession(
|
|
163
|
+
sessionsDbPath: string,
|
|
164
|
+
overrides: Partial<AgentSession> & Pick<AgentSession, "agentName">,
|
|
165
|
+
): void {
|
|
166
|
+
const store = createSessionStore(sessionsDbPath);
|
|
167
|
+
try {
|
|
168
|
+
const now = new Date().toISOString();
|
|
169
|
+
store.upsert({
|
|
170
|
+
id: `session-${overrides.agentName}`,
|
|
171
|
+
agentName: overrides.agentName,
|
|
172
|
+
capability: overrides.capability ?? "builder",
|
|
173
|
+
worktreePath: overrides.worktreePath ?? "/tmp/worktree",
|
|
174
|
+
branchName: overrides.branchName ?? "branch",
|
|
175
|
+
taskId: overrides.taskId ?? "task-test",
|
|
176
|
+
tmuxSession: overrides.tmuxSession ?? "",
|
|
177
|
+
state: overrides.state ?? "booting",
|
|
178
|
+
pid: overrides.pid ?? null,
|
|
179
|
+
parentAgent: overrides.parentAgent ?? null,
|
|
180
|
+
depth: overrides.depth ?? 0,
|
|
181
|
+
runId: overrides.runId ?? null,
|
|
182
|
+
startedAt: overrides.startedAt ?? now,
|
|
183
|
+
lastActivity: overrides.lastActivity ?? now,
|
|
184
|
+
escalationLevel: overrides.escalationLevel ?? 0,
|
|
185
|
+
stalledSince: overrides.stalledSince ?? null,
|
|
186
|
+
transcriptPath: overrides.transcriptPath ?? null,
|
|
187
|
+
...(overrides.promptVersion !== undefined ? { promptVersion: overrides.promptVersion } : {}),
|
|
188
|
+
...(overrides.claudeSessionId !== undefined
|
|
189
|
+
? { claudeSessionId: overrides.claudeSessionId }
|
|
190
|
+
: {}),
|
|
191
|
+
});
|
|
192
|
+
} finally {
|
|
193
|
+
store.close();
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
function readSession(sessionsDbPath: string, agentName: string): AgentSession | null {
|
|
198
|
+
const store = createSessionStore(sessionsDbPath);
|
|
199
|
+
try {
|
|
200
|
+
return store.getByName(agentName);
|
|
201
|
+
} finally {
|
|
202
|
+
store.close();
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// ---------- shared fixture context ----------
|
|
207
|
+
|
|
208
|
+
/**
 * Silent diagnostic sink for tests that don't assert on logs. Suppresses the
 * `[turn-runner:error]` stderr mirror so contract-violation messages
 * (overstory-6071) — which are expected for many tests that drive a clean
 * exit without seeding terminal mail — don't pollute the test runner output.
 * Used as the default `_logWarning` in makeRunOpts below.
 */
const silentLogger: RunnerLogger = () => {};
|
|
215
|
+
|
|
216
|
+
// Per-test fixture paths: a single temp dir doubles as worktree and project
// root and hosts the three sqlite DBs (mail / events / sessions) runTurn uses.
interface Ctx {
  overstoryDir: string;
  worktreePath: string;
  projectRoot: string;
  mailDbPath: string;
  eventsDbPath: string;
  sessionsDbPath: string;
}

// Shared model resolution passed to every runTurn call in these tests.
const RESOLVED_MODEL: ResolvedModel = { model: "sonnet", env: {}, isExplicitOverride: false };
|
|
226
|
+
|
|
227
|
+
function makeRunOpts(
|
|
228
|
+
ctx: Ctx,
|
|
229
|
+
agentName: string,
|
|
230
|
+
overrides: {
|
|
231
|
+
runtime: AgentRuntime;
|
|
232
|
+
userTurnNdjson?: string;
|
|
233
|
+
_spawnFn?: TurnSpawnFn;
|
|
234
|
+
abortSignal?: AbortSignal;
|
|
235
|
+
sigkillDelayMs?: number;
|
|
236
|
+
runId?: string | null;
|
|
237
|
+
capability?: string;
|
|
238
|
+
_logWarning?: RunnerLogger;
|
|
239
|
+
},
|
|
240
|
+
): Parameters<typeof runTurn>[0] {
|
|
241
|
+
return {
|
|
242
|
+
agentName,
|
|
243
|
+
capability: overrides.capability ?? "builder",
|
|
244
|
+
overstoryDir: ctx.overstoryDir,
|
|
245
|
+
worktreePath: ctx.worktreePath,
|
|
246
|
+
projectRoot: ctx.projectRoot,
|
|
247
|
+
taskId: "task-test",
|
|
248
|
+
userTurnNdjson:
|
|
249
|
+
overrides.userTurnNdjson ??
|
|
250
|
+
`${JSON.stringify({
|
|
251
|
+
type: "user",
|
|
252
|
+
message: { role: "user", content: [{ type: "text", text: "hello" }] },
|
|
253
|
+
})}\n`,
|
|
254
|
+
runtime: overrides.runtime,
|
|
255
|
+
resolvedModel: RESOLVED_MODEL,
|
|
256
|
+
runId: overrides.runId ?? null,
|
|
257
|
+
mailDbPath: ctx.mailDbPath,
|
|
258
|
+
eventsDbPath: ctx.eventsDbPath,
|
|
259
|
+
sessionsDbPath: ctx.sessionsDbPath,
|
|
260
|
+
...(overrides._spawnFn !== undefined ? { _spawnFn: overrides._spawnFn } : {}),
|
|
261
|
+
...(overrides.abortSignal !== undefined ? { abortSignal: overrides.abortSignal } : {}),
|
|
262
|
+
...(overrides.sigkillDelayMs !== undefined ? { sigkillDelayMs: overrides.sigkillDelayMs } : {}),
|
|
263
|
+
_logWarning: overrides._logWarning ?? silentLogger,
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
function turnPidPathFor(ctx: Ctx, agentName: string): string {
|
|
268
|
+
return join(ctx.overstoryDir, "agents", agentName, "turn.pid");
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// ---------- tests ----------
|
|
272
|
+
|
|
273
|
+
describe("runTurn", () => {
|
|
274
|
+
let ctx: Ctx;
|
|
275
|
+
|
|
276
|
+
beforeEach(async () => {
|
|
277
|
+
const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-turnrunner-test-"));
|
|
278
|
+
ctx = {
|
|
279
|
+
overstoryDir,
|
|
280
|
+
worktreePath: overstoryDir, // arbitrary; spawn is faked
|
|
281
|
+
projectRoot: overstoryDir,
|
|
282
|
+
mailDbPath: join(overstoryDir, "mail.db"),
|
|
283
|
+
eventsDbPath: join(overstoryDir, "events.db"),
|
|
284
|
+
sessionsDbPath: join(overstoryDir, "sessions.db"),
|
|
285
|
+
};
|
|
286
|
+
_resetInProcessLocks();
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
afterEach(async () => {
|
|
290
|
+
_resetInProcessLocks();
|
|
291
|
+
await rm(ctx.overstoryDir, { recursive: true, force: true });
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
test("empty userTurnNdjson is a no-op: no spawn, no state transition", async () => {
|
|
295
|
+
seedSession(ctx.sessionsDbPath, { agentName: "noop", state: "working" });
|
|
296
|
+
const { runtime } = makeSpyRuntime();
|
|
297
|
+
let spawnCount = 0;
|
|
298
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
299
|
+
spawnCount++;
|
|
300
|
+
return makeFakeProc();
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
const result = await runTurn(
|
|
304
|
+
makeRunOpts(ctx, "noop", { runtime, userTurnNdjson: "", _spawnFn: spawnFn }),
|
|
305
|
+
);
|
|
306
|
+
|
|
307
|
+
expect(spawnCount).toBe(0);
|
|
308
|
+
expect(result.exitCode).toBeNull();
|
|
309
|
+
expect(result.cleanResult).toBe(false);
|
|
310
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
311
|
+
expect(result.durationMs).toBe(0);
|
|
312
|
+
expect(result.initialState).toBe("working");
|
|
313
|
+
expect(result.finalState).toBe("working");
|
|
314
|
+
|
|
315
|
+
// Session state must remain untouched.
|
|
316
|
+
const after = readSession(ctx.sessionsDbPath, "noop");
|
|
317
|
+
expect(after?.state).toBe("working");
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
test("happy path: spawn, drain events, capture session id, contract violation surfaces as completed", async () => {
|
|
321
|
+
seedSession(ctx.sessionsDbPath, { agentName: "alpha", state: "booting" });
|
|
322
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
323
|
+
|
|
324
|
+
const fake = makeFakeProc();
|
|
325
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
326
|
+
emitFakeTurn(fake, { sessionId: "claude-sess-A", isError: false });
|
|
327
|
+
fake._exit(0);
|
|
328
|
+
return fake;
|
|
329
|
+
};
|
|
330
|
+
|
|
331
|
+
// Suppress the contract-violation error log (overstory-6071) so it
|
|
332
|
+
// doesn't leak to test stderr; assertions below still cover the case.
|
|
333
|
+
const logger: RunnerLogger = () => {};
|
|
334
|
+
const result = await runTurn(
|
|
335
|
+
makeRunOpts(ctx, "alpha", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
|
|
336
|
+
);
|
|
337
|
+
|
|
338
|
+
expect(result.exitCode).toBe(0);
|
|
339
|
+
expect(result.cleanResult).toBe(true);
|
|
340
|
+
expect(result.newSessionId).toBe("claude-sess-A");
|
|
341
|
+
expect(result.resumeMismatch).toBe(false);
|
|
342
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
343
|
+
// initial=booting, clean exit but no terminal mail → contract violation,
|
|
344
|
+
// settles to `completed` (overstory-6071).
|
|
345
|
+
expect(result.initialState).toBe("booting");
|
|
346
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
347
|
+
expect(result.finalState).toBe("completed");
|
|
348
|
+
|
|
349
|
+
const after = readSession(ctx.sessionsDbPath, "alpha");
|
|
350
|
+
expect(after?.state).toBe("completed");
|
|
351
|
+
expect(after?.claudeSessionId).toBe("claude-sess-A");
|
|
352
|
+
|
|
353
|
+
// resumeSessionId on first turn is null (no prior id stored).
|
|
354
|
+
expect(spawnCalls.length).toBe(1);
|
|
355
|
+
expect(spawnCalls[0]?.resumeSessionId ?? null).toBeNull();
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
test("re-reads claudeSessionId under the lock — caller view may be stale", async () => {
|
|
359
|
+
seedSession(ctx.sessionsDbPath, {
|
|
360
|
+
agentName: "stale",
|
|
361
|
+
state: "working",
|
|
362
|
+
claudeSessionId: "old-id",
|
|
363
|
+
});
|
|
364
|
+
|
|
365
|
+
// External update BEFORE the runTurn call. runTurn must read this value
|
|
366
|
+
// when it acquires the lock, not the older one any caller might be holding.
|
|
367
|
+
const updateStore = createSessionStore(ctx.sessionsDbPath);
|
|
368
|
+
try {
|
|
369
|
+
updateStore.updateClaudeSessionId("stale", "fresh-id");
|
|
370
|
+
} finally {
|
|
371
|
+
updateStore.close();
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
375
|
+
const fake = makeFakeProc();
|
|
376
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
377
|
+
emitFakeTurn(fake, { sessionId: "fresh-id" }); // same id back; no mismatch
|
|
378
|
+
fake._exit(0);
|
|
379
|
+
return fake;
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
const result = await runTurn(makeRunOpts(ctx, "stale", { runtime, _spawnFn: spawnFn }));
|
|
383
|
+
|
|
384
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("fresh-id");
|
|
385
|
+
expect(result.resumeMismatch).toBe(false);
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
test("resumeMismatch fires when stream-json emits a different session id", async () => {
|
|
389
|
+
seedSession(ctx.sessionsDbPath, {
|
|
390
|
+
agentName: "mismatch",
|
|
391
|
+
state: "working",
|
|
392
|
+
claudeSessionId: "want-resume",
|
|
393
|
+
});
|
|
394
|
+
const { runtime } = makeSpyRuntime();
|
|
395
|
+
|
|
396
|
+
const fake = makeFakeProc();
|
|
397
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
398
|
+
emitFakeTurn(fake, { sessionId: "actually-new" });
|
|
399
|
+
fake._exit(0);
|
|
400
|
+
return fake;
|
|
401
|
+
};
|
|
402
|
+
|
|
403
|
+
const result = await runTurn(makeRunOpts(ctx, "mismatch", { runtime, _spawnFn: spawnFn }));
|
|
404
|
+
|
|
405
|
+
expect(result.newSessionId).toBe("actually-new");
|
|
406
|
+
expect(result.resumeMismatch).toBe(true);
|
|
407
|
+
|
|
408
|
+
// SessionStore overwritten with the observed value.
|
|
409
|
+
const after = readSession(ctx.sessionsDbPath, "mismatch");
|
|
410
|
+
expect(after?.claudeSessionId).toBe("actually-new");
|
|
411
|
+
|
|
412
|
+
// overstory-088b C2: a structured warn event lands in events.db so
|
|
413
|
+
// observability mirrors the runner diagnostic. Carries both the requested
|
|
414
|
+
// and observed session ids in the data payload.
|
|
415
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
416
|
+
try {
|
|
417
|
+
const events = eventStore.getByAgent("mismatch");
|
|
418
|
+
const mismatchEvent = events.find((e) => {
|
|
419
|
+
if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
|
|
420
|
+
try {
|
|
421
|
+
const parsed = JSON.parse(e.data) as { type?: string };
|
|
422
|
+
return parsed.type === "resume_mismatch";
|
|
423
|
+
} catch {
|
|
424
|
+
return false;
|
|
425
|
+
}
|
|
426
|
+
});
|
|
427
|
+
expect(mismatchEvent).toBeDefined();
|
|
428
|
+
const payload = JSON.parse(mismatchEvent?.data ?? "{}") as {
|
|
429
|
+
type: string;
|
|
430
|
+
requestedSessionId: string;
|
|
431
|
+
observedSessionId: string;
|
|
432
|
+
};
|
|
433
|
+
expect(payload.requestedSessionId).toBe("want-resume");
|
|
434
|
+
expect(payload.observedSessionId).toBe("actually-new");
|
|
435
|
+
} finally {
|
|
436
|
+
eventStore.close();
|
|
437
|
+
}
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
test("resume match (sid === priorSessionId) does NOT emit a mismatch event", async () => {
|
|
441
|
+
seedSession(ctx.sessionsDbPath, {
|
|
442
|
+
agentName: "match",
|
|
443
|
+
state: "working",
|
|
444
|
+
claudeSessionId: "same-id",
|
|
445
|
+
});
|
|
446
|
+
const { runtime } = makeSpyRuntime();
|
|
447
|
+
|
|
448
|
+
const fake = makeFakeProc();
|
|
449
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
450
|
+
emitFakeTurn(fake, { sessionId: "same-id" });
|
|
451
|
+
fake._exit(0);
|
|
452
|
+
return fake;
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
const result = await runTurn(makeRunOpts(ctx, "match", { runtime, _spawnFn: spawnFn }));
|
|
456
|
+
expect(result.resumeMismatch).toBe(false);
|
|
457
|
+
|
|
458
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
459
|
+
try {
|
|
460
|
+
const events = eventStore.getByAgent("match");
|
|
461
|
+
const mismatchEvent = events.find((e) => e.data?.includes("resume_mismatch") ?? false);
|
|
462
|
+
expect(mismatchEvent).toBeUndefined();
|
|
463
|
+
} finally {
|
|
464
|
+
eventStore.close();
|
|
465
|
+
}
|
|
466
|
+
});
|
|
467
|
+
|
|
468
|
+
test("terminalMailObserved + clean exit → completed state", async () => {
|
|
469
|
+
seedSession(ctx.sessionsDbPath, { agentName: "wd", state: "working" });
|
|
470
|
+
const { runtime } = makeSpyRuntime();
|
|
471
|
+
|
|
472
|
+
// Pre-seed: a worker_done from a PRIOR turn (well in the past). Must not
|
|
473
|
+
// confuse this turn's snapshot.
|
|
474
|
+
const mailStore = createMailStore(ctx.mailDbPath);
|
|
475
|
+
try {
|
|
476
|
+
const client = createMailClient(mailStore);
|
|
477
|
+
client.sendProtocol({
|
|
478
|
+
from: "wd",
|
|
479
|
+
to: "lead",
|
|
480
|
+
subject: "Worker done: prior",
|
|
481
|
+
body: "old",
|
|
482
|
+
type: "worker_done",
|
|
483
|
+
priority: "normal",
|
|
484
|
+
payload: {
|
|
485
|
+
taskId: "old",
|
|
486
|
+
branch: "old",
|
|
487
|
+
exitCode: 0,
|
|
488
|
+
filesModified: [],
|
|
489
|
+
},
|
|
490
|
+
});
|
|
491
|
+
} finally {
|
|
492
|
+
mailStore.close();
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
// Simulate fresh worker_done sent during the spawn.
|
|
496
|
+
const fake = makeFakeProc();
|
|
497
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
498
|
+
(async () => {
|
|
499
|
+
// Wait long enough for snapshot timestamp to be < this insert.
|
|
500
|
+
await Bun.sleep(20);
|
|
501
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
502
|
+
try {
|
|
503
|
+
const c = createMailClient(s);
|
|
504
|
+
c.sendProtocol({
|
|
505
|
+
from: "wd",
|
|
506
|
+
to: "lead",
|
|
507
|
+
subject: "Worker done: this turn",
|
|
508
|
+
body: "new",
|
|
509
|
+
type: "worker_done",
|
|
510
|
+
priority: "normal",
|
|
511
|
+
payload: {
|
|
512
|
+
taskId: "this-turn",
|
|
513
|
+
branch: "branch",
|
|
514
|
+
exitCode: 0,
|
|
515
|
+
filesModified: [],
|
|
516
|
+
},
|
|
517
|
+
});
|
|
518
|
+
} finally {
|
|
519
|
+
s.close();
|
|
520
|
+
}
|
|
521
|
+
emitFakeTurn(fake, { sessionId: "wd-session" });
|
|
522
|
+
fake._exit(0);
|
|
523
|
+
})();
|
|
524
|
+
return fake;
|
|
525
|
+
};
|
|
526
|
+
|
|
527
|
+
const result = await runTurn(makeRunOpts(ctx, "wd", { runtime, _spawnFn: spawnFn }));
|
|
528
|
+
|
|
529
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
530
|
+
expect(result.cleanResult).toBe(true);
|
|
531
|
+
expect(result.finalState).toBe("completed");
|
|
532
|
+
|
|
533
|
+
const after = readSession(ctx.sessionsDbPath, "wd");
|
|
534
|
+
expect(after?.state).toBe("completed");
|
|
535
|
+
});
|
|
536
|
+
|
|
537
|
+
test("clean exit but no worker_done → contract violation, completed + error log (overstory-6071)", async () => {
|
|
538
|
+
// Pre-fix: claude exiting cleanly without sending the capability's
|
|
539
|
+
// terminal mail left the session at `working` forever — the process is
|
|
540
|
+
// gone but the row looks alive. Now the runner logs an error and
|
|
541
|
+
// settles to `completed` so operators see something terminal.
|
|
542
|
+
seedSession(ctx.sessionsDbPath, { agentName: "idle", state: "working" });
|
|
543
|
+
const { runtime } = makeSpyRuntime();
|
|
544
|
+
const fake = makeFakeProc();
|
|
545
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
546
|
+
emitFakeTurn(fake, { sessionId: "idle-session", isError: false });
|
|
547
|
+
fake._exit(0);
|
|
548
|
+
return fake;
|
|
549
|
+
};
|
|
550
|
+
|
|
551
|
+
const errors: Array<{ level: string; message: string }> = [];
|
|
552
|
+
const logger: RunnerLogger = (level, message) => {
|
|
553
|
+
errors.push({ level, message });
|
|
554
|
+
};
|
|
555
|
+
|
|
556
|
+
const result = await runTurn(
|
|
557
|
+
makeRunOpts(ctx, "idle", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
|
|
558
|
+
);
|
|
559
|
+
|
|
560
|
+
expect(result.cleanResult).toBe(true);
|
|
561
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
562
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
563
|
+
expect(result.finalState).toBe("completed");
|
|
564
|
+
|
|
565
|
+
// Contract violation must surface via the runner diagnostic sink.
|
|
566
|
+
const violation = errors.find(
|
|
567
|
+
(e) => e.level === "error" && e.message.includes("without sending terminal mail"),
|
|
568
|
+
);
|
|
569
|
+
expect(violation).toBeDefined();
|
|
570
|
+
|
|
571
|
+
const after = readSession(ctx.sessionsDbPath, "idle");
|
|
572
|
+
expect(after?.state).toBe("completed");
|
|
573
|
+
});
|
|
574
|
+
|
|
575
|
+
test("merger: merged mail counts as terminal → completed", async () => {
|
|
576
|
+
seedSession(ctx.sessionsDbPath, {
|
|
577
|
+
agentName: "mg",
|
|
578
|
+
capability: "merger",
|
|
579
|
+
state: "working",
|
|
580
|
+
});
|
|
581
|
+
const { runtime } = makeSpyRuntime();
|
|
582
|
+
|
|
583
|
+
const fake = makeFakeProc();
|
|
584
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
585
|
+
(async () => {
|
|
586
|
+
await Bun.sleep(20);
|
|
587
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
588
|
+
try {
|
|
589
|
+
createMailClient(s).sendProtocol({
|
|
590
|
+
from: "mg",
|
|
591
|
+
to: "lead",
|
|
592
|
+
subject: "Merged: feature/foo",
|
|
593
|
+
body: "ok",
|
|
594
|
+
type: "merged",
|
|
595
|
+
priority: "normal",
|
|
596
|
+
payload: { branch: "feature/foo", taskId: "t-mg", tier: "clean-merge" },
|
|
597
|
+
});
|
|
598
|
+
} finally {
|
|
599
|
+
s.close();
|
|
600
|
+
}
|
|
601
|
+
emitFakeTurn(fake, { sessionId: "mg-session" });
|
|
602
|
+
fake._exit(0);
|
|
603
|
+
})();
|
|
604
|
+
return fake;
|
|
605
|
+
};
|
|
606
|
+
|
|
607
|
+
const result = await runTurn(
|
|
608
|
+
makeRunOpts(ctx, "mg", { runtime, _spawnFn: spawnFn, capability: "merger" }),
|
|
609
|
+
);
|
|
610
|
+
|
|
611
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
612
|
+
expect(result.finalState).toBe("completed");
|
|
613
|
+
});
|
|
614
|
+
|
|
615
|
+
test("merger: merge_failed mail also counts as terminal → completed", async () => {
|
|
616
|
+
seedSession(ctx.sessionsDbPath, {
|
|
617
|
+
agentName: "mgf",
|
|
618
|
+
capability: "merger",
|
|
619
|
+
state: "working",
|
|
620
|
+
});
|
|
621
|
+
const { runtime } = makeSpyRuntime();
|
|
622
|
+
|
|
623
|
+
const fake = makeFakeProc();
|
|
624
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
625
|
+
(async () => {
|
|
626
|
+
await Bun.sleep(20);
|
|
627
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
628
|
+
try {
|
|
629
|
+
createMailClient(s).sendProtocol({
|
|
630
|
+
from: "mgf",
|
|
631
|
+
to: "lead",
|
|
632
|
+
subject: "Merge failed: feature/bar",
|
|
633
|
+
body: "conflict",
|
|
634
|
+
type: "merge_failed",
|
|
635
|
+
priority: "high",
|
|
636
|
+
payload: {
|
|
637
|
+
branch: "feature/bar",
|
|
638
|
+
taskId: "t-mgf",
|
|
639
|
+
conflictFiles: ["src/foo.ts"],
|
|
640
|
+
errorMessage: "conflict",
|
|
641
|
+
},
|
|
642
|
+
});
|
|
643
|
+
} finally {
|
|
644
|
+
s.close();
|
|
645
|
+
}
|
|
646
|
+
emitFakeTurn(fake, { sessionId: "mgf-session" });
|
|
647
|
+
fake._exit(0);
|
|
648
|
+
})();
|
|
649
|
+
return fake;
|
|
650
|
+
};
|
|
651
|
+
|
|
652
|
+
const result = await runTurn(
|
|
653
|
+
makeRunOpts(ctx, "mgf", { runtime, _spawnFn: spawnFn, capability: "merger" }),
|
|
654
|
+
);
|
|
655
|
+
|
|
656
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
657
|
+
expect(result.finalState).toBe("completed");
|
|
658
|
+
});
|
|
659
|
+
|
|
660
|
+
test("scout: --type result mail counts as terminal → completed (overstory-1a4c)", async () => {
  // Regression for overstory-1a4c: workers frequently send `--type result`
  // instead of `--type worker_done` because both are valid mail types and
  // the agent prompts described `result` as a completion signal in some
  // examples. Pre-fix, this left sessions stuck in `working` until the
  // watchdog flipped them to `zombie`. The runner now accepts `result` as
  // a terminal type for builder/scout/reviewer/lead.
  seedSession(ctx.sessionsDbPath, {
    agentName: "scout-result",
    capability: "scout",
    state: "working",
  });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    (async () => {
      await Bun.sleep(20);
      const s = createMailStore(ctx.mailDbPath);
      try {
        // Note: plain send(), not sendProtocol() — `result` here is a
        // generic mail type rather than a structured protocol message.
        createMailClient(s).send({
          from: "scout-result",
          to: "coordinator",
          subject: "Spec ready: overstory-4670",
          body: "Spec written.",
          type: "result",
          priority: "normal",
        });
      } finally {
        s.close();
      }
      emitFakeTurn(fake, { sessionId: "scout-result-session" });
      fake._exit(0);
    })();
    return fake;
  };

  const result = await runTurn(
    makeRunOpts(ctx, "scout-result", { runtime, _spawnFn: spawnFn, capability: "scout" }),
  );

  expect(result.terminalMailObserved).toBe(true);
  expect(result.cleanResult).toBe(true);
  expect(result.finalState).toBe("completed");
});
|
|
705
|
+
|
|
706
|
+
test("merger: worker_done is NOT terminal for merger → contract violation, completed", async () => {
  // Mergers must send `merged` or `merge_failed`. A `worker_done` from a
  // merger doesn't count as terminal, so this is the same contract
  // violation as overstory-6071: clean exit, no terminal mail. Pre-fix
  // this stuck at `working`; now it settles to `completed` with a loud
  // error log.
  seedSession(ctx.sessionsDbPath, {
    agentName: "mg-wd",
    capability: "merger",
    state: "working",
  });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Simulated agent: sends the WRONG terminal type for a merger, then
    // exits cleanly.
    (async () => {
      await Bun.sleep(20);
      const s = createMailStore(ctx.mailDbPath);
      try {
        createMailClient(s).sendProtocol({
          from: "mg-wd",
          to: "lead",
          subject: "Worker done (wrong type for merger)",
          body: "x",
          type: "worker_done",
          priority: "normal",
          payload: { taskId: "t", branch: "b", exitCode: 0, filesModified: [] },
        });
      } finally {
        s.close();
      }
      emitFakeTurn(fake, { sessionId: "mg-wd-session" });
      fake._exit(0);
    })();
    return fake;
  };

  // Swallow runner logging; this test only asserts the result flags.
  const logger: RunnerLogger = () => {};
  const result = await runTurn(
    makeRunOpts(ctx, "mg-wd", {
      runtime,
      _spawnFn: spawnFn,
      capability: "merger",
      _logWarning: logger,
    }),
  );

  // The mail was sent but is not terminal for this capability.
  expect(result.terminalMailObserved).toBe(false);
  expect(result.terminalMailMissing).toBe(true);
  expect(result.finalState).toBe("completed");
});
|
|
757
|
+
|
|
758
|
+
test("stall watchdog: no parser events for eventStallTimeoutMs → SIGTERM, zombie (overstory-ddb3)", async () => {
  // Pre-fix: a hung claude (alive but stalled — Anthropic API hang,
  // deadlock) would block the parser drain forever because the for-await
  // loop only exits on stdout close. The runner now arms a per-event
  // stall watchdog that resets on every event; on timeout it kills the
  // process via the existing SIGTERM/SIGKILL escalation.
  seedSession(ctx.sessionsDbPath, { agentName: "stalled", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Emit nothing: simulate claude alive but stalled. The stall
    // watchdog must fire and kill the process.
    return fake;
  };

  // Capture runner logs so the stall report can be asserted below.
  const errors: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    errors.push({ level, message });
  };

  const result = await runTurn({
    ...makeRunOpts(ctx, "stalled", {
      runtime,
      _spawnFn: spawnFn,
      _logWarning: logger,
    }),
    eventStallTimeoutMs: 50,
    sigkillDelayMs: 25,
  });

  // SIGTERM first (escalation owns any follow-up SIGKILL); no exit code
  // was observed, and the session lands in `zombie`.
  expect(fake._killSignals[0]).toBe("SIGTERM");
  expect(result.stallAborted).toBe(true);
  expect(result.exitCode).toBeNull();
  expect(result.finalState).toBe("zombie");

  // The stall must be surfaced at error level, not silently.
  const stallLog = errors.find(
    (e) => e.level === "error" && e.message.includes("parser stalled"),
  );
  expect(stallLog).toBeDefined();

  const after = readSession(ctx.sessionsDbPath, "stalled");
  expect(after?.state).toBe("zombie");
});
|
|
802
|
+
|
|
803
|
+
test("stall watchdog: events reset the timer — live turns are not killed (overstory-ddb3)", async () => {
  // Per-event reset: a turn whose events keep arriving must not be
  // aborted by the stall watchdog. We give a generous 500ms stall
  // budget and emit several events each separated by ~50ms; the
  // cumulative runtime exceeds the budget, but no inter-event gap
  // does, so a properly resetting timer never fires.
  seedSession(ctx.sessionsDbPath, { agentName: "live", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    (async () => {
      const sessionId = "live-session";
      // Init event first, then a slow drip of assistant chunks.
      fake._pushLine(
        JSON.stringify({
          type: "system",
          subtype: "init",
          session_id: sessionId,
          model: "claude-test",
        }),
      );
      for (let i = 0; i < 6; i++) {
        await Bun.sleep(50);
        fake._pushLine(
          JSON.stringify({
            type: "assistant",
            message: {
              role: "assistant",
              content: [{ type: "text", text: `chunk ${i}` }],
            },
            session_id: sessionId,
          }),
        );
      }
      emitFakeTurn(fake, { sessionId });
      fake._exit(0);
    })();
    return fake;
  };

  const logger: RunnerLogger = () => {};
  const result = await runTurn({
    ...makeRunOpts(ctx, "live", {
      runtime,
      _spawnFn: spawnFn,
      _logWarning: logger,
    }),
    eventStallTimeoutMs: 500,
    sigkillDelayMs: 25,
  });

  expect(result.stallAborted).toBe(false);
  expect(result.exitCode).toBe(0);
  expect(result.cleanResult).toBe(true);
  // Sanity: turn ran longer than the stall budget would allow if the
  // timer didn't reset on each event (6 × 50ms = 300ms minimum).
  expect(result.durationMs).toBeGreaterThanOrEqual(250);
});
|
|
861
|
+
|
|
862
|
+
test("abortSignal triggers SIGTERM, finalState becomes zombie", async () => {
  // External abort mid-turn: the runner must SIGTERM the subprocess and
  // record the session as `zombie` (no clean exit was ever observed).
  seedSession(ctx.sessionsDbPath, { agentName: "to-kill", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const ac = new AbortController();
  const spawnFn: TurnSpawnFn = () => {
    // Emit init but never close — the abort path is what ends this turn.
    fake._pushLine(
      JSON.stringify({
        type: "system",
        subtype: "init",
        session_id: "abort-test",
      }),
    );
    return fake;
  };

  const runPromise = runTurn(
    makeRunOpts(ctx, "to-kill", {
      runtime,
      _spawnFn: spawnFn,
      abortSignal: ac.signal,
      sigkillDelayMs: 25,
    }),
  );

  // Give the parser a chance to consume the init event.
  await Bun.sleep(60);
  ac.abort();
  const result = await runPromise;

  expect(fake._killSignals[0]).toBe("SIGTERM");
  expect(result.exitCode).toBeNull();
  expect(result.finalState).toBe("zombie");

  const after = readSession(ctx.sessionsDbPath, "to-kill");
  expect(after?.state).toBe("zombie");
});
|
|
901
|
+
|
|
902
|
+
// --- Parent-notify paths (overstory-4159, overstory-c772) ---
|
|
903
|
+
//
|
|
904
|
+
// When a turn ends without the capability's terminal mail, the runner emits
|
|
905
|
+
// a synthetic worker_died mail to the parent so the lead does not block on
|
|
906
|
+
// a signal that will never arrive. Three trigger paths:
|
|
907
|
+
// 1. abort (operator or external abortSignal) → finalState=zombie
|
|
908
|
+
// 2. parser stall → finalState=zombie
|
|
909
|
+
// 3. clean exit without terminal mail (terminalMailMissing) → completed
|
|
910
|
+
|
|
911
|
+
test("abort path: emits worker_died to parent with terminatedBy='runner' (overstory-c772)", async () => {
  // Aborted child with a parent: the runner must synthesize a
  // worker_died mail so the parent lead is unblocked.
  seedSession(ctx.sessionsDbPath, {
    agentName: "child-abort",
    state: "working",
    parentAgent: "lead-x",
    taskId: "task-c772",
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const ac = new AbortController();
  const spawnFn: TurnSpawnFn = () => {
    // Init only; the abort below is what ends this turn.
    fake._pushLine(JSON.stringify({ type: "system", subtype: "init", session_id: "abort-mail" }));
    return fake;
  };

  // Shared store so the test can inspect the synthetic mail afterwards.
  const sharedMail = createMailStore(ctx.mailDbPath);
  try {
    const runPromise = runTurn({
      ...makeRunOpts(ctx, "child-abort", {
        runtime,
        _spawnFn: spawnFn,
        abortSignal: ac.signal,
        sigkillDelayMs: 25,
      }),
      _mailStore: sharedMail,
    });
    await Bun.sleep(60);
    ac.abort();
    const result = await runPromise;
    expect(result.finalState).toBe("zombie");

    // Exactly one synthetic worker_died, addressed to the parent.
    const inbox = sharedMail.getAll({ to: "lead-x", type: "worker_died" });
    expect(inbox.length).toBe(1);
    const msg = inbox[0];
    expect(msg?.from).toBe("child-abort");
    expect(msg?.priority).toBe("high");
    expect(msg?.subject).toContain("worker_died");
    expect(msg?.subject).toContain("child-abort");
    const payload = JSON.parse(msg?.payload ?? "{}") as {
      terminatedBy?: string;
      reason?: string;
      agentName?: string;
      taskId?: string;
      capability?: string;
    };
    expect(payload.terminatedBy).toBe("runner");
    expect(payload.agentName).toBe("child-abort");
    // taskId in the mail mirrors the runner's opts.taskId for this turn;
    // the test rig's makeRunOpts seeds this as "task-test".
    expect(payload.taskId).toBe("task-test");
    expect(payload.capability).toBe("builder");
    expect(payload.reason).toContain("Aborted");
  } finally {
    sharedMail.close();
  }
});
|
|
967
|
+
|
|
968
|
+
test("stall path: emits worker_died to parent (overstory-c772)", async () => {
  // Stalled child with a parent: the stall-watchdog abort must also
  // notify the parent via a synthetic worker_died mail.
  seedSession(ctx.sessionsDbPath, {
    agentName: "child-stall",
    state: "working",
    parentAgent: "lead-y",
    taskId: "task-c772-b",
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Emit nothing — stall watchdog must fire and abort.
    return fake;
  };

  const sharedMail = createMailStore(ctx.mailDbPath);
  try {
    const result = await runTurn({
      ...makeRunOpts(ctx, "child-stall", {
        runtime,
        _spawnFn: spawnFn,
      }),
      _mailStore: sharedMail,
      eventStallTimeoutMs: 50,
      sigkillDelayMs: 25,
    });
    expect(result.stallAborted).toBe(true);
    expect(result.finalState).toBe("zombie");

    const inbox = sharedMail.getAll({ to: "lead-y", type: "worker_died" });
    expect(inbox.length).toBe(1);
    const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
      terminatedBy?: string;
      reason?: string;
    };
    expect(payload.terminatedBy).toBe("runner");
    expect(payload.reason).toContain("stalled");
  } finally {
    sharedMail.close();
  }
});
|
|
1008
|
+
|
|
1009
|
+
test("terminalMailMissing: emits worker_died to parent (overstory-4159)", async () => {
  // Silent-no-op: claude exits cleanly but never sends worker_done. The
  // lead would otherwise block forever waiting for a terminal mail.
  seedSession(ctx.sessionsDbPath, {
    agentName: "child-noop",
    state: "working",
    parentAgent: "lead-z",
    taskId: "task-4159",
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Clean, eventful turn — but no terminal mail is ever sent.
    emitFakeTurn(fake, { sessionId: "noop-session", isError: false });
    fake._exit(0);
    return fake;
  };

  const sharedMail = createMailStore(ctx.mailDbPath);
  try {
    const result = await runTurn({
      ...makeRunOpts(ctx, "child-noop", {
        runtime,
        _spawnFn: spawnFn,
      }),
      _mailStore: sharedMail,
    });
    expect(result.cleanResult).toBe(true);
    expect(result.terminalMailMissing).toBe(true);
    expect(result.finalState).toBe("completed");

    const inbox = sharedMail.getAll({ to: "lead-z", type: "worker_died" });
    expect(inbox.length).toBe(1);
    const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
      terminatedBy?: string;
      reason?: string;
      agentName?: string;
    };
    expect(payload.terminatedBy).toBe("runner");
    expect(payload.agentName).toBe("child-noop");
    expect(payload.reason).toContain("Clean exit without terminal mail");
  } finally {
    sharedMail.close();
  }
});
|
|
1053
|
+
|
|
1054
|
+
test("no parentAgent: skips worker_died mail (orchestrator-spawned worker)", async () => {
  // Orchestrator-spawned workers have parentAgent=null; there is nobody to
  // notify. The runner must not fabricate a recipient.
  seedSession(ctx.sessionsDbPath, {
    agentName: "orphan-noop",
    state: "working",
    parentAgent: null,
    taskId: "task-orphan",
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "orphan-session" });
    fake._exit(0);
    return fake;
  };

  const sharedMail = createMailStore(ctx.mailDbPath);
  try {
    const result = await runTurn({
      ...makeRunOpts(ctx, "orphan-noop", { runtime, _spawnFn: spawnFn }),
      _mailStore: sharedMail,
    });
    expect(result.terminalMailMissing).toBe(true);
    // No worker_died anywhere in the store — not just "not to a parent".
    const all = sharedMail.getAll({ type: "worker_died" });
    expect(all.length).toBe(0);
  } finally {
    sharedMail.close();
  }
});
|
|
1084
|
+
|
|
1085
|
+
test("happy path: terminal mail observed → no worker_died emitted (no double-signal)", async () => {
  // When the agent sends its own terminal mail, the runner must NOT add
  // a synthetic worker_died on top (the parent would see both signals).
  seedSession(ctx.sessionsDbPath, {
    agentName: "child-ok",
    state: "working",
    parentAgent: "lead-ok",
    taskId: "task-happy",
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Simulated agent: sends a proper worker_done then exits cleanly.
    (async () => {
      await Bun.sleep(15);
      const s = createMailStore(ctx.mailDbPath);
      try {
        createMailClient(s).sendProtocol({
          from: "child-ok",
          to: "lead-ok",
          subject: "Worker done",
          body: "ok",
          type: "worker_done",
          priority: "normal",
          payload: {
            taskId: "task-happy",
            branch: "branch",
            exitCode: 0,
            filesModified: [],
          },
        });
      } finally {
        s.close();
      }
      emitFakeTurn(fake, { sessionId: "ok-session" });
      fake._exit(0);
    })();
    return fake;
  };

  const sharedMail = createMailStore(ctx.mailDbPath);
  try {
    const result = await runTurn({
      ...makeRunOpts(ctx, "child-ok", { runtime, _spawnFn: spawnFn }),
      _mailStore: sharedMail,
    });
    expect(result.terminalMailObserved).toBe(true);
    expect(result.terminalMailMissing).toBe(false);
    expect(result.finalState).toBe("completed");

    // Inbox should have the agent's own worker_done, but NO worker_died.
    const died = sharedMail.getAll({ to: "lead-ok", type: "worker_died" });
    expect(died.length).toBe(0);
  } finally {
    sharedMail.close();
  }
});
|
|
1139
|
+
|
|
1140
|
+
test("two concurrent runTurn calls for the same agent serialize", async () => {
  // The per-agent turn lock must serialize overlapping runTurn calls:
  // the second spawn may only start after the first turn fully ends.
  seedSession(ctx.sessionsDbPath, { agentName: "serial", state: "working" });
  const { runtime } = makeSpyRuntime();

  // Record each spawn's start/end so ordering can be asserted.
  const windows: Array<{ id: number; phase: "start" | "end"; ts: number }> = [];
  let spawnId = 0;
  const spawnFn: TurnSpawnFn = () => {
    const id = ++spawnId;
    windows.push({ id, phase: "start", ts: Date.now() });
    const fake = makeFakeProc();
    (async () => {
      // Hold the spawn open briefly to widen the overlap window.
      await Bun.sleep(80);
      emitFakeTurn(fake, { sessionId: `s-${id}` });
      fake._exit(0);
      windows.push({ id, phase: "end", ts: Date.now() });
    })();
    return fake;
  };

  const a = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
  const b = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
  await Promise.all([a, b]);

  // Sort by timestamp; verify the second start follows the first end.
  const ordered = [...windows].sort((x, y) => x.ts - y.ts);
  expect(ordered.length).toBe(4);
  expect(ordered[0]?.phase).toBe("start");
  expect(ordered[1]?.phase).toBe("end");
  expect(ordered[1]?.id).toBe(ordered[0]?.id);
  expect(ordered[2]?.phase).toBe("start");
  expect(ordered[2]?.id).not.toBe(ordered[0]?.id);
});
|
|
1173
|
+
|
|
1174
|
+
test("spawn throws — lock is released and error propagates", async () => {
|
|
1175
|
+
seedSession(ctx.sessionsDbPath, { agentName: "fails", state: "booting" });
|
|
1176
|
+
const { runtime } = makeSpyRuntime();
|
|
1177
|
+
const failingSpawn: TurnSpawnFn = () => {
|
|
1178
|
+
throw new Error("ENOENT: claude binary missing");
|
|
1179
|
+
};
|
|
1180
|
+
|
|
1181
|
+
await expect(
|
|
1182
|
+
runTurn(makeRunOpts(ctx, "fails", { runtime, _spawnFn: failingSpawn })),
|
|
1183
|
+
).rejects.toThrow(/binary missing/);
|
|
1184
|
+
|
|
1185
|
+
// Cross-process lock state must be cleared so a follow-up turn can run.
|
|
1186
|
+
const state = readTurnLock(ctx.overstoryDir, "fails");
|
|
1187
|
+
expect(state.heldByPid).toBeNull();
|
|
1188
|
+
|
|
1189
|
+
// Session state must NOT have transitioned (no events were observed).
|
|
1190
|
+
const after = readSession(ctx.sessionsDbPath, "fails");
|
|
1191
|
+
expect(after?.state).toBe("booting");
|
|
1192
|
+
});
|
|
1193
|
+
|
|
1194
|
+
test("subsequent turn passes the prior session id to runtime.buildDirectSpawn", async () => {
  // The runner persists the session id assigned on turn 1 and must
  // resume it on turn 2 instead of starting a fresh conversation.
  seedSession(ctx.sessionsDbPath, { agentName: "two-turns", state: "working" });
  const { runtime, spawnCalls } = makeSpyRuntime();

  // Turn 1: claude assigns session id "sid-1".
  const t1Fake = makeFakeProc();
  const t1Spawn: TurnSpawnFn = () => {
    emitFakeTurn(t1Fake, { sessionId: "sid-1" });
    t1Fake._exit(0);
    return t1Fake;
  };
  await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: t1Spawn }));

  // Turn 2: must read sid-1 back from SessionStore and pass it as resumeSessionId.
  const t2Fake = makeFakeProc();
  const t2Spawn: TurnSpawnFn = () => {
    emitFakeTurn(t2Fake, { sessionId: "sid-1" });
    t2Fake._exit(0);
    return t2Fake;
  };
  await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: t2Spawn }));

  // First spawn has no prior session; second resumes sid-1.
  expect(spawnCalls.length).toBe(2);
  expect(spawnCalls[0]?.resumeSessionId ?? null).toBeNull();
  expect(spawnCalls[1]?.resumeSessionId).toBe("sid-1");
});
|
|
1220
|
+
|
|
1221
|
+
test("user turn payload is written to spawned stdin", async () => {
|
|
1222
|
+
seedSession(ctx.sessionsDbPath, { agentName: "stdin-test", state: "working" });
|
|
1223
|
+
const { runtime } = makeSpyRuntime();
|
|
1224
|
+
|
|
1225
|
+
const payload = `${JSON.stringify({
|
|
1226
|
+
type: "user",
|
|
1227
|
+
message: { role: "user", content: [{ type: "text", text: "ping" }] },
|
|
1228
|
+
})}\n`;
|
|
1229
|
+
|
|
1230
|
+
const fake = makeFakeProc();
|
|
1231
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1232
|
+
emitFakeTurn(fake, { sessionId: "stdin-sess" });
|
|
1233
|
+
fake._exit(0);
|
|
1234
|
+
return fake;
|
|
1235
|
+
};
|
|
1236
|
+
|
|
1237
|
+
await runTurn(
|
|
1238
|
+
makeRunOpts(ctx, "stdin-test", {
|
|
1239
|
+
runtime,
|
|
1240
|
+
_spawnFn: spawnFn,
|
|
1241
|
+
userTurnNdjson: payload,
|
|
1242
|
+
}),
|
|
1243
|
+
);
|
|
1244
|
+
|
|
1245
|
+
expect(fake._writes.length).toBe(1);
|
|
1246
|
+
expect(fake._writes[0]).toBe(payload);
|
|
1247
|
+
});
|
|
1248
|
+
|
|
1249
|
+
test("does not spawn when the runtime lacks buildDirectSpawn", async () => {
|
|
1250
|
+
seedSession(ctx.sessionsDbPath, { agentName: "no-build", state: "booting" });
|
|
1251
|
+
const incomplete: AgentRuntime = {
|
|
1252
|
+
id: "incomplete",
|
|
1253
|
+
stability: "experimental",
|
|
1254
|
+
instructionPath: "AGENTS.md",
|
|
1255
|
+
buildSpawnCommand: () => "",
|
|
1256
|
+
buildPrintCommand: () => [],
|
|
1257
|
+
deployConfig: async () => {},
|
|
1258
|
+
detectReady: () => ({ phase: "ready" }),
|
|
1259
|
+
parseTranscript: async () => null,
|
|
1260
|
+
getTranscriptDir: () => null,
|
|
1261
|
+
buildEnv: () => ({}),
|
|
1262
|
+
// buildDirectSpawn intentionally omitted
|
|
1263
|
+
parseEvents: async function* () {
|
|
1264
|
+
yield* [];
|
|
1265
|
+
},
|
|
1266
|
+
};
|
|
1267
|
+
|
|
1268
|
+
await expect(runTurn(makeRunOpts(ctx, "no-build", { runtime: incomplete }))).rejects.toThrow(
|
|
1269
|
+
/buildDirectSpawn/,
|
|
1270
|
+
);
|
|
1271
|
+
});
|
|
1272
|
+
|
|
1273
|
+
// ---------- cleanup-invariant tests (overstory-4af3) ----------
|
|
1274
|
+
//
|
|
1275
|
+
// The runner publishes turn.pid for cross-process abort and updates
|
|
1276
|
+
// lastActivity at the end of every turn. Both must hold even when the
|
|
1277
|
+
// inner SessionStore writes silently fail. These tests pin the cleanup
|
|
1278
|
+
// contract so future regressions surface immediately.
|
|
1279
|
+
|
|
1280
|
+
test("happy path: turn.pid is removed and lastActivity advances past startedAt", async () => {
  // Seed startedAt/lastActivity one minute in the past so any advance
  // by the runner is unambiguous.
  const startedAt = new Date(Date.now() - 60_000).toISOString();
  seedSession(ctx.sessionsDbPath, {
    agentName: "cleanup-ok",
    state: "working",
    startedAt,
    lastActivity: startedAt,
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "cleanup-ok-session" });
    fake._exit(0);
    return fake;
  };

  const result = await runTurn(makeRunOpts(ctx, "cleanup-ok", { runtime, _spawnFn: spawnFn }));

  expect(result.exitCode).toBe(0);

  // turn.pid must be gone after a clean turn.
  const turnPidPath = turnPidPathFor(ctx, "cleanup-ok");
  expect(existsSync(turnPidPath)).toBe(false);

  // lastActivity must have advanced past the seeded timestamp.
  const after = readSession(ctx.sessionsDbPath, "cleanup-ok");
  expect(after?.lastActivity).not.toBe(startedAt);
  expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
    new Date(startedAt).getTime(),
  );
});
|
|
1309
|
+
|
|
1310
|
+
test("spawn throws: turn.pid is never written and finally cleanup is a no-op", async () => {
|
|
1311
|
+
seedSession(ctx.sessionsDbPath, { agentName: "spawn-fail", state: "booting" });
|
|
1312
|
+
const { runtime } = makeSpyRuntime();
|
|
1313
|
+
const failingSpawn: TurnSpawnFn = () => {
|
|
1314
|
+
throw new Error("ENOENT: claude binary missing");
|
|
1315
|
+
};
|
|
1316
|
+
|
|
1317
|
+
await expect(
|
|
1318
|
+
runTurn(makeRunOpts(ctx, "spawn-fail", { runtime, _spawnFn: failingSpawn })),
|
|
1319
|
+
).rejects.toThrow(/binary missing/);
|
|
1320
|
+
|
|
1321
|
+
expect(existsSync(turnPidPathFor(ctx, "spawn-fail"))).toBe(false);
|
|
1322
|
+
});
|
|
1323
|
+
|
|
1324
|
+
test("parser throws: outer finally still runs and removes turn.pid", async () => {
  seedSession(ctx.sessionsDbPath, { agentName: "parser-fail", state: "working" });

  // Custom runtime whose parseEvents returns an async iterable that
  // rejects on first read — mirrors a stream-json parse error mid-turn.
  const base = new ClaudeRuntime();
  const failingIterable: AsyncIterable<never> = {
    [Symbol.asyncIterator](): AsyncIterator<never> {
      return {
        next(): Promise<IteratorResult<never>> {
          return Promise.reject(new Error("synthetic stream-json parse error"));
        },
      };
    },
  };
  // Rebind every ClaudeRuntime method explicitly, then swap in the
  // rejecting parseEvents.
  const broken: AgentRuntime = {
    ...base,
    id: base.id,
    stability: base.stability,
    instructionPath: base.instructionPath,
    buildSpawnCommand: base.buildSpawnCommand.bind(base),
    buildPrintCommand: base.buildPrintCommand.bind(base),
    deployConfig: base.deployConfig.bind(base),
    detectReady: base.detectReady.bind(base),
    parseTranscript: base.parseTranscript.bind(base),
    getTranscriptDir: base.getTranscriptDir.bind(base),
    buildEnv: base.buildEnv.bind(base),
    buildDirectSpawn: base.buildDirectSpawn.bind(base),
    parseEvents: (() => failingIterable) as unknown as AgentRuntime["parseEvents"],
  };

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Don't auto-exit: simulate a still-live subprocess so we can verify
    // the C3 kill path actually fires before the lock is released. If we
    // pre-exited the fake here, kill() would still record but the test
    // wouldn't distinguish the runner-driven kill from no-op cleanup.
    return fake;
  };

  await expect(
    runTurn(makeRunOpts(ctx, "parser-fail", { runtime: broken, _spawnFn: spawnFn })),
  ).rejects.toThrow(/synthetic stream-json/);

  // overstory-088b C3: parser throw must kill the live subprocess to avoid
  // orphaning past lock.release. SIGKILL is correct here — we are on a
  // non-recoverable error path and must guarantee the process dies.
  expect(fake._killSignals).toContain("SIGKILL");
  expect(fake._killed).toBe(true);

  // Cleanup contract holds even on thrown parser.
  expect(existsSync(turnPidPathFor(ctx, "parser-fail"))).toBe(false);
});
|
|
1377
|
+
|
|
1378
|
+
test("turn.pid write failure SIGKILLs subprocess and aborts the turn (overstory-62a6)", async () => {
  seedSession(ctx.sessionsDbPath, { agentName: "pid-write-fail", state: "working" });
  const { runtime } = makeSpyRuntime();

  // Pre-create turn.pid as a DIRECTORY so `Bun.write(turnPidPath, ...)` fails
  // with EISDIR. This mirrors any real failure mode (read-only fs, permissions,
  // disk full) where the kill primitive becomes unavailable.
  const { mkdir } = await import("node:fs/promises");
  const turnPidPath = turnPidPathFor(ctx, "pid-write-fail");
  await mkdir(turnPidPath, { recursive: true });

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => fake;

  // Capture runner logs to assert the failure surfaces at error level.
  const events: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    events.push({ level, message });
  };

  await expect(
    runTurn(
      makeRunOpts(ctx, "pid-write-fail", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
    ),
  ).rejects.toThrow(/failed to write turn\.pid/);

  // The kill primitive is unavailable, so the only safe way to avoid a
  // silently un-killable agent is to SIGKILL the subprocess here.
  expect(fake._killSignals).toContain("SIGKILL");
  expect(fake._killed).toBe(true);

  // Surfaces at error level (not warn) so the failure isn't silent.
  expect(
    events.some((e) => e.level === "error" && e.message.includes("failed to write turn.pid")),
  ).toBe(true);
});
|
|
1413
|
+
|
|
1414
|
+
test("silent SessionStore failure surfaces as a runner warning", async () => {
  seedSession(ctx.sessionsDbPath, { agentName: "ss-fail", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "ss-fail-session" });
    fake._exit(0);
    return fake;
  };

  // Collect everything the runner logs so the warning can be asserted.
  const warnings: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    warnings.push({ level, message });
  };

  // Point sessionsDbPath at a path that exists as a DIRECTORY so every
  // SessionStore open in the runner throws. The runner must keep going
  // (cleanup contract) AND surface the failure via the logger.
  const badSessionsPath = ctx.overstoryDir; // directory, not a db file
  const opts = {
    ...makeRunOpts(ctx, "ss-fail", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
    sessionsDbPath: badSessionsPath,
  };

  await runTurn(opts);

  // The lastActivity update silently failed (it's a directory, not a db),
  // which is exactly the scenario that masked overstory-4af3. The runner
  // must report the contract violation via _logWarning at error level.
  const errors = warnings.filter((w) => w.level === "error");
  expect(errors.some((w) => w.message.includes("lastActivity stayed at startedAt"))).toBe(true);

  // turn.pid must still be cleaned up regardless.
  expect(existsSync(turnPidPathFor(ctx, "ss-fail"))).toBe(false);
});
|
|
1450
|
+
});
|