@os-eco/overstory-cli 0.9.4 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/agents/builder.md +19 -9
- package/agents/coordinator.md +6 -6
- package/agents/lead.md +204 -87
- package/agents/merger.md +25 -14
- package/agents/reviewer.md +22 -16
- package/agents/scout.md +17 -12
- package/package.json +6 -3
- package/src/agents/capabilities.test.ts +85 -0
- package/src/agents/capabilities.ts +125 -0
- package/src/agents/headless-mail-injector.test.ts +448 -0
- package/src/agents/headless-mail-injector.ts +219 -0
- package/src/agents/headless-prompt.test.ts +102 -0
- package/src/agents/headless-prompt.ts +68 -0
- package/src/agents/hooks-deployer.test.ts +514 -14
- package/src/agents/hooks-deployer.ts +141 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +60 -4
- package/src/agents/overlay.ts +63 -8
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-lock.test.ts +181 -0
- package/src/agents/turn-lock.ts +235 -0
- package/src/agents/turn-runner-dispatch.test.ts +182 -0
- package/src/agents/turn-runner-dispatch.ts +105 -0
- package/src/agents/turn-runner.test.ts +2312 -0
- package/src/agents/turn-runner.ts +1383 -0
- package/src/commands/agents.ts +9 -0
- package/src/commands/clean.ts +54 -0
- package/src/commands/coordinator.test.ts +254 -0
- package/src/commands/coordinator.ts +273 -8
- package/src/commands/dashboard.test.ts +188 -0
- package/src/commands/dashboard.ts +14 -4
- package/src/commands/doctor.ts +3 -1
- package/src/commands/group.test.ts +94 -0
- package/src/commands/group.ts +49 -20
- package/src/commands/init.test.ts +8 -0
- package/src/commands/init.ts +8 -1
- package/src/commands/log.test.ts +187 -11
- package/src/commands/log.ts +171 -71
- package/src/commands/mail.test.ts +162 -0
- package/src/commands/mail.ts +64 -9
- package/src/commands/merge.test.ts +230 -1
- package/src/commands/merge.ts +68 -12
- package/src/commands/nudge.test.ts +351 -4
- package/src/commands/nudge.ts +356 -34
- package/src/commands/run.test.ts +43 -7
- package/src/commands/serve/build.test.ts +202 -0
- package/src/commands/serve/build.ts +206 -0
- package/src/commands/serve/coordinator-actions.test.ts +339 -0
- package/src/commands/serve/coordinator-actions.ts +408 -0
- package/src/commands/serve/dev.test.ts +168 -0
- package/src/commands/serve/dev.ts +117 -0
- package/src/commands/serve/mail-actions.test.ts +312 -0
- package/src/commands/serve/mail-actions.ts +167 -0
- package/src/commands/serve/rest.test.ts +1323 -0
- package/src/commands/serve/rest.ts +708 -0
- package/src/commands/serve/static.ts +51 -0
- package/src/commands/serve/ws.test.ts +361 -0
- package/src/commands/serve/ws.ts +332 -0
- package/src/commands/serve.test.ts +459 -0
- package/src/commands/serve.ts +565 -0
- package/src/commands/sling.test.ts +177 -1
- package/src/commands/sling.ts +243 -71
- package/src/commands/status.test.ts +9 -0
- package/src/commands/status.ts +12 -4
- package/src/commands/stop.test.ts +255 -1
- package/src/commands/stop.ts +107 -8
- package/src/commands/watch.test.ts +43 -0
- package/src/commands/watch.ts +153 -28
- package/src/config.ts +23 -0
- package/src/doctor/consistency.test.ts +106 -0
- package/src/doctor/consistency.ts +48 -1
- package/src/doctor/serve.test.ts +95 -0
- package/src/doctor/serve.ts +86 -0
- package/src/doctor/types.ts +2 -1
- package/src/doctor/watchdog.ts +57 -1
- package/src/events/tailer.test.ts +234 -1
- package/src/events/tailer.ts +90 -0
- package/src/index.ts +57 -6
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/json.ts +29 -0
- package/src/logging/theme.ts +4 -0
- package/src/mail/client.ts +15 -2
- package/src/mail/store.test.ts +82 -0
- package/src/mail/store.ts +41 -4
- package/src/merge/lock.test.ts +149 -0
- package/src/merge/lock.ts +140 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
- package/src/runtimes/claude.test.ts +791 -1
- package/src/runtimes/claude.ts +323 -1
- package/src/runtimes/connections.test.ts +141 -1
- package/src/runtimes/connections.ts +73 -4
- package/src/runtimes/headless-connection.test.ts +264 -0
- package/src/runtimes/headless-connection.ts +158 -0
- package/src/runtimes/types.ts +10 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.test.ts +657 -29
- package/src/sessions/store.ts +286 -23
- package/src/test-setup.test.ts +31 -0
- package/src/test-setup.ts +28 -0
- package/src/types.ts +107 -2
- package/src/utils/pid.test.ts +85 -1
- package/src/utils/pid.ts +86 -1
- package/src/utils/process-scan.test.ts +53 -0
- package/src/utils/process-scan.ts +76 -0
- package/src/watchdog/daemon.test.ts +1607 -376
- package/src/watchdog/daemon.ts +462 -88
- package/src/watchdog/health.test.ts +282 -0
- package/src/watchdog/health.ts +126 -27
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/process.test.ts +71 -0
- package/src/worktree/process.ts +25 -5
- package/src/worktree/tmux.test.ts +28 -0
- package/src/worktree/tmux.ts +27 -3
- package/templates/CLAUDE.md.tmpl +19 -8
- package/templates/overlay.md.tmpl +5 -2
|
@@ -0,0 +1,2312 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { mkdtemp, rm } from "node:fs/promises";
|
|
4
|
+
import { tmpdir } from "node:os";
|
|
5
|
+
import { join } from "node:path";
|
|
6
|
+
import { createEventStore } from "../events/store.ts";
|
|
7
|
+
import { createMailClient } from "../mail/client.ts";
|
|
8
|
+
import { createMailStore } from "../mail/store.ts";
|
|
9
|
+
import { ClaudeRuntime } from "../runtimes/claude.ts";
|
|
10
|
+
import type { AgentRuntime, DirectSpawnOpts } from "../runtimes/types.ts";
|
|
11
|
+
import { createSessionStore } from "../sessions/store.ts";
|
|
12
|
+
import type { AgentSession, ResolvedModel } from "../types.ts";
|
|
13
|
+
import { _resetInProcessLocks, readTurnLock } from "./turn-lock.ts";
|
|
14
|
+
import {
|
|
15
|
+
type RunnerLogger,
|
|
16
|
+
runTurn,
|
|
17
|
+
type TurnSpawnFn,
|
|
18
|
+
type TurnSubprocess,
|
|
19
|
+
} from "./turn-runner.ts";
|
|
20
|
+
|
|
21
|
+
// ---------- fake subprocess plumbing ----------
|
|
22
|
+
|
|
23
|
+
interface FakeProc extends TurnSubprocess {
|
|
24
|
+
_writes: string[];
|
|
25
|
+
_killSignals: Array<string | number | undefined>;
|
|
26
|
+
_killed: boolean;
|
|
27
|
+
_pushLine(line: string): void;
|
|
28
|
+
_closeStdout(): void;
|
|
29
|
+
_exit(code: number | null): void;
|
|
30
|
+
_setStderr(stream: ReadableStream<Uint8Array> | null): void;
|
|
31
|
+
stderr?: ReadableStream<Uint8Array> | null;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
let fakeProcCounter = 1000;
|
|
35
|
+
|
|
36
|
+
function makeFakeProc(): FakeProc {
|
|
37
|
+
let stdoutController!: ReadableStreamDefaultController<Uint8Array>;
|
|
38
|
+
const stdout = new ReadableStream<Uint8Array>({
|
|
39
|
+
start(c) {
|
|
40
|
+
stdoutController = c;
|
|
41
|
+
},
|
|
42
|
+
});
|
|
43
|
+
let stdoutClosed = false;
|
|
44
|
+
const closeStdout = (): void => {
|
|
45
|
+
if (stdoutClosed) return;
|
|
46
|
+
stdoutClosed = true;
|
|
47
|
+
try {
|
|
48
|
+
stdoutController.close();
|
|
49
|
+
} catch {
|
|
50
|
+
// already closed
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
const writes: string[] = [];
|
|
55
|
+
|
|
56
|
+
let resolveExited!: (code: number | null) => void;
|
|
57
|
+
const exited = new Promise<number | null>((resolve) => {
|
|
58
|
+
resolveExited = resolve;
|
|
59
|
+
});
|
|
60
|
+
let exitedDone = false;
|
|
61
|
+
const finishExit = (code: number | null): void => {
|
|
62
|
+
if (exitedDone) return;
|
|
63
|
+
exitedDone = true;
|
|
64
|
+
resolveExited(code);
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
const killSignals: Array<string | number | undefined> = [];
|
|
68
|
+
let killed = false;
|
|
69
|
+
|
|
70
|
+
const proc: FakeProc = {
|
|
71
|
+
pid: fakeProcCounter++,
|
|
72
|
+
stdin: {
|
|
73
|
+
write(data: string | Uint8Array): number {
|
|
74
|
+
const s = typeof data === "string" ? data : new TextDecoder().decode(data);
|
|
75
|
+
writes.push(s);
|
|
76
|
+
return s.length;
|
|
77
|
+
},
|
|
78
|
+
end(): void {
|
|
79
|
+
// no-op for fakes; production Bun.spawn closes the pipe.
|
|
80
|
+
},
|
|
81
|
+
},
|
|
82
|
+
stdout,
|
|
83
|
+
exited,
|
|
84
|
+
kill(signal?: string | number): void {
|
|
85
|
+
killSignals.push(signal);
|
|
86
|
+
if (killed) return;
|
|
87
|
+
killed = true;
|
|
88
|
+
closeStdout();
|
|
89
|
+
finishExit(null);
|
|
90
|
+
},
|
|
91
|
+
_writes: writes,
|
|
92
|
+
_killSignals: killSignals,
|
|
93
|
+
_killed: false,
|
|
94
|
+
_pushLine(line: string): void {
|
|
95
|
+
if (stdoutClosed) return;
|
|
96
|
+
stdoutController.enqueue(new TextEncoder().encode(`${line}\n`));
|
|
97
|
+
},
|
|
98
|
+
_closeStdout: closeStdout,
|
|
99
|
+
_exit(code: number | null): void {
|
|
100
|
+
closeStdout();
|
|
101
|
+
finishExit(code);
|
|
102
|
+
},
|
|
103
|
+
_setStderr(stream: ReadableStream<Uint8Array> | null): void {
|
|
104
|
+
proc.stderr = stream;
|
|
105
|
+
},
|
|
106
|
+
stderr: null,
|
|
107
|
+
};
|
|
108
|
+
Object.defineProperty(proc, "_killed", {
|
|
109
|
+
get: () => killed,
|
|
110
|
+
});
|
|
111
|
+
return proc;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
function emitFakeTurn(
|
|
115
|
+
proc: FakeProc,
|
|
116
|
+
opts: { sessionId?: string; isError?: boolean; durationMs?: number },
|
|
117
|
+
): void {
|
|
118
|
+
const sessionId = opts.sessionId ?? "session-test";
|
|
119
|
+
proc._pushLine(
|
|
120
|
+
JSON.stringify({
|
|
121
|
+
type: "system",
|
|
122
|
+
subtype: "init",
|
|
123
|
+
session_id: sessionId,
|
|
124
|
+
model: "claude-test",
|
|
125
|
+
}),
|
|
126
|
+
);
|
|
127
|
+
proc._pushLine(
|
|
128
|
+
JSON.stringify({
|
|
129
|
+
type: "result",
|
|
130
|
+
subtype: "success",
|
|
131
|
+
session_id: sessionId,
|
|
132
|
+
result: "done",
|
|
133
|
+
is_error: opts.isError ?? false,
|
|
134
|
+
duration_ms: opts.durationMs ?? 50,
|
|
135
|
+
num_turns: 1,
|
|
136
|
+
}),
|
|
137
|
+
);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// ---------- runtime spy ----------
|
|
141
|
+
|
|
142
|
+
function makeSpyRuntime(): {
|
|
143
|
+
runtime: AgentRuntime;
|
|
144
|
+
spawnCalls: Array<DirectSpawnOpts & { resumeSessionId?: string | null }>;
|
|
145
|
+
} {
|
|
146
|
+
const calls: Array<DirectSpawnOpts & { resumeSessionId?: string | null }> = [];
|
|
147
|
+
const base = new ClaudeRuntime();
|
|
148
|
+
const original = base.buildDirectSpawn.bind(base);
|
|
149
|
+
// Patch the instance to capture each call's opts (including the future
|
|
150
|
+
// resumeSessionId field that turn-runner threads through).
|
|
151
|
+
(base as unknown as { buildDirectSpawn: typeof original }).buildDirectSpawn = (
|
|
152
|
+
opts: DirectSpawnOpts,
|
|
153
|
+
) => {
|
|
154
|
+
calls.push({ ...(opts as DirectSpawnOpts & { resumeSessionId?: string | null }) });
|
|
155
|
+
return original(opts);
|
|
156
|
+
};
|
|
157
|
+
return { runtime: base, spawnCalls: calls };
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// ---------- session bootstrap ----------
|
|
161
|
+
|
|
162
|
+
function seedSession(
|
|
163
|
+
sessionsDbPath: string,
|
|
164
|
+
overrides: Partial<AgentSession> & Pick<AgentSession, "agentName">,
|
|
165
|
+
): void {
|
|
166
|
+
const store = createSessionStore(sessionsDbPath);
|
|
167
|
+
try {
|
|
168
|
+
const now = new Date().toISOString();
|
|
169
|
+
store.upsert({
|
|
170
|
+
id: `session-${overrides.agentName}`,
|
|
171
|
+
agentName: overrides.agentName,
|
|
172
|
+
capability: overrides.capability ?? "builder",
|
|
173
|
+
worktreePath: overrides.worktreePath ?? "/tmp/worktree",
|
|
174
|
+
branchName: overrides.branchName ?? "branch",
|
|
175
|
+
taskId: overrides.taskId ?? "task-test",
|
|
176
|
+
tmuxSession: overrides.tmuxSession ?? "",
|
|
177
|
+
state: overrides.state ?? "booting",
|
|
178
|
+
pid: overrides.pid ?? null,
|
|
179
|
+
parentAgent: overrides.parentAgent ?? null,
|
|
180
|
+
depth: overrides.depth ?? 0,
|
|
181
|
+
runId: overrides.runId ?? null,
|
|
182
|
+
startedAt: overrides.startedAt ?? now,
|
|
183
|
+
lastActivity: overrides.lastActivity ?? now,
|
|
184
|
+
escalationLevel: overrides.escalationLevel ?? 0,
|
|
185
|
+
stalledSince: overrides.stalledSince ?? null,
|
|
186
|
+
transcriptPath: overrides.transcriptPath ?? null,
|
|
187
|
+
...(overrides.promptVersion !== undefined ? { promptVersion: overrides.promptVersion } : {}),
|
|
188
|
+
...(overrides.claudeSessionId !== undefined
|
|
189
|
+
? { claudeSessionId: overrides.claudeSessionId }
|
|
190
|
+
: {}),
|
|
191
|
+
});
|
|
192
|
+
} finally {
|
|
193
|
+
store.close();
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
function readSession(sessionsDbPath: string, agentName: string): AgentSession | null {
|
|
198
|
+
const store = createSessionStore(sessionsDbPath);
|
|
199
|
+
try {
|
|
200
|
+
return store.getByName(agentName);
|
|
201
|
+
} finally {
|
|
202
|
+
store.close();
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// ---------- shared fixture context ----------
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* Silent diagnostic sink for tests that don't assert on logs. Suppresses the
|
|
210
|
+
* `[turn-runner:error]` stderr mirror so contract-violation messages
|
|
211
|
+
* (overstory-6071) — which are expected for many tests that drive a clean
|
|
212
|
+
* exit without seeding terminal mail — don't pollute the test runner output.
|
|
213
|
+
*/
|
|
214
|
+
const silentLogger: RunnerLogger = () => {};
|
|
215
|
+
|
|
216
|
+
interface Ctx {
|
|
217
|
+
overstoryDir: string;
|
|
218
|
+
worktreePath: string;
|
|
219
|
+
projectRoot: string;
|
|
220
|
+
mailDbPath: string;
|
|
221
|
+
eventsDbPath: string;
|
|
222
|
+
sessionsDbPath: string;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
const RESOLVED_MODEL: ResolvedModel = { model: "sonnet", env: {}, isExplicitOverride: false };
|
|
226
|
+
|
|
227
|
+
function makeRunOpts(
|
|
228
|
+
ctx: Ctx,
|
|
229
|
+
agentName: string,
|
|
230
|
+
overrides: {
|
|
231
|
+
runtime: AgentRuntime;
|
|
232
|
+
userTurnNdjson?: string;
|
|
233
|
+
_spawnFn?: TurnSpawnFn;
|
|
234
|
+
abortSignal?: AbortSignal;
|
|
235
|
+
sigkillDelayMs?: number;
|
|
236
|
+
runId?: string | null;
|
|
237
|
+
capability?: string;
|
|
238
|
+
_logWarning?: RunnerLogger;
|
|
239
|
+
},
|
|
240
|
+
): Parameters<typeof runTurn>[0] {
|
|
241
|
+
return {
|
|
242
|
+
agentName,
|
|
243
|
+
capability: overrides.capability ?? "builder",
|
|
244
|
+
overstoryDir: ctx.overstoryDir,
|
|
245
|
+
worktreePath: ctx.worktreePath,
|
|
246
|
+
projectRoot: ctx.projectRoot,
|
|
247
|
+
taskId: "task-test",
|
|
248
|
+
userTurnNdjson:
|
|
249
|
+
overrides.userTurnNdjson ??
|
|
250
|
+
`${JSON.stringify({
|
|
251
|
+
type: "user",
|
|
252
|
+
message: { role: "user", content: [{ type: "text", text: "hello" }] },
|
|
253
|
+
})}\n`,
|
|
254
|
+
runtime: overrides.runtime,
|
|
255
|
+
resolvedModel: RESOLVED_MODEL,
|
|
256
|
+
runId: overrides.runId ?? null,
|
|
257
|
+
mailDbPath: ctx.mailDbPath,
|
|
258
|
+
eventsDbPath: ctx.eventsDbPath,
|
|
259
|
+
sessionsDbPath: ctx.sessionsDbPath,
|
|
260
|
+
...(overrides._spawnFn !== undefined ? { _spawnFn: overrides._spawnFn } : {}),
|
|
261
|
+
...(overrides.abortSignal !== undefined ? { abortSignal: overrides.abortSignal } : {}),
|
|
262
|
+
...(overrides.sigkillDelayMs !== undefined ? { sigkillDelayMs: overrides.sigkillDelayMs } : {}),
|
|
263
|
+
_logWarning: overrides._logWarning ?? silentLogger,
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
function turnPidPathFor(ctx: Ctx, agentName: string): string {
|
|
268
|
+
return join(ctx.overstoryDir, "agents", agentName, "turn.pid");
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// ---------- tests ----------
|
|
272
|
+
|
|
273
|
+
describe("runTurn", () => {
|
|
274
|
+
let ctx: Ctx;
|
|
275
|
+
|
|
276
|
+
beforeEach(async () => {
|
|
277
|
+
const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-turnrunner-test-"));
|
|
278
|
+
ctx = {
|
|
279
|
+
overstoryDir,
|
|
280
|
+
worktreePath: overstoryDir, // arbitrary; spawn is faked
|
|
281
|
+
projectRoot: overstoryDir,
|
|
282
|
+
mailDbPath: join(overstoryDir, "mail.db"),
|
|
283
|
+
eventsDbPath: join(overstoryDir, "events.db"),
|
|
284
|
+
sessionsDbPath: join(overstoryDir, "sessions.db"),
|
|
285
|
+
};
|
|
286
|
+
_resetInProcessLocks();
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
afterEach(async () => {
|
|
290
|
+
_resetInProcessLocks();
|
|
291
|
+
await rm(ctx.overstoryDir, { recursive: true, force: true });
|
|
292
|
+
});
|
|
293
|
+
|
|
294
|
+
test("empty userTurnNdjson is a no-op: no spawn, no state transition", async () => {
|
|
295
|
+
seedSession(ctx.sessionsDbPath, { agentName: "noop", state: "working" });
|
|
296
|
+
const { runtime } = makeSpyRuntime();
|
|
297
|
+
let spawnCount = 0;
|
|
298
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
299
|
+
spawnCount++;
|
|
300
|
+
return makeFakeProc();
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
const result = await runTurn(
|
|
304
|
+
makeRunOpts(ctx, "noop", { runtime, userTurnNdjson: "", _spawnFn: spawnFn }),
|
|
305
|
+
);
|
|
306
|
+
|
|
307
|
+
expect(spawnCount).toBe(0);
|
|
308
|
+
expect(result.exitCode).toBeNull();
|
|
309
|
+
expect(result.cleanResult).toBe(false);
|
|
310
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
311
|
+
expect(result.durationMs).toBe(0);
|
|
312
|
+
expect(result.initialState).toBe("working");
|
|
313
|
+
expect(result.finalState).toBe("working");
|
|
314
|
+
|
|
315
|
+
// Session state must remain untouched.
|
|
316
|
+
const after = readSession(ctx.sessionsDbPath, "noop");
|
|
317
|
+
expect(after?.state).toBe("working");
|
|
318
|
+
});
|
|
319
|
+
|
|
320
|
+
test("happy path: spawn, drain events, capture session id, contract violation surfaces as completed", async () => {
|
|
321
|
+
seedSession(ctx.sessionsDbPath, { agentName: "alpha", state: "booting" });
|
|
322
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
323
|
+
|
|
324
|
+
const fake = makeFakeProc();
|
|
325
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
326
|
+
emitFakeTurn(fake, { sessionId: "claude-sess-A", isError: false });
|
|
327
|
+
fake._exit(0);
|
|
328
|
+
return fake;
|
|
329
|
+
};
|
|
330
|
+
|
|
331
|
+
// Suppress the contract-violation error log (overstory-6071) so it
|
|
332
|
+
// doesn't leak to test stderr; assertions below still cover the case.
|
|
333
|
+
const logger: RunnerLogger = () => {};
|
|
334
|
+
const result = await runTurn(
|
|
335
|
+
makeRunOpts(ctx, "alpha", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
|
|
336
|
+
);
|
|
337
|
+
|
|
338
|
+
expect(result.exitCode).toBe(0);
|
|
339
|
+
expect(result.cleanResult).toBe(true);
|
|
340
|
+
expect(result.newSessionId).toBe("claude-sess-A");
|
|
341
|
+
expect(result.resumeMismatch).toBe(false);
|
|
342
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
343
|
+
// initial=booting, clean exit but no terminal mail → contract violation,
|
|
344
|
+
// settles to `completed` (overstory-6071).
|
|
345
|
+
expect(result.initialState).toBe("booting");
|
|
346
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
347
|
+
expect(result.finalState).toBe("completed");
|
|
348
|
+
|
|
349
|
+
const after = readSession(ctx.sessionsDbPath, "alpha");
|
|
350
|
+
expect(after?.state).toBe("completed");
|
|
351
|
+
expect(after?.claudeSessionId).toBe("claude-sess-A");
|
|
352
|
+
|
|
353
|
+
// resumeSessionId on first turn is null (no prior id stored).
|
|
354
|
+
expect(spawnCalls.length).toBe(1);
|
|
355
|
+
expect(spawnCalls[0]?.resumeSessionId ?? null).toBeNull();
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
test("re-reads claudeSessionId under the lock — caller view may be stale", async () => {
|
|
359
|
+
seedSession(ctx.sessionsDbPath, {
|
|
360
|
+
agentName: "stale",
|
|
361
|
+
state: "working",
|
|
362
|
+
claudeSessionId: "old-id",
|
|
363
|
+
});
|
|
364
|
+
|
|
365
|
+
// External update BEFORE the runTurn call. runTurn must read this value
|
|
366
|
+
// when it acquires the lock, not the older one any caller might be holding.
|
|
367
|
+
const updateStore = createSessionStore(ctx.sessionsDbPath);
|
|
368
|
+
try {
|
|
369
|
+
updateStore.updateClaudeSessionId("stale", "fresh-id");
|
|
370
|
+
} finally {
|
|
371
|
+
updateStore.close();
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
375
|
+
const fake = makeFakeProc();
|
|
376
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
377
|
+
emitFakeTurn(fake, { sessionId: "fresh-id" }); // same id back; no mismatch
|
|
378
|
+
fake._exit(0);
|
|
379
|
+
return fake;
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
const result = await runTurn(makeRunOpts(ctx, "stale", { runtime, _spawnFn: spawnFn }));
|
|
383
|
+
|
|
384
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("fresh-id");
|
|
385
|
+
expect(result.resumeMismatch).toBe(false);
|
|
386
|
+
});
|
|
387
|
+
|
|
388
|
+
test("resumeMismatch fires when stream-json emits a different session id", async () => {
|
|
389
|
+
seedSession(ctx.sessionsDbPath, {
|
|
390
|
+
agentName: "mismatch",
|
|
391
|
+
state: "working",
|
|
392
|
+
claudeSessionId: "want-resume",
|
|
393
|
+
});
|
|
394
|
+
const { runtime } = makeSpyRuntime();
|
|
395
|
+
|
|
396
|
+
const fake = makeFakeProc();
|
|
397
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
398
|
+
emitFakeTurn(fake, { sessionId: "actually-new" });
|
|
399
|
+
fake._exit(0);
|
|
400
|
+
return fake;
|
|
401
|
+
};
|
|
402
|
+
|
|
403
|
+
const result = await runTurn(makeRunOpts(ctx, "mismatch", { runtime, _spawnFn: spawnFn }));
|
|
404
|
+
|
|
405
|
+
expect(result.newSessionId).toBe("actually-new");
|
|
406
|
+
expect(result.resumeMismatch).toBe(true);
|
|
407
|
+
|
|
408
|
+
// SessionStore overwritten with the observed value.
|
|
409
|
+
const after = readSession(ctx.sessionsDbPath, "mismatch");
|
|
410
|
+
expect(after?.claudeSessionId).toBe("actually-new");
|
|
411
|
+
|
|
412
|
+
// overstory-088b C2: a structured warn event lands in events.db so
|
|
413
|
+
// observability mirrors the runner diagnostic. Carries both the requested
|
|
414
|
+
// and observed session ids in the data payload.
|
|
415
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
416
|
+
try {
|
|
417
|
+
const events = eventStore.getByAgent("mismatch");
|
|
418
|
+
const mismatchEvent = events.find((e) => {
|
|
419
|
+
if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
|
|
420
|
+
try {
|
|
421
|
+
const parsed = JSON.parse(e.data) as { type?: string };
|
|
422
|
+
return parsed.type === "resume_mismatch";
|
|
423
|
+
} catch {
|
|
424
|
+
return false;
|
|
425
|
+
}
|
|
426
|
+
});
|
|
427
|
+
expect(mismatchEvent).toBeDefined();
|
|
428
|
+
const payload = JSON.parse(mismatchEvent?.data ?? "{}") as {
|
|
429
|
+
type: string;
|
|
430
|
+
requestedSessionId: string;
|
|
431
|
+
observedSessionId: string;
|
|
432
|
+
};
|
|
433
|
+
expect(payload.requestedSessionId).toBe("want-resume");
|
|
434
|
+
expect(payload.observedSessionId).toBe("actually-new");
|
|
435
|
+
} finally {
|
|
436
|
+
eventStore.close();
|
|
437
|
+
}
|
|
438
|
+
});
|
|
439
|
+
|
|
440
|
+
test("resume match (sid === priorSessionId) does NOT emit a mismatch event", async () => {
|
|
441
|
+
seedSession(ctx.sessionsDbPath, {
|
|
442
|
+
agentName: "match",
|
|
443
|
+
state: "working",
|
|
444
|
+
claudeSessionId: "same-id",
|
|
445
|
+
});
|
|
446
|
+
const { runtime } = makeSpyRuntime();
|
|
447
|
+
|
|
448
|
+
const fake = makeFakeProc();
|
|
449
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
450
|
+
emitFakeTurn(fake, { sessionId: "same-id" });
|
|
451
|
+
fake._exit(0);
|
|
452
|
+
return fake;
|
|
453
|
+
};
|
|
454
|
+
|
|
455
|
+
const result = await runTurn(makeRunOpts(ctx, "match", { runtime, _spawnFn: spawnFn }));
|
|
456
|
+
expect(result.resumeMismatch).toBe(false);
|
|
457
|
+
|
|
458
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
459
|
+
try {
|
|
460
|
+
const events = eventStore.getByAgent("match");
|
|
461
|
+
const mismatchEvent = events.find((e) => e.data?.includes("resume_mismatch") ?? false);
|
|
462
|
+
expect(mismatchEvent).toBeUndefined();
|
|
463
|
+
} finally {
|
|
464
|
+
eventStore.close();
|
|
465
|
+
}
|
|
466
|
+
});
|
|
467
|
+
|
|
468
|
+
test("terminalMailObserved + clean exit → completed state", async () => {
|
|
469
|
+
seedSession(ctx.sessionsDbPath, { agentName: "wd", state: "working" });
|
|
470
|
+
const { runtime } = makeSpyRuntime();
|
|
471
|
+
|
|
472
|
+
// Pre-seed: a worker_done from a PRIOR turn (well in the past). Must not
|
|
473
|
+
// confuse this turn's snapshot.
|
|
474
|
+
const mailStore = createMailStore(ctx.mailDbPath);
|
|
475
|
+
try {
|
|
476
|
+
const client = createMailClient(mailStore);
|
|
477
|
+
client.sendProtocol({
|
|
478
|
+
from: "wd",
|
|
479
|
+
to: "lead",
|
|
480
|
+
subject: "Worker done: prior",
|
|
481
|
+
body: "old",
|
|
482
|
+
type: "worker_done",
|
|
483
|
+
priority: "normal",
|
|
484
|
+
payload: {
|
|
485
|
+
taskId: "old",
|
|
486
|
+
branch: "old",
|
|
487
|
+
exitCode: 0,
|
|
488
|
+
filesModified: [],
|
|
489
|
+
},
|
|
490
|
+
});
|
|
491
|
+
} finally {
|
|
492
|
+
mailStore.close();
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
// Simulate fresh worker_done sent during the spawn.
|
|
496
|
+
const fake = makeFakeProc();
|
|
497
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
498
|
+
(async () => {
|
|
499
|
+
// Wait long enough for snapshot timestamp to be < this insert.
|
|
500
|
+
await Bun.sleep(20);
|
|
501
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
502
|
+
try {
|
|
503
|
+
const c = createMailClient(s);
|
|
504
|
+
c.sendProtocol({
|
|
505
|
+
from: "wd",
|
|
506
|
+
to: "lead",
|
|
507
|
+
subject: "Worker done: this turn",
|
|
508
|
+
body: "new",
|
|
509
|
+
type: "worker_done",
|
|
510
|
+
priority: "normal",
|
|
511
|
+
payload: {
|
|
512
|
+
taskId: "this-turn",
|
|
513
|
+
branch: "branch",
|
|
514
|
+
exitCode: 0,
|
|
515
|
+
filesModified: [],
|
|
516
|
+
},
|
|
517
|
+
});
|
|
518
|
+
} finally {
|
|
519
|
+
s.close();
|
|
520
|
+
}
|
|
521
|
+
emitFakeTurn(fake, { sessionId: "wd-session" });
|
|
522
|
+
fake._exit(0);
|
|
523
|
+
})();
|
|
524
|
+
return fake;
|
|
525
|
+
};
|
|
526
|
+
|
|
527
|
+
const result = await runTurn(makeRunOpts(ctx, "wd", { runtime, _spawnFn: spawnFn }));
|
|
528
|
+
|
|
529
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
530
|
+
expect(result.cleanResult).toBe(true);
|
|
531
|
+
expect(result.finalState).toBe("completed");
|
|
532
|
+
|
|
533
|
+
const after = readSession(ctx.sessionsDbPath, "wd");
|
|
534
|
+
expect(after?.state).toBe("completed");
|
|
535
|
+
});
|
|
536
|
+
|
|
537
|
+
test("turn that runs but does not complete settles to between_turns, not working (overstory-3087)", async () => {
|
|
538
|
+
// Spawn-per-turn substate split: a turn that produced events but did
|
|
539
|
+
// not deliver the terminal mail nor abort must end in `between_turns`
|
|
540
|
+
// so the UI can tell a worker waiting for its next mail batch from
|
|
541
|
+
// one mid-execution. Pre-3087 this settled to `working`.
|
|
542
|
+
seedSession(ctx.sessionsDbPath, { agentName: "settler", state: "booting" });
|
|
543
|
+
const { runtime } = makeSpyRuntime();
|
|
544
|
+
const fake = makeFakeProc();
|
|
545
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
546
|
+
// Force is_error=true so the runner does NOT classify this as a
|
|
547
|
+
// clean exit (which would settle to `completed` via the
|
|
548
|
+
// terminal-mail-missing path). is_error=true keeps cleanResult
|
|
549
|
+
// false, sending us into the observedAnyEvent → between_turns
|
|
550
|
+
// branch we want to test.
|
|
551
|
+
emitFakeTurn(fake, { sessionId: "settler-sid", isError: true });
|
|
552
|
+
fake._exit(0);
|
|
553
|
+
return fake;
|
|
554
|
+
};
|
|
555
|
+
|
|
556
|
+
const result = await runTurn(makeRunOpts(ctx, "settler", { runtime, _spawnFn: spawnFn }));
|
|
557
|
+
|
|
558
|
+
expect(result.cleanResult).toBe(false);
|
|
559
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
560
|
+
expect(result.terminalMailMissing).toBe(false);
|
|
561
|
+
expect(result.finalState).toBe("between_turns");
|
|
562
|
+
|
|
563
|
+
const after = readSession(ctx.sessionsDbPath, "settler");
|
|
564
|
+
expect(after?.state).toBe("between_turns");
|
|
565
|
+
});
|
|
566
|
+
|
|
567
|
+
test("first parser event transitions booting → in_turn (overstory-3087)", async () => {
|
|
568
|
+
// The mid-turn "first event" hook must flip the row out of `booting`
|
|
569
|
+
// (or `between_turns`/`working`) into `in_turn` so observers see the
|
|
570
|
+
// agent as actively executing, distinct from the idle waiting state.
|
|
571
|
+
seedSession(ctx.sessionsDbPath, { agentName: "boots", state: "booting" });
|
|
572
|
+
const { runtime } = makeSpyRuntime();
|
|
573
|
+
const fake = makeFakeProc();
|
|
574
|
+
// Mutable ref so the IIFE assignment is visible to the type checker.
|
|
575
|
+
const captured: { state: string | null } = { state: null };
|
|
576
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
577
|
+
(async () => {
|
|
578
|
+
// Push the init event, then sample the row before result.
|
|
579
|
+
fake._pushLine(
|
|
580
|
+
JSON.stringify({
|
|
581
|
+
type: "system",
|
|
582
|
+
subtype: "init",
|
|
583
|
+
session_id: "boots-sid",
|
|
584
|
+
model: "claude-test",
|
|
585
|
+
}),
|
|
586
|
+
);
|
|
587
|
+
// Yield the event loop so the parser drains the init event
|
|
588
|
+
// and updates the session row before we read it.
|
|
589
|
+
await Bun.sleep(20);
|
|
590
|
+
captured.state = readSession(ctx.sessionsDbPath, "boots")?.state ?? null;
|
|
591
|
+
// Send is_error=true so we settle to between_turns rather than
|
|
592
|
+
// the contract-violation completed path — this test is about
|
|
593
|
+
// the mid-turn transition, not the terminal classification.
|
|
594
|
+
emitFakeTurn(fake, { sessionId: "boots-sid", isError: true });
|
|
595
|
+
fake._exit(0);
|
|
596
|
+
})();
|
|
597
|
+
return fake;
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
await runTurn(makeRunOpts(ctx, "boots", { runtime, _spawnFn: spawnFn }));
|
|
601
|
+
|
|
602
|
+
expect(captured.state).toBe("in_turn");
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
test("between_turns → in_turn → between_turns cycle on a follow-up batch (overstory-3087)", async () => {
|
|
606
|
+
// A spawn-per-turn worker that finished its first turn (state=
|
|
607
|
+
// between_turns) must flip back to in_turn when the next mail batch
|
|
608
|
+
// fires its first parser event, and settle back to between_turns
|
|
609
|
+
// when the turn ends without a terminal mail.
|
|
610
|
+
seedSession(ctx.sessionsDbPath, { agentName: "cycle", state: "between_turns" });
|
|
611
|
+
const { runtime } = makeSpyRuntime();
|
|
612
|
+
const fake = makeFakeProc();
|
|
613
|
+
const captured: { midTurnState: string | null } = { midTurnState: null };
|
|
614
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
615
|
+
(async () => {
|
|
616
|
+
fake._pushLine(
|
|
617
|
+
JSON.stringify({
|
|
618
|
+
type: "system",
|
|
619
|
+
subtype: "init",
|
|
620
|
+
session_id: "cycle-sid",
|
|
621
|
+
model: "claude-test",
|
|
622
|
+
}),
|
|
623
|
+
);
|
|
624
|
+
await Bun.sleep(20);
|
|
625
|
+
captured.midTurnState = readSession(ctx.sessionsDbPath, "cycle")?.state ?? null;
|
|
626
|
+
emitFakeTurn(fake, { sessionId: "cycle-sid", isError: true });
|
|
627
|
+
fake._exit(0);
|
|
628
|
+
})();
|
|
629
|
+
return fake;
|
|
630
|
+
};
|
|
631
|
+
|
|
632
|
+
const result = await runTurn(makeRunOpts(ctx, "cycle", { runtime, _spawnFn: spawnFn }));
|
|
633
|
+
|
|
634
|
+
expect(captured.midTurnState).toBe("in_turn");
|
|
635
|
+
expect(result.initialState).toBe("between_turns");
|
|
636
|
+
expect(result.finalState).toBe("between_turns");
|
|
637
|
+
});
|
|
638
|
+
|
|
639
|
+
test("clean exit but no worker_done → contract violation, completed + error log (overstory-6071)", async () => {
|
|
640
|
+
// Pre-fix: claude exiting cleanly without sending the capability's
|
|
641
|
+
// terminal mail left the session at `working` forever — the process is
|
|
642
|
+
// gone but the row looks alive. Now the runner logs an error and
|
|
643
|
+
// settles to `completed` so operators see something terminal.
|
|
644
|
+
seedSession(ctx.sessionsDbPath, { agentName: "idle", state: "working" });
|
|
645
|
+
const { runtime } = makeSpyRuntime();
|
|
646
|
+
const fake = makeFakeProc();
|
|
647
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
648
|
+
emitFakeTurn(fake, { sessionId: "idle-session", isError: false });
|
|
649
|
+
fake._exit(0);
|
|
650
|
+
return fake;
|
|
651
|
+
};
|
|
652
|
+
|
|
653
|
+
const errors: Array<{ level: string; message: string }> = [];
|
|
654
|
+
const logger: RunnerLogger = (level, message) => {
|
|
655
|
+
errors.push({ level, message });
|
|
656
|
+
};
|
|
657
|
+
|
|
658
|
+
const result = await runTurn(
|
|
659
|
+
makeRunOpts(ctx, "idle", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
|
|
660
|
+
);
|
|
661
|
+
|
|
662
|
+
expect(result.cleanResult).toBe(true);
|
|
663
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
664
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
665
|
+
expect(result.finalState).toBe("completed");
|
|
666
|
+
|
|
667
|
+
// Contract violation must surface via the runner diagnostic sink.
|
|
668
|
+
const violation = errors.find(
|
|
669
|
+
(e) => e.level === "error" && e.message.includes("without sending terminal mail"),
|
|
670
|
+
);
|
|
671
|
+
expect(violation).toBeDefined();
|
|
672
|
+
|
|
673
|
+
const after = readSession(ctx.sessionsDbPath, "idle");
|
|
674
|
+
expect(after?.state).toBe("completed");
|
|
675
|
+
});
|
|
676
|
+
|
|
677
|
+
test("merger: merged mail counts as terminal → completed", async () => {
|
|
678
|
+
seedSession(ctx.sessionsDbPath, {
|
|
679
|
+
agentName: "mg",
|
|
680
|
+
capability: "merger",
|
|
681
|
+
state: "working",
|
|
682
|
+
});
|
|
683
|
+
const { runtime } = makeSpyRuntime();
|
|
684
|
+
|
|
685
|
+
const fake = makeFakeProc();
|
|
686
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
687
|
+
(async () => {
|
|
688
|
+
await Bun.sleep(20);
|
|
689
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
690
|
+
try {
|
|
691
|
+
createMailClient(s).sendProtocol({
|
|
692
|
+
from: "mg",
|
|
693
|
+
to: "lead",
|
|
694
|
+
subject: "Merged: feature/foo",
|
|
695
|
+
body: "ok",
|
|
696
|
+
type: "merged",
|
|
697
|
+
priority: "normal",
|
|
698
|
+
payload: { branch: "feature/foo", taskId: "t-mg", tier: "clean-merge" },
|
|
699
|
+
});
|
|
700
|
+
} finally {
|
|
701
|
+
s.close();
|
|
702
|
+
}
|
|
703
|
+
emitFakeTurn(fake, { sessionId: "mg-session" });
|
|
704
|
+
fake._exit(0);
|
|
705
|
+
})();
|
|
706
|
+
return fake;
|
|
707
|
+
};
|
|
708
|
+
|
|
709
|
+
const result = await runTurn(
|
|
710
|
+
makeRunOpts(ctx, "mg", { runtime, _spawnFn: spawnFn, capability: "merger" }),
|
|
711
|
+
);
|
|
712
|
+
|
|
713
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
714
|
+
expect(result.finalState).toBe("completed");
|
|
715
|
+
});
|
|
716
|
+
|
|
717
|
+
test("merger: merge_failed mail also counts as terminal → completed", async () => {
|
|
718
|
+
seedSession(ctx.sessionsDbPath, {
|
|
719
|
+
agentName: "mgf",
|
|
720
|
+
capability: "merger",
|
|
721
|
+
state: "working",
|
|
722
|
+
});
|
|
723
|
+
const { runtime } = makeSpyRuntime();
|
|
724
|
+
|
|
725
|
+
const fake = makeFakeProc();
|
|
726
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
727
|
+
(async () => {
|
|
728
|
+
await Bun.sleep(20);
|
|
729
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
730
|
+
try {
|
|
731
|
+
createMailClient(s).sendProtocol({
|
|
732
|
+
from: "mgf",
|
|
733
|
+
to: "lead",
|
|
734
|
+
subject: "Merge failed: feature/bar",
|
|
735
|
+
body: "conflict",
|
|
736
|
+
type: "merge_failed",
|
|
737
|
+
priority: "high",
|
|
738
|
+
payload: {
|
|
739
|
+
branch: "feature/bar",
|
|
740
|
+
taskId: "t-mgf",
|
|
741
|
+
conflictFiles: ["src/foo.ts"],
|
|
742
|
+
errorMessage: "conflict",
|
|
743
|
+
},
|
|
744
|
+
});
|
|
745
|
+
} finally {
|
|
746
|
+
s.close();
|
|
747
|
+
}
|
|
748
|
+
emitFakeTurn(fake, { sessionId: "mgf-session" });
|
|
749
|
+
fake._exit(0);
|
|
750
|
+
})();
|
|
751
|
+
return fake;
|
|
752
|
+
};
|
|
753
|
+
|
|
754
|
+
const result = await runTurn(
|
|
755
|
+
makeRunOpts(ctx, "mgf", { runtime, _spawnFn: spawnFn, capability: "merger" }),
|
|
756
|
+
);
|
|
757
|
+
|
|
758
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
759
|
+
expect(result.finalState).toBe("completed");
|
|
760
|
+
});
|
|
761
|
+
|
|
762
|
+
test("scout: --type result mail counts as terminal → completed (overstory-1a4c)", async () => {
|
|
763
|
+
// Regression for overstory-1a4c: workers frequently send `--type result`
|
|
764
|
+
// instead of `--type worker_done` because both are valid mail types and
|
|
765
|
+
// the agent prompts described `result` as a completion signal in some
|
|
766
|
+
// examples. Pre-fix, this left sessions stuck in `working` until the
|
|
767
|
+
// watchdog flipped them to `zombie`. The runner now accepts `result` as
|
|
768
|
+
// a terminal type for builder/scout/reviewer/lead.
|
|
769
|
+
seedSession(ctx.sessionsDbPath, {
|
|
770
|
+
agentName: "scout-result",
|
|
771
|
+
capability: "scout",
|
|
772
|
+
state: "working",
|
|
773
|
+
});
|
|
774
|
+
const { runtime } = makeSpyRuntime();
|
|
775
|
+
|
|
776
|
+
const fake = makeFakeProc();
|
|
777
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
778
|
+
(async () => {
|
|
779
|
+
await Bun.sleep(20);
|
|
780
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
781
|
+
try {
|
|
782
|
+
createMailClient(s).send({
|
|
783
|
+
from: "scout-result",
|
|
784
|
+
to: "coordinator",
|
|
785
|
+
subject: "Spec ready: overstory-4670",
|
|
786
|
+
body: "Spec written.",
|
|
787
|
+
type: "result",
|
|
788
|
+
priority: "normal",
|
|
789
|
+
});
|
|
790
|
+
} finally {
|
|
791
|
+
s.close();
|
|
792
|
+
}
|
|
793
|
+
emitFakeTurn(fake, { sessionId: "scout-result-session" });
|
|
794
|
+
fake._exit(0);
|
|
795
|
+
})();
|
|
796
|
+
return fake;
|
|
797
|
+
};
|
|
798
|
+
|
|
799
|
+
const result = await runTurn(
|
|
800
|
+
makeRunOpts(ctx, "scout-result", { runtime, _spawnFn: spawnFn, capability: "scout" }),
|
|
801
|
+
);
|
|
802
|
+
|
|
803
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
804
|
+
expect(result.cleanResult).toBe(true);
|
|
805
|
+
expect(result.finalState).toBe("completed");
|
|
806
|
+
});
|
|
807
|
+
|
|
808
|
+
test("merger: worker_done is NOT terminal for merger → contract violation, completed", async () => {
|
|
809
|
+
// Mergers must send `merged` or `merge_failed`. A `worker_done` from a
|
|
810
|
+
// merger doesn't count as terminal, so this is the same contract
|
|
811
|
+
// violation as overstory-6071: clean exit, no terminal mail. Pre-fix
|
|
812
|
+
// this stuck at `working`; now it settles to `completed` with a loud
|
|
813
|
+
// error log.
|
|
814
|
+
seedSession(ctx.sessionsDbPath, {
|
|
815
|
+
agentName: "mg-wd",
|
|
816
|
+
capability: "merger",
|
|
817
|
+
state: "working",
|
|
818
|
+
});
|
|
819
|
+
const { runtime } = makeSpyRuntime();
|
|
820
|
+
|
|
821
|
+
const fake = makeFakeProc();
|
|
822
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
823
|
+
(async () => {
|
|
824
|
+
await Bun.sleep(20);
|
|
825
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
826
|
+
try {
|
|
827
|
+
createMailClient(s).sendProtocol({
|
|
828
|
+
from: "mg-wd",
|
|
829
|
+
to: "lead",
|
|
830
|
+
subject: "Worker done (wrong type for merger)",
|
|
831
|
+
body: "x",
|
|
832
|
+
type: "worker_done",
|
|
833
|
+
priority: "normal",
|
|
834
|
+
payload: { taskId: "t", branch: "b", exitCode: 0, filesModified: [] },
|
|
835
|
+
});
|
|
836
|
+
} finally {
|
|
837
|
+
s.close();
|
|
838
|
+
}
|
|
839
|
+
emitFakeTurn(fake, { sessionId: "mg-wd-session" });
|
|
840
|
+
fake._exit(0);
|
|
841
|
+
})();
|
|
842
|
+
return fake;
|
|
843
|
+
};
|
|
844
|
+
|
|
845
|
+
const logger: RunnerLogger = () => {};
|
|
846
|
+
const result = await runTurn(
|
|
847
|
+
makeRunOpts(ctx, "mg-wd", {
|
|
848
|
+
runtime,
|
|
849
|
+
_spawnFn: spawnFn,
|
|
850
|
+
capability: "merger",
|
|
851
|
+
_logWarning: logger,
|
|
852
|
+
}),
|
|
853
|
+
);
|
|
854
|
+
|
|
855
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
856
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
857
|
+
expect(result.finalState).toBe("completed");
|
|
858
|
+
});
|
|
859
|
+
|
|
860
|
+
test("stall watchdog: no parser events for eventStallTimeoutMs → SIGTERM, zombie (overstory-ddb3)", async () => {
|
|
861
|
+
// Pre-fix: a hung claude (alive but stalled — Anthropic API hang,
|
|
862
|
+
// deadlock) would block the parser drain forever because the for-await
|
|
863
|
+
// loop only exits on stdout close. The runner now arms a per-event
|
|
864
|
+
// stall watchdog that resets on every event; on timeout it kills the
|
|
865
|
+
// process via the existing SIGTERM/SIGKILL escalation.
|
|
866
|
+
seedSession(ctx.sessionsDbPath, { agentName: "stalled", state: "working" });
|
|
867
|
+
const { runtime } = makeSpyRuntime();
|
|
868
|
+
|
|
869
|
+
const fake = makeFakeProc();
|
|
870
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
871
|
+
// Emit nothing: simulate claude alive but stalled. The stall
|
|
872
|
+
// watchdog must fire and kill the process.
|
|
873
|
+
return fake;
|
|
874
|
+
};
|
|
875
|
+
|
|
876
|
+
const errors: Array<{ level: string; message: string }> = [];
|
|
877
|
+
const logger: RunnerLogger = (level, message) => {
|
|
878
|
+
errors.push({ level, message });
|
|
879
|
+
};
|
|
880
|
+
|
|
881
|
+
const result = await runTurn({
|
|
882
|
+
...makeRunOpts(ctx, "stalled", {
|
|
883
|
+
runtime,
|
|
884
|
+
_spawnFn: spawnFn,
|
|
885
|
+
_logWarning: logger,
|
|
886
|
+
}),
|
|
887
|
+
eventStallTimeoutMs: 50,
|
|
888
|
+
sigkillDelayMs: 25,
|
|
889
|
+
});
|
|
890
|
+
|
|
891
|
+
expect(fake._killSignals[0]).toBe("SIGTERM");
|
|
892
|
+
expect(result.stallAborted).toBe(true);
|
|
893
|
+
expect(result.exitCode).toBeNull();
|
|
894
|
+
expect(result.finalState).toBe("zombie");
|
|
895
|
+
|
|
896
|
+
const stallLog = errors.find(
|
|
897
|
+
(e) => e.level === "error" && e.message.includes("parser stalled"),
|
|
898
|
+
);
|
|
899
|
+
expect(stallLog).toBeDefined();
|
|
900
|
+
|
|
901
|
+
const after = readSession(ctx.sessionsDbPath, "stalled");
|
|
902
|
+
expect(after?.state).toBe("zombie");
|
|
903
|
+
});
|
|
904
|
+
|
|
905
|
+
test("stall watchdog: events reset the timer — live turns are not killed (overstory-ddb3)", async () => {
|
|
906
|
+
// Per-event reset: a turn whose events keep arriving must not be
|
|
907
|
+
// aborted by the stall watchdog. We give a generous 500ms stall
|
|
908
|
+
// budget and emit several events each separated by ~50ms; the
|
|
909
|
+
// cumulative runtime exceeds the budget, but no inter-event gap
|
|
910
|
+
// does, so a properly resetting timer never fires.
|
|
911
|
+
seedSession(ctx.sessionsDbPath, { agentName: "live", state: "working" });
|
|
912
|
+
const { runtime } = makeSpyRuntime();
|
|
913
|
+
|
|
914
|
+
const fake = makeFakeProc();
|
|
915
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
916
|
+
(async () => {
|
|
917
|
+
const sessionId = "live-session";
|
|
918
|
+
fake._pushLine(
|
|
919
|
+
JSON.stringify({
|
|
920
|
+
type: "system",
|
|
921
|
+
subtype: "init",
|
|
922
|
+
session_id: sessionId,
|
|
923
|
+
model: "claude-test",
|
|
924
|
+
}),
|
|
925
|
+
);
|
|
926
|
+
for (let i = 0; i < 6; i++) {
|
|
927
|
+
await Bun.sleep(50);
|
|
928
|
+
fake._pushLine(
|
|
929
|
+
JSON.stringify({
|
|
930
|
+
type: "assistant",
|
|
931
|
+
message: {
|
|
932
|
+
role: "assistant",
|
|
933
|
+
content: [{ type: "text", text: `chunk ${i}` }],
|
|
934
|
+
},
|
|
935
|
+
session_id: sessionId,
|
|
936
|
+
}),
|
|
937
|
+
);
|
|
938
|
+
}
|
|
939
|
+
emitFakeTurn(fake, { sessionId });
|
|
940
|
+
fake._exit(0);
|
|
941
|
+
})();
|
|
942
|
+
return fake;
|
|
943
|
+
};
|
|
944
|
+
|
|
945
|
+
const logger: RunnerLogger = () => {};
|
|
946
|
+
const result = await runTurn({
|
|
947
|
+
...makeRunOpts(ctx, "live", {
|
|
948
|
+
runtime,
|
|
949
|
+
_spawnFn: spawnFn,
|
|
950
|
+
_logWarning: logger,
|
|
951
|
+
}),
|
|
952
|
+
eventStallTimeoutMs: 500,
|
|
953
|
+
sigkillDelayMs: 25,
|
|
954
|
+
});
|
|
955
|
+
|
|
956
|
+
expect(result.stallAborted).toBe(false);
|
|
957
|
+
expect(result.exitCode).toBe(0);
|
|
958
|
+
expect(result.cleanResult).toBe(true);
|
|
959
|
+
// Sanity: turn ran longer than the stall budget would allow if the
|
|
960
|
+
// timer didn't reset on each event (6 × 50ms = 300ms minimum).
|
|
961
|
+
expect(result.durationMs).toBeGreaterThanOrEqual(250);
|
|
962
|
+
});
|
|
963
|
+
|
|
964
|
+
test("abortSignal triggers SIGTERM, finalState becomes zombie", async () => {
|
|
965
|
+
seedSession(ctx.sessionsDbPath, { agentName: "to-kill", state: "working" });
|
|
966
|
+
const { runtime } = makeSpyRuntime();
|
|
967
|
+
|
|
968
|
+
const fake = makeFakeProc();
|
|
969
|
+
const ac = new AbortController();
|
|
970
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
971
|
+
// Emit init but never close — the abort path is what ends this turn.
|
|
972
|
+
fake._pushLine(
|
|
973
|
+
JSON.stringify({
|
|
974
|
+
type: "system",
|
|
975
|
+
subtype: "init",
|
|
976
|
+
session_id: "abort-test",
|
|
977
|
+
}),
|
|
978
|
+
);
|
|
979
|
+
return fake;
|
|
980
|
+
};
|
|
981
|
+
|
|
982
|
+
const runPromise = runTurn(
|
|
983
|
+
makeRunOpts(ctx, "to-kill", {
|
|
984
|
+
runtime,
|
|
985
|
+
_spawnFn: spawnFn,
|
|
986
|
+
abortSignal: ac.signal,
|
|
987
|
+
sigkillDelayMs: 25,
|
|
988
|
+
}),
|
|
989
|
+
);
|
|
990
|
+
|
|
991
|
+
// Give the parser a chance to consume the init event.
|
|
992
|
+
await Bun.sleep(60);
|
|
993
|
+
ac.abort();
|
|
994
|
+
const result = await runPromise;
|
|
995
|
+
|
|
996
|
+
expect(fake._killSignals[0]).toBe("SIGTERM");
|
|
997
|
+
expect(result.exitCode).toBeNull();
|
|
998
|
+
expect(result.finalState).toBe("zombie");
|
|
999
|
+
|
|
1000
|
+
const after = readSession(ctx.sessionsDbPath, "to-kill");
|
|
1001
|
+
expect(after?.state).toBe("zombie");
|
|
1002
|
+
});
|
|
1003
|
+
|
|
1004
|
+
// --- Parent-notify paths (overstory-4159, overstory-c772) ---
|
|
1005
|
+
//
|
|
1006
|
+
// When a turn ends without the capability's terminal mail, the runner emits
|
|
1007
|
+
// a synthetic worker_died mail to the parent so the lead does not block on
|
|
1008
|
+
// a signal that will never arrive. Three trigger paths:
|
|
1009
|
+
// 1. abort (operator or external abortSignal) → finalState=zombie
|
|
1010
|
+
// 2. parser stall → finalState=zombie
|
|
1011
|
+
// 3. clean exit without terminal mail (terminalMailMissing) → completed
|
|
1012
|
+
|
|
1013
|
+
test("abort path: emits worker_died to parent with terminatedBy='runner' (overstory-c772)", async () => {
|
|
1014
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1015
|
+
agentName: "child-abort",
|
|
1016
|
+
state: "working",
|
|
1017
|
+
parentAgent: "lead-x",
|
|
1018
|
+
taskId: "task-c772",
|
|
1019
|
+
});
|
|
1020
|
+
const { runtime } = makeSpyRuntime();
|
|
1021
|
+
const fake = makeFakeProc();
|
|
1022
|
+
const ac = new AbortController();
|
|
1023
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1024
|
+
fake._pushLine(JSON.stringify({ type: "system", subtype: "init", session_id: "abort-mail" }));
|
|
1025
|
+
return fake;
|
|
1026
|
+
};
|
|
1027
|
+
|
|
1028
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1029
|
+
try {
|
|
1030
|
+
const runPromise = runTurn({
|
|
1031
|
+
...makeRunOpts(ctx, "child-abort", {
|
|
1032
|
+
runtime,
|
|
1033
|
+
_spawnFn: spawnFn,
|
|
1034
|
+
abortSignal: ac.signal,
|
|
1035
|
+
sigkillDelayMs: 25,
|
|
1036
|
+
}),
|
|
1037
|
+
_mailStore: sharedMail,
|
|
1038
|
+
});
|
|
1039
|
+
await Bun.sleep(60);
|
|
1040
|
+
ac.abort();
|
|
1041
|
+
const result = await runPromise;
|
|
1042
|
+
expect(result.finalState).toBe("zombie");
|
|
1043
|
+
|
|
1044
|
+
const inbox = sharedMail.getAll({ to: "lead-x", type: "worker_died" });
|
|
1045
|
+
expect(inbox.length).toBe(1);
|
|
1046
|
+
const msg = inbox[0];
|
|
1047
|
+
expect(msg?.from).toBe("child-abort");
|
|
1048
|
+
expect(msg?.priority).toBe("high");
|
|
1049
|
+
expect(msg?.subject).toContain("worker_died");
|
|
1050
|
+
expect(msg?.subject).toContain("child-abort");
|
|
1051
|
+
const payload = JSON.parse(msg?.payload ?? "{}") as {
|
|
1052
|
+
terminatedBy?: string;
|
|
1053
|
+
reason?: string;
|
|
1054
|
+
agentName?: string;
|
|
1055
|
+
taskId?: string;
|
|
1056
|
+
capability?: string;
|
|
1057
|
+
};
|
|
1058
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1059
|
+
expect(payload.agentName).toBe("child-abort");
|
|
1060
|
+
// taskId in the mail mirrors the runner's opts.taskId for this turn;
|
|
1061
|
+
// the test rig's makeRunOpts seeds this as "task-test".
|
|
1062
|
+
expect(payload.taskId).toBe("task-test");
|
|
1063
|
+
expect(payload.capability).toBe("builder");
|
|
1064
|
+
expect(payload.reason).toContain("Aborted");
|
|
1065
|
+
} finally {
|
|
1066
|
+
sharedMail.close();
|
|
1067
|
+
}
|
|
1068
|
+
});
|
|
1069
|
+
|
|
1070
|
+
test("stall path: emits worker_died to parent (overstory-c772)", async () => {
|
|
1071
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1072
|
+
agentName: "child-stall",
|
|
1073
|
+
state: "working",
|
|
1074
|
+
parentAgent: "lead-y",
|
|
1075
|
+
taskId: "task-c772-b",
|
|
1076
|
+
});
|
|
1077
|
+
const { runtime } = makeSpyRuntime();
|
|
1078
|
+
const fake = makeFakeProc();
|
|
1079
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1080
|
+
// Emit nothing — stall watchdog must fire and abort.
|
|
1081
|
+
return fake;
|
|
1082
|
+
};
|
|
1083
|
+
|
|
1084
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1085
|
+
try {
|
|
1086
|
+
const result = await runTurn({
|
|
1087
|
+
...makeRunOpts(ctx, "child-stall", {
|
|
1088
|
+
runtime,
|
|
1089
|
+
_spawnFn: spawnFn,
|
|
1090
|
+
}),
|
|
1091
|
+
_mailStore: sharedMail,
|
|
1092
|
+
eventStallTimeoutMs: 50,
|
|
1093
|
+
sigkillDelayMs: 25,
|
|
1094
|
+
});
|
|
1095
|
+
expect(result.stallAborted).toBe(true);
|
|
1096
|
+
expect(result.finalState).toBe("zombie");
|
|
1097
|
+
|
|
1098
|
+
const inbox = sharedMail.getAll({ to: "lead-y", type: "worker_died" });
|
|
1099
|
+
expect(inbox.length).toBe(1);
|
|
1100
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1101
|
+
terminatedBy?: string;
|
|
1102
|
+
reason?: string;
|
|
1103
|
+
};
|
|
1104
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1105
|
+
expect(payload.reason).toContain("stalled");
|
|
1106
|
+
} finally {
|
|
1107
|
+
sharedMail.close();
|
|
1108
|
+
}
|
|
1109
|
+
});
|
|
1110
|
+
|
|
1111
|
+
// --- Resume-path parent-notify (overstory-de3c) ---
|
|
1112
|
+
//
|
|
1113
|
+
// The witnessed bug: a spawn-per-turn worker that survived a first-turn
|
|
1114
|
+
// parser stall (worker_died emitted, state→zombie) was re-dispatched by its
|
|
1115
|
+
// parent via `ov sling --recover`. The resumed turn ran, then transitioned
|
|
1116
|
+
// to zombie SILENTLY — no second worker_died mail was ever sent. The lead
|
|
1117
|
+
// blocked forever.
|
|
1118
|
+
//
|
|
1119
|
+
// These tests pin down whether the runner itself is responsible. Each seeds
|
|
1120
|
+
// `claudeSessionId` so the runner exercises the --resume code path, and
|
|
1121
|
+
// asserts that worker_died is still emitted on stall / abort / clean-exit-
|
|
1122
|
+
// without-terminal-mail. If these PASS the runner is exonerated and the
|
|
1123
|
+
// fix is upstream (sling.ts re-spawn upsert dropping parentAgent — H1).
|
|
1124
|
+
|
|
1125
|
+
test("resume-stall: parser stall on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1126
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1127
|
+
agentName: "child-resume-stall",
|
|
1128
|
+
state: "working",
|
|
1129
|
+
parentAgent: "lead-r",
|
|
1130
|
+
taskId: "task-de3c-stall",
|
|
1131
|
+
claudeSessionId: "prior-session",
|
|
1132
|
+
});
|
|
1133
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1134
|
+
const fake = makeFakeProc();
|
|
1135
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1136
|
+
// Emit nothing — the resumed turn parser-stalls.
|
|
1137
|
+
return fake;
|
|
1138
|
+
};
|
|
1139
|
+
|
|
1140
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1141
|
+
try {
|
|
1142
|
+
const result = await runTurn({
|
|
1143
|
+
...makeRunOpts(ctx, "child-resume-stall", {
|
|
1144
|
+
runtime,
|
|
1145
|
+
_spawnFn: spawnFn,
|
|
1146
|
+
}),
|
|
1147
|
+
_mailStore: sharedMail,
|
|
1148
|
+
eventStallTimeoutMs: 50,
|
|
1149
|
+
sigkillDelayMs: 25,
|
|
1150
|
+
});
|
|
1151
|
+
|
|
1152
|
+
expect(result.stallAborted).toBe(true);
|
|
1153
|
+
expect(result.finalState).toBe("zombie");
|
|
1154
|
+
|
|
1155
|
+
// The runtime received the prior session id (resume path exercised).
|
|
1156
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1157
|
+
|
|
1158
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1159
|
+
expect(inbox.length).toBe(1);
|
|
1160
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1161
|
+
terminatedBy?: string;
|
|
1162
|
+
reason?: string;
|
|
1163
|
+
agentName?: string;
|
|
1164
|
+
};
|
|
1165
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1166
|
+
expect(payload.reason).toContain("stalled");
|
|
1167
|
+
expect(payload.agentName).toBe("child-resume-stall");
|
|
1168
|
+
} finally {
|
|
1169
|
+
sharedMail.close();
|
|
1170
|
+
}
|
|
1171
|
+
});
|
|
1172
|
+
|
|
1173
|
+
test("resume-abort: operator abort on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1174
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1175
|
+
agentName: "child-resume-abort",
|
|
1176
|
+
state: "working",
|
|
1177
|
+
parentAgent: "lead-r",
|
|
1178
|
+
taskId: "task-de3c-abort",
|
|
1179
|
+
claudeSessionId: "prior-session",
|
|
1180
|
+
});
|
|
1181
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1182
|
+
const fake = makeFakeProc();
|
|
1183
|
+
const ac = new AbortController();
|
|
1184
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1185
|
+
fake._pushLine(
|
|
1186
|
+
JSON.stringify({
|
|
1187
|
+
type: "system",
|
|
1188
|
+
subtype: "init",
|
|
1189
|
+
session_id: "prior-session",
|
|
1190
|
+
}),
|
|
1191
|
+
);
|
|
1192
|
+
return fake;
|
|
1193
|
+
};
|
|
1194
|
+
|
|
1195
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1196
|
+
try {
|
|
1197
|
+
const runPromise = runTurn({
|
|
1198
|
+
...makeRunOpts(ctx, "child-resume-abort", {
|
|
1199
|
+
runtime,
|
|
1200
|
+
_spawnFn: spawnFn,
|
|
1201
|
+
abortSignal: ac.signal,
|
|
1202
|
+
sigkillDelayMs: 25,
|
|
1203
|
+
}),
|
|
1204
|
+
_mailStore: sharedMail,
|
|
1205
|
+
});
|
|
1206
|
+
await Bun.sleep(60);
|
|
1207
|
+
ac.abort();
|
|
1208
|
+
const result = await runPromise;
|
|
1209
|
+
|
|
1210
|
+
expect(result.finalState).toBe("zombie");
|
|
1211
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1212
|
+
|
|
1213
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1214
|
+
expect(inbox.length).toBe(1);
|
|
1215
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1216
|
+
terminatedBy?: string;
|
|
1217
|
+
reason?: string;
|
|
1218
|
+
agentName?: string;
|
|
1219
|
+
};
|
|
1220
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1221
|
+
expect(payload.reason).toContain("Aborted");
|
|
1222
|
+
expect(payload.agentName).toBe("child-resume-abort");
|
|
1223
|
+
} finally {
|
|
1224
|
+
sharedMail.close();
|
|
1225
|
+
}
|
|
1226
|
+
});
|
|
1227
|
+
|
|
1228
|
+
test("resume-terminalMailMissing: clean exit on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1229
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1230
|
+
agentName: "child-resume-noop",
|
|
1231
|
+
state: "working",
|
|
1232
|
+
parentAgent: "lead-r",
|
|
1233
|
+
taskId: "task-de3c-noop",
|
|
1234
|
+
claudeSessionId: "prior-session",
|
|
1235
|
+
});
|
|
1236
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1237
|
+
const fake = makeFakeProc();
|
|
1238
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1239
|
+
emitFakeTurn(fake, { sessionId: "prior-session", isError: false });
|
|
1240
|
+
fake._exit(0);
|
|
1241
|
+
return fake;
|
|
1242
|
+
};
|
|
1243
|
+
|
|
1244
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1245
|
+
try {
|
|
1246
|
+
const result = await runTurn({
|
|
1247
|
+
...makeRunOpts(ctx, "child-resume-noop", {
|
|
1248
|
+
runtime,
|
|
1249
|
+
_spawnFn: spawnFn,
|
|
1250
|
+
}),
|
|
1251
|
+
_mailStore: sharedMail,
|
|
1252
|
+
});
|
|
1253
|
+
|
|
1254
|
+
expect(result.cleanResult).toBe(true);
|
|
1255
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
1256
|
+
expect(result.finalState).toBe("completed");
|
|
1257
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1258
|
+
|
|
1259
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1260
|
+
expect(inbox.length).toBe(1);
|
|
1261
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1262
|
+
terminatedBy?: string;
|
|
1263
|
+
reason?: string;
|
|
1264
|
+
agentName?: string;
|
|
1265
|
+
};
|
|
1266
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1267
|
+
expect(payload.reason).toContain("Clean exit without terminal mail");
|
|
1268
|
+
expect(payload.agentName).toBe("child-resume-noop");
|
|
1269
|
+
} finally {
|
|
1270
|
+
sharedMail.close();
|
|
1271
|
+
}
|
|
1272
|
+
});
|
|
1273
|
+
|
|
1274
|
+
test("terminalMailMissing: emits worker_died to parent (overstory-4159)", async () => {
|
|
1275
|
+
// Silent-no-op: claude exits cleanly but never sends worker_done. The
|
|
1276
|
+
// lead would otherwise block forever waiting for a terminal mail.
|
|
1277
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1278
|
+
agentName: "child-noop",
|
|
1279
|
+
state: "working",
|
|
1280
|
+
parentAgent: "lead-z",
|
|
1281
|
+
taskId: "task-4159",
|
|
1282
|
+
});
|
|
1283
|
+
const { runtime } = makeSpyRuntime();
|
|
1284
|
+
const fake = makeFakeProc();
|
|
1285
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1286
|
+
emitFakeTurn(fake, { sessionId: "noop-session", isError: false });
|
|
1287
|
+
fake._exit(0);
|
|
1288
|
+
return fake;
|
|
1289
|
+
};
|
|
1290
|
+
|
|
1291
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1292
|
+
try {
|
|
1293
|
+
const result = await runTurn({
|
|
1294
|
+
...makeRunOpts(ctx, "child-noop", {
|
|
1295
|
+
runtime,
|
|
1296
|
+
_spawnFn: spawnFn,
|
|
1297
|
+
}),
|
|
1298
|
+
_mailStore: sharedMail,
|
|
1299
|
+
});
|
|
1300
|
+
expect(result.cleanResult).toBe(true);
|
|
1301
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
1302
|
+
expect(result.finalState).toBe("completed");
|
|
1303
|
+
|
|
1304
|
+
const inbox = sharedMail.getAll({ to: "lead-z", type: "worker_died" });
|
|
1305
|
+
expect(inbox.length).toBe(1);
|
|
1306
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1307
|
+
terminatedBy?: string;
|
|
1308
|
+
reason?: string;
|
|
1309
|
+
agentName?: string;
|
|
1310
|
+
};
|
|
1311
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1312
|
+
expect(payload.agentName).toBe("child-noop");
|
|
1313
|
+
expect(payload.reason).toContain("Clean exit without terminal mail");
|
|
1314
|
+
} finally {
|
|
1315
|
+
sharedMail.close();
|
|
1316
|
+
}
|
|
1317
|
+
});
|
|
1318
|
+
|
|
1319
|
+
test("no parentAgent: skips worker_died mail (orchestrator-spawned worker)", async () => {
|
|
1320
|
+
// Orchestrator-spawned workers have parentAgent=null; there is nobody to
|
|
1321
|
+
// notify. The runner must not fabricate a recipient.
|
|
1322
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1323
|
+
agentName: "orphan-noop",
|
|
1324
|
+
state: "working",
|
|
1325
|
+
parentAgent: null,
|
|
1326
|
+
taskId: "task-orphan",
|
|
1327
|
+
});
|
|
1328
|
+
const { runtime } = makeSpyRuntime();
|
|
1329
|
+
const fake = makeFakeProc();
|
|
1330
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1331
|
+
emitFakeTurn(fake, { sessionId: "orphan-session" });
|
|
1332
|
+
fake._exit(0);
|
|
1333
|
+
return fake;
|
|
1334
|
+
};
|
|
1335
|
+
|
|
1336
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1337
|
+
try {
|
|
1338
|
+
const result = await runTurn({
|
|
1339
|
+
...makeRunOpts(ctx, "orphan-noop", { runtime, _spawnFn: spawnFn }),
|
|
1340
|
+
_mailStore: sharedMail,
|
|
1341
|
+
});
|
|
1342
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
1343
|
+
const all = sharedMail.getAll({ type: "worker_died" });
|
|
1344
|
+
expect(all.length).toBe(0);
|
|
1345
|
+
} finally {
|
|
1346
|
+
sharedMail.close();
|
|
1347
|
+
}
|
|
1348
|
+
});
|
|
1349
|
+
|
|
1350
|
+
test("happy path: terminal mail observed → no worker_died emitted (no double-signal)", async () => {
|
|
1351
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1352
|
+
agentName: "child-ok",
|
|
1353
|
+
state: "working",
|
|
1354
|
+
parentAgent: "lead-ok",
|
|
1355
|
+
taskId: "task-happy",
|
|
1356
|
+
});
|
|
1357
|
+
const { runtime } = makeSpyRuntime();
|
|
1358
|
+
const fake = makeFakeProc();
|
|
1359
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1360
|
+
(async () => {
|
|
1361
|
+
await Bun.sleep(15);
|
|
1362
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
1363
|
+
try {
|
|
1364
|
+
createMailClient(s).sendProtocol({
|
|
1365
|
+
from: "child-ok",
|
|
1366
|
+
to: "lead-ok",
|
|
1367
|
+
subject: "Worker done",
|
|
1368
|
+
body: "ok",
|
|
1369
|
+
type: "worker_done",
|
|
1370
|
+
priority: "normal",
|
|
1371
|
+
payload: {
|
|
1372
|
+
taskId: "task-happy",
|
|
1373
|
+
branch: "branch",
|
|
1374
|
+
exitCode: 0,
|
|
1375
|
+
filesModified: [],
|
|
1376
|
+
},
|
|
1377
|
+
});
|
|
1378
|
+
} finally {
|
|
1379
|
+
s.close();
|
|
1380
|
+
}
|
|
1381
|
+
emitFakeTurn(fake, { sessionId: "ok-session" });
|
|
1382
|
+
fake._exit(0);
|
|
1383
|
+
})();
|
|
1384
|
+
return fake;
|
|
1385
|
+
};
|
|
1386
|
+
|
|
1387
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1388
|
+
try {
|
|
1389
|
+
const result = await runTurn({
|
|
1390
|
+
...makeRunOpts(ctx, "child-ok", { runtime, _spawnFn: spawnFn }),
|
|
1391
|
+
_mailStore: sharedMail,
|
|
1392
|
+
});
|
|
1393
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
1394
|
+
expect(result.terminalMailMissing).toBe(false);
|
|
1395
|
+
expect(result.finalState).toBe("completed");
|
|
1396
|
+
|
|
1397
|
+
// Inbox should have the agent's own worker_done, but NO worker_died.
|
|
1398
|
+
const died = sharedMail.getAll({ to: "lead-ok", type: "worker_died" });
|
|
1399
|
+
expect(died.length).toBe(0);
|
|
1400
|
+
} finally {
|
|
1401
|
+
sharedMail.close();
|
|
1402
|
+
}
|
|
1403
|
+
});
|
|
1404
|
+
|
|
1405
|
+
test("two concurrent runTurn calls for the same agent serialize", async () => {
  // Guards the per-agent serialization contract: when two runTurn calls race
  // for the same agent, the second spawn must not start until the first
  // turn's spawn window has fully ended.
  seedSession(ctx.sessionsDbPath, { agentName: "serial", state: "working" });
  const { runtime } = makeSpyRuntime();

  // Each spawn records a start/end pair; serialization means the pairs
  // never interleave.
  const windows: Array<{ id: number; phase: "start" | "end"; ts: number }> = [];
  let spawnId = 0;
  const spawnFn: TurnSpawnFn = () => {
    const id = ++spawnId;
    windows.push({ id, phase: "start", ts: Date.now() });
    const fake = makeFakeProc();
    (async () => {
      // Hold the spawn open briefly to widen the overlap window.
      await Bun.sleep(80);
      emitFakeTurn(fake, { sessionId: `s-${id}` });
      fake._exit(0);
      windows.push({ id, phase: "end", ts: Date.now() });
    })();
    return fake;
  };

  // Fire both turns without awaiting in between so they genuinely race.
  const a = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
  const b = runTurn(makeRunOpts(ctx, "serial", { runtime, _spawnFn: spawnFn }));
  await Promise.all([a, b]);

  // Sort by timestamp; verify the second start follows the first end.
  const ordered = [...windows].sort((x, y) => x.ts - y.ts);
  expect(ordered.length).toBe(4);
  expect(ordered[0]?.phase).toBe("start");
  expect(ordered[1]?.phase).toBe("end");
  expect(ordered[1]?.id).toBe(ordered[0]?.id);
  expect(ordered[2]?.phase).toBe("start");
  expect(ordered[2]?.id).not.toBe(ordered[0]?.id);
});
|
|
1438
|
+
|
|
1439
|
+
test("spawn throws — lock is released and error propagates", async () => {
  // A spawn function that throws synchronously must (a) propagate the error
  // to the runTurn caller, (b) leave no held cross-process lock, and
  // (c) leave the seeded session state unchanged.
  seedSession(ctx.sessionsDbPath, { agentName: "fails", state: "booting" });
  const { runtime } = makeSpyRuntime();
  const failingSpawn: TurnSpawnFn = () => {
    throw new Error("ENOENT: claude binary missing");
  };

  await expect(
    runTurn(makeRunOpts(ctx, "fails", { runtime, _spawnFn: failingSpawn })),
  ).rejects.toThrow(/binary missing/);

  // Cross-process lock state must be cleared so a follow-up turn can run.
  const state = readTurnLock(ctx.overstoryDir, "fails");
  expect(state.heldByPid).toBeNull();

  // Session state must NOT have transitioned (no events were observed).
  const after = readSession(ctx.sessionsDbPath, "fails");
  expect(after?.state).toBe("booting");
});
|
|
1458
|
+
|
|
1459
|
+
test("subsequent turn passes the prior session id to runtime.buildDirectSpawn", async () => {
  // Two back-to-back turns for one agent: the first has no resume id, and
  // the second must resume with the session id the first turn recorded.
  seedSession(ctx.sessionsDbPath, { agentName: "two-turns", state: "working" });
  const { runtime, spawnCalls } = makeSpyRuntime();

  // Turn 1: claude assigns session id "sid-1".
  const t1Fake = makeFakeProc();
  const t1Spawn: TurnSpawnFn = () => {
    emitFakeTurn(t1Fake, { sessionId: "sid-1" });
    t1Fake._exit(0);
    return t1Fake;
  };
  await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: t1Spawn }));

  // Turn 2: must read sid-1 back from SessionStore and pass it as resumeSessionId.
  const t2Fake = makeFakeProc();
  const t2Spawn: TurnSpawnFn = () => {
    emitFakeTurn(t2Fake, { sessionId: "sid-1" });
    t2Fake._exit(0);
    return t2Fake;
  };
  await runTurn(makeRunOpts(ctx, "two-turns", { runtime, _spawnFn: t2Spawn }));

  // First spawn: no prior id; second spawn: resumes "sid-1".
  expect(spawnCalls.length).toBe(2);
  expect(spawnCalls[0]?.resumeSessionId ?? null).toBeNull();
  expect(spawnCalls[1]?.resumeSessionId).toBe("sid-1");
});
|
|
1485
|
+
|
|
1486
|
+
test("user turn payload is written to spawned stdin", async () => {
  // When opts carry a userTurnNdjson payload, the runner must forward it
  // to the spawned process's stdin exactly once and byte-for-byte.
  seedSession(ctx.sessionsDbPath, { agentName: "stdin-test", state: "working" });
  const { runtime } = makeSpyRuntime();

  // A single NDJSON user-turn line (note the trailing newline).
  const payload = `${JSON.stringify({
    type: "user",
    message: { role: "user", content: [{ type: "text", text: "ping" }] },
  })}\n`;

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "stdin-sess" });
    fake._exit(0);
    return fake;
  };

  await runTurn(
    makeRunOpts(ctx, "stdin-test", {
      runtime,
      _spawnFn: spawnFn,
      userTurnNdjson: payload,
    }),
  );

  // Exactly one stdin write, and it is the untouched payload.
  expect(fake._writes.length).toBe(1);
  expect(fake._writes[0]).toBe(payload);
});
|
|
1513
|
+
|
|
1514
|
+
test("does not spawn when the runtime lacks buildDirectSpawn", async () => {
  // A runtime without buildDirectSpawn cannot run a direct turn; runTurn
  // must reject with a message naming the missing capability rather than
  // attempting a spawn.
  seedSession(ctx.sessionsDbPath, { agentName: "no-build", state: "booting" });
  // Minimal AgentRuntime stub satisfying every member EXCEPT buildDirectSpawn.
  const incomplete: AgentRuntime = {
    id: "incomplete",
    stability: "experimental",
    instructionPath: "AGENTS.md",
    buildSpawnCommand: () => "",
    buildPrintCommand: () => [],
    deployConfig: async () => {},
    detectReady: () => ({ phase: "ready" }),
    parseTranscript: async () => null,
    getTranscriptDir: () => null,
    buildEnv: () => ({}),
    // buildDirectSpawn intentionally omitted
    parseEvents: async function* () {
      yield* [];
    },
  };

  await expect(runTurn(makeRunOpts(ctx, "no-build", { runtime: incomplete }))).rejects.toThrow(
    /buildDirectSpawn/,
  );
});
|
|
1537
|
+
|
|
1538
|
+
// ---------- cleanup-invariant tests (overstory-4af3) ----------
|
|
1539
|
+
//
|
|
1540
|
+
// The runner publishes turn.pid for cross-process abort and updates
|
|
1541
|
+
// lastActivity at the end of every turn. Both must hold even when the
|
|
1542
|
+
// inner SessionStore writes silently fail. These tests pin the cleanup
|
|
1543
|
+
// contract so future regressions surface immediately.
|
|
1544
|
+
|
|
1545
|
+
test("happy path: turn.pid is removed and lastActivity advances past startedAt", async () => {
  // Cleanup contract on a successful turn: the turn.pid file is deleted
  // and session.lastActivity is bumped beyond the seeded startedAt.
  // Seed lastActivity a minute in the past so "advanced" is unambiguous.
  const startedAt = new Date(Date.now() - 60_000).toISOString();
  seedSession(ctx.sessionsDbPath, {
    agentName: "cleanup-ok",
    state: "working",
    startedAt,
    lastActivity: startedAt,
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "cleanup-ok-session" });
    fake._exit(0);
    return fake;
  };

  const result = await runTurn(makeRunOpts(ctx, "cleanup-ok", { runtime, _spawnFn: spawnFn }));

  expect(result.exitCode).toBe(0);

  // turn.pid must be gone after the turn completes.
  const turnPidPath = turnPidPathFor(ctx, "cleanup-ok");
  expect(existsSync(turnPidPath)).toBe(false);

  // lastActivity must have moved strictly forward from startedAt.
  const after = readSession(ctx.sessionsDbPath, "cleanup-ok");
  expect(after?.lastActivity).not.toBe(startedAt);
  expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
    new Date(startedAt).getTime(),
  );
});
|
|
1574
|
+
|
|
1575
|
+
test("spawn throws: turn.pid is never written and finally cleanup is a no-op", async () => {
|
|
1576
|
+
seedSession(ctx.sessionsDbPath, { agentName: "spawn-fail", state: "booting" });
|
|
1577
|
+
const { runtime } = makeSpyRuntime();
|
|
1578
|
+
const failingSpawn: TurnSpawnFn = () => {
|
|
1579
|
+
throw new Error("ENOENT: claude binary missing");
|
|
1580
|
+
};
|
|
1581
|
+
|
|
1582
|
+
await expect(
|
|
1583
|
+
runTurn(makeRunOpts(ctx, "spawn-fail", { runtime, _spawnFn: failingSpawn })),
|
|
1584
|
+
).rejects.toThrow(/binary missing/);
|
|
1585
|
+
|
|
1586
|
+
expect(existsSync(turnPidPathFor(ctx, "spawn-fail"))).toBe(false);
|
|
1587
|
+
});
|
|
1588
|
+
|
|
1589
|
+
test("parser throws: outer finally still runs and removes turn.pid", async () => {
  // Even when event parsing rejects mid-turn, the runner must SIGKILL the
  // still-live subprocess and still remove turn.pid in its outer finally.
  seedSession(ctx.sessionsDbPath, { agentName: "parser-fail", state: "working" });

  // Custom runtime whose parseEvents returns an async iterable that
  // rejects on first read — mirrors a stream-json parse error mid-turn.
  const base = new ClaudeRuntime();
  const failingIterable: AsyncIterable<never> = {
    [Symbol.asyncIterator](): AsyncIterator<never> {
      return {
        next(): Promise<IteratorResult<never>> {
          return Promise.reject(new Error("synthetic stream-json parse error"));
        },
      };
    },
  };
  // Copy of the real ClaudeRuntime with only parseEvents swapped out;
  // methods are bound so they keep their original `this`.
  const broken: AgentRuntime = {
    ...base,
    id: base.id,
    stability: base.stability,
    instructionPath: base.instructionPath,
    buildSpawnCommand: base.buildSpawnCommand.bind(base),
    buildPrintCommand: base.buildPrintCommand.bind(base),
    deployConfig: base.deployConfig.bind(base),
    detectReady: base.detectReady.bind(base),
    parseTranscript: base.parseTranscript.bind(base),
    getTranscriptDir: base.getTranscriptDir.bind(base),
    buildEnv: base.buildEnv.bind(base),
    buildDirectSpawn: base.buildDirectSpawn.bind(base),
    parseEvents: (() => failingIterable) as unknown as AgentRuntime["parseEvents"],
  };

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Don't auto-exit: simulate a still-live subprocess so we can verify
    // the C3 kill path actually fires before the lock is released. If we
    // pre-exited the fake here, kill() would still record but the test
    // wouldn't distinguish the runner-driven kill from no-op cleanup.
    return fake;
  };

  await expect(
    runTurn(makeRunOpts(ctx, "parser-fail", { runtime: broken, _spawnFn: spawnFn })),
  ).rejects.toThrow(/synthetic stream-json/);

  // overstory-088b C3: parser throw must kill the live subprocess to avoid
  // orphaning past lock.release. SIGKILL is correct here — we are on a
  // non-recoverable error path and must guarantee the process dies.
  expect(fake._killSignals).toContain("SIGKILL");
  expect(fake._killed).toBe(true);

  // Cleanup contract holds even on thrown parser.
  expect(existsSync(turnPidPathFor(ctx, "parser-fail"))).toBe(false);
});
|
|
1642
|
+
|
|
1643
|
+
test("turn.pid write failure SIGKILLs subprocess and aborts the turn (overstory-62a6)", async () => {
  // If publishing turn.pid fails, the abort primitive is unavailable; the
  // runner must kill the subprocess, reject the turn, and log at error level.
  seedSession(ctx.sessionsDbPath, { agentName: "pid-write-fail", state: "working" });
  const { runtime } = makeSpyRuntime();

  // Pre-create turn.pid as a DIRECTORY so `Bun.write(turnPidPath, ...)` fails
  // with EISDIR. This mirrors any real failure mode (read-only fs, permissions,
  // disk full) where the kill primitive becomes unavailable.
  const { mkdir } = await import("node:fs/promises");
  const turnPidPath = turnPidPathFor(ctx, "pid-write-fail");
  await mkdir(turnPidPath, { recursive: true });

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => fake;

  // Capture every runner diagnostic so we can assert on level + message.
  const events: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    events.push({ level, message });
  };

  await expect(
    runTurn(
      makeRunOpts(ctx, "pid-write-fail", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
    ),
  ).rejects.toThrow(/failed to write turn\.pid/);

  // The kill primitive is unavailable, so the only safe way to avoid a
  // silently un-killable agent is to SIGKILL the subprocess here.
  expect(fake._killSignals).toContain("SIGKILL");
  expect(fake._killed).toBe(true);

  // Surfaces at error level (not warn) so the failure isn't silent.
  expect(
    events.some((e) => e.level === "error" && e.message.includes("failed to write turn.pid")),
  ).toBe(true);
});
|
|
1678
|
+
|
|
1679
|
+
test("silent SessionStore failure surfaces as a runner warning", async () => {
  // SessionStore writes can fail without failing the turn; the runner must
  // still finish its cleanup AND report the contract violation at error level.
  seedSession(ctx.sessionsDbPath, { agentName: "ss-fail", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "ss-fail-session" });
    fake._exit(0);
    return fake;
  };

  // Collect runner diagnostics for the error-level assertion below.
  const warnings: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    warnings.push({ level, message });
  };

  // Point sessionsDbPath at a path that exists as a DIRECTORY so every
  // SessionStore open in the runner throws. The runner must keep going
  // (cleanup contract) AND surface the failure via the logger.
  const badSessionsPath = ctx.overstoryDir; // directory, not a db file
  const opts = {
    ...makeRunOpts(ctx, "ss-fail", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
    sessionsDbPath: badSessionsPath,
  };

  await runTurn(opts);

  // The lastActivity update silently failed (it's a directory, not a db),
  // which is exactly the scenario that masked overstory-4af3. The runner
  // must report the contract violation via _logWarning at error level.
  const errors = warnings.filter((w) => w.level === "error");
  expect(errors.some((w) => w.message.includes("lastActivity stayed at startedAt"))).toBe(true);

  // turn.pid must still be cleaned up regardless.
  expect(existsSync(turnPidPathFor(ctx, "ss-fail"))).toBe(false);
});
|
|
1715
|
+
|
|
1716
|
+
// ---------- mid-turn lastActivity refresh (overstory-8e61) ----------
|
|
1717
|
+
//
|
|
1718
|
+
// The watchdog's design (src/watchdog/health.ts:242-243) documents that the
|
|
1719
|
+
// runner advances `session.lastActivity` per parser event during a turn.
|
|
1720
|
+
// Without that, a long-running turn looks stalled to the watchdog and the
|
|
1721
|
+
// agent gets zombified mid-flight. These tests pin the per-event refresh
|
|
1722
|
+
// behavior added inside the parser loop.
|
|
1723
|
+
|
|
1724
|
+
test("mid-turn refresh: lastActivity advances when interval=0 forces per-event refresh", async () => {
  // With a zero throttle interval every parser event refreshes
  // session.lastActivity, so it must end up strictly past the seeded
  // startedAt (set one minute in the past to make the advance unambiguous).
  const startedAt = new Date(Date.now() - 60_000).toISOString();
  seedSession(ctx.sessionsDbPath, {
    agentName: "midturn-A",
    state: "working",
    startedAt,
    lastActivity: startedAt,
  });
  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    emitFakeTurn(fake, { sessionId: "midturn-A-session" });
    fake._exit(0);
    return fake;
  };

  await runTurn({
    ...makeRunOpts(ctx, "midturn-A", { runtime, _spawnFn: spawnFn }),
    lastActivityRefreshIntervalMs: 0,
  });

  const after = readSession(ctx.sessionsDbPath, "midturn-A");
  expect(after?.lastActivity).not.toBe(startedAt);
  expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
    new Date(startedAt).getTime(),
  );
});
|
|
1751
|
+
|
|
1752
|
+
test("mid-turn refresh: throttle gates updates by simulated time", async () => {
  // Drives the runner with a simulated clock and counts how many
  // lastActivity refreshes fire under a 1000ms throttle.
  seedSession(ctx.sessionsDbPath, { agentName: "midturn-B", state: "working" });
  const { runtime } = makeSpyRuntime();

  // Controlled sim clock. `_now` is invoked many times during a turn (for
  // startedAtMs, log timestamps, durationMs) — only the in-loop calls
  // matter for the throttle. We advance simTime synchronously between
  // pushes and yield to the parser between each push so the runner reads
  // the simTime we set just prior. simTime starts well above the throttle
  // interval so the first event fires (initial lastActivityRefreshMs=0).
  let simTime = 5000;
  const _now = (): Date => new Date(simTime);

  // Counts runner-side refreshes via the test hook.
  let refreshes = 0;
  const _onLastActivityRefresh = (): void => {
    refreshes++;
  };

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    (async () => {
      const sessionId = "midturn-B-session";
      // Use `system` lines because the claude parser does not batch
      // them — every system line yields exactly one status event,
      // driving one runner-loop iteration each. Assistant text would
      // coalesce inside a flush window and defeat the per-event count.
      const stamps = [5000, 5500, 6000, 6500, 7000, 7500];
      for (let i = 0; i < stamps.length; i++) {
        simTime = stamps[i] ?? 0;
        fake._pushLine(
          JSON.stringify({
            type: "system",
            subtype: i === 0 ? "init" : "progress",
            session_id: sessionId,
          }),
        );
        // Yield so the for-await loop body runs to completion against
        // the simTime value we just set.
        await Bun.sleep(20);
      }
      // Trailing result at the same simTime as the last chunk; with a
      // 1000ms throttle and last refresh at simTime=7000, this event
      // at simTime=7500 (delta=500) does not fire.
      fake._pushLine(
        JSON.stringify({
          type: "result",
          subtype: "success",
          session_id: sessionId,
          result: "done",
          is_error: false,
          duration_ms: 50,
          num_turns: 1,
        }),
      );
      await Bun.sleep(20);
      fake._exit(0);
    })();
    return fake;
  };

  await runTurn({
    ...makeRunOpts(ctx, "midturn-B", { runtime, _spawnFn: spawnFn }),
    lastActivityRefreshIntervalMs: 1000,
    _now,
    _onLastActivityRefresh,
  });

  // Stamps 5000, 6000, 7000 fire (gap >= 1000). Stamps 5500, 6500, 7500
  // are throttled (gap = 500). The trailing result event at 7500 also
  // throttles. Total expected = 3.
  expect(refreshes).toBe(3);
});
|
|
1824
|
+
|
|
1825
|
+
test("mid-turn refresh: parser throw still leaves lastActivity advanced (overstory-8e61)", async () => {
  // The end-of-turn `updateSessionLastActivity` (around turn-runner.ts:1112)
  // does NOT fire when the parser iteration throws — the catch path
  // rethrows before reaching the cleanup write. The mid-turn refresh
  // covers this gap so a parser-error turn still leaves lastActivity
  // fresh, mirroring the documented design at src/watchdog/health.ts:242-243.
  const startedAt = new Date(Date.now() - 60_000).toISOString();
  seedSession(ctx.sessionsDbPath, {
    agentName: "midturn-C",
    state: "working",
    startedAt,
    lastActivity: startedAt,
  });

  // Custom runtime: yield two valid events, then throw on the next read.
  // Mirrors a malformed stream-json line arriving after some good events.
  const base = new ClaudeRuntime();
  let yielded = 0;
  const yieldThenThrow: AsyncIterable<unknown> = {
    [Symbol.asyncIterator]() {
      return {
        next(): Promise<IteratorResult<unknown>> {
          if (yielded++ < 2) {
            return Promise.resolve({
              value: {
                type: "assistant_message",
                timestamp: new Date().toISOString(),
              },
              done: false,
            });
          }
          return Promise.reject(new Error("synthetic stream-json parse error"));
        },
      };
    },
  };
  // Copy of the real ClaudeRuntime with only parseEvents swapped out;
  // methods are bound so they keep their original `this`.
  const broken: AgentRuntime = {
    ...base,
    id: base.id,
    stability: base.stability,
    instructionPath: base.instructionPath,
    buildSpawnCommand: base.buildSpawnCommand.bind(base),
    buildPrintCommand: base.buildPrintCommand.bind(base),
    deployConfig: base.deployConfig.bind(base),
    detectReady: base.detectReady.bind(base),
    parseTranscript: base.parseTranscript.bind(base),
    getTranscriptDir: base.getTranscriptDir.bind(base),
    buildEnv: base.buildEnv.bind(base),
    buildDirectSpawn: base.buildDirectSpawn.bind(base),
    parseEvents: (() => yieldThenThrow) as unknown as AgentRuntime["parseEvents"],
  };

  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => fake;

  let refreshes = 0;
  await expect(
    runTurn({
      ...makeRunOpts(ctx, "midturn-C", { runtime: broken, _spawnFn: spawnFn }),
      lastActivityRefreshIntervalMs: 0,
      _onLastActivityRefresh: () => {
        refreshes++;
      },
    }),
  ).rejects.toThrow(/synthetic stream-json/);

  // Mid-turn refresh fired for at least one of the two pre-throw events.
  expect(refreshes).toBeGreaterThanOrEqual(1);

  // And the persisted lastActivity reflects the mid-turn write — the
  // end-of-turn write at line ~1112 was skipped by the parser-throw path.
  const after = readSession(ctx.sessionsDbPath, "midturn-C");
  expect(after?.lastActivity).not.toBe(startedAt);
  expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
    new Date(startedAt).getTime(),
  );
});
|
|
1902
|
+
|
|
1903
|
+
test("Bash mail-poll detector: warns + records custom event without suppressing tool_use (overstory-c92c)", async () => {
  // Defense-in-depth: the lead.md prompt forbids Bash mail polling
  // (overstory-fa84). When a future overlay or contributed agent
  // reintroduces the pattern, the runner must surface it via the
  // runner diagnostic sink AND a `mail_poll_detected` event in
  // events.db, while still recording the original tool_use event
  // so downstream observability is unaffected.
  seedSession(ctx.sessionsDbPath, { agentName: "polled", state: "working" });
  const { runtime } = makeSpyRuntime();

  const fake = makeFakeProc();
  const sessionId = "polled-session";
  // The canonical forbidden pattern: a Bash loop that polls the mailbox.
  const pollCommand = "until ov mail list; do sleep 1; done";
  const spawnFn: TurnSpawnFn = () => {
    // First an init line, then an assistant tool_use carrying the poll loop.
    fake._pushLine(
      JSON.stringify({
        type: "system",
        subtype: "init",
        session_id: sessionId,
        model: "claude-test",
      }),
    );
    fake._pushLine(
      JSON.stringify({
        type: "assistant",
        session_id: sessionId,
        message: {
          role: "assistant",
          model: "claude-test",
          content: [
            {
              type: "tool_use",
              id: "toolu_poll_1",
              name: "Bash",
              input: { command: pollCommand },
            },
          ],
        },
      }),
    );
    emitFakeTurn(fake, { sessionId });
    fake._exit(0);
    return fake;
  };

  const logs: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    logs.push({ level, message });
  };

  const result = await runTurn(
    makeRunOpts(ctx, "polled", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
  );

  expect(result.exitCode).toBe(0);

  // Warning was emitted via the runner diagnostic sink (warn level,
  // message includes "mail-poll").
  const pollWarn = logs.find((l) => l.level === "warn" && l.message.includes("mail-poll"));
  expect(pollWarn).toBeDefined();

  const eventStore = createEventStore(ctx.eventsDbPath);
  try {
    const events = eventStore.getByAgent("polled");

    // `mail_poll_detected` custom event landed in events.db with the
    // full (untruncated) command and the matched reason.
    const detectedEvent = events.find((e) => {
      if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
      try {
        const parsed = JSON.parse(e.data) as { type?: string };
        return parsed.type === "mail_poll_detected";
      } catch {
        return false;
      }
    });
    expect(detectedEvent).toBeDefined();
    const payload = JSON.parse(detectedEvent?.data ?? "{}") as {
      type: string;
      reason: string;
      command: string;
    };
    expect(payload.reason).toBe("until ov mail loop");
    expect(payload.command).toBe(pollCommand);

    // Regression guard: the original Bash tool_use event MUST still
    // be recorded — the warning emits IN ADDITION to (not in place
    // of) the normal recordAgentEvent call.
    const toolUseEvent = events.find(
      (e) => e.eventType === "tool_start" && e.toolName === "Bash",
    );
    expect(toolUseEvent).toBeDefined();
  } finally {
    eventStore.close();
  }
});
|
|
1999
|
+
});
|
|
2000
|
+
|
|
2001
|
+
describe("runTurn scope-violation observability (overstory-9f4d)", () => {
|
|
2002
|
+
let ctx: Ctx;
|
|
2003
|
+
|
|
2004
|
+
beforeEach(async () => {
|
|
2005
|
+
const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-scope-test-"));
|
|
2006
|
+
ctx = {
|
|
2007
|
+
overstoryDir,
|
|
2008
|
+
worktreePath: overstoryDir,
|
|
2009
|
+
projectRoot: overstoryDir,
|
|
2010
|
+
mailDbPath: join(overstoryDir, "mail.db"),
|
|
2011
|
+
eventsDbPath: join(overstoryDir, "events.db"),
|
|
2012
|
+
sessionsDbPath: join(overstoryDir, "sessions.db"),
|
|
2013
|
+
};
|
|
2014
|
+
_resetInProcessLocks();
|
|
2015
|
+
});
|
|
2016
|
+
|
|
2017
|
+
afterEach(async () => {
|
|
2018
|
+
_resetInProcessLocks();
|
|
2019
|
+
await rm(ctx.overstoryDir, { recursive: true, force: true });
|
|
2020
|
+
});
|
|
2021
|
+
|
|
2022
|
+
async function writeOverlayWithScope(scope: string[]): Promise<void> {
|
|
2023
|
+
const dir = join(ctx.worktreePath, ".claude");
|
|
2024
|
+
const { mkdir: mkdirP, writeFile } = await import("node:fs/promises");
|
|
2025
|
+
await mkdirP(dir, { recursive: true });
|
|
2026
|
+
const body = [
|
|
2027
|
+
"## File Scope (exclusive ownership)",
|
|
2028
|
+
"",
|
|
2029
|
+
...scope.map((p) => `- \`${p}\``),
|
|
2030
|
+
"",
|
|
2031
|
+
"## Expertise",
|
|
2032
|
+
"",
|
|
2033
|
+
"none",
|
|
2034
|
+
].join("\n");
|
|
2035
|
+
await writeFile(join(dir, "CLAUDE.md"), body);
|
|
2036
|
+
}
|
|
2037
|
+
|
|
2038
|
+
test("builder scope violation without justification emits warn log + scope_violation event", async () => {
  // A worker_done reporting files outside the declared FILE_SCOPE, with no
  // expansion justification, must produce a warn-level log AND a
  // `scope_violation` custom event in events.db — without failing the turn.
  seedSession(ctx.sessionsDbPath, { agentName: "violator", state: "working" });
  await writeOverlayWithScope(["src/agents/in-scope.ts"]);

  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Asynchronously send a worker_done whose filesModified falls outside
    // the overlay's scope, then finish the fake turn.
    (async () => {
      await Bun.sleep(20);
      const s = createMailStore(ctx.mailDbPath);
      try {
        createMailClient(s).sendProtocol({
          from: "violator",
          to: "lead",
          subject: "Worker done",
          body: "ok",
          type: "worker_done",
          priority: "normal",
          payload: {
            taskId: "t",
            branch: "b",
            exitCode: 0,
            filesModified: ["src/other.ts"],
          },
        });
      } finally {
        s.close();
      }
      emitFakeTurn(fake, { sessionId: "violator-session" });
      fake._exit(0);
    })();
    return fake;
  };

  const logs: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    logs.push({ level, message });
  };

  const result = await runTurn({
    ...makeRunOpts(ctx, "violator", {
      runtime,
      _spawnFn: spawnFn,
      _logWarning: logger,
    }),
    // Injected detector: reports a violation with no expansion reasons.
    _scopeDetect: () => ({
      violations: ["src/other.ts"],
      expansionReasons: [],
    }),
  });

  // The violation must NOT fail the turn itself.
  expect(result.terminalMailObserved).toBe(true);
  expect(result.finalState).toBe("completed");

  // Warn-level diagnostic names the offending path.
  const warnLog = logs.find(
    (l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
  );
  expect(warnLog).toBeDefined();
  expect(warnLog?.message).toContain("src/other.ts");

  const eventStore = createEventStore(ctx.eventsDbPath);
  try {
    const events = eventStore.getByAgent("violator");
    // Find the custom warn event whose JSON data is a scope_violation.
    const violationEvent = events.find((e) => {
      if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
      try {
        const parsed = JSON.parse(e.data) as { type?: string };
        return parsed.type === "scope_violation";
      } catch {
        return false;
      }
    });
    expect(violationEvent).toBeDefined();
    const payload = JSON.parse(violationEvent?.data ?? "{}") as {
      type: string;
      violations: string[];
      fileScope: string[];
    };
    expect(payload.violations).toEqual(["src/other.ts"]);
    expect(payload.fileScope).toEqual(["src/agents/in-scope.ts"]);
  } finally {
    eventStore.close();
  }
});
|
|
2122
|
+
|
|
2123
|
+
test("expansion_reason in commit log suppresses the warning", async () => {
  // Same out-of-scope worker_done as the violation test, but the injected
  // detector reports an expansion reason — so neither the warn log nor the
  // scope_violation event may be emitted.
  seedSession(ctx.sessionsDbPath, { agentName: "justified", state: "working" });
  await writeOverlayWithScope(["src/agents/in-scope.ts"]);

  const { runtime } = makeSpyRuntime();
  const fake = makeFakeProc();
  const spawnFn: TurnSpawnFn = () => {
    // Asynchronously send the out-of-scope worker_done, then finish the turn.
    (async () => {
      await Bun.sleep(20);
      const s = createMailStore(ctx.mailDbPath);
      try {
        createMailClient(s).sendProtocol({
          from: "justified",
          to: "lead",
          subject: "Worker done",
          body: "ok",
          type: "worker_done",
          priority: "normal",
          payload: {
            taskId: "t",
            branch: "b",
            exitCode: 0,
            filesModified: ["src/other.ts"],
          },
        });
      } finally {
        s.close();
      }
      emitFakeTurn(fake, { sessionId: "justified-session" });
      fake._exit(0);
    })();
    return fake;
  };

  const logs: Array<{ level: string; message: string }> = [];
  const logger: RunnerLogger = (level, message) => {
    logs.push({ level, message });
  };

  const result = await runTurn({
    ...makeRunOpts(ctx, "justified", {
      runtime,
      _spawnFn: spawnFn,
      _logWarning: logger,
    }),
    // Injected detector: same violation, but justified by an expansion reason.
    _scopeDetect: () => ({
      violations: ["src/other.ts"],
      expansionReasons: ["needed shared type"],
    }),
  });

  expect(result.terminalMailObserved).toBe(true);

  // No FILE_SCOPE warning when the expansion is justified.
  const warnLog = logs.find(
    (l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
  );
  expect(warnLog).toBeUndefined();

  const eventStore = createEventStore(ctx.eventsDbPath);
  try {
    // And no scope_violation event lands in events.db either.
    const events = eventStore.getByAgent("justified");
    const violationEvent = events.find((e) => e.data?.includes("scope_violation") ?? false);
    expect(violationEvent).toBeUndefined();
  } finally {
    eventStore.close();
  }
});
|
|
2190
|
+
|
|
2191
|
+
test("prior scope_expansion mail suppresses the warning", async () => {
|
|
2192
|
+
seedSession(ctx.sessionsDbPath, { agentName: "premail", state: "working" });
|
|
2193
|
+
await writeOverlayWithScope(["src/agents/in-scope.ts"]);
|
|
2194
|
+
|
|
2195
|
+
// Pre-seed: a scope_expansion-prefixed mail from this agent.
|
|
2196
|
+
{
|
|
2197
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
2198
|
+
try {
|
|
2199
|
+
createMailClient(s).send({
|
|
2200
|
+
from: "premail",
|
|
2201
|
+
to: "lead",
|
|
2202
|
+
subject: "scope_expansion: needed shared type",
|
|
2203
|
+
body: "heads up",
|
|
2204
|
+
type: "status",
|
|
2205
|
+
priority: "normal",
|
|
2206
|
+
});
|
|
2207
|
+
} finally {
|
|
2208
|
+
s.close();
|
|
2209
|
+
}
|
|
2210
|
+
}
|
|
2211
|
+
|
|
2212
|
+
const { runtime } = makeSpyRuntime();
|
|
2213
|
+
const fake = makeFakeProc();
|
|
2214
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
2215
|
+
(async () => {
|
|
2216
|
+
await Bun.sleep(20);
|
|
2217
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
2218
|
+
try {
|
|
2219
|
+
createMailClient(s).sendProtocol({
|
|
2220
|
+
from: "premail",
|
|
2221
|
+
to: "lead",
|
|
2222
|
+
subject: "Worker done",
|
|
2223
|
+
body: "ok",
|
|
2224
|
+
type: "worker_done",
|
|
2225
|
+
priority: "normal",
|
|
2226
|
+
payload: {
|
|
2227
|
+
taskId: "t",
|
|
2228
|
+
branch: "b",
|
|
2229
|
+
exitCode: 0,
|
|
2230
|
+
filesModified: ["src/other.ts"],
|
|
2231
|
+
},
|
|
2232
|
+
});
|
|
2233
|
+
} finally {
|
|
2234
|
+
s.close();
|
|
2235
|
+
}
|
|
2236
|
+
emitFakeTurn(fake, { sessionId: "premail-session" });
|
|
2237
|
+
fake._exit(0);
|
|
2238
|
+
})();
|
|
2239
|
+
return fake;
|
|
2240
|
+
};
|
|
2241
|
+
|
|
2242
|
+
const logs: Array<{ level: string; message: string }> = [];
|
|
2243
|
+
const logger: RunnerLogger = (level, message) => {
|
|
2244
|
+
logs.push({ level, message });
|
|
2245
|
+
};
|
|
2246
|
+
|
|
2247
|
+
await runTurn({
|
|
2248
|
+
...makeRunOpts(ctx, "premail", {
|
|
2249
|
+
runtime,
|
|
2250
|
+
_spawnFn: spawnFn,
|
|
2251
|
+
_logWarning: logger,
|
|
2252
|
+
}),
|
|
2253
|
+
_scopeDetect: () => ({
|
|
2254
|
+
violations: ["src/other.ts"],
|
|
2255
|
+
expansionReasons: [],
|
|
2256
|
+
}),
|
|
2257
|
+
});
|
|
2258
|
+
|
|
2259
|
+
const warnLog = logs.find(
|
|
2260
|
+
(l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
|
|
2261
|
+
);
|
|
2262
|
+
expect(warnLog).toBeUndefined();
|
|
2263
|
+
});
|
|
2264
|
+
|
|
2265
|
+
test("scout capability skips scope detection", async () => {
|
|
2266
|
+
seedSession(ctx.sessionsDbPath, {
|
|
2267
|
+
agentName: "scout-x",
|
|
2268
|
+
capability: "scout",
|
|
2269
|
+
state: "working",
|
|
2270
|
+
});
|
|
2271
|
+
await writeOverlayWithScope(["src/agents/in-scope.ts"]);
|
|
2272
|
+
|
|
2273
|
+
const { runtime } = makeSpyRuntime();
|
|
2274
|
+
const fake = makeFakeProc();
|
|
2275
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
2276
|
+
(async () => {
|
|
2277
|
+
await Bun.sleep(20);
|
|
2278
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
2279
|
+
try {
|
|
2280
|
+
createMailClient(s).send({
|
|
2281
|
+
from: "scout-x",
|
|
2282
|
+
to: "lead",
|
|
2283
|
+
subject: "Done",
|
|
2284
|
+
body: "ok",
|
|
2285
|
+
type: "result",
|
|
2286
|
+
priority: "normal",
|
|
2287
|
+
});
|
|
2288
|
+
} finally {
|
|
2289
|
+
s.close();
|
|
2290
|
+
}
|
|
2291
|
+
emitFakeTurn(fake, { sessionId: "scout-x-session" });
|
|
2292
|
+
fake._exit(0);
|
|
2293
|
+
})();
|
|
2294
|
+
return fake;
|
|
2295
|
+
};
|
|
2296
|
+
|
|
2297
|
+
let detectCalled = false;
|
|
2298
|
+
await runTurn({
|
|
2299
|
+
...makeRunOpts(ctx, "scout-x", {
|
|
2300
|
+
runtime,
|
|
2301
|
+
_spawnFn: spawnFn,
|
|
2302
|
+
capability: "scout",
|
|
2303
|
+
}),
|
|
2304
|
+
_scopeDetect: () => {
|
|
2305
|
+
detectCalled = true;
|
|
2306
|
+
return { violations: [], expansionReasons: [] };
|
|
2307
|
+
},
|
|
2308
|
+
});
|
|
2309
|
+
|
|
2310
|
+
expect(detectCalled).toBe(false);
|
|
2311
|
+
});
|
|
2312
|
+
});
|