@os-eco/overstory-cli 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +381 -0
  3. package/agents/builder.md +137 -0
  4. package/agents/coordinator.md +263 -0
  5. package/agents/lead.md +301 -0
  6. package/agents/merger.md +160 -0
  7. package/agents/monitor.md +214 -0
  8. package/agents/reviewer.md +140 -0
  9. package/agents/scout.md +119 -0
  10. package/agents/supervisor.md +423 -0
  11. package/package.json +47 -0
  12. package/src/agents/checkpoint.test.ts +88 -0
  13. package/src/agents/checkpoint.ts +101 -0
  14. package/src/agents/hooks-deployer.test.ts +2040 -0
  15. package/src/agents/hooks-deployer.ts +607 -0
  16. package/src/agents/identity.test.ts +603 -0
  17. package/src/agents/identity.ts +384 -0
  18. package/src/agents/lifecycle.test.ts +196 -0
  19. package/src/agents/lifecycle.ts +183 -0
  20. package/src/agents/manifest.test.ts +746 -0
  21. package/src/agents/manifest.ts +354 -0
  22. package/src/agents/overlay.test.ts +676 -0
  23. package/src/agents/overlay.ts +308 -0
  24. package/src/beads/client.test.ts +217 -0
  25. package/src/beads/client.ts +202 -0
  26. package/src/beads/molecules.test.ts +338 -0
  27. package/src/beads/molecules.ts +198 -0
  28. package/src/commands/agents.test.ts +322 -0
  29. package/src/commands/agents.ts +287 -0
  30. package/src/commands/clean.test.ts +670 -0
  31. package/src/commands/clean.ts +618 -0
  32. package/src/commands/completions.test.ts +342 -0
  33. package/src/commands/completions.ts +887 -0
  34. package/src/commands/coordinator.test.ts +1530 -0
  35. package/src/commands/coordinator.ts +733 -0
  36. package/src/commands/costs.test.ts +1119 -0
  37. package/src/commands/costs.ts +564 -0
  38. package/src/commands/dashboard.test.ts +308 -0
  39. package/src/commands/dashboard.ts +838 -0
  40. package/src/commands/doctor.test.ts +294 -0
  41. package/src/commands/doctor.ts +213 -0
  42. package/src/commands/errors.test.ts +647 -0
  43. package/src/commands/errors.ts +248 -0
  44. package/src/commands/feed.test.ts +578 -0
  45. package/src/commands/feed.ts +361 -0
  46. package/src/commands/group.test.ts +262 -0
  47. package/src/commands/group.ts +511 -0
  48. package/src/commands/hooks.test.ts +458 -0
  49. package/src/commands/hooks.ts +253 -0
  50. package/src/commands/init.test.ts +347 -0
  51. package/src/commands/init.ts +650 -0
  52. package/src/commands/inspect.test.ts +670 -0
  53. package/src/commands/inspect.ts +431 -0
  54. package/src/commands/log.test.ts +1454 -0
  55. package/src/commands/log.ts +724 -0
  56. package/src/commands/logs.test.ts +379 -0
  57. package/src/commands/logs.ts +546 -0
  58. package/src/commands/mail.test.ts +1270 -0
  59. package/src/commands/mail.ts +771 -0
  60. package/src/commands/merge.test.ts +670 -0
  61. package/src/commands/merge.ts +355 -0
  62. package/src/commands/metrics.test.ts +444 -0
  63. package/src/commands/metrics.ts +143 -0
  64. package/src/commands/monitor.test.ts +191 -0
  65. package/src/commands/monitor.ts +390 -0
  66. package/src/commands/nudge.test.ts +230 -0
  67. package/src/commands/nudge.ts +372 -0
  68. package/src/commands/prime.test.ts +470 -0
  69. package/src/commands/prime.ts +381 -0
  70. package/src/commands/replay.test.ts +741 -0
  71. package/src/commands/replay.ts +360 -0
  72. package/src/commands/run.test.ts +431 -0
  73. package/src/commands/run.ts +351 -0
  74. package/src/commands/sling.test.ts +657 -0
  75. package/src/commands/sling.ts +661 -0
  76. package/src/commands/spec.test.ts +203 -0
  77. package/src/commands/spec.ts +168 -0
  78. package/src/commands/status.test.ts +430 -0
  79. package/src/commands/status.ts +398 -0
  80. package/src/commands/stop.test.ts +420 -0
  81. package/src/commands/stop.ts +151 -0
  82. package/src/commands/supervisor.test.ts +187 -0
  83. package/src/commands/supervisor.ts +535 -0
  84. package/src/commands/trace.test.ts +745 -0
  85. package/src/commands/trace.ts +325 -0
  86. package/src/commands/watch.test.ts +145 -0
  87. package/src/commands/watch.ts +247 -0
  88. package/src/commands/worktree.test.ts +786 -0
  89. package/src/commands/worktree.ts +311 -0
  90. package/src/config.test.ts +822 -0
  91. package/src/config.ts +829 -0
  92. package/src/doctor/agents.test.ts +454 -0
  93. package/src/doctor/agents.ts +396 -0
  94. package/src/doctor/config-check.test.ts +190 -0
  95. package/src/doctor/config-check.ts +183 -0
  96. package/src/doctor/consistency.test.ts +651 -0
  97. package/src/doctor/consistency.ts +294 -0
  98. package/src/doctor/databases.test.ts +290 -0
  99. package/src/doctor/databases.ts +218 -0
  100. package/src/doctor/dependencies.test.ts +184 -0
  101. package/src/doctor/dependencies.ts +175 -0
  102. package/src/doctor/logs.test.ts +251 -0
  103. package/src/doctor/logs.ts +295 -0
  104. package/src/doctor/merge-queue.test.ts +216 -0
  105. package/src/doctor/merge-queue.ts +144 -0
  106. package/src/doctor/structure.test.ts +291 -0
  107. package/src/doctor/structure.ts +198 -0
  108. package/src/doctor/types.ts +37 -0
  109. package/src/doctor/version.test.ts +136 -0
  110. package/src/doctor/version.ts +129 -0
  111. package/src/e2e/init-sling-lifecycle.test.ts +277 -0
  112. package/src/errors.ts +217 -0
  113. package/src/events/store.test.ts +660 -0
  114. package/src/events/store.ts +369 -0
  115. package/src/events/tool-filter.test.ts +330 -0
  116. package/src/events/tool-filter.ts +126 -0
  117. package/src/index.ts +316 -0
  118. package/src/insights/analyzer.test.ts +466 -0
  119. package/src/insights/analyzer.ts +203 -0
  120. package/src/logging/color.test.ts +142 -0
  121. package/src/logging/color.ts +71 -0
  122. package/src/logging/logger.test.ts +813 -0
  123. package/src/logging/logger.ts +266 -0
  124. package/src/logging/reporter.test.ts +259 -0
  125. package/src/logging/reporter.ts +109 -0
  126. package/src/logging/sanitizer.test.ts +190 -0
  127. package/src/logging/sanitizer.ts +57 -0
  128. package/src/mail/broadcast.test.ts +203 -0
  129. package/src/mail/broadcast.ts +92 -0
  130. package/src/mail/client.test.ts +773 -0
  131. package/src/mail/client.ts +223 -0
  132. package/src/mail/store.test.ts +705 -0
  133. package/src/mail/store.ts +387 -0
  134. package/src/merge/queue.test.ts +359 -0
  135. package/src/merge/queue.ts +231 -0
  136. package/src/merge/resolver.test.ts +1345 -0
  137. package/src/merge/resolver.ts +645 -0
  138. package/src/metrics/store.test.ts +667 -0
  139. package/src/metrics/store.ts +445 -0
  140. package/src/metrics/summary.test.ts +398 -0
  141. package/src/metrics/summary.ts +178 -0
  142. package/src/metrics/transcript.test.ts +356 -0
  143. package/src/metrics/transcript.ts +175 -0
  144. package/src/mulch/client.test.ts +671 -0
  145. package/src/mulch/client.ts +332 -0
  146. package/src/sessions/compat.test.ts +280 -0
  147. package/src/sessions/compat.ts +104 -0
  148. package/src/sessions/store.test.ts +873 -0
  149. package/src/sessions/store.ts +494 -0
  150. package/src/test-helpers.test.ts +124 -0
  151. package/src/test-helpers.ts +126 -0
  152. package/src/tracker/beads.ts +56 -0
  153. package/src/tracker/factory.test.ts +80 -0
  154. package/src/tracker/factory.ts +64 -0
  155. package/src/tracker/seeds.ts +182 -0
  156. package/src/tracker/types.ts +52 -0
  157. package/src/types.ts +724 -0
  158. package/src/watchdog/daemon.test.ts +1975 -0
  159. package/src/watchdog/daemon.ts +671 -0
  160. package/src/watchdog/health.test.ts +431 -0
  161. package/src/watchdog/health.ts +264 -0
  162. package/src/watchdog/triage.test.ts +164 -0
  163. package/src/watchdog/triage.ts +179 -0
  164. package/src/worktree/manager.test.ts +439 -0
  165. package/src/worktree/manager.ts +198 -0
  166. package/src/worktree/tmux.test.ts +1009 -0
  167. package/src/worktree/tmux.ts +509 -0
  168. package/templates/CLAUDE.md.tmpl +89 -0
  169. package/templates/hooks.json.tmpl +105 -0
  170. package/templates/overlay.md.tmpl +81 -0
@@ -0,0 +1,1975 @@
1
+ /**
2
+ * Integration tests for the watchdog daemon tick loop.
3
+ *
4
+ * Uses real filesystem (temp directories via mkdtemp) and real SessionStore
5
+ * (bun:sqlite) for session persistence, plus real health evaluation logic.
6
+ *
7
+ * Only tmux operations (isSessionAlive, killSession), triage, and nudge are
8
+ * mocked via dependency injection (_tmux, _triage, _nudge params) because:
9
+ * - Real tmux interferes with developer sessions and is fragile in CI.
10
+ * - Real triage spawns Claude CLI which has cost and latency.
11
+ * - Real nudge requires active tmux sessions.
12
+ *
13
+ * Does NOT use mock.module() — it leaks across test files. See mulch record
14
+ * mx-56558b for background.
15
+ */
16
+
17
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
18
+ import { mkdir, mkdtemp, rm } from "node:fs/promises";
19
+ import { tmpdir } from "node:os";
20
+ import { join } from "node:path";
21
+ import { createEventStore } from "../events/store.ts";
22
+ import { createSessionStore } from "../sessions/store.ts";
23
+ import type { AgentSession, HealthCheck, StoredEvent } from "../types.ts";
24
+ import { buildCompletionMessage, runDaemonTick } from "./daemon.ts";
25
+
26
+ // === Test constants ===
27
+
28
/**
 * Inactivity thresholds shared by the tests in this file; spread into the
 * options passed to runDaemonTick.
 */
const THRESHOLDS = {
  // 30 seconds without activity
  staleThresholdMs: 30 * 1000,
  // 2 minutes without activity
  zombieThresholdMs: 2 * 60 * 1000,
};
32
+
33
+ // === Helpers ===
34
+
35
/**
 * Make a fresh temporary project root for a test.
 *
 * The directory is created under the OS temp dir with the
 * "overstory-daemon-test-" prefix and already contains an empty
 * `.overstory/` subdirectory, which is where the helpers in this file
 * place sessions.db.
 *
 * @returns Absolute path of the newly created root directory.
 */
async function createTempRoot(): Promise<string> {
  const prefix = join(tmpdir(), "overstory-daemon-test-");
  const root = await mkdtemp(prefix);
  const overstoryDir = join(root, ".overstory");
  await mkdir(overstoryDir, { recursive: true });
  return root;
}
41
+
42
/**
 * Persist the given sessions into the SessionStore (sessions.db) under
 * `<root>/.overstory/`.
 *
 * Each session is written with `store.upsert`. The store is closed in a
 * `finally` block so that a failing upsert does not leave the store open.
 *
 * @param root - Project root that contains the `.overstory/` directory.
 * @param sessions - Sessions to upsert, in order. An empty array only opens
 *   and closes the store.
 */
function writeSessionsToStore(root: string, sessions: AgentSession[]): void {
  const dbPath = join(root, ".overstory", "sessions.db");
  const store = createSessionStore(dbPath);
  try {
    for (const session of sessions) {
      store.upsert(session);
    }
  } finally {
    // Always release the store, even when an upsert throws.
    store.close();
  }
}
51
+
52
/**
 * Load every session from the SessionStore (sessions.db) under
 * `<root>/.overstory/`.
 *
 * The store is closed in a `finally` block so that a failing read does not
 * leave the store open.
 *
 * @param root - Project root that contains the `.overstory/` directory.
 * @returns Whatever `store.getAll()` yields for that database.
 */
function readSessionsFromStore(root: string): AgentSession[] {
  const dbPath = join(root, ".overstory", "sessions.db");
  const store = createSessionStore(dbPath);
  try {
    return store.getAll();
  } finally {
    // Always release the store, even when getAll throws.
    store.close();
  }
}
60
+
61
/**
 * Build an AgentSession fixture.
 *
 * Starts from a fixed set of defaults (a "working" builder session named
 * "test-agent" whose timestamps are "now") and applies `overrides` on top,
 * so any field supplied by the caller wins over the default.
 *
 * @param overrides - Fields to replace in the default session.
 * @returns A complete AgentSession object.
 */
function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
  const defaults: AgentSession = {
    id: "session-test",
    agentName: "test-agent",
    capability: "builder",
    worktreePath: "/tmp/test",
    branchName: "overstory/test-agent/test-task",
    beadId: "test-task",
    tmuxSession: "overstory-test-agent",
    state: "working",
    // Our own PID, so that isProcessRunning returns true for this session.
    pid: process.pid,
    parentAgent: null,
    depth: 0,
    runId: null,
    escalationLevel: 0,
    stalledSince: null,
    startedAt: new Date().toISOString(),
    lastActivity: new Date().toISOString(),
  };
  return { ...defaults, ...overrides };
}
83
+
84
/**
 * Fake `_tmux` dependency for the case where every session is alive.
 *
 * @returns An object whose `isSessionAlive` always resolves to `true` and
 *   whose `killSession` resolves without doing anything.
 */
function tmuxAllAlive(): {
  isSessionAlive: (name: string) => Promise<boolean>;
  killSession: (name: string) => Promise<void>;
} {
  const isSessionAlive = async (): Promise<boolean> => true;
  const killSession = async (): Promise<void> => {};
  return { isSessionAlive, killSession };
}
94
+
95
/**
 * Fake `_tmux` dependency for the case where every session is dead.
 *
 * @returns An object whose `isSessionAlive` always resolves to `false` and
 *   whose `killSession` resolves without doing anything.
 */
function tmuxAllDead(): {
  isSessionAlive: (name: string) => Promise<boolean>;
  killSession: (name: string) => Promise<void>;
} {
  const isSessionAlive = async (): Promise<boolean> => false;
  const killSession = async (): Promise<void> => {};
  return { isSessionAlive, killSession };
}
105
+
106
/**
 * Fake `_tmux` dependency whose liveness is controlled per session name.
 *
 * @param aliveMap - Maps a tmux session name to whether it should be
 *   reported alive. Names missing from the map are reported as dead.
 * @returns The fake `isSessionAlive` / `killSession` pair plus `killed`,
 *   the list of session names passed to `killSession` in call order, for
 *   use in assertions.
 */
function tmuxWithLiveness(aliveMap: Record<string, boolean>): {
  isSessionAlive: (name: string) => Promise<boolean>;
  killSession: (name: string) => Promise<void>;
  killed: string[];
} {
  const killed: string[] = [];

  async function isSessionAlive(name: string): Promise<boolean> {
    const alive = aliveMap[name];
    return alive ?? false;
  }

  // Records the kill request only; liveness answers are not affected.
  async function killSession(name: string): Promise<void> {
    killed.push(name);
  }

  return { isSessionAlive, killSession, killed };
}
124
+
125
/**
 * Fake `_triage` dependency with a fixed outcome.
 *
 * @param verdict - The verdict every call should resolve to.
 * @returns An async triage function that ignores its options and resolves
 *   to `verdict`.
 */
function triageAlways(
  verdict: "retry" | "terminate" | "extend",
): (options: {
  agentName: string;
  root: string;
  lastActivity: string;
}) => Promise<"retry" | "terminate" | "extend"> {
  async function fixedVerdict(): Promise<"retry" | "terminate" | "extend"> {
    return verdict;
  }
  return fixedVerdict;
}
135
+
136
/**
 * Fake `_nudge` dependency that records what it was asked to send.
 *
 * @returns `nudge`, an async function that appends `{ agentName, message }`
 *   to `calls` and resolves to `{ delivered: true }`, together with `calls`
 *   itself so tests can assert on it. The project root and force flag are
 *   accepted but not recorded.
 */
function nudgeTracker(): {
  nudge: (
    projectRoot: string,
    agentName: string,
    message: string,
    force: boolean,
  ) => Promise<{ delivered: boolean; reason?: string }>;
  calls: Array<{ agentName: string; message: string }>;
} {
  const calls: Array<{ agentName: string; message: string }> = [];

  async function nudge(
    _projectRoot: string,
    agentName: string,
    message: string,
    _force: boolean,
  ): Promise<{ delivered: boolean; reason?: string }> {
    calls.push({ agentName, message });
    return { delivered: true };
  }

  return { nudge, calls };
}
155
+
156
+ // === Tests ===
157
+
158
/** Temporary project root for the current test; reassigned before every test. */
let tempRoot: string;

// Give each test its own root directory (with .overstory/ inside).
beforeEach(async () => {
  tempRoot = await createTempRoot();
});

// Delete the per-test root afterwards; `force` makes removal tolerate a missing path.
afterEach(() => rm(tempRoot, { recursive: true, force: true }));
167
+
168
+ describe("daemon tick", () => {
169
+ // --- Test 1: tick with no sessions file ---
170
+
171
+ test("tick with no sessions is a graceful no-op", async () => {
172
+ // No sessions in the store — daemon should not crash
173
+ const checks: HealthCheck[] = [];
174
+
175
+ await runDaemonTick({
176
+ root: tempRoot,
177
+ ...THRESHOLDS,
178
+ onHealthCheck: (c) => checks.push(c),
179
+ _tmux: tmuxAllAlive(),
180
+ _triage: triageAlways("extend"),
181
+ });
182
+
183
+ // No health checks should have been produced (no sessions to check)
184
+ expect(checks).toHaveLength(0);
185
+ });
186
+
187
+ // --- Test 2: tick with healthy sessions ---
188
+
189
+ test("tick with healthy sessions produces no state changes", async () => {
190
+ const session = makeSession({
191
+ state: "working",
192
+ lastActivity: new Date().toISOString(),
193
+ });
194
+
195
+ writeSessionsToStore(tempRoot, [session]);
196
+
197
+ const checks: HealthCheck[] = [];
198
+
199
+ await runDaemonTick({
200
+ root: tempRoot,
201
+ ...THRESHOLDS,
202
+ onHealthCheck: (c) => checks.push(c),
203
+ _tmux: tmuxAllAlive(),
204
+ _triage: triageAlways("extend"),
205
+ });
206
+
207
+ expect(checks).toHaveLength(1);
208
+ const check = checks[0];
209
+ expect(check).toBeDefined();
210
+ expect(check?.state).toBe("working");
211
+ expect(check?.action).toBe("none");
212
+
213
+ // Session state should be unchanged because state didn't change.
214
+ const reloaded = readSessionsFromStore(tempRoot);
215
+ expect(reloaded).toHaveLength(1);
216
+ expect(reloaded[0]?.state).toBe("working");
217
+ });
218
+
219
+ // --- Test 3: tick with dead tmux -> zombie transition ---
220
+
221
+ test("tick with dead tmux transitions session to zombie and fires terminate", async () => {
222
+ const session = makeSession({
223
+ agentName: "dead-agent",
224
+ tmuxSession: "overstory-dead-agent",
225
+ state: "working",
226
+ lastActivity: new Date().toISOString(),
227
+ });
228
+
229
+ writeSessionsToStore(tempRoot, [session]);
230
+
231
+ const tmuxMock = tmuxWithLiveness({ "overstory-dead-agent": false });
232
+ const checks: HealthCheck[] = [];
233
+
234
+ await runDaemonTick({
235
+ root: tempRoot,
236
+ ...THRESHOLDS,
237
+ onHealthCheck: (c) => checks.push(c),
238
+ _tmux: tmuxMock,
239
+ _triage: triageAlways("extend"),
240
+ });
241
+
242
+ // Health check should detect zombie with terminate action
243
+ expect(checks).toHaveLength(1);
244
+ expect(checks[0]?.state).toBe("zombie");
245
+ expect(checks[0]?.action).toBe("terminate");
246
+
247
+ // tmux is dead so killSession should NOT be called (only kills if tmuxAlive)
248
+ expect(tmuxMock.killed).toHaveLength(0);
249
+
250
+ // Session state should be persisted as zombie
251
+ const reloaded = readSessionsFromStore(tempRoot);
252
+ expect(reloaded).toHaveLength(1);
253
+ expect(reloaded[0]?.state).toBe("zombie");
254
+ });
255
+
256
+ test("tick with alive tmux but zombie-old activity calls killSession", async () => {
257
+ // tmux IS alive but time-based zombie threshold is exceeded,
258
+ // causing a terminate action — killSession SHOULD be called.
259
+ const oldActivity = new Date(Date.now() - 200_000).toISOString();
260
+ const session = makeSession({
261
+ agentName: "zombie-agent",
262
+ tmuxSession: "overstory-zombie-agent",
263
+ state: "working",
264
+ lastActivity: oldActivity,
265
+ });
266
+
267
+ writeSessionsToStore(tempRoot, [session]);
268
+
269
+ const tmuxMock = tmuxWithLiveness({ "overstory-zombie-agent": true });
270
+ const checks: HealthCheck[] = [];
271
+
272
+ await runDaemonTick({
273
+ root: tempRoot,
274
+ ...THRESHOLDS,
275
+ onHealthCheck: (c) => checks.push(c),
276
+ _tmux: tmuxMock,
277
+ _triage: triageAlways("extend"),
278
+ });
279
+
280
+ expect(checks).toHaveLength(1);
281
+ expect(checks[0]?.action).toBe("terminate");
282
+
283
+ // tmux was alive, so killSession SHOULD have been called
284
+ expect(tmuxMock.killed).toContain("overstory-zombie-agent");
285
+
286
+ // Session persisted as zombie
287
+ const reloaded = readSessionsFromStore(tempRoot);
288
+ expect(reloaded[0]?.state).toBe("zombie");
289
+ });
290
+
291
+ // --- Test 4: progressive nudging for stalled agents ---
292
+
293
+ test("first tick with stalled agent sets stalledSince and stays at level 0 (warn)", async () => {
294
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
295
+ const session = makeSession({
296
+ agentName: "stalled-agent",
297
+ tmuxSession: "overstory-stalled-agent",
298
+ state: "working",
299
+ lastActivity: staleActivity,
300
+ });
301
+
302
+ writeSessionsToStore(tempRoot, [session]);
303
+
304
+ const tmuxMock = tmuxWithLiveness({ "overstory-stalled-agent": true });
305
+ const checks: HealthCheck[] = [];
306
+ const nudgeMock = nudgeTracker();
307
+
308
+ await runDaemonTick({
309
+ root: tempRoot,
310
+ ...THRESHOLDS,
311
+ nudgeIntervalMs: 60_000,
312
+ onHealthCheck: (c) => checks.push(c),
313
+ _tmux: tmuxMock,
314
+ _triage: triageAlways("extend"),
315
+ _nudge: nudgeMock.nudge,
316
+ });
317
+
318
+ expect(checks).toHaveLength(1);
319
+ expect(checks[0]?.action).toBe("escalate");
320
+
321
+ // No kill at level 0
322
+ expect(tmuxMock.killed).toHaveLength(0);
323
+
324
+ // No nudge at level 0 (warn only)
325
+ expect(nudgeMock.calls).toHaveLength(0);
326
+
327
+ // Session should be stalled with stalledSince set and escalationLevel 0
328
+ const reloaded = readSessionsFromStore(tempRoot);
329
+ expect(reloaded[0]?.state).toBe("stalled");
330
+ expect(reloaded[0]?.escalationLevel).toBe(0);
331
+ expect(reloaded[0]?.stalledSince).not.toBeNull();
332
+ });
333
+
334
+ test("stalled agent at level 1 sends nudge", async () => {
335
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
336
+ // Pre-set stalledSince to > nudgeIntervalMs ago so level advances to 1
337
+ const stalledSince = new Date(Date.now() - 70_000).toISOString();
338
+ const session = makeSession({
339
+ agentName: "stalled-agent",
340
+ tmuxSession: "overstory-stalled-agent",
341
+ state: "stalled",
342
+ lastActivity: staleActivity,
343
+ escalationLevel: 0,
344
+ stalledSince,
345
+ });
346
+
347
+ writeSessionsToStore(tempRoot, [session]);
348
+
349
+ const tmuxMock = tmuxWithLiveness({ "overstory-stalled-agent": true });
350
+ const nudgeMock = nudgeTracker();
351
+
352
+ await runDaemonTick({
353
+ root: tempRoot,
354
+ ...THRESHOLDS,
355
+ nudgeIntervalMs: 60_000,
356
+ _tmux: tmuxMock,
357
+ _triage: triageAlways("extend"),
358
+ _nudge: nudgeMock.nudge,
359
+ });
360
+
361
+ // Level should advance to 1 and nudge should be sent
362
+ const reloaded = readSessionsFromStore(tempRoot);
363
+ expect(reloaded[0]?.escalationLevel).toBe(1);
364
+ expect(nudgeMock.calls).toHaveLength(1);
365
+ expect(nudgeMock.calls[0]?.agentName).toBe("stalled-agent");
366
+ expect(nudgeMock.calls[0]?.message).toContain("WATCHDOG");
367
+
368
+ // No kill
369
+ expect(tmuxMock.killed).toHaveLength(0);
370
+ });
371
+
372
+ test("stalled agent at level 2 calls triage when tier1Enabled", async () => {
373
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
374
+ // Pre-set stalledSince to > 2*nudgeIntervalMs ago so level advances to 2
375
+ const stalledSince = new Date(Date.now() - 130_000).toISOString();
376
+ const session = makeSession({
377
+ agentName: "stalled-agent",
378
+ tmuxSession: "overstory-stalled-agent",
379
+ state: "stalled",
380
+ lastActivity: staleActivity,
381
+ escalationLevel: 1,
382
+ stalledSince,
383
+ });
384
+
385
+ writeSessionsToStore(tempRoot, [session]);
386
+
387
+ const tmuxMock = tmuxWithLiveness({ "overstory-stalled-agent": true });
388
+ let triageCalled = false;
389
+
390
+ const triageMock = async (opts: {
391
+ agentName: string;
392
+ root: string;
393
+ lastActivity: string;
394
+ }): Promise<"retry" | "terminate" | "extend"> => {
395
+ triageCalled = true;
396
+ expect(opts.agentName).toBe("stalled-agent");
397
+ return "terminate";
398
+ };
399
+
400
+ await runDaemonTick({
401
+ root: tempRoot,
402
+ ...THRESHOLDS,
403
+ nudgeIntervalMs: 60_000,
404
+ tier1Enabled: true,
405
+ _tmux: tmuxMock,
406
+ _triage: triageMock,
407
+ _nudge: nudgeTracker().nudge,
408
+ });
409
+
410
+ expect(triageCalled).toBe(true);
411
+
412
+ // Triage returned terminate — session should be zombie
413
+ expect(tmuxMock.killed).toContain("overstory-stalled-agent");
414
+ const reloaded = readSessionsFromStore(tempRoot);
415
+ expect(reloaded[0]?.state).toBe("zombie");
416
+ });
417
+
418
+ test("stalled agent at level 2 skips triage when tier1Enabled is false", async () => {
419
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
420
+ const stalledSince = new Date(Date.now() - 130_000).toISOString();
421
+ const session = makeSession({
422
+ agentName: "stalled-agent",
423
+ tmuxSession: "overstory-stalled-agent",
424
+ state: "stalled",
425
+ lastActivity: staleActivity,
426
+ escalationLevel: 1,
427
+ stalledSince,
428
+ });
429
+
430
+ writeSessionsToStore(tempRoot, [session]);
431
+
432
+ const tmuxMock = tmuxWithLiveness({ "overstory-stalled-agent": true });
433
+ let triageCalled = false;
434
+
435
+ const triageMock = async (): Promise<"retry" | "terminate" | "extend"> => {
436
+ triageCalled = true;
437
+ return "terminate";
438
+ };
439
+
440
+ await runDaemonTick({
441
+ root: tempRoot,
442
+ ...THRESHOLDS,
443
+ nudgeIntervalMs: 60_000,
444
+ tier1Enabled: false, // Triage disabled
445
+ _tmux: tmuxMock,
446
+ _triage: triageMock,
447
+ _nudge: nudgeTracker().nudge,
448
+ });
449
+
450
+ // Triage should NOT have been called
451
+ expect(triageCalled).toBe(false);
452
+
453
+ // No kill — level 2 with tier1 disabled just skips
454
+ expect(tmuxMock.killed).toHaveLength(0);
455
+
456
+ // Session stays stalled at level 2
457
+ const reloaded = readSessionsFromStore(tempRoot);
458
+ expect(reloaded[0]?.state).toBe("stalled");
459
+ expect(reloaded[0]?.escalationLevel).toBe(2);
460
+ });
461
+
462
+ test("stalled agent at level 3 is terminated", async () => {
463
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
464
+ // Pre-set stalledSince to > 3*nudgeIntervalMs ago so level advances to 3
465
+ const stalledSince = new Date(Date.now() - 200_000).toISOString();
466
+ const session = makeSession({
467
+ agentName: "doomed-agent",
468
+ tmuxSession: "overstory-doomed-agent",
469
+ state: "stalled",
470
+ lastActivity: staleActivity,
471
+ escalationLevel: 2,
472
+ stalledSince,
473
+ });
474
+
475
+ writeSessionsToStore(tempRoot, [session]);
476
+
477
+ const tmuxMock = tmuxWithLiveness({ "overstory-doomed-agent": true });
478
+
479
+ await runDaemonTick({
480
+ root: tempRoot,
481
+ ...THRESHOLDS,
482
+ nudgeIntervalMs: 60_000,
483
+ _tmux: tmuxMock,
484
+ _triage: triageAlways("extend"),
485
+ _nudge: nudgeTracker().nudge,
486
+ });
487
+
488
+ // Level 3 = terminate
489
+ expect(tmuxMock.killed).toContain("overstory-doomed-agent");
490
+
491
+ const reloaded = readSessionsFromStore(tempRoot);
492
+ expect(reloaded[0]?.state).toBe("zombie");
493
+ // Escalation is reset after termination
494
+ expect(reloaded[0]?.escalationLevel).toBe(0);
495
+ expect(reloaded[0]?.stalledSince).toBeNull();
496
+ });
497
+
498
+ test("triage retry sends nudge with recovery message", async () => {
499
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
500
+ const stalledSince = new Date(Date.now() - 130_000).toISOString();
501
+ const session = makeSession({
502
+ agentName: "retry-agent",
503
+ tmuxSession: "overstory-retry-agent",
504
+ state: "stalled",
505
+ lastActivity: staleActivity,
506
+ escalationLevel: 1,
507
+ stalledSince,
508
+ });
509
+
510
+ writeSessionsToStore(tempRoot, [session]);
511
+
512
+ const tmuxMock = tmuxWithLiveness({ "overstory-retry-agent": true });
513
+ const nudgeMock = nudgeTracker();
514
+
515
+ await runDaemonTick({
516
+ root: tempRoot,
517
+ ...THRESHOLDS,
518
+ nudgeIntervalMs: 60_000,
519
+ tier1Enabled: true,
520
+ _tmux: tmuxMock,
521
+ _triage: triageAlways("retry"),
522
+ _nudge: nudgeMock.nudge,
523
+ });
524
+
525
+ // Triage returned "retry" — nudge should be sent with recovery message
526
+ expect(nudgeMock.calls).toHaveLength(1);
527
+ expect(nudgeMock.calls[0]?.message).toContain("recovery");
528
+
529
+ // No kill
530
+ expect(tmuxMock.killed).toHaveLength(0);
531
+
532
+ // Session stays stalled
533
+ const reloaded = readSessionsFromStore(tempRoot);
534
+ expect(reloaded[0]?.state).toBe("stalled");
535
+ });
536
+
537
+ test("agent recovery resets escalation tracking", async () => {
538
+ // Agent was stalled but now has recent activity
539
+ const session = makeSession({
540
+ agentName: "recovered-agent",
541
+ tmuxSession: "overstory-recovered-agent",
542
+ state: "working",
543
+ lastActivity: new Date().toISOString(), // Recent activity
544
+ escalationLevel: 2,
545
+ stalledSince: new Date(Date.now() - 130_000).toISOString(),
546
+ });
547
+
548
+ writeSessionsToStore(tempRoot, [session]);
549
+
550
+ await runDaemonTick({
551
+ root: tempRoot,
552
+ ...THRESHOLDS,
553
+ _tmux: tmuxAllAlive(),
554
+ _triage: triageAlways("extend"),
555
+ _nudge: nudgeTracker().nudge,
556
+ });
557
+
558
+ // Health check should return action: "none" for recovered agent
559
+ // Escalation tracking should be reset
560
+ const reloaded = readSessionsFromStore(tempRoot);
561
+ expect(reloaded[0]?.state).toBe("working");
562
+ expect(reloaded[0]?.escalationLevel).toBe(0);
563
+ expect(reloaded[0]?.stalledSince).toBeNull();
564
+ });
565
+
566
+ // --- Test 5: session persistence round-trip ---
567
+
568
+ test("session persistence round-trip: load, modify, save, reload", async () => {
569
+ const sessions: AgentSession[] = [
570
+ makeSession({
571
+ id: "session-1",
572
+ agentName: "agent-alpha",
573
+ tmuxSession: "overstory-agent-alpha",
574
+ state: "working",
575
+ lastActivity: new Date().toISOString(),
576
+ }),
577
+ makeSession({
578
+ id: "session-2",
579
+ agentName: "agent-beta",
580
+ tmuxSession: "overstory-agent-beta",
581
+ state: "working",
582
+ // Make beta's tmux dead so it transitions to zombie
583
+ lastActivity: new Date().toISOString(),
584
+ }),
585
+ makeSession({
586
+ id: "session-3",
587
+ agentName: "agent-gamma",
588
+ tmuxSession: "overstory-agent-gamma",
589
+ state: "completed",
590
+ lastActivity: new Date().toISOString(),
591
+ }),
592
+ ];
593
+
594
+ writeSessionsToStore(tempRoot, sessions);
595
+
596
+ const tmuxMock = tmuxWithLiveness({
597
+ "overstory-agent-alpha": true,
598
+ "overstory-agent-beta": false, // Dead — should become zombie
599
+ "overstory-agent-gamma": true, // Doesn't matter — completed is skipped
600
+ });
601
+
602
+ const checks: HealthCheck[] = [];
603
+
604
+ await runDaemonTick({
605
+ root: tempRoot,
606
+ ...THRESHOLDS,
607
+ onHealthCheck: (c) => checks.push(c),
608
+ _tmux: tmuxMock,
609
+ _triage: triageAlways("extend"),
610
+ });
611
+
612
+ // Completed sessions are skipped — only 2 health checks
613
+ expect(checks).toHaveLength(2);
614
+
615
+ // Reload and verify persistence
616
+ const reloaded = readSessionsFromStore(tempRoot);
617
+ expect(reloaded).toHaveLength(3);
618
+
619
+ const alpha = reloaded.find((s) => s.agentName === "agent-alpha");
620
+ const beta = reloaded.find((s) => s.agentName === "agent-beta");
621
+ const gamma = reloaded.find((s) => s.agentName === "agent-gamma");
622
+
623
+ expect(alpha).toBeDefined();
624
+ expect(beta).toBeDefined();
625
+ expect(gamma).toBeDefined();
626
+
627
+ // Alpha: tmux alive + recent activity — stays working
628
+ expect(alpha?.state).toBe("working");
629
+
630
+ // Beta: tmux dead — zombie (ZFC rule 1)
631
+ expect(beta?.state).toBe("zombie");
632
+
633
+ // Gamma: completed — unchanged (skipped by daemon)
634
+ expect(gamma?.state).toBe("completed");
635
+ });
636
+
637
+ test("session persistence: state unchanged when nothing changes", async () => {
638
+ const session = makeSession({
639
+ state: "working",
640
+ lastActivity: new Date().toISOString(),
641
+ });
642
+
643
+ writeSessionsToStore(tempRoot, [session]);
644
+
645
+ await runDaemonTick({
646
+ root: tempRoot,
647
+ ...THRESHOLDS,
648
+ _tmux: tmuxAllAlive(),
649
+ _triage: triageAlways("extend"),
650
+ });
651
+
652
+ // Session state should remain unchanged since nothing triggered a transition
653
+ const reloaded = readSessionsFromStore(tempRoot);
654
+ expect(reloaded).toHaveLength(1);
655
+ expect(reloaded[0]?.state).toBe("working");
656
+ });
657
+
658
+ // --- Edge cases ---
659
+
660
+ test("completed sessions are skipped entirely", async () => {
661
+ const session = makeSession({ state: "completed" });
662
+
663
+ writeSessionsToStore(tempRoot, [session]);
664
+
665
+ const checks: HealthCheck[] = [];
666
+
667
+ await runDaemonTick({
668
+ root: tempRoot,
669
+ ...THRESHOLDS,
670
+ onHealthCheck: (c) => checks.push(c),
671
+ _tmux: tmuxAllDead(), // Would be zombie if not skipped
672
+ _triage: triageAlways("extend"),
673
+ });
674
+
675
+ // No health checks emitted for completed sessions
676
+ expect(checks).toHaveLength(0);
677
+
678
+ // State unchanged
679
+ const reloaded = readSessionsFromStore(tempRoot);
680
+ expect(reloaded[0]?.state).toBe("completed");
681
+ });
682
+
683
// One tick over four sessions: a live fresh one, one whose tmux session is
// dead, one whose last activity is 60s old, and one already completed.
// Each must end up in its own expected state, and only the three
// non-completed sessions may produce a health check.
test("multiple sessions with mixed states are all processed", async () => {
	const tickStart = Date.now();
	const freshActivity = new Date(tickStart).toISOString();
	const staleActivity = new Date(tickStart - 60_000).toISOString();

	const healthySession = makeSession({
		id: "s1",
		agentName: "healthy",
		tmuxSession: "overstory-healthy",
		state: "working",
		lastActivity: freshActivity,
	});
	const dyingSession = makeSession({
		id: "s2",
		agentName: "dying",
		tmuxSession: "overstory-dying",
		state: "working",
		lastActivity: freshActivity,
	});
	const staleSession = makeSession({
		id: "s3",
		agentName: "stale",
		tmuxSession: "overstory-stale",
		state: "working",
		lastActivity: staleActivity,
	});
	const doneSession = makeSession({
		id: "s4",
		agentName: "done",
		tmuxSession: "overstory-done",
		state: "completed",
	});
	const sessions: AgentSession[] = [healthySession, dyingSession, staleSession, doneSession];

	writeSessionsToStore(tempRoot, sessions);

	const liveness = tmuxWithLiveness({
		"overstory-healthy": true,
		"overstory-dying": false,
		"overstory-stale": true,
		"overstory-done": false,
	});

	const observed: HealthCheck[] = [];

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		onHealthCheck: (check) => observed.push(check),
		_tmux: liveness,
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
	});

	// 3 non-completed sessions processed
	expect(observed).toHaveLength(3);

	const stored = readSessionsFromStore(tempRoot);
	const stateOf = (agentName: string) =>
		stored.find((entry) => entry.agentName === agentName)?.state;

	expect(stateOf("healthy")).toBe("working");
	expect(stateOf("dying")).toBe("zombie");
	expect(stateOf("stale")).toBe("stalled");
	expect(stateOf("done")).toBe("completed");
});
750
+
751
// With nothing in the session store, a tick must not report any health check.
test("empty sessions array is a no-op", async () => {
	const observed: HealthCheck[] = [];
	writeSessionsToStore(tempRoot, []);

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		onHealthCheck: (check) => observed.push(check),
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
	});

	expect(observed).toHaveLength(0);
});
766
+
767
// A "booting" session with a just-now lastActivity and a live tmux session
// is expected to be reported, and persisted, as "working" after one tick.
test("booting session with recent activity transitions to working", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			state: "booting",
			lastActivity: new Date().toISOString(),
		}),
	]);

	const observed: HealthCheck[] = [];

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		onHealthCheck: (check) => observed.push(check),
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
	});

	expect(observed).toHaveLength(1);
	const [onlyCheck] = observed;
	expect(onlyCheck?.state).toBe("working");

	const [stored] = readSessionsFromStore(tempRoot);
	expect(stored?.state).toBe("working");
});
791
+
792
+ // --- Backward compatibility ---
793
+
794
// Backward-compatibility check: a session whose escalation fields hold their
// defaults (level 0, no stalledSince) goes through a tick and is reported
// as "working".
test("sessions with default escalation fields are processed correctly", async () => {
	// Write a session with default (zero) escalation fields
	const legacySession = makeSession({
		id: "session-old",
		agentName: "old-agent",
		worktreePath: "/tmp/test",
		branchName: "overstory/old-agent/task",
		beadId: "task",
		tmuxSession: "overstory-old-agent",
		state: "working",
		pid: process.pid,
		escalationLevel: 0,
		stalledSince: null,
	});
	writeSessionsToStore(tempRoot, [legacySession]);

	const observed: HealthCheck[] = [];

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		onHealthCheck: (check) => observed.push(check),
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
	});

	// Should process without errors
	expect(observed).toHaveLength(1);
	const [onlyCheck] = observed;
	expect(onlyCheck?.state).toBe("working");
});
825
+ });
826
+
827
+ // === Event recording tests ===
828
+
829
+ describe("daemon event recording", () => {
830
/**
 * Read back every event stored in `<root>/.overstory/events.db`.
 *
 * Opens a fresh store handle, queries the timeline from the year 2000
 * onwards (no agent filter), and always closes the handle before returning.
 */
function readEvents(root: string): StoredEvent[] {
	const store = createEventStore(join(root, ".overstory", "events.db"));
	let timeline: StoredEvent[];
	try {
		// Get all events (no agent filter — use a broad timeline)
		timeline = store.getTimeline({ since: "2000-01-01T00:00:00Z" });
	} finally {
		store.close();
	}
	return timeline;
}
841
+
842
// A "working" session whose last activity is 60s old should cause the tick
// to store a custom, warn-level event whose payload has type "escalation"
// and escalationLevel 0, attributed to the stalled agent.
test("escalation level 0 (warn) records event with type=escalation", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "stalled-agent",
			tmuxSession: "overstory-stalled-agent",
			state: "working",
			lastActivity: new Date(Date.now() - 60_000).toISOString(),
		}),
	]);

	// Create EventStore and inject it
	const injectedStore = createEventStore(join(tempRoot, ".overstory", "events.db"));

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			nudgeIntervalMs: 60_000,
			_tmux: tmuxWithLiveness({ "overstory-stalled-agent": true }),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	const recorded = readEvents(tempRoot);
	expect(recorded.length).toBeGreaterThanOrEqual(1);

	const warnEvent = recorded.find((candidate) => {
		if (!candidate.data) {
			return false;
		}
		const payload = JSON.parse(candidate.data) as Record<string, unknown>;
		return payload.type === "escalation" && payload.escalationLevel === 0;
	});
	expect(warnEvent).toBeDefined();
	expect(warnEvent?.eventType).toBe("custom");
	expect(warnEvent?.level).toBe("warn");
	expect(warnEvent?.agentName).toBe("stalled-agent");
});
884
+
885
// A session already "stalled" at escalation level 0 for 70s (with a 60s
// nudge interval) should produce a warn-level "nudge" event at level 1
// whose payload reports delivered === true.
test("escalation level 1 (nudge) records event with delivered status", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 70_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "stalled-agent",
			tmuxSession: "overstory-stalled-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 0,
			stalledSince,
		}),
	]);

	const injectedStore = createEventStore(join(tempRoot, ".overstory", "events.db"));
	const nudges = nudgeTracker();

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			nudgeIntervalMs: 60_000,
			_tmux: tmuxWithLiveness({ "overstory-stalled-agent": true }),
			_triage: triageAlways("extend"),
			_nudge: nudges.nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	const nudgeEvent = readEvents(tempRoot).find((candidate) => {
		if (!candidate.data) {
			return false;
		}
		const payload = JSON.parse(candidate.data) as Record<string, unknown>;
		return payload.type === "nudge" && payload.escalationLevel === 1;
	});
	expect(nudgeEvent).toBeDefined();
	expect(nudgeEvent?.eventType).toBe("custom");
	expect(nudgeEvent?.level).toBe("warn");

	const nudgePayload = JSON.parse(nudgeEvent?.data ?? "{}") as Record<string, unknown>;
	expect(nudgePayload.delivered).toBe(true);
});
930
+
931
// A session stalled for 130s at escalation level 1, with tier 1 enabled and
// a triage mock answering "extend", should produce a warn-level "triage"
// event at level 2 carrying that verdict.
test("escalation level 2 (triage) records event with verdict", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 130_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "stalled-agent",
			tmuxSession: "overstory-stalled-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 1,
			stalledSince,
		}),
	]);

	const injectedStore = createEventStore(join(tempRoot, ".overstory", "events.db"));

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			nudgeIntervalMs: 60_000,
			tier1Enabled: true,
			_tmux: tmuxWithLiveness({ "overstory-stalled-agent": true }),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	const triageEvent = readEvents(tempRoot).find((candidate) => {
		if (!candidate.data) {
			return false;
		}
		const payload = JSON.parse(candidate.data) as Record<string, unknown>;
		return payload.type === "triage" && payload.escalationLevel === 2;
	});
	expect(triageEvent).toBeDefined();
	expect(triageEvent?.eventType).toBe("custom");
	expect(triageEvent?.level).toBe("warn");

	const triagePayload = JSON.parse(triageEvent?.data ?? "{}") as Record<string, unknown>;
	expect(triagePayload.verdict).toBe("extend");
});
976
+
977
// A session stalled for 200s at escalation level 2 should produce an
// error-level "escalation" event at level 3 whose payload action is
// "terminate".
test("escalation level 3 (terminate) records event with level=error", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 200_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "doomed-agent",
			tmuxSession: "overstory-doomed-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 2,
			stalledSince,
		}),
	]);

	const injectedStore = createEventStore(join(tempRoot, ".overstory", "events.db"));

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			nudgeIntervalMs: 60_000,
			_tmux: tmuxWithLiveness({ "overstory-doomed-agent": true }),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	const terminateEvent = readEvents(tempRoot).find((candidate) => {
		if (!candidate.data) {
			return false;
		}
		const payload = JSON.parse(candidate.data) as Record<string, unknown>;
		return payload.type === "escalation" && payload.escalationLevel === 3;
	});
	expect(terminateEvent).toBeDefined();
	expect(terminateEvent?.eventType).toBe("custom");
	expect(terminateEvent?.level).toBe("error");

	const terminatePayload = JSON.parse(terminateEvent?.data ?? "{}") as Record<string, unknown>;
	expect(terminatePayload.action).toBe("terminate");
});
1021
+
1022
// When `.overstory/current-run.txt` holds a run ID, the first event stored
// by the tick is expected to carry that ID in its runId field.
test("run_id is included in events when current-run.txt exists", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "stalled-agent",
			tmuxSession: "overstory-stalled-agent",
			state: "working",
			lastActivity: new Date(Date.now() - 60_000).toISOString(),
		}),
	]);

	// Write a current-run.txt
	const runId = "run-2026-02-13T10-00-00-000Z";
	const overstoryDir = join(tempRoot, ".overstory");
	await Bun.write(join(overstoryDir, "current-run.txt"), runId);

	const injectedStore = createEventStore(join(tempRoot, ".overstory", "events.db"));

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			nudgeIntervalMs: 60_000,
			_tmux: tmuxWithLiveness({ "overstory-stalled-agent": true }),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	const recorded = readEvents(tempRoot);
	expect(recorded.length).toBeGreaterThanOrEqual(1);
	const [firstEvent] = recorded;
	expect(firstEvent?.runId).toBe(runId);
});
1059
+
1060
// Passing `_eventStore: null` must not stop the tick from doing its job:
// the stale session still yields exactly one health check with action
// "escalate".
test("daemon continues normally when _eventStore is null", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "stalled-agent",
			tmuxSession: "overstory-stalled-agent",
			state: "working",
			lastActivity: new Date(Date.now() - 60_000).toISOString(),
		}),
	]);

	const observed: HealthCheck[] = [];

	// Inject null EventStore — daemon should still work fine
	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		nudgeIntervalMs: 60_000,
		onHealthCheck: (check) => observed.push(check),
		_tmux: tmuxWithLiveness({ "overstory-stalled-agent": true }),
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_eventStore: null,
	});

	// Daemon should still produce health checks even without EventStore
	expect(observed).toHaveLength(1);
	const [onlyCheck] = observed;
	expect(onlyCheck?.action).toBe("escalate");
});
1089
+ });
1090
+
1091
+ // === Mulch failure recording tests ===
1092
+
1093
+ describe("daemon mulch failure recording", () => {
1094
// Scratch project root: created before each test in this describe block
// and removed recursively afterwards.
let tempRoot: string;

beforeEach(async () => {
	const freshRoot = await createTempRoot();
	tempRoot = freshRoot;
});

afterEach(() => rm(tempRoot, { recursive: true, force: true }));
1103
+
1104
/**
 * One captured invocation of the recordFailure mock.
 * Each field mirrors the positional argument of the same name.
 */
interface FailureRecord {
	/** Project root the failure was recorded against. */
	root: string;
	/** Session the failure was recorded for. */
	session: AgentSession;
	/** Free-text failure reason. */
	reason: string;
	/** Tier argument passed to the mock (0 or 1). */
	tier: 0 | 1;
	/** Optional triage suggestion argument; absent when not supplied. */
	triageSuggestion?: string;
}
1112
+
1113
/**
 * Build a recordFailure test double.
 *
 * @returns `calls`, which accumulates one FailureRecord per invocation in
 *   call order, and `recordFailure`, an async function that only appends
 *   its arguments to `calls`.
 */
function failureTracker(): {
	calls: FailureRecord[];
	recordFailure: (
		root: string,
		session: AgentSession,
		reason: string,
		tier: 0 | 1,
		triageSuggestion?: string,
	) => Promise<void>;
} {
	const calls: FailureRecord[] = [];

	const recordFailure = async (
		root: string,
		session: AgentSession,
		reason: string,
		tier: 0 | 1,
		triageSuggestion?: string,
	): Promise<void> => {
		calls.push({ root, session, reason, tier, triageSuggestion });
	};

	return { calls, recordFailure };
}
1131
+
1132
// A "working" session whose tmux session is reported dead should trigger
// exactly one recordFailure call at tier 0, carrying the session's
// identifying fields and some reason.
test("Tier 0: recordFailure called when action=terminate (process death)", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "dying-agent",
			capability: "builder",
			beadId: "task-123",
			tmuxSession: "overstory-dying-agent",
			state: "working",
			lastActivity: new Date().toISOString(),
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-dying-agent": false });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: liveness,
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	// recordFailure should be called with Tier 0
	expect(failures.calls).toHaveLength(1);
	const [recorded] = failures.calls;
	expect(recorded?.tier).toBe(0);
	expect(recorded?.session.agentName).toBe("dying-agent");
	expect(recorded?.session.capability).toBe("builder");
	expect(recorded?.session.beadId).toBe("task-123");
	// Reason should be either the reconciliationNote or default "Process terminated"
	expect(recorded?.reason).toBeDefined();
});
1165
+
1166
// With tier 1 enabled and the triage mock answering "terminate" for a
// session stalled 130s at escalation level 1, recordFailure should be
// called once at tier 1 with the verdict as triageSuggestion.
test("Tier 1: recordFailure called when triage returns terminate", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 130_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "triaged-agent",
			capability: "scout",
			beadId: "task-456",
			tmuxSession: "overstory-triaged-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 1,
			stalledSince,
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-triaged-agent": true });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		nudgeIntervalMs: 60_000,
		tier1Enabled: true,
		_tmux: liveness,
		_triage: triageAlways("terminate"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	// recordFailure should be called with Tier 1 and triage verdict
	expect(failures.calls).toHaveLength(1);
	const [recorded] = failures.calls;
	expect(recorded?.tier).toBe(1);
	expect(recorded?.session.agentName).toBe("triaged-agent");
	expect(recorded?.session.capability).toBe("scout");
	expect(recorded?.session.beadId).toBe("task-456");
	expect(recorded?.triageSuggestion).toBe("terminate");
	expect(recorded?.reason).toContain("AI triage");
});
1205
+
1206
// A "retry" triage verdict must not be recorded as a failure.
test("recordFailure not called when triage returns retry", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 130_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "retry-agent",
			tmuxSession: "overstory-retry-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 1,
			stalledSince,
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-retry-agent": true });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		nudgeIntervalMs: 60_000,
		tier1Enabled: true,
		_tmux: liveness,
		_triage: triageAlways("retry"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	// recordFailure should NOT be called for retry verdict
	expect(failures.calls).toHaveLength(0);
});
1237
+
1238
// An "extend" triage verdict must not be recorded as a failure.
test("recordFailure not called when triage returns extend", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 130_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "extend-agent",
			tmuxSession: "overstory-extend-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 1,
			stalledSince,
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-extend-agent": true });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		nudgeIntervalMs: 60_000,
		tier1Enabled: true,
		_tmux: liveness,
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	// recordFailure should NOT be called for extend verdict
	expect(failures.calls).toHaveLength(0);
});
1269
+
1270
// The session handed to recordFailure must still carry its beadId, so the
// recorder has the bead available as evidence.
test("recordFailure includes evidenceBead when beadId is present", async () => {
	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "beaded-agent",
			capability: "builder",
			beadId: "task-789",
			tmuxSession: "overstory-beaded-agent",
			state: "working",
			lastActivity: new Date().toISOString(),
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-beaded-agent": false });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: liveness,
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	expect(failures.calls).toHaveLength(1);
	const [recorded] = failures.calls;
	expect(recorded?.session.beadId).toBe("task-789");
});
1297
+
1298
// A session stalled for 200s at escalation level 2 (tier 1 not enabled
// here) should be recorded once as a tier 0 failure whose reason mentions
// "Progressive escalation".
test("Tier 0: recordFailure called at escalation level 3+ (progressive termination)", async () => {
	const lastActivity = new Date(Date.now() - 60_000).toISOString();
	const stalledSince = new Date(Date.now() - 200_000).toISOString();

	writeSessionsToStore(tempRoot, [
		makeSession({
			agentName: "doomed-agent",
			capability: "builder",
			beadId: "task-999",
			tmuxSession: "overstory-doomed-agent",
			state: "stalled",
			lastActivity,
			escalationLevel: 2,
			stalledSince,
		}),
	]);

	const liveness = tmuxWithLiveness({ "overstory-doomed-agent": true });
	const failures = failureTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		nudgeIntervalMs: 60_000,
		_tmux: liveness,
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_recordFailure: failures.recordFailure,
	});

	// recordFailure should be called with Tier 0 for progressive escalation
	expect(failures.calls).toHaveLength(1);
	const [recorded] = failures.calls;
	expect(recorded?.tier).toBe(0);
	expect(recorded?.session.agentName).toBe("doomed-agent");
	expect(recorded?.reason).toContain("Progressive escalation");
});
1333
+ });
1334
+
1335
+ // === Run completion detection tests ===
1336
+
1337
+ describe("run completion detection", () => {
1338
+ const runId = "run-2026-02-18T15-00-00-000Z";
1339
+
1340
// Two completed builders plus a working coordinator in the current run:
// the tick should send the coordinator exactly one "WATCHDOG" nudge whose
// text is builder-specific.
test("nudges coordinator when all workers completed", async () => {
	const builderOne = makeSession({
		id: "s1",
		agentName: "builder-one",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const builderTwo = makeSession({
		id: "s2",
		agentName: "builder-two",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-two",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const coordinator = makeSession({
		id: "s3",
		agentName: "coordinator",
		capability: "coordinator",
		tmuxSession: "overstory-agent-fake-coordinator",
		state: "working",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [builderOne, builderTwo, coordinator]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const nudges = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudges.nudge,
		_eventStore: null,
	});

	// Filter to only run-completion nudges targeting the coordinator
	const completionNudges = nudges.calls.filter(
		(call) => call.agentName === "coordinator" && call.message.includes("WATCHDOG"),
	);
	expect(completionNudges).toHaveLength(1);

	// The test creates builders, so the message should be builder-specific
	const [completionNudge] = completionNudges;
	expect(completionNudge?.message).toContain("builder");
	expect(completionNudge?.message).toContain("merge/cleanup");
});
1394
+
1395
// While at least one worker in the current run is still "working", the
// coordinator must not receive a run-completion nudge.
test("does not nudge when some workers still active", async () => {
	const sessions = [
		makeSession({
			id: "s1",
			agentName: "builder-one",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-one",
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
		makeSession({
			id: "s2",
			agentName: "builder-two",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-two",
			state: "working",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	];

	writeSessionsToStore(tempRoot, sessions);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const nudgeMock = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudgeMock.nudge,
		_eventStore: null,
	});

	// Match run-completion nudges on the "WATCHDOG" marker, the same marker
	// the positive tests in this suite use. Those tests expect builder-specific
	// wording, so a filter on "worker" may never match a real completion
	// nudge and would let this negative assertion pass vacuously.
	const coordinatorNudges = nudgeMock.calls.filter(
		(c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
	);
	expect(coordinatorNudges).toHaveLength(0);
});
1436
+
1437
// When run-complete-notified.txt already holds the current run ID, the
// tick must not send the coordinator another run-completion nudge.
test("does not nudge when already notified (dedup marker)", async () => {
	const sessions = [
		makeSession({
			id: "s1",
			agentName: "builder-one",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-one",
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
		makeSession({
			id: "s2",
			agentName: "builder-two",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-two",
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	];

	writeSessionsToStore(tempRoot, sessions);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
	// Pre-write dedup marker
	await Bun.write(join(tempRoot, ".overstory", "run-complete-notified.txt"), runId);

	const nudgeMock = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudgeMock.nudge,
		_eventStore: null,
	});

	// Match run-completion nudges on the "WATCHDOG" marker, the same marker
	// the positive tests in this suite use. Those tests expect builder-specific
	// wording, so a filter on "worker" may never match a real completion
	// nudge and would let this negative assertion pass vacuously.
	const coordinatorNudges = nudgeMock.calls.filter(
		(c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
	);
	expect(coordinatorNudges).toHaveLength(0);
});
1480
+
1481
// Without a current-run.txt file there is no run ID to check, so no
// run-completion nudge may be sent even though every worker is completed.
test("skips completion check when no run ID", async () => {
	const sessions = [
		makeSession({
			id: "s1",
			agentName: "builder-one",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-one",
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
		makeSession({
			id: "s2",
			agentName: "builder-two",
			capability: "builder",
			tmuxSession: "overstory-agent-fake-builder-two",
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	];

	writeSessionsToStore(tempRoot, sessions);
	// Do NOT write current-run.txt

	const nudgeMock = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudgeMock.nudge,
		_eventStore: null,
	});

	// Match run-completion nudges on the "WATCHDOG" marker, the same marker
	// the positive tests in this suite use. Those tests expect builder-specific
	// wording, so a filter on "worker" may never match a real completion
	// nudge and would let this negative assertion pass vacuously.
	const coordinatorNudges = nudgeMock.calls.filter(
		(c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
	);
	expect(coordinatorNudges).toHaveLength(0);
});
1522
+
1523
// Coordinator and monitor sessions that are still "working" must not block
// run completion: with both builders completed, the coordinator still gets
// exactly one builder-specific "WATCHDOG" nudge.
test("ignores coordinator and monitor sessions for completion check", async () => {
	const coordinator = makeSession({
		id: "s1",
		agentName: "coordinator",
		capability: "coordinator",
		tmuxSession: "overstory-agent-fake-coordinator",
		state: "working",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const monitor = makeSession({
		id: "s2",
		agentName: "monitor",
		capability: "monitor",
		tmuxSession: "overstory-agent-fake-monitor",
		state: "working",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const builderOne = makeSession({
		id: "s3",
		agentName: "builder-one",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const builderTwo = makeSession({
		id: "s4",
		agentName: "builder-two",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-two",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [coordinator, monitor, builderOne, builderTwo]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const nudges = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudges.nudge,
		_eventStore: null,
	});

	// Nudge IS sent because coordinator/monitor are excluded from worker count
	const completionNudges = nudges.calls.filter(
		(call) => call.agentName === "coordinator" && call.message.includes("WATCHDOG"),
	);
	expect(completionNudges).toHaveLength(1);

	// The test creates builders, so the message should be builder-specific
	const [completionNudge] = completionNudges;
	expect(completionNudge?.message).toContain("builder");
	expect(completionNudge?.message).toContain("merge/cleanup");
});
1586
+
1587
// A run that contains only coordinator and monitor sessions has no workers
// to complete, so no run-completion nudge may be sent.
test("does not nudge when no worker sessions in run", async () => {
	const sessions = [
		makeSession({
			id: "s1",
			agentName: "coordinator",
			capability: "coordinator",
			tmuxSession: "overstory-agent-fake-coordinator",
			state: "working",
			runId,
			lastActivity: new Date().toISOString(),
		}),
		makeSession({
			id: "s2",
			agentName: "monitor",
			capability: "monitor",
			tmuxSession: "overstory-agent-fake-monitor",
			state: "working",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	];

	writeSessionsToStore(tempRoot, sessions);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const nudgeMock = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudgeMock.nudge,
		_eventStore: null,
	});

	// Match run-completion nudges on the "WATCHDOG" marker, the same marker
	// the positive tests in this suite use. A filter on "worker" may never
	// match a real completion nudge and would let this negative assertion
	// pass vacuously.
	const coordinatorNudges = nudgeMock.calls.filter(
		(c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
	);
	expect(coordinatorNudges).toHaveLength(0);
});
1628
+
1629
// When every worker in the current run is completed, the tick should store
// an info-level event with payload type "run_complete", attributed to the
// "watchdog" agent.
test("records run_complete event when all workers done", async () => {
	const builderOne = makeSession({
		id: "s1",
		agentName: "builder-one",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const builderTwo = makeSession({
		id: "s2",
		agentName: "builder-two",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-two",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [builderOne, builderTwo]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const eventsDbPath = join(tempRoot, ".overstory", "events.db");
	const injectedStore = createEventStore(eventsDbPath);

	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			_tmux: tmuxAllAlive(),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: injectedStore,
		});
	} finally {
		injectedStore.close();
	}

	// Read events back
	const readbackStore = createEventStore(eventsDbPath);
	try {
		const recorded = readbackStore.getTimeline({ since: "2000-01-01T00:00:00Z" });
		const runCompleteEvent = recorded.find((candidate) => {
			if (!candidate.data) {
				return false;
			}
			const payload = JSON.parse(candidate.data) as Record<string, unknown>;
			return payload.type === "run_complete";
		});
		expect(runCompleteEvent).toBeDefined();
		expect(runCompleteEvent?.level).toBe("info");
		expect(runCompleteEvent?.agentName).toBe("watchdog");
	} finally {
		readbackStore.close();
	}
});
1686
+
1687
// After a tick that sees all workers completed, run-complete-notified.txt
// must exist and contain the current run ID.
test("writes dedup marker after nudging", async () => {
	const builderOne = makeSession({
		id: "s1",
		agentName: "builder-one",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});
	const builderTwo = makeSession({
		id: "s2",
		agentName: "builder-two",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-two",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [builderOne, builderTwo]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: nudgeTracker().nudge,
		_eventStore: null,
	});

	// Verify dedup marker was written
	const markerPath = join(tempRoot, ".overstory", "run-complete-notified.txt");
	const marker = Bun.file(markerPath);
	const markerExists = await marker.exists();
	expect(markerExists).toBe(true);

	const markerText = await marker.text();
	expect(markerText.trim()).toBe(runId);
});
1727
+
1728
// With only scout sessions completed, exactly one WATCHDOG nudge should reach the
// coordinator, and it should use the "next phase" wording.
test("scout-only completion sends phase-appropriate message", async () => {
	const finishedScouts = ["scout-one", "scout-two"].map((agentName, idx) =>
		makeSession({
			id: `s${idx + 1}`,
			agentName,
			capability: "scout",
			tmuxSession: `overstory-agent-fake-${agentName}`,
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	);

	writeSessionsToStore(tempRoot, finishedScouts);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const tracker = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: tracker.nudge,
		_eventStore: null,
	});

	const watchdogNudges = tracker.calls.filter(
		(call) => call.agentName === "coordinator" && call.message.includes("WATCHDOG"),
	);
	expect(watchdogNudges).toHaveLength(1);

	const text = watchdogNudges[0]?.message;
	expect(text).toContain("scout");
	expect(text).toContain("next phase");
	// Scout completion must not use the "merge/cleanup" wording.
	expect(text).not.toContain("merge/cleanup");
});
1773
+
1774
// One scout plus one builder completed: the coordinator nudge should list both
// capabilities as "(builder, scout)" and use the "next steps" wording.
test("mixed capabilities send generic message with breakdown", async () => {
	const roster = [
		{ id: "s1", agentName: "scout-one", capability: "scout" },
		{ id: "s2", agentName: "builder-one", capability: "builder" },
	] as const;

	const finishedWorkers = roster.map(({ id, agentName, capability }) =>
		makeSession({
			id,
			agentName,
			capability,
			tmuxSession: `overstory-agent-fake-${agentName}`,
			state: "completed",
			runId,
			lastActivity: new Date().toISOString(),
		}),
	);

	writeSessionsToStore(tempRoot, finishedWorkers);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const tracker = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: tracker.nudge,
		_eventStore: null,
	});

	const watchdogNudges = tracker.calls.filter(
		(call) => call.agentName === "coordinator" && call.message.includes("WATCHDOG"),
	);
	expect(watchdogNudges).toHaveLength(1);

	const text = watchdogNudges[0]?.message;
	expect(text).toContain("(builder, scout)");
	expect(text).toContain("next steps");
});
1817
+
1818
// A single completed reviewer: the coordinator nudge should mention the reviewer
// capability and use the "Reviews done" wording.
test("reviewer-only completion sends review-specific message", async () => {
	const finishedReviewer = makeSession({
		id: "s1",
		agentName: "reviewer-one",
		capability: "reviewer",
		tmuxSession: "overstory-agent-fake-reviewer-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [finishedReviewer]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const tracker = nudgeTracker();

	await runDaemonTick({
		root: tempRoot,
		...THRESHOLDS,
		_tmux: tmuxAllAlive(),
		_triage: triageAlways("extend"),
		_nudge: tracker.nudge,
		_eventStore: null,
	});

	const watchdogNudges = tracker.calls.filter(
		(call) => call.agentName === "coordinator" && call.message.includes("WATCHDOG"),
	);
	expect(watchdogNudges).toHaveLength(1);

	const text = watchdogNudges[0]?.message;
	expect(text).toContain("reviewer");
	expect(text).toContain("Reviews done");
});
1852
+
1853
// Checks the payload of the persisted "run_complete" event for a single-builder run:
// `capabilities` should be ["builder"] and `phase` should be "builder".
test("run_complete event includes capabilities and phase fields", async () => {
	const finishedBuilder = makeSession({
		id: "s1",
		agentName: "builder-one",
		capability: "builder",
		tmuxSession: "overstory-agent-fake-builder-one",
		state: "completed",
		runId,
		lastActivity: new Date().toISOString(),
	});

	writeSessionsToStore(tempRoot, [finishedBuilder]);
	await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);

	const dbPath = join(tempRoot, ".overstory", "events.db");

	// The tick writes through its own handle, which is always closed afterwards.
	const writer = createEventStore(dbPath);
	try {
		await runDaemonTick({
			root: tempRoot,
			...THRESHOLDS,
			_tmux: tmuxAllAlive(),
			_triage: triageAlways("extend"),
			_nudge: nudgeTracker().nudge,
			_eventStore: writer,
		});
	} finally {
		writer.close();
	}

	const reader = createEventStore(dbPath);
	try {
		const timeline = reader.getTimeline({ since: "2000-01-01T00:00:00Z" });
		const completion = timeline.find((entry) =>
			entry.data
				? (JSON.parse(entry.data) as Record<string, unknown>).type === "run_complete"
				: false,
		);
		expect(completion).toBeDefined();

		// Fall back to an empty object so a missing event fails on the field checks
		// instead of throwing inside JSON.parse.
		const payload = JSON.parse(completion?.data ?? "{}") as Record<string, unknown>;
		expect(payload.capabilities).toEqual(["builder"]);
		expect(payload.phase).toBe("builder");
	} finally {
		reader.close();
	}
});
1901
+ });
1902
+
1903
+ // === buildCompletionMessage unit tests ===
1904
+
1905
// Unit tests for buildCompletionMessage(sessions, runId): each case passes a list of
// sessions built with makeSession and asserts on substrings of the returned message.
describe("buildCompletionMessage", () => {
	const testRunId = "run-test-123";

	test("all scouts → contains 'scout' and 'Ready for next phase'", () => {
		const sessions = [
			makeSession({ capability: "scout", agentName: "scout-1" }),
			makeSession({ capability: "scout", agentName: "scout-2" }),
		];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("scout");
		expect(msg).toContain("Ready for next phase");
		expect(msg).not.toContain("merge/cleanup");
	});

	test("all builders → contains 'builder' and 'Ready for merge/cleanup'", () => {
		const sessions = [
			makeSession({ capability: "builder", agentName: "builder-1" }),
			makeSession({ capability: "builder", agentName: "builder-2" }),
		];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("builder");
		expect(msg).toContain("Ready for merge/cleanup");
	});

	test("all reviewers → contains 'reviewer' and 'Reviews done'", () => {
		const sessions = [makeSession({ capability: "reviewer", agentName: "reviewer-1" })];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("reviewer");
		expect(msg).toContain("Reviews done");
	});

	test("all leads → contains 'lead' and 'Ready for merge/cleanup'", () => {
		const sessions = [makeSession({ capability: "lead", agentName: "lead-1" })];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("lead");
		expect(msg).toContain("Ready for merge/cleanup");
	});

	test("all mergers → contains 'merger' and 'Merges done'", () => {
		const sessions = [makeSession({ capability: "merger", agentName: "merger-1" })];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("merger");
		expect(msg).toContain("Merges done");
	});

	test("mixed capabilities → contains breakdown and 'Ready for next steps'", () => {
		const sessions = [
			makeSession({ capability: "scout", agentName: "scout-1" }),
			makeSession({ capability: "builder", agentName: "builder-1" }),
		];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain("(builder, scout)");
		expect(msg).toContain("Ready for next steps");
	});

	test("message includes the run ID", () => {
		const sessions = [makeSession({ capability: "builder", agentName: "builder-1" })];
		const msg = buildCompletionMessage(sessions, testRunId);
		expect(msg).toContain(testRunId);
	});

	test("message includes the worker count", () => {
		// Use a run ID and agent names without digits: the shared testRunId
		// ("run-test-123") and a name like "scout-3" already contain "3", which would
		// let the assertion pass even if the count were missing from the message.
		const digitFreeRunId = "run-count-check";
		const sessions = [
			makeSession({ capability: "scout", agentName: "scout-a" }),
			makeSession({ capability: "scout", agentName: "scout-b" }),
			makeSession({ capability: "scout", agentName: "scout-c" }),
		];
		const msg = buildCompletionMessage(sessions, digitFreeRunId);
		expect(msg).toContain("3");
	});
});