@os-eco/overstory-cli 0.9.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/README.md +50 -19
  2. package/agents/builder.md +19 -9
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +204 -87
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +219 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/mail-poll-detect.test.ts +153 -0
  18. package/src/agents/mail-poll-detect.ts +73 -0
  19. package/src/agents/overlay.test.ts +60 -4
  20. package/src/agents/overlay.ts +63 -8
  21. package/src/agents/scope-detect.test.ts +190 -0
  22. package/src/agents/scope-detect.ts +146 -0
  23. package/src/agents/turn-lock.test.ts +181 -0
  24. package/src/agents/turn-lock.ts +235 -0
  25. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  26. package/src/agents/turn-runner-dispatch.ts +105 -0
  27. package/src/agents/turn-runner.test.ts +2312 -0
  28. package/src/agents/turn-runner.ts +1383 -0
  29. package/src/commands/agents.ts +9 -0
  30. package/src/commands/clean.ts +54 -0
  31. package/src/commands/coordinator.test.ts +254 -0
  32. package/src/commands/coordinator.ts +273 -8
  33. package/src/commands/dashboard.test.ts +188 -0
  34. package/src/commands/dashboard.ts +14 -4
  35. package/src/commands/doctor.ts +3 -1
  36. package/src/commands/group.test.ts +94 -0
  37. package/src/commands/group.ts +49 -20
  38. package/src/commands/init.test.ts +8 -0
  39. package/src/commands/init.ts +8 -1
  40. package/src/commands/log.test.ts +187 -11
  41. package/src/commands/log.ts +171 -71
  42. package/src/commands/mail.test.ts +162 -0
  43. package/src/commands/mail.ts +64 -9
  44. package/src/commands/merge.test.ts +230 -1
  45. package/src/commands/merge.ts +68 -12
  46. package/src/commands/nudge.test.ts +351 -4
  47. package/src/commands/nudge.ts +356 -34
  48. package/src/commands/run.test.ts +43 -7
  49. package/src/commands/serve/build.test.ts +202 -0
  50. package/src/commands/serve/build.ts +206 -0
  51. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  52. package/src/commands/serve/coordinator-actions.ts +408 -0
  53. package/src/commands/serve/dev.test.ts +168 -0
  54. package/src/commands/serve/dev.ts +117 -0
  55. package/src/commands/serve/mail-actions.test.ts +312 -0
  56. package/src/commands/serve/mail-actions.ts +167 -0
  57. package/src/commands/serve/rest.test.ts +1323 -0
  58. package/src/commands/serve/rest.ts +708 -0
  59. package/src/commands/serve/static.ts +51 -0
  60. package/src/commands/serve/ws.test.ts +361 -0
  61. package/src/commands/serve/ws.ts +332 -0
  62. package/src/commands/serve.test.ts +459 -0
  63. package/src/commands/serve.ts +565 -0
  64. package/src/commands/sling.test.ts +177 -1
  65. package/src/commands/sling.ts +243 -71
  66. package/src/commands/status.test.ts +9 -0
  67. package/src/commands/status.ts +12 -4
  68. package/src/commands/stop.test.ts +255 -1
  69. package/src/commands/stop.ts +107 -8
  70. package/src/commands/watch.test.ts +43 -0
  71. package/src/commands/watch.ts +153 -28
  72. package/src/config.ts +23 -0
  73. package/src/doctor/consistency.test.ts +106 -0
  74. package/src/doctor/consistency.ts +48 -1
  75. package/src/doctor/serve.test.ts +95 -0
  76. package/src/doctor/serve.ts +86 -0
  77. package/src/doctor/types.ts +2 -1
  78. package/src/doctor/watchdog.ts +57 -1
  79. package/src/events/tailer.test.ts +234 -1
  80. package/src/events/tailer.ts +90 -0
  81. package/src/index.ts +57 -6
  82. package/src/insights/quality-gates.test.ts +141 -0
  83. package/src/insights/quality-gates.ts +156 -0
  84. package/src/json.ts +29 -0
  85. package/src/logging/theme.ts +4 -0
  86. package/src/mail/client.ts +15 -2
  87. package/src/mail/store.test.ts +82 -0
  88. package/src/mail/store.ts +41 -4
  89. package/src/merge/lock.test.ts +149 -0
  90. package/src/merge/lock.ts +140 -0
  91. package/src/merge/predict.test.ts +387 -0
  92. package/src/merge/predict.ts +249 -0
  93. package/src/merge/resolver.ts +1 -1
  94. package/src/mulch/client.ts +3 -3
  95. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  96. package/src/runtimes/claude.test.ts +791 -1
  97. package/src/runtimes/claude.ts +323 -1
  98. package/src/runtimes/connections.test.ts +141 -1
  99. package/src/runtimes/connections.ts +73 -4
  100. package/src/runtimes/headless-connection.test.ts +264 -0
  101. package/src/runtimes/headless-connection.ts +158 -0
  102. package/src/runtimes/types.ts +10 -0
  103. package/src/schema-consistency.test.ts +1 -0
  104. package/src/sessions/store.test.ts +657 -29
  105. package/src/sessions/store.ts +286 -23
  106. package/src/test-setup.test.ts +31 -0
  107. package/src/test-setup.ts +28 -0
  108. package/src/types.ts +107 -2
  109. package/src/utils/pid.test.ts +85 -1
  110. package/src/utils/pid.ts +86 -1
  111. package/src/utils/process-scan.test.ts +53 -0
  112. package/src/utils/process-scan.ts +76 -0
  113. package/src/watchdog/daemon.test.ts +1607 -376
  114. package/src/watchdog/daemon.ts +462 -88
  115. package/src/watchdog/health.test.ts +282 -0
  116. package/src/watchdog/health.ts +126 -27
  117. package/src/worktree/manager.test.ts +218 -1
  118. package/src/worktree/manager.ts +55 -0
  119. package/src/worktree/process.test.ts +71 -0
  120. package/src/worktree/process.ts +25 -5
  121. package/src/worktree/tmux.test.ts +28 -0
  122. package/src/worktree/tmux.ts +27 -3
  123. package/templates/CLAUDE.md.tmpl +19 -8
  124. package/templates/overlay.md.tmpl +5 -2
@@ -19,10 +19,16 @@ import { mkdir, mkdtemp } from "node:fs/promises";
19
19
  import { tmpdir } from "node:os";
20
20
  import { join } from "node:path";
21
21
  import { createEventStore } from "../events/store.ts";
22
- import { createSessionStore } from "../sessions/store.ts";
22
+ import { createMailStore } from "../mail/store.ts";
23
+ import { createRunStore, createSessionStore } from "../sessions/store.ts";
23
24
  import { cleanupTempDir } from "../test-helpers.ts";
24
- import type { AgentSession, HealthCheck, StoredEvent } from "../types.ts";
25
- import { buildCompletionMessage, runDaemonTick, startDaemon } from "./daemon.ts";
25
+ import type { AgentSession, HealthCheck, StoredEvent, WorkerDiedPayload } from "../types.ts";
26
+ import {
27
+ buildCompletionMessage,
28
+ type RunIdWarnState,
29
+ runDaemonTick,
30
+ startDaemon,
31
+ } from "./daemon.ts";
26
32
 
27
33
  // === Test constants ===
28
34
 
@@ -50,6 +56,34 @@ function writeSessionsToStore(root: string, sessions: AgentSession[]): void {
50
56
  store.close();
51
57
  }
52
58
 
59
+ /**
60
+ * Mark a run as active: write current-run.txt AND insert a row in the runs
61
+ * table (sessions.db). The watchdog now validates the id against the runs
62
+ * table before running the run-completion check (overstory-87bf), so tests
63
+ * must seed both surfaces to mirror production reality.
64
+ */
65
+ async function setActiveRun(root: string, runId: string): Promise<void> {
66
+ await Bun.write(join(root, ".overstory", "current-run.txt"), runId);
67
+ const runStore = createRunStore(join(root, ".overstory", "sessions.db"));
68
+ try {
69
+ runStore.createRun({
70
+ id: runId,
71
+ startedAt: new Date().toISOString(),
72
+ coordinatorSessionId: null,
73
+ status: "active",
74
+ });
75
+ } catch {
76
+ // Row may already exist (re-seeding within one test) — non-fatal.
77
+ } finally {
78
+ runStore.close();
79
+ }
80
+ }
81
+
82
+ /** Build a fresh, isolated RunIdWarnState for tests (overstory-87bf). */
83
+ function freshRunIdWarnState(): RunIdWarnState {
84
+ return { missingFileWarned: false, unknownIds: new Set() };
85
+ }
86
+
53
87
  /** Read sessions from the SessionStore (sessions.db) at the given root. */
54
88
  function readSessionsFromStore(root: string): AgentSession[] {
55
89
  const dbPath = join(root, ".overstory", "sessions.db");
@@ -497,6 +531,123 @@ describe("daemon tick", () => {
497
531
  expect(reloaded[0]?.stalledSince).toBeNull();
498
532
  });
499
533
 
534
+ // Regression tests for overstory-74ce: killAgent() must never call
535
+ // tmux.killSession("") for headless agents — an empty `-t` argument is
536
+ // prefix-matched and would wildcard-kill the entire overstory tmux server.
537
+
538
+ test("spawn-per-turn agent at level 3 termination does NOT call tmux.killSession", async () => {
539
+ const nudgeIntervalMs = 60_000;
540
+ const stalledSince = new Date(Date.now() - 4 * nudgeIntervalMs).toISOString();
541
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
542
+
543
+ // Spawn-per-turn worker between turns: tmuxSession === "" AND pid === null.
544
+ // Before the fix, killAgent fell through to tmux.killSession("") which
545
+ // prefix-matches every session in the overstory tmux server.
546
+ const session = makeSession({
547
+ agentName: "spawn-per-turn-doomed",
548
+ tmuxSession: "",
549
+ pid: null,
550
+ state: "stalled",
551
+ lastActivity: staleActivity,
552
+ escalationLevel: 2,
553
+ stalledSince,
554
+ });
555
+
556
+ writeSessionsToStore(tempRoot, [session]);
557
+
558
+ // No tmux sessions registered — emulates production where the spawn-per-turn
559
+ // agent has no named session.
560
+ const tmuxMock = tmuxWithLiveness({});
561
+
562
+ await runDaemonTick({
563
+ root: tempRoot,
564
+ ...THRESHOLDS,
565
+ nudgeIntervalMs,
566
+ tier1Enabled: false,
567
+ _tmux: tmuxMock,
568
+ _triage: triageAlways("extend"),
569
+ _nudge: nudgeTracker().nudge,
570
+ _eventStore: null,
571
+ _recordFailure: async () => {},
572
+ _getConnection: () => undefined,
573
+ _removeConnection: () => {},
574
+ _tailerRegistry: new Map(),
575
+ _findLatestStdoutLog: async () => null,
576
+ });
577
+
578
+ // Critical assertion: no wildcard kill attempt. tmuxMock.killed must be empty.
579
+ expect(tmuxMock.killed).toHaveLength(0);
580
+
581
+ // The session is still transitioned to zombie — termination semantics are preserved,
582
+ // just without the wildcard tmux kill.
583
+ const reloaded = readSessionsFromStore(tempRoot);
584
+ expect(reloaded[0]?.state).toBe("zombie");
585
+ expect(reloaded[0]?.escalationLevel).toBe(0);
586
+ expect(reloaded[0]?.stalledSince).toBeNull();
587
+ });
588
+
589
+ test("long-lived headless agent at level 3 termination kills pid tree, not tmux", async () => {
590
+ const nudgeIntervalMs = 60_000;
591
+ const stalledSince = new Date(Date.now() - 4 * nudgeIntervalMs).toISOString();
592
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
593
+
594
+ // Long-lived headless capability (e.g. coordinator/orchestrator/monitor):
595
+ // tmuxSession === "" AND pid !== null. The PID tree should be killed; tmux
596
+ // must not be touched.
597
+ const session = makeSession({
598
+ agentName: "headless-long-lived-doomed",
599
+ tmuxSession: "",
600
+ pid: process.pid, // alive PID — health eval won't short-circuit to direct terminate
601
+ state: "stalled",
602
+ lastActivity: staleActivity,
603
+ escalationLevel: 2,
604
+ stalledSince,
605
+ });
606
+
607
+ writeSessionsToStore(tempRoot, [session]);
608
+
609
+ const killedPids: number[] = [];
610
+ const procMock = {
611
+ isAlive: (pid: number) => {
612
+ try {
613
+ process.kill(pid, 0);
614
+ return true;
615
+ } catch {
616
+ return false;
617
+ }
618
+ },
619
+ killTree: async (pid: number) => {
620
+ killedPids.push(pid);
621
+ },
622
+ };
623
+
624
+ const tmuxMock = tmuxWithLiveness({});
625
+
626
+ await runDaemonTick({
627
+ root: tempRoot,
628
+ ...THRESHOLDS,
629
+ nudgeIntervalMs,
630
+ tier1Enabled: false,
631
+ _tmux: tmuxMock,
632
+ _triage: triageAlways("extend"),
633
+ _nudge: nudgeTracker().nudge,
634
+ _process: procMock,
635
+ _eventStore: null,
636
+ _recordFailure: async () => {},
637
+ _getConnection: () => undefined,
638
+ _removeConnection: () => {},
639
+ _tailerRegistry: new Map(),
640
+ _findLatestStdoutLog: async () => null,
641
+ });
642
+
643
+ // PID tree was killed; tmux.killSession was never called.
644
+ expect(killedPids).toContain(process.pid);
645
+ expect(tmuxMock.killed).toHaveLength(0);
646
+
647
+ const reloaded = readSessionsFromStore(tempRoot);
648
+ expect(reloaded[0]?.state).toBe("zombie");
649
+ });
650
+
500
651
  test("triage retry sends nudge with recovery message", async () => {
501
652
  const staleActivity = new Date(Date.now() - 60_000).toISOString();
502
653
  const stalledSince = new Date(Date.now() - 130_000).toISOString();
@@ -1084,7 +1235,7 @@ describe("daemon event recording", () => {
1084
1235
 
1085
1236
  // Write a current-run.txt
1086
1237
  const runId = "run-2026-02-13T10-00-00-000Z";
1087
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1238
+ await setActiveRun(tempRoot, runId);
1088
1239
 
1089
1240
  const eventsDbPath = join(tempRoot, ".overstory", "events.db");
1090
1241
  const eventStore = createEventStore(eventsDbPath);
@@ -1421,7 +1572,7 @@ describe("run completion detection", () => {
1421
1572
  ];
1422
1573
 
1423
1574
  writeSessionsToStore(tempRoot, sessions);
1424
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1575
+ await setActiveRun(tempRoot, runId);
1425
1576
 
1426
1577
  const nudgeMock = nudgeTracker();
1427
1578
 
@@ -1467,7 +1618,7 @@ describe("run completion detection", () => {
1467
1618
  ];
1468
1619
 
1469
1620
  writeSessionsToStore(tempRoot, sessions);
1470
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1621
+ await setActiveRun(tempRoot, runId);
1471
1622
 
1472
1623
  const nudgeMock = nudgeTracker();
1473
1624
 
@@ -1509,7 +1660,7 @@ describe("run completion detection", () => {
1509
1660
  ];
1510
1661
 
1511
1662
  writeSessionsToStore(tempRoot, sessions);
1512
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1663
+ await setActiveRun(tempRoot, runId);
1513
1664
  // Pre-write dedup marker
1514
1665
  await Bun.write(join(tempRoot, ".overstory", "run-complete-notified.txt"), runId);
1515
1666
 
@@ -1613,7 +1764,7 @@ describe("run completion detection", () => {
1613
1764
  ];
1614
1765
 
1615
1766
  writeSessionsToStore(tempRoot, sessions);
1616
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1767
+ await setActiveRun(tempRoot, runId);
1617
1768
 
1618
1769
  const nudgeMock = nudgeTracker();
1619
1770
 
@@ -1659,7 +1810,7 @@ describe("run completion detection", () => {
1659
1810
  ];
1660
1811
 
1661
1812
  writeSessionsToStore(tempRoot, sessions);
1662
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1813
+ await setActiveRun(tempRoot, runId);
1663
1814
 
1664
1815
  const nudgeMock = nudgeTracker();
1665
1816
 
@@ -1701,7 +1852,7 @@ describe("run completion detection", () => {
1701
1852
  ];
1702
1853
 
1703
1854
  writeSessionsToStore(tempRoot, sessions);
1704
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1855
+ await setActiveRun(tempRoot, runId);
1705
1856
 
1706
1857
  const eventsDbPath = join(tempRoot, ".overstory", "events.db");
1707
1858
  const eventStore = createEventStore(eventsDbPath);
@@ -1759,7 +1910,7 @@ describe("run completion detection", () => {
1759
1910
  ];
1760
1911
 
1761
1912
  writeSessionsToStore(tempRoot, sessions);
1762
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1913
+ await setActiveRun(tempRoot, runId);
1763
1914
 
1764
1915
  await runDaemonTick({
1765
1916
  root: tempRoot,
@@ -1800,7 +1951,7 @@ describe("run completion detection", () => {
1800
1951
  ];
1801
1952
 
1802
1953
  writeSessionsToStore(tempRoot, sessions);
1803
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
1954
+ await setActiveRun(tempRoot, runId);
1804
1955
 
1805
1956
  const nudgeMock = nudgeTracker();
1806
1957
 
@@ -1846,7 +1997,7 @@ describe("run completion detection", () => {
1846
1997
  ];
1847
1998
 
1848
1999
  writeSessionsToStore(tempRoot, sessions);
1849
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
2000
+ await setActiveRun(tempRoot, runId);
1850
2001
 
1851
2002
  const nudgeMock = nudgeTracker();
1852
2003
 
@@ -1881,7 +2032,7 @@ describe("run completion detection", () => {
1881
2032
  ];
1882
2033
 
1883
2034
  writeSessionsToStore(tempRoot, sessions);
1884
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
2035
+ await setActiveRun(tempRoot, runId);
1885
2036
 
1886
2037
  const nudgeMock = nudgeTracker();
1887
2038
 
@@ -1916,7 +2067,7 @@ describe("run completion detection", () => {
1916
2067
  ];
1917
2068
 
1918
2069
  writeSessionsToStore(tempRoot, sessions);
1919
- await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), runId);
2070
+ await setActiveRun(tempRoot, runId);
1920
2071
 
1921
2072
  const eventsDbPath = join(tempRoot, ".overstory", "events.db");
1922
2073
  const eventStore = createEventStore(eventsDbPath);
@@ -1950,104 +2101,1031 @@ describe("run completion detection", () => {
1950
2101
  store.close();
1951
2102
  }
1952
2103
  });
1953
- });
1954
-
1955
- // === buildCompletionMessage unit tests ===
1956
-
1957
- describe("buildCompletionMessage", () => {
1958
- const testRunId = "run-test-123";
1959
2104
 
1960
- test("all scouts contains 'scout' and 'Ready for next phase'", () => {
2105
+ // overstory-e130: a run that mixes `completed` and `zombie` workers must
2106
+ // still notify the coordinator. Before the fix, the every-completed predicate
2107
+ // stranded the coordinator forever whenever the watchdog killed any worker.
2108
+ test("nudges coordinator when workers are a mix of completed and zombie", async () => {
1961
2109
  const sessions = [
1962
- makeSession({ capability: "scout", agentName: "scout-1" }),
1963
- makeSession({ capability: "scout", agentName: "scout-2" }),
2110
+ makeSession({
2111
+ id: "s1",
2112
+ agentName: "builder-one",
2113
+ capability: "builder",
2114
+ tmuxSession: "overstory-agent-fake-builder-one",
2115
+ state: "completed",
2116
+ runId,
2117
+ lastActivity: new Date().toISOString(),
2118
+ }),
2119
+ makeSession({
2120
+ id: "s2",
2121
+ agentName: "builder-two",
2122
+ capability: "builder",
2123
+ tmuxSession: "overstory-agent-fake-builder-two",
2124
+ state: "zombie",
2125
+ runId,
2126
+ lastActivity: new Date().toISOString(),
2127
+ }),
1964
2128
  ];
1965
- const msg = buildCompletionMessage(sessions, testRunId);
1966
- expect(msg).toContain("scout");
1967
- expect(msg).toContain("Ready for next phase");
1968
- expect(msg).not.toContain("merge/cleanup");
1969
- });
1970
2129
 
1971
- test("all builders → contains 'builder' and 'Awaiting lead verification' (not merge authorization)", () => {
1972
- const sessions = [
1973
- makeSession({ capability: "builder", agentName: "builder-1" }),
1974
- makeSession({ capability: "builder", agentName: "builder-2" }),
1975
- ];
1976
- const msg = buildCompletionMessage(sessions, testRunId);
1977
- expect(msg).toContain("builder");
1978
- expect(msg).toContain("Awaiting lead verification");
1979
- expect(msg).not.toContain("merge/cleanup");
1980
- });
2130
+ writeSessionsToStore(tempRoot, sessions);
2131
+ await setActiveRun(tempRoot, runId);
1981
2132
 
1982
- test("all reviewers contains 'reviewer' and 'Reviews done'", () => {
1983
- const sessions = [makeSession({ capability: "reviewer", agentName: "reviewer-1" })];
1984
- const msg = buildCompletionMessage(sessions, testRunId);
1985
- expect(msg).toContain("reviewer");
1986
- expect(msg).toContain("Reviews done");
1987
- });
2133
+ const nudgeMock = nudgeTracker();
1988
2134
 
1989
- test("all leads → contains 'lead' and 'Ready for merge/cleanup'", () => {
1990
- const sessions = [makeSession({ capability: "lead", agentName: "lead-1" })];
1991
- const msg = buildCompletionMessage(sessions, testRunId);
1992
- expect(msg).toContain("lead");
1993
- expect(msg).toContain("Ready for merge/cleanup");
1994
- });
2135
+ await runDaemonTick({
2136
+ root: tempRoot,
2137
+ ...THRESHOLDS,
2138
+ _tmux: tmuxAllAlive(),
2139
+ _triage: triageAlways("extend"),
2140
+ _nudge: nudgeMock.nudge,
2141
+ _eventStore: null,
2142
+ });
1995
2143
 
1996
- test("all mergers contains 'merger' and 'Merges done'", () => {
1997
- const sessions = [makeSession({ capability: "merger", agentName: "merger-1" })];
1998
- const msg = buildCompletionMessage(sessions, testRunId);
1999
- expect(msg).toContain("merger");
2000
- expect(msg).toContain("Merges done");
2144
+ const coordinatorNudges = nudgeMock.calls.filter(
2145
+ (c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
2146
+ );
2147
+ expect(coordinatorNudges).toHaveLength(1);
2148
+ expect(coordinatorNudges[0]?.message).toContain("have terminated");
2149
+ expect(coordinatorNudges[0]?.message).toContain("(1 completed, 1 zombie)");
2001
2150
  });
2002
2151
 
2003
- test("mixed capabilities contains breakdown and 'Ready for next steps'", () => {
2152
+ test("nudges coordinator when every worker is zombie", async () => {
2004
2153
  const sessions = [
2005
- makeSession({ capability: "scout", agentName: "scout-1" }),
2006
- makeSession({ capability: "builder", agentName: "builder-1" }),
2154
+ makeSession({
2155
+ id: "s1",
2156
+ agentName: "builder-one",
2157
+ capability: "builder",
2158
+ tmuxSession: "overstory-agent-fake-builder-one",
2159
+ state: "zombie",
2160
+ runId,
2161
+ lastActivity: new Date().toISOString(),
2162
+ }),
2163
+ makeSession({
2164
+ id: "s2",
2165
+ agentName: "builder-two",
2166
+ capability: "builder",
2167
+ tmuxSession: "overstory-agent-fake-builder-two",
2168
+ state: "zombie",
2169
+ runId,
2170
+ lastActivity: new Date().toISOString(),
2171
+ }),
2007
2172
  ];
2008
- const msg = buildCompletionMessage(sessions, testRunId);
2009
- expect(msg).toContain("(builder, scout)");
2010
- expect(msg).toContain("Ready for next steps");
2011
- });
2012
2173
 
2013
- test("message includes the run ID", () => {
2014
- const sessions = [makeSession({ capability: "builder", agentName: "builder-1" })];
2015
- const msg = buildCompletionMessage(sessions, testRunId);
2016
- expect(msg).toContain(testRunId);
2174
+ writeSessionsToStore(tempRoot, sessions);
2175
+ await setActiveRun(tempRoot, runId);
2176
+
2177
+ const nudgeMock = nudgeTracker();
2178
+
2179
+ await runDaemonTick({
2180
+ root: tempRoot,
2181
+ ...THRESHOLDS,
2182
+ _tmux: tmuxAllAlive(),
2183
+ _triage: triageAlways("extend"),
2184
+ _nudge: nudgeMock.nudge,
2185
+ _eventStore: null,
2186
+ });
2187
+
2188
+ const coordinatorNudges = nudgeMock.calls.filter(
2189
+ (c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
2190
+ );
2191
+ expect(coordinatorNudges).toHaveLength(1);
2192
+ expect(coordinatorNudges[0]?.message).toContain("(0 completed, 2 zombie)");
2017
2193
  });
2018
2194
 
2019
- test("message includes the worker count", () => {
2195
+ test("does not nudge when a working worker remains alongside a zombie", async () => {
2020
2196
  const sessions = [
2021
- makeSession({ capability: "scout", agentName: "scout-1" }),
2022
- makeSession({ capability: "scout", agentName: "scout-2" }),
2023
- makeSession({ capability: "scout", agentName: "scout-3" }),
2197
+ makeSession({
2198
+ id: "s1",
2199
+ agentName: "builder-one",
2200
+ capability: "builder",
2201
+ tmuxSession: "overstory-agent-fake-builder-one",
2202
+ state: "zombie",
2203
+ runId,
2204
+ lastActivity: new Date().toISOString(),
2205
+ }),
2206
+ makeSession({
2207
+ id: "s2",
2208
+ agentName: "builder-two",
2209
+ capability: "builder",
2210
+ tmuxSession: "overstory-agent-fake-builder-two",
2211
+ state: "working",
2212
+ runId,
2213
+ lastActivity: new Date().toISOString(),
2214
+ }),
2024
2215
  ];
2025
- const msg = buildCompletionMessage(sessions, testRunId);
2026
- expect(msg).toContain("3");
2027
- });
2028
- });
2029
2216
 
2030
- // === Bug fix tests: headless agent kill blast radius + stale detection ===
2217
+ writeSessionsToStore(tempRoot, sessions);
2218
+ await setActiveRun(tempRoot, runId);
2031
2219
 
2032
- describe("headless agent kill blast radius fix (Bug 1)", () => {
2033
- /**
2034
- * Track PID kill calls without spawning real processes.
2035
- * Also surfaces killTree calls so tests can assert on them.
2036
- */
2037
- function processTracker(): {
2038
- isAlive: (pid: number) => boolean;
2039
- killTree: (pid: number) => Promise<void>;
2040
- killed: number[];
2041
- } {
2042
- const killed: number[] = [];
2043
- return {
2044
- isAlive: (pid: number) => {
2045
- try {
2046
- process.kill(pid, 0);
2047
- return true;
2048
- } catch {
2049
- return false;
2050
- }
2220
+ const nudgeMock = nudgeTracker();
2221
+
2222
+ await runDaemonTick({
2223
+ root: tempRoot,
2224
+ ...THRESHOLDS,
2225
+ _tmux: tmuxAllAlive(),
2226
+ _triage: triageAlways("extend"),
2227
+ _nudge: nudgeMock.nudge,
2228
+ _eventStore: null,
2229
+ });
2230
+
2231
+ const coordinatorNudges = nudgeMock.calls.filter(
2232
+ (c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
2233
+ );
2234
+ expect(coordinatorNudges).toHaveLength(0);
2235
+ });
2236
+
2237
+ test("run_complete event with zombies records zombieAgents and warn level", async () => {
2238
+ const sessions = [
2239
+ makeSession({
2240
+ id: "s1",
2241
+ agentName: "builder-one",
2242
+ capability: "builder",
2243
+ tmuxSession: "overstory-agent-fake-builder-one",
2244
+ state: "completed",
2245
+ runId,
2246
+ lastActivity: new Date().toISOString(),
2247
+ }),
2248
+ makeSession({
2249
+ id: "s2",
2250
+ agentName: "builder-two",
2251
+ capability: "builder",
2252
+ tmuxSession: "overstory-agent-fake-builder-two",
2253
+ state: "zombie",
2254
+ runId,
2255
+ lastActivity: new Date().toISOString(),
2256
+ }),
2257
+ ];
2258
+
2259
+ writeSessionsToStore(tempRoot, sessions);
2260
+ await setActiveRun(tempRoot, runId);
2261
+
2262
+ const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2263
+ const eventStore = createEventStore(eventsDbPath);
2264
+
2265
+ try {
2266
+ await runDaemonTick({
2267
+ root: tempRoot,
2268
+ ...THRESHOLDS,
2269
+ _tmux: tmuxAllAlive(),
2270
+ _triage: triageAlways("extend"),
2271
+ _nudge: nudgeTracker().nudge,
2272
+ _eventStore: eventStore,
2273
+ });
2274
+ } finally {
2275
+ eventStore.close();
2276
+ }
2277
+
2278
+ const store = createEventStore(eventsDbPath);
2279
+ try {
2280
+ const events = store.getTimeline({ since: "2000-01-01T00:00:00Z" });
2281
+ const runCompleteEvent = events.find((e) => {
2282
+ if (!e.data) return false;
2283
+ const data = JSON.parse(e.data) as Record<string, unknown>;
2284
+ return data.type === "run_complete";
2285
+ });
2286
+ expect(runCompleteEvent).toBeDefined();
2287
+ expect(runCompleteEvent?.level).toBe("warn");
2288
+ const data = JSON.parse(runCompleteEvent?.data ?? "{}") as Record<string, unknown>;
2289
+ expect(data.completedAgents).toEqual(["builder-one"]);
2290
+ expect(data.zombieAgents).toEqual(["builder-two"]);
2291
+ expect(data.workerCount).toBe(2);
2292
+ } finally {
2293
+ store.close();
2294
+ }
2295
+ });
2296
+
2297
+ test("missing current-run.txt: warns once, skips run-completion check (overstory-87bf)", async () => {
2298
+ const sessions = [
2299
+ makeSession({
2300
+ id: "s1",
2301
+ agentName: "builder-one",
2302
+ capability: "builder",
2303
+ tmuxSession: "overstory-agent-fake-builder-one",
2304
+ state: "completed",
2305
+ runId,
2306
+ lastActivity: new Date().toISOString(),
2307
+ }),
2308
+ makeSession({
2309
+ id: "s2",
2310
+ agentName: "builder-two",
2311
+ capability: "builder",
2312
+ tmuxSession: "overstory-agent-fake-builder-two",
2313
+ state: "completed",
2314
+ runId,
2315
+ lastActivity: new Date().toISOString(),
2316
+ }),
2317
+ ];
2318
+
2319
+ writeSessionsToStore(tempRoot, sessions);
2320
+ // Deliberately do NOT call setActiveRun — current-run.txt absent.
2321
+
2322
+ const nudgeMock = nudgeTracker();
2323
+ const warnState = freshRunIdWarnState();
2324
+
2325
+ const stderrWrites: string[] = [];
2326
+ const originalStderrWrite = process.stderr.write.bind(process.stderr);
2327
+ process.stderr.write = ((chunk: unknown, ...rest: unknown[]) => {
2328
+ stderrWrites.push(typeof chunk === "string" ? chunk : String(chunk));
2329
+ return originalStderrWrite(chunk as string, ...(rest as []));
2330
+ }) as typeof process.stderr.write;
2331
+
2332
+ try {
2333
+ await runDaemonTick({
2334
+ root: tempRoot,
2335
+ ...THRESHOLDS,
2336
+ _tmux: tmuxAllAlive(),
2337
+ _triage: triageAlways("extend"),
2338
+ _nudge: nudgeMock.nudge,
2339
+ _eventStore: null,
2340
+ _runIdWarnState: warnState,
2341
+ });
2342
+
2343
+ // Tick again to confirm the warning dedupes for the same cause.
2344
+ await runDaemonTick({
2345
+ root: tempRoot,
2346
+ ...THRESHOLDS,
2347
+ _tmux: tmuxAllAlive(),
2348
+ _triage: triageAlways("extend"),
2349
+ _nudge: nudgeMock.nudge,
2350
+ _eventStore: null,
2351
+ _runIdWarnState: warnState,
2352
+ });
2353
+ } finally {
2354
+ process.stderr.write = originalStderrWrite;
2355
+ }
2356
+
2357
+ // Run-completion skip is observable: no coordinator nudge was sent.
2358
+ const coordinatorNudges = nudgeMock.calls.filter(
2359
+ (c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
2360
+ );
2361
+ expect(coordinatorNudges).toHaveLength(0);
2362
+
2363
+ // Warning logged exactly once across the two ticks.
2364
+ expect(warnState.missingFileWarned).toBe(true);
2365
+ const missingWarnings = stderrWrites.filter((w) =>
2366
+ w.includes("[WATCHDOG] current-run.txt missing"),
2367
+ );
2368
+ expect(missingWarnings).toHaveLength(1);
2369
+ });
2370
+
2371
+ test("stale current-run.txt id (no row in runs table): warns once per id, skips check (overstory-87bf)", async () => {
2371
+ const staleId = "run-stale-2026-01-01T00-00-00-000Z";
2372
+ const sessions = [
2373
+ makeSession({
2374
+ id: "s1",
2375
+ agentName: "builder-one",
2376
+ capability: "builder",
2377
+ tmuxSession: "overstory-agent-fake-builder-one",
2378
+ state: "completed",
2379
+ runId: staleId,
2380
+ lastActivity: new Date().toISOString(),
2381
+ }),
2382
+ makeSession({
2383
+ id: "s2",
2384
+ agentName: "builder-two",
2385
+ capability: "builder",
2386
+ tmuxSession: "overstory-agent-fake-builder-two",
2387
+ state: "completed",
2388
+ runId: staleId,
2389
+ lastActivity: new Date().toISOString(),
2390
+ }),
2391
+ ];
2392
+
2393
+ writeSessionsToStore(tempRoot, sessions);
2394
+ // Write current-run.txt but DO NOT seed the runs table — the lookup
2395
+ // will return null, exercising the stale-id branch.
2396
+ await Bun.write(join(tempRoot, ".overstory", "current-run.txt"), staleId);
2397
+
2398
+ const nudgeMock = nudgeTracker();
2399
+ const warnState = freshRunIdWarnState(); // shared by both ticks below so "warn once" can be observed
2400
+
2401
+ const stderrWrites: string[] = []; // every chunk written to stderr while the ticks run
2402
+ const originalStderrWrite = process.stderr.write.bind(process.stderr); // kept so the finally block can restore it
2403
+ process.stderr.write = ((chunk: unknown, ...rest: unknown[]) => {
2404
+ stderrWrites.push(typeof chunk === "string" ? chunk : String(chunk));
2405
+ return originalStderrWrite(chunk as string, ...(rest as [])); // still forward to the real stderr
2406
+ }) as typeof process.stderr.write;
2407
+
2408
+ try {
2409
+ await runDaemonTick({
2410
+ root: tempRoot,
2411
+ ...THRESHOLDS,
2412
+ _tmux: tmuxAllAlive(),
2413
+ _triage: triageAlways("extend"),
2414
+ _nudge: nudgeMock.nudge,
2415
+ _eventStore: null,
2416
+ _runIdWarnState: warnState,
2417
+ });
2418
+
2419
+ await runDaemonTick({ // second tick with identical inputs: must not produce a second warning
2420
+ root: tempRoot,
2421
+ ...THRESHOLDS,
2422
+ _tmux: tmuxAllAlive(),
2423
+ _triage: triageAlways("extend"),
2424
+ _nudge: nudgeMock.nudge,
2425
+ _eventStore: null,
2426
+ _runIdWarnState: warnState,
2427
+ });
2428
+ } finally {
2429
+ process.stderr.write = originalStderrWrite; // undo the stderr patch even if a tick throws
2430
+ }
2431
+
2432
+ // Run-completion skip is observable: no coordinator nudge.
2433
+ const coordinatorNudges = nudgeMock.calls.filter(
2434
+ (c) => c.agentName === "coordinator" && c.message.includes("WATCHDOG"),
2435
+ );
2436
+ expect(coordinatorNudges).toHaveLength(0);
2437
+
2438
+ // Stale-id was recorded once, missing-file path was NOT triggered.
2439
+ expect(warnState.unknownIds.has(staleId)).toBe(true);
2440
+ expect(warnState.missingFileWarned).toBe(false);
2441
+ const staleWarnings = stderrWrites.filter((w) =>
2442
+ w.includes(`points to unknown run "${staleId}"`),
2443
+ );
2444
+ expect(staleWarnings).toHaveLength(1); // exactly one warning across the two ticks
2445
+ });
2447
+ });
2448
+
2449
+ // === buildCompletionMessage unit tests ===
2450
+
2451
+ describe("buildCompletionMessage", () => {
2452
+ const testRunId = "run-test-123"; // run id passed to every buildCompletionMessage call in this suite
2453
+
2454
+ test("all scouts → contains 'scout' and 'Ready for next phase'", () => {
2455
+ const sessions = [
2456
+ makeSession({ capability: "scout", agentName: "scout-1" }),
2457
+ makeSession({ capability: "scout", agentName: "scout-2" }),
2458
+ ];
2459
+ const msg = buildCompletionMessage(sessions, testRunId);
2460
+ expect(msg).toContain("scout");
2461
+ expect(msg).toContain("Ready for next phase");
2462
+ expect(msg).not.toContain("merge/cleanup");
2463
+ });
2464
+
2465
+ test("all builders → contains 'builder' and 'Awaiting lead verification' (not merge authorization)", () => {
2466
+ const sessions = [
2467
+ makeSession({ capability: "builder", agentName: "builder-1" }),
2468
+ makeSession({ capability: "builder", agentName: "builder-2" }),
2469
+ ];
2470
+ const msg = buildCompletionMessage(sessions, testRunId);
2471
+ expect(msg).toContain("builder");
2472
+ expect(msg).toContain("Awaiting lead verification");
2473
+ expect(msg).not.toContain("merge/cleanup");
2474
+ });
2475
+
2476
+ test("all reviewers → contains 'reviewer' and 'Reviews done'", () => {
2477
+ const sessions = [makeSession({ capability: "reviewer", agentName: "reviewer-1" })];
2478
+ const msg = buildCompletionMessage(sessions, testRunId);
2479
+ expect(msg).toContain("reviewer");
2480
+ expect(msg).toContain("Reviews done");
2481
+ });
2482
+
2483
+ test("all leads → contains 'lead' and 'Ready for merge/cleanup'", () => {
2484
+ const sessions = [makeSession({ capability: "lead", agentName: "lead-1" })];
2485
+ const msg = buildCompletionMessage(sessions, testRunId);
2486
+ expect(msg).toContain("lead");
2487
+ expect(msg).toContain("Ready for merge/cleanup");
2488
+ });
2489
+
2490
+ test("all mergers → contains 'merger' and 'Merges done'", () => {
2491
+ const sessions = [makeSession({ capability: "merger", agentName: "merger-1" })];
2492
+ const msg = buildCompletionMessage(sessions, testRunId);
2493
+ expect(msg).toContain("merger");
2494
+ expect(msg).toContain("Merges done");
2495
+ });
2496
+
2497
+ test("mixed capabilities → contains breakdown and 'Ready for next steps'", () => {
2498
+ const sessions = [
2499
+ makeSession({ capability: "scout", agentName: "scout-1" }),
2500
+ makeSession({ capability: "builder", agentName: "builder-1" }),
2501
+ ];
2502
+ const msg = buildCompletionMessage(sessions, testRunId);
2503
+ expect(msg).toContain("(builder, scout)"); // breakdown lists builder first although the sessions are passed scout-first
2504
+ expect(msg).toContain("Ready for next steps");
2505
+ });
2506
+
2507
+ test("message includes the run ID", () => {
2508
+ const sessions = [makeSession({ capability: "builder", agentName: "builder-1" })];
2509
+ const msg = buildCompletionMessage(sessions, testRunId);
2510
+ expect(msg).toContain(testRunId);
2511
+ });
2512
+
2513
+ test("message includes the worker count", () => {
2514
+ const sessions = [
2515
+ makeSession({ capability: "scout", agentName: "scout-1" }),
2516
+ makeSession({ capability: "scout", agentName: "scout-2" }),
2517
+ makeSession({ capability: "scout", agentName: "scout-3" }),
2518
+ ];
2519
+ const msg = buildCompletionMessage(sessions, testRunId);
2520
+ expect(msg).toContain("3"); // NOTE(review): testRunId "run-test-123" already contains "3", so this passes whenever the run id is in the message — consider a stricter match
2521
+ });
2522
+
2523
+ // overstory-e130: zombie workers must surface in the message so the coordinator
2524
+ // reads "have terminated (...)" instead of being misled into "have completed".
2525
+ test("mix of completed and zombie workers → 'have terminated' with completed/zombie qualifier", () => {
2526
+ const sessions = [
2527
+ makeSession({ capability: "builder", agentName: "builder-1", state: "completed" }),
2528
+ makeSession({ capability: "builder", agentName: "builder-2", state: "zombie" }),
2529
+ makeSession({ capability: "builder", agentName: "builder-3", state: "completed" }),
2530
+ ];
2531
+ const msg = buildCompletionMessage(sessions, testRunId);
2532
+ expect(msg).toContain("have terminated");
2533
+ expect(msg).toContain("(2 completed, 1 zombie)");
2534
+ expect(msg).not.toContain("have completed");
2535
+ // Capability-specific suffix is preserved
2536
+ expect(msg).toContain("Awaiting lead verification");
2537
+ });
2538
+
2539
+ test("all-zombie batch → '(0 completed, N zombie)' qualifier", () => {
2540
+ const sessions = [
2541
+ makeSession({ capability: "scout", agentName: "scout-1", state: "zombie" }),
2542
+ makeSession({ capability: "scout", agentName: "scout-2", state: "zombie" }),
2543
+ ];
2544
+ const msg = buildCompletionMessage(sessions, testRunId);
2545
+ expect(msg).toContain("have terminated");
2546
+ expect(msg).toContain("(0 completed, 2 zombie)");
2547
+ expect(msg).toContain("Ready for next phase");
2548
+ });
2549
+
2550
+ test("mixed-capability batch with zombies includes both qualifier and capability breakdown", () => {
2551
+ const sessions = [
2552
+ makeSession({ capability: "scout", agentName: "scout-1", state: "completed" }),
2553
+ makeSession({ capability: "builder", agentName: "builder-1", state: "zombie" }),
2554
+ ];
2555
+ const msg = buildCompletionMessage(sessions, testRunId);
2556
+ expect(msg).toContain("have terminated");
2557
+ expect(msg).toContain("(1 completed, 1 zombie)");
2558
+ expect(msg).toContain("(builder, scout)");
2559
+ expect(msg).toContain("Ready for next steps");
2560
+ });
2561
+
2562
+ test("all-completed batch keeps existing 'have completed' phrasing (no zombie qualifier)", () => {
2563
+ const sessions = [
2564
+ makeSession({ capability: "builder", agentName: "builder-1", state: "completed" }),
2565
+ makeSession({ capability: "builder", agentName: "builder-2", state: "completed" }),
2566
+ ];
2567
+ const msg = buildCompletionMessage(sessions, testRunId);
2568
+ expect(msg).toContain("have completed");
2569
+ expect(msg).not.toContain("have terminated");
2570
+ expect(msg).not.toContain("zombie");
2571
+ });
2572
+ });
2573
+
2574
+ // === Bug fix tests: headless agent kill blast radius + stale detection ===
2575
+
2576
+ describe("headless agent kill blast radius fix (Bug 1)", () => {
2577
+ /**
2578
+ * Process-control double for the daemon: `killTree` only records the pid in `killed` and kills nothing.
2579
+ * `isAlive` is NOT mocked — it probes the real pid via `process.kill(pid, 0)` and returns false when that throws.
2580
+ */
2581
+ function processTracker(): {
2582
+ isAlive: (pid: number) => boolean;
2583
+ killTree: (pid: number) => Promise<void>;
2584
+ killed: number[];
2585
+ } {
2586
+ const killed: number[] = []; // pids passed to killTree, in call order
2587
+ return {
2588
+ isAlive: (pid: number) => {
2589
+ try {
2590
+ process.kill(pid, 0); // signal 0: existence check only, nothing is delivered to the process
2591
+ return true;
2592
+ } catch {
2593
+ return false;
2594
+ }
2595
+ },
2596
+ killTree: async (pid: number) => {
2597
+ killed.push(pid);
2598
+ },
2599
+ killed,
2600
+ };
2601
+ }
2602
+
2603
+ test("headless agent at escalation level 3 kills PID, not tmux session", async () => {
2604
+ const nudgeIntervalMs = 60_000;
2605
+ // stalledSince is 4 intervals ago — expectedLevel = floor(4) = 4, clamped to MAX (3)
2606
+ const stalledSince = new Date(Date.now() - 4 * nudgeIntervalMs).toISOString();
2607
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2608
+
2609
+ const session = makeSession({
2610
+ agentName: "headless-stalled",
2611
+ tmuxSession: "", // headless
2612
+ pid: process.pid, // alive PID — ZFC won't trigger direct terminate
2613
+ state: "stalled",
2614
+ lastActivity: staleActivity,
2615
+ escalationLevel: 2, // stored level before the tick; per the stalledSince comment the tick should reach level 3
2616
+ stalledSince,
2617
+ });
2618
+
2619
+ writeSessionsToStore(tempRoot, [session]);
2620
+
2621
+ const proc = processTracker(); // killTree calls are recorded in proc.killed, nothing is really killed
2622
+ // tmux mock: isSessionAlive("") returns true — simulates prefix-match bug scenario
2623
+ const tmuxMock = tmuxWithLiveness({ "": true });
2624
+
2625
+ await runDaemonTick({
2626
+ root: tempRoot,
2627
+ ...THRESHOLDS,
2628
+ nudgeIntervalMs,
2629
+ tier1Enabled: false,
2630
+ _tmux: tmuxMock,
2631
+ _triage: triageAlways("extend"),
2632
+ _process: proc,
2633
+ _eventStore: null,
2634
+ _recordFailure: async () => {},
2635
+ _getConnection: () => undefined,
2636
+ _removeConnection: () => {},
2637
+ _tailerRegistry: new Map(),
2638
+ _findLatestStdoutLog: async () => null,
2639
+ });
2640
+
2641
+ // PID was killed via killTree, NOT via tmux killSession("")
2642
+ expect(proc.killed).toContain(process.pid);
2643
+ expect(tmuxMock.killed).not.toContain("");
2644
+ });
2645
+
2646
+ test("headless agent direct terminate kills PID, not tmux", async () => {
2647
+ // PID 999999 is virtually guaranteed not to exist — health check sees it as dead
2648
+ const deadPid = 999999;
2649
+ const session = makeSession({
2650
+ agentName: "headless-dead-pid",
2651
+ tmuxSession: "", // headless
2652
+ pid: deadPid,
2653
+ state: "working",
2654
+ lastActivity: new Date().toISOString(), // fresh activity: staleness is not what triggers the terminate here
2655
+ });
2656
+
2657
+ writeSessionsToStore(tempRoot, [session]);
2658
+
2659
+ const proc = processTracker(); // killTree calls are recorded in proc.killed, nothing is really killed
2660
+ // tmux mock: isSessionAlive("") returns true — would kill everything without the fix
2661
+ const tmuxMock = tmuxWithLiveness({ "": true });
2662
+
2663
+ await runDaemonTick({
2664
+ root: tempRoot,
2665
+ ...THRESHOLDS,
2666
+ _tmux: tmuxMock,
2667
+ _triage: triageAlways("extend"),
2668
+ _process: proc,
2669
+ _eventStore: null,
2670
+ _recordFailure: async () => {},
2671
+ _getConnection: () => undefined,
2672
+ _removeConnection: () => {},
2673
+ _tailerRegistry: new Map(),
2674
+ _findLatestStdoutLog: async () => null,
2675
+ });
2676
+
2677
+ // Should have attempted PID kill, NOT tmux killSession("")
2678
+ expect(proc.killed).toContain(deadPid);
2679
+ expect(tmuxMock.killed).not.toContain("");
2680
+ });
2681
+
2682
+ test("triage terminate on headless agent kills PID, not tmux", async () => {
2683
+ const nudgeIntervalMs = 60_000;
2684
+ // stalledSince is 2.5 intervals ago — expectedLevel = floor(2.5) = 2 → triage fires
2685
+ const stalledSince = new Date(Date.now() - 2.5 * nudgeIntervalMs).toISOString();
2686
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2687
+
2688
+ const session = makeSession({
2689
+ agentName: "headless-triage-terminate",
2690
+ tmuxSession: "", // headless
2691
+ pid: process.pid, // alive
2692
+ state: "stalled",
2693
+ lastActivity: staleActivity,
2694
+ escalationLevel: 1,
2695
+ stalledSince,
2696
+ });
2697
+
2698
+ writeSessionsToStore(tempRoot, [session]);
2699
+
2700
+ const proc = processTracker(); // killTree calls are recorded in proc.killed, nothing is really killed
2701
+ const tmuxMock = tmuxWithLiveness({ "": true });
2702
+
2703
+ await runDaemonTick({
2704
+ root: tempRoot,
2705
+ ...THRESHOLDS,
2706
+ nudgeIntervalMs,
2707
+ tier1Enabled: true,
2708
+ _tmux: tmuxMock,
2709
+ _triage: triageAlways("terminate"), // AI triage says terminate
2710
+ _nudge: nudgeTracker().nudge, // tracker is not kept: nudges are accepted but never inspected in this test
2711
+ _process: proc,
2712
+ _eventStore: null,
2713
+ _recordFailure: async () => {},
2714
+ _getConnection: () => undefined,
2715
+ _removeConnection: () => {},
2716
+ _tailerRegistry: new Map(),
2717
+ _findLatestStdoutLog: async () => null,
2718
+ });
2719
+
2720
+ // Should have killed the PID, not the tmux session
2721
+ expect(proc.killed).toContain(process.pid);
2722
+ expect(tmuxMock.killed).not.toContain("");
2723
+ });
2724
+ });
2725
+
2726
+ describe("headless agent stale detection via events.db (Bug 2)", () => {
2727
+ test("headless agent with recent events in events.db is not flagged stale", async () => {
2728
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2729
+
2730
+ const session = makeSession({
2731
+ agentName: "headless-active",
2732
+ tmuxSession: "", // headless
2733
+ pid: process.pid, // alive
2734
+ state: "working",
2735
+ lastActivity: staleActivity, // stale — would trigger escalate without event fallback
2736
+ });
2737
+
2738
+ writeSessionsToStore(tempRoot, [session]);
2739
+
2740
+ const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2741
+ const eventStore = createEventStore(eventsDbPath);
2742
+
2743
+ try {
2744
+ // Insert a recent event for this agent (within the stale threshold window)
2745
+ eventStore.insert({
2746
+ runId: null,
2747
+ agentName: "headless-active",
2748
+ sessionId: null,
2749
+ eventType: "tool_end",
2750
+ toolName: "Read",
2751
+ toolArgs: null,
2752
+ toolDurationMs: 100,
2753
+ level: "info",
2754
+ data: null,
2755
+ });
2756
+
2757
+ const checks: HealthCheck[] = []; // filled by the onHealthCheck callback below
2758
+
2759
+ await runDaemonTick({
2760
+ root: tempRoot,
2761
+ ...THRESHOLDS,
2762
+ onHealthCheck: (c) => checks.push(c),
2763
+ _tmux: tmuxAllAlive(),
2764
+ _triage: triageAlways("extend"),
2765
+ _process: { isAlive: () => true, killTree: async () => {} }, // every pid reports alive; kills are no-ops
2766
+ _eventStore: eventStore,
2767
+ _recordFailure: async () => {},
2768
+ _getConnection: () => undefined,
2769
+ _removeConnection: () => {},
2770
+ _tailerRegistry: new Map(),
2771
+ _findLatestStdoutLog: async () => null,
2772
+ });
2773
+
2774
+ // Recent events found — lastActivity was refreshed, agent is NOT stalled
2775
+ expect(checks).toHaveLength(1);
2776
+ expect(checks[0]?.action).toBe("none");
2777
+ expect(checks[0]?.state).toBe("working");
2778
+
2779
+ const reloaded = readSessionsFromStore(tempRoot);
2780
+ expect(reloaded[0]?.state).toBe("working");
2781
+ } finally {
2782
+ eventStore.close(); // close the events.db handle even when an assertion above throws
2783
+ }
2784
+ });
2785
+
2786
+ test("headless agent with no recent events IS flagged stale", async () => {
2787
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2788
+
2789
+ const session = makeSession({
2790
+ agentName: "headless-silent",
2791
+ tmuxSession: "", // headless
2792
+ pid: process.pid, // alive
2793
+ state: "working",
2794
+ lastActivity: staleActivity, // stale
2795
+ });
2796
+
2797
+ writeSessionsToStore(tempRoot, [session]);
2798
+
2799
+ const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2800
+ const eventStore = createEventStore(eventsDbPath); // store is created but deliberately left empty
2801
+
2802
+ try {
2803
+ // No events inserted for this agent — event fallback finds nothing
2804
+
2805
+ const checks: HealthCheck[] = []; // filled by the onHealthCheck callback below
2806
+
2807
+ await runDaemonTick({
2808
+ root: tempRoot,
2809
+ ...THRESHOLDS,
2810
+ onHealthCheck: (c) => checks.push(c),
2811
+ _tmux: tmuxAllAlive(),
2812
+ _triage: triageAlways("extend"),
2813
+ _process: { isAlive: () => true, killTree: async () => {} }, // every pid reports alive; kills are no-ops
2814
+ _eventStore: eventStore,
2815
+ _recordFailure: async () => {},
2816
+ _getConnection: () => undefined,
2817
+ _removeConnection: () => {},
2818
+ _tailerRegistry: new Map(),
2819
+ _findLatestStdoutLog: async () => null,
2820
+ });
2821
+
2822
+ // No recent events — lastActivity stays stale, agent IS flagged stalled
2823
+ expect(checks).toHaveLength(1);
2824
+ expect(checks[0]?.action).toBe("escalate");
2825
+ } finally {
2826
+ eventStore.close(); // close the events.db handle even when an assertion above throws
2827
+ }
2828
+ });
2829
+
2830
+ test("spawn-per-turn worker (pid=null) is NOT flagged zombie when actively emitting events (overstory-7a34)", async () => {
2831
+ // Repro: ov sling --capability lead → freshly slung headless lead has
2832
+ // tmuxSession='' AND pid=null (no persistent process between turns).
2833
+ // Previously the daemon's event-based liveness fallback was gated by
2834
+ // `pid !== null`, so spawn-per-turn workers' lastActivity was never
2835
+ // refreshed from events.db and they would flip to stalled / zombie
2836
+ // despite ov feed showing live tool activity.
2837
+ const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2838
+
2839
+ const session = makeSession({
2840
+ agentName: "spawn-per-turn-lead",
2841
+ capability: "lead",
2842
+ tmuxSession: "", // headless
2843
+ pid: null, // spawn-per-turn: no persistent process between turns
2844
+ state: "working",
2845
+ lastActivity: staleActivity, // stale — would flip without event fallback
2846
+ });
2847
+
2848
+ writeSessionsToStore(tempRoot, [session]);
2849
+
2850
+ const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2851
+ const eventStore = createEventStore(eventsDbPath);
2852
+
2853
+ try {
2854
+ // Insert a recent tool event for this agent (matches ov feed activity)
2855
+ eventStore.insert({
2856
+ runId: null,
2857
+ agentName: "spawn-per-turn-lead",
2858
+ sessionId: null,
2859
+ eventType: "tool_end",
2860
+ toolName: "Edit",
2861
+ toolArgs: null,
2862
+ toolDurationMs: 50,
2863
+ level: "info",
2864
+ data: null,
2865
+ });
2866
+
2867
+ const checks: HealthCheck[] = []; // filled by the onHealthCheck callback below
2868
+
2869
+ await runDaemonTick({
2870
+ root: tempRoot,
2871
+ ...THRESHOLDS,
2872
+ onHealthCheck: (c) => checks.push(c),
2873
+ _tmux: tmuxAllAlive(),
2874
+ _triage: triageAlways("extend"),
2875
+ _process: { isAlive: () => true, killTree: async () => {} }, // every pid reports alive; kills are no-ops
2876
+ _eventStore: eventStore,
2877
+ _recordFailure: async () => {},
2878
+ _getConnection: () => undefined,
2879
+ _removeConnection: () => {},
2880
+ _tailerRegistry: new Map(),
2881
+ _findLatestStdoutLog: async () => null,
2882
+ });
2883
+
2884
+ // lastActivity refreshed from events.db → spawn-per-turn evaluation
2885
+ // path keeps the agent active (action=none), NOT zombie. The
2886
+ // healthy classification reports `between_turns` (overstory-3087)
2887
+ // for spawn-per-turn workers; the legacy `working` row stays at
2888
+ // `working` on disk because the matrix does not list `working` as
2889
+ // a predecessor of `between_turns` and the CAS rejects the write
2890
+ // (the substate cycle is reserved for the turn-runner).
2891
+ expect(checks).toHaveLength(1);
2892
+ expect(checks[0]?.action).toBe("none");
2893
+ expect(checks[0]?.state).toBe("between_turns"); // state reported by the health check
2894
+
2895
+ const reloaded = readSessionsFromStore(tempRoot);
2896
+ expect(reloaded[0]?.state).toBe("working"); // state persisted in the session store is unchanged
2897
+ } finally {
2898
+ eventStore.close(); // close the events.db handle even when an assertion above throws
2899
+ }
2900
+ });
2901
+ });
2902
+
2903
+ // ============================================================
2904
+ // startDaemon() shutdown cleanup
2905
+ // ============================================================
2906
+
2907
+ describe("startDaemon() stop() cleans up tailer registry", () => {
2908
+ let tempRoot: string; // suite-local temp dir, created and removed by the hooks below
2909
+
2910
+ beforeEach(async () => {
2911
+ tempRoot = await createTempRoot();
2912
+ });
2913
+
2914
+ afterEach(async () => {
2915
+ await cleanupTempDir(tempRoot);
2916
+ });
2917
+
2918
+ test("stop() calls handle.stop() on all registry entries and empties the map", async () => {
2919
+ // Build a fake tailer registry with two entries.
2920
+ const stopped: Record<string, boolean> = { tailer1: false, tailer2: false }; // each fake tailer's stop() flips its flag to true
2921
+
2922
+ const registry = new Map<string, { agentName: string; logPath: string; stop(): void }>([
2923
+ [
2924
+ "agent-one",
2925
+ {
2926
+ agentName: "agent-one",
2927
+ logPath: "/fake/one/stdout.log",
2928
+ stop: () => {
2929
+ stopped.tailer1 = true;
2930
+ },
2931
+ },
2932
+ ],
2933
+ [
2934
+ "agent-two",
2935
+ {
2936
+ agentName: "agent-two",
2937
+ logPath: "/fake/two/stdout.log",
2938
+ stop: () => {
2939
+ stopped.tailer2 = true;
2940
+ },
2941
+ },
2942
+ ],
2943
+ ]);
2944
+
2945
+ // Use a long interval so the periodic tick never fires during this test.
2946
+ const daemon = startDaemon({
2947
+ root: tempRoot,
2948
+ intervalMs: 60_000,
2949
+ ...THRESHOLDS,
2950
+ _tmux: { isSessionAlive: async () => false, killSession: async () => {} },
2951
+ _nudge: async () => ({ delivered: false }),
2952
+ _process: { isAlive: () => false, killTree: async () => {} },
2953
+ _triage: async () => "extend",
2954
+ _recordFailure: async () => {},
2955
+ _getConnection: () => undefined,
2956
+ _removeConnection: () => {},
2957
+ _eventStore: null,
2958
+ _mailStore: null,
2959
+ _tailerRegistry: registry, // pre-populated registry whose cleanup is asserted below
2960
+ _tailerFactory: () => ({ agentName: "", logPath: "", stop: () => {} }),
2961
+ _findLatestStdoutLog: async () => null,
2962
+ });
2963
+
2964
+ // Allow the first (immediate) tick to settle.
2965
+ await new Promise<void>((resolve) => setTimeout(resolve, 20)); // NOTE(review): fixed 20 ms wait — timing-based, may be flaky on a slow machine
2966
+
2967
+ daemon.stop();
2968
+
2969
+ expect(stopped.tailer1).toBe(true);
2970
+ expect(stopped.tailer2).toBe(true);
2971
+ expect(registry.size).toBe(0);
2972
+ });
2973
+ });
2974
+
2975
+ // ============================================================
2976
+ // RPC getState() timeout removes stale connection
2977
+ // ============================================================
2978
+
2979
+ describe("RPC getState() timeout removes stale connection", () => {
2980
+ test("_removeConnection is called when getState() rejects", async () => {
2981
+ const session = makeSession({
2982
+ agentName: "rpc-agent",
2983
+ tmuxSession: "", // headless
2984
+ pid: process.pid, // alive
2985
+ state: "working",
2986
+ lastActivity: new Date().toISOString(),
2987
+ });
2988
+
2989
+ writeSessionsToStore(tempRoot, [session]);
2990
+
2991
+ const removedNames: string[] = []; // agent names passed to _removeConnection
2992
+
2993
+ await runDaemonTick({
2994
+ root: tempRoot,
2995
+ ...THRESHOLDS,
2996
+ _tmux: { isSessionAlive: async () => false, killSession: async () => {} },
2997
+ _triage: triageAlways("extend"),
2998
+ _process: { isAlive: () => true, killTree: async () => {} },
2999
+ _eventStore: null,
3000
+ _recordFailure: async () => {},
3001
+ _getConnection: (name: string) => {
3002
+ if (name !== "rpc-agent") return undefined;
3003
+ return {
3004
+ getState: () => Promise.reject(new Error("connection error")), // rejects immediately: a rejection, not an elapsed timeout, is what this test exercises
3005
+ sendPrompt: async () => {},
3006
+ followUp: async () => {},
3007
+ abort: async () => {},
3008
+ close: () => {},
3009
+ };
3010
+ },
3011
+ _removeConnection: (name: string) => {
3012
+ removedNames.push(name);
3013
+ },
3014
+ _tailerRegistry: new Map(),
3015
+ _findLatestStdoutLog: async () => null,
3016
+ _mailStore: null,
3017
+ });
3018
+
3019
+ expect(removedNames).toContain("rpc-agent");
3020
+ });
3021
+ });
3022
+
3023
+ // ============================================================
3024
+ // Triage concurrency limit (_maxTriagePerTick)
3025
+ // ============================================================
3026
+
3027
+ describe("triage concurrency limit (_maxTriagePerTick)", () => {
3028
+ test("only _maxTriagePerTick triage calls happen when multiple sessions need level-2 escalation", async () => {
3029
+ const staleActivity = new Date(Date.now() - 60_000).toISOString(); // 60 s ago
3030
+ const stalledSince = new Date(Date.now() - 130_000).toISOString(); // 130 s ago: just over two of the 60 s nudge intervals set below
3031
+
3032
+ // 4 sessions all at escalation level 2
3033
+ const sessions: AgentSession[] = [
3034
+ makeSession({
3035
+ id: "s-1",
3036
+ agentName: "agent-1",
3037
+ tmuxSession: "ov-agent-1",
3038
+ state: "stalled",
3039
+ lastActivity: staleActivity,
3040
+ escalationLevel: 2,
3041
+ stalledSince,
3042
+ }),
3043
+ makeSession({
3044
+ id: "s-2",
3045
+ agentName: "agent-2",
3046
+ tmuxSession: "ov-agent-2",
3047
+ state: "stalled",
3048
+ lastActivity: staleActivity,
3049
+ escalationLevel: 2,
3050
+ stalledSince,
3051
+ }),
3052
+ makeSession({
3053
+ id: "s-3",
3054
+ agentName: "agent-3",
3055
+ tmuxSession: "ov-agent-3",
3056
+ state: "stalled",
3057
+ lastActivity: staleActivity,
3058
+ escalationLevel: 2,
3059
+ stalledSince,
3060
+ }),
3061
+ makeSession({
3062
+ id: "s-4",
3063
+ agentName: "agent-4",
3064
+ tmuxSession: "ov-agent-4",
3065
+ state: "stalled",
3066
+ lastActivity: staleActivity,
3067
+ escalationLevel: 2,
3068
+ stalledSince,
3069
+ }),
3070
+ ];
3071
+
3072
+ writeSessionsToStore(tempRoot, sessions);
3073
+
3074
+ let triageCallCount = 0; // incremented once per triage invocation
3075
+ const triageMock = async (_opts: { agentName: string; root: string; lastActivity: string }) => {
3076
+ triageCallCount++;
3077
+ return "extend" as const;
3078
+ };
3079
+
3080
+ await runDaemonTick({
3081
+ root: tempRoot,
3082
+ ...THRESHOLDS,
3083
+ nudgeIntervalMs: 60_000,
3084
+ tier1Enabled: true,
3085
+ _maxTriagePerTick: 2, // the cap under test
3086
+ _tmux: tmuxWithLiveness({
3087
+ "ov-agent-1": true,
3088
+ "ov-agent-2": true,
3089
+ "ov-agent-3": true,
3090
+ "ov-agent-4": true,
3091
+ }),
3092
+ _triage: triageMock,
3093
+ _nudge: nudgeTracker().nudge,
3094
+ _eventStore: null,
3095
+ _recordFailure: async () => {},
3096
+ _getConnection: () => undefined,
3097
+ _removeConnection: () => {},
3098
+ _tailerRegistry: new Map(),
3099
+ _findLatestStdoutLog: async () => null,
3100
+ _mailStore: null,
3101
+ });
3102
+
3103
+ // Only 2 of the 4 sessions should have triggered triage
3104
+ expect(triageCallCount).toBe(2);
3105
+ });
3106
+ });
3107
+
3108
+ // ============================================================
3109
+ // RuntimeConnection-aware kill and liveness (overstory-32cd)
3110
+ // ============================================================
3111
+
3112
+ describe("killAgent uses RuntimeConnection.abort() when available", () => {
3113
+ const deadPid = 999999;
3114
+
3115
+ function connProcessTracker(): {
3116
+ isAlive: (pid: number) => boolean;
3117
+ killTree: (pid: number) => Promise<void>;
3118
+ killed: number[];
3119
+ } {
3120
+ const killed: number[] = [];
3121
+ return {
3122
+ isAlive: (pid: number) => {
3123
+ try {
3124
+ process.kill(pid, 0);
3125
+ return true;
3126
+ } catch {
3127
+ return false;
3128
+ }
2051
3129
  },
2052
3130
  killTree: async (pid: number) => {
2053
3131
  killed.push(pid);
@@ -2056,55 +3134,64 @@ describe("headless agent kill blast radius fix (Bug 1)", () => {
2056
3134
  };
2057
3135
  }
2058
3136
 
2059
- test("headless agent at escalation level 3 kills PID, not tmux session", async () => {
2060
- const nudgeIntervalMs = 60_000;
2061
- // stalledSince is 4 intervals ago — expectedLevel = floor(4) = 4, clamped to MAX (3)
2062
- const stalledSince = new Date(Date.now() - 4 * nudgeIntervalMs).toISOString();
2063
- const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2064
-
3137
+ // Test A: killAgent uses connection.abort() when a connection is registered
3138
+ test("Test A: abort() called for ZFC-terminated headless agent with registered connection", async () => {
2065
3139
  const session = makeSession({
2066
- agentName: "headless-stalled",
3140
+ agentName: "headless-conn-agent",
2067
3141
  tmuxSession: "", // headless
2068
- pid: process.pid, // alive PID — ZFC won't trigger direct terminate
2069
- state: "stalled",
2070
- lastActivity: staleActivity,
2071
- escalationLevel: 2,
2072
- stalledSince,
3142
+ pid: deadPid, // dead PID — ZFC fires (pidAlive=false)
3143
+ state: "working",
3144
+ lastActivity: new Date().toISOString(),
2073
3145
  });
2074
3146
 
2075
3147
  writeSessionsToStore(tempRoot, [session]);
2076
3148
 
2077
- const proc = processTracker();
2078
- // tmux mock: isSessionAlive("") returns true — simulates prefix-match bug scenario
3149
+ let abortCount = 0;
3150
+ const removedNames: string[] = [];
3151
+ const proc = connProcessTracker();
2079
3152
  const tmuxMock = tmuxWithLiveness({ "": true });
2080
3153
 
2081
3154
  await runDaemonTick({
2082
3155
  root: tempRoot,
2083
3156
  ...THRESHOLDS,
2084
- nudgeIntervalMs,
2085
- tier1Enabled: false,
2086
3157
  _tmux: tmuxMock,
2087
3158
  _triage: triageAlways("extend"),
2088
3159
  _process: proc,
2089
3160
  _eventStore: null,
2090
3161
  _recordFailure: async () => {},
2091
- _getConnection: () => undefined,
2092
- _removeConnection: () => {},
3162
+ _getConnection: (name: string) => {
3163
+ if (name !== "headless-conn-agent") return undefined;
3164
+ return {
3165
+ getState: async () => ({ status: "working" as const }),
3166
+ sendPrompt: async () => {},
3167
+ followUp: async () => {},
3168
+ abort: async () => {
3169
+ abortCount++;
3170
+ },
3171
+ close: () => {},
3172
+ };
3173
+ },
3174
+ _removeConnection: (name: string) => {
3175
+ removedNames.push(name);
3176
+ },
2093
3177
  _tailerRegistry: new Map(),
2094
3178
  _findLatestStdoutLog: async () => null,
3179
+ _mailStore: null,
2095
3180
  });
2096
3181
 
2097
- // PID was killed via killTree, NOT via tmux killSession("")
2098
- expect(proc.killed).toContain(process.pid);
2099
- expect(tmuxMock.killed).not.toContain("");
3182
+ // abort() called exactly once
3183
+ expect(abortCount).toBe(1);
3184
+ // killTree NOT called (abort succeeded)
3185
+ expect(proc.killed).toHaveLength(0);
3186
+ // removeConnection called for the agent
3187
+ expect(removedNames).toContain("headless-conn-agent");
2100
3188
  });
2101
3189
 
2102
- test("headless agent direct terminate kills PID, not tmux", async () => {
2103
- // PID 999999 is virtually guaranteed not to exist — health check sees it as dead
2104
- const deadPid = 999999;
3190
+ // Test B: killAgent falls back to killTree when conn.abort() throws
3191
+ test("Test B: killTree called as fallback when abort() throws", async () => {
2105
3192
  const session = makeSession({
2106
- agentName: "headless-dead-pid",
2107
- tmuxSession: "", // headless
3193
+ agentName: "headless-abort-fail",
3194
+ tmuxSession: "",
2108
3195
  pid: deadPid,
2109
3196
  state: "working",
2110
3197
  lastActivity: new Date().toISOString(),
@@ -2112,8 +3199,9 @@ describe("headless agent kill blast radius fix (Bug 1)", () => {
2112
3199
 
2113
3200
  writeSessionsToStore(tempRoot, [session]);
2114
3201
 
2115
- const proc = processTracker();
2116
- // tmux mock: isSessionAlive("") returns true — would kill everything without the fix
3202
+ let abortCalled = false;
3203
+ const removedNames: string[] = [];
3204
+ const proc = connProcessTracker();
2117
3205
  const tmuxMock = tmuxWithLiveness({ "": true });
2118
3206
 
2119
3207
  await runDaemonTick({
@@ -2124,27 +3212,47 @@ describe("headless agent kill blast radius fix (Bug 1)", () => {
2124
3212
  _process: proc,
2125
3213
  _eventStore: null,
2126
3214
  _recordFailure: async () => {},
2127
- _getConnection: () => undefined,
2128
- _removeConnection: () => {},
3215
+ _getConnection: (name: string) => {
3216
+ if (name !== "headless-abort-fail") return undefined;
3217
+ return {
3218
+ getState: async () => ({ status: "working" as const }),
3219
+ sendPrompt: async () => {},
3220
+ followUp: async () => {},
3221
+ abort: async () => {
3222
+ abortCalled = true;
3223
+ throw new Error("process already dead");
3224
+ },
3225
+ close: () => {},
3226
+ };
3227
+ },
3228
+ _removeConnection: (name: string) => {
3229
+ removedNames.push(name);
3230
+ },
2129
3231
  _tailerRegistry: new Map(),
2130
3232
  _findLatestStdoutLog: async () => null,
3233
+ _mailStore: null,
2131
3234
  });
2132
3235
 
2133
- // Should have attempted PID kill, NOT tmux killSession("")
3236
+ // abort() was attempted
3237
+ expect(abortCalled).toBe(true);
3238
+ // killTree called as defense-in-depth fallback
2134
3239
  expect(proc.killed).toContain(deadPid);
2135
- expect(tmuxMock.killed).not.toContain("");
3240
+ // removeConnection still called (before fallback)
3241
+ expect(removedNames).toContain("headless-abort-fail");
2136
3242
  });
2137
3243
 
2138
- test("triage terminate on headless agent kills PID, not tmux", async () => {
3244
+ // Test C: killAgent uses conn.abort() for triage-terminate path (level 2 → terminate)
3245
+ test("Test C: abort() called in triage-terminate path (level 2 → terminate verdict)", async () => {
2139
3246
  const nudgeIntervalMs = 60_000;
2140
- // stalledSince is 2.5 intervals ago — expectedLevel = floor(2.5) = 2 → triage fires
3247
+ // stalledSince 2.5 intervals ago — expectedLevel = floor(2.5) = 2 → triage fires
2141
3248
  const stalledSince = new Date(Date.now() - 2.5 * nudgeIntervalMs).toISOString();
3249
+ // staleActivity: 2x staleThreshold (60s) — stale but not zombie, so escalate fires
2142
3250
  const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2143
3251
 
2144
3252
  const session = makeSession({
2145
- agentName: "headless-triage-terminate",
2146
- tmuxSession: "", // headless
2147
- pid: process.pid, // alive
3253
+ agentName: "headless-triage-conn",
3254
+ tmuxSession: "",
3255
+ pid: process.pid, // alive — ZFC won't fire; escalation path triggers triage
2148
3256
  state: "stalled",
2149
3257
  lastActivity: staleActivity,
2150
3258
  escalationLevel: 1,
@@ -2153,7 +3261,9 @@ describe("headless agent kill blast radius fix (Bug 1)", () => {
2153
3261
 
2154
3262
  writeSessionsToStore(tempRoot, [session]);
2155
3263
 
2156
- const proc = processTracker();
3264
+ let abortCount = 0;
3265
+ const removedNames: string[] = [];
3266
+ const proc = connProcessTracker();
2157
3267
  const tmuxMock = tmuxWithLiveness({ "": true });
2158
3268
 
2159
3269
  await runDaemonTick({
@@ -2162,228 +3272,178 @@ describe("headless agent kill blast radius fix (Bug 1)", () => {
2162
3272
  nudgeIntervalMs,
2163
3273
  tier1Enabled: true,
2164
3274
  _tmux: tmuxMock,
2165
- _triage: triageAlways("terminate"), // AI triage says terminate
3275
+ _triage: triageAlways("terminate"),
2166
3276
  _nudge: nudgeTracker().nudge,
2167
3277
  _process: proc,
2168
3278
  _eventStore: null,
2169
3279
  _recordFailure: async () => {},
2170
- _getConnection: () => undefined,
2171
- _removeConnection: () => {},
3280
+ // getState returns "error" so lastActivity is NOT refreshed — stale condition preserved
3281
+ _getConnection: (name: string) => {
3282
+ if (name !== "headless-triage-conn") return undefined;
3283
+ return {
3284
+ getState: async () => ({ status: "error" as const }),
3285
+ sendPrompt: async () => {},
3286
+ followUp: async () => {},
3287
+ abort: async () => {
3288
+ abortCount++;
3289
+ },
3290
+ close: () => {},
3291
+ };
3292
+ },
3293
+ _removeConnection: (name: string) => {
3294
+ removedNames.push(name);
3295
+ },
2172
3296
  _tailerRegistry: new Map(),
2173
3297
  _findLatestStdoutLog: async () => null,
3298
+ _mailStore: null,
2174
3299
  });
2175
3300
 
2176
- // Should have killed the PID, not the tmux session
2177
- expect(proc.killed).toContain(process.pid);
2178
- expect(tmuxMock.killed).not.toContain("");
3301
+ // abort() called via triage-terminate killAgent path
3302
+ expect(abortCount).toBe(1);
3303
+ // killTree NOT called (abort succeeded)
3304
+ expect(proc.killed).toHaveLength(0);
3305
+ // tmux killSession NOT called (headless path only)
3306
+ expect(tmuxMock.killed).toHaveLength(0);
2179
3307
  });
2180
- });
2181
-
2182
- describe("headless agent stale detection via events.db (Bug 2)", () => {
2183
- test("headless agent with recent events in events.db is not flagged stale", async () => {
2184
- const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2185
3308
 
3309
+ // Test D: integration — watchdog terminates a hung headless agent without touching tmux
3310
+ test("Test D: conn.abort() called, tmux.killSession and killTree NEVER called, state → zombie", async () => {
2186
3311
  const session = makeSession({
2187
- agentName: "headless-active",
2188
- tmuxSession: "", // headless
2189
- pid: process.pid, // alive
3312
+ agentName: "headless-zombie-conn",
3313
+ tmuxSession: "",
3314
+ pid: deadPid, // dead PID → ZFC fires
2190
3315
  state: "working",
2191
- lastActivity: staleActivity, // stale would trigger escalate without event fallback
3316
+ lastActivity: new Date(Date.now() - THRESHOLDS.zombieThresholdMs * 2).toISOString(),
2192
3317
  });
2193
3318
 
2194
3319
  writeSessionsToStore(tempRoot, [session]);
2195
3320
 
2196
- const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2197
- const eventStore = createEventStore(eventsDbPath);
2198
-
2199
- try {
2200
- // Insert a recent event for this agent (within the stale threshold window)
2201
- eventStore.insert({
2202
- runId: null,
2203
- agentName: "headless-active",
2204
- sessionId: null,
2205
- eventType: "tool_end",
2206
- toolName: "Read",
2207
- toolArgs: null,
2208
- toolDurationMs: 100,
2209
- level: "info",
2210
- data: null,
2211
- });
2212
-
2213
- const checks: HealthCheck[] = [];
2214
-
2215
- await runDaemonTick({
2216
- root: tempRoot,
2217
- ...THRESHOLDS,
2218
- onHealthCheck: (c) => checks.push(c),
2219
- _tmux: tmuxAllAlive(),
2220
- _triage: triageAlways("extend"),
2221
- _process: { isAlive: () => true, killTree: async () => {} },
2222
- _eventStore: eventStore,
2223
- _recordFailure: async () => {},
2224
- _getConnection: () => undefined,
2225
- _removeConnection: () => {},
2226
- _tailerRegistry: new Map(),
2227
- _findLatestStdoutLog: async () => null,
2228
- });
3321
+ let abortCount = 0;
3322
+ const proc = connProcessTracker();
3323
+ const tmuxMock = tmuxWithLiveness({ "": true });
2229
3324
 
2230
- // Recent events found — lastActivity was refreshed, agent is NOT stalled
2231
- expect(checks).toHaveLength(1);
2232
- expect(checks[0]?.action).toBe("none");
2233
- expect(checks[0]?.state).toBe("working");
3325
+ await runDaemonTick({
3326
+ root: tempRoot,
3327
+ ...THRESHOLDS,
3328
+ _tmux: tmuxMock,
3329
+ _triage: triageAlways("extend"),
3330
+ _process: proc,
3331
+ _eventStore: null,
3332
+ _recordFailure: async () => {},
3333
+ _getConnection: (name: string) => {
3334
+ if (name !== "headless-zombie-conn") return undefined;
3335
+ return {
3336
+ getState: async () => ({ status: "working" as const }),
3337
+ sendPrompt: async () => {},
3338
+ followUp: async () => {},
3339
+ abort: async () => {
3340
+ abortCount++;
3341
+ },
3342
+ close: () => {},
3343
+ };
3344
+ },
3345
+ _removeConnection: () => {},
3346
+ _tailerRegistry: new Map(),
3347
+ _findLatestStdoutLog: async () => null,
3348
+ _mailStore: null,
3349
+ });
2234
3350
 
2235
- const reloaded = readSessionsFromStore(tempRoot);
2236
- expect(reloaded[0]?.state).toBe("working");
2237
- } finally {
2238
- eventStore.close();
2239
- }
3351
+ // abort() called
3352
+ expect(abortCount).toBe(1);
3353
+ // tmux.killSession NEVER called
3354
+ expect(tmuxMock.killed).toHaveLength(0);
3355
+ // killTree NEVER called (abort succeeded)
3356
+ expect(proc.killed).toHaveLength(0);
3357
+ // Agent state transitioned to zombie
3358
+ const reloaded = readSessionsFromStore(tempRoot);
3359
+ expect(reloaded[0]?.state).toBe("zombie");
2240
3360
  });
2241
3361
 
2242
- test("headless agent with no recent events IS flagged stale", async () => {
2243
- const staleActivity = new Date(Date.now() - THRESHOLDS.staleThresholdMs * 2).toISOString();
2244
-
3362
+ // Test E: liveness getState() returning error status drives the agent toward zombie
3363
+ test("Test E: getState()=error + dead PID tmuxAlive=false, state=zombie, terminate, abort called", async () => {
2245
3364
  const session = makeSession({
2246
- agentName: "headless-silent",
2247
- tmuxSession: "", // headless
2248
- pid: process.pid, // alive
3365
+ agentName: "headless-error-conn",
3366
+ tmuxSession: "",
3367
+ pid: deadPid, // dead → ZFC fires: pidAlive=false
2249
3368
  state: "working",
2250
- lastActivity: staleActivity, // stale
3369
+ lastActivity: new Date().toISOString(), // fresh — time-based won't fire; ZFC does
2251
3370
  });
2252
3371
 
2253
3372
  writeSessionsToStore(tempRoot, [session]);
2254
3373
 
2255
- const eventsDbPath = join(tempRoot, ".overstory", "events.db");
2256
- const eventStore = createEventStore(eventsDbPath);
2257
-
2258
- try {
2259
- // No events inserted for this agent — event fallback finds nothing
2260
-
2261
- const checks: HealthCheck[] = [];
2262
-
2263
- await runDaemonTick({
2264
- root: tempRoot,
2265
- ...THRESHOLDS,
2266
- onHealthCheck: (c) => checks.push(c),
2267
- _tmux: tmuxAllAlive(),
2268
- _triage: triageAlways("extend"),
2269
- _process: { isAlive: () => true, killTree: async () => {} },
2270
- _eventStore: eventStore,
2271
- _recordFailure: async () => {},
2272
- _getConnection: () => undefined,
2273
- _removeConnection: () => {},
2274
- _tailerRegistry: new Map(),
2275
- _findLatestStdoutLog: async () => null,
2276
- });
2277
-
2278
- // No recent events — lastActivity stays stale, agent IS flagged stalled
2279
- expect(checks).toHaveLength(1);
2280
- expect(checks[0]?.action).toBe("escalate");
2281
- } finally {
2282
- eventStore.close();
2283
- }
2284
- });
2285
- });
2286
-
2287
- // ============================================================
2288
- // startDaemon() shutdown cleanup
2289
- // ============================================================
2290
-
2291
- describe("startDaemon() stop() cleans up tailer registry", () => {
2292
- let tempRoot: string;
2293
-
2294
- beforeEach(async () => {
2295
- tempRoot = await createTempRoot();
2296
- });
2297
-
2298
- afterEach(async () => {
2299
- await cleanupTempDir(tempRoot);
2300
- });
2301
-
2302
- test("stop() calls handle.stop() on all registry entries and empties the map", async () => {
2303
- // Build a fake tailer registry with two entries.
2304
- const stopped: Record<string, boolean> = { tailer1: false, tailer2: false };
2305
-
2306
- const registry = new Map<string, { agentName: string; logPath: string; stop(): void }>([
2307
- [
2308
- "agent-one",
2309
- {
2310
- agentName: "agent-one",
2311
- logPath: "/fake/one/stdout.log",
2312
- stop: () => {
2313
- stopped["tailer1"] = true;
2314
- },
2315
- },
2316
- ],
2317
- [
2318
- "agent-two",
2319
- {
2320
- agentName: "agent-two",
2321
- logPath: "/fake/two/stdout.log",
2322
- stop: () => {
2323
- stopped["tailer2"] = true;
2324
- },
2325
- },
2326
- ],
2327
- ]);
3374
+ let abortCount = 0;
3375
+ const proc = connProcessTracker();
3376
+ const checks: HealthCheck[] = [];
3377
+ const tmuxMock = tmuxWithLiveness({ "": true });
2328
3378
 
2329
- // Use a long interval so the periodic tick never fires during this test.
2330
- const daemon = startDaemon({
3379
+ await runDaemonTick({
2331
3380
  root: tempRoot,
2332
- intervalMs: 60_000,
2333
3381
  ...THRESHOLDS,
2334
- _tmux: { isSessionAlive: async () => false, killSession: async () => {} },
2335
- _nudge: async () => ({ delivered: false }),
2336
- _process: { isAlive: () => false, killTree: async () => {} },
2337
- _triage: async () => "extend",
3382
+ onHealthCheck: (c) => checks.push(c),
3383
+ _tmux: tmuxMock,
3384
+ _triage: triageAlways("extend"),
3385
+ _process: proc,
3386
+ _eventStore: null,
2338
3387
  _recordFailure: async () => {},
2339
- _getConnection: () => undefined,
3388
+ _getConnection: (name: string) => {
3389
+ if (name !== "headless-error-conn") return undefined;
3390
+ return {
3391
+ getState: async () => ({ status: "error" as const }),
3392
+ sendPrompt: async () => {},
3393
+ followUp: async () => {},
3394
+ abort: async () => {
3395
+ abortCount++;
3396
+ },
3397
+ close: () => {},
3398
+ };
3399
+ },
2340
3400
  _removeConnection: () => {},
2341
- _eventStore: null,
2342
- _mailStore: null,
2343
- _tailerRegistry: registry,
2344
- _tailerFactory: () => ({ agentName: "", logPath: "", stop: () => {} }),
3401
+ _tailerRegistry: new Map(),
2345
3402
  _findLatestStdoutLog: async () => null,
3403
+ _mailStore: null,
2346
3404
  });
2347
3405
 
2348
- // Allow the first (immediate) tick to settle.
2349
- await new Promise<void>((resolve) => setTimeout(resolve, 20));
2350
-
2351
- daemon.stop();
2352
-
2353
- expect(stopped["tailer1"]).toBe(true);
2354
- expect(stopped["tailer2"]).toBe(true);
2355
- expect(registry.size).toBe(0);
3406
+ // Health check produced
3407
+ expect(checks).toHaveLength(1);
3408
+ // tmuxAlive=false because getState returned "error"
3409
+ expect(checks[0]?.tmuxAlive).toBe(false);
3410
+ // ZFC fires (pidAlive=false for dead PID) → zombie/terminate
3411
+ expect(checks[0]?.state).toBe("zombie");
3412
+ expect(checks[0]?.action).toBe("terminate");
3413
+ // abort() called via killAgent
3414
+ expect(abortCount).toBe(1);
3415
+ // killTree NOT called (abort succeeded)
3416
+ expect(proc.killed).toHaveLength(0);
2356
3417
  });
2357
- });
2358
-
2359
- // ============================================================
2360
- // RPC getState() timeout removes stale connection
2361
- // ============================================================
2362
3418
 
2363
- describe("RPC getState() timeout removes stale connection", () => {
2364
- test("_removeConnection is called when getState() rejects", async () => {
3419
+ // Test F: connection.getState() rejection drops the connection and falls back to tmux
3420
+ test("Test F: getState() rejection → removeConnection called, tmux liveness used as fallback", async () => {
2365
3421
  const session = makeSession({
2366
- agentName: "rpc-agent",
2367
- tmuxSession: "", // headless
3422
+ agentName: "headless-reject-conn",
3423
+ tmuxSession: "",
2368
3424
  pid: process.pid, // alive
2369
3425
  state: "working",
2370
- lastActivity: new Date().toISOString(),
3426
+ lastActivity: new Date().toISOString(), // fresh — no stale
2371
3427
  });
2372
3428
 
2373
3429
  writeSessionsToStore(tempRoot, [session]);
2374
3430
 
2375
3431
  const removedNames: string[] = [];
3432
+ const checks: HealthCheck[] = [];
3433
+ // tmux returns alive — used as fallback when getState rejects
3434
+ const tmuxMock = tmuxWithLiveness({ "": true });
2376
3435
 
2377
3436
  await runDaemonTick({
2378
3437
  root: tempRoot,
2379
3438
  ...THRESHOLDS,
2380
- _tmux: { isSessionAlive: async () => false, killSession: async () => {} },
3439
+ onHealthCheck: (c) => checks.push(c),
3440
+ _tmux: tmuxMock,
2381
3441
  _triage: triageAlways("extend"),
2382
3442
  _process: { isAlive: () => true, killTree: async () => {} },
2383
3443
  _eventStore: null,
2384
3444
  _recordFailure: async () => {},
2385
3445
  _getConnection: (name: string) => {
2386
- if (name !== "rpc-agent") return undefined;
3446
+ if (name !== "headless-reject-conn") return undefined;
2387
3447
  return {
2388
3448
  getState: () => Promise.reject(new Error("connection error")),
2389
3449
  sendPrompt: async () => {},
@@ -2400,91 +3460,262 @@ describe("RPC getState() timeout removes stale connection", () => {
2400
3460
  _mailStore: null,
2401
3461
  });
2402
3462
 
2403
- expect(removedNames).toContain("rpc-agent");
3463
+ // removeConnection called (connection dropped after rejection)
3464
+ expect(removedNames).toContain("headless-reject-conn");
3465
+ // Agent is healthy (alive PID, fresh lastActivity, tmux fallback returns alive)
3466
+ expect(checks).toHaveLength(1);
3467
+ expect(checks[0]?.action).toBe("none");
2404
3468
  });
2405
3469
  });
2406
3470
 
2407
3471
  // ============================================================
2408
- // Triage concurrency limit (_maxTriagePerTick)
3472
+ // worker_died notification (overstory-c111)
2409
3473
  // ============================================================
2410
3474
 
2411
- describe("triage concurrency limit (_maxTriagePerTick)", () => {
2412
- test("only _maxTriagePerTick triage calls happen when multiple sessions need level-2 escalation", async () => {
2413
- const staleActivity = new Date(Date.now() - 60_000).toISOString();
2414
- const stalledSince = new Date(Date.now() - 130_000).toISOString();
3475
+ describe("worker_died parent notification", () => {
3476
+ let tempRoot: string;
2415
3477
 
2416
- // 4 sessions all at escalation level 2
2417
- const sessions: AgentSession[] = [
2418
- makeSession({
2419
- id: "s-1",
2420
- agentName: "agent-1",
2421
- tmuxSession: "ov-agent-1",
2422
- state: "stalled",
2423
- lastActivity: staleActivity,
2424
- escalationLevel: 2,
2425
- stalledSince,
2426
- }),
2427
- makeSession({
2428
- id: "s-2",
2429
- agentName: "agent-2",
2430
- tmuxSession: "ov-agent-2",
2431
- state: "stalled",
2432
- lastActivity: staleActivity,
2433
- escalationLevel: 2,
2434
- stalledSince,
2435
- }),
2436
- makeSession({
2437
- id: "s-3",
2438
- agentName: "agent-3",
2439
- tmuxSession: "ov-agent-3",
2440
- state: "stalled",
2441
- lastActivity: staleActivity,
2442
- escalationLevel: 2,
2443
- stalledSince,
2444
- }),
2445
- makeSession({
2446
- id: "s-4",
2447
- agentName: "agent-4",
2448
- tmuxSession: "ov-agent-4",
2449
- state: "stalled",
2450
- lastActivity: staleActivity,
2451
- escalationLevel: 2,
2452
- stalledSince,
2453
- }),
2454
- ];
3478
+ beforeEach(async () => {
3479
+ tempRoot = await createTempRoot();
3480
+ });
2455
3481
 
2456
- writeSessionsToStore(tempRoot, sessions);
3482
+ afterEach(async () => {
3483
+ await cleanupTempDir(tempRoot);
3484
+ });
2457
3485
 
2458
- let triageCallCount = 0;
2459
- const triageMock = async (_opts: { agentName: string; root: string; lastActivity: string }) => {
2460
- triageCallCount++;
2461
- return "extend" as const;
2462
- };
3486
+ test("terminate path sends worker_died mail to parentAgent on first zombify", async () => {
3487
+ const session = makeSession({
3488
+ agentName: "dead-builder",
3489
+ capability: "builder",
3490
+ parentAgent: "lead-1",
3491
+ tmuxSession: "overstory-dead-builder",
3492
+ state: "working",
3493
+ lastActivity: new Date().toISOString(),
3494
+ });
2463
3495
 
2464
- await runDaemonTick({
2465
- root: tempRoot,
2466
- ...THRESHOLDS,
2467
- nudgeIntervalMs: 60_000,
2468
- tier1Enabled: true,
2469
- _maxTriagePerTick: 2,
2470
- _tmux: tmuxWithLiveness({
2471
- "ov-agent-1": true,
2472
- "ov-agent-2": true,
2473
- "ov-agent-3": true,
2474
- "ov-agent-4": true,
2475
- }),
2476
- _triage: triageMock,
2477
- _nudge: nudgeTracker().nudge,
2478
- _eventStore: null,
2479
- _recordFailure: async () => {},
2480
- _getConnection: () => undefined,
2481
- _removeConnection: () => {},
2482
- _tailerRegistry: new Map(),
2483
- _findLatestStdoutLog: async () => null,
2484
- _mailStore: null,
3496
+ writeSessionsToStore(tempRoot, [session]);
3497
+
3498
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3499
+ const mailStore = createMailStore(mailDb);
3500
+
3501
+ try {
3502
+ await runDaemonTick({
3503
+ root: tempRoot,
3504
+ ...THRESHOLDS,
3505
+ _tmux: tmuxWithLiveness({ "overstory-dead-builder": false }),
3506
+ _triage: triageAlways("extend"),
3507
+ _recordFailure: async () => {},
3508
+ _mailStore: mailStore,
3509
+ });
3510
+
3511
+ const inbox = mailStore.getUnread("lead-1");
3512
+ expect(inbox).toHaveLength(1);
3513
+ const msg = inbox[0];
3514
+ expect(msg).toBeDefined();
3515
+ if (!msg) return;
3516
+ expect(msg.type).toBe("worker_died");
3517
+ expect(msg.from).toBe("dead-builder");
3518
+ expect(msg.to).toBe("lead-1");
3519
+ expect(msg.priority).toBe("high");
3520
+ expect(msg.payload).not.toBeNull();
3521
+ const payload = JSON.parse(msg.payload ?? "{}") as WorkerDiedPayload;
3522
+ expect(payload.agentName).toBe("dead-builder");
3523
+ expect(payload.capability).toBe("builder");
3524
+ expect(payload.terminatedBy).toBe("tier0");
3525
+ expect(payload.reason).toBeTruthy();
3526
+ } finally {
3527
+ mailStore.close();
3528
+ }
3529
+ });
3530
+
3531
+ test("orphan agent (parentAgent=null) receives no notification", async () => {
3532
+ const session = makeSession({
3533
+ agentName: "orphan-agent",
3534
+ parentAgent: null,
3535
+ tmuxSession: "overstory-orphan-agent",
3536
+ state: "working",
3537
+ lastActivity: new Date().toISOString(),
2485
3538
  });
2486
3539
 
2487
- // Only 2 of the 4 sessions should have triggered triage
2488
- expect(triageCallCount).toBe(2);
3540
+ writeSessionsToStore(tempRoot, [session]);
3541
+
3542
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3543
+ const mailStore = createMailStore(mailDb);
3544
+
3545
+ try {
3546
+ await runDaemonTick({
3547
+ root: tempRoot,
3548
+ ...THRESHOLDS,
3549
+ _tmux: tmuxWithLiveness({ "overstory-orphan-agent": false }),
3550
+ _triage: triageAlways("extend"),
3551
+ _recordFailure: async () => {},
3552
+ _mailStore: mailStore,
3553
+ });
3554
+
3555
+ expect(mailStore.getAll({ type: "worker_died" })).toHaveLength(0);
3556
+ } finally {
3557
+ mailStore.close();
3558
+ }
3559
+ });
3560
+
3561
+ test("re-tick on already-zombie session does not send a second worker_died", async () => {
3562
+ // Subsequent ticks see the session already in `zombie`. The state matrix
3563
+ // rejects zombie → zombie transitions, so notify is gated on `outcome.ok`.
3564
+ const session = makeSession({
3565
+ agentName: "re-zombie-agent",
3566
+ parentAgent: "lead-2",
3567
+ tmuxSession: "overstory-re-zombie-agent",
3568
+ state: "working",
3569
+ lastActivity: new Date().toISOString(),
3570
+ });
3571
+
3572
+ writeSessionsToStore(tempRoot, [session]);
3573
+
3574
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3575
+ const mailStore = createMailStore(mailDb);
3576
+
3577
+ try {
3578
+ const tickOpts = {
3579
+ root: tempRoot,
3580
+ ...THRESHOLDS,
3581
+ _tmux: tmuxWithLiveness({ "overstory-re-zombie-agent": false }),
3582
+ _triage: triageAlways("extend"),
3583
+ _recordFailure: async () => {},
3584
+ _mailStore: mailStore,
3585
+ };
3586
+ await runDaemonTick(tickOpts);
3587
+ await runDaemonTick(tickOpts);
3588
+ await runDaemonTick(tickOpts);
3589
+
3590
+ expect(mailStore.getAll({ to: "lead-2", type: "worker_died" })).toHaveLength(1);
3591
+ } finally {
3592
+ mailStore.close();
3593
+ }
3594
+ });
3595
+
3596
+ test("notifyParentOnDeath=false suppresses the synthetic mail", async () => {
3597
+ const session = makeSession({
3598
+ agentName: "opt-out-agent",
3599
+ parentAgent: "lead-3",
3600
+ tmuxSession: "overstory-opt-out-agent",
3601
+ state: "working",
3602
+ lastActivity: new Date().toISOString(),
3603
+ });
3604
+
3605
+ writeSessionsToStore(tempRoot, [session]);
3606
+
3607
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3608
+ const mailStore = createMailStore(mailDb);
3609
+
3610
+ try {
3611
+ await runDaemonTick({
3612
+ root: tempRoot,
3613
+ ...THRESHOLDS,
3614
+ notifyParentOnDeath: false,
3615
+ _tmux: tmuxWithLiveness({ "overstory-opt-out-agent": false }),
3616
+ _triage: triageAlways("extend"),
3617
+ _recordFailure: async () => {},
3618
+ _mailStore: mailStore,
3619
+ });
3620
+
3621
+ expect(mailStore.getAll({ type: "worker_died" })).toHaveLength(0);
3622
+ // State should still transition normally
3623
+ const reloaded = readSessionsFromStore(tempRoot);
3624
+ expect(reloaded[0]?.state).toBe("zombie");
3625
+ } finally {
3626
+ mailStore.close();
3627
+ }
3628
+ });
3629
+
3630
+ test("escalation-level-3 terminate also notifies parent with tier0 reason", async () => {
3631
+ // Stalled agent with alive tmux: progressive escalation drives it to level 3
3632
+ // terminate. The notify path runs through the escalation branch, not the
3633
+ // `check.action === "terminate"` branch.
3634
+ const stalledSince = new Date(Date.now() - 4 * 60_000).toISOString();
3635
+ const lastActivity = new Date(Date.now() - 60_000).toISOString();
3636
+ const session = makeSession({
3637
+ agentName: "escalated-agent",
3638
+ parentAgent: "coordinator",
3639
+ tmuxSession: "overstory-escalated-agent",
3640
+ state: "working",
3641
+ lastActivity,
3642
+ stalledSince,
3643
+ escalationLevel: 3,
3644
+ });
3645
+
3646
+ writeSessionsToStore(tempRoot, [session]);
3647
+
3648
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3649
+ const mailStore = createMailStore(mailDb);
3650
+
3651
+ try {
3652
+ await runDaemonTick({
3653
+ root: tempRoot,
3654
+ ...THRESHOLDS,
3655
+ nudgeIntervalMs: 60_000,
3656
+ _tmux: tmuxWithLiveness({ "overstory-escalated-agent": true }),
3657
+ _triage: triageAlways("extend"),
3658
+ _nudge: async () => ({ delivered: true }),
3659
+ _recordFailure: async () => {},
3660
+ _mailStore: mailStore,
3661
+ });
3662
+
3663
+ const inbox = mailStore.getUnread("coordinator");
3664
+ expect(inbox).toHaveLength(1);
3665
+ const msg = inbox[0];
3666
+ if (!msg) return;
3667
+ expect(msg.type).toBe("worker_died");
3668
+ const payload = JSON.parse(msg.payload ?? "{}") as WorkerDiedPayload;
3669
+ expect(payload.terminatedBy).toBe("tier0");
3670
+ expect(payload.reason).toContain("Progressive escalation");
3671
+ } finally {
3672
+ mailStore.close();
3673
+ }
3674
+ });
3675
+
3676
+ test("tier1 triage terminate sets terminatedBy=tier1 in payload", async () => {
3677
+ // stalledSince must produce expectedLevel==2 from nudgeIntervalMs=60_000:
3678
+ // floor(stalledMs / 60_000) === 2 requires 2*60_000 <= stalledMs < 3*60_000.
3679
+ const stalledSince = new Date(Date.now() - 150_000).toISOString();
3680
+ const lastActivity = new Date(Date.now() - 60_000).toISOString();
3681
+ const session = makeSession({
3682
+ agentName: "triaged-agent",
3683
+ parentAgent: "lead-triage",
3684
+ tmuxSession: "overstory-triaged-agent",
3685
+ state: "working",
3686
+ lastActivity,
3687
+ stalledSince,
3688
+ escalationLevel: 2,
3689
+ });
3690
+
3691
+ writeSessionsToStore(tempRoot, [session]);
3692
+
3693
+ const mailDb = join(tempRoot, ".overstory", "mail.db");
3694
+ const mailStore = createMailStore(mailDb);
3695
+
3696
+ try {
3697
+ await runDaemonTick({
3698
+ root: tempRoot,
3699
+ ...THRESHOLDS,
3700
+ nudgeIntervalMs: 60_000,
3701
+ tier1Enabled: true,
3702
+ _tmux: tmuxWithLiveness({ "overstory-triaged-agent": true }),
3703
+ _triage: triageAlways("terminate"),
3704
+ _nudge: async () => ({ delivered: true }),
3705
+ _recordFailure: async () => {},
3706
+ _mailStore: mailStore,
3707
+ });
3708
+
3709
+ const inbox = mailStore.getUnread("lead-triage");
3710
+ expect(inbox).toHaveLength(1);
3711
+ const msg = inbox[0];
3712
+ if (!msg) return;
3713
+ expect(msg.type).toBe("worker_died");
3714
+ const payload = JSON.parse(msg.payload ?? "{}") as WorkerDiedPayload;
3715
+ expect(payload.terminatedBy).toBe("tier1");
3716
+ expect(payload.reason).toContain("AI triage");
3717
+ } finally {
3718
+ mailStore.close();
3719
+ }
2489
3720
  });
2490
3721
  });