@os-eco/overstory-cli 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +4 -2
  2. package/agents/builder.md +10 -1
  3. package/agents/lead.md +106 -5
  4. package/package.json +1 -1
  5. package/src/agents/headless-mail-injector.ts +8 -0
  6. package/src/agents/mail-poll-detect.test.ts +153 -0
  7. package/src/agents/mail-poll-detect.ts +73 -0
  8. package/src/agents/overlay.test.ts +56 -0
  9. package/src/agents/overlay.ts +33 -0
  10. package/src/agents/scope-detect.test.ts +190 -0
  11. package/src/agents/scope-detect.ts +146 -0
  12. package/src/agents/turn-runner.test.ts +862 -0
  13. package/src/agents/turn-runner.ts +225 -8
  14. package/src/commands/agents.ts +9 -0
  15. package/src/commands/coordinator.test.ts +127 -0
  16. package/src/commands/coordinator.ts +71 -4
  17. package/src/commands/dashboard.ts +1 -1
  18. package/src/commands/log.test.ts +131 -0
  19. package/src/commands/log.ts +37 -2
  20. package/src/commands/merge.test.ts +118 -0
  21. package/src/commands/merge.ts +51 -8
  22. package/src/commands/sling.test.ts +104 -0
  23. package/src/commands/sling.ts +95 -8
  24. package/src/commands/stop.test.ts +81 -0
  25. package/src/index.ts +5 -1
  26. package/src/insights/quality-gates.test.ts +141 -0
  27. package/src/insights/quality-gates.ts +156 -0
  28. package/src/logging/theme.ts +4 -0
  29. package/src/merge/predict.test.ts +387 -0
  30. package/src/merge/predict.ts +249 -0
  31. package/src/merge/resolver.ts +1 -1
  32. package/src/mulch/client.ts +3 -3
  33. package/src/sessions/store.test.ts +267 -5
  34. package/src/sessions/store.ts +105 -7
  35. package/src/types.ts +51 -1
  36. package/src/watchdog/daemon.test.ts +124 -2
  37. package/src/watchdog/daemon.ts +27 -12
  38. package/src/watchdog/health.test.ts +133 -8
  39. package/src/watchdog/health.ts +37 -5
  40. package/src/worktree/manager.test.ts +218 -1
  41. package/src/worktree/manager.ts +55 -0
  42. package/src/worktree/tmux.test.ts +25 -0
  43. package/src/worktree/tmux.ts +17 -0
  44. package/templates/overlay.md.tmpl +2 -0
@@ -534,6 +534,108 @@ describe("runTurn", () => {
534
534
  expect(after?.state).toBe("completed");
535
535
  });
536
536
 
537
+ test("turn that runs but does not complete settles to between_turns, not working (overstory-3087)", async () => {
538
+ // Spawn-per-turn substate split: a turn that produced events but did
539
+ // not deliver the terminal mail nor abort must end in `between_turns`
540
+ // so the UI can tell a worker waiting for its next mail batch from
541
+ // one mid-execution. Pre-3087 this settled to `working`.
542
+ seedSession(ctx.sessionsDbPath, { agentName: "settler", state: "booting" });
543
+ const { runtime } = makeSpyRuntime();
544
+ const fake = makeFakeProc();
545
+ const spawnFn: TurnSpawnFn = () => {
546
+ // Force is_error=true so the runner does NOT classify this as a
547
+ // clean exit (which would settle to `completed` via the
548
+ // terminal-mail-missing path). is_error=true keeps cleanResult
549
+ // false, sending us into the observedAnyEvent → between_turns
550
+ // branch we want to test.
551
+ emitFakeTurn(fake, { sessionId: "settler-sid", isError: true });
552
+ fake._exit(0);
553
+ return fake;
554
+ };
555
+
556
+ const result = await runTurn(makeRunOpts(ctx, "settler", { runtime, _spawnFn: spawnFn }));
557
+
558
+ expect(result.cleanResult).toBe(false);
559
+ expect(result.terminalMailObserved).toBe(false);
560
+ expect(result.terminalMailMissing).toBe(false);
561
+ expect(result.finalState).toBe("between_turns");
562
+
563
+ const after = readSession(ctx.sessionsDbPath, "settler");
564
+ expect(after?.state).toBe("between_turns");
565
+ });
566
+
567
+ test("first parser event transitions booting → in_turn (overstory-3087)", async () => {
568
+ // The mid-turn "first event" hook must flip the row out of `booting`
569
+ // (or `between_turns`/`working`) into `in_turn` so observers see the
570
+ // agent as actively executing, distinct from the idle waiting state.
571
+ seedSession(ctx.sessionsDbPath, { agentName: "boots", state: "booting" });
572
+ const { runtime } = makeSpyRuntime();
573
+ const fake = makeFakeProc();
574
+ // Mutable ref so the IIFE assignment is visible to the type checker.
575
+ const captured: { state: string | null } = { state: null };
576
+ const spawnFn: TurnSpawnFn = () => {
577
+ (async () => {
578
+ // Push the init event, then sample the row before result.
579
+ fake._pushLine(
580
+ JSON.stringify({
581
+ type: "system",
582
+ subtype: "init",
583
+ session_id: "boots-sid",
584
+ model: "claude-test",
585
+ }),
586
+ );
587
+ // Yield the event loop so the parser drains the init event
588
+ // and updates the session row before we read it.
589
+ await Bun.sleep(20);
590
+ captured.state = readSession(ctx.sessionsDbPath, "boots")?.state ?? null;
591
+ // Send is_error=true so we settle to between_turns rather than
592
+ // the contract-violation completed path — this test is about
593
+ // the mid-turn transition, not the terminal classification.
594
+ emitFakeTurn(fake, { sessionId: "boots-sid", isError: true });
595
+ fake._exit(0);
596
+ })();
597
+ return fake;
598
+ };
599
+
600
+ await runTurn(makeRunOpts(ctx, "boots", { runtime, _spawnFn: spawnFn }));
601
+
602
+ expect(captured.state).toBe("in_turn");
603
+ });
604
+
605
+ test("between_turns → in_turn → between_turns cycle on a follow-up batch (overstory-3087)", async () => {
606
+ // A spawn-per-turn worker that finished its first turn (state=
607
+ // between_turns) must flip back to in_turn when the next mail batch
608
+ // fires its first parser event, and settle back to between_turns
609
+ // when the turn ends without a terminal mail.
610
+ seedSession(ctx.sessionsDbPath, { agentName: "cycle", state: "between_turns" });
611
+ const { runtime } = makeSpyRuntime();
612
+ const fake = makeFakeProc();
613
+ const captured: { midTurnState: string | null } = { midTurnState: null };
614
+ const spawnFn: TurnSpawnFn = () => {
615
+ (async () => {
616
+ fake._pushLine(
617
+ JSON.stringify({
618
+ type: "system",
619
+ subtype: "init",
620
+ session_id: "cycle-sid",
621
+ model: "claude-test",
622
+ }),
623
+ );
624
+ await Bun.sleep(20);
625
+ captured.midTurnState = readSession(ctx.sessionsDbPath, "cycle")?.state ?? null;
626
+ emitFakeTurn(fake, { sessionId: "cycle-sid", isError: true });
627
+ fake._exit(0);
628
+ })();
629
+ return fake;
630
+ };
631
+
632
+ const result = await runTurn(makeRunOpts(ctx, "cycle", { runtime, _spawnFn: spawnFn }));
633
+
634
+ expect(captured.midTurnState).toBe("in_turn");
635
+ expect(result.initialState).toBe("between_turns");
636
+ expect(result.finalState).toBe("between_turns");
637
+ });
638
+
537
639
  test("clean exit but no worker_done → contract violation, completed + error log (overstory-6071)", async () => {
538
640
  // Pre-fix: claude exiting cleanly without sending the capability's
539
641
  // terminal mail left the session at `working` forever — the process is
@@ -1006,6 +1108,169 @@ describe("runTurn", () => {
1006
1108
  }
1007
1109
  });
1008
1110
 
1111
+ // --- Resume-path parent-notify (overstory-de3c) ---
1112
+ //
1113
+ // The witnessed bug: a spawn-per-turn worker that survived a first-turn
1114
+ // parser stall (worker_died emitted, state→zombie) was re-dispatched by its
1115
+ // parent via `ov sling --recover`. The resumed turn ran, then transitioned
1116
+ // to zombie SILENTLY — no second worker_died mail was ever sent. The lead
1117
+ // blocked forever.
1118
+ //
1119
+ // These tests pin down whether the runner itself is responsible. Each seeds
1120
+ // `claudeSessionId` so the runner exercises the --resume code path, and
1121
+ // asserts that worker_died is still emitted on stall / abort / clean-exit-
1122
+ // without-terminal-mail. If these PASS the runner is exonerated and the
1123
+ // fix is upstream (sling.ts re-spawn upsert dropping parentAgent — H1).
1124
+
1125
+ test("resume-stall: parser stall on a resumed session still emits worker_died (overstory-de3c)", async () => {
1126
+ seedSession(ctx.sessionsDbPath, {
1127
+ agentName: "child-resume-stall",
1128
+ state: "working",
1129
+ parentAgent: "lead-r",
1130
+ taskId: "task-de3c-stall",
1131
+ claudeSessionId: "prior-session",
1132
+ });
1133
+ const { runtime, spawnCalls } = makeSpyRuntime();
1134
+ const fake = makeFakeProc();
1135
+ const spawnFn: TurnSpawnFn = () => {
1136
+ // Emit nothing — the resumed turn parser-stalls.
1137
+ return fake;
1138
+ };
1139
+
1140
+ const sharedMail = createMailStore(ctx.mailDbPath);
1141
+ try {
1142
+ const result = await runTurn({
1143
+ ...makeRunOpts(ctx, "child-resume-stall", {
1144
+ runtime,
1145
+ _spawnFn: spawnFn,
1146
+ }),
1147
+ _mailStore: sharedMail,
1148
+ eventStallTimeoutMs: 50,
1149
+ sigkillDelayMs: 25,
1150
+ });
1151
+
1152
+ expect(result.stallAborted).toBe(true);
1153
+ expect(result.finalState).toBe("zombie");
1154
+
1155
+ // The runtime received the prior session id (resume path exercised).
1156
+ expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
1157
+
1158
+ const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
1159
+ expect(inbox.length).toBe(1);
1160
+ const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
1161
+ terminatedBy?: string;
1162
+ reason?: string;
1163
+ agentName?: string;
1164
+ };
1165
+ expect(payload.terminatedBy).toBe("runner");
1166
+ expect(payload.reason).toContain("stalled");
1167
+ expect(payload.agentName).toBe("child-resume-stall");
1168
+ } finally {
1169
+ sharedMail.close();
1170
+ }
1171
+ });
1172
+
1173
+ test("resume-abort: operator abort on a resumed session still emits worker_died (overstory-de3c)", async () => {
1174
+ seedSession(ctx.sessionsDbPath, {
1175
+ agentName: "child-resume-abort",
1176
+ state: "working",
1177
+ parentAgent: "lead-r",
1178
+ taskId: "task-de3c-abort",
1179
+ claudeSessionId: "prior-session",
1180
+ });
1181
+ const { runtime, spawnCalls } = makeSpyRuntime();
1182
+ const fake = makeFakeProc();
1183
+ const ac = new AbortController();
1184
+ const spawnFn: TurnSpawnFn = () => {
1185
+ fake._pushLine(
1186
+ JSON.stringify({
1187
+ type: "system",
1188
+ subtype: "init",
1189
+ session_id: "prior-session",
1190
+ }),
1191
+ );
1192
+ return fake;
1193
+ };
1194
+
1195
+ const sharedMail = createMailStore(ctx.mailDbPath);
1196
+ try {
1197
+ const runPromise = runTurn({
1198
+ ...makeRunOpts(ctx, "child-resume-abort", {
1199
+ runtime,
1200
+ _spawnFn: spawnFn,
1201
+ abortSignal: ac.signal,
1202
+ sigkillDelayMs: 25,
1203
+ }),
1204
+ _mailStore: sharedMail,
1205
+ });
1206
+ await Bun.sleep(60);
1207
+ ac.abort();
1208
+ const result = await runPromise;
1209
+
1210
+ expect(result.finalState).toBe("zombie");
1211
+ expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
1212
+
1213
+ const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
1214
+ expect(inbox.length).toBe(1);
1215
+ const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
1216
+ terminatedBy?: string;
1217
+ reason?: string;
1218
+ agentName?: string;
1219
+ };
1220
+ expect(payload.terminatedBy).toBe("runner");
1221
+ expect(payload.reason).toContain("Aborted");
1222
+ expect(payload.agentName).toBe("child-resume-abort");
1223
+ } finally {
1224
+ sharedMail.close();
1225
+ }
1226
+ });
1227
+
1228
+ test("resume-terminalMailMissing: clean exit on a resumed session still emits worker_died (overstory-de3c)", async () => {
1229
+ seedSession(ctx.sessionsDbPath, {
1230
+ agentName: "child-resume-noop",
1231
+ state: "working",
1232
+ parentAgent: "lead-r",
1233
+ taskId: "task-de3c-noop",
1234
+ claudeSessionId: "prior-session",
1235
+ });
1236
+ const { runtime, spawnCalls } = makeSpyRuntime();
1237
+ const fake = makeFakeProc();
1238
+ const spawnFn: TurnSpawnFn = () => {
1239
+ emitFakeTurn(fake, { sessionId: "prior-session", isError: false });
1240
+ fake._exit(0);
1241
+ return fake;
1242
+ };
1243
+
1244
+ const sharedMail = createMailStore(ctx.mailDbPath);
1245
+ try {
1246
+ const result = await runTurn({
1247
+ ...makeRunOpts(ctx, "child-resume-noop", {
1248
+ runtime,
1249
+ _spawnFn: spawnFn,
1250
+ }),
1251
+ _mailStore: sharedMail,
1252
+ });
1253
+
1254
+ expect(result.cleanResult).toBe(true);
1255
+ expect(result.terminalMailMissing).toBe(true);
1256
+ expect(result.finalState).toBe("completed");
1257
+ expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
1258
+
1259
+ const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
1260
+ expect(inbox.length).toBe(1);
1261
+ const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
1262
+ terminatedBy?: string;
1263
+ reason?: string;
1264
+ agentName?: string;
1265
+ };
1266
+ expect(payload.terminatedBy).toBe("runner");
1267
+ expect(payload.reason).toContain("Clean exit without terminal mail");
1268
+ expect(payload.agentName).toBe("child-resume-noop");
1269
+ } finally {
1270
+ sharedMail.close();
1271
+ }
1272
+ });
1273
+
1009
1274
  test("terminalMailMissing: emits worker_died to parent (overstory-4159)", async () => {
1010
1275
  // Silent-no-op: claude exits cleanly but never sends worker_done. The
1011
1276
  // lead would otherwise block forever waiting for a terminal mail.
@@ -1447,4 +1712,601 @@ describe("runTurn", () => {
1447
1712
  // turn.pid must still be cleaned up regardless.
1448
1713
  expect(existsSync(turnPidPathFor(ctx, "ss-fail"))).toBe(false);
1449
1714
  });
1715
+
1716
+ // ---------- mid-turn lastActivity refresh (overstory-8e61) ----------
1717
+ //
1718
+ // The watchdog's design (src/watchdog/health.ts:242-243) documents that the
1719
+ // runner advances `session.lastActivity` per parser event during a turn.
1720
+ // Without that, a long-running turn looks stalled to the watchdog and the
1721
+ // agent gets zombified mid-flight. These tests pin the per-event refresh
1722
+ // behavior added inside the parser loop.
1723
+
1724
+ test("mid-turn refresh: lastActivity advances when interval=0 forces per-event refresh", async () => {
1725
+ const startedAt = new Date(Date.now() - 60_000).toISOString();
1726
+ seedSession(ctx.sessionsDbPath, {
1727
+ agentName: "midturn-A",
1728
+ state: "working",
1729
+ startedAt,
1730
+ lastActivity: startedAt,
1731
+ });
1732
+ const { runtime } = makeSpyRuntime();
1733
+ const fake = makeFakeProc();
1734
+ const spawnFn: TurnSpawnFn = () => {
1735
+ emitFakeTurn(fake, { sessionId: "midturn-A-session" });
1736
+ fake._exit(0);
1737
+ return fake;
1738
+ };
1739
+
1740
+ await runTurn({
1741
+ ...makeRunOpts(ctx, "midturn-A", { runtime, _spawnFn: spawnFn }),
1742
+ lastActivityRefreshIntervalMs: 0,
1743
+ });
1744
+
1745
+ const after = readSession(ctx.sessionsDbPath, "midturn-A");
1746
+ expect(after?.lastActivity).not.toBe(startedAt);
1747
+ expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
1748
+ new Date(startedAt).getTime(),
1749
+ );
1750
+ });
1751
+
1752
+ test("mid-turn refresh: throttle gates updates by simulated time", async () => {
1753
+ seedSession(ctx.sessionsDbPath, { agentName: "midturn-B", state: "working" });
1754
+ const { runtime } = makeSpyRuntime();
1755
+
1756
+ // Controlled sim clock. `_now` is invoked many times during a turn (for
1757
+ // startedAtMs, log timestamps, durationMs) — only the in-loop calls
1758
+ // matter for the throttle. We advance simTime synchronously between
1759
+ // pushes and yield to the parser between each push so the runner reads
1760
+ // the simTime we set just prior. simTime starts well above the throttle
1761
+ // interval so the first event fires (initial lastActivityRefreshMs=0).
1762
+ let simTime = 5000;
1763
+ const _now = (): Date => new Date(simTime);
1764
+
1765
+ let refreshes = 0;
1766
+ const _onLastActivityRefresh = (): void => {
1767
+ refreshes++;
1768
+ };
1769
+
1770
+ const fake = makeFakeProc();
1771
+ const spawnFn: TurnSpawnFn = () => {
1772
+ (async () => {
1773
+ const sessionId = "midturn-B-session";
1774
+ // Use `system` lines because the claude parser does not batch
1775
+ // them — every system line yields exactly one status event,
1776
+ // driving one runner-loop iteration each. Assistant text would
1777
+ // coalesce inside a flush window and defeat the per-event count.
1778
+ const stamps = [5000, 5500, 6000, 6500, 7000, 7500];
1779
+ for (let i = 0; i < stamps.length; i++) {
1780
+ simTime = stamps[i] ?? 0;
1781
+ fake._pushLine(
1782
+ JSON.stringify({
1783
+ type: "system",
1784
+ subtype: i === 0 ? "init" : "progress",
1785
+ session_id: sessionId,
1786
+ }),
1787
+ );
1788
+ // Yield so the for-await loop body runs to completion against
1789
+ // the simTime value we just set.
1790
+ await Bun.sleep(20);
1791
+ }
1792
+ // Trailing result at the same simTime as the last chunk; with a
1793
+ // 1000ms throttle and last refresh at simTime=7000, this event
1794
+ // at simTime=7500 (delta=500) does not fire.
1795
+ fake._pushLine(
1796
+ JSON.stringify({
1797
+ type: "result",
1798
+ subtype: "success",
1799
+ session_id: sessionId,
1800
+ result: "done",
1801
+ is_error: false,
1802
+ duration_ms: 50,
1803
+ num_turns: 1,
1804
+ }),
1805
+ );
1806
+ await Bun.sleep(20);
1807
+ fake._exit(0);
1808
+ })();
1809
+ return fake;
1810
+ };
1811
+
1812
+ await runTurn({
1813
+ ...makeRunOpts(ctx, "midturn-B", { runtime, _spawnFn: spawnFn }),
1814
+ lastActivityRefreshIntervalMs: 1000,
1815
+ _now,
1816
+ _onLastActivityRefresh,
1817
+ });
1818
+
1819
+ // Stamps 5000, 6000, 7000 fire (gap >= 1000). Stamps 5500, 6500, 7500
1820
+ // are throttled (gap = 500). The trailing result event at 7500 also
1821
+ // throttles. Total expected = 3.
1822
+ expect(refreshes).toBe(3);
1823
+ });
1824
+
1825
+ test("mid-turn refresh: parser throw still leaves lastActivity advanced (overstory-8e61)", async () => {
1826
+ // The end-of-turn `updateSessionLastActivity` (around turn-runner.ts:1112)
1827
+ // does NOT fire when the parser iteration throws — the catch path
1828
+ // rethrows before reaching the cleanup write. The mid-turn refresh
1829
+ // covers this gap so a parser-error turn still leaves lastActivity
1830
+ // fresh, mirroring the documented design at src/watchdog/health.ts:242-243.
1831
+ const startedAt = new Date(Date.now() - 60_000).toISOString();
1832
+ seedSession(ctx.sessionsDbPath, {
1833
+ agentName: "midturn-C",
1834
+ state: "working",
1835
+ startedAt,
1836
+ lastActivity: startedAt,
1837
+ });
1838
+
1839
+ // Custom runtime: yield two valid events, then throw on the next read.
1840
+ // Mirrors a malformed stream-json line arriving after some good events.
1841
+ const base = new ClaudeRuntime();
1842
+ let yielded = 0;
1843
+ const yieldThenThrow: AsyncIterable<unknown> = {
1844
+ [Symbol.asyncIterator]() {
1845
+ return {
1846
+ next(): Promise<IteratorResult<unknown>> {
1847
+ if (yielded++ < 2) {
1848
+ return Promise.resolve({
1849
+ value: {
1850
+ type: "assistant_message",
1851
+ timestamp: new Date().toISOString(),
1852
+ },
1853
+ done: false,
1854
+ });
1855
+ }
1856
+ return Promise.reject(new Error("synthetic stream-json parse error"));
1857
+ },
1858
+ };
1859
+ },
1860
+ };
1861
+ const broken: AgentRuntime = {
1862
+ ...base,
1863
+ id: base.id,
1864
+ stability: base.stability,
1865
+ instructionPath: base.instructionPath,
1866
+ buildSpawnCommand: base.buildSpawnCommand.bind(base),
1867
+ buildPrintCommand: base.buildPrintCommand.bind(base),
1868
+ deployConfig: base.deployConfig.bind(base),
1869
+ detectReady: base.detectReady.bind(base),
1870
+ parseTranscript: base.parseTranscript.bind(base),
1871
+ getTranscriptDir: base.getTranscriptDir.bind(base),
1872
+ buildEnv: base.buildEnv.bind(base),
1873
+ buildDirectSpawn: base.buildDirectSpawn.bind(base),
1874
+ parseEvents: (() => yieldThenThrow) as unknown as AgentRuntime["parseEvents"],
1875
+ };
1876
+
1877
+ const fake = makeFakeProc();
1878
+ const spawnFn: TurnSpawnFn = () => fake;
1879
+
1880
+ let refreshes = 0;
1881
+ await expect(
1882
+ runTurn({
1883
+ ...makeRunOpts(ctx, "midturn-C", { runtime: broken, _spawnFn: spawnFn }),
1884
+ lastActivityRefreshIntervalMs: 0,
1885
+ _onLastActivityRefresh: () => {
1886
+ refreshes++;
1887
+ },
1888
+ }),
1889
+ ).rejects.toThrow(/synthetic stream-json/);
1890
+
1891
+ // Mid-turn refresh fired for at least one of the two pre-throw events.
1892
+ expect(refreshes).toBeGreaterThanOrEqual(1);
1893
+
1894
+ // And the persisted lastActivity reflects the mid-turn write — the
1895
+ // end-of-turn write at line ~1112 was skipped by the parser-throw path.
1896
+ const after = readSession(ctx.sessionsDbPath, "midturn-C");
1897
+ expect(after?.lastActivity).not.toBe(startedAt);
1898
+ expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
1899
+ new Date(startedAt).getTime(),
1900
+ );
1901
+ });
1902
+
1903
+ test("Bash mail-poll detector: warns + records custom event without suppressing tool_use (overstory-c92c)", async () => {
1904
+ // Defense-in-depth: the lead.md prompt forbids Bash mail polling
1905
+ // (overstory-fa84). When a future overlay or contributed agent
1906
+ // reintroduces the pattern, the runner must surface it via the
1907
+ // runner diagnostic sink AND a `mail_poll_detected` event in
1908
+ // events.db, while still recording the original tool_use event
1909
+ // so downstream observability is unaffected.
1910
+ seedSession(ctx.sessionsDbPath, { agentName: "polled", state: "working" });
1911
+ const { runtime } = makeSpyRuntime();
1912
+
1913
+ const fake = makeFakeProc();
1914
+ const sessionId = "polled-session";
1915
+ const pollCommand = "until ov mail list; do sleep 1; done";
1916
+ const spawnFn: TurnSpawnFn = () => {
1917
+ fake._pushLine(
1918
+ JSON.stringify({
1919
+ type: "system",
1920
+ subtype: "init",
1921
+ session_id: sessionId,
1922
+ model: "claude-test",
1923
+ }),
1924
+ );
1925
+ fake._pushLine(
1926
+ JSON.stringify({
1927
+ type: "assistant",
1928
+ session_id: sessionId,
1929
+ message: {
1930
+ role: "assistant",
1931
+ model: "claude-test",
1932
+ content: [
1933
+ {
1934
+ type: "tool_use",
1935
+ id: "toolu_poll_1",
1936
+ name: "Bash",
1937
+ input: { command: pollCommand },
1938
+ },
1939
+ ],
1940
+ },
1941
+ }),
1942
+ );
1943
+ emitFakeTurn(fake, { sessionId });
1944
+ fake._exit(0);
1945
+ return fake;
1946
+ };
1947
+
1948
+ const logs: Array<{ level: string; message: string }> = [];
1949
+ const logger: RunnerLogger = (level, message) => {
1950
+ logs.push({ level, message });
1951
+ };
1952
+
1953
+ const result = await runTurn(
1954
+ makeRunOpts(ctx, "polled", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
1955
+ );
1956
+
1957
+ expect(result.exitCode).toBe(0);
1958
+
1959
+ // Warning was emitted via the runner diagnostic sink (warn level,
1960
+ // message includes "mail-poll").
1961
+ const pollWarn = logs.find((l) => l.level === "warn" && l.message.includes("mail-poll"));
1962
+ expect(pollWarn).toBeDefined();
1963
+
1964
+ const eventStore = createEventStore(ctx.eventsDbPath);
1965
+ try {
1966
+ const events = eventStore.getByAgent("polled");
1967
+
1968
+ // `mail_poll_detected` custom event landed in events.db with the
1969
+ // full (untruncated) command and the matched reason.
1970
+ const detectedEvent = events.find((e) => {
1971
+ if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
1972
+ try {
1973
+ const parsed = JSON.parse(e.data) as { type?: string };
1974
+ return parsed.type === "mail_poll_detected";
1975
+ } catch {
1976
+ return false;
1977
+ }
1978
+ });
1979
+ expect(detectedEvent).toBeDefined();
1980
+ const payload = JSON.parse(detectedEvent?.data ?? "{}") as {
1981
+ type: string;
1982
+ reason: string;
1983
+ command: string;
1984
+ };
1985
+ expect(payload.reason).toBe("until ov mail loop");
1986
+ expect(payload.command).toBe(pollCommand);
1987
+
1988
+ // Regression guard: the original Bash tool_use event MUST still
1989
+ // be recorded — the warning emits IN ADDITION to (not in place
1990
+ // of) the normal recordAgentEvent call.
1991
+ const toolUseEvent = events.find(
1992
+ (e) => e.eventType === "tool_start" && e.toolName === "Bash",
1993
+ );
1994
+ expect(toolUseEvent).toBeDefined();
1995
+ } finally {
1996
+ eventStore.close();
1997
+ }
1998
+ });
1999
+ });
2000
+
2001
+ describe("runTurn scope-violation observability (overstory-9f4d)", () => {
2002
+ let ctx: Ctx;
2003
+
2004
+ beforeEach(async () => {
2005
+ const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-scope-test-"));
2006
+ ctx = {
2007
+ overstoryDir,
2008
+ worktreePath: overstoryDir,
2009
+ projectRoot: overstoryDir,
2010
+ mailDbPath: join(overstoryDir, "mail.db"),
2011
+ eventsDbPath: join(overstoryDir, "events.db"),
2012
+ sessionsDbPath: join(overstoryDir, "sessions.db"),
2013
+ };
2014
+ _resetInProcessLocks();
2015
+ });
2016
+
2017
+ afterEach(async () => {
2018
+ _resetInProcessLocks();
2019
+ await rm(ctx.overstoryDir, { recursive: true, force: true });
2020
+ });
2021
+
2022
+ async function writeOverlayWithScope(scope: string[]): Promise<void> {
2023
+ const dir = join(ctx.worktreePath, ".claude");
2024
+ const { mkdir: mkdirP, writeFile } = await import("node:fs/promises");
2025
+ await mkdirP(dir, { recursive: true });
2026
+ const body = [
2027
+ "## File Scope (exclusive ownership)",
2028
+ "",
2029
+ ...scope.map((p) => `- \`${p}\``),
2030
+ "",
2031
+ "## Expertise",
2032
+ "",
2033
+ "none",
2034
+ ].join("\n");
2035
+ await writeFile(join(dir, "CLAUDE.md"), body);
2036
+ }
2037
+
2038
+ test("builder scope violation without justification emits warn log + scope_violation event", async () => {
2039
+ seedSession(ctx.sessionsDbPath, { agentName: "violator", state: "working" });
2040
+ await writeOverlayWithScope(["src/agents/in-scope.ts"]);
2041
+
2042
+ const { runtime } = makeSpyRuntime();
2043
+ const fake = makeFakeProc();
2044
+ const spawnFn: TurnSpawnFn = () => {
2045
+ (async () => {
2046
+ await Bun.sleep(20);
2047
+ const s = createMailStore(ctx.mailDbPath);
2048
+ try {
2049
+ createMailClient(s).sendProtocol({
2050
+ from: "violator",
2051
+ to: "lead",
2052
+ subject: "Worker done",
2053
+ body: "ok",
2054
+ type: "worker_done",
2055
+ priority: "normal",
2056
+ payload: {
2057
+ taskId: "t",
2058
+ branch: "b",
2059
+ exitCode: 0,
2060
+ filesModified: ["src/other.ts"],
2061
+ },
2062
+ });
2063
+ } finally {
2064
+ s.close();
2065
+ }
2066
+ emitFakeTurn(fake, { sessionId: "violator-session" });
2067
+ fake._exit(0);
2068
+ })();
2069
+ return fake;
2070
+ };
2071
+
2072
+ const logs: Array<{ level: string; message: string }> = [];
2073
+ const logger: RunnerLogger = (level, message) => {
2074
+ logs.push({ level, message });
2075
+ };
2076
+
2077
+ const result = await runTurn({
2078
+ ...makeRunOpts(ctx, "violator", {
2079
+ runtime,
2080
+ _spawnFn: spawnFn,
2081
+ _logWarning: logger,
2082
+ }),
2083
+ _scopeDetect: () => ({
2084
+ violations: ["src/other.ts"],
2085
+ expansionReasons: [],
2086
+ }),
2087
+ });
2088
+
2089
+ expect(result.terminalMailObserved).toBe(true);
2090
+ expect(result.finalState).toBe("completed");
2091
+
2092
+ const warnLog = logs.find(
2093
+ (l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
2094
+ );
2095
+ expect(warnLog).toBeDefined();
2096
+ expect(warnLog?.message).toContain("src/other.ts");
2097
+
2098
+ const eventStore = createEventStore(ctx.eventsDbPath);
2099
+ try {
2100
+ const events = eventStore.getByAgent("violator");
2101
+ const violationEvent = events.find((e) => {
2102
+ if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
2103
+ try {
2104
+ const parsed = JSON.parse(e.data) as { type?: string };
2105
+ return parsed.type === "scope_violation";
2106
+ } catch {
2107
+ return false;
2108
+ }
2109
+ });
2110
+ expect(violationEvent).toBeDefined();
2111
+ const payload = JSON.parse(violationEvent?.data ?? "{}") as {
2112
+ type: string;
2113
+ violations: string[];
2114
+ fileScope: string[];
2115
+ };
2116
+ expect(payload.violations).toEqual(["src/other.ts"]);
2117
+ expect(payload.fileScope).toEqual(["src/agents/in-scope.ts"]);
2118
+ } finally {
2119
+ eventStore.close();
2120
+ }
2121
+ });
2122
+
2123
+ test("expansion_reason in commit log suppresses the warning", async () => {
2124
+ seedSession(ctx.sessionsDbPath, { agentName: "justified", state: "working" });
2125
+ await writeOverlayWithScope(["src/agents/in-scope.ts"]);
2126
+
2127
+ const { runtime } = makeSpyRuntime();
2128
+ const fake = makeFakeProc();
2129
+ const spawnFn: TurnSpawnFn = () => {
2130
+ (async () => {
2131
+ await Bun.sleep(20);
2132
+ const s = createMailStore(ctx.mailDbPath);
2133
+ try {
2134
+ createMailClient(s).sendProtocol({
2135
+ from: "justified",
2136
+ to: "lead",
2137
+ subject: "Worker done",
2138
+ body: "ok",
2139
+ type: "worker_done",
2140
+ priority: "normal",
2141
+ payload: {
2142
+ taskId: "t",
2143
+ branch: "b",
2144
+ exitCode: 0,
2145
+ filesModified: ["src/other.ts"],
2146
+ },
2147
+ });
2148
+ } finally {
2149
+ s.close();
2150
+ }
2151
+ emitFakeTurn(fake, { sessionId: "justified-session" });
2152
+ fake._exit(0);
2153
+ })();
2154
+ return fake;
2155
+ };
2156
+
2157
+ const logs: Array<{ level: string; message: string }> = [];
2158
+ const logger: RunnerLogger = (level, message) => {
2159
+ logs.push({ level, message });
2160
+ };
2161
+
2162
+ const result = await runTurn({
2163
+ ...makeRunOpts(ctx, "justified", {
2164
+ runtime,
2165
+ _spawnFn: spawnFn,
2166
+ _logWarning: logger,
2167
+ }),
2168
+ _scopeDetect: () => ({
2169
+ violations: ["src/other.ts"],
2170
+ expansionReasons: ["needed shared type"],
2171
+ }),
2172
+ });
2173
+
2174
+ expect(result.terminalMailObserved).toBe(true);
2175
+
2176
+ const warnLog = logs.find(
2177
+ (l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
2178
+ );
2179
+ expect(warnLog).toBeUndefined();
2180
+
2181
+ const eventStore = createEventStore(ctx.eventsDbPath);
2182
+ try {
2183
+ const events = eventStore.getByAgent("justified");
2184
+ const violationEvent = events.find((e) => e.data?.includes("scope_violation") ?? false);
2185
+ expect(violationEvent).toBeUndefined();
2186
+ } finally {
2187
+ eventStore.close();
2188
+ }
2189
+ });
2190
+
2191
+ test("prior scope_expansion mail suppresses the warning", async () => {
2192
+ seedSession(ctx.sessionsDbPath, { agentName: "premail", state: "working" });
2193
+ await writeOverlayWithScope(["src/agents/in-scope.ts"]);
2194
+
2195
+ // Pre-seed: a scope_expansion-prefixed mail from this agent.
2196
+ {
2197
+ const s = createMailStore(ctx.mailDbPath);
2198
+ try {
2199
+ createMailClient(s).send({
2200
+ from: "premail",
2201
+ to: "lead",
2202
+ subject: "scope_expansion: needed shared type",
2203
+ body: "heads up",
2204
+ type: "status",
2205
+ priority: "normal",
2206
+ });
2207
+ } finally {
2208
+ s.close();
2209
+ }
2210
+ }
2211
+
2212
+ const { runtime } = makeSpyRuntime();
2213
+ const fake = makeFakeProc();
2214
+ const spawnFn: TurnSpawnFn = () => {
2215
+ (async () => {
2216
+ await Bun.sleep(20);
2217
+ const s = createMailStore(ctx.mailDbPath);
2218
+ try {
2219
+ createMailClient(s).sendProtocol({
2220
+ from: "premail",
2221
+ to: "lead",
2222
+ subject: "Worker done",
2223
+ body: "ok",
2224
+ type: "worker_done",
2225
+ priority: "normal",
2226
+ payload: {
2227
+ taskId: "t",
2228
+ branch: "b",
2229
+ exitCode: 0,
2230
+ filesModified: ["src/other.ts"],
2231
+ },
2232
+ });
2233
+ } finally {
2234
+ s.close();
2235
+ }
2236
+ emitFakeTurn(fake, { sessionId: "premail-session" });
2237
+ fake._exit(0);
2238
+ })();
2239
+ return fake;
2240
+ };
2241
+
2242
+ const logs: Array<{ level: string; message: string }> = [];
2243
+ const logger: RunnerLogger = (level, message) => {
2244
+ logs.push({ level, message });
2245
+ };
2246
+
2247
+ await runTurn({
2248
+ ...makeRunOpts(ctx, "premail", {
2249
+ runtime,
2250
+ _spawnFn: spawnFn,
2251
+ _logWarning: logger,
2252
+ }),
2253
+ _scopeDetect: () => ({
2254
+ violations: ["src/other.ts"],
2255
+ expansionReasons: [],
2256
+ }),
2257
+ });
2258
+
2259
+ const warnLog = logs.find(
2260
+ (l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
2261
+ );
2262
+ expect(warnLog).toBeUndefined();
2263
+ });
2264
+
2265
+ test("scout capability skips scope detection", async () => {
2266
+ seedSession(ctx.sessionsDbPath, {
2267
+ agentName: "scout-x",
2268
+ capability: "scout",
2269
+ state: "working",
2270
+ });
2271
+ await writeOverlayWithScope(["src/agents/in-scope.ts"]);
2272
+
2273
+ const { runtime } = makeSpyRuntime();
2274
+ const fake = makeFakeProc();
2275
+ const spawnFn: TurnSpawnFn = () => {
2276
+ (async () => {
2277
+ await Bun.sleep(20);
2278
+ const s = createMailStore(ctx.mailDbPath);
2279
+ try {
2280
+ createMailClient(s).send({
2281
+ from: "scout-x",
2282
+ to: "lead",
2283
+ subject: "Done",
2284
+ body: "ok",
2285
+ type: "result",
2286
+ priority: "normal",
2287
+ });
2288
+ } finally {
2289
+ s.close();
2290
+ }
2291
+ emitFakeTurn(fake, { sessionId: "scout-x-session" });
2292
+ fake._exit(0);
2293
+ })();
2294
+ return fake;
2295
+ };
2296
+
2297
+ let detectCalled = false;
2298
+ await runTurn({
2299
+ ...makeRunOpts(ctx, "scout-x", {
2300
+ runtime,
2301
+ _spawnFn: spawnFn,
2302
+ capability: "scout",
2303
+ }),
2304
+ _scopeDetect: () => {
2305
+ detectCalled = true;
2306
+ return { violations: [], expansionReasons: [] };
2307
+ },
2308
+ });
2309
+
2310
+ expect(detectCalled).toBe(false);
2311
+ });
1450
2312
  });