@os-eco/overstory-cli 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +4 -2
  2. package/agents/builder.md +10 -1
  3. package/agents/lead.md +106 -5
  4. package/package.json +1 -1
  5. package/src/agents/headless-mail-injector.ts +8 -0
  6. package/src/agents/mail-poll-detect.test.ts +153 -0
  7. package/src/agents/mail-poll-detect.ts +73 -0
  8. package/src/agents/overlay.test.ts +56 -0
  9. package/src/agents/overlay.ts +33 -0
  10. package/src/agents/scope-detect.test.ts +190 -0
  11. package/src/agents/scope-detect.ts +146 -0
  12. package/src/agents/turn-runner.test.ts +862 -0
  13. package/src/agents/turn-runner.ts +225 -8
  14. package/src/commands/agents.ts +9 -0
  15. package/src/commands/coordinator.test.ts +127 -0
  16. package/src/commands/coordinator.ts +71 -4
  17. package/src/commands/dashboard.ts +1 -1
  18. package/src/commands/log.test.ts +131 -0
  19. package/src/commands/log.ts +37 -2
  20. package/src/commands/merge.test.ts +118 -0
  21. package/src/commands/merge.ts +51 -8
  22. package/src/commands/sling.test.ts +104 -0
  23. package/src/commands/sling.ts +95 -8
  24. package/src/commands/stop.test.ts +81 -0
  25. package/src/index.ts +5 -1
  26. package/src/insights/quality-gates.test.ts +141 -0
  27. package/src/insights/quality-gates.ts +156 -0
  28. package/src/logging/theme.ts +4 -0
  29. package/src/merge/predict.test.ts +387 -0
  30. package/src/merge/predict.ts +249 -0
  31. package/src/merge/resolver.ts +1 -1
  32. package/src/mulch/client.ts +3 -3
  33. package/src/sessions/store.test.ts +267 -5
  34. package/src/sessions/store.ts +105 -7
  35. package/src/types.ts +51 -1
  36. package/src/watchdog/daemon.test.ts +124 -2
  37. package/src/watchdog/daemon.ts +27 -12
  38. package/src/watchdog/health.test.ts +133 -8
  39. package/src/watchdog/health.ts +37 -5
  40. package/src/worktree/manager.test.ts +218 -1
  41. package/src/worktree/manager.ts +55 -0
  42. package/src/worktree/tmux.test.ts +25 -0
  43. package/src/worktree/tmux.ts +17 -0
  44. package/templates/overlay.md.tmpl +2 -0
@@ -23,6 +23,7 @@ import { Database } from "bun:sqlite";
23
23
  import { appendFileSync, existsSync } from "node:fs";
24
24
  import { mkdir, unlink } from "node:fs/promises";
25
25
  import { join } from "node:path";
26
+ import { extractFileScope } from "../commands/agents.ts";
26
27
  import { AgentError } from "../errors.ts";
27
28
  import { createEventStore } from "../events/store.ts";
28
29
  import { filterToolArgs } from "../events/tool-filter.ts";
@@ -37,6 +38,13 @@ import type {
37
38
  WorkerDiedPayload,
38
39
  } from "../types.ts";
39
40
  import { terminalMailTypesFor } from "./capabilities.ts";
41
+ import { detectMailPollPattern } from "./mail-poll-detect.ts";
42
+ import {
43
+ type DetectScopeViolationOpts,
44
+ detectScopeViolation as defaultDetectScopeViolation,
45
+ IMPLEMENTATION_CAPABILITIES,
46
+ type ScopeViolationResult,
47
+ } from "./scope-detect.ts";
40
48
  import { acquireTurnLock } from "./turn-lock.ts";
41
49
 
42
50
  /** Subprocess shape required by `runTurn`. Compatible with `Bun.spawn`. */
@@ -125,6 +133,32 @@ export interface RunTurnOpts {
125
133
  * Set to `0` to disable (test injection / explicit opt-out only).
126
134
  */
127
135
  eventStallTimeoutMs?: number;
136
+ /**
137
+ * Throttle (ms) for refreshing `session.lastActivity` while events stream
138
+ * from the parser loop. Default `2000` (every 2s). The watchdog at
139
+ * `src/watchdog/health.ts:242-243` documents its design as: "the
140
+ * turn-runner updates [lastActivity] on every parser event during a turn,
141
+ * and the watchdog refreshes it from events.db between turns" — so the
142
+ * runner must drive lastActivity itself or a long turn looks stalled and
143
+ * gets zombified mid-flight (overstory-8e61).
144
+ *
145
+ * Set to `0` to refresh on every event (test injection / explicit opt-out).
146
+ */
147
+ lastActivityRefreshIntervalMs?: number;
148
+ /**
149
+ * Test injection: invoked each time the parser loop fires a mid-turn
150
+ * `lastActivity` refresh (after the throttle gate, right after the
151
+ * SessionStore write). Used by tests to count refresh attempts directly
152
+ * rather than inferring from observable timestamps (overstory-8e61).
153
+ */
154
+ _onLastActivityRefresh?: () => void;
155
+ /**
156
+ * Test injection: replaces the real `detectScopeViolation` from
157
+ * `scope-detect.ts`. Tests pass a stubbed runner via the wrapper so they
158
+ * can drive the scope-violation observability path without spawning git
159
+ * (overstory-9f4d). Defaults to the real implementation.
160
+ */
161
+ _scopeDetect?: (opts: DetectScopeViolationOpts) => ScopeViolationResult;
128
162
  }
129
163
 
130
164
  export interface TurnResult {
@@ -288,6 +322,38 @@ function checkTerminalMailSince(
288
322
  }
289
323
  }
290
324
 
325
+ /**
326
+ * Check whether the agent has previously sent a `scope_expansion`-prefixed
327
+ * status mail (overstory-9f4d). When such a mail exists, the runner suppresses
328
+ * the soft scope-violation warning — the lead has already been informed.
329
+ *
330
+ * Soft signal — every failure (DB unavailable, missing table, etc.) returns
331
+ * false so observability never breaks the runner.
332
+ */
333
+ function hasScopeExpansionMail(mailDbPath: string, agentName: string): boolean {
334
+ let db: Database;
335
+ try {
336
+ db = new Database(mailDbPath);
337
+ } catch {
338
+ return false;
339
+ }
340
+ try {
341
+ db.exec("PRAGMA busy_timeout = 5000");
342
+ const stmt = db.prepare<{ c: number }, { $a: string }>(
343
+ "SELECT 1 AS c FROM messages WHERE from_agent = $a AND subject LIKE 'scope_expansion%' LIMIT 1",
344
+ );
345
+ return stmt.get({ $a: agentName }) !== null;
346
+ } catch {
347
+ return false;
348
+ } finally {
349
+ try {
350
+ db.close();
351
+ } catch {
352
+ // best-effort
353
+ }
354
+ }
355
+ }
356
+
291
357
  /**
292
358
  * Latest `created_at` timestamp of a terminal mail (`worker_done`/`result` for
293
359
  * task-scoped workers; `merged`/`merge_failed` for merger) sent by `agentName`.
@@ -850,7 +916,12 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
850
916
  let newSessionId: string | null = null;
851
917
  let cleanResult = false;
852
918
  let observedAnyEvent = false;
853
- let bootedToWorking = false;
919
+ // True iff this turn fired the "first parser event" transition into
920
+ // `in_turn`. Replaces the legacy `bootedToWorking` flag; the trigger
921
+ // now fires from booting OR between_turns (legacy `working` rows are left alone)
922
+ // so a resumed spawn-per-turn agent flips back to `in_turn` at the
923
+ // start of every batch (overstory-3087).
924
+ let transitionedToInTurn = false;
854
925
 
855
926
  // Stall watchdog (overstory-ddb3): if no parser event arrives for
856
927
  // `eventStallTimeoutMs`, abort the turn via SIGTERM/SIGKILL. Otherwise a
@@ -945,21 +1016,52 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
945
1016
  },
946
1017
  });
947
1018
 
1019
+ // Mid-turn `lastActivity` refresh (overstory-8e61). The watchdog at
1020
+ // `src/watchdog/health.ts:242-243` documents that the runner advances
1021
+ // lastActivity per parser event; without this the row stayed at
1022
+ // `startedAt` for the whole turn and long turns got zombified live.
1023
+ const lastActivityRefreshIntervalMs = opts.lastActivityRefreshIntervalMs ?? 2000;
1024
+ let lastActivityRefreshMs = 0; // first event always refreshes
1025
+
948
1026
  for await (const event of parser) {
949
1027
  armStallTimer();
950
1028
  observedAnyEvent = true;
951
1029
 
952
- if (!bootedToWorking && initialState === "booting") {
953
- bootedToWorking = true;
1030
+ // Keep `session.lastActivity` advancing while events flow so the
1031
+ // watchdog does not zombify a live agent mid-turn — see
1032
+ // `src/watchdog/health.ts:242-243` and overstory-8e61.
1033
+ const nowMs = now().getTime();
1034
+ if (nowMs - lastActivityRefreshMs >= lastActivityRefreshIntervalMs) {
1035
+ lastActivityRefreshMs = nowMs;
1036
+ updateSessionLastActivity(sessionsDbPath, agentName, (err) =>
1037
+ runnerLog("warn", "failed to refresh lastActivity mid-turn", err),
1038
+ );
1039
+ opts._onLastActivityRefresh?.();
1040
+ }
1041
+
1042
+ // First parser event of a turn → settle into `in_turn`. Allowed
1043
+ // predecessors are `booting` (initial dispatch), `between_turns`
1044
+ // (next mail batch on a healthy worker), or already-`in_turn`
1045
+ // (idempotent — covers the case where a prior turn somehow left
1046
+ // the row at in_turn). Legacy `working` rows are intentionally
1047
+ // not in the matrix predecessor set (overstory-3087): spawn-
1048
+ // per-turn workers should not flow through `working`, so the
1049
+ // matrix keeps the substate path disjoint and a stale `working`
1050
+ // row is left alone rather than silently coerced.
1051
+ if (
1052
+ !transitionedToInTurn &&
1053
+ (initialState === "booting" || initialState === "between_turns")
1054
+ ) {
1055
+ transitionedToInTurn = true;
954
1056
  updateSessionState(
955
1057
  sessionsDbPath,
956
1058
  agentName,
957
- "working",
958
- (err) => runnerLog("warn", "failed to transition bootingworking", err),
1059
+ "in_turn",
1060
+ (err) => runnerLog("warn", `failed to transition ${initialState}in_turn`, err),
959
1061
  (prev, attempted) =>
960
1062
  runnerLog(
961
1063
  "warn",
962
- `bootingworking rejected: state is now ${prev} (attempted ${attempted})`,
1064
+ `${initialState}in_turn rejected: state is now ${prev} (attempted ${attempted})`,
963
1065
  ),
964
1066
  );
965
1067
  }
@@ -968,6 +1070,51 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
968
1070
  cleanResult = event.isError !== true;
969
1071
  }
970
1072
 
1073
+ // Defense-in-depth (overstory-c92c): detect Bash mail-poll patterns
1074
+ // the lead.md prompt forbids (overstory-fa84). Warn-only — emit a
1075
+ // custom event before the original tool_use so observability tools
1076
+ // see the warning ahead of the offending call. Wrapped in try/catch
1077
+ // so detection failure cannot break the turn.
1078
+ if (event.type === "tool_use" && event.name === "Bash") {
1079
+ try {
1080
+ const input =
1081
+ typeof event.input === "object" && event.input !== null
1082
+ ? (event.input as Record<string, unknown>)
1083
+ : null;
1084
+ const command = input?.command;
1085
+ const detection = detectMailPollPattern(command);
1086
+ if (detection.matched) {
1087
+ const cmdStr = typeof command === "string" ? command : "";
1088
+ const truncated = cmdStr.length > 200 ? `${cmdStr.slice(0, 200)}…` : cmdStr;
1089
+ runnerLog(
1090
+ "warn",
1091
+ `detected mail-poll pattern in Bash command (${detection.reason}): ${truncated}`,
1092
+ );
1093
+ try {
1094
+ eventStore.insert({
1095
+ runId,
1096
+ agentName,
1097
+ sessionId: newSessionId,
1098
+ eventType: "custom",
1099
+ toolName: null,
1100
+ toolArgs: null,
1101
+ toolDurationMs: null,
1102
+ level: "warn",
1103
+ data: JSON.stringify({
1104
+ type: "mail_poll_detected",
1105
+ reason: detection.reason,
1106
+ command: cmdStr,
1107
+ }),
1108
+ });
1109
+ } catch (insertErr) {
1110
+ runnerLog("warn", "failed to insert mail_poll_detected event", insertErr);
1111
+ }
1112
+ }
1113
+ } catch (detectErr) {
1114
+ runnerLog("warn", "mail-poll detector threw", detectErr);
1115
+ }
1116
+ }
1117
+
971
1118
  try {
972
1119
  recordAgentEvent(eventStore, agentName, runId, newSessionId, event);
973
1120
  } catch {
@@ -1032,6 +1179,70 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
1032
1179
  snapshotTs,
1033
1180
  );
1034
1181
 
1182
+ // Soft scope-violation observability (overstory-9f4d). Builders sometimes
1183
+ // expand beyond their declared FILE_SCOPE; the lead needs a way to spot it
1184
+ // during merge verification. Surface a warn-level event into events.db
1185
+ // when the worker's modified files exceed FILE_SCOPE without an
1186
+ // `expansion_reason:` justification (commit body OR prior scope_expansion
1187
+ // mail). This is advisory — never aborts the turn, never blocks the
1188
+ // completed transition. All errors are swallowed.
1189
+ //
1190
+ // TODO: baseRef is hard-coded to "main"; a future improvement could
1191
+ // resolve the actual session-branch.txt for projects whose canonical
1192
+ // branch differs.
1193
+ if (terminalMailObserved && IMPLEMENTATION_CAPABILITIES.has(capability)) {
1194
+ try {
1195
+ const fileScope = await extractFileScope(worktreePath, runtime.instructionPath);
1196
+ if (fileScope.length > 0) {
1197
+ const detectFn = opts._scopeDetect ?? defaultDetectScopeViolation;
1198
+ const { violations, expansionReasons } = detectFn({
1199
+ worktreePath,
1200
+ baseRef: "main",
1201
+ fileScope,
1202
+ });
1203
+ if (violations.length > 0 && expansionReasons.length === 0) {
1204
+ const justified = hasScopeExpansionMail(mailDbPath, agentName);
1205
+ if (!justified) {
1206
+ runnerLog(
1207
+ "warn",
1208
+ `agent modified ${violations.length} file(s) outside declared FILE_SCOPE without justification: ${violations.join(", ")}. To suppress, include 'expansion_reason: <why>' in your last commit message OR send a scope_expansion mail to your lead.`,
1209
+ );
1210
+ try {
1211
+ const evStore = createEventStore(eventsDbPath);
1212
+ try {
1213
+ evStore.insert({
1214
+ runId,
1215
+ agentName,
1216
+ sessionId: newSessionId,
1217
+ eventType: "custom",
1218
+ toolName: null,
1219
+ toolArgs: null,
1220
+ toolDurationMs: null,
1221
+ level: "warn",
1222
+ data: JSON.stringify({
1223
+ type: "scope_violation",
1224
+ violations,
1225
+ fileScope,
1226
+ }),
1227
+ });
1228
+ } finally {
1229
+ try {
1230
+ evStore.close();
1231
+ } catch {
1232
+ // best-effort
1233
+ }
1234
+ }
1235
+ } catch {
1236
+ // observability must never break the runner
1237
+ }
1238
+ }
1239
+ }
1240
+ }
1241
+ } catch {
1242
+ // scope detection is advisory — swallow all errors
1243
+ }
1244
+ }
1245
+
1035
1246
  const resumeMismatch =
1036
1247
  priorSessionId !== null && newSessionId !== null && newSessionId !== priorSessionId;
1037
1248
 
@@ -1057,8 +1268,14 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
1057
1268
  finalState = "completed";
1058
1269
  } else if (terminalMailMissing) {
1059
1270
  finalState = "completed";
1060
- } else if (observedAnyEvent || bootedToWorking) {
1061
- finalState = "working";
1271
+ } else if (observedAnyEvent || transitionedToInTurn) {
1272
+ // Turn produced events but did not complete — settle to
1273
+ // `between_turns`, NOT `working`, so the UI can distinguish a
1274
+ // spawn-per-turn worker waiting for its next mail batch from one
1275
+ // mid-execution. The turn-runner will flip the row back to `in_turn`
1276
+ // on the next batch when the parser fires its first event
1277
+ // (overstory-3087).
1278
+ finalState = "between_turns";
1062
1279
  } else {
1063
1280
  finalState = initialState;
1064
1281
  }
@@ -166,11 +166,20 @@ export async function discoverAgents(
166
166
 
167
167
  /**
168
168
  * Format the state icon for display.
169
+ *
170
+ * `in_turn` and `between_turns` (overstory-3087) render with the same cyan
171
+ * accent as `working` so a spawn-per-turn worker is visually grouped with
172
+ * other healthy/active agents in `ov agents` output. They use distinct
173
+ * glyphs ('>' vs '~') to mirror the dashboard / theme.ts mapping.
169
174
  */
170
175
  function getStateIcon(state: string): string {
171
176
  switch (state) {
172
177
  case "working":
173
178
  return color.cyan(">");
179
+ case "in_turn":
180
+ return color.cyan(">");
181
+ case "between_turns":
182
+ return color.cyan("~");
174
183
  case "booting":
175
184
  return color.green("-");
176
185
  case "stalled":
@@ -1601,6 +1601,133 @@ describe("watchdog integration", () => {
1601
1601
  expect(output).toContain("--watchdog");
1602
1602
  expect(output).toContain("watchdog");
1603
1603
  });
1604
+
1605
+ test("start help text includes --accept-existing-watchdog flag", async () => {
1606
+ const cmd = createCoordinatorCommand({});
1607
+ for (const sub of cmd.commands) {
1608
+ sub.exitOverride();
1609
+ }
1610
+ const output = await captureStdout(async () => {
1611
+ await cmd.parseAsync(["start", "--help"], { from: "user" }).catch(() => {});
1612
+ });
1613
+ expect(output).toContain("--accept-existing-watchdog");
1614
+ });
1615
+ });
1616
+
1617
+ // overstory-3f0c: detect leftover watchdog from a previous session before
1618
+ // spawning, so operators do not get unexpected watchdog supervision.
1619
+ describe("orphan watchdog detection (overstory-3f0c)", () => {
1620
+ // (a) start (no --watchdog) + isRunning=true -> throws AgentError with PID
1621
+ // and mention of --accept-existing-watchdog in the message
1622
+ test("rejects start with AgentError when no flag passed and watchdog already running", async () => {
1623
+ const { deps, watchdogCalls } = makeDeps({}, { running: true, startSuccess: true });
1624
+ const originalSleep = Bun.sleep;
1625
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
1626
+
1627
+ try {
1628
+ await coordinatorCommand(["start", "--json"], deps);
1629
+ expect.unreachable("should have thrown AgentError");
1630
+ } catch (err) {
1631
+ expect(err).toBeInstanceOf(AgentError);
1632
+ const ae = err as AgentError;
1633
+ expect(ae.message).toContain("Watchdog daemon");
1634
+ // PID is unavailable from the fake watchdog (no PID file written),
1635
+ // so the message reports "unknown PID" — but it must reference the
1636
+ // concept and the suppress flag explicitly.
1637
+ expect(ae.message).toMatch(/PID/);
1638
+ expect(ae.message).toContain("--accept-existing-watchdog");
1639
+ expect(ae.message).toContain("--watchdog");
1640
+ expect(ae.message).toContain("ov watch --kill-others");
1641
+ } finally {
1642
+ Bun.sleep = originalSleep;
1643
+ }
1644
+
1645
+ // Detection ran but auto-start did NOT — the throw fired first.
1646
+ expect(watchdogCalls?.isRunning).toBeGreaterThanOrEqual(1);
1647
+ expect(watchdogCalls?.start).toBe(0);
1648
+ });
1649
+
1650
+ // (b) start --watchdog + isRunning=true -> does NOT throw;
1651
+ // watchdog.start() is still called once
1652
+ test("--watchdog with already-running daemon does NOT throw and still calls start()", async () => {
1653
+ const { deps, watchdogCalls } = makeDeps(
1654
+ {},
1655
+ { running: true, startSuccess: false }, // startSuccess:false simulates the no-op-when-already-running return
1656
+ );
1657
+ const originalSleep = Bun.sleep;
1658
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
1659
+
1660
+ let output: string;
1661
+ try {
1662
+ output = await captureStdout(() =>
1663
+ coordinatorCommand(["start", "--watchdog", "--json"], deps),
1664
+ );
1665
+ } finally {
1666
+ Bun.sleep = originalSleep;
1667
+ }
1668
+
1669
+ expect(watchdogCalls?.start).toBe(1);
1670
+ const parsed = JSON.parse(output) as Record<string, unknown>;
1671
+ // reused-daemon sentinel keeps watchdog truthy in the JSON output
1672
+ expect(parsed.watchdog).toBe(true);
1673
+ expect(parsed.watchdogPreexisting).toBe(true);
1674
+ });
1675
+
1676
+ // (c) start --accept-existing-watchdog + isRunning=true -> does NOT throw;
1677
+ // coordinator starts normally; watchdog.start() is NOT called
1678
+ test("--accept-existing-watchdog allows start without calling watchdog.start()", async () => {
1679
+ const { deps, watchdogCalls } = makeDeps({}, { running: true, startSuccess: true });
1680
+ const originalSleep = Bun.sleep;
1681
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
1682
+
1683
+ let output: string;
1684
+ try {
1685
+ output = await captureStdout(() =>
1686
+ coordinatorCommand(["start", "--accept-existing-watchdog", "--json"], deps),
1687
+ );
1688
+ } finally {
1689
+ Bun.sleep = originalSleep;
1690
+ }
1691
+
1692
+ expect(watchdogCalls?.start).toBe(0);
1693
+ const parsed = JSON.parse(output) as Record<string, unknown>;
1694
+ expect(parsed.watchdog).toBe(true);
1695
+ expect(parsed.watchdogPreexisting).toBe(true);
1696
+ });
1697
+
1698
+ // (d) start (no --watchdog) + isRunning=false -> no error, no start
1699
+ // (regression — preserves the original "no flag, no daemon activity" path)
1700
+ test("no flag + watchdog not running: starts normally without calling start()", async () => {
1701
+ const { deps, watchdogCalls } = makeDeps({}, { running: false, startSuccess: true });
1702
+ const originalSleep = Bun.sleep;
1703
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
1704
+
1705
+ let output: string;
1706
+ try {
1707
+ output = await captureStdout(() => coordinatorCommand(["start", "--json"], deps));
1708
+ } finally {
1709
+ Bun.sleep = originalSleep;
1710
+ }
1711
+
1712
+ expect(watchdogCalls?.start).toBe(0);
1713
+ const parsed = JSON.parse(output) as Record<string, unknown>;
1714
+ expect(parsed.watchdog).toBe(false);
1715
+ expect(parsed.watchdogPreexisting).toBe(false);
1716
+ });
1717
+
1718
+ test("orchestrator inherits the same orphan-watchdog detection", async () => {
1719
+ const { deps, watchdogCalls } = makeDeps({}, { running: true });
1720
+ const originalSleep = Bun.sleep;
1721
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
1722
+
1723
+ try {
1724
+ await expect(orchestratorCommand(["start", "--json"], deps)).rejects.toThrow(AgentError);
1725
+ } finally {
1726
+ Bun.sleep = originalSleep;
1727
+ }
1728
+
1729
+ expect(watchdogCalls?.start).toBe(0);
1730
+ });
1604
1731
  });
1605
1732
  });
1606
1733
 
@@ -351,6 +351,14 @@ export interface CoordinatorSessionOptions {
351
351
  * the web UI's POST /api/coordinator/start endpoint.
352
352
  */
353
353
  headless?: boolean;
354
+ /**
355
+ * Acknowledge that a watchdog daemon from a previous session may already be
356
+ * running and should be allowed to supervise this coordinator. Without this
357
+ * (or `--watchdog`), the start command refuses to spawn when a leftover
358
+ * daemon is detected, to surface the "watchdog persists across runs" trap
359
+ * that overstory-3f0c was filed for.
360
+ */
361
+ acceptExistingWatchdog?: boolean;
354
362
  }
355
363
 
356
364
  /**
@@ -385,6 +393,7 @@ export async function startCoordinatorSession(
385
393
  displayName: displayNameOpt,
386
394
  beaconBuilder: beaconBuilderOpt,
387
395
  headless: headlessFlag,
396
+ acceptExistingWatchdog: acceptExistingWatchdogFlag,
388
397
  } = opts;
389
398
 
390
399
  const coordinatorName = agentNameOpt ?? coordinatorNameOpt ?? COORDINATOR_NAME;
@@ -406,6 +415,25 @@ export async function startCoordinatorSession(
406
415
  const monitor = deps._monitor ?? createDefaultMonitor(projectRoot);
407
416
  const tmuxSession = coordinatorTmuxSession(config.project.name, coordinatorName);
408
417
 
418
+ // Detect leftover watchdog daemon from a previous session (overstory-3f0c).
419
+ // If a watchdog is already running and the operator did not pass --watchdog
420
+ // or --accept-existing-watchdog, refuse to start: a persistent daemon will
421
+ // supervise this coordinator with policy decided by the original invocation,
422
+ // not the current one. This prevents "I didn't run --watchdog, why is the
423
+ // watchdog killing things?" surprises.
424
+ const watchdogAlreadyRunning = await watchdog.isRunning();
425
+ if (watchdogAlreadyRunning && !watchdogFlag && !acceptExistingWatchdogFlag) {
426
+ const existingPid = await readWatchdogPid(projectRoot);
427
+ const pidLabel = existingPid !== null ? `PID ${existingPid}` : "unknown PID";
428
+ throw new AgentError(
429
+ `Watchdog daemon (${pidLabel}) is already running from a previous session. ` +
430
+ `It will supervise this ${displayName.toLowerCase()} run and may take escalation actions you did not opt into. ` +
431
+ `To proceed: pass --watchdog to acknowledge, pass --accept-existing-watchdog to suppress this check, ` +
432
+ `or run 'ov watch --kill-others' (or remove .overstory/watchdog.pid) first.`,
433
+ { agentName: coordinatorName },
434
+ );
435
+ }
436
+
409
437
  // Check for existing coordinator session with the same name
410
438
  const overstoryDir = join(projectRoot, ".overstory");
411
439
  const { store } = openSessionStore(overstoryDir);
@@ -589,9 +617,21 @@ export async function startCoordinatorSession(
589
617
  if (watchdogResult) {
590
618
  watchdogPid = watchdogResult.pid;
591
619
  if (!json) printHint("Watchdog started");
620
+ } else if (watchdogAlreadyRunning) {
621
+ // createDefaultWatchdog.start() returns null when an existing PID
622
+ // is alive — that's a no-op success, not a failure. Reuse the
623
+ // existing daemon. Sentinel value keeps `watchdogPid !== undefined`
624
+ // truthy in the JSON output.
625
+ watchdogPid = -1;
626
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
592
627
  } else {
593
628
  if (!json) printWarning("Watchdog failed to start");
594
629
  }
630
+ } else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
631
+ // --accept-existing-watchdog without --watchdog: surface that an
632
+ // existing daemon is supervising this run, but do not call start().
633
+ watchdogPid = -1;
634
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
595
635
  }
596
636
  let monitorPid: number | undefined;
597
637
  if (monitorFlag) {
@@ -615,7 +655,8 @@ export async function startCoordinatorSession(
615
655
  projectRoot,
616
656
  pid: headlessProc.pid,
617
657
  headless: true,
618
- watchdog: watchdogFlag ? watchdogPid !== undefined : false,
658
+ watchdog: watchdogPid !== undefined,
659
+ watchdogPreexisting: watchdogAlreadyRunning,
619
660
  monitor: monitorFlag ? monitorPid !== undefined : false,
620
661
  };
621
662
 
@@ -755,16 +796,28 @@ export async function startCoordinatorSession(
755
796
  await tmux.sendKeys(tmuxSession, "");
756
797
  }
757
798
 
758
- // Auto-start watchdog if --watchdog flag is present
799
+ // Auto-start watchdog if --watchdog flag is present.
759
800
  let watchdogPid: number | undefined;
760
801
  if (watchdogFlag) {
761
802
  const watchdogResult = await watchdog.start();
762
803
  if (watchdogResult) {
763
804
  watchdogPid = watchdogResult.pid;
764
805
  if (!json) printHint("Watchdog started");
806
+ } else if (watchdogAlreadyRunning) {
807
+ // createDefaultWatchdog.start() returns null when an existing PID
808
+ // is alive — that's a no-op success, not a failure. Reuse the
809
+ // existing daemon. Sentinel value keeps `watchdogPid !== undefined`
810
+ // truthy in the JSON output.
811
+ watchdogPid = -1;
812
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
765
813
  } else {
766
814
  if (!json) printWarning("Watchdog failed to start");
767
815
  }
816
+ } else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
817
+ // --accept-existing-watchdog without --watchdog: surface that an
818
+ // existing daemon is supervising this run, but do not call start().
819
+ watchdogPid = -1;
820
+ if (!json) printHint("Watchdog already running, reusing existing daemon");
768
821
  }
769
822
 
770
823
  // Auto-start monitor if --monitor flag is present and tier2 is enabled
@@ -789,7 +842,8 @@ export async function startCoordinatorSession(
789
842
  tmuxSession,
790
843
  projectRoot,
791
844
  pid,
792
- watchdog: watchdogFlag ? watchdogPid !== undefined : false,
845
+ watchdog: watchdogPid !== undefined,
846
+ watchdogPreexisting: watchdogAlreadyRunning,
793
847
  monitor: monitorFlag ? monitorPid !== undefined : false,
794
848
  };
795
849
 
@@ -815,7 +869,14 @@ export async function startCoordinatorSession(
815
869
 
816
870
  async function startPersistentAgent(
817
871
  spec: PersistentAgentSpec,
818
- opts: { json: boolean; attach: boolean; watchdog: boolean; monitor: boolean; profile?: string },
872
+ opts: {
873
+ json: boolean;
874
+ attach: boolean;
875
+ watchdog: boolean;
876
+ monitor: boolean;
877
+ profile?: string;
878
+ acceptExistingWatchdog?: boolean;
879
+ },
819
880
  deps: CoordinatorDeps = {},
820
881
  ): Promise<void> {
821
882
  await startCoordinatorSession(
@@ -1557,6 +1618,10 @@ export function createPersistentAgentCommand(
1557
1618
  .option("--attach", "Always attach to tmux session after start")
1558
1619
  .option("--no-attach", "Never attach to tmux session after start")
1559
1620
  .option("--watchdog", `Auto-start watchdog daemon with ${spec.commandName}`)
1621
+ .option(
1622
+ "--accept-existing-watchdog",
1623
+ "Continue when a watchdog daemon from a previous session is already running (it will supervise this run)",
1624
+ )
1560
1625
  .option("--monitor", `Auto-start Tier 2 monitor agent with ${spec.commandName}`)
1561
1626
  .option("--profile <name>", "Canopy profile to apply to spawned agents")
1562
1627
  .option("--json", "Output as JSON")
@@ -1564,6 +1629,7 @@ export function createPersistentAgentCommand(
1564
1629
  async (opts: {
1565
1630
  attach?: boolean;
1566
1631
  watchdog?: boolean;
1632
+ acceptExistingWatchdog?: boolean;
1567
1633
  monitor?: boolean;
1568
1634
  json?: boolean;
1569
1635
  profile?: string;
@@ -1576,6 +1642,7 @@ export function createPersistentAgentCommand(
1576
1642
  json: opts.json ?? false,
1577
1643
  attach: shouldAttach,
1578
1644
  watchdog: opts.watchdog ?? false,
1645
+ acceptExistingWatchdog: opts.acceptExistingWatchdog ?? false,
1579
1646
  monitor: opts.monitor ?? false,
1580
1647
  profile: opts.profile,
1581
1648
  },
@@ -615,7 +615,7 @@ export function renderAgentPanel(
615
615
 
616
616
  // Sort agents: active first, then completed, then zombie
617
617
  const agents = [...data.status.agents].sort((a, b) => {
618
- const activeStates = ["working", "booting", "stalled"];
618
+ const activeStates = ["working", "in_turn", "between_turns", "booting", "stalled"];
619
619
  const aActive = activeStates.includes(a.state);
620
620
  const bActive = activeStates.includes(b.state);
621
621
  if (aActive && !bActive) return -1;