@os-eco/overstory-cli 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/agents/builder.md +10 -1
- package/agents/lead.md +106 -5
- package/package.json +1 -1
- package/src/agents/headless-mail-injector.ts +8 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +56 -0
- package/src/agents/overlay.ts +33 -0
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-runner.test.ts +862 -0
- package/src/agents/turn-runner.ts +225 -8
- package/src/commands/agents.ts +9 -0
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +71 -4
- package/src/commands/dashboard.ts +1 -1
- package/src/commands/log.test.ts +131 -0
- package/src/commands/log.ts +37 -2
- package/src/commands/merge.test.ts +118 -0
- package/src/commands/merge.ts +51 -8
- package/src/commands/sling.test.ts +104 -0
- package/src/commands/sling.ts +95 -8
- package/src/commands/stop.test.ts +81 -0
- package/src/index.ts +5 -1
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/logging/theme.ts +4 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/sessions/store.test.ts +267 -5
- package/src/sessions/store.ts +105 -7
- package/src/types.ts +51 -1
- package/src/watchdog/daemon.test.ts +124 -2
- package/src/watchdog/daemon.ts +27 -12
- package/src/watchdog/health.test.ts +133 -8
- package/src/watchdog/health.ts +37 -5
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/tmux.test.ts +25 -0
- package/src/worktree/tmux.ts +17 -0
- package/templates/overlay.md.tmpl +2 -0
|
@@ -23,6 +23,7 @@ import { Database } from "bun:sqlite";
|
|
|
23
23
|
import { appendFileSync, existsSync } from "node:fs";
|
|
24
24
|
import { mkdir, unlink } from "node:fs/promises";
|
|
25
25
|
import { join } from "node:path";
|
|
26
|
+
import { extractFileScope } from "../commands/agents.ts";
|
|
26
27
|
import { AgentError } from "../errors.ts";
|
|
27
28
|
import { createEventStore } from "../events/store.ts";
|
|
28
29
|
import { filterToolArgs } from "../events/tool-filter.ts";
|
|
@@ -37,6 +38,13 @@ import type {
|
|
|
37
38
|
WorkerDiedPayload,
|
|
38
39
|
} from "../types.ts";
|
|
39
40
|
import { terminalMailTypesFor } from "./capabilities.ts";
|
|
41
|
+
import { detectMailPollPattern } from "./mail-poll-detect.ts";
|
|
42
|
+
import {
|
|
43
|
+
type DetectScopeViolationOpts,
|
|
44
|
+
detectScopeViolation as defaultDetectScopeViolation,
|
|
45
|
+
IMPLEMENTATION_CAPABILITIES,
|
|
46
|
+
type ScopeViolationResult,
|
|
47
|
+
} from "./scope-detect.ts";
|
|
40
48
|
import { acquireTurnLock } from "./turn-lock.ts";
|
|
41
49
|
|
|
42
50
|
/** Subprocess shape required by `runTurn`. Compatible with `Bun.spawn`. */
|
|
@@ -125,6 +133,32 @@ export interface RunTurnOpts {
|
|
|
125
133
|
* Set to `0` to disable (test injection / explicit opt-out only).
|
|
126
134
|
*/
|
|
127
135
|
eventStallTimeoutMs?: number;
|
|
136
|
+
/**
|
|
137
|
+
* Throttle (ms) for refreshing `session.lastActivity` while events stream
|
|
138
|
+
* from the parser loop. Default `2000` (every 2s). The watchdog at
|
|
139
|
+
* `src/watchdog/health.ts:242-243` documents its design as: "the
|
|
140
|
+
* turn-runner updates [lastActivity] on every parser event during a turn,
|
|
141
|
+
* and the watchdog refreshes it from events.db between turns" — so the
|
|
142
|
+
* runner must drive lastActivity itself or a long turn looks stalled and
|
|
143
|
+
* gets zombified mid-flight (overstory-8e61).
|
|
144
|
+
*
|
|
145
|
+
* Set to `0` to refresh on every event (test injection / explicit opt-out).
|
|
146
|
+
*/
|
|
147
|
+
lastActivityRefreshIntervalMs?: number;
|
|
148
|
+
/**
|
|
149
|
+
* Test injection: invoked each time the parser loop fires a mid-turn
|
|
150
|
+
* `lastActivity` refresh (after the throttle gate and after the
|
|
151
|
+
* SessionStore write). Used by tests to count refresh attempts directly
|
|
152
|
+
* rather than inferring from observable timestamps (overstory-8e61).
|
|
153
|
+
*/
|
|
154
|
+
_onLastActivityRefresh?: () => void;
|
|
155
|
+
/**
|
|
156
|
+
* Test injection: replaces the real `detectScopeViolation` from
|
|
157
|
+
* `scope-detect.ts`. Tests pass a stubbed runner via the wrapper so they
|
|
158
|
+
* can drive the scope-violation observability path without spawning git
|
|
159
|
+
* (overstory-9f4d). Defaults to the real implementation.
|
|
160
|
+
*/
|
|
161
|
+
_scopeDetect?: (opts: DetectScopeViolationOpts) => ScopeViolationResult;
|
|
128
162
|
}
|
|
129
163
|
|
|
130
164
|
export interface TurnResult {
|
|
@@ -288,6 +322,38 @@ function checkTerminalMailSince(
|
|
|
288
322
|
}
|
|
289
323
|
}
|
|
290
324
|
|
|
325
|
+
/**
|
|
326
|
+
* Check whether the agent has previously sent a `scope_expansion`-prefixed
|
|
327
|
+
* status mail (overstory-9f4d). When such a mail exists, the runner suppresses
|
|
328
|
+
* the soft scope-violation warning — the lead has already been informed.
|
|
329
|
+
*
|
|
330
|
+
* Soft signal — every failure (DB unavailable, missing table, etc.) returns
|
|
331
|
+
* false so observability never breaks the runner.
|
|
332
|
+
*/
|
|
333
|
+
function hasScopeExpansionMail(mailDbPath: string, agentName: string): boolean {
|
|
334
|
+
let db: Database;
|
|
335
|
+
try {
|
|
336
|
+
db = new Database(mailDbPath);
|
|
337
|
+
} catch {
|
|
338
|
+
return false;
|
|
339
|
+
}
|
|
340
|
+
try {
|
|
341
|
+
db.exec("PRAGMA busy_timeout = 5000");
|
|
342
|
+
const stmt = db.prepare<{ c: number }, { $a: string }>(
|
|
343
|
+
"SELECT 1 AS c FROM messages WHERE from_agent = $a AND subject LIKE 'scope_expansion%' LIMIT 1",
|
|
344
|
+
);
|
|
345
|
+
return stmt.get({ $a: agentName }) !== null;
|
|
346
|
+
} catch {
|
|
347
|
+
return false;
|
|
348
|
+
} finally {
|
|
349
|
+
try {
|
|
350
|
+
db.close();
|
|
351
|
+
} catch {
|
|
352
|
+
// best-effort
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
|
|
291
357
|
/**
|
|
292
358
|
* Latest `created_at` timestamp of a terminal mail (`worker_done`/`result` for
|
|
293
359
|
* task-scoped workers; `merged`/`merge_failed` for merger) sent by `agentName`.
|
|
@@ -850,7 +916,12 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
|
|
|
850
916
|
let newSessionId: string | null = null;
|
|
851
917
|
let cleanResult = false;
|
|
852
918
|
let observedAnyEvent = false;
|
|
853
|
-
|
|
919
|
+
// True iff this turn fired the "first parser event" transition into
|
|
920
|
+
// `in_turn`. Replaces the legacy `bootedToWorking` flag; the trigger
|
|
921
|
+
// now fires from booting OR between_turns (legacy `working` rows excluded)
|
|
922
|
+
// so a resumed spawn-per-turn agent flips back to `in_turn` at the
|
|
923
|
+
// start of every batch (overstory-3087).
|
|
924
|
+
let transitionedToInTurn = false;
|
|
854
925
|
|
|
855
926
|
// Stall watchdog (overstory-ddb3): if no parser event arrives for
|
|
856
927
|
// `eventStallTimeoutMs`, abort the turn via SIGTERM/SIGKILL. Otherwise a
|
|
@@ -945,21 +1016,52 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
|
|
|
945
1016
|
},
|
|
946
1017
|
});
|
|
947
1018
|
|
|
1019
|
+
// Mid-turn `lastActivity` refresh (overstory-8e61). The watchdog at
|
|
1020
|
+
// `src/watchdog/health.ts:242-243` documents that the runner advances
|
|
1021
|
+
// lastActivity per parser event; without this the row stayed at
|
|
1022
|
+
// `startedAt` for the whole turn and long turns got zombified live.
|
|
1023
|
+
const lastActivityRefreshIntervalMs = opts.lastActivityRefreshIntervalMs ?? 2000;
|
|
1024
|
+
let lastActivityRefreshMs = 0; // first event always refreshes
|
|
1025
|
+
|
|
948
1026
|
for await (const event of parser) {
|
|
949
1027
|
armStallTimer();
|
|
950
1028
|
observedAnyEvent = true;
|
|
951
1029
|
|
|
952
|
-
|
|
953
|
-
|
|
1030
|
+
// Keep `session.lastActivity` advancing while events flow so the
|
|
1031
|
+
// watchdog does not zombify a live agent mid-turn — see
|
|
1032
|
+
// `src/watchdog/health.ts:242-243` and overstory-8e61.
|
|
1033
|
+
const nowMs = now().getTime();
|
|
1034
|
+
if (nowMs - lastActivityRefreshMs >= lastActivityRefreshIntervalMs) {
|
|
1035
|
+
lastActivityRefreshMs = nowMs;
|
|
1036
|
+
updateSessionLastActivity(sessionsDbPath, agentName, (err) =>
|
|
1037
|
+
runnerLog("warn", "failed to refresh lastActivity mid-turn", err),
|
|
1038
|
+
);
|
|
1039
|
+
opts._onLastActivityRefresh?.();
|
|
1040
|
+
}
|
|
1041
|
+
|
|
1042
|
+
// First parser event of a turn → settle into `in_turn`. Allowed
|
|
1043
|
+
// predecessors are `booting` (initial dispatch), `between_turns`
|
|
1044
|
+
// (next mail batch on a healthy worker), or already-`in_turn`
|
|
1045
|
+
// (idempotent — covers the case where a prior turn somehow left
|
|
1046
|
+
// the row at in_turn). Legacy `working` rows are intentionally
|
|
1047
|
+
// not in the matrix predecessor set (overstory-3087): spawn-
|
|
1048
|
+
// per-turn workers should not flow through `working`, so the
|
|
1049
|
+
// matrix keeps the substate path disjoint and a stale `working`
|
|
1050
|
+
// row is left alone rather than silently coerced.
|
|
1051
|
+
if (
|
|
1052
|
+
!transitionedToInTurn &&
|
|
1053
|
+
(initialState === "booting" || initialState === "between_turns")
|
|
1054
|
+
) {
|
|
1055
|
+
transitionedToInTurn = true;
|
|
954
1056
|
updateSessionState(
|
|
955
1057
|
sessionsDbPath,
|
|
956
1058
|
agentName,
|
|
957
|
-
"
|
|
958
|
-
(err) => runnerLog("warn",
|
|
1059
|
+
"in_turn",
|
|
1060
|
+
(err) => runnerLog("warn", `failed to transition ${initialState} → in_turn`, err),
|
|
959
1061
|
(prev, attempted) =>
|
|
960
1062
|
runnerLog(
|
|
961
1063
|
"warn",
|
|
962
|
-
|
|
1064
|
+
`${initialState} → in_turn rejected: state is now ${prev} (attempted ${attempted})`,
|
|
963
1065
|
),
|
|
964
1066
|
);
|
|
965
1067
|
}
|
|
@@ -968,6 +1070,51 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
|
|
|
968
1070
|
cleanResult = event.isError !== true;
|
|
969
1071
|
}
|
|
970
1072
|
|
|
1073
|
+
// Defense-in-depth (overstory-c92c): detect Bash mail-poll patterns
|
|
1074
|
+
// the lead.md prompt forbids (overstory-fa84). Warn-only — emit a
|
|
1075
|
+
// custom event before the original tool_use so observability tools
|
|
1076
|
+
// see the warning ahead of the offending call. Wrapped in try/catch
|
|
1077
|
+
// so detection failure cannot break the turn.
|
|
1078
|
+
if (event.type === "tool_use" && event.name === "Bash") {
|
|
1079
|
+
try {
|
|
1080
|
+
const input =
|
|
1081
|
+
typeof event.input === "object" && event.input !== null
|
|
1082
|
+
? (event.input as Record<string, unknown>)
|
|
1083
|
+
: null;
|
|
1084
|
+
const command = input?.command;
|
|
1085
|
+
const detection = detectMailPollPattern(command);
|
|
1086
|
+
if (detection.matched) {
|
|
1087
|
+
const cmdStr = typeof command === "string" ? command : "";
|
|
1088
|
+
const truncated = cmdStr.length > 200 ? `${cmdStr.slice(0, 200)}…` : cmdStr;
|
|
1089
|
+
runnerLog(
|
|
1090
|
+
"warn",
|
|
1091
|
+
`detected mail-poll pattern in Bash command (${detection.reason}): ${truncated}`,
|
|
1092
|
+
);
|
|
1093
|
+
try {
|
|
1094
|
+
eventStore.insert({
|
|
1095
|
+
runId,
|
|
1096
|
+
agentName,
|
|
1097
|
+
sessionId: newSessionId,
|
|
1098
|
+
eventType: "custom",
|
|
1099
|
+
toolName: null,
|
|
1100
|
+
toolArgs: null,
|
|
1101
|
+
toolDurationMs: null,
|
|
1102
|
+
level: "warn",
|
|
1103
|
+
data: JSON.stringify({
|
|
1104
|
+
type: "mail_poll_detected",
|
|
1105
|
+
reason: detection.reason,
|
|
1106
|
+
command: cmdStr,
|
|
1107
|
+
}),
|
|
1108
|
+
});
|
|
1109
|
+
} catch (insertErr) {
|
|
1110
|
+
runnerLog("warn", "failed to insert mail_poll_detected event", insertErr);
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
} catch (detectErr) {
|
|
1114
|
+
runnerLog("warn", "mail-poll detector threw", detectErr);
|
|
1115
|
+
}
|
|
1116
|
+
}
|
|
1117
|
+
|
|
971
1118
|
try {
|
|
972
1119
|
recordAgentEvent(eventStore, agentName, runId, newSessionId, event);
|
|
973
1120
|
} catch {
|
|
@@ -1032,6 +1179,70 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
|
|
|
1032
1179
|
snapshotTs,
|
|
1033
1180
|
);
|
|
1034
1181
|
|
|
1182
|
+
// Soft scope-violation observability (overstory-9f4d). Builders sometimes
|
|
1183
|
+
// expand beyond their declared FILE_SCOPE; the lead needs a way to spot it
|
|
1184
|
+
// during merge verification. Surface a warn-level event into events.db
|
|
1185
|
+
// when the worker's modified files exceed FILE_SCOPE without an
|
|
1186
|
+
// `expansion_reason:` justification (commit body OR prior scope_expansion
|
|
1187
|
+
// mail). This is advisory — never aborts the turn, never blocks the
|
|
1188
|
+
// completed transition. All errors are swallowed.
|
|
1189
|
+
//
|
|
1190
|
+
// TODO: baseRef is hard-coded to "main"; a future improvement could
|
|
1191
|
+
// resolve the actual session-branch.txt for projects whose canonical
|
|
1192
|
+
// branch differs.
|
|
1193
|
+
if (terminalMailObserved && IMPLEMENTATION_CAPABILITIES.has(capability)) {
|
|
1194
|
+
try {
|
|
1195
|
+
const fileScope = await extractFileScope(worktreePath, runtime.instructionPath);
|
|
1196
|
+
if (fileScope.length > 0) {
|
|
1197
|
+
const detectFn = opts._scopeDetect ?? defaultDetectScopeViolation;
|
|
1198
|
+
const { violations, expansionReasons } = detectFn({
|
|
1199
|
+
worktreePath,
|
|
1200
|
+
baseRef: "main",
|
|
1201
|
+
fileScope,
|
|
1202
|
+
});
|
|
1203
|
+
if (violations.length > 0 && expansionReasons.length === 0) {
|
|
1204
|
+
const justified = hasScopeExpansionMail(mailDbPath, agentName);
|
|
1205
|
+
if (!justified) {
|
|
1206
|
+
runnerLog(
|
|
1207
|
+
"warn",
|
|
1208
|
+
`agent modified ${violations.length} file(s) outside declared FILE_SCOPE without justification: ${violations.join(", ")}. To suppress, include 'expansion_reason: <why>' in your last commit message OR send a scope_expansion mail to your lead.`,
|
|
1209
|
+
);
|
|
1210
|
+
try {
|
|
1211
|
+
const evStore = createEventStore(eventsDbPath);
|
|
1212
|
+
try {
|
|
1213
|
+
evStore.insert({
|
|
1214
|
+
runId,
|
|
1215
|
+
agentName,
|
|
1216
|
+
sessionId: newSessionId,
|
|
1217
|
+
eventType: "custom",
|
|
1218
|
+
toolName: null,
|
|
1219
|
+
toolArgs: null,
|
|
1220
|
+
toolDurationMs: null,
|
|
1221
|
+
level: "warn",
|
|
1222
|
+
data: JSON.stringify({
|
|
1223
|
+
type: "scope_violation",
|
|
1224
|
+
violations,
|
|
1225
|
+
fileScope,
|
|
1226
|
+
}),
|
|
1227
|
+
});
|
|
1228
|
+
} finally {
|
|
1229
|
+
try {
|
|
1230
|
+
evStore.close();
|
|
1231
|
+
} catch {
|
|
1232
|
+
// best-effort
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
} catch {
|
|
1236
|
+
// observability must never break the runner
|
|
1237
|
+
}
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
} catch {
|
|
1242
|
+
// scope detection is advisory — swallow all errors
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1035
1246
|
const resumeMismatch =
|
|
1036
1247
|
priorSessionId !== null && newSessionId !== null && newSessionId !== priorSessionId;
|
|
1037
1248
|
|
|
@@ -1057,8 +1268,14 @@ export async function runTurn(opts: RunTurnOpts): Promise<TurnResult> {
|
|
|
1057
1268
|
finalState = "completed";
|
|
1058
1269
|
} else if (terminalMailMissing) {
|
|
1059
1270
|
finalState = "completed";
|
|
1060
|
-
} else if (observedAnyEvent ||
|
|
1061
|
-
|
|
1271
|
+
} else if (observedAnyEvent || transitionedToInTurn) {
|
|
1272
|
+
// Turn produced events but did not complete — settle to
|
|
1273
|
+
// `between_turns`, NOT `working`, so the UI can distinguish a
|
|
1274
|
+
// spawn-per-turn worker waiting for its next mail batch from one
|
|
1275
|
+
// mid-execution. The runner flips the row back to `in_turn`
|
|
1276
|
+
// on the next batch when the parser fires its first event
|
|
1277
|
+
// (overstory-3087).
|
|
1278
|
+
finalState = "between_turns";
|
|
1062
1279
|
} else {
|
|
1063
1280
|
finalState = initialState;
|
|
1064
1281
|
}
|
package/src/commands/agents.ts
CHANGED
|
@@ -166,11 +166,20 @@ export async function discoverAgents(
|
|
|
166
166
|
|
|
167
167
|
/**
|
|
168
168
|
* Format the state icon for display.
|
|
169
|
+
*
|
|
170
|
+
* `in_turn` and `between_turns` (overstory-3087) render with the same cyan
|
|
171
|
+
* accent as `working` so a spawn-per-turn worker is visually grouped with
|
|
172
|
+
* other healthy/active agents in `ov agents` output. They use distinct
|
|
173
|
+
* glyphs ('>' vs '~') to mirror the dashboard / theme.ts mapping.
|
|
169
174
|
*/
|
|
170
175
|
function getStateIcon(state: string): string {
|
|
171
176
|
switch (state) {
|
|
172
177
|
case "working":
|
|
173
178
|
return color.cyan(">");
|
|
179
|
+
case "in_turn":
|
|
180
|
+
return color.cyan(">");
|
|
181
|
+
case "between_turns":
|
|
182
|
+
return color.cyan("~");
|
|
174
183
|
case "booting":
|
|
175
184
|
return color.green("-");
|
|
176
185
|
case "stalled":
|
|
@@ -1601,6 +1601,133 @@ describe("watchdog integration", () => {
|
|
|
1601
1601
|
expect(output).toContain("--watchdog");
|
|
1602
1602
|
expect(output).toContain("watchdog");
|
|
1603
1603
|
});
|
|
1604
|
+
|
|
1605
|
+
test("start help text includes --accept-existing-watchdog flag", async () => {
|
|
1606
|
+
const cmd = createCoordinatorCommand({});
|
|
1607
|
+
for (const sub of cmd.commands) {
|
|
1608
|
+
sub.exitOverride();
|
|
1609
|
+
}
|
|
1610
|
+
const output = await captureStdout(async () => {
|
|
1611
|
+
await cmd.parseAsync(["start", "--help"], { from: "user" }).catch(() => {});
|
|
1612
|
+
});
|
|
1613
|
+
expect(output).toContain("--accept-existing-watchdog");
|
|
1614
|
+
});
|
|
1615
|
+
});
|
|
1616
|
+
|
|
1617
|
+
// overstory-3f0c: detect leftover watchdog from a previous session before
|
|
1618
|
+
// spawning, so operators do not get unexpected watchdog supervision.
|
|
1619
|
+
describe("orphan watchdog detection (overstory-3f0c)", () => {
|
|
1620
|
+
// (a) start (no --watchdog) + isRunning=true -> throws AgentError with PID
|
|
1621
|
+
// and mention of --accept-existing-watchdog in the message
|
|
1622
|
+
test("rejects start with AgentError when no flag passed and watchdog already running", async () => {
|
|
1623
|
+
const { deps, watchdogCalls } = makeDeps({}, { running: true, startSuccess: true });
|
|
1624
|
+
const originalSleep = Bun.sleep;
|
|
1625
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
1626
|
+
|
|
1627
|
+
try {
|
|
1628
|
+
await coordinatorCommand(["start", "--json"], deps);
|
|
1629
|
+
expect.unreachable("should have thrown AgentError");
|
|
1630
|
+
} catch (err) {
|
|
1631
|
+
expect(err).toBeInstanceOf(AgentError);
|
|
1632
|
+
const ae = err as AgentError;
|
|
1633
|
+
expect(ae.message).toContain("Watchdog daemon");
|
|
1634
|
+
// PID is unavailable from the fake watchdog (no PID file written),
|
|
1635
|
+
// so the message reports "unknown PID" — but it must reference the
|
|
1636
|
+
// concept and the suppress flag explicitly.
|
|
1637
|
+
expect(ae.message).toMatch(/PID/);
|
|
1638
|
+
expect(ae.message).toContain("--accept-existing-watchdog");
|
|
1639
|
+
expect(ae.message).toContain("--watchdog");
|
|
1640
|
+
expect(ae.message).toContain("ov watch --kill-others");
|
|
1641
|
+
} finally {
|
|
1642
|
+
Bun.sleep = originalSleep;
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1645
|
+
// Detection ran but auto-start did NOT — the throw fired first.
|
|
1646
|
+
expect(watchdogCalls?.isRunning).toBeGreaterThanOrEqual(1);
|
|
1647
|
+
expect(watchdogCalls?.start).toBe(0);
|
|
1648
|
+
});
|
|
1649
|
+
|
|
1650
|
+
// (b) start --watchdog + isRunning=true -> does NOT throw;
|
|
1651
|
+
// watchdog.start() is still called once
|
|
1652
|
+
test("--watchdog with already-running daemon does NOT throw and still calls start()", async () => {
|
|
1653
|
+
const { deps, watchdogCalls } = makeDeps(
|
|
1654
|
+
{},
|
|
1655
|
+
{ running: true, startSuccess: false }, // startSuccess:false simulates the no-op-when-already-running return
|
|
1656
|
+
);
|
|
1657
|
+
const originalSleep = Bun.sleep;
|
|
1658
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
1659
|
+
|
|
1660
|
+
let output: string;
|
|
1661
|
+
try {
|
|
1662
|
+
output = await captureStdout(() =>
|
|
1663
|
+
coordinatorCommand(["start", "--watchdog", "--json"], deps),
|
|
1664
|
+
);
|
|
1665
|
+
} finally {
|
|
1666
|
+
Bun.sleep = originalSleep;
|
|
1667
|
+
}
|
|
1668
|
+
|
|
1669
|
+
expect(watchdogCalls?.start).toBe(1);
|
|
1670
|
+
const parsed = JSON.parse(output) as Record<string, unknown>;
|
|
1671
|
+
// reused-daemon sentinel keeps watchdog truthy in the JSON output
|
|
1672
|
+
expect(parsed.watchdog).toBe(true);
|
|
1673
|
+
expect(parsed.watchdogPreexisting).toBe(true);
|
|
1674
|
+
});
|
|
1675
|
+
|
|
1676
|
+
// (c) start --accept-existing-watchdog + isRunning=true -> does NOT throw;
|
|
1677
|
+
// coordinator starts normally; watchdog.start() is NOT called
|
|
1678
|
+
test("--accept-existing-watchdog allows start without calling watchdog.start()", async () => {
|
|
1679
|
+
const { deps, watchdogCalls } = makeDeps({}, { running: true, startSuccess: true });
|
|
1680
|
+
const originalSleep = Bun.sleep;
|
|
1681
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
1682
|
+
|
|
1683
|
+
let output: string;
|
|
1684
|
+
try {
|
|
1685
|
+
output = await captureStdout(() =>
|
|
1686
|
+
coordinatorCommand(["start", "--accept-existing-watchdog", "--json"], deps),
|
|
1687
|
+
);
|
|
1688
|
+
} finally {
|
|
1689
|
+
Bun.sleep = originalSleep;
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
expect(watchdogCalls?.start).toBe(0);
|
|
1693
|
+
const parsed = JSON.parse(output) as Record<string, unknown>;
|
|
1694
|
+
expect(parsed.watchdog).toBe(true);
|
|
1695
|
+
expect(parsed.watchdogPreexisting).toBe(true);
|
|
1696
|
+
});
|
|
1697
|
+
|
|
1698
|
+
// (d) start (no --watchdog) + isRunning=false -> no error, no start
|
|
1699
|
+
// (regression — preserves the original "no flag, no daemon activity" path)
|
|
1700
|
+
test("no flag + watchdog not running: starts normally without calling start()", async () => {
|
|
1701
|
+
const { deps, watchdogCalls } = makeDeps({}, { running: false, startSuccess: true });
|
|
1702
|
+
const originalSleep = Bun.sleep;
|
|
1703
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
1704
|
+
|
|
1705
|
+
let output: string;
|
|
1706
|
+
try {
|
|
1707
|
+
output = await captureStdout(() => coordinatorCommand(["start", "--json"], deps));
|
|
1708
|
+
} finally {
|
|
1709
|
+
Bun.sleep = originalSleep;
|
|
1710
|
+
}
|
|
1711
|
+
|
|
1712
|
+
expect(watchdogCalls?.start).toBe(0);
|
|
1713
|
+
const parsed = JSON.parse(output) as Record<string, unknown>;
|
|
1714
|
+
expect(parsed.watchdog).toBe(false);
|
|
1715
|
+
expect(parsed.watchdogPreexisting).toBe(false);
|
|
1716
|
+
});
|
|
1717
|
+
|
|
1718
|
+
test("orchestrator inherits the same orphan-watchdog detection", async () => {
|
|
1719
|
+
const { deps, watchdogCalls } = makeDeps({}, { running: true });
|
|
1720
|
+
const originalSleep = Bun.sleep;
|
|
1721
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
1722
|
+
|
|
1723
|
+
try {
|
|
1724
|
+
await expect(orchestratorCommand(["start", "--json"], deps)).rejects.toThrow(AgentError);
|
|
1725
|
+
} finally {
|
|
1726
|
+
Bun.sleep = originalSleep;
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
expect(watchdogCalls?.start).toBe(0);
|
|
1730
|
+
});
|
|
1604
1731
|
});
|
|
1605
1732
|
});
|
|
1606
1733
|
|
|
@@ -351,6 +351,14 @@ export interface CoordinatorSessionOptions {
|
|
|
351
351
|
* the web UI's POST /api/coordinator/start endpoint.
|
|
352
352
|
*/
|
|
353
353
|
headless?: boolean;
|
|
354
|
+
/**
|
|
355
|
+
* Acknowledge that a watchdog daemon from a previous session may already be
|
|
356
|
+
* running and should be allowed to supervise this coordinator. Without this
|
|
357
|
+
* (or `--watchdog`), the start command refuses to spawn when a leftover
|
|
358
|
+
* daemon is detected, to surface the "watchdog persists across runs" trap
|
|
359
|
+
* that overstory-3f0c was filed for.
|
|
360
|
+
*/
|
|
361
|
+
acceptExistingWatchdog?: boolean;
|
|
354
362
|
}
|
|
355
363
|
|
|
356
364
|
/**
|
|
@@ -385,6 +393,7 @@ export async function startCoordinatorSession(
|
|
|
385
393
|
displayName: displayNameOpt,
|
|
386
394
|
beaconBuilder: beaconBuilderOpt,
|
|
387
395
|
headless: headlessFlag,
|
|
396
|
+
acceptExistingWatchdog: acceptExistingWatchdogFlag,
|
|
388
397
|
} = opts;
|
|
389
398
|
|
|
390
399
|
const coordinatorName = agentNameOpt ?? coordinatorNameOpt ?? COORDINATOR_NAME;
|
|
@@ -406,6 +415,25 @@ export async function startCoordinatorSession(
|
|
|
406
415
|
const monitor = deps._monitor ?? createDefaultMonitor(projectRoot);
|
|
407
416
|
const tmuxSession = coordinatorTmuxSession(config.project.name, coordinatorName);
|
|
408
417
|
|
|
418
|
+
// Detect leftover watchdog daemon from a previous session (overstory-3f0c).
|
|
419
|
+
// If a watchdog is already running and the operator did not pass --watchdog
|
|
420
|
+
// or --accept-existing-watchdog, refuse to start: a persistent daemon will
|
|
421
|
+
// supervise this coordinator with policy decided by the original invocation,
|
|
422
|
+
// not the current one. This prevents "I didn't run --watchdog, why is the
|
|
423
|
+
// watchdog killing things?" surprises.
|
|
424
|
+
const watchdogAlreadyRunning = await watchdog.isRunning();
|
|
425
|
+
if (watchdogAlreadyRunning && !watchdogFlag && !acceptExistingWatchdogFlag) {
|
|
426
|
+
const existingPid = await readWatchdogPid(projectRoot);
|
|
427
|
+
const pidLabel = existingPid !== null ? `PID ${existingPid}` : "unknown PID";
|
|
428
|
+
throw new AgentError(
|
|
429
|
+
`Watchdog daemon (${pidLabel}) is already running from a previous session. ` +
|
|
430
|
+
`It will supervise this ${displayName.toLowerCase()} run and may take escalation actions you did not opt into. ` +
|
|
431
|
+
`To proceed: pass --watchdog to acknowledge, pass --accept-existing-watchdog to suppress this check, ` +
|
|
432
|
+
`or run 'ov watch --kill-others' (or remove .overstory/watchdog.pid) first.`,
|
|
433
|
+
{ agentName: coordinatorName },
|
|
434
|
+
);
|
|
435
|
+
}
|
|
436
|
+
|
|
409
437
|
// Check for existing coordinator session with the same name
|
|
410
438
|
const overstoryDir = join(projectRoot, ".overstory");
|
|
411
439
|
const { store } = openSessionStore(overstoryDir);
|
|
@@ -589,9 +617,21 @@ export async function startCoordinatorSession(
|
|
|
589
617
|
if (watchdogResult) {
|
|
590
618
|
watchdogPid = watchdogResult.pid;
|
|
591
619
|
if (!json) printHint("Watchdog started");
|
|
620
|
+
} else if (watchdogAlreadyRunning) {
|
|
621
|
+
// createDefaultWatchdog.start() returns null when an existing PID
|
|
622
|
+
// is alive — that's a no-op success, not a failure. Reuse the
|
|
623
|
+
// existing daemon. Sentinel value keeps `watchdogPid !== undefined`
|
|
624
|
+
// truthy in the JSON output.
|
|
625
|
+
watchdogPid = -1;
|
|
626
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
592
627
|
} else {
|
|
593
628
|
if (!json) printWarning("Watchdog failed to start");
|
|
594
629
|
}
|
|
630
|
+
} else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
|
|
631
|
+
// --accept-existing-watchdog without --watchdog: surface that an
|
|
632
|
+
// existing daemon is supervising this run, but do not call start().
|
|
633
|
+
watchdogPid = -1;
|
|
634
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
595
635
|
}
|
|
596
636
|
let monitorPid: number | undefined;
|
|
597
637
|
if (monitorFlag) {
|
|
@@ -615,7 +655,8 @@ export async function startCoordinatorSession(
|
|
|
615
655
|
projectRoot,
|
|
616
656
|
pid: headlessProc.pid,
|
|
617
657
|
headless: true,
|
|
618
|
-
watchdog:
|
|
658
|
+
watchdog: watchdogPid !== undefined,
|
|
659
|
+
watchdogPreexisting: watchdogAlreadyRunning,
|
|
619
660
|
monitor: monitorFlag ? monitorPid !== undefined : false,
|
|
620
661
|
};
|
|
621
662
|
|
|
@@ -755,16 +796,28 @@ export async function startCoordinatorSession(
|
|
|
755
796
|
await tmux.sendKeys(tmuxSession, "");
|
|
756
797
|
}
|
|
757
798
|
|
|
758
|
-
// Auto-start watchdog if --watchdog flag is present
|
|
799
|
+
// Auto-start watchdog if --watchdog flag is present.
|
|
759
800
|
let watchdogPid: number | undefined;
|
|
760
801
|
if (watchdogFlag) {
|
|
761
802
|
const watchdogResult = await watchdog.start();
|
|
762
803
|
if (watchdogResult) {
|
|
763
804
|
watchdogPid = watchdogResult.pid;
|
|
764
805
|
if (!json) printHint("Watchdog started");
|
|
806
|
+
} else if (watchdogAlreadyRunning) {
|
|
807
|
+
// createDefaultWatchdog.start() returns null when an existing PID
|
|
808
|
+
// is alive — that's a no-op success, not a failure. Reuse the
|
|
809
|
+
// existing daemon. Sentinel value keeps `watchdogPid !== undefined`
|
|
810
|
+
// truthy in the JSON output.
|
|
811
|
+
watchdogPid = -1;
|
|
812
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
765
813
|
} else {
|
|
766
814
|
if (!json) printWarning("Watchdog failed to start");
|
|
767
815
|
}
|
|
816
|
+
} else if (watchdogAlreadyRunning && acceptExistingWatchdogFlag) {
|
|
817
|
+
// --accept-existing-watchdog without --watchdog: surface that an
|
|
818
|
+
// existing daemon is supervising this run, but do not call start().
|
|
819
|
+
watchdogPid = -1;
|
|
820
|
+
if (!json) printHint("Watchdog already running, reusing existing daemon");
|
|
768
821
|
}
|
|
769
822
|
|
|
770
823
|
// Auto-start monitor if --monitor flag is present and tier2 is enabled
|
|
@@ -789,7 +842,8 @@ export async function startCoordinatorSession(
|
|
|
789
842
|
tmuxSession,
|
|
790
843
|
projectRoot,
|
|
791
844
|
pid,
|
|
792
|
-
watchdog:
|
|
845
|
+
watchdog: watchdogPid !== undefined,
|
|
846
|
+
watchdogPreexisting: watchdogAlreadyRunning,
|
|
793
847
|
monitor: monitorFlag ? monitorPid !== undefined : false,
|
|
794
848
|
};
|
|
795
849
|
|
|
@@ -815,7 +869,14 @@ export async function startCoordinatorSession(
|
|
|
815
869
|
|
|
816
870
|
async function startPersistentAgent(
|
|
817
871
|
spec: PersistentAgentSpec,
|
|
818
|
-
opts: {
|
|
872
|
+
opts: {
|
|
873
|
+
json: boolean;
|
|
874
|
+
attach: boolean;
|
|
875
|
+
watchdog: boolean;
|
|
876
|
+
monitor: boolean;
|
|
877
|
+
profile?: string;
|
|
878
|
+
acceptExistingWatchdog?: boolean;
|
|
879
|
+
},
|
|
819
880
|
deps: CoordinatorDeps = {},
|
|
820
881
|
): Promise<void> {
|
|
821
882
|
await startCoordinatorSession(
|
|
@@ -1557,6 +1618,10 @@ export function createPersistentAgentCommand(
|
|
|
1557
1618
|
.option("--attach", "Always attach to tmux session after start")
|
|
1558
1619
|
.option("--no-attach", "Never attach to tmux session after start")
|
|
1559
1620
|
.option("--watchdog", `Auto-start watchdog daemon with ${spec.commandName}`)
|
|
1621
|
+
.option(
|
|
1622
|
+
"--accept-existing-watchdog",
|
|
1623
|
+
"Continue when a watchdog daemon from a previous session is already running (it will supervise this run)",
|
|
1624
|
+
)
|
|
1560
1625
|
.option("--monitor", `Auto-start Tier 2 monitor agent with ${spec.commandName}`)
|
|
1561
1626
|
.option("--profile <name>", "Canopy profile to apply to spawned agents")
|
|
1562
1627
|
.option("--json", "Output as JSON")
|
|
@@ -1564,6 +1629,7 @@ export function createPersistentAgentCommand(
|
|
|
1564
1629
|
async (opts: {
|
|
1565
1630
|
attach?: boolean;
|
|
1566
1631
|
watchdog?: boolean;
|
|
1632
|
+
acceptExistingWatchdog?: boolean;
|
|
1567
1633
|
monitor?: boolean;
|
|
1568
1634
|
json?: boolean;
|
|
1569
1635
|
profile?: string;
|
|
@@ -1576,6 +1642,7 @@ export function createPersistentAgentCommand(
|
|
|
1576
1642
|
json: opts.json ?? false,
|
|
1577
1643
|
attach: shouldAttach,
|
|
1578
1644
|
watchdog: opts.watchdog ?? false,
|
|
1645
|
+
acceptExistingWatchdog: opts.acceptExistingWatchdog ?? false,
|
|
1579
1646
|
monitor: opts.monitor ?? false,
|
|
1580
1647
|
profile: opts.profile,
|
|
1581
1648
|
},
|
|
@@ -615,7 +615,7 @@ export function renderAgentPanel(
|
|
|
615
615
|
|
|
616
616
|
// Sort agents: active first, then completed, then zombie
|
|
617
617
|
const agents = [...data.status.agents].sort((a, b) => {
|
|
618
|
-
const activeStates = ["working", "booting", "stalled"];
|
|
618
|
+
const activeStates = ["working", "in_turn", "between_turns", "booting", "stalled"];
|
|
619
619
|
const aActive = activeStates.includes(a.state);
|
|
620
620
|
const bActive = activeStates.includes(b.state);
|
|
621
621
|
if (aActive && !bActive) return -1;
|