@os-eco/overstory-cli 0.10.3 → 0.11.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/agents/builder.md +10 -1
- package/agents/lead.md +106 -5
- package/package.json +1 -1
- package/src/agents/headless-mail-injector.ts +8 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +56 -0
- package/src/agents/overlay.ts +33 -0
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-runner.test.ts +862 -0
- package/src/agents/turn-runner.ts +225 -8
- package/src/commands/agents.ts +9 -0
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +71 -4
- package/src/commands/dashboard.ts +1 -1
- package/src/commands/log.test.ts +131 -0
- package/src/commands/log.ts +37 -2
- package/src/commands/merge.test.ts +118 -0
- package/src/commands/merge.ts +51 -8
- package/src/commands/sling.test.ts +104 -0
- package/src/commands/sling.ts +95 -8
- package/src/commands/stop.test.ts +81 -0
- package/src/index.ts +5 -1
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/logging/theme.ts +4 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/sessions/store.test.ts +267 -5
- package/src/sessions/store.ts +105 -7
- package/src/types.ts +51 -1
- package/src/watchdog/daemon.test.ts +124 -2
- package/src/watchdog/daemon.ts +27 -12
- package/src/watchdog/health.test.ts +133 -8
- package/src/watchdog/health.ts +37 -5
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/tmux.test.ts +25 -0
- package/src/worktree/tmux.ts +17 -0
- package/templates/overlay.md.tmpl +2 -0
|
@@ -534,6 +534,108 @@ describe("runTurn", () => {
|
|
|
534
534
|
expect(after?.state).toBe("completed");
|
|
535
535
|
});
|
|
536
536
|
|
|
537
|
+
test("turn that runs but does not complete settles to between_turns, not working (overstory-3087)", async () => {
|
|
538
|
+
// Spawn-per-turn substate split: a turn that produced events but
|
|
539
|
+
// neither delivered the terminal mail nor aborted must end in `between_turns`
|
|
540
|
+
// so the UI can tell a worker waiting for its next mail batch from
|
|
541
|
+
// one mid-execution. Pre-3087 this settled to `working`.
|
|
542
|
+
seedSession(ctx.sessionsDbPath, { agentName: "settler", state: "booting" });
|
|
543
|
+
const { runtime } = makeSpyRuntime();
|
|
544
|
+
const fake = makeFakeProc();
|
|
545
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
546
|
+
// Force is_error=true so the runner does NOT classify this as a
|
|
547
|
+
// clean exit (which would settle to `completed` via the
|
|
548
|
+
// terminal-mail-missing path). is_error=true keeps cleanResult
|
|
549
|
+
// false, sending us into the observedAnyEvent → between_turns
|
|
550
|
+
// branch we want to test.
|
|
551
|
+
emitFakeTurn(fake, { sessionId: "settler-sid", isError: true });
|
|
552
|
+
fake._exit(0);
|
|
553
|
+
return fake;
|
|
554
|
+
};
|
|
555
|
+
|
|
556
|
+
const result = await runTurn(makeRunOpts(ctx, "settler", { runtime, _spawnFn: spawnFn }));
|
|
557
|
+
|
|
558
|
+
expect(result.cleanResult).toBe(false);
|
|
559
|
+
expect(result.terminalMailObserved).toBe(false);
|
|
560
|
+
expect(result.terminalMailMissing).toBe(false);
|
|
561
|
+
expect(result.finalState).toBe("between_turns");
|
|
562
|
+
|
|
563
|
+
const after = readSession(ctx.sessionsDbPath, "settler");
|
|
564
|
+
expect(after?.state).toBe("between_turns");
|
|
565
|
+
});
|
|
566
|
+
|
|
567
|
+
test("first parser event transitions booting → in_turn (overstory-3087)", async () => {
|
|
568
|
+
// The mid-turn "first event" hook must flip the row out of `booting`
|
|
569
|
+
// (or `between_turns`/`working`) into `in_turn` so observers see the
|
|
570
|
+
// agent as actively executing, distinct from the idle waiting state.
|
|
571
|
+
seedSession(ctx.sessionsDbPath, { agentName: "boots", state: "booting" });
|
|
572
|
+
const { runtime } = makeSpyRuntime();
|
|
573
|
+
const fake = makeFakeProc();
|
|
574
|
+
// Mutable ref so the IIFE assignment is visible to the type checker.
|
|
575
|
+
const captured: { state: string | null } = { state: null };
|
|
576
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
577
|
+
(async () => {
|
|
578
|
+
// Push the init event, then sample the row before result.
|
|
579
|
+
fake._pushLine(
|
|
580
|
+
JSON.stringify({
|
|
581
|
+
type: "system",
|
|
582
|
+
subtype: "init",
|
|
583
|
+
session_id: "boots-sid",
|
|
584
|
+
model: "claude-test",
|
|
585
|
+
}),
|
|
586
|
+
);
|
|
587
|
+
// Yield the event loop so the parser drains the init event
|
|
588
|
+
// and updates the session row before we read it.
|
|
589
|
+
await Bun.sleep(20);
|
|
590
|
+
captured.state = readSession(ctx.sessionsDbPath, "boots")?.state ?? null;
|
|
591
|
+
// Send is_error=true so we settle to between_turns rather than
|
|
592
|
+
// the contract-violation completed path — this test is about
|
|
593
|
+
// the mid-turn transition, not the terminal classification.
|
|
594
|
+
emitFakeTurn(fake, { sessionId: "boots-sid", isError: true });
|
|
595
|
+
fake._exit(0);
|
|
596
|
+
})();
|
|
597
|
+
return fake;
|
|
598
|
+
};
|
|
599
|
+
|
|
600
|
+
await runTurn(makeRunOpts(ctx, "boots", { runtime, _spawnFn: spawnFn }));
|
|
601
|
+
|
|
602
|
+
expect(captured.state).toBe("in_turn");
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
test("between_turns → in_turn → between_turns cycle on a follow-up batch (overstory-3087)", async () => {
|
|
606
|
+
// A spawn-per-turn worker that finished its first turn (state=
|
|
607
|
+
// between_turns) must flip back to in_turn when the next mail batch
|
|
608
|
+
// fires its first parser event, and settle back to between_turns
|
|
609
|
+
// when the turn ends without a terminal mail.
|
|
610
|
+
seedSession(ctx.sessionsDbPath, { agentName: "cycle", state: "between_turns" });
|
|
611
|
+
const { runtime } = makeSpyRuntime();
|
|
612
|
+
const fake = makeFakeProc();
|
|
613
|
+
const captured: { midTurnState: string | null } = { midTurnState: null };
|
|
614
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
615
|
+
(async () => {
|
|
616
|
+
fake._pushLine(
|
|
617
|
+
JSON.stringify({
|
|
618
|
+
type: "system",
|
|
619
|
+
subtype: "init",
|
|
620
|
+
session_id: "cycle-sid",
|
|
621
|
+
model: "claude-test",
|
|
622
|
+
}),
|
|
623
|
+
);
|
|
624
|
+
await Bun.sleep(20);
|
|
625
|
+
captured.midTurnState = readSession(ctx.sessionsDbPath, "cycle")?.state ?? null;
|
|
626
|
+
emitFakeTurn(fake, { sessionId: "cycle-sid", isError: true });
|
|
627
|
+
fake._exit(0);
|
|
628
|
+
})();
|
|
629
|
+
return fake;
|
|
630
|
+
};
|
|
631
|
+
|
|
632
|
+
const result = await runTurn(makeRunOpts(ctx, "cycle", { runtime, _spawnFn: spawnFn }));
|
|
633
|
+
|
|
634
|
+
expect(captured.midTurnState).toBe("in_turn");
|
|
635
|
+
expect(result.initialState).toBe("between_turns");
|
|
636
|
+
expect(result.finalState).toBe("between_turns");
|
|
637
|
+
});
|
|
638
|
+
|
|
537
639
|
test("clean exit but no worker_done → contract violation, completed + error log (overstory-6071)", async () => {
|
|
538
640
|
// Pre-fix: claude exiting cleanly without sending the capability's
|
|
539
641
|
// terminal mail left the session at `working` forever — the process is
|
|
@@ -1006,6 +1108,169 @@ describe("runTurn", () => {
|
|
|
1006
1108
|
}
|
|
1007
1109
|
});
|
|
1008
1110
|
|
|
1111
|
+
// --- Resume-path parent-notify (overstory-de3c) ---
|
|
1112
|
+
//
|
|
1113
|
+
// The witnessed bug: a spawn-per-turn worker that survived a first-turn
|
|
1114
|
+
// parser stall (worker_died emitted, state→zombie) was re-dispatched by its
|
|
1115
|
+
// parent via `ov sling --recover`. The resumed turn ran, then transitioned
|
|
1116
|
+
// to zombie SILENTLY — no second worker_died mail was ever sent. The lead
|
|
1117
|
+
// blocked forever.
|
|
1118
|
+
//
|
|
1119
|
+
// These tests pin down whether the runner itself is responsible. Each seeds
|
|
1120
|
+
// `claudeSessionId` so the runner exercises the --resume code path, and
|
|
1121
|
+
// asserts that worker_died is still emitted on stall / abort / clean-exit-
|
|
1122
|
+
// without-terminal-mail. If these PASS the runner is exonerated and the
|
|
1123
|
+
// fix is upstream (sling.ts re-spawn upsert dropping parentAgent — H1).
|
|
1124
|
+
|
|
1125
|
+
test("resume-stall: parser stall on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1126
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1127
|
+
agentName: "child-resume-stall",
|
|
1128
|
+
state: "working",
|
|
1129
|
+
parentAgent: "lead-r",
|
|
1130
|
+
taskId: "task-de3c-stall",
|
|
1131
|
+
claudeSessionId: "prior-session",
|
|
1132
|
+
});
|
|
1133
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1134
|
+
const fake = makeFakeProc();
|
|
1135
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1136
|
+
// Emit nothing — the resumed turn parser-stalls.
|
|
1137
|
+
return fake;
|
|
1138
|
+
};
|
|
1139
|
+
|
|
1140
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1141
|
+
try {
|
|
1142
|
+
const result = await runTurn({
|
|
1143
|
+
...makeRunOpts(ctx, "child-resume-stall", {
|
|
1144
|
+
runtime,
|
|
1145
|
+
_spawnFn: spawnFn,
|
|
1146
|
+
}),
|
|
1147
|
+
_mailStore: sharedMail,
|
|
1148
|
+
eventStallTimeoutMs: 50,
|
|
1149
|
+
sigkillDelayMs: 25,
|
|
1150
|
+
});
|
|
1151
|
+
|
|
1152
|
+
expect(result.stallAborted).toBe(true);
|
|
1153
|
+
expect(result.finalState).toBe("zombie");
|
|
1154
|
+
|
|
1155
|
+
// The runtime received the prior session id (resume path exercised).
|
|
1156
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1157
|
+
|
|
1158
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1159
|
+
expect(inbox.length).toBe(1);
|
|
1160
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1161
|
+
terminatedBy?: string;
|
|
1162
|
+
reason?: string;
|
|
1163
|
+
agentName?: string;
|
|
1164
|
+
};
|
|
1165
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1166
|
+
expect(payload.reason).toContain("stalled");
|
|
1167
|
+
expect(payload.agentName).toBe("child-resume-stall");
|
|
1168
|
+
} finally {
|
|
1169
|
+
sharedMail.close();
|
|
1170
|
+
}
|
|
1171
|
+
});
|
|
1172
|
+
|
|
1173
|
+
test("resume-abort: operator abort on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1174
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1175
|
+
agentName: "child-resume-abort",
|
|
1176
|
+
state: "working",
|
|
1177
|
+
parentAgent: "lead-r",
|
|
1178
|
+
taskId: "task-de3c-abort",
|
|
1179
|
+
claudeSessionId: "prior-session",
|
|
1180
|
+
});
|
|
1181
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1182
|
+
const fake = makeFakeProc();
|
|
1183
|
+
const ac = new AbortController();
|
|
1184
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1185
|
+
fake._pushLine(
|
|
1186
|
+
JSON.stringify({
|
|
1187
|
+
type: "system",
|
|
1188
|
+
subtype: "init",
|
|
1189
|
+
session_id: "prior-session",
|
|
1190
|
+
}),
|
|
1191
|
+
);
|
|
1192
|
+
return fake;
|
|
1193
|
+
};
|
|
1194
|
+
|
|
1195
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1196
|
+
try {
|
|
1197
|
+
const runPromise = runTurn({
|
|
1198
|
+
...makeRunOpts(ctx, "child-resume-abort", {
|
|
1199
|
+
runtime,
|
|
1200
|
+
_spawnFn: spawnFn,
|
|
1201
|
+
abortSignal: ac.signal,
|
|
1202
|
+
sigkillDelayMs: 25,
|
|
1203
|
+
}),
|
|
1204
|
+
_mailStore: sharedMail,
|
|
1205
|
+
});
|
|
1206
|
+
await Bun.sleep(60);
|
|
1207
|
+
ac.abort();
|
|
1208
|
+
const result = await runPromise;
|
|
1209
|
+
|
|
1210
|
+
expect(result.finalState).toBe("zombie");
|
|
1211
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1212
|
+
|
|
1213
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1214
|
+
expect(inbox.length).toBe(1);
|
|
1215
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1216
|
+
terminatedBy?: string;
|
|
1217
|
+
reason?: string;
|
|
1218
|
+
agentName?: string;
|
|
1219
|
+
};
|
|
1220
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1221
|
+
expect(payload.reason).toContain("Aborted");
|
|
1222
|
+
expect(payload.agentName).toBe("child-resume-abort");
|
|
1223
|
+
} finally {
|
|
1224
|
+
sharedMail.close();
|
|
1225
|
+
}
|
|
1226
|
+
});
|
|
1227
|
+
|
|
1228
|
+
test("resume-terminalMailMissing: clean exit on a resumed session still emits worker_died (overstory-de3c)", async () => {
|
|
1229
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1230
|
+
agentName: "child-resume-noop",
|
|
1231
|
+
state: "working",
|
|
1232
|
+
parentAgent: "lead-r",
|
|
1233
|
+
taskId: "task-de3c-noop",
|
|
1234
|
+
claudeSessionId: "prior-session",
|
|
1235
|
+
});
|
|
1236
|
+
const { runtime, spawnCalls } = makeSpyRuntime();
|
|
1237
|
+
const fake = makeFakeProc();
|
|
1238
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1239
|
+
emitFakeTurn(fake, { sessionId: "prior-session", isError: false });
|
|
1240
|
+
fake._exit(0);
|
|
1241
|
+
return fake;
|
|
1242
|
+
};
|
|
1243
|
+
|
|
1244
|
+
const sharedMail = createMailStore(ctx.mailDbPath);
|
|
1245
|
+
try {
|
|
1246
|
+
const result = await runTurn({
|
|
1247
|
+
...makeRunOpts(ctx, "child-resume-noop", {
|
|
1248
|
+
runtime,
|
|
1249
|
+
_spawnFn: spawnFn,
|
|
1250
|
+
}),
|
|
1251
|
+
_mailStore: sharedMail,
|
|
1252
|
+
});
|
|
1253
|
+
|
|
1254
|
+
expect(result.cleanResult).toBe(true);
|
|
1255
|
+
expect(result.terminalMailMissing).toBe(true);
|
|
1256
|
+
expect(result.finalState).toBe("completed");
|
|
1257
|
+
expect(spawnCalls[0]?.resumeSessionId).toBe("prior-session");
|
|
1258
|
+
|
|
1259
|
+
const inbox = sharedMail.getAll({ to: "lead-r", type: "worker_died" });
|
|
1260
|
+
expect(inbox.length).toBe(1);
|
|
1261
|
+
const payload = JSON.parse(inbox[0]?.payload ?? "{}") as {
|
|
1262
|
+
terminatedBy?: string;
|
|
1263
|
+
reason?: string;
|
|
1264
|
+
agentName?: string;
|
|
1265
|
+
};
|
|
1266
|
+
expect(payload.terminatedBy).toBe("runner");
|
|
1267
|
+
expect(payload.reason).toContain("Clean exit without terminal mail");
|
|
1268
|
+
expect(payload.agentName).toBe("child-resume-noop");
|
|
1269
|
+
} finally {
|
|
1270
|
+
sharedMail.close();
|
|
1271
|
+
}
|
|
1272
|
+
});
|
|
1273
|
+
|
|
1009
1274
|
test("terminalMailMissing: emits worker_died to parent (overstory-4159)", async () => {
|
|
1010
1275
|
// Silent-no-op: claude exits cleanly but never sends worker_done. The
|
|
1011
1276
|
// lead would otherwise block forever waiting for a terminal mail.
|
|
@@ -1447,4 +1712,601 @@ describe("runTurn", () => {
|
|
|
1447
1712
|
// turn.pid must still be cleaned up regardless.
|
|
1448
1713
|
expect(existsSync(turnPidPathFor(ctx, "ss-fail"))).toBe(false);
|
|
1449
1714
|
});
|
|
1715
|
+
|
|
1716
|
+
// ---------- mid-turn lastActivity refresh (overstory-8e61) ----------
|
|
1717
|
+
//
|
|
1718
|
+
// The watchdog's design (src/watchdog/health.ts:242-243) documents that the
|
|
1719
|
+
// runner advances `session.lastActivity` per parser event during a turn.
|
|
1720
|
+
// Without that, a long-running turn looks stalled to the watchdog and the
|
|
1721
|
+
// agent gets zombified mid-flight. These tests pin the per-event refresh
|
|
1722
|
+
// behavior added inside the parser loop.
|
|
1723
|
+
|
|
1724
|
+
test("mid-turn refresh: lastActivity advances when interval=0 forces per-event refresh", async () => {
|
|
1725
|
+
const startedAt = new Date(Date.now() - 60_000).toISOString();
|
|
1726
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1727
|
+
agentName: "midturn-A",
|
|
1728
|
+
state: "working",
|
|
1729
|
+
startedAt,
|
|
1730
|
+
lastActivity: startedAt,
|
|
1731
|
+
});
|
|
1732
|
+
const { runtime } = makeSpyRuntime();
|
|
1733
|
+
const fake = makeFakeProc();
|
|
1734
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1735
|
+
emitFakeTurn(fake, { sessionId: "midturn-A-session" });
|
|
1736
|
+
fake._exit(0);
|
|
1737
|
+
return fake;
|
|
1738
|
+
};
|
|
1739
|
+
|
|
1740
|
+
await runTurn({
|
|
1741
|
+
...makeRunOpts(ctx, "midturn-A", { runtime, _spawnFn: spawnFn }),
|
|
1742
|
+
lastActivityRefreshIntervalMs: 0,
|
|
1743
|
+
});
|
|
1744
|
+
|
|
1745
|
+
const after = readSession(ctx.sessionsDbPath, "midturn-A");
|
|
1746
|
+
expect(after?.lastActivity).not.toBe(startedAt);
|
|
1747
|
+
expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
|
|
1748
|
+
new Date(startedAt).getTime(),
|
|
1749
|
+
);
|
|
1750
|
+
});
|
|
1751
|
+
|
|
1752
|
+
test("mid-turn refresh: throttle gates updates by simulated time", async () => {
|
|
1753
|
+
seedSession(ctx.sessionsDbPath, { agentName: "midturn-B", state: "working" });
|
|
1754
|
+
const { runtime } = makeSpyRuntime();
|
|
1755
|
+
|
|
1756
|
+
// Controlled sim clock. `_now` is invoked many times during a turn (for
|
|
1757
|
+
// startedAtMs, log timestamps, durationMs) — only the in-loop calls
|
|
1758
|
+
// matter for the throttle. We advance simTime synchronously between
|
|
1759
|
+
// pushes and yield to the parser between each push so the runner reads
|
|
1760
|
+
// the simTime we set just prior. simTime starts well above the throttle
|
|
1761
|
+
// interval so the first event fires (initial lastActivityRefreshMs=0).
|
|
1762
|
+
let simTime = 5000;
|
|
1763
|
+
const _now = (): Date => new Date(simTime);
|
|
1764
|
+
|
|
1765
|
+
let refreshes = 0;
|
|
1766
|
+
const _onLastActivityRefresh = (): void => {
|
|
1767
|
+
refreshes++;
|
|
1768
|
+
};
|
|
1769
|
+
|
|
1770
|
+
const fake = makeFakeProc();
|
|
1771
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1772
|
+
(async () => {
|
|
1773
|
+
const sessionId = "midturn-B-session";
|
|
1774
|
+
// Use `system` lines because the claude parser does not batch
|
|
1775
|
+
// them — every system line yields exactly one status event,
|
|
1776
|
+
// driving one runner-loop iteration each. Assistant text would
|
|
1777
|
+
// coalesce inside a flush window and defeat the per-event count.
|
|
1778
|
+
const stamps = [5000, 5500, 6000, 6500, 7000, 7500];
|
|
1779
|
+
for (let i = 0; i < stamps.length; i++) {
|
|
1780
|
+
simTime = stamps[i] ?? 0;
|
|
1781
|
+
fake._pushLine(
|
|
1782
|
+
JSON.stringify({
|
|
1783
|
+
type: "system",
|
|
1784
|
+
subtype: i === 0 ? "init" : "progress",
|
|
1785
|
+
session_id: sessionId,
|
|
1786
|
+
}),
|
|
1787
|
+
);
|
|
1788
|
+
// Yield so the for-await loop body runs to completion against
|
|
1789
|
+
// the simTime value we just set.
|
|
1790
|
+
await Bun.sleep(20);
|
|
1791
|
+
}
|
|
1792
|
+
// Trailing result at the same simTime as the last chunk; with a
|
|
1793
|
+
// 1000ms throttle and last refresh at simTime=7000, this event
|
|
1794
|
+
// at simTime=7500 (delta=500) does not fire.
|
|
1795
|
+
fake._pushLine(
|
|
1796
|
+
JSON.stringify({
|
|
1797
|
+
type: "result",
|
|
1798
|
+
subtype: "success",
|
|
1799
|
+
session_id: sessionId,
|
|
1800
|
+
result: "done",
|
|
1801
|
+
is_error: false,
|
|
1802
|
+
duration_ms: 50,
|
|
1803
|
+
num_turns: 1,
|
|
1804
|
+
}),
|
|
1805
|
+
);
|
|
1806
|
+
await Bun.sleep(20);
|
|
1807
|
+
fake._exit(0);
|
|
1808
|
+
})();
|
|
1809
|
+
return fake;
|
|
1810
|
+
};
|
|
1811
|
+
|
|
1812
|
+
await runTurn({
|
|
1813
|
+
...makeRunOpts(ctx, "midturn-B", { runtime, _spawnFn: spawnFn }),
|
|
1814
|
+
lastActivityRefreshIntervalMs: 1000,
|
|
1815
|
+
_now,
|
|
1816
|
+
_onLastActivityRefresh,
|
|
1817
|
+
});
|
|
1818
|
+
|
|
1819
|
+
// Stamps 5000, 6000, 7000 fire (gap >= 1000). Stamps 5500, 6500, 7500
|
|
1820
|
+
// are throttled (gap = 500). The trailing result event at 7500 is also
|
|
1821
|
+
// throttled. Total expected = 3.
|
|
1822
|
+
expect(refreshes).toBe(3);
|
|
1823
|
+
});
|
|
1824
|
+
|
|
1825
|
+
test("mid-turn refresh: parser throw still leaves lastActivity advanced (overstory-8e61)", async () => {
|
|
1826
|
+
// The end-of-turn `updateSessionLastActivity` (around turn-runner.ts:1112)
|
|
1827
|
+
// does NOT fire when the parser iteration throws — the catch path
|
|
1828
|
+
// rethrows before reaching the cleanup write. The mid-turn refresh
|
|
1829
|
+
// covers this gap so a parser-error turn still leaves lastActivity
|
|
1830
|
+
// fresh, mirroring the documented design at src/watchdog/health.ts:242-243.
|
|
1831
|
+
const startedAt = new Date(Date.now() - 60_000).toISOString();
|
|
1832
|
+
seedSession(ctx.sessionsDbPath, {
|
|
1833
|
+
agentName: "midturn-C",
|
|
1834
|
+
state: "working",
|
|
1835
|
+
startedAt,
|
|
1836
|
+
lastActivity: startedAt,
|
|
1837
|
+
});
|
|
1838
|
+
|
|
1839
|
+
// Custom runtime: yield two valid events, then throw on the next read.
|
|
1840
|
+
// Mirrors a malformed stream-json line arriving after some good events.
|
|
1841
|
+
const base = new ClaudeRuntime();
|
|
1842
|
+
let yielded = 0;
|
|
1843
|
+
const yieldThenThrow: AsyncIterable<unknown> = {
|
|
1844
|
+
[Symbol.asyncIterator]() {
|
|
1845
|
+
return {
|
|
1846
|
+
next(): Promise<IteratorResult<unknown>> {
|
|
1847
|
+
if (yielded++ < 2) {
|
|
1848
|
+
return Promise.resolve({
|
|
1849
|
+
value: {
|
|
1850
|
+
type: "assistant_message",
|
|
1851
|
+
timestamp: new Date().toISOString(),
|
|
1852
|
+
},
|
|
1853
|
+
done: false,
|
|
1854
|
+
});
|
|
1855
|
+
}
|
|
1856
|
+
return Promise.reject(new Error("synthetic stream-json parse error"));
|
|
1857
|
+
},
|
|
1858
|
+
};
|
|
1859
|
+
},
|
|
1860
|
+
};
|
|
1861
|
+
const broken: AgentRuntime = {
|
|
1862
|
+
...base,
|
|
1863
|
+
id: base.id,
|
|
1864
|
+
stability: base.stability,
|
|
1865
|
+
instructionPath: base.instructionPath,
|
|
1866
|
+
buildSpawnCommand: base.buildSpawnCommand.bind(base),
|
|
1867
|
+
buildPrintCommand: base.buildPrintCommand.bind(base),
|
|
1868
|
+
deployConfig: base.deployConfig.bind(base),
|
|
1869
|
+
detectReady: base.detectReady.bind(base),
|
|
1870
|
+
parseTranscript: base.parseTranscript.bind(base),
|
|
1871
|
+
getTranscriptDir: base.getTranscriptDir.bind(base),
|
|
1872
|
+
buildEnv: base.buildEnv.bind(base),
|
|
1873
|
+
buildDirectSpawn: base.buildDirectSpawn.bind(base),
|
|
1874
|
+
parseEvents: (() => yieldThenThrow) as unknown as AgentRuntime["parseEvents"],
|
|
1875
|
+
};
|
|
1876
|
+
|
|
1877
|
+
const fake = makeFakeProc();
|
|
1878
|
+
const spawnFn: TurnSpawnFn = () => fake;
|
|
1879
|
+
|
|
1880
|
+
let refreshes = 0;
|
|
1881
|
+
await expect(
|
|
1882
|
+
runTurn({
|
|
1883
|
+
...makeRunOpts(ctx, "midturn-C", { runtime: broken, _spawnFn: spawnFn }),
|
|
1884
|
+
lastActivityRefreshIntervalMs: 0,
|
|
1885
|
+
_onLastActivityRefresh: () => {
|
|
1886
|
+
refreshes++;
|
|
1887
|
+
},
|
|
1888
|
+
}),
|
|
1889
|
+
).rejects.toThrow(/synthetic stream-json/);
|
|
1890
|
+
|
|
1891
|
+
// Mid-turn refresh fired for at least one of the two pre-throw events.
|
|
1892
|
+
expect(refreshes).toBeGreaterThanOrEqual(1);
|
|
1893
|
+
|
|
1894
|
+
// And the persisted lastActivity reflects the mid-turn write — the
|
|
1895
|
+
// end-of-turn write at line ~1112 was skipped by the parser-throw path.
|
|
1896
|
+
const after = readSession(ctx.sessionsDbPath, "midturn-C");
|
|
1897
|
+
expect(after?.lastActivity).not.toBe(startedAt);
|
|
1898
|
+
expect(new Date(after?.lastActivity ?? 0).getTime()).toBeGreaterThan(
|
|
1899
|
+
new Date(startedAt).getTime(),
|
|
1900
|
+
);
|
|
1901
|
+
});
|
|
1902
|
+
|
|
1903
|
+
test("Bash mail-poll detector: warns + records custom event without suppressing tool_use (overstory-c92c)", async () => {
|
|
1904
|
+
// Defense-in-depth: the lead.md prompt forbids Bash mail polling
|
|
1905
|
+
// (overstory-fa84). When a future overlay or contributed agent
|
|
1906
|
+
// reintroduces the pattern, the runner must surface it via the
|
|
1907
|
+
// runner diagnostic sink AND a `mail_poll_detected` event in
|
|
1908
|
+
// events.db, while still recording the original tool_use event
|
|
1909
|
+
// so downstream observability is unaffected.
|
|
1910
|
+
seedSession(ctx.sessionsDbPath, { agentName: "polled", state: "working" });
|
|
1911
|
+
const { runtime } = makeSpyRuntime();
|
|
1912
|
+
|
|
1913
|
+
const fake = makeFakeProc();
|
|
1914
|
+
const sessionId = "polled-session";
|
|
1915
|
+
const pollCommand = "until ov mail list; do sleep 1; done";
|
|
1916
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
1917
|
+
fake._pushLine(
|
|
1918
|
+
JSON.stringify({
|
|
1919
|
+
type: "system",
|
|
1920
|
+
subtype: "init",
|
|
1921
|
+
session_id: sessionId,
|
|
1922
|
+
model: "claude-test",
|
|
1923
|
+
}),
|
|
1924
|
+
);
|
|
1925
|
+
fake._pushLine(
|
|
1926
|
+
JSON.stringify({
|
|
1927
|
+
type: "assistant",
|
|
1928
|
+
session_id: sessionId,
|
|
1929
|
+
message: {
|
|
1930
|
+
role: "assistant",
|
|
1931
|
+
model: "claude-test",
|
|
1932
|
+
content: [
|
|
1933
|
+
{
|
|
1934
|
+
type: "tool_use",
|
|
1935
|
+
id: "toolu_poll_1",
|
|
1936
|
+
name: "Bash",
|
|
1937
|
+
input: { command: pollCommand },
|
|
1938
|
+
},
|
|
1939
|
+
],
|
|
1940
|
+
},
|
|
1941
|
+
}),
|
|
1942
|
+
);
|
|
1943
|
+
emitFakeTurn(fake, { sessionId });
|
|
1944
|
+
fake._exit(0);
|
|
1945
|
+
return fake;
|
|
1946
|
+
};
|
|
1947
|
+
|
|
1948
|
+
const logs: Array<{ level: string; message: string }> = [];
|
|
1949
|
+
const logger: RunnerLogger = (level, message) => {
|
|
1950
|
+
logs.push({ level, message });
|
|
1951
|
+
};
|
|
1952
|
+
|
|
1953
|
+
const result = await runTurn(
|
|
1954
|
+
makeRunOpts(ctx, "polled", { runtime, _spawnFn: spawnFn, _logWarning: logger }),
|
|
1955
|
+
);
|
|
1956
|
+
|
|
1957
|
+
expect(result.exitCode).toBe(0);
|
|
1958
|
+
|
|
1959
|
+
// Warning was emitted via the runner diagnostic sink (warn level,
|
|
1960
|
+
// message includes "mail-poll").
|
|
1961
|
+
const pollWarn = logs.find((l) => l.level === "warn" && l.message.includes("mail-poll"));
|
|
1962
|
+
expect(pollWarn).toBeDefined();
|
|
1963
|
+
|
|
1964
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
1965
|
+
try {
|
|
1966
|
+
const events = eventStore.getByAgent("polled");
|
|
1967
|
+
|
|
1968
|
+
// `mail_poll_detected` custom event landed in events.db with the
|
|
1969
|
+
// full (untruncated) command and the matched reason.
|
|
1970
|
+
const detectedEvent = events.find((e) => {
|
|
1971
|
+
if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
|
|
1972
|
+
try {
|
|
1973
|
+
const parsed = JSON.parse(e.data) as { type?: string };
|
|
1974
|
+
return parsed.type === "mail_poll_detected";
|
|
1975
|
+
} catch {
|
|
1976
|
+
return false;
|
|
1977
|
+
}
|
|
1978
|
+
});
|
|
1979
|
+
expect(detectedEvent).toBeDefined();
|
|
1980
|
+
const payload = JSON.parse(detectedEvent?.data ?? "{}") as {
|
|
1981
|
+
type: string;
|
|
1982
|
+
reason: string;
|
|
1983
|
+
command: string;
|
|
1984
|
+
};
|
|
1985
|
+
expect(payload.reason).toBe("until ov mail loop");
|
|
1986
|
+
expect(payload.command).toBe(pollCommand);
|
|
1987
|
+
|
|
1988
|
+
// Regression guard: the original Bash tool_use event MUST still
|
|
1989
|
+
// be recorded — the warning emits IN ADDITION to (not in place
|
|
1990
|
+
// of) the normal recordAgentEvent call.
|
|
1991
|
+
const toolUseEvent = events.find(
|
|
1992
|
+
(e) => e.eventType === "tool_start" && e.toolName === "Bash",
|
|
1993
|
+
);
|
|
1994
|
+
expect(toolUseEvent).toBeDefined();
|
|
1995
|
+
} finally {
|
|
1996
|
+
eventStore.close();
|
|
1997
|
+
}
|
|
1998
|
+
});
|
|
1999
|
+
});
|
|
2000
|
+
|
|
2001
|
+
describe("runTurn scope-violation observability (overstory-9f4d)", () => {
|
|
2002
|
+
let ctx: Ctx;
|
|
2003
|
+
|
|
2004
|
+
beforeEach(async () => {
|
|
2005
|
+
const overstoryDir = await mkdtemp(join(tmpdir(), "overstory-scope-test-"));
|
|
2006
|
+
ctx = {
|
|
2007
|
+
overstoryDir,
|
|
2008
|
+
worktreePath: overstoryDir,
|
|
2009
|
+
projectRoot: overstoryDir,
|
|
2010
|
+
mailDbPath: join(overstoryDir, "mail.db"),
|
|
2011
|
+
eventsDbPath: join(overstoryDir, "events.db"),
|
|
2012
|
+
sessionsDbPath: join(overstoryDir, "sessions.db"),
|
|
2013
|
+
};
|
|
2014
|
+
_resetInProcessLocks();
|
|
2015
|
+
});
|
|
2016
|
+
|
|
2017
|
+
afterEach(async () => {
|
|
2018
|
+
_resetInProcessLocks();
|
|
2019
|
+
await rm(ctx.overstoryDir, { recursive: true, force: true });
|
|
2020
|
+
});
|
|
2021
|
+
|
|
2022
|
+
async function writeOverlayWithScope(scope: string[]): Promise<void> {
|
|
2023
|
+
const dir = join(ctx.worktreePath, ".claude");
|
|
2024
|
+
const { mkdir: mkdirP, writeFile } = await import("node:fs/promises");
|
|
2025
|
+
await mkdirP(dir, { recursive: true });
|
|
2026
|
+
const body = [
|
|
2027
|
+
"## File Scope (exclusive ownership)",
|
|
2028
|
+
"",
|
|
2029
|
+
...scope.map((p) => `- \`${p}\``),
|
|
2030
|
+
"",
|
|
2031
|
+
"## Expertise",
|
|
2032
|
+
"",
|
|
2033
|
+
"none",
|
|
2034
|
+
].join("\n");
|
|
2035
|
+
await writeFile(join(dir, "CLAUDE.md"), body);
|
|
2036
|
+
}
|
|
2037
|
+
|
|
2038
|
+
test("builder scope violation without justification emits warn log + scope_violation event", async () => {
|
|
2039
|
+
seedSession(ctx.sessionsDbPath, { agentName: "violator", state: "working" });
|
|
2040
|
+
await writeOverlayWithScope(["src/agents/in-scope.ts"]);
|
|
2041
|
+
|
|
2042
|
+
const { runtime } = makeSpyRuntime();
|
|
2043
|
+
const fake = makeFakeProc();
|
|
2044
|
+
const spawnFn: TurnSpawnFn = () => {
|
|
2045
|
+
(async () => {
|
|
2046
|
+
await Bun.sleep(20);
|
|
2047
|
+
const s = createMailStore(ctx.mailDbPath);
|
|
2048
|
+
try {
|
|
2049
|
+
createMailClient(s).sendProtocol({
|
|
2050
|
+
from: "violator",
|
|
2051
|
+
to: "lead",
|
|
2052
|
+
subject: "Worker done",
|
|
2053
|
+
body: "ok",
|
|
2054
|
+
type: "worker_done",
|
|
2055
|
+
priority: "normal",
|
|
2056
|
+
payload: {
|
|
2057
|
+
taskId: "t",
|
|
2058
|
+
branch: "b",
|
|
2059
|
+
exitCode: 0,
|
|
2060
|
+
filesModified: ["src/other.ts"],
|
|
2061
|
+
},
|
|
2062
|
+
});
|
|
2063
|
+
} finally {
|
|
2064
|
+
s.close();
|
|
2065
|
+
}
|
|
2066
|
+
emitFakeTurn(fake, { sessionId: "violator-session" });
|
|
2067
|
+
fake._exit(0);
|
|
2068
|
+
})();
|
|
2069
|
+
return fake;
|
|
2070
|
+
};
|
|
2071
|
+
|
|
2072
|
+
const logs: Array<{ level: string; message: string }> = [];
|
|
2073
|
+
const logger: RunnerLogger = (level, message) => {
|
|
2074
|
+
logs.push({ level, message });
|
|
2075
|
+
};
|
|
2076
|
+
|
|
2077
|
+
const result = await runTurn({
|
|
2078
|
+
...makeRunOpts(ctx, "violator", {
|
|
2079
|
+
runtime,
|
|
2080
|
+
_spawnFn: spawnFn,
|
|
2081
|
+
_logWarning: logger,
|
|
2082
|
+
}),
|
|
2083
|
+
_scopeDetect: () => ({
|
|
2084
|
+
violations: ["src/other.ts"],
|
|
2085
|
+
expansionReasons: [],
|
|
2086
|
+
}),
|
|
2087
|
+
});
|
|
2088
|
+
|
|
2089
|
+
expect(result.terminalMailObserved).toBe(true);
|
|
2090
|
+
expect(result.finalState).toBe("completed");
|
|
2091
|
+
|
|
2092
|
+
const warnLog = logs.find(
|
|
2093
|
+
(l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
|
|
2094
|
+
);
|
|
2095
|
+
expect(warnLog).toBeDefined();
|
|
2096
|
+
expect(warnLog?.message).toContain("src/other.ts");
|
|
2097
|
+
|
|
2098
|
+
const eventStore = createEventStore(ctx.eventsDbPath);
|
|
2099
|
+
try {
|
|
2100
|
+
const events = eventStore.getByAgent("violator");
|
|
2101
|
+
const violationEvent = events.find((e) => {
|
|
2102
|
+
if (e.eventType !== "custom" || e.level !== "warn" || !e.data) return false;
|
|
2103
|
+
try {
|
|
2104
|
+
const parsed = JSON.parse(e.data) as { type?: string };
|
|
2105
|
+
return parsed.type === "scope_violation";
|
|
2106
|
+
} catch {
|
|
2107
|
+
return false;
|
|
2108
|
+
}
|
|
2109
|
+
});
|
|
2110
|
+
expect(violationEvent).toBeDefined();
|
|
2111
|
+
const payload = JSON.parse(violationEvent?.data ?? "{}") as {
|
|
2112
|
+
type: string;
|
|
2113
|
+
violations: string[];
|
|
2114
|
+
fileScope: string[];
|
|
2115
|
+
};
|
|
2116
|
+
expect(payload.violations).toEqual(["src/other.ts"]);
|
|
2117
|
+
expect(payload.fileScope).toEqual(["src/agents/in-scope.ts"]);
|
|
2118
|
+
} finally {
|
|
2119
|
+
eventStore.close();
|
|
2120
|
+
}
|
|
2121
|
+
});
|
|
2122
|
+
|
|
2123
|
+
/**
 * A scope violation that arrives together with an expansion reason must stay
 * silent: no warn-level log line mentioning FILE_SCOPE is captured and no
 * event carrying "scope_violation" is stored for the agent.
 */
test("expansion_reason in commit log suppresses the warning", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "justified", state: "working" });
	await writeOverlayWithScope(["src/agents/in-scope.ts"]);

	const { runtime } = makeSpyRuntime();
	const fake = makeFakeProc();

	// Simulated agent turn: after a short delay, send the worker_done mail,
	// then emit the fake turn and signal exit on the fake process.
	const playTurn = async (): Promise<void> => {
		await Bun.sleep(20);
		const mailStore = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(mailStore).sendProtocol({
				from: "justified",
				to: "lead",
				subject: "Worker done",
				body: "ok",
				type: "worker_done",
				priority: "normal",
				payload: {
					taskId: "t",
					branch: "b",
					exitCode: 0,
					filesModified: ["src/other.ts"],
				},
			});
		} finally {
			mailStore.close();
		}
		emitFakeTurn(fake, { sessionId: "justified-session" });
		fake._exit(0);
	};
	const spawnFn: TurnSpawnFn = () => {
		// Fire-and-forget: the runner receives the fake process immediately.
		void playTurn();
		return fake;
	};

	// Capture every message the runner hands to the injected warning logger.
	const captured: { level: string; message: string }[] = [];
	const logger: RunnerLogger = (level, message) => {
		captured.push({ level, message });
	};

	const result = await runTurn({
		...makeRunOpts(ctx, "justified", {
			runtime,
			_spawnFn: spawnFn,
			_logWarning: logger,
		}),
		// Stubbed detector: one out-of-scope file, but with a stated reason.
		_scopeDetect: () => ({
			violations: ["src/other.ts"],
			expansionReasons: ["needed shared type"],
		}),
	});

	expect(result.terminalMailObserved).toBe(true);

	const scopeWarning = captured.find((entry) => {
		if (entry.level !== "warn") return false;
		return entry.message.includes("outside declared FILE_SCOPE");
	});
	expect(scopeWarning).toBeUndefined();

	const eventStore = createEventStore(ctx.eventsDbPath);
	try {
		const recorded = eventStore.getByAgent("justified");
		const hasViolationEvent = recorded.some((evt) =>
			(evt.data ?? "").includes("scope_violation"),
		);
		expect(hasViolationEvent).toBe(false);
	} finally {
		eventStore.close();
	}
});
|
|
2190
|
+
|
|
2191
|
+
/**
 * When the agent has already sent a mail whose subject starts with
 * "scope_expansion", a reported scope violation must not produce the
 * FILE_SCOPE warning.
 *
 * The run result is checked for `terminalMailObserved` first, mirroring the
 * sibling scope tests that use the same fake spawn. Without that positive
 * guard the "no warning" assertion would also pass if the turn never got as
 * far as observing the worker_done mail.
 */
test("prior scope_expansion mail suppresses the warning", async () => {
	seedSession(ctx.sessionsDbPath, { agentName: "premail", state: "working" });
	await writeOverlayWithScope(["src/agents/in-scope.ts"]);

	// Pre-seed: a scope_expansion-prefixed mail from this agent.
	{
		const s = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(s).send({
				from: "premail",
				to: "lead",
				subject: "scope_expansion: needed shared type",
				body: "heads up",
				type: "status",
				priority: "normal",
			});
		} finally {
			s.close();
		}
	}

	const { runtime } = makeSpyRuntime();
	const fake = makeFakeProc();
	const spawnFn: TurnSpawnFn = () => {
		// Fire-and-forget simulated turn; `void` marks the promise as
		// intentionally not awaited so the fake process is returned at once.
		void (async () => {
			await Bun.sleep(20);
			const s = createMailStore(ctx.mailDbPath);
			try {
				createMailClient(s).sendProtocol({
					from: "premail",
					to: "lead",
					subject: "Worker done",
					body: "ok",
					type: "worker_done",
					priority: "normal",
					payload: {
						taskId: "t",
						branch: "b",
						exitCode: 0,
						filesModified: ["src/other.ts"],
					},
				});
			} finally {
				s.close();
			}
			emitFakeTurn(fake, { sessionId: "premail-session" });
			fake._exit(0);
		})();
		return fake;
	};

	// Capture every message the runner hands to the injected warning logger.
	const logs: Array<{ level: string; message: string }> = [];
	const logger: RunnerLogger = (level, message) => {
		logs.push({ level, message });
	};

	const result = await runTurn({
		...makeRunOpts(ctx, "premail", {
			runtime,
			_spawnFn: spawnFn,
			_logWarning: logger,
		}),
		// Stubbed detector: a violation with no expansion reason, so only the
		// pre-seeded mail can account for the warning being suppressed.
		_scopeDetect: () => ({
			violations: ["src/other.ts"],
			expansionReasons: [],
		}),
	});

	// Positive guard: the turn must actually have observed the terminal mail,
	// otherwise the absence of a warning below proves nothing.
	expect(result.terminalMailObserved).toBe(true);

	const warnLog = logs.find(
		(l) => l.level === "warn" && l.message.includes("outside declared FILE_SCOPE"),
	);
	expect(warnLog).toBeUndefined();
});
|
|
2264
|
+
|
|
2265
|
+
/**
 * For a session with capability "scout", the injected scope detector must
 * never be invoked during the turn.
 */
test("scout capability skips scope detection", async () => {
	seedSession(ctx.sessionsDbPath, {
		agentName: "scout-x",
		capability: "scout",
		state: "working",
	});
	await writeOverlayWithScope(["src/agents/in-scope.ts"]);

	const { runtime } = makeSpyRuntime();
	const fake = makeFakeProc();

	// Simulated agent turn: after a short delay, send a "result" mail, then
	// emit the fake turn and signal exit on the fake process.
	const playTurn = async (): Promise<void> => {
		await Bun.sleep(20);
		const mailStore = createMailStore(ctx.mailDbPath);
		try {
			createMailClient(mailStore).send({
				from: "scout-x",
				to: "lead",
				subject: "Done",
				body: "ok",
				type: "result",
				priority: "normal",
			});
		} finally {
			mailStore.close();
		}
		emitFakeTurn(fake, { sessionId: "scout-x-session" });
		fake._exit(0);
	};
	const spawnFn: TurnSpawnFn = () => {
		// Fire-and-forget: the runner receives the fake process immediately.
		void playTurn();
		return fake;
	};

	// Flipped by the stub below if the runner ever calls the detector.
	let scopeDetectInvoked = false;
	const baseOpts = makeRunOpts(ctx, "scout-x", {
		runtime,
		_spawnFn: spawnFn,
		capability: "scout",
	});
	await runTurn({
		...baseOpts,
		_scopeDetect: () => {
			scopeDetectInvoked = true;
			return { violations: [], expansionReasons: [] };
		},
	});

	expect(scopeDetectInvoked).toBe(false);
});
|
|
1450
2312
|
});
|