@os-eco/overstory-cli 0.9.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +50 -19
  2. package/agents/builder.md +19 -9
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +204 -87
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +219 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/mail-poll-detect.test.ts +153 -0
  18. package/src/agents/mail-poll-detect.ts +73 -0
  19. package/src/agents/overlay.test.ts +60 -4
  20. package/src/agents/overlay.ts +63 -8
  21. package/src/agents/scope-detect.test.ts +190 -0
  22. package/src/agents/scope-detect.ts +146 -0
  23. package/src/agents/turn-lock.test.ts +181 -0
  24. package/src/agents/turn-lock.ts +235 -0
  25. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  26. package/src/agents/turn-runner-dispatch.ts +105 -0
  27. package/src/agents/turn-runner.test.ts +2312 -0
  28. package/src/agents/turn-runner.ts +1383 -0
  29. package/src/commands/agents.ts +9 -0
  30. package/src/commands/clean.ts +54 -0
  31. package/src/commands/coordinator.test.ts +254 -0
  32. package/src/commands/coordinator.ts +273 -8
  33. package/src/commands/dashboard.test.ts +188 -0
  34. package/src/commands/dashboard.ts +14 -4
  35. package/src/commands/doctor.ts +3 -1
  36. package/src/commands/group.test.ts +94 -0
  37. package/src/commands/group.ts +49 -20
  38. package/src/commands/init.test.ts +8 -0
  39. package/src/commands/init.ts +8 -1
  40. package/src/commands/log.test.ts +187 -11
  41. package/src/commands/log.ts +171 -71
  42. package/src/commands/mail.test.ts +162 -0
  43. package/src/commands/mail.ts +64 -9
  44. package/src/commands/merge.test.ts +230 -1
  45. package/src/commands/merge.ts +68 -12
  46. package/src/commands/nudge.test.ts +351 -4
  47. package/src/commands/nudge.ts +356 -34
  48. package/src/commands/run.test.ts +43 -7
  49. package/src/commands/serve/build.test.ts +202 -0
  50. package/src/commands/serve/build.ts +206 -0
  51. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  52. package/src/commands/serve/coordinator-actions.ts +408 -0
  53. package/src/commands/serve/dev.test.ts +168 -0
  54. package/src/commands/serve/dev.ts +117 -0
  55. package/src/commands/serve/mail-actions.test.ts +312 -0
  56. package/src/commands/serve/mail-actions.ts +167 -0
  57. package/src/commands/serve/rest.test.ts +1323 -0
  58. package/src/commands/serve/rest.ts +708 -0
  59. package/src/commands/serve/static.ts +51 -0
  60. package/src/commands/serve/ws.test.ts +361 -0
  61. package/src/commands/serve/ws.ts +332 -0
  62. package/src/commands/serve.test.ts +459 -0
  63. package/src/commands/serve.ts +565 -0
  64. package/src/commands/sling.test.ts +177 -1
  65. package/src/commands/sling.ts +243 -71
  66. package/src/commands/status.test.ts +9 -0
  67. package/src/commands/status.ts +12 -4
  68. package/src/commands/stop.test.ts +255 -1
  69. package/src/commands/stop.ts +107 -8
  70. package/src/commands/watch.test.ts +43 -0
  71. package/src/commands/watch.ts +153 -28
  72. package/src/config.ts +23 -0
  73. package/src/doctor/consistency.test.ts +106 -0
  74. package/src/doctor/consistency.ts +48 -1
  75. package/src/doctor/serve.test.ts +95 -0
  76. package/src/doctor/serve.ts +86 -0
  77. package/src/doctor/types.ts +2 -1
  78. package/src/doctor/watchdog.ts +57 -1
  79. package/src/events/tailer.test.ts +234 -1
  80. package/src/events/tailer.ts +90 -0
  81. package/src/index.ts +57 -6
  82. package/src/insights/quality-gates.test.ts +141 -0
  83. package/src/insights/quality-gates.ts +156 -0
  84. package/src/json.ts +29 -0
  85. package/src/logging/theme.ts +4 -0
  86. package/src/mail/client.ts +15 -2
  87. package/src/mail/store.test.ts +82 -0
  88. package/src/mail/store.ts +41 -4
  89. package/src/merge/lock.test.ts +149 -0
  90. package/src/merge/lock.ts +140 -0
  91. package/src/merge/predict.test.ts +387 -0
  92. package/src/merge/predict.ts +249 -0
  93. package/src/merge/resolver.ts +1 -1
  94. package/src/mulch/client.ts +3 -3
  95. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  96. package/src/runtimes/claude.test.ts +791 -1
  97. package/src/runtimes/claude.ts +323 -1
  98. package/src/runtimes/connections.test.ts +141 -1
  99. package/src/runtimes/connections.ts +73 -4
  100. package/src/runtimes/headless-connection.test.ts +264 -0
  101. package/src/runtimes/headless-connection.ts +158 -0
  102. package/src/runtimes/types.ts +10 -0
  103. package/src/schema-consistency.test.ts +1 -0
  104. package/src/sessions/store.test.ts +657 -29
  105. package/src/sessions/store.ts +286 -23
  106. package/src/test-setup.test.ts +31 -0
  107. package/src/test-setup.ts +28 -0
  108. package/src/types.ts +107 -2
  109. package/src/utils/pid.test.ts +85 -1
  110. package/src/utils/pid.ts +86 -1
  111. package/src/utils/process-scan.test.ts +53 -0
  112. package/src/utils/process-scan.ts +76 -0
  113. package/src/watchdog/daemon.test.ts +1607 -376
  114. package/src/watchdog/daemon.ts +462 -88
  115. package/src/watchdog/health.test.ts +282 -0
  116. package/src/watchdog/health.ts +126 -27
  117. package/src/worktree/manager.test.ts +218 -1
  118. package/src/worktree/manager.ts +55 -0
  119. package/src/worktree/process.test.ts +71 -0
  120. package/src/worktree/process.ts +25 -5
  121. package/src/worktree/tmux.test.ts +28 -0
  122. package/src/worktree/tmux.ts +27 -3
  123. package/templates/CLAUDE.md.tmpl +19 -8
  124. package/templates/overlay.md.tmpl +5 -2
@@ -0,0 +1,448 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdtemp, rm } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { createMailClient } from "../mail/client.ts";
6
+ import { createMailStore } from "../mail/store.ts";
7
+ import type { MailMessage } from "../types.ts";
8
+ import {
9
+ _runTurnRunnerTick,
10
+ formatMailBatch,
11
+ startTurnRunnerMailLoop,
12
+ type TurnRunnerOptsFactory,
13
+ } from "./headless-mail-injector.ts";
14
+ import type { RunTurnOpts, TurnResult } from "./turn-runner.ts";
15
+
16
describe("formatMailBatch", () => {
	// Baseline message shared by every case; each test overrides only the
	// fields it is exercising.
	const baseMessage: MailMessage = {
		id: "m-1",
		from: "lead",
		to: "build-agent",
		subject: "Subject",
		body: "Body",
		type: "dispatch",
		priority: "normal",
		threadId: null,
		payload: null,
		read: false,
		createdAt: "2026-04-30T00:00:00.000Z",
	};

	/** Build a message from the baseline with the given fields replaced. */
	const makeMessage = (overrides: Partial<MailMessage>): MailMessage => ({
		...baseMessage,
		...overrides,
	});

	test("escapes pipes in metadata so a crafted subject can't inject a fake field", () => {
		const crafted = makeMessage({ subject: "Real | Priority: urgent" });
		const rendered = formatMailBatch([crafted]);
		// The pipe inside the subject is escaped; the real Priority field follows it.
		expect(rendered).toBe(
			"[MAIL] From: lead | Subject: Real \\| Priority: urgent | Priority: normal\n\nBody",
		);
	});

	test("escapes newlines in metadata so a crafted subject can't smuggle a fake body", () => {
		const crafted = makeMessage({ subject: "line1\nINJECTED BODY" });
		const rendered = formatMailBatch([crafted]);
		// The first blank line must be the metadata/body separator, not
		// something the subject introduced.
		const separatorAt = rendered.indexOf("\n\n");
		const header = rendered.slice(0, separatorAt);
		const remainder = rendered.slice(separatorAt + 2);
		expect(header).toContain("Subject: line1\\nINJECTED BODY");
		expect(header).not.toContain("\n");
		expect(remainder).toBe("Body");
	});

	test("escapes carriage returns and backslashes in metadata", () => {
		const crafted = makeMessage({ from: "a\\b", subject: "c\rd" });
		const rendered = formatMailBatch([crafted]);
		expect(rendered).toContain("From: a\\\\b");
		expect(rendered).toContain("Subject: c\\rd");
	});

	test("does not modify body content", () => {
		// Bodies are passed through verbatim: only metadata is escaped.
		const crafted = makeMessage({
			body: "Body with | pipes\nand newlines\nand \\ backslashes",
		});
		const rendered = formatMailBatch([crafted]);
		expect(rendered.endsWith("Body with | pipes\nand newlines\nand \\ backslashes")).toBe(true);
	});

	test("preserves benign metadata exactly", () => {
		const plain = makeMessage({ from: "lead", subject: "Plain subject", priority: "high" });
		const rendered = formatMailBatch([plain]);
		expect(rendered).toBe("[MAIL] From: lead | Subject: Plain subject | Priority: high\n\nBody");
	});
});
71
+
72
+ describe("startTurnRunnerMailLoop", () => {
73
// Per-test scratch directory and the mail database path inside it. Both are
// reassigned in beforeEach so every test starts from an empty directory.
let tempDir: string;
let mailDbPath: string;

beforeEach(async () => {
	const prefix = join(tmpdir(), "overstory-turnrunner-test-");
	tempDir = await mkdtemp(prefix);
	mailDbPath = join(tempDir, "mail.db");
});

afterEach(async () => {
	// force: tolerate a directory that is already gone.
	await rm(tempDir, { force: true, recursive: true });
});
84
+
85
/**
 * Create a fake `runTurn` that records every call and always resolves to the
 * same canned result.
 *
 * @param result - Fields to override on the default (clean, exit code 0) result
 * @returns The stub function and the live array of opts it has been called with
 */
function makeRunTurnStub(result: Partial<TurnResult> = {}): {
	runTurn: (opts: RunTurnOpts) => Promise<TurnResult>;
	calls: RunTurnOpts[];
} {
	const recorded: RunTurnOpts[] = [];
	const canned: TurnResult = {
		exitCode: 0,
		cleanResult: true,
		newSessionId: null,
		resumeMismatch: false,
		terminalMailObserved: false,
		durationMs: 1,
		initialState: "booting",
		finalState: "working",
		stallAborted: false,
		terminalMailMissing: false,
		...result,
	};

	const runTurn = async (opts: RunTurnOpts): Promise<TurnResult> => {
		recorded.push(opts);
		return canned;
	};

	return { calls: recorded, runTurn };
}
111
+
112
/**
 * Build a `TurnRunnerOptsFactory` for the given agent whose paths all point
 * into the current test's temp directory. Paths are resolved when the factory
 * is invoked, so it always sees the `tempDir` / `mailDbPath` of the running test.
 */
function fakeOptsFactory(agentName: string): TurnRunnerOptsFactory {
	return function buildOpts(userTurnNdjson: string): RunTurnOpts {
		const opts = {
			agentName,
			capability: "builder",
			overstoryDir: tempDir,
			worktreePath: tempDir,
			projectRoot: tempDir,
			taskId: "task-x",
			userTurnNdjson,
			// `runtime` and `resolvedModel` are placeholders: the stubbed
			// runTurn never touches them.
			runtime: { id: "claude" } as unknown as RunTurnOpts["runtime"],
			resolvedModel: { model: "test", isExplicitOverride: false },
			runId: null,
			mailDbPath,
			eventsDbPath: join(tempDir, "events.db"),
			sessionsDbPath: join(tempDir, "sessions.db"),
		} satisfies RunTurnOpts;
		return opts;
	};
}
131
+
132
test("invokes runTurn with batched user turn and marks messages read on success", async () => {
	// Seed two unread messages for the same agent, then release the handle.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "build-agent",
		subject: "Task A",
		body: "Work on A.",
		type: "dispatch",
		priority: "normal",
	});
	mailer.send({
		from: "lead",
		to: "build-agent",
		subject: "Task B",
		body: "Work on B.",
		type: "status",
		priority: "low",
	});
	seedStore.close();

	const stub = makeRunTurnStub();
	const outcome = await _runTurnRunnerTick(
		"build-agent",
		fakeOptsFactory("build-agent"),
		stub.runTurn,
		mailDbPath,
	);
	expect(outcome.kind).toBe("delivered");

	// Both messages must arrive in ONE turn, as a single user-turn payload.
	expect(stub.calls.length).toBe(1);
	const [firstCall] = stub.calls;
	expect(firstCall).toBeDefined();
	const envelope = JSON.parse(firstCall?.userTurnNdjson?.trimEnd() ?? "");
	expect(envelope.type).toBe("user");
	const text: string = envelope.message.content[0].text;
	expect(text).toContain("Task A");
	expect(text).toContain("Task B");

	// A successful turn leaves nothing unread for the agent.
	const verifyStore = createMailStore(mailDbPath);
	try {
		const stillUnread = verifyStore.getUnread("build-agent");
		expect(stillUnread.length).toBe(0);
	} finally {
		verifyStore.close();
	}
});
177
+
178
test("does not mark messages read when runTurn exits non-zero", async () => {
	// Seed a single unread dispatch.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "fail-agent",
		subject: "Try again",
		body: "Should not be marked read.",
		type: "dispatch",
		priority: "normal",
	});
	seedStore.close();

	// The turn resolves, but reports a failing exit code.
	const failing = makeRunTurnStub({ exitCode: 1, cleanResult: false });
	const outcome = await _runTurnRunnerTick(
		"fail-agent",
		fakeOptsFactory("fail-agent"),
		failing.runTurn,
		mailDbPath,
	);
	// A resolved turn is reported as "delivered" whatever its exit code...
	expect(outcome.kind).toBe("delivered");

	// ...but the message stays unread so a later tick can retry it.
	const verifyStore = createMailStore(mailDbPath);
	try {
		const stillUnread = verifyStore.getUnread("fail-agent");
		expect(stillUnread.length).toBe(1);
	} finally {
		verifyStore.close();
	}
});
206
+
207
test("does not mark messages read when runTurn throws", async () => {
	// Seed a single unread dispatch.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "throw-agent",
		subject: "Boom",
		body: "Throw inside runTurn.",
		type: "dispatch",
		priority: "normal",
	});
	seedStore.close();

	// A runner that rejects instead of resolving.
	const exploding = async (): Promise<TurnResult> => {
		throw new Error("simulated spawn failure");
	};

	const outcome = await _runTurnRunnerTick(
		"throw-agent",
		fakeOptsFactory("throw-agent"),
		exploding,
		mailDbPath,
	);
	// The rejection is captured in the tick result rather than propagated.
	expect(outcome.kind).toBe("error");
	if (outcome.kind === "error") {
		expect(outcome.error).toBeInstanceOf(Error);
	}

	// The message must still be unread.
	const verifyStore = createMailStore(mailDbPath);
	try {
		const stillUnread = verifyStore.getUnread("throw-agent");
		expect(stillUnread.length).toBe(1);
	} finally {
		verifyStore.close();
	}
});
240
+
241
test("idle tick when no unread mail does not invoke runTurn", async () => {
	// No mail is seeded, so the inbox for this agent is empty.
	const stub = makeRunTurnStub();
	const factory = fakeOptsFactory("empty-agent");

	const outcome = await _runTurnRunnerTick("empty-agent", factory, stub.runTurn, mailDbPath);

	expect(outcome.kind).toBe("idle");
	expect(stub.calls.length).toBe(0);
});
252
+
253
test("loop returns a stop function that prevents further runTurn invocations", async () => {
	const sleep = (ms: number): Promise<void> =>
		new Promise((resolve) => {
			setTimeout(resolve, ms);
		});

	// Seed a single unread dispatch.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "loop-agent",
		subject: "Stop test",
		body: "Should be delivered once at most.",
		type: "dispatch",
		priority: "normal",
	});
	seedStore.close();

	const stub = makeRunTurnStub();
	const stop = startTurnRunnerMailLoop(
		"loop-agent",
		fakeOptsFactory("loop-agent"),
		stub.runTurn,
		mailDbPath,
		60,
	);

	// Let the 60 ms loop tick a few times, stop it, then wait again to
	// confirm no tick sneaks through after stop().
	await sleep(250);
	stop();
	const callsAfterStop = stub.calls.length;
	await sleep(200);

	expect(stub.calls.length).toBe(callsAfterStop);
	// The message is marked read after the first delivery, so later ticks
	// are idle: at most one call, and at least one.
	expect(callsAfterStop).toBeLessThanOrEqual(1);
	expect(callsAfterStop).toBeGreaterThan(0);
});
285
+
286
test("per-tick isAgentLive=false short-circuits dispatch and self-stops the loop", async () => {
	// Seed mail for an agent that is already stopped.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "stopped-agent",
		subject: "Late mail",
		body: "Should never be dispatched to a stopped agent.",
		type: "dispatch",
		priority: "normal",
	});
	seedStore.close();

	// The agent is reported as not live before the first tick fires. The
	// per-tick guard must short-circuit dispatch, closing the rescan window
	// in serve.ts that would let `ov stop` leak a fresh runTurn call
	// (overstory-eb7c).
	const neverLive = (): boolean => false;
	const stub = makeRunTurnStub();
	const stop = startTurnRunnerMailLoop(
		"stopped-agent",
		fakeOptsFactory("stopped-agent"),
		stub.runTurn,
		mailDbPath,
		30,
		neverLive,
	);

	await new Promise((resolve) => setTimeout(resolve, 200));
	stop();

	expect(stub.calls.length).toBe(0);

	// Nothing was delivered, so the mail must still be unread.
	const verifyStore = createMailStore(mailDbPath);
	try {
		const stillUnread = verifyStore.getUnread("stopped-agent");
		expect(stillUnread.length).toBe(1);
	} finally {
		verifyStore.close();
	}
});
325
+
326
test("isAgentLive flips to false mid-loop: no further runTurn invocations", async () => {
	// Two batches of mail. The first runTurn delivers batch 1; before the
	// next tick fires the agent is flipped to terminal and a second batch
	// arrives. The guard must keep that second batch from dispatching.
	const seedStore = createMailStore(mailDbPath);
	const mailer = createMailClient(seedStore);
	mailer.send({
		from: "lead",
		to: "flipping-agent",
		subject: "Batch 1",
		body: "First batch.",
		type: "dispatch",
		priority: "normal",
	});
	seedStore.close();

	let live = true;
	const stub = makeRunTurnStub();

	// Wraps the stub: once the first turn completes, simulate `ov stop`
	// (agent no longer live) and post new mail that a rescan would see.
	const wrappedRunTurn = async (opts: RunTurnOpts): Promise<TurnResult> => {
		const turnResult = await stub.runTurn(opts);
		live = false;
		const lateStore = createMailStore(mailDbPath);
		const lateMailer = createMailClient(lateStore);
		lateMailer.send({
			from: "lead",
			to: "flipping-agent",
			subject: "Batch 2 (post-stop)",
			body: "Should not be dispatched.",
			type: "dispatch",
			priority: "normal",
		});
		lateStore.close();
		return turnResult;
	};

	const stop = startTurnRunnerMailLoop(
		"flipping-agent",
		fakeOptsFactory("flipping-agent"),
		wrappedRunTurn,
		mailDbPath,
		30,
		() => live,
	);

	await new Promise((resolve) => setTimeout(resolve, 300));
	stop();

	// Exactly one runTurn call: batch 1. Batch 2 never reached the dispatcher.
	expect(stub.calls.length).toBe(1);

	const verifyStore = createMailStore(mailDbPath);
	try {
		// Batch 1 was marked read (delivered); batch 2 is still unread.
		const stillUnread = verifyStore.getUnread("flipping-agent");
		expect(stillUnread.length).toBe(1);
	} finally {
		verifyStore.close();
	}
});
388
+
389
test("re-entrancy guard: second tick while first is in flight is a no-op", async () => {
	const sleep = (ms: number): Promise<void> =>
		new Promise((resolve) => {
			setTimeout(resolve, ms);
		});

	const store = createMailStore(mailDbPath);
	const client = createMailClient(store);
	client.send({
		from: "lead",
		to: "concurrency-agent",
		subject: "First",
		body: "First batch",
		type: "dispatch",
		priority: "normal",
	});
	store.close();

	// Block the first runTurn until we explicitly resolve it. While in flight,
	// any subsequent tick must short-circuit (the loop's in-flight guard).
	let resolveFirst!: () => void;
	const firstPromise = new Promise<void>((resolve) => {
		resolveFirst = resolve;
	});

	let calls = 0;
	const slowRun = async (_opts: RunTurnOpts): Promise<TurnResult> => {
		calls++;
		await firstPromise;
		return {
			exitCode: 0,
			cleanResult: true,
			newSessionId: null,
			resumeMismatch: false,
			terminalMailObserved: false,
			durationMs: 0,
			initialState: "booting",
			finalState: "working",
			stallAborted: false,
			terminalMailMissing: false,
		};
	};

	const stop = startTurnRunnerMailLoop(
		"concurrency-agent",
		fakeOptsFactory("concurrency-agent"),
		slowRun,
		mailDbPath,
		30,
	);

	try {
		// Allow several ticks to fire while the first runTurn is still pending.
		await sleep(150);
		expect(calls).toBe(1);

		resolveFirst();
		await sleep(80);
	} finally {
		// Always release the blocked turn and stop the interval, even when an
		// assertion above fails. Otherwise the loop keeps ticking after the
		// test ends, against a temp directory that afterEach has removed.
		// Both calls are harmless if they have already happened.
		resolveFirst();
		stop();
	}

	// At most one extra retry tick after the first turn resolved (with the
	// only message already marked read). Allow ≤2 to keep the assertion
	// resilient to scheduler timing on slower CI runners.
	expect(calls).toBeLessThanOrEqual(2);
});
448
+ });
@@ -0,0 +1,219 @@
1
+ /**
2
+ * Server-side mail dispatcher for spawn-per-turn headless agents.
3
+ *
4
+ * In tmux mode, the UserPromptSubmit hook fires `ov mail check --inject` before
5
+ * each prompt, delivering new mail to the agent. In headless spawn-per-turn
6
+ * mode there is no persistent process — `ov serve` polls the mail store and,
7
+ * when unread mail appears for an agent, drives a fresh `runTurn` that spawns
8
+ * claude with `--resume <session-id>`, writes the batched user turn to a real
9
+ * stdin pipe, and exits when claude does.
10
+ *
11
+ * This module exports `startTurnRunnerMailLoop` (the dispatcher loop) and
12
+ * `_runTurnRunnerTick` (a single-tick variant for deterministic tests).
13
+ *
14
+ * State authority (overstory-3087): this module does NOT write session state.
15
+ * The turn-runner (`src/agents/turn-runner.ts`) is the sole authority for
16
+ * `in_turn` ↔ `between_turns` transitions — it writes `in_turn` on the first
17
+ * parser event of a turn and settles to `between_turns` at end-of-turn when
18
+ * the agent did not deliver a terminal mail. Adding a duplicate writer here
19
+ * would race with the turn-runner under the per-agent turn lock and make
20
+ * the substate non-deterministic.
21
+ */
22
+
23
+ import { createMailStore } from "../mail/store.ts";
24
+ import type { MailMessage } from "../types.ts";
25
+ import { encodeUserTurn } from "./headless-prompt.ts";
26
+ import type { RunTurnOpts, TurnResult } from "./turn-runner.ts";
27
+
28
/**
 * Neutralise characters in a metadata value that would break the
 * `[MAIL] From: ... | Subject: ... | Priority: ...\n\n<body>` framing.
 *
 * `|` delimits fields and a blank line separates metadata from the body, so a
 * raw pipe or line break inside a value could forge a field or a body. Each
 * backslash, pipe, carriage return and newline is rewritten in a single pass:
 * `\` becomes `\\`, `|` becomes `\|`, CR becomes the two characters `\r`, and
 * LF becomes the two characters `\n`. Escaping the backslash itself keeps the
 * encoding unambiguous (overstory-2231).
 *
 * @param value - Raw metadata value (sender, subject, priority, ...)
 * @returns The value with the four framing-sensitive characters escaped
 */
function escapeMailMetadata(value: string): string {
	return value.replace(/[\\|\r\n]/g, (ch) => {
		switch (ch) {
			case "\r":
				return "\\r";
			case "\n":
				return "\\n";
			default:
				// Backslash or pipe: keep the character, prefixed by a backslash.
				return `\\${ch}`;
		}
	});
}
42
+
43
/**
 * Render a batch of unread messages as the text of one user turn.
 *
 * Each message becomes a `[MAIL] From: … | Subject: … | Priority: …` header
 * line, a blank line, then the body. Header values are escaped so that a
 * hostile or merely unusual subject cannot break the framing; the body is
 * emitted verbatim. Messages are separated by a `---` rule surrounded by blank
 * lines. An empty batch yields the empty string.
 *
 * @param messages - Messages to render, in delivery order
 * @returns The combined user-turn text
 */
export function formatMailBatch(messages: readonly MailMessage[]): string {
	const rendered: string[] = [];
	for (const { from, subject, priority, body } of messages) {
		const header = [
			`From: ${escapeMailMetadata(from)}`,
			`Subject: ${escapeMailMetadata(subject)}`,
			`Priority: ${escapeMailMetadata(priority)}`,
		].join(" | ");
		rendered.push(`[MAIL] ${header}\n\n${body}`);
	}
	return rendered.join("\n\n---\n\n");
}
58
+
59
/**
 * Build the runTurn opts for delivering a user turn (Phase 2 builder dispatcher).
 *
 * The injector polls mail for a single agent and only knows the agent name,
 * the user-turn payload, and the mail database path. The remaining fields
 * (worktree path, runtime, model, run id, etc.) are provided by the caller
 * (typically `ov serve`) once at install time. This factory produces a
 * `RunTurnOpts` for each batch by combining the static caller-provided
 * fields with the per-batch payload.
 *
 * The dispatcher calls the factory once per batch, with the encoded user turn
 * as its only argument.
 */
export type TurnRunnerOptsFactory = (userTurnNdjson: string) => RunTurnOpts;

/** Function that drives a single agent turn end-to-end. Production passes `runTurn`. */
export type TurnRunnerFn = (opts: RunTurnOpts) => Promise<TurnResult>;

/**
 * Outcome of a single dispatcher tick. Returned for testability so callers
 * can assert delivery behavior without inspecting the runner internals.
 *
 * - `idle`: nothing was dispatched — there was no unread mail, or the loop was
 *   already stopped, or its liveness predicate reported the agent as not live.
 * - `in-flight`: a previous turn for this loop has not finished yet.
 * - `delivered`: the runner resolved. This does NOT by itself mean the
 *   messages were marked read: that only happens when `result.exitCode === 0`.
 * - `error`: building the opts, running the turn, or marking messages read
 *   threw; `error` carries the thrown value.
 *
 * `messageIds` lists the ids of the messages that made up the batch.
 */
export type TurnRunnerTickResult =
	| { kind: "idle" }
	| { kind: "in-flight" }
	| { kind: "delivered"; result: TurnResult; messageIds: string[] }
	| { kind: "error"; error: unknown; messageIds: string[] };
83
+
84
/**
 * Start a server-side mail dispatcher that drives the spawn-per-turn engine.
 *
 * Phase 2 builder path. Polls the mail store every intervalMs milliseconds,
 * batches unread messages into a single stream-json user turn, and invokes
 * `runTurn(...)` to spawn one claude turn that consumes them. While a turn
 * is in flight, subsequent ticks short-circuit — they never spawn a second
 * claude process for the same agent. Per-agent serialization is also enforced
 * cross-process by the turn-lock inside `runTurn`.
 *
 * The fetch / dispatch / mark-read work of each tick is delegated to
 * `_runTurnRunnerTick`, so the loop and the exported single-tick test seam run
 * exactly the same code. This function adds only the loop-level guards:
 * stopped, in-flight, and agent liveness.
 *
 * Mark-as-read happens AFTER the runTurn returns successfully (`exitCode === 0`
 * and no thrown error). On any failure, messages remain unread and will be
 * retried on the next tick.
 *
 * The first tick fires one `intervalMs` after this call, not immediately.
 * Calling the returned function while a turn is in flight does not cancel that
 * turn; it only prevents further ticks from dispatching.
 *
 * @param agentName - Overstory agent name (mail inbox address)
 * @param optsFactory - Builds the RunTurnOpts from the per-batch user turn payload
 * @param runTurnFn - Function that drives one turn (typically `runTurn` from turn-runner.ts)
 * @param mailStorePath - Absolute path to the project's mail.db
 * @param intervalMs - Poll interval in milliseconds (default: 2000)
 * @param isAgentLive - Optional per-tick predicate. When provided and it returns
 *   false, the loop short-circuits (no mail dispatch) and self-terminates.
 *   This closes the gap between `ov stop` writing state=completed and the
 *   serve.ts rescan timer reaping this loop, which would otherwise keep
 *   ticking and dispatch a new turn against a stopped agent (overstory-eb7c).
 * @returns Cleanup function that stops the dispatcher
 */
export function startTurnRunnerMailLoop(
	agentName: string,
	optsFactory: TurnRunnerOptsFactory,
	runTurnFn: TurnRunnerFn,
	mailStorePath: string,
	intervalMs = 2000,
	isAgentLive?: () => boolean,
): () => void {
	let stopped = false;
	let inFlight = false;
	let timer: ReturnType<typeof setInterval> | null = null;

	const stop = (): void => {
		stopped = true;
		if (timer !== null) {
			clearInterval(timer);
			timer = null;
		}
	};

	const tick = async (): Promise<TurnRunnerTickResult> => {
		if (stopped) return { kind: "idle" };
		if (inFlight) return { kind: "in-flight" };
		// Per-tick state guard. `ov stop` flips state=completed and kills the
		// in-flight claude, but until the rescan reaps this loop the next tick
		// would otherwise dispatch a fresh turn against the stopped agent.
		if (isAgentLive && !isAgentLive()) {
			stop();
			return { kind: "idle" };
		}

		// Raise the guard before delegating. Interval callbacks cannot
		// interleave with this synchronous section, so no second tick can slip
		// in between the check above and this assignment.
		inFlight = true;
		try {
			// Shared single-tick implementation: reads unread mail, runs one
			// turn, and marks the batch read only when the turn exits with
			// code 0. Failed or throwing turns leave the messages unread so
			// the next tick retries them.
			return await _runTurnRunnerTick(agentName, optsFactory, runTurnFn, mailStorePath);
		} finally {
			inFlight = false;
		}
	};

	timer = setInterval(() => {
		// Tick outcomes (including `error` results) are intentionally
		// discarded: undelivered mail stays unread and is retried. The catch
		// only prevents an unhandled rejection if the tick itself rejects,
		// e.g. the liveness predicate or the mail store throws.
		tick().catch(() => {});
	}, intervalMs);

	return stop;
}
182
+
183
/**
 * Internal: run one dispatcher tick for a single agent. Exported so tests can
 * drive delivery deterministically, without depending on setInterval timing.
 *
 * Steps: read the agent's unread mail; return `idle` if there is none;
 * otherwise encode the batch as one user turn and run it. The batch is marked
 * read only when the turn resolves with `exitCode === 0`. A resolved turn is
 * reported as `delivered` whatever its exit code; anything thrown while
 * building the opts, running the turn, or marking read is reported as `error`.
 * Errors from reading the inbox or encoding the turn propagate to the caller.
 *
 * @param agentName - Agent whose inbox is polled
 * @param optsFactory - Builds the RunTurnOpts from the encoded user turn
 * @param runTurnFn - Function that drives one turn
 * @param mailStorePath - Path to the mail database
 * @returns The outcome of this tick
 */
export async function _runTurnRunnerTick(
	agentName: string,
	optsFactory: TurnRunnerOptsFactory,
	runTurnFn: TurnRunnerFn,
	mailStorePath: string,
): Promise<TurnRunnerTickResult> {
	// Read the inbox on a short-lived handle that is closed before the turn
	// runs.
	const inbox = createMailStore(mailStorePath);
	const pending = (() => {
		try {
			return inbox.getUnread(agentName);
		} finally {
			inbox.close();
		}
	})();
	if (pending.length === 0) return { kind: "idle" };

	const payload = encodeUserTurn(formatMailBatch(pending));
	const messageIds = pending.map(({ id }) => id);

	let result: TurnResult;
	try {
		result = await runTurnFn(optsFactory(payload));
		if (result.exitCode === 0) {
			// Clean turn: acknowledge the whole batch on a fresh handle.
			const ledger = createMailStore(mailStorePath);
			try {
				messageIds.forEach((id) => {
					ledger.markRead(id);
				});
			} finally {
				ledger.close();
			}
		}
	} catch (error) {
		return { kind: "error", error, messageIds };
	}
	return { kind: "delivered", result, messageIds };
}