@os-eco/overstory-cli 0.9.3 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +49 -18
  2. package/agents/builder.md +9 -8
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +98 -82
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +211 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/overlay.test.ts +4 -4
  18. package/src/agents/overlay.ts +30 -8
  19. package/src/agents/turn-lock.test.ts +181 -0
  20. package/src/agents/turn-lock.ts +235 -0
  21. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  22. package/src/agents/turn-runner-dispatch.ts +105 -0
  23. package/src/agents/turn-runner.test.ts +1450 -0
  24. package/src/agents/turn-runner.ts +1166 -0
  25. package/src/commands/clean.ts +56 -1
  26. package/src/commands/completions.test.ts +4 -1
  27. package/src/commands/coordinator.test.ts +127 -0
  28. package/src/commands/coordinator.ts +205 -6
  29. package/src/commands/dashboard.test.ts +188 -0
  30. package/src/commands/dashboard.ts +13 -3
  31. package/src/commands/doctor.ts +94 -77
  32. package/src/commands/group.test.ts +94 -0
  33. package/src/commands/group.ts +49 -20
  34. package/src/commands/init.test.ts +8 -0
  35. package/src/commands/init.ts +8 -1
  36. package/src/commands/log.test.ts +56 -11
  37. package/src/commands/log.ts +134 -69
  38. package/src/commands/mail.test.ts +162 -0
  39. package/src/commands/mail.ts +64 -9
  40. package/src/commands/merge.test.ts +112 -1
  41. package/src/commands/merge.ts +17 -4
  42. package/src/commands/monitor.ts +2 -1
  43. package/src/commands/nudge.test.ts +351 -4
  44. package/src/commands/nudge.ts +356 -34
  45. package/src/commands/run.test.ts +43 -7
  46. package/src/commands/serve/build.test.ts +202 -0
  47. package/src/commands/serve/build.ts +206 -0
  48. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  49. package/src/commands/serve/coordinator-actions.ts +408 -0
  50. package/src/commands/serve/dev.test.ts +168 -0
  51. package/src/commands/serve/dev.ts +117 -0
  52. package/src/commands/serve/mail-actions.test.ts +312 -0
  53. package/src/commands/serve/mail-actions.ts +167 -0
  54. package/src/commands/serve/rest.test.ts +1323 -0
  55. package/src/commands/serve/rest.ts +708 -0
  56. package/src/commands/serve/static.ts +51 -0
  57. package/src/commands/serve/ws.test.ts +361 -0
  58. package/src/commands/serve/ws.ts +332 -0
  59. package/src/commands/serve.test.ts +459 -0
  60. package/src/commands/serve.ts +565 -0
  61. package/src/commands/sling.test.ts +85 -1
  62. package/src/commands/sling.ts +153 -64
  63. package/src/commands/status.test.ts +9 -0
  64. package/src/commands/status.ts +12 -4
  65. package/src/commands/stop.test.ts +174 -1
  66. package/src/commands/stop.ts +107 -8
  67. package/src/commands/supervisor.ts +2 -1
  68. package/src/commands/watch.test.ts +49 -4
  69. package/src/commands/watch.ts +153 -28
  70. package/src/commands/worktree.test.ts +319 -3
  71. package/src/commands/worktree.ts +86 -0
  72. package/src/config.test.ts +78 -0
  73. package/src/config.ts +43 -1
  74. package/src/doctor/consistency.test.ts +106 -0
  75. package/src/doctor/consistency.ts +50 -3
  76. package/src/doctor/serve.test.ts +95 -0
  77. package/src/doctor/serve.ts +86 -0
  78. package/src/doctor/types.ts +2 -1
  79. package/src/doctor/watchdog.ts +57 -1
  80. package/src/events/tailer.test.ts +234 -1
  81. package/src/events/tailer.ts +90 -0
  82. package/src/index.ts +53 -6
  83. package/src/json.ts +29 -0
  84. package/src/mail/client.ts +15 -2
  85. package/src/mail/store.test.ts +82 -0
  86. package/src/mail/store.ts +41 -4
  87. package/src/merge/lock.test.ts +149 -0
  88. package/src/merge/lock.ts +140 -0
  89. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  90. package/src/runtimes/claude.test.ts +791 -1
  91. package/src/runtimes/claude.ts +323 -1
  92. package/src/runtimes/connections.test.ts +141 -1
  93. package/src/runtimes/connections.ts +73 -4
  94. package/src/runtimes/headless-connection.test.ts +264 -0
  95. package/src/runtimes/headless-connection.ts +158 -0
  96. package/src/runtimes/types.ts +10 -0
  97. package/src/schema-consistency.test.ts +1 -0
  98. package/src/sessions/store.test.ts +390 -24
  99. package/src/sessions/store.ts +184 -19
  100. package/src/test-setup.test.ts +31 -0
  101. package/src/test-setup.ts +28 -0
  102. package/src/types.ts +56 -1
  103. package/src/utils/pid.test.ts +85 -1
  104. package/src/utils/pid.ts +86 -1
  105. package/src/utils/process-scan.test.ts +53 -0
  106. package/src/utils/process-scan.ts +76 -0
  107. package/src/watchdog/daemon.test.ts +1520 -411
  108. package/src/watchdog/daemon.ts +442 -83
  109. package/src/watchdog/health.test.ts +157 -0
  110. package/src/watchdog/health.ts +92 -25
  111. package/src/worktree/process.test.ts +71 -0
  112. package/src/worktree/process.ts +25 -5
  113. package/src/worktree/tmux.test.ts +39 -0
  114. package/src/worktree/tmux.ts +23 -3
  115. package/templates/CLAUDE.md.tmpl +19 -8
  116. package/templates/overlay.md.tmpl +3 -2
@@ -103,6 +103,67 @@ describe("evaluateHealth", () => {
103
103
  expect(check.reconciliationNote).toBeNull();
104
104
  });
105
105
 
106
+ // --- ZFC Rule 1 fallback: tmux dead + stale lastActivity → completed ---
107
+
108
+ test("ZFC fallback: tmux dead + stale lastActivity (working) → complete (missed signal)", () => {
109
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
110
+ const session = makeSession({ state: "working", lastActivity: staleActivity });
111
+ const check = evaluateHealth(session, false, THRESHOLDS);
112
+
113
+ expect(check.state).toBe("completed");
114
+ expect(check.action).toBe("complete");
115
+ expect(check.tmuxAlive).toBe(false);
116
+ expect(check.processAlive).toBe(false);
117
+ expect(check.reconciliationNote).toContain("missed session-end signal");
118
+ });
119
+
120
+ test("ZFC fallback: tmux dead + stale lastActivity (stalled) → complete (missed signal)", () => {
121
+ const staleActivity = new Date(Date.now() - 90_000).toISOString();
122
+ const session = makeSession({ state: "stalled", lastActivity: staleActivity });
123
+ const check = evaluateHealth(session, false, THRESHOLDS);
124
+
125
+ expect(check.state).toBe("completed");
126
+ expect(check.action).toBe("complete");
127
+ });
128
+
129
+ test("ZFC: tmux dead + recent lastActivity → still zombie (true crash)", () => {
130
+ const recentActivity = new Date(Date.now() - 1_000).toISOString();
131
+ const session = makeSession({ state: "working", lastActivity: recentActivity });
132
+ const check = evaluateHealth(session, false, THRESHOLDS);
133
+
134
+ expect(check.state).toBe("zombie");
135
+ expect(check.action).toBe("terminate");
136
+ });
137
+
138
+ test("ZFC fallback (headless): pid dead + stale lastActivity → complete", () => {
139
+ const staleActivity = new Date(Date.now() - 60_000).toISOString();
140
+ const session = makeSession({
141
+ state: "working",
142
+ tmuxSession: "",
143
+ pid: DEAD_PID,
144
+ lastActivity: staleActivity,
145
+ });
146
+ const check = evaluateHealth(session, false, THRESHOLDS);
147
+
148
+ expect(check.state).toBe("completed");
149
+ expect(check.action).toBe("complete");
150
+ expect(check.reconciliationNote).toContain("missed session-end signal");
151
+ });
152
+
153
+ test("ZFC (headless): pid dead + recent lastActivity → still zombie", () => {
154
+ const recentActivity = new Date(Date.now() - 1_000).toISOString();
155
+ const session = makeSession({
156
+ state: "working",
157
+ tmuxSession: "",
158
+ pid: DEAD_PID,
159
+ lastActivity: recentActivity,
160
+ });
161
+ const check = evaluateHealth(session, false, THRESHOLDS);
162
+
163
+ expect(check.state).toBe("zombie");
164
+ expect(check.action).toBe("terminate");
165
+ });
166
+
106
167
  // --- ZFC Rule 2: tmux alive + sessions.json says zombie → investigate ---
107
168
 
108
169
  test("ZFC: tmux alive + sessions.json says zombie → investigate (don't auto-kill)", () => {
@@ -432,6 +493,102 @@ describe("headless agents (tmuxSession empty, PID-based lifecycle)", () => {
432
493
  });
433
494
  });
434
495
 
496
+ // === Spawn-per-turn workers (tmuxSession === '' && pid === null) ===
497
+
498
+ describe("spawn-per-turn workers (overstory-7a34)", () => {
499
+ // Spawn-per-turn workers (builder/scout/reviewer/lead/merger under the
500
+ // headless default) have no persistent process between turns. The previous
501
+ // "headless" branch only matched pid !== null, so these sessions fell into
502
+ // the TUI/tmux path where tmuxAlive=false → ZFC Rule 1 → zombie within
503
+ // seconds of sling, despite actively executing tools (overstory-7a34).
504
+
505
+ test("freshly slung spawn-per-turn lead (booting, no pid, no tmux) → working", () => {
506
+ const session = makeSession({
507
+ tmuxSession: "",
508
+ pid: null,
509
+ capability: "lead",
510
+ state: "booting",
511
+ lastActivity: new Date().toISOString(),
512
+ });
513
+ const check = evaluateHealth(session, false, THRESHOLDS);
514
+
515
+ expect(check.state).toBe("working");
516
+ expect(check.action).toBe("none");
517
+ expect(check.reconciliationNote).toBeNull();
518
+ });
519
+
520
+ test("active spawn-per-turn worker (working, recent activity) → stays working", () => {
521
+ const session = makeSession({
522
+ tmuxSession: "",
523
+ pid: null,
524
+ capability: "builder",
525
+ state: "working",
526
+ lastActivity: new Date(Date.now() - 5_000).toISOString(),
527
+ });
528
+ const check = evaluateHealth(session, false, THRESHOLDS);
529
+
530
+ expect(check.state).toBe("working");
531
+ expect(check.action).toBe("none");
532
+ });
533
+
534
+ test("spawn-per-turn worker between turns (state working, very recent) → working, NOT zombie", () => {
535
+ // Repro: ov sling --capability lead any-task; within ~30s ov dashboard
536
+ // previously showed state='zombie' while ov feed showed live tool calls.
537
+ const session = makeSession({
538
+ tmuxSession: "",
539
+ pid: null,
540
+ capability: "lead",
541
+ state: "working",
542
+ lastActivity: new Date().toISOString(),
543
+ });
544
+ const check = evaluateHealth(session, false, THRESHOLDS);
545
+
546
+ expect(check.state).toBe("working");
547
+ expect(check.action).toBe("none");
548
+ });
549
+
550
+ test("spawn-per-turn worker with stale activity → stalled", () => {
551
+ const session = makeSession({
552
+ tmuxSession: "",
553
+ pid: null,
554
+ capability: "builder",
555
+ state: "working",
556
+ lastActivity: new Date(Date.now() - 60_000).toISOString(),
557
+ });
558
+ const check = evaluateHealth(session, false, THRESHOLDS);
559
+
560
+ expect(check.state).toBe("stalled");
561
+ expect(check.action).toBe("escalate");
562
+ });
563
+
564
+ test("spawn-per-turn worker with zombie-level staleness → zombie, terminate", () => {
565
+ const session = makeSession({
566
+ tmuxSession: "",
567
+ pid: null,
568
+ capability: "builder",
569
+ state: "working",
570
+ lastActivity: new Date(Date.now() - 200_000).toISOString(),
571
+ });
572
+ const check = evaluateHealth(session, false, THRESHOLDS);
573
+
574
+ expect(check.state).toBe("zombie");
575
+ expect(check.action).toBe("terminate");
576
+ });
577
+
578
+ test("spawn-per-turn worker that already completed → skips monitoring", () => {
579
+ const session = makeSession({
580
+ tmuxSession: "",
581
+ pid: null,
582
+ capability: "builder",
583
+ state: "completed",
584
+ });
585
+ const check = evaluateHealth(session, false, THRESHOLDS);
586
+
587
+ expect(check.state).toBe("completed");
588
+ expect(check.action).toBe("none");
589
+ });
590
+ });
591
+
435
592
  // === transitionState ===
436
593
 
437
594
  describe("transitionState", () => {
@@ -30,18 +30,9 @@
30
30
  * table are always up-to-date because they reflect real kernel state.
31
31
  */
32
32
 
33
+ import { isPersistentCapability } from "../agents/capabilities.ts";
33
34
  import type { AgentSession, AgentState, HealthCheck } from "../types.ts";
34
35
 
35
- /**
36
- * Agent capabilities that run as persistent interactive sessions.
37
- * These agents are expected to have long idle periods (e.g. coordinator waiting
38
- * for worker mail) and should NOT be flagged stale/zombie based on lastActivity.
39
- * Only tmux/pid liveness checks apply to them.
40
- *
41
- * Shared concept with src/commands/log.ts:PERSISTENT_CAPABILITIES.
42
- */
43
- const PERSISTENT_CAPABILITIES = new Set(["coordinator", "orchestrator", "monitor"]);
44
-
45
36
  /** Numeric ordering for forward-only state transitions. */
46
37
  const STATE_ORDER: Record<AgentState, number> = {
47
38
  booting: 0,
@@ -71,15 +62,34 @@ export function isProcessRunning(pid: number): boolean {
71
62
  }
72
63
 
73
64
  /**
74
- * Detect whether a session is a headless agent.
65
+ * Detect whether a session is a long-lived headless agent.
75
66
  *
76
- * Headless agents are spawned without a tmux session (tmuxSession === '') and
77
- * are tracked solely by PID. For these agents, PID is the primary liveness signal.
67
+ * Long-lived headless agents (coordinator, orchestrator, monitor, sapling, etc.)
68
+ * have no tmux session (tmuxSession === '') but do have a persistent process
69
+ * so `session.pid` is non-null and PID is the primary liveness signal.
78
70
  */
79
71
  function isHeadlessSession(session: AgentSession): boolean {
80
72
  return session.tmuxSession === "" && session.pid !== null;
81
73
  }
82
74
 
75
+ /**
76
+ * Detect whether a session is a spawn-per-turn worker between turns.
77
+ *
78
+ * Spawn-per-turn workers (task-scoped capabilities under the new headless
79
+ * default — builder/scout/reviewer/lead/merger) have no tmux session AND no
80
+ * persistent process: `tmuxSession === ''` and `session.pid === null` from
81
+ * sling onward. The per-turn claude PID lives in
82
+ * `.overstory/agents/<name>/turn.pid` only while a turn is in flight.
83
+ *
84
+ * "No process" is the normal state between turns, so neither tmux liveness nor
85
+ * pid liveness can be used as a death signal — only `lastActivity` recency
86
+ * (refreshed by the turn-runner on every event and by the watchdog from
87
+ * events.db) can. (overstory-7a34)
88
+ */
89
+ export function isSpawnPerTurnSession(session: AgentSession): boolean {
90
+ return session.tmuxSession === "" && session.pid === null;
91
+ }
92
+
83
93
  /**
84
94
  * Evaluate time-based health (persistent capability exemptions, stale, zombie thresholds,
85
95
  * booting→working transition). Called after liveness is confirmed for both TUI and headless paths.
@@ -98,7 +108,7 @@ function evaluateTimeBased(
98
108
  // Persistent capabilities (coordinator, monitor) are expected to have long idle
99
109
  // periods waiting for mail/events. Skip time-based stale/zombie detection for
100
110
  // them — only tmux/pid liveness matters (checked above).
101
- if (PERSISTENT_CAPABILITIES.has(session.capability)) {
111
+ if (isPersistentCapability(session.capability)) {
102
112
  // Transition booting → working if we reach here (process alive)
103
113
  const state = session.state === "booting" ? "working" : session.state;
104
114
  return {
@@ -165,19 +175,23 @@ function evaluateTimeBased(
165
175
  * Decision logic (in priority order):
166
176
  *
167
177
  * 1. Completed agents skip monitoring entirely.
168
- * 2. Headless agents (tmuxSession === ''): PID is primary liveness signal.
178
+ * 2. Spawn-per-turn workers (tmuxSession === '' && pid === null): no
179
+ * persistent process between turns — fall straight through to time-based
180
+ * checks driven by lastActivity. PID/tmux liveness are meaningless here.
181
+ * 3. Headless agents with persistent process (tmuxSession === '' && pid !== null):
182
+ * PID is primary liveness signal.
169
183
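  * - pid dead → zombie, terminate; or completed (action "complete") when lastActivity is older than staleMs and the recorded state is working/booting/stalled (missed session-end signal).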
170
184
  * - pid alive + state zombie → investigate.
171
185
  * - pid alive → fall through to time-based checks.
172
- * 3. tmux dead → zombie, terminate (regardless of what sessions.json says).
173
- * 4. tmux alive + sessions.json says zombie → investigate (don't auto-kill).
186
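+ * 4. tmux dead → zombie, terminate; or completed (action "complete") when lastActivity is older than staleMs and the recorded state is working/booting/stalled (missed session-end signal).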
187
+ * 5. tmux alive + sessions.json says zombie → investigate (don't auto-kill).
174
188
  * Something external marked this zombie, but the process is still running.
175
- * 5. pid dead + tmux alive → zombie, terminate. The agent process exited but
189
+ * 6. pid dead + tmux alive → zombie, terminate. The agent process exited but
176
190
  * the tmux pane shell survived. The agent is not doing work.
177
- * 6. lastActivity older than zombieMs → zombie, terminate.
178
- * 7. lastActivity older than staleMs → stalled, escalate.
179
- * 8. booting with recent activity → working.
180
- * 9. Otherwise → working, healthy.
191
+ * 7. lastActivity older than zombieMs → zombie, terminate.
192
+ * 8. lastActivity older than staleMs → stalled, escalate.
193
+ * 9. booting with recent activity → working.
194
+ * 10. Otherwise → working, healthy.
181
195
  *
182
196
  * @param session - The agent session to evaluate
183
197
  * @param tmuxAlive - Whether the agent's tmux session is still running
@@ -222,10 +236,37 @@ export function evaluateHealth(
222
236
  };
223
237
  }
224
238
 
239
+ // === Spawn-per-turn path: no persistent process between turns ===
240
+ // For these workers (overstory-7a34) `session.pid` is null by design and
241
+ // there is no tmux session. Liveness signals reduce to lastActivity
242
+ // recency: the turn-runner updates it on every parser event during a
243
+ // turn, and the watchdog refreshes it from events.db between turns. PID
244
+ // and tmux checks would always say "dead" and false-positive every fresh
245
+ // agent as zombie within seconds of sling.
246
+ if (isSpawnPerTurnSession(session)) {
247
+ return evaluateTimeBased(session, base, elapsedMs, thresholds);
248
+ }
249
+
225
250
  // === Headless path: PID is the primary liveness signal ===
226
251
  if (isHeadlessSession(session)) {
227
- // pid dead → zombie immediately (equivalent to ZFC Rule 1 for headless)
252
+ // pid dead: zombie OR completed-with-missed-signal.
253
+ // Distinguish by lastActivity age — recent activity means the agent
254
+ // crashed mid-work (true zombie); stale activity means it likely
255
+ // finished naturally and only the session-end hook didn't deliver
256
+ // (treat as completed). (overstory-e74b)
228
257
  if (pidAlive === false) {
258
+ if (
259
+ elapsedMs > thresholds.staleMs &&
260
+ (session.state === "working" || session.state === "booting" || session.state === "stalled")
261
+ ) {
262
+ return {
263
+ ...base,
264
+ processAlive: false,
265
+ state: "completed",
266
+ action: "complete",
267
+ reconciliationNote: `ZFC: headless pid ${session.pid} dead + stale lastActivity (${Math.round(elapsedMs / 1000)}s ago) — assumed completed (missed session-end signal)`,
268
+ };
269
+ }
229
270
  return {
230
271
  ...base,
231
272
  processAlive: false,
@@ -253,9 +294,25 @@ export function evaluateHealth(
253
294
 
254
295
  // === TUI/tmux path ===
255
296
 
256
- // ZFC Rule 1: tmux dead → zombie immediately, regardless of recorded state.
257
- // Observable state says the process is gone.
297
+ // ZFC Rule 1: tmux dead → zombie OR completed-with-missed-signal.
298
+ // Distinguish by lastActivity age — recent activity means the agent
299
+ // crashed mid-work (true zombie); stale activity means it likely
300
+ // finished naturally and only the session-end hook didn't deliver
301
+ // (treat as completed). (overstory-e74b)
258
302
  if (!tmuxAlive) {
303
+ if (
304
+ elapsedMs > thresholds.staleMs &&
305
+ (session.state === "working" || session.state === "booting" || session.state === "stalled")
306
+ ) {
307
+ return {
308
+ ...base,
309
+ processAlive: false,
310
+ state: "completed",
311
+ action: "complete",
312
+ reconciliationNote: `ZFC: tmux dead + stale lastActivity (${Math.round(elapsedMs / 1000)}s ago) — assumed completed (missed session-end signal)`,
313
+ };
314
+ }
315
+
259
316
  const note =
260
317
  session.state === "working" || session.state === "booting"
261
318
  ? `ZFC: tmux dead but sessions.json says "${session.state}" — marking zombie (observable state wins)`
@@ -323,6 +380,16 @@ export function transitionState(currentState: AgentState, check: HealthCheck): A
323
380
  return currentState;
324
381
  }
325
382
 
383
+ // `complete` is a terminal classification triggered when observable state
384
+ // proves the agent finished naturally (missed session-end signal —
385
+ // overstory-e74b). It bypasses the forward-only STATE_ORDER guard because
386
+ // `completed` (order 2) sits before `stalled` (order 3) and would
387
+ // otherwise be blocked from advancing the recorded state. The matrix in
388
+ // SessionStore.tryTransitionState still gates the actual write.
389
+ if (check.action === "complete") {
390
+ return check.state;
391
+ }
392
+
326
393
  const currentOrder = STATE_ORDER[currentState];
327
394
  const checkOrder = STATE_ORDER[check.state];
328
395
 
@@ -2,6 +2,8 @@ import { afterEach, beforeEach, describe, expect, it } from "bun:test";
2
2
  import { mkdtemp, rm } from "node:fs/promises";
3
3
  import { tmpdir } from "node:os";
4
4
  import { join } from "node:path";
5
+ import { getConnection, removeConnection } from "../runtimes/connections.ts";
6
+ import { HeadlessClaudeConnection } from "../runtimes/headless-connection.ts";
5
7
  import { spawnHeadlessAgent } from "./process.ts";
6
8
 
7
9
  describe("spawnHeadlessAgent", () => {
@@ -22,6 +24,75 @@ describe("spawnHeadlessAgent", () => {
22
24
  );
23
25
  });
24
26
 
27
+ describe("agentName connection registration", () => {
28
+ const registeredNames: string[] = [];
29
+
30
+ afterEach(() => {
31
+ for (const name of registeredNames.splice(0)) {
32
+ removeConnection(name);
33
+ }
34
+ });
35
+
36
+ it("registers a HeadlessClaudeConnection when agentName is provided", async () => {
37
+ const agentName = "test-headless-agent-xyz";
38
+ registeredNames.push(agentName);
39
+
40
+ const proc = await spawnHeadlessAgent(["sleep", "5"], {
41
+ cwd: process.cwd(),
42
+ env: { ...(process.env as Record<string, string>) },
43
+ agentName,
44
+ });
45
+
46
+ expect(proc.pid).toBeGreaterThan(0);
47
+ const conn = getConnection(agentName);
48
+ expect(conn).toBeDefined();
49
+ expect(conn).toBeInstanceOf(HeadlessClaudeConnection);
50
+
51
+ // Clean up the spawned process
52
+ try {
53
+ process.kill(proc.pid, "SIGTERM");
54
+ } catch {
55
+ // ignore
56
+ }
57
+ });
58
+
59
+ it("does not register a connection when agentName is omitted", async () => {
60
+ const proc = await spawnHeadlessAgent(["echo", "no-register"], {
61
+ cwd: process.cwd(),
62
+ env: { ...(process.env as Record<string, string>) },
63
+ });
64
+
65
+ // Drain stdout so process exits cleanly
66
+ if (proc.stdout) {
67
+ await new Response(proc.stdout).text();
68
+ }
69
+
70
+ // No connection was registered (use a stable lookup key that was never set)
71
+ expect(getConnection("never-registered-in-this-test")).toBeUndefined();
72
+ });
73
+
74
+ it("registered connection pid matches the spawned process pid", async () => {
75
+ const agentName = "test-headless-pid-check-xyz";
76
+ registeredNames.push(agentName);
77
+
78
+ const proc = await spawnHeadlessAgent(["sleep", "5"], {
79
+ cwd: process.cwd(),
80
+ env: { ...(process.env as Record<string, string>) },
81
+ agentName,
82
+ });
83
+
84
+ const conn = getConnection(agentName) as HeadlessClaudeConnection;
85
+ expect(conn).toBeDefined();
86
+ expect(conn.pid).toBe(proc.pid);
87
+
88
+ try {
89
+ process.kill(proc.pid, "SIGTERM");
90
+ } catch {
91
+ // ignore
92
+ }
93
+ });
94
+ });
95
+
25
96
  describe("file redirect mode", () => {
26
97
  let tmpDir: string;
27
98
 
@@ -1,15 +1,20 @@
1
1
  /**
2
2
  * Headless subprocess management for non-tmux agent runtimes.
3
3
  *
4
- * Used by `ov sling` when runtime.headless === true to bypass tmux entirely.
5
- * Provides spawnHeadlessAgent() for direct Bun.spawn() invocation of
6
- * headless agent processes (e.g., Sapling running with --json).
4
+ * Used by long-lived headless runtimes that bypass tmux (e.g., Sapling running
5
+ * with --json). Provides spawnHeadlessAgent() for direct Bun.spawn() invocation.
6
+ *
7
+ * Headless Claude Code does NOT use this path — under spawn-per-turn (Phase 3),
8
+ * Claude agents have no persistent process; each turn spawns a fresh claude
9
+ * inside `runTurn` (src/agents/turn-runner.ts). This module remains for
10
+ * runtimes that genuinely need a long-lived RPC channel.
7
11
  *
8
12
  * Note: isProcessAlive() and killProcessTree() for headless process lifecycle
9
13
  * management already exist in src/worktree/tmux.ts — not duplicated here.
10
14
  */
11
15
 
12
16
  import { AgentError } from "../errors.ts";
17
+ import { registerHeadlessConnection } from "../runtimes/connections.ts";
13
18
 
14
19
  /**
15
20
  * Handle to a spawned headless agent subprocess.
@@ -57,6 +62,15 @@ export interface SpawnHeadlessOptions {
57
62
  * When set, redirect subprocess stderr to this file path instead of a pipe.
58
63
  */
59
64
  stderrFile?: string;
65
+ /**
66
+ * When set, registers the spawned process as a `RuntimeConnection` keyed by
67
+ * this agent name (sibling of Sapling's RPC connect() flow). Lets `ov nudge`,
68
+ * the watchdog's liveness/abort path, etc. find the live process via
69
+ * `getConnection(agentName)`.
70
+ *
71
+ * Same namespace as AgentSession.agentName.
72
+ */
73
+ agentName?: string;
60
74
  }
61
75
 
62
76
  /**
@@ -103,9 +117,15 @@ export async function spawnHeadlessAgent(
103
117
  stdin: "pipe",
104
118
  });
105
119
 
106
- return {
120
+ const result: HeadlessProcess = {
107
121
  pid: proc.pid,
108
- stdin: proc.stdin,
122
+ stdin: proc.stdin as HeadlessProcess["stdin"],
109
123
  stdout: opts.stdoutFile ? null : (proc.stdout as ReadableStream<Uint8Array>),
110
124
  };
125
+
126
+ if (opts.agentName) {
127
+ registerHeadlessConnection(opts.agentName, result);
128
+ }
129
+
130
+ return result;
111
131
  }
@@ -13,6 +13,7 @@ import {
13
13
  killProcessTree,
14
14
  killSession,
15
15
  listSessions,
16
+ sanitizeTmuxName,
16
17
  sendKeys,
17
18
  waitForTuiReady,
18
19
  } from "./tmux.ts";
@@ -111,6 +112,9 @@ describe("createSession", () => {
111
112
  const wrappedCmd = cmd[9] as string;
112
113
  expect(wrappedCmd).toContain("echo hello");
113
114
  expect(wrappedCmd).toContain("export PATH=");
115
+ // `exec` replaces the bash wrapper with the command so SIGHUP from a
116
+ // dying tmux server is delivered directly to claude (overstory-505d).
117
+ expect(wrappedCmd).toContain("exec echo hello");
114
118
 
115
119
  const opts = tmuxCallArgs[1] as { cwd: string };
116
120
  expect(opts.cwd).toBe("/work/dir");
@@ -1550,3 +1554,38 @@ describe("ensureTmuxAvailable", () => {
1550
1554
  }
1551
1555
  });
1552
1556
  });
1557
+
1558
+ describe("sanitizeTmuxName", () => {
1559
+ test("replaces dots with underscores", () => {
1560
+ expect(sanitizeTmuxName("consulting.jayminwest.com")).toBe("consulting_jayminwest_com");
1561
+ });
1562
+
1563
+ test("replaces colons with underscores", () => {
1564
+ expect(sanitizeTmuxName("host:8080")).toBe("host_8080");
1565
+ });
1566
+
1567
+ test("replaces mixed dots and colons", () => {
1568
+ expect(sanitizeTmuxName("my.project:v2.0")).toBe("my_project_v2_0");
1569
+ });
1570
+
1571
+ test("leaves names without special characters unchanged", () => {
1572
+ expect(sanitizeTmuxName("my-project")).toBe("my-project");
1573
+ });
1574
+
1575
+ test("handles empty string", () => {
1576
+ expect(sanitizeTmuxName("")).toBe("");
1577
+ });
1578
+
1579
+ test("handles name with only dots", () => {
1580
+ expect(sanitizeTmuxName("...")).toBe("___");
1581
+ });
1582
+
1583
+ test("produces valid tmux session name components", () => {
1584
+ // A real-world project name that would break tmux target parsing
1585
+ const projectName = "consulting.jayminwest.com";
1586
+ const sessionName = `overstory-${sanitizeTmuxName(projectName)}-coordinator`;
1587
+ expect(sessionName).toBe("overstory-consulting_jayminwest_com-coordinator");
1588
+ // No dots or colons that tmux would interpret as separators
1589
+ expect(sessionName).not.toMatch(/[.:]/);
1590
+ });
1591
+ });
@@ -21,6 +21,19 @@ import type { ReadyState } from "../runtimes/types.ts";
21
21
  */
22
22
  export const TMUX_SOCKET = "overstory";
23
23
 
24
+ /**
25
+ * Sanitize a name component for use in tmux session names.
26
+ *
27
+ * Tmux interprets dots (.) as session.window.pane separators and colons (:)
28
+ * as session:window separators in target strings (`-t`). If a project name
29
+ * contains these characters (e.g., "consulting.jayminwest.com"), the session
30
+ * is created fine but subsequent lookups via `-t` parse the dots as delimiters
31
+ * and fail to find the session. Replace both with underscores.
32
+ */
33
+ export function sanitizeTmuxName(name: string): string {
34
+ return name.replace(/[.:]/g, "_");
35
+ }
36
+
24
37
  /**
25
38
  * Build a tmux command array with the dedicated server socket.
26
39
  * All agent session operations should use this to ensure isolation.
@@ -136,9 +149,16 @@ export async function createSession(
136
149
  // causes the session to die instantly. Single-quote wrapping with escaped
137
150
  // single quotes prevents any intermediate shell from expanding variables
138
151
  // before bash receives them. (GitHub #86)
139
- const startupScript = exports.length > 0 ? `${exports.join(" && ")} && ${command}` : command;
140
- const wrappedCommand =
141
- exports.length > 0 ? `/bin/bash -c '${startupScript.replace(/'/g, "'\\''")}'` : command;
152
+ //
153
+ // The `exec` prefix replaces the bash wrapper with the spawned command
154
+ // so there is no separate wrapper PID to orphan if the tmux server dies
155
+ // externally. Without exec, bash receives SIGHUP on tmux teardown but its
156
+ // claude child gets reparented to init and continues running. With exec,
157
+ // the wrapper IS the command — SIGHUP is delivered directly to claude.
158
+ // (overstory-505d)
159
+ const startupScript =
160
+ exports.length > 0 ? `${exports.join(" && ")} && exec ${command}` : `exec ${command}`;
161
+ const wrappedCommand = `/bin/bash -c '${startupScript.replace(/'/g, "'\\''")}'`;
142
162
 
143
163
  const { exitCode, stderr } = await runCommand(
144
164
  tmuxCmd("new-session", "-d", "-s", name, "-c", cwd, wrappedCommand),
@@ -6,6 +6,11 @@ This project uses **overstory** for Claude Code agent orchestration. Your sessio
6
6
  acts as the orchestrator: you decide what work to delegate, spawn worker agents,
7
7
  monitor progress, and merge results.
8
8
 
9
+ The **web UI is your primary operator surface** — run `ov serve` and open
10
+ http://localhost:8080 to watch the swarm. Workers spawn headless by default, so
11
+ the UI sees them with full structured-event fidelity. `tmux attach` is the opt-in
12
+ escape hatch when you need to steer a single agent live (`ov sling --no-headless`).
13
+
9
14
  ## Quick Reference
10
15
 
11
16
  ```bash
@@ -48,9 +53,12 @@ ov log <event> --agent <name> # Hook-driven event logging
48
53
  3. Assign exclusive file scope so agents do not conflict
49
54
  4. Spawn: `ov sling <bead-id> --capability <type> --name <unique-name> --files src/foo.ts,src/bar.ts`
50
55
 
51
- Each spawned agent gets its own git worktree, branch, CLAUDE.md overlay, and
52
- tmux session. Agents communicate via `ov mail` and report completion
53
- by closing their {{TRACKER_NAME}} issue (`{{TRACKER_CLI}} close <id> --reason "summary"`).
56
+ Each spawned agent gets its own git worktree, branch, and CLAUDE.md overlay.
57
+ Claude agents spawn **headless by default** — the web UI (`ov serve`, then open
58
+ http://localhost:8080) is the primary operator surface. Pass `--no-headless` to
59
+ spawn into a tmux session you can attach to (`tmux attach -t ov-<agent>`).
60
+ Agents communicate via `ov mail` and report completion by closing their
61
+ {{TRACKER_NAME}} issue (`{{TRACKER_CLI}} close <id> --reason "summary"`).
54
62
 
55
63
  ## Hierarchical Delegation
56
64
 
@@ -69,11 +77,14 @@ to track hierarchy.
69
77
 
70
78
  ## Checking Status
71
79
 
72
- Run `ov status` to see:
73
- - Active agents and their states (booting, working, stalled, zombie)
74
- - Worktree locations and branches
75
- - Beads issue progress
76
- - Unread mail count
80
+ The web UI (`ov serve`, http://localhost:8080) is the primary view —
81
+ fleet topology, per-agent timelines, mail inbox, and live events.
82
+
83
+ CLI alternatives for scripting / quick checks:
84
+
85
+ - `ov status` — active agents and states, worktrees, {{TRACKER_NAME}} progress, unread mail
86
+ - `ov dashboard` — live TUI dashboard if you don't want to leave the terminal
87
+ - `ov inspect <agent>` — deep view of one agent
77
88
 
78
89
  ## Canonical Branch
79
90
 
@@ -68,9 +68,10 @@ ov mail send --to {{PARENT_AGENT}} --subject "status" \
68
68
  ov mail send --to {{PARENT_AGENT}} --subject "question" \
69
69
  --body "Your question here" --type question --priority high --agent {{AGENT_NAME}}
70
70
 
71
- # Report completion
71
+ # Report completion (terminal exit signal — workers send worker_done; merger
72
+ # sends merged/merge_failed; see "Constraints" / "Completion" sections below).
72
73
  ov mail send --to {{PARENT_AGENT}} --subject "done" \
73
- --body "Summary of what was done" --type result --agent {{AGENT_NAME}}
74
+ --body "Summary of what was done" --type worker_done --agent {{AGENT_NAME}}
74
75
 
75
76
  # Reply to a message
76
77
  ov mail reply <message-id> --body "Your reply" --agent {{AGENT_NAME}}