@os-eco/overstory-cli 0.9.3 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/README.md +49 -18
  2. package/agents/builder.md +9 -8
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +98 -82
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +211 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/overlay.test.ts +4 -4
  18. package/src/agents/overlay.ts +30 -8
  19. package/src/agents/turn-lock.test.ts +181 -0
  20. package/src/agents/turn-lock.ts +235 -0
  21. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  22. package/src/agents/turn-runner-dispatch.ts +105 -0
  23. package/src/agents/turn-runner.test.ts +1450 -0
  24. package/src/agents/turn-runner.ts +1166 -0
  25. package/src/commands/clean.ts +56 -1
  26. package/src/commands/completions.test.ts +4 -1
  27. package/src/commands/coordinator.test.ts +127 -0
  28. package/src/commands/coordinator.ts +205 -6
  29. package/src/commands/dashboard.test.ts +188 -0
  30. package/src/commands/dashboard.ts +13 -3
  31. package/src/commands/doctor.ts +94 -77
  32. package/src/commands/group.test.ts +94 -0
  33. package/src/commands/group.ts +49 -20
  34. package/src/commands/init.test.ts +8 -0
  35. package/src/commands/init.ts +8 -1
  36. package/src/commands/log.test.ts +56 -11
  37. package/src/commands/log.ts +134 -69
  38. package/src/commands/mail.test.ts +162 -0
  39. package/src/commands/mail.ts +64 -9
  40. package/src/commands/merge.test.ts +112 -1
  41. package/src/commands/merge.ts +17 -4
  42. package/src/commands/monitor.ts +2 -1
  43. package/src/commands/nudge.test.ts +351 -4
  44. package/src/commands/nudge.ts +356 -34
  45. package/src/commands/run.test.ts +43 -7
  46. package/src/commands/serve/build.test.ts +202 -0
  47. package/src/commands/serve/build.ts +206 -0
  48. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  49. package/src/commands/serve/coordinator-actions.ts +408 -0
  50. package/src/commands/serve/dev.test.ts +168 -0
  51. package/src/commands/serve/dev.ts +117 -0
  52. package/src/commands/serve/mail-actions.test.ts +312 -0
  53. package/src/commands/serve/mail-actions.ts +167 -0
  54. package/src/commands/serve/rest.test.ts +1323 -0
  55. package/src/commands/serve/rest.ts +708 -0
  56. package/src/commands/serve/static.ts +51 -0
  57. package/src/commands/serve/ws.test.ts +361 -0
  58. package/src/commands/serve/ws.ts +332 -0
  59. package/src/commands/serve.test.ts +459 -0
  60. package/src/commands/serve.ts +565 -0
  61. package/src/commands/sling.test.ts +85 -1
  62. package/src/commands/sling.ts +153 -64
  63. package/src/commands/status.test.ts +9 -0
  64. package/src/commands/status.ts +12 -4
  65. package/src/commands/stop.test.ts +174 -1
  66. package/src/commands/stop.ts +107 -8
  67. package/src/commands/supervisor.ts +2 -1
  68. package/src/commands/watch.test.ts +49 -4
  69. package/src/commands/watch.ts +153 -28
  70. package/src/commands/worktree.test.ts +319 -3
  71. package/src/commands/worktree.ts +86 -0
  72. package/src/config.test.ts +78 -0
  73. package/src/config.ts +43 -1
  74. package/src/doctor/consistency.test.ts +106 -0
  75. package/src/doctor/consistency.ts +50 -3
  76. package/src/doctor/serve.test.ts +95 -0
  77. package/src/doctor/serve.ts +86 -0
  78. package/src/doctor/types.ts +2 -1
  79. package/src/doctor/watchdog.ts +57 -1
  80. package/src/events/tailer.test.ts +234 -1
  81. package/src/events/tailer.ts +90 -0
  82. package/src/index.ts +53 -6
  83. package/src/json.ts +29 -0
  84. package/src/mail/client.ts +15 -2
  85. package/src/mail/store.test.ts +82 -0
  86. package/src/mail/store.ts +41 -4
  87. package/src/merge/lock.test.ts +149 -0
  88. package/src/merge/lock.ts +140 -0
  89. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  90. package/src/runtimes/claude.test.ts +791 -1
  91. package/src/runtimes/claude.ts +323 -1
  92. package/src/runtimes/connections.test.ts +141 -1
  93. package/src/runtimes/connections.ts +73 -4
  94. package/src/runtimes/headless-connection.test.ts +264 -0
  95. package/src/runtimes/headless-connection.ts +158 -0
  96. package/src/runtimes/types.ts +10 -0
  97. package/src/schema-consistency.test.ts +1 -0
  98. package/src/sessions/store.test.ts +390 -24
  99. package/src/sessions/store.ts +184 -19
  100. package/src/test-setup.test.ts +31 -0
  101. package/src/test-setup.ts +28 -0
  102. package/src/types.ts +56 -1
  103. package/src/utils/pid.test.ts +85 -1
  104. package/src/utils/pid.ts +86 -1
  105. package/src/utils/process-scan.test.ts +53 -0
  106. package/src/utils/process-scan.ts +76 -0
  107. package/src/watchdog/daemon.test.ts +1520 -411
  108. package/src/watchdog/daemon.ts +442 -83
  109. package/src/watchdog/health.test.ts +157 -0
  110. package/src/watchdog/health.ts +92 -25
  111. package/src/worktree/process.test.ts +71 -0
  112. package/src/worktree/process.ts +25 -5
  113. package/src/worktree/tmux.test.ts +39 -0
  114. package/src/worktree/tmux.ts +23 -3
  115. package/templates/CLAUDE.md.tmpl +19 -8
  116. package/templates/overlay.md.tmpl +3 -2
package/src/sessions/store.ts CHANGED
@@ -7,7 +7,50 @@
7
7
  */
8
8
 
9
9
  import { Database } from "bun:sqlite";
10
- import type { AgentSession, AgentState, InsertRun, Run, RunStatus, RunStore } from "../types.ts";
10
+ import type {
11
+ AgentSession,
12
+ AgentState,
13
+ InsertRun,
14
+ Run,
15
+ RunStatus,
16
+ RunStore,
17
+ TransitionOutcome,
18
+ } from "../types.ts";
19
+
20
+ /**
21
+ * Allowed predecessor states for each target state, enforced by
22
+ * `tryTransitionState` via an atomic SQL compare-and-swap.
23
+ *
24
+ * Invariants:
25
+ * - `completed` is sticky: nothing transitions out of it. The watchdog cannot
26
+ * reclassify a properly-completed agent as zombie.
27
+ * - `zombie` is durable except `ov stop` may promote it to `completed` for
28
+ * cleanup. A turn-runner that "settles to working" after watchdog already
29
+ * wrote zombie is rejected — last writer no longer wins.
30
+ * - Idempotent self-transitions (e.g. `working → working`) are allowed.
31
+ * - `booting` is set only by the initial `upsert` and never re-entered.
32
+ *
33
+ * See overstory-a993 for the race symptoms this guard prevents.
34
+ */
35
+ const TRANSITION_ALLOWED_FROM: Record<AgentState, readonly AgentState[]> = {
36
+ booting: [],
37
+ working: ["booting", "working", "stalled"],
38
+ stalled: ["booting", "working", "stalled"],
39
+ completed: ["booting", "working", "stalled", "zombie", "completed"],
40
+ zombie: ["booting", "working", "stalled", "zombie"],
41
+ };
42
+
43
+ /**
44
+ * States in which an agent's tmux session no longer exists. When a session
45
+ * lands in one of these, `tmux_session` is cleared to `''` so the agents-side
46
+ * view stops surfacing tmux session names that have been torn down.
47
+ *
48
+ * The live `tmuxSessions` array on `ov status` reflects what tmux actually
49
+ * reports; the stored `tmux_session` column is what the agents-side view reads.
50
+ * Without this clear, completed/zombie agents carry stale tmux strings forever
51
+ * (overstory-14c0).
52
+ */
53
+ const TERMINAL_STATES: readonly AgentState[] = ["completed", "zombie"];
11
54
 
12
55
  export interface SessionStore {
13
56
  /** Insert or update a session. Uses agent_name as the unique key. */
@@ -22,14 +65,32 @@ export interface SessionStore {
22
65
  count(): number;
23
66
  /** Get sessions belonging to a specific run. */
24
67
  getByRun(runId: string): AgentSession[];
25
- /** Update only the state of a session. */
68
+ /**
69
+ * Update only the state of a session.
70
+ *
71
+ * Unconditional override — does not validate the prev → next transition.
72
+ * Reserved for forced cleanup paths (`ov clean`, `ov sling` startup failure,
73
+ * supervisor/coordinator/monitor self-management). For race-prone writers
74
+ * (turn-runner settle, `ov stop`, watchdog), use `tryTransitionState`.
75
+ */
26
76
  updateState(agentName: string, state: AgentState): void;
77
+ /**
78
+ * Atomically transition a session's state, validated against the matrix in
79
+ * `TRANSITION_ALLOWED_FROM`. Implemented as a single `UPDATE ... WHERE state
80
+ * IN (...)` so concurrent writers cannot both succeed against the same row.
81
+ *
82
+ * Returns a discriminated outcome describing whether the write landed and,
83
+ * on rejection, whether the row was missing or the transition was illegal.
84
+ */
85
+ tryTransitionState(agentName: string, newState: AgentState): TransitionOutcome;
27
86
  /** Update lastActivity to current ISO timestamp. */
28
87
  updateLastActivity(agentName: string): void;
29
88
  /** Update escalation level and stalled timestamp. */
30
89
  updateEscalation(agentName: string, level: number, stalledSince: string | null): void;
31
90
  /** Update the transcript path for a session. */
32
91
  updateTranscriptPath(agentName: string, path: string): void;
92
+ /** Update the runtime-provided session_id (e.g. Claude stream-json session_id). */
93
+ updateClaudeSessionId(agentName: string, sessionId: string): void;
33
94
  /** Remove a session by agent name. */
34
95
  remove(agentName: string): void;
35
96
  /** Purge sessions matching criteria. Returns count of deleted rows. */
@@ -58,6 +119,7 @@ interface SessionRow {
58
119
  stalled_since: string | null;
59
120
  transcript_path: string | null;
60
121
  prompt_version: string | null;
122
+ claude_session_id: string | null;
61
123
  }
62
124
 
63
125
  /** Row shape for runs table as stored in SQLite (snake_case columns). */
@@ -91,7 +153,8 @@ CREATE TABLE IF NOT EXISTS sessions (
91
153
  escalation_level INTEGER NOT NULL DEFAULT 0,
92
154
  stalled_since TEXT,
93
155
  transcript_path TEXT,
94
- prompt_version TEXT
156
+ prompt_version TEXT,
157
+ claude_session_id TEXT
95
158
  )`;
96
159
 
97
160
  const CREATE_INDEXES = `
@@ -135,6 +198,7 @@ function rowToSession(row: SessionRow): AgentSession {
135
198
  stalledSince: row.stalled_since,
136
199
  transcriptPath: row.transcript_path,
137
200
  ...(row.prompt_version !== null ? { promptVersion: row.prompt_version } : {}),
201
+ ...(row.claude_session_id !== null ? { claudeSessionId: row.claude_session_id } : {}),
138
202
  };
139
203
  }
140
204
 
@@ -175,6 +239,18 @@ function migrateAddPromptVersion(db: Database): void {
175
239
  }
176
240
  }
177
241
 
242
+ /**
243
+ * Migrate an existing sessions table to add the claude_session_id column.
244
+ * Safe to call multiple times — only adds the column if it does not exist.
245
+ */
246
+ function migrateAddClaudeSessionId(db: Database): void {
247
+ const rows = db.prepare("PRAGMA table_info(sessions)").all() as Array<{ name: string }>;
248
+ const existingColumns = new Set(rows.map((r) => r.name));
249
+ if (!existingColumns.has("claude_session_id")) {
250
+ db.exec("ALTER TABLE sessions ADD COLUMN claude_session_id TEXT");
251
+ }
252
+ }
253
+
178
254
  /**
179
255
  * Migrate an existing sessions table from bead_id to task_id column.
180
256
  * Safe to call multiple times — only renames if bead_id exists and task_id does not.
@@ -209,6 +285,7 @@ export function createSessionStore(dbPath: string): SessionStore {
209
285
  migrateBeadIdToTaskId(db);
210
286
  migrateAddTranscriptPath(db);
211
287
  migrateAddPromptVersion(db);
288
+ migrateAddClaudeSessionId(db);
212
289
  migrateAddCoordinatorName(db);
213
290
 
214
291
  // Now safe to create indexes (all columns exist).
@@ -237,18 +314,19 @@ export function createSessionStore(dbPath: string): SessionStore {
237
314
  $stalled_since: string | null;
238
315
  $transcript_path: string | null;
239
316
  $prompt_version: string | null;
317
+ $claude_session_id: string | null;
240
318
  }
241
319
  >(`
242
320
  INSERT INTO sessions
243
321
  (id, agent_name, capability, worktree_path, branch_name, task_id,
244
322
  tmux_session, state, pid, parent_agent, depth, run_id,
245
323
  started_at, last_activity, escalation_level, stalled_since, transcript_path,
246
- prompt_version)
324
+ prompt_version, claude_session_id)
247
325
  VALUES
248
326
  ($id, $agent_name, $capability, $worktree_path, $branch_name, $task_id,
249
327
  $tmux_session, $state, $pid, $parent_agent, $depth, $run_id,
250
328
  $started_at, $last_activity, $escalation_level, $stalled_since, $transcript_path,
251
- $prompt_version)
329
+ $prompt_version, $claude_session_id)
252
330
  ON CONFLICT(agent_name) DO UPDATE SET
253
331
  id = excluded.id,
254
332
  capability = excluded.capability,
@@ -266,7 +344,8 @@ export function createSessionStore(dbPath: string): SessionStore {
266
344
  escalation_level = excluded.escalation_level,
267
345
  stalled_since = excluded.stalled_since,
268
346
  transcript_path = excluded.transcript_path,
269
- prompt_version = excluded.prompt_version
347
+ prompt_version = excluded.prompt_version,
348
+ claude_session_id = excluded.claude_session_id
270
349
  `);
271
350
 
272
351
  const getByNameStmt = db.prepare<SessionRow, { $agent_name: string }>(`
@@ -290,10 +369,39 @@ export function createSessionStore(dbPath: string): SessionStore {
290
369
  SELECT * FROM sessions WHERE run_id = $run_id ORDER BY started_at ASC
291
370
  `);
292
371
 
372
+ // Clear tmux_session when landing in a terminal state — the tmux session
373
+ // has already been torn down by ov stop / watchdog / coordinator cleanup,
374
+ // so the stored string is stale (overstory-14c0).
375
+ const terminalInList = TERMINAL_STATES.map((s) => `'${s}'`).join(",");
293
376
  const updateStateStmt = db.prepare<void, { $agent_name: string; $state: string }>(`
294
- UPDATE sessions SET state = $state WHERE agent_name = $agent_name
377
+ UPDATE sessions
378
+ SET state = $state,
379
+ tmux_session = CASE WHEN $state IN (${terminalInList}) THEN '' ELSE tmux_session END
380
+ WHERE agent_name = $agent_name
295
381
  `);
296
382
 
383
+ // Per-target-state CAS statements. The IN-list values come from a static
384
+ // matrix we control (TRANSITION_ALLOWED_FROM), so inlining as literals is
385
+ // safe and lets bun:sqlite re-use the prepared plan without dynamic params.
386
+ const tryTransitionStmts = (() => {
387
+ const stmts: Partial<
388
+ Record<AgentState, ReturnType<typeof db.prepare<void, { $agent_name: string }>>>
389
+ > = {};
390
+ const terminalSet = new Set<AgentState>(TERMINAL_STATES);
391
+ for (const target of Object.keys(TRANSITION_ALLOWED_FROM) as AgentState[]) {
392
+ const allowed = TRANSITION_ALLOWED_FROM[target];
393
+ if (allowed.length === 0) continue;
394
+ const inList = allowed.map((s) => `'${s}'`).join(",");
395
+ const setClause = terminalSet.has(target)
396
+ ? `state = '${target}', tmux_session = ''`
397
+ : `state = '${target}'`;
398
+ stmts[target] = db.prepare<void, { $agent_name: string }>(
399
+ `UPDATE sessions SET ${setClause} WHERE agent_name = $agent_name AND state IN (${inList})`,
400
+ );
401
+ }
402
+ return stmts;
403
+ })();
404
+
297
405
  const updateLastActivityStmt = db.prepare<void, { $agent_name: string; $last_activity: string }>(`
298
406
  UPDATE sessions SET last_activity = $last_activity WHERE agent_name = $agent_name
299
407
  `);
@@ -322,6 +430,13 @@ export function createSessionStore(dbPath: string): SessionStore {
322
430
  UPDATE sessions SET transcript_path = $transcript_path WHERE agent_name = $agent_name
323
431
  `);
324
432
 
433
+ const updateClaudeSessionIdStmt = db.prepare<
434
+ void,
435
+ { $agent_name: string; $claude_session_id: string }
436
+ >(`
437
+ UPDATE sessions SET claude_session_id = $claude_session_id WHERE agent_name = $agent_name
438
+ `);
439
+
325
440
  return {
326
441
  upsert(session: AgentSession): void {
327
442
  upsertStmt.run({
@@ -343,6 +458,7 @@ export function createSessionStore(dbPath: string): SessionStore {
343
458
  $stalled_since: session.stalledSince,
344
459
  $transcript_path: session.transcriptPath,
345
460
  $prompt_version: session.promptVersion ?? null,
461
+ $claude_session_id: session.claudeSessionId ?? null,
346
462
  });
347
463
  },
348
464
 
@@ -375,6 +491,37 @@ export function createSessionStore(dbPath: string): SessionStore {
375
491
  updateStateStmt.run({ $agent_name: agentName, $state: state });
376
492
  },
377
493
 
494
+ tryTransitionState(agentName: string, newState: AgentState): TransitionOutcome {
495
+ // Read prev for diagnostic accuracy before the CAS. The read is racy
496
+ // against another writer landing first, but the CAS that follows is
497
+ // authoritative — `changes === 0` means the CAS rejected against
498
+ // whatever the row holds NOW, regardless of what we read here.
499
+ const before = getByNameStmt.get({ $agent_name: agentName });
500
+ if (before === null) {
501
+ return { ok: false, reason: "not_found", attempted: newState };
502
+ }
503
+ const stmt = tryTransitionStmts[newState];
504
+ if (stmt !== undefined) {
505
+ const result = stmt.run({ $agent_name: agentName });
506
+ if (result.changes > 0) {
507
+ return { ok: true, prev: before.state as AgentState, next: newState };
508
+ }
509
+ }
510
+ // CAS rejected (or no stmt for this target, e.g. booting). Re-read to
511
+ // report the state that actually blocked us — another writer may have
512
+ // landed between our `before` read and the CAS.
513
+ const after = getByNameStmt.get({ $agent_name: agentName });
514
+ if (after === null) {
515
+ return { ok: false, reason: "not_found", attempted: newState };
516
+ }
517
+ return {
518
+ ok: false,
519
+ reason: "illegal_transition",
520
+ prev: after.state as AgentState,
521
+ attempted: newState,
522
+ };
523
+ },
524
+
378
525
  updateLastActivity(agentName: string): void {
379
526
  updateLastActivityStmt.run({
380
527
  $agent_name: agentName,
@@ -394,6 +541,10 @@ export function createSessionStore(dbPath: string): SessionStore {
394
541
  updateTranscriptPathStmt.run({ $agent_name: agentName, $transcript_path: path });
395
542
  },
396
543
 
544
+ updateClaudeSessionId(agentName: string, sessionId: string): void {
545
+ updateClaudeSessionIdStmt.run({ $agent_name: agentName, $claude_session_id: sessionId });
546
+ },
547
+
397
548
  remove(agentName: string): void {
398
549
  removeStmt.run({ $agent_name: agentName });
399
550
  },
@@ -473,7 +624,12 @@ export function createRunStore(dbPath: string): RunStore {
473
624
  db.exec("PRAGMA synchronous = NORMAL");
474
625
  db.exec("PRAGMA busy_timeout = 5000");
475
626
 
476
- // Create schema (idempotent — safe if SessionStore already created these)
627
+ // Create schema (idempotent — safe if SessionStore already created these).
628
+ // `agent_count` is derived from the sessions table at read time, so the
629
+ // sessions table must exist when the run-read statements are prepared
630
+ // — even if the caller only opens a RunStore and never opens a SessionStore.
631
+ db.exec(CREATE_TABLE);
632
+ db.exec(CREATE_INDEXES);
477
633
  db.exec(CREATE_RUNS_TABLE);
478
634
 
479
635
  // Migrate: add coordinator_name column BEFORE creating indexes that reference it.
@@ -499,26 +655,35 @@ export function createRunStore(dbPath: string): RunStore {
499
655
  VALUES ($id, $started_at, $completed_at, $agent_count, $coordinator_session_id, $coordinator_name, $status)
500
656
  `);
501
657
 
658
+ // `agent_count` is derived from the sessions table at read time rather than
659
+ // read from the column. The cached column value drifted because only sling
660
+ // incremented it — coordinator startup never did, so for every run with a
661
+ // coordinator the count was off by one (overstory-8e69). Sourcing from
662
+ // sessions makes the count match `SELECT * FROM sessions WHERE run_id = ?`
663
+ // and removes the writer/reader asymmetry. The column is still written so
664
+ // older overstory binaries pointed at the same db can keep functioning.
665
+ const RUN_COLUMNS = `
666
+ id, started_at, completed_at,
667
+ (SELECT COUNT(*) FROM sessions WHERE sessions.run_id = runs.id) AS agent_count,
668
+ coordinator_session_id, coordinator_name, status
669
+ `;
670
+
502
671
  const getRunStmt = db.prepare<RunRow, { $id: string }>(`
503
- SELECT * FROM runs WHERE id = $id
672
+ SELECT ${RUN_COLUMNS} FROM runs WHERE id = $id
504
673
  `);
505
674
 
506
675
  const getActiveRunStmt = db.prepare<RunRow, Record<string, never>>(`
507
- SELECT * FROM runs WHERE status = 'active'
676
+ SELECT ${RUN_COLUMNS} FROM runs WHERE status = 'active'
508
677
  ORDER BY started_at DESC
509
678
  LIMIT 1
510
679
  `);
511
680
 
512
681
  const getActiveRunForCoordinatorStmt = db.prepare<RunRow, { $coordinator_name: string }>(`
513
- SELECT * FROM runs WHERE status = 'active' AND coordinator_name = $coordinator_name
682
+ SELECT ${RUN_COLUMNS} FROM runs WHERE status = 'active' AND coordinator_name = $coordinator_name
514
683
  ORDER BY started_at DESC
515
684
  LIMIT 1
516
685
  `);
517
686
 
518
- const incrementAgentCountStmt = db.prepare<void, { $id: string }>(`
519
- UPDATE runs SET agent_count = agent_count + 1 WHERE id = $id
520
- `);
521
-
522
687
  const completeRunStmt = db.prepare<
523
688
  void,
524
689
  { $id: string; $status: string; $completed_at: string }
@@ -565,15 +730,15 @@ export function createRunStore(dbPath: string): RunStore {
565
730
 
566
731
  const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
567
732
  const limitClause = opts?.limit !== undefined ? `LIMIT ${opts.limit}` : "";
568
- const query = `SELECT * FROM runs ${whereClause} ORDER BY started_at DESC ${limitClause}`;
733
+ const query = `SELECT ${RUN_COLUMNS} FROM runs ${whereClause} ORDER BY started_at DESC ${limitClause}`;
569
734
 
570
735
  const rows = db.prepare<RunRow, Record<string, string | number>>(query).all(params);
571
736
  return rows.map(rowToRun);
572
737
  },
573
738
 
574
- incrementAgentCount(runId: string): void {
575
- incrementAgentCountStmt.run({ $id: runId });
576
- },
739
+ // Kept for API stability but a no-op: `agent_count` is now derived from
740
+ // the sessions table on every read (see RUN_COLUMNS above).
741
+ incrementAgentCount(_runId: string): void {},
577
742
 
578
743
  completeRun(runId: string, status: "completed" | "failed"): void {
579
744
  completeRunStmt.run({
package/src/test-setup.test.ts ADDED
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Regression test for overstory-6d42: bun test must not be redirectable to a
3
+ * real .overstory/ via inherited OVERSTORY_PROJECT_ROOT (or sibling) env vars.
4
+ *
5
+ * The preload in bunfig.toml runs src/test-setup.ts before any test loads,
6
+ * deleting OVERSTORY_* env vars and clearing the project-root override. By
7
+ * the time this test executes, those values must already be gone — even if a
8
+ * worker agent's environment had them set when bun test was invoked.
9
+ */
10
+
11
+ import { expect, test } from "bun:test";
12
+ import { getProjectRootOverride } from "./config.ts";
13
+
14
+ const ENV_KEYS = [
15
+ "OVERSTORY_PROJECT_ROOT",
16
+ "OVERSTORY_AGENT_NAME",
17
+ "OVERSTORY_WORKTREE_PATH",
18
+ "OVERSTORY_TASK_ID",
19
+ "OVERSTORY_PROFILE",
20
+ "OVERSTORY_RUN_ID",
21
+ ] as const;
22
+
23
+ for (const key of ENV_KEYS) {
24
+ test(`${key} is unset by the test preload`, () => {
25
+ expect(process.env[key]).toBeUndefined();
26
+ });
27
+ }
28
+
29
+ test("project-root override is cleared by the test preload", () => {
30
+ expect(getProjectRootOverride()).toBeUndefined();
31
+ });
package/src/test-setup.ts ADDED
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Global test preload (referenced from bunfig.toml [test] preload).
3
+ *
4
+ * Prevents test runs from leaking into a real .overstory/ when bun test is
5
+ * executed inside an agent worktree (where ov sling injects OVERSTORY_PROJECT_ROOT
6
+ * into the spawned process — see src/commands/sling.ts:928).
7
+ *
8
+ * Without this preload, resolveProjectRoot() short-circuits to the env var
9
+ * before consulting the per-test temp dir, so tests calling cleanCommand,
10
+ * coordinatorCommand, mailCommand, etc. silently target the live project.
11
+ * That's how overstory-6d42 contamination occurred: a worker agent ran
12
+ * bun test, clean.test.ts wiped the live .overstory/, coordinator.test.ts
13
+ * left dozens of bogus runs, and mail.test.ts inserted fixture messages.
14
+ *
15
+ * Tests that need OVERSTORY_PROJECT_ROOT set (e.g. config.test.ts) set it
16
+ * explicitly inside the test body and restore it in afterEach.
17
+ */
18
+
19
+ import { clearProjectRootOverride } from "./config.ts";
20
+
21
+ delete process.env.OVERSTORY_PROJECT_ROOT;
22
+ delete process.env.OVERSTORY_AGENT_NAME;
23
+ delete process.env.OVERSTORY_WORKTREE_PATH;
24
+ delete process.env.OVERSTORY_TASK_ID;
25
+ delete process.env.OVERSTORY_PROFILE;
26
+ delete process.env.OVERSTORY_RUN_ID;
27
+
28
+ clearProjectRootOverride();
package/src/types.ts CHANGED
@@ -108,6 +108,7 @@ export interface OverstoryConfig {
108
108
  rpcTimeoutMs?: number; // Timeout for RPC getState() calls (default 5_000)
109
109
  triageTimeoutMs?: number; // Timeout for Tier 1 AI triage calls (default 30_000)
110
110
  maxEscalationLevel?: number; // Maximum escalation level before termination (default 3)
111
+ notifyParentOnDeath?: boolean; // Send synthetic worker_died mail to parent on watchdog termination (default true)
111
112
  };
112
113
  models: Partial<Record<string, ModelRef>>;
113
114
  logging: {
@@ -141,6 +142,13 @@ export interface OverstoryConfig {
141
142
  * Default: 0 (no delay).
142
143
  */
143
144
  shellInitDelayMs?: number;
145
+ /**
146
+ * Project-level default for spawning Claude Code agents in headless mode
147
+ * (Bun.spawn + stream-json) instead of the tmux interactive runtime.
148
+ * Per-spawn `--headless` / `--no-headless` flags on `ov sling` override this.
149
+ * Default: false (tmux).
150
+ */
151
+ claudeHeadlessByDefault?: boolean;
144
152
  };
145
153
  }
146
154
 
@@ -181,6 +189,22 @@ export type Capability = (typeof SUPPORTED_CAPABILITIES)[number];
181
189
 
182
190
  export type AgentState = "booting" | "working" | "completed" | "stalled" | "zombie";
183
191
 
192
+ /**
193
+ * Result of a guarded state transition attempt (`SessionStore.tryTransitionState`).
194
+ *
195
+ * Discriminated by `ok`. When `ok` is false, `reason` distinguishes:
196
+ * - `not_found`: no session exists for the given name.
197
+ * - `illegal_transition`: a session exists but the matrix forbids prev → attempted.
198
+ *
199
+ * `prev` is always the state observed by the SQL CAS. For `illegal_transition` it
200
+ * is the state that blocked the write (which may differ from what the caller read,
201
+ * if another writer landed first).
202
+ */
203
+ export type TransitionOutcome =
204
+ | { ok: true; prev: AgentState; next: AgentState }
205
+ | { ok: false; reason: "not_found"; attempted: AgentState }
206
+ | { ok: false; reason: "illegal_transition"; prev: AgentState; attempted: AgentState };
207
+
184
208
  export interface AgentSession {
185
209
  id: string; // Unique session ID
186
210
  agentName: string; // Unique per-session name
@@ -200,6 +224,7 @@ export interface AgentSession {
200
224
  stalledSince: string | null; // ISO timestamp when agent first entered stalled state
201
225
  transcriptPath: string | null; // Runtime-provided transcript JSONL path (decoupled from ~/.claude/)
202
226
  promptVersion?: string | null; // Canopy prompt version used at sling time (e.g. "builder@17")
227
+ claudeSessionId?: string | null; // Runtime-provided session_id (Claude stream-json), eagerly pinned on first event
203
228
  }
204
229
 
205
230
  // === Agent Identity ===
@@ -225,6 +250,7 @@ export type MailSemanticType = "status" | "question" | "result" | "error";
225
250
  /** Protocol message types for structured agent coordination. */
226
251
  export type MailProtocolType =
227
252
  | "worker_done"
253
+ | "worker_died"
228
254
  | "merge_ready"
229
255
  | "merged"
230
256
  | "merge_failed"
@@ -244,6 +270,7 @@ export const MAIL_MESSAGE_TYPES: readonly MailMessageType[] = [
244
270
  "result",
245
271
  "error",
246
272
  "worker_done",
273
+ "worker_died",
247
274
  "merge_ready",
248
275
  "merged",
249
276
  "merge_failed",
@@ -278,6 +305,33 @@ export interface WorkerDonePayload {
278
305
  filesModified: string[];
279
306
  }
280
307
 
308
+ /**
309
+ * Watchdog signals the parent that one of its children was terminated.
310
+ *
311
+ * Synthetic mail injected by the Tier 0 daemon when it transitions a worker
312
+ * to `zombie` (overstory-c111). Without this, the parent — typically a lead
313
+ * waiting for `worker_done` from this child — would block indefinitely on
314
+ * mail that will never arrive. The parent reads this on its next mail-injector
315
+ * tick and decides whether to retry, escalate, or report up.
316
+ */
317
+ export interface WorkerDiedPayload {
318
+ agentName: string;
319
+ capability: string;
320
+ taskId: string;
321
+ /** Reason the watchdog or runner terminated the child (e.g. "Process terminated"). */
322
+ reason: string;
323
+ /** ISO timestamp of the child's last observed activity. */
324
+ lastActivity: string;
325
+ /**
326
+ * Source that detected the failure.
327
+ * - `tier0`/`tier1`: watchdog daemon detected a dead/stuck process out-of-band.
328
+ * - `runner`: the per-turn runner observed an in-band failure — either an
329
+ * abort/stall that forced SIGTERM/SIGKILL, or a clean exit without the
330
+ * capability's terminal mail (silent-no-op, overstory-4159 / overstory-c772).
331
+ */
332
+ terminatedBy: "tier0" | "tier1" | "runner";
333
+ }
334
+
281
335
  /** Supervisor signals branch is verified and ready for merge. */
282
336
  export interface MergeReadyPayload {
283
337
  branch: string;
@@ -349,6 +403,7 @@ export interface DecisionGatePayload {
349
403
  /** Maps protocol message types to their payload interfaces. */
350
404
  export interface MailPayloadMap {
351
405
  worker_done: WorkerDonePayload;
406
+ worker_died: WorkerDiedPayload;
352
407
  merge_ready: MergeReadyPayload;
353
408
  merged: MergedPayload;
354
409
  merge_failed: MergeFailedPayload;
@@ -446,7 +501,7 @@ export interface HealthCheck {
446
501
  pidAlive: boolean | null; // null when pid is unavailable
447
502
  lastActivity: string;
448
503
  state: AgentState;
449
- action: "none" | "escalate" | "terminate" | "investigate";
504
+ action: "none" | "escalate" | "terminate" | "investigate" | "complete";
450
505
  /** Describes any conflict between observable state and recorded state. */
451
506
  reconciliationNote: string | null;
452
507
  }
package/src/utils/pid.test.ts CHANGED
@@ -3,7 +3,7 @@ import { mkdtemp } from "node:fs/promises";
3
3
  import { tmpdir } from "node:os";
4
4
  import { join } from "node:path";
5
5
  import { cleanupTempDir } from "../test-helpers.ts";
6
- import { readPidFile, removePidFile, writePidFile } from "./pid.ts";
6
+ import { acquirePidLock, readPidFile, removePidFile, writePidFile } from "./pid.ts";
7
7
 
8
8
  let tempDir: string;
9
9
 
@@ -66,3 +66,87 @@ describe("removePidFile", () => {
66
66
  // No throw = pass
67
67
  });
68
68
  });
69
+
70
+ describe("acquirePidLock", () => {
71
+ const alwaysAlive = (_pid: number) => true;
72
+ const alwaysDead = (_pid: number) => false;
73
+
74
+ test("acquires when no lock file exists", async () => {
75
+ const pidPath = join(tempDir, "lock.pid");
76
+ const result = await acquirePidLock(pidPath, 1234, alwaysAlive);
77
+ expect(result.acquired).toBe(true);
78
+ expect(await readPidFile(pidPath)).toBe(1234);
79
+ });
80
+
81
+ test("creates parent directory if missing", async () => {
82
+ const pidPath = join(tempDir, "nested", "deeper", "lock.pid");
83
+ const result = await acquirePidLock(pidPath, 555, alwaysAlive);
84
+ expect(result.acquired).toBe(true);
85
+ expect(await readPidFile(pidPath)).toBe(555);
86
+ });
87
+
88
+ test("refuses when a live foreign PID owns the lock", async () => {
89
+ const pidPath = join(tempDir, "lock.pid");
90
+ await Bun.write(pidPath, "9999\n");
91
+ const result = await acquirePidLock(pidPath, 1234, alwaysAlive);
92
+ expect(result.acquired).toBe(false);
93
+ if (!result.acquired) {
94
+ expect(result.existingPid).toBe(9999);
95
+ }
96
+ // File untouched.
97
+ expect(await readPidFile(pidPath)).toBe(9999);
98
+ });
99
+
100
+ test("idempotent when file already contains caller's own PID", async () => {
101
+ const pidPath = join(tempDir, "lock.pid");
102
+ await Bun.write(pidPath, "1234\n");
103
+ // alwaysAlive would say 1234 is alive, but acquirePidLock should detect
104
+ // own-PID first and accept.
105
+ const result = await acquirePidLock(pidPath, 1234, alwaysAlive);
106
+ expect(result.acquired).toBe(true);
107
+ expect(await readPidFile(pidPath)).toBe(1234);
108
+ });
109
+
110
+ test("reclaims stale lock with dead PID", async () => {
111
+ const pidPath = join(tempDir, "lock.pid");
112
+ await Bun.write(pidPath, "9999\n");
113
+ const result = await acquirePidLock(pidPath, 1234, alwaysDead);
114
+ expect(result.acquired).toBe(true);
115
+ expect(await readPidFile(pidPath)).toBe(1234);
116
+ });
117
+
118
+ test("reclaims unreadable/corrupted lock file", async () => {
119
+ const pidPath = join(tempDir, "lock.pid");
120
+ await Bun.write(pidPath, "garbage-not-a-pid\n");
121
+ const result = await acquirePidLock(pidPath, 1234, alwaysAlive);
122
+ expect(result.acquired).toBe(true);
123
+ expect(await readPidFile(pidPath)).toBe(1234);
124
+ });
125
+
126
+ test("two simultaneous acquirers — only one wins", async () => {
127
+ const pidPath = join(tempDir, "lock.pid");
128
+ const [a, b] = await Promise.all([
129
+ acquirePidLock(pidPath, 1111, alwaysAlive),
130
+ acquirePidLock(pidPath, 2222, alwaysAlive),
131
+ ]);
132
+ const winners = [a, b].filter((r) => r.acquired);
133
+ const losers = [a, b].filter((r) => !r.acquired);
134
+ expect(winners.length).toBe(1);
135
+ expect(losers.length).toBe(1);
136
+ const loser = losers[0];
137
+ if (loser && !loser.acquired) {
138
+ expect([1111, 2222]).toContain(loser.existingPid);
139
+ }
140
+ });
141
+
142
+ test("two simultaneous acquirers — file content matches the winner", async () => {
143
+ const pidPath = join(tempDir, "lock.pid");
144
+ const [a, b] = await Promise.all([
145
+ acquirePidLock(pidPath, 1111, alwaysAlive),
146
+ acquirePidLock(pidPath, 2222, alwaysAlive),
147
+ ]);
148
+ const fileContent = await readPidFile(pidPath);
149
+ const winnerPid = a.acquired ? 1111 : b.acquired ? 2222 : -1;
150
+ expect(fileContent).toBe(winnerPid);
151
+ });
152
+ });