@os-eco/overstory-cli 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,7 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
19
19
  - [GitHub Copilot](https://github.com/features/copilot) (`copilot` CLI)
20
20
  - [Codex](https://github.com/openai/codex) (`codex` CLI)
21
21
  - [Gemini CLI](https://github.com/google-gemini/gemini-cli) (`gemini` CLI)
22
+ - [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
22
23
  - [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
23
24
  - [OpenCode](https://opencode.ai) (`opencode` CLI)
24
25
 
@@ -177,14 +178,16 @@ Overstory uses instruction overlays and tool-call guards to turn agent sessions
177
178
 
178
179
  Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types.ts`) defines the contract — each adapter handles spawning, config deployment, guard enforcement, readiness detection, and transcript parsing for its runtime. Set the default in `config.yaml` or override per-agent with `ov sling --runtime <name>`.
179
180
 
180
- | Runtime | CLI | Guard Mechanism | Status |
181
- |---------|-----|-----------------|--------|
181
+ | Runtime | CLI | Guard Mechanism | Stability |
182
+ |---------|-----|-----------------|-----------|
182
183
  | Claude Code | `claude` | `settings.local.json` hooks | Stable |
183
- | Pi | `pi` | `.pi/extensions/` guard extension | Active development |
184
- | Copilot | `copilot` | (none `--allow-all-tools`) | Active development |
185
- | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Active development |
186
- | Gemini | `gemini` | `--sandbox` flag | Active development |
187
- | Sapling | `sp` | `.sapling/guards.json` | Active development |
184
+ | Sapling | `sp` | `.sapling/guards.json` | Stable |
185
+ | Pi | `pi` | `.pi/extensions/` guard extension | Experimental |
186
+ | Copilot | `copilot` | (none — `--allow-all-tools`) | Experimental |
187
+ | Cursor | `agent` | (none — `--yolo`) | Experimental |
188
+ | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
189
+ | Gemini | `gemini` | `--sandbox` flag | Experimental |
190
+ | OpenCode | `opencode` | (none) | Experimental |
188
191
 
189
192
  ## How It Works
190
193
 
@@ -284,7 +287,7 @@ overstory/
284
287
  metrics/ SQLite metrics + pricing + transcript parsing
285
288
  doctor/ Health check modules (11 checks)
286
289
  insights/ Session insight analyzer for auto-expertise
287
- runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode)
290
+ runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
288
291
  tracker/ Pluggable task tracker (beads + seeds backends)
289
292
  mulch/ mulch client (programmatic API + CLI wrapper)
290
293
  e2e/ End-to-end lifecycle tests
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@os-eco/overstory-cli",
3
- "version": "0.8.6",
3
+ "version": "0.8.7",
4
4
  "description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
5
5
  "author": "Jaymin West",
6
6
  "license": "MIT",
@@ -8,6 +8,7 @@ import {
8
8
  buildBashFileGuardScript,
9
9
  buildBashPathBoundaryScript,
10
10
  buildPathBoundaryGuardScript,
11
+ buildTrackerCloseGuardScript,
11
12
  deployHooks,
12
13
  escapeForSingleQuotedShell,
13
14
  extractQualityGatePrefixes,
@@ -15,6 +16,7 @@ import {
15
16
  getCapabilityGuards,
16
17
  getDangerGuards,
17
18
  getPathBoundaryGuards,
19
+ getTrackerCloseGuards,
18
20
  isOverstoryHookEntry,
19
21
  PATH_PREFIX,
20
22
  } from "./hooks-deployer.ts";
@@ -468,9 +470,9 @@ describe("deployHooks", () => {
468
470
  expect(writeBlockGuard).toBeDefined();
469
471
  expect(writeBlockGuard.hooks[0].command).toContain('"decision":"block"');
470
472
 
471
- // Should have multiple Bash guards: danger guard + file guard + universal push guard
473
+ // Should have multiple Bash guards: danger guard + file guard + tracker close guard + universal push guard
472
474
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
473
- expect(bashGuards.length).toBe(3); // danger guard + file guard + universal push guard
475
+ expect(bashGuards.length).toBe(4); // danger guard + file guard + tracker close guard + universal push guard
474
476
  });
475
477
 
476
478
  test("reviewer capability adds same guards as scout", async () => {
@@ -512,9 +514,9 @@ describe("deployHooks", () => {
512
514
  expect(guardMatchers).toContain("NotebookEdit");
513
515
  expect(guardMatchers).toContain("Bash");
514
516
 
515
- // Should have 3 Bash guards: danger guard + file guard + universal push guard
517
+ // Should have 4 Bash guards: danger guard + file guard + tracker close guard + universal push guard
516
518
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
517
- expect(bashGuards.length).toBe(3);
519
+ expect(bashGuards.length).toBe(4);
518
520
  });
519
521
 
520
522
  test("builder capability gets path boundary + Bash danger + Bash path boundary guards + native team tool blocks", async () => {
@@ -544,9 +546,9 @@ describe("deployHooks", () => {
544
546
  expect(writeGuards[0].hooks[0].command).toContain("OVERSTORY_WORKTREE_PATH");
545
547
  expect(writeGuards[0].hooks[0].command).not.toContain("cannot modify files");
546
548
 
547
- // Builder should have 3 Bash guards: danger guard + path boundary guard + universal push guard
549
+ // Builder should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
548
550
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
549
- expect(bashGuards.length).toBe(3);
551
+ expect(bashGuards.length).toBe(4);
550
552
  // One should be the danger guard (checks git push)
551
553
  const dangerGuard = bashGuards.find(
552
554
  (h: { hooks: Array<{ command: string }> }) =>
@@ -1607,7 +1609,7 @@ describe("structural enforcement integration", () => {
1607
1609
 
1608
1610
  // Find the bash file guard (the second Bash entry, after the danger guard)
1609
1611
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
1610
- expect(bashGuards.length).toBe(3);
1612
+ expect(bashGuards.length).toBe(4);
1611
1613
 
1612
1614
  // The file guard (second Bash guard) should whitelist git add/commit
1613
1615
  const fileGuard = bashGuards[1];
@@ -2070,8 +2072,8 @@ describe("bash path boundary integration", () => {
2070
2072
  const preToolUse = parsed.hooks.PreToolUse;
2071
2073
 
2072
2074
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
2073
- // Should have 3 Bash guards: danger guard + path boundary guard + universal push guard
2074
- expect(bashGuards.length).toBe(3);
2075
+ // Should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
2076
+ expect(bashGuards.length).toBe(4);
2075
2077
 
2076
2078
  // Find the path boundary guard
2077
2079
  const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
@@ -2092,7 +2094,7 @@ describe("bash path boundary integration", () => {
2092
2094
  const preToolUse = parsed.hooks.PreToolUse;
2093
2095
 
2094
2096
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
2095
- expect(bashGuards.length).toBe(3);
2097
+ expect(bashGuards.length).toBe(4);
2096
2098
 
2097
2099
  const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
2098
2100
  h.hooks[0]?.command?.includes("Bash path boundary violation"),
@@ -2110,9 +2112,9 @@ describe("bash path boundary integration", () => {
2110
2112
  const parsed = JSON.parse(content);
2111
2113
  const preToolUse = parsed.hooks.PreToolUse;
2112
2114
 
2113
- // Scout gets danger guard + file guard + universal push guard (3 Bash guards), but NOT path boundary
2115
+ // Scout gets danger guard + file guard + tracker close guard + universal push guard (4 Bash guards), but NOT path boundary
2114
2116
  const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
2115
- expect(bashGuards.length).toBe(3);
2117
+ expect(bashGuards.length).toBe(4);
2116
2118
 
2117
2119
  const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
2118
2120
  h.hooks[0]?.command?.includes("Bash path boundary violation"),
@@ -2401,6 +2403,177 @@ describe("PATH prefix in deployed hooks", () => {
2401
2403
  });
2402
2404
  });
2403
2405
 
2406
+ describe("buildTrackerCloseGuardScript", () => {
2407
+ test("returns a string containing key patterns", () => {
2408
+ const script = buildTrackerCloseGuardScript();
2409
+ expect(typeof script).toBe("string");
2410
+ expect(script.length).toBeGreaterThan(0);
2411
+ expect(script).toContain("sd");
2412
+ expect(script).toContain("bd");
2413
+ expect(script).toContain("close");
2414
+ expect(script).toContain("update");
2415
+ });
2416
+
2417
+ test("contains ENV_GUARD prefix", () => {
2418
+ const script = buildTrackerCloseGuardScript();
2419
+ expect(script).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
2420
+ });
2421
+
2422
+ test("contains OVERSTORY_TASK_ID early-exit check", () => {
2423
+ const script = buildTrackerCloseGuardScript();
2424
+ expect(script).toContain('[ -z "$OVERSTORY_TASK_ID" ] && exit 0;');
2425
+ });
2426
+
2427
+ test("blocks sd close with wrong ID", async () => {
2428
+ const script = buildTrackerCloseGuardScript();
2429
+ const input = JSON.stringify({ command: "sd close other-task" });
2430
+ const proc = Bun.spawn(["sh", "-c", script], {
2431
+ stdin: new TextEncoder().encode(input),
2432
+ stdout: "pipe",
2433
+ stderr: "pipe",
2434
+ env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
2435
+ });
2436
+ const output = await new Response(proc.stdout).text();
2437
+ await proc.exited;
2438
+ const parsed = JSON.parse(output.trim());
2439
+ expect(parsed.decision).toBe("block");
2440
+ expect(parsed.reason).toContain("other-task");
2441
+ expect(parsed.reason).toContain("my-task");
2442
+ });
2443
+
2444
+ test("allows sd close with matching ID", async () => {
2445
+ const script = buildTrackerCloseGuardScript();
2446
+ const input = JSON.stringify({ command: "sd close my-task" });
2447
+ const proc = Bun.spawn(["sh", "-c", script], {
2448
+ stdin: new TextEncoder().encode(input),
2449
+ stdout: "pipe",
2450
+ stderr: "pipe",
2451
+ env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
2452
+ });
2453
+ const output = await new Response(proc.stdout).text();
2454
+ await proc.exited;
2455
+ expect(output.trim()).toBe("");
2456
+ });
2457
+
2458
+ test("blocks bd close with wrong ID", async () => {
2459
+ const script = buildTrackerCloseGuardScript();
2460
+ const input = JSON.stringify({ command: "bd close other-task" });
2461
+ const proc = Bun.spawn(["sh", "-c", script], {
2462
+ stdin: new TextEncoder().encode(input),
2463
+ stdout: "pipe",
2464
+ stderr: "pipe",
2465
+ env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
2466
+ });
2467
+ const output = await new Response(proc.stdout).text();
2468
+ await proc.exited;
2469
+ const parsed = JSON.parse(output.trim());
2470
+ expect(parsed.decision).toBe("block");
2471
+ expect(parsed.reason).toContain("other-task");
2472
+ });
2473
+
2474
+ test("blocks sd update --status with wrong ID", async () => {
2475
+ const script = buildTrackerCloseGuardScript();
2476
+ const input = JSON.stringify({ command: "sd update other-task --status in_progress" });
2477
+ const proc = Bun.spawn(["sh", "-c", script], {
2478
+ stdin: new TextEncoder().encode(input),
2479
+ stdout: "pipe",
2480
+ stderr: "pipe",
2481
+ env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
2482
+ });
2483
+ const output = await new Response(proc.stdout).text();
2484
+ await proc.exited;
2485
+ const parsed = JSON.parse(output.trim());
2486
+ expect(parsed.decision).toBe("block");
2487
+ expect(parsed.reason).toContain("other-task");
2488
+ });
2489
+
2490
+ test("exits early when OVERSTORY_TASK_ID is empty (coordinator/monitor)", async () => {
2491
+ const script = buildTrackerCloseGuardScript();
2492
+ const input = JSON.stringify({ command: "sd close coordinator-task" });
2493
+ const proc = Bun.spawn(["sh", "-c", script], {
2494
+ stdin: new TextEncoder().encode(input),
2495
+ stdout: "pipe",
2496
+ stderr: "pipe",
2497
+ env: { ...process.env, OVERSTORY_AGENT_NAME: "coordinator", OVERSTORY_TASK_ID: "" },
2498
+ });
2499
+ const output = await new Response(proc.stdout).text();
2500
+ await proc.exited;
2501
+ expect(output.trim()).toBe("");
2502
+ });
2503
+ });
2504
+
2505
+ describe("getTrackerCloseGuards", () => {
2506
+ test("returns exactly 1 Bash guard entry", () => {
2507
+ const guards = getTrackerCloseGuards();
2508
+ expect(guards).toHaveLength(1);
2509
+ expect(guards[0]?.matcher).toBe("Bash");
2510
+ });
2511
+
2512
+ test("guard hook type is command", () => {
2513
+ const guards = getTrackerCloseGuards();
2514
+ expect(guards[0]?.hooks[0]?.type).toBe("command");
2515
+ });
2516
+
2517
+ test("guard command contains OVERSTORY_TASK_ID check", () => {
2518
+ const guards = getTrackerCloseGuards();
2519
+ const command = guards[0]?.hooks[0]?.command ?? "";
2520
+ expect(command).toContain("OVERSTORY_TASK_ID");
2521
+ });
2522
+
2523
+ test("guard command includes ENV_GUARD prefix", () => {
2524
+ const guards = getTrackerCloseGuards();
2525
+ const command = guards[0]?.hooks[0]?.command ?? "";
2526
+ expect(command).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
2527
+ });
2528
+ });
2529
+
2530
+ describe("deployHooks tracker close guard integration", () => {
2531
+ let tempDir: string;
2532
+
2533
+ beforeEach(async () => {
2534
+ tempDir = await mkdtemp(join(tmpdir(), "overstory-tracker-close-test-"));
2535
+ });
2536
+
2537
+ afterEach(async () => {
2538
+ await cleanupTempDir(tempDir);
2539
+ });
2540
+
2541
+ test("deployHooks includes tracker close guard in PreToolUse for builder", async () => {
2542
+ const worktreePath = join(tempDir, "builder-tc-wt");
2543
+ await deployHooks(worktreePath, "builder-tc", "builder");
2544
+
2545
+ const content = await Bun.file(join(worktreePath, ".claude", "settings.local.json")).text();
2546
+ const parsed = JSON.parse(content);
2547
+ const preToolUse = parsed.hooks.PreToolUse;
2548
+
2549
+ const trackerGuard = preToolUse.find(
2550
+ (h: { matcher: string; hooks: Array<{ command: string }> }) =>
2551
+ h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
2552
+ );
2553
+ expect(trackerGuard).toBeDefined();
2554
+ expect(trackerGuard.hooks[0].command).toContain("OVERSTORY_TASK_ID");
2555
+ });
2556
+
2557
+ test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
2558
+ const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
2559
+
2560
+ for (const cap of capabilities) {
2561
+ const wt = join(tempDir, `${cap}-tc-wt`);
2562
+ await deployHooks(wt, `${cap}-tc`, cap);
2563
+
2564
+ const content = await Bun.file(join(wt, ".claude", "settings.local.json")).text();
2565
+ const parsed = JSON.parse(content);
2566
+ const preToolUse = parsed.hooks.PreToolUse;
2567
+
2568
+ const trackerGuard = preToolUse.find(
2569
+ (h: { matcher: string; hooks: Array<{ command: string }> }) =>
2570
+ h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
2571
+ );
2572
+ expect(trackerGuard).toBeDefined();
2573
+ }
2574
+ });
2575
+ });
2576
+
2404
2577
  describe("escapeForSingleQuotedShell", () => {
2405
2578
  test("no single quotes: string passes through unchanged", () => {
2406
2579
  expect(escapeForSingleQuotedShell("hello world")).toBe("hello world");
@@ -283,6 +283,61 @@ export function buildBashFileGuardScript(
283
283
  return script;
284
284
  }
285
285
 
286
+ /**
287
+ * Build a PreToolUse guard script that prevents agents from closing or updating
288
+ * issues they don't own.
289
+ *
290
+ * Guards against two patterns:
291
+ * - `sd/bd close <id>` — blocks if <id> != $OVERSTORY_TASK_ID
292
+ * - `sd/bd update <id> --status` — blocks if <id> != $OVERSTORY_TASK_ID
293
+ *
294
+ * Agents without OVERSTORY_TASK_ID (coordinator, monitor) exit early and are unaffected.
295
+ */
296
+ export function buildTrackerCloseGuardScript(): string {
297
+ const script = [
298
+ // Only enforce for overstory agent sessions
299
+ ENV_GUARD,
300
+ // Skip if task ID is not set (coordinator/monitor have no task)
301
+ '[ -z "$OVERSTORY_TASK_ID" ] && exit 0;',
302
+ "read -r INPUT;",
303
+ // Extract command value from JSON
304
+ 'CMD=$(echo "$INPUT" | sed \'s/.*"command": *"\\([^"]*\\)".*/\\1/\');',
305
+ // Check for sd/bd close <id>
306
+ "if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+close\\s'; then",
307
+ " ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+close[[:space:]]+([^ ]+).*/\\2/');",
308
+ ' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
309
+ ' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot close issue $ISSUE_ID — agents may only close their own task ($OVERSTORY_TASK_ID). Report completion via worker_done mail to your parent instead.\\"}";',
310
+ " exit 0;",
311
+ " fi;",
312
+ "fi;",
313
+ // Check for sd/bd update <id> --status
314
+ "if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+update\\s.*--status'; then",
315
+ " ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+update[[:space:]]+([^ ]+).*/\\2/');",
316
+ ' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
317
+ ' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot update issue $ISSUE_ID — agents may only update their own task ($OVERSTORY_TASK_ID).\\"}";',
318
+ " exit 0;",
319
+ " fi;",
320
+ "fi;",
321
+ ].join(" ");
322
+ return script;
323
+ }
324
+
325
+ /**
326
+ * Generate a PreToolUse guard that blocks tracker close/update for foreign issues.
327
+ *
328
+ * Returns a single Bash matcher entry. Applied to ALL agent capabilities
329
+ * so that no agent can accidentally close the coordinator's dispatch issue.
330
+ * Agents without OVERSTORY_TASK_ID (coordinator, monitor) are unaffected.
331
+ */
332
+ export function getTrackerCloseGuards(): HookEntry[] {
333
+ return [
334
+ {
335
+ matcher: "Bash",
336
+ hooks: [{ type: "command", command: buildTrackerCloseGuardScript() }],
337
+ },
338
+ ];
339
+ }
340
+
286
341
  /**
287
342
  * Capabilities that are allowed to modify files via Bash commands.
288
343
  * These get the Bash path boundary guard instead of a blanket file-modification block.
@@ -539,7 +594,8 @@ export async function deployHooks(
539
594
  const pathGuards = getPathBoundaryGuards();
540
595
  const dangerGuards = getDangerGuards(agentName);
541
596
  const capabilityGuards = getCapabilityGuards(capability, qualityGates);
542
- const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards];
597
+ const trackerCloseGuards = getTrackerCloseGuards();
598
+ const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards, ...trackerCloseGuards];
543
599
 
544
600
  if (allGuards.length > 0) {
545
601
  const preToolUse = config.hooks.PreToolUse ?? [];
@@ -460,6 +460,10 @@ describe("startCoordinator", () => {
460
460
  expect(session?.worktreePath).toBe(tempDir);
461
461
  expect(session?.id).toMatch(/^session-\d+-coordinator$/);
462
462
 
463
+ // Verify the session has a runId set (not null)
464
+ expect(session?.runId).not.toBeNull();
465
+ expect(session?.runId).toMatch(/^run-/);
466
+
463
467
  // Verify tmux createSession was called
464
468
  expect(calls.createSession).toHaveLength(1);
465
469
  expect(calls.createSession[0]?.name).toBe("overstory-test-project-coordinator");
@@ -469,6 +473,67 @@ describe("startCoordinator", () => {
469
473
  expect(calls.sendKeys.length).toBeGreaterThanOrEqual(1);
470
474
  });
471
475
 
476
+ test("creates a run record with coordinatorName set", async () => {
477
+ const { deps } = makeDeps();
478
+ const originalSleep = Bun.sleep;
479
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
480
+
481
+ try {
482
+ await captureStdout(() => coordinatorCommand(["start", "--no-attach"], deps));
483
+ } finally {
484
+ Bun.sleep = originalSleep;
485
+ }
486
+
487
+ const runStore = createRunStore(join(overstoryDir, "sessions.db"));
488
+ try {
489
+ const run = runStore.getActiveRunForCoordinator("coordinator");
490
+ expect(run).not.toBeNull();
491
+ expect(run?.coordinatorName).toBe("coordinator");
492
+ expect(run?.status).toBe("active");
493
+ expect(run?.coordinatorSessionId).toMatch(/^session-\d+-coordinator$/);
494
+ } finally {
495
+ runStore.close();
496
+ }
497
+ });
498
+
499
+ test("writes current-run.txt for backward compatibility", async () => {
500
+ const { deps } = makeDeps();
501
+ const originalSleep = Bun.sleep;
502
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
503
+
504
+ try {
505
+ await captureStdout(() => coordinatorCommand(["start", "--no-attach"], deps));
506
+ } finally {
507
+ Bun.sleep = originalSleep;
508
+ }
509
+
510
+ const currentRunFile = Bun.file(join(overstoryDir, "current-run.txt"));
511
+ expect(await currentRunFile.exists()).toBe(true);
512
+ const runId = (await currentRunFile.text()).trim();
513
+ expect(runId).toMatch(/^run-/);
514
+ });
515
+
516
+ test("run ID in current-run.txt matches session runId", async () => {
517
+ const { deps } = makeDeps();
518
+ const originalSleep = Bun.sleep;
519
+ Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
520
+
521
+ try {
522
+ await captureStdout(() => coordinatorCommand(["start", "--no-attach"], deps));
523
+ } finally {
524
+ Bun.sleep = originalSleep;
525
+ }
526
+
527
+ const sessions = loadSessionsFromDb();
528
+ const session = sessions[0];
529
+ expect(session?.runId).toBeDefined();
530
+
531
+ const currentRunFile = Bun.file(join(overstoryDir, "current-run.txt"));
532
+ const fileRunId = (await currentRunFile.text()).trim();
533
+
534
+ expect(session?.runId).toBe(fileRunId);
535
+ });
536
+
472
537
  test("deploys hooks to project root .claude/settings.local.json", async () => {
473
538
  const { deps } = makeDeps();
474
539
  const originalSleep = Bun.sleep;
@@ -845,9 +910,10 @@ describe("startCoordinator", () => {
845
910
  }
846
911
  });
847
912
 
848
- test("continues when waitForTuiReady times out but session is still alive", async () => {
849
- // waitForTuiReady returns false (timeout) but session IS alive
850
- const { deps } = makeDeps(
913
+ test("kills the coordinator and throws when waitForTuiReady times out but session is still alive", async () => {
914
+ // waitForTuiReady returns false (timeout) and the session is still alive,
915
+ // so startup should fail explicitly instead of sending the beacon blindly.
916
+ const { deps, calls } = makeDeps(
851
917
  { "overstory-test-project-coordinator": true },
852
918
  undefined,
853
919
  undefined,
@@ -866,8 +932,11 @@ describe("startCoordinator", () => {
866
932
  Bun.sleep = originalSleep;
867
933
  }
868
934
 
869
- // Should NOT throw — session is alive, just slow TUI
870
- expect(thrownError).toBeUndefined();
935
+ expect(thrownError).toBeInstanceOf(AgentError);
936
+ const agentErr = thrownError as AgentError;
937
+ expect(agentErr.message).toContain("did not become ready during startup");
938
+ expect(calls.killSession).toHaveLength(1);
939
+ expect(calls.killSession[0]?.name).toBe("overstory-test-project-coordinator");
871
940
  });
872
941
  });
873
942
 
@@ -424,12 +424,31 @@ async function startCoordinator(
424
424
  OVERSTORY_AGENT_NAME: COORDINATOR_NAME,
425
425
  });
426
426
 
427
+ // Create a run for this coordinator session BEFORE recording the session,
428
+ // so the session can reference the run ID from the start.
429
+ const sessionId = `session-${Date.now()}-${COORDINATOR_NAME}`;
430
+ const runId = `run-${new Date().toISOString().replace(/[:.]/g, "-")}`;
431
+ const runStore = createRunStore(join(overstoryDir, "sessions.db"));
432
+ try {
433
+ runStore.createRun({
434
+ id: runId,
435
+ startedAt: new Date().toISOString(),
436
+ coordinatorSessionId: sessionId,
437
+ coordinatorName: COORDINATOR_NAME,
438
+ status: "active",
439
+ });
440
+ } finally {
441
+ runStore.close();
442
+ }
443
+ // Write current-run.txt for backward compatibility with ov sling and other consumers.
444
+ await Bun.write(join(overstoryDir, "current-run.txt"), runId);
445
+
427
446
  // Record session BEFORE sending the beacon so that hook-triggered
428
447
  // updateLastActivity() can find the entry and transition booting->working.
429
448
  // Without this, a race exists: hooks fire before the session is persisted,
430
449
  // leaving the coordinator stuck in "booting" (overstory-036f).
431
450
  const session: AgentSession = {
432
- id: `session-${Date.now()}-${COORDINATOR_NAME}`,
451
+ id: sessionId,
433
452
  agentName: COORDINATOR_NAME,
434
453
  capability: "coordinator",
435
454
  worktreePath: projectRoot, // Coordinator uses project root, not a worktree
@@ -440,7 +459,7 @@ async function startCoordinator(
440
459
  pid,
441
460
  parentAgent: null, // Top of hierarchy
442
461
  depth: 0,
443
- runId: null,
462
+ runId,
444
463
  startedAt: new Date().toISOString(),
445
464
  lastActivity: new Date().toISOString(),
446
465
  escalationLevel: 0,
@@ -476,7 +495,12 @@ async function startCoordinator(
476
495
  { agentName: COORDINATOR_NAME },
477
496
  );
478
497
  }
479
- // Session is alive but TUI didn't render in time — proceed with warning
498
+ await tmux.killSession(tmuxSession);
499
+ store.updateState(COORDINATOR_NAME, "completed");
500
+ throw new AgentError(
501
+ `Coordinator tmux session "${tmuxSession}" did not become ready during startup. Claude Code may still be waiting on an interactive dialog or initializing too slowly.`,
502
+ { agentName: COORDINATOR_NAME },
503
+ );
480
504
  }
481
505
  await Bun.sleep(1_000);
482
506