@os-eco/overstory-cli 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/README.md +4 -2
  2. package/agents/builder.md +10 -1
  3. package/agents/lead.md +106 -5
  4. package/package.json +1 -1
  5. package/src/agents/headless-mail-injector.ts +8 -0
  6. package/src/agents/mail-poll-detect.test.ts +153 -0
  7. package/src/agents/mail-poll-detect.ts +73 -0
  8. package/src/agents/overlay.test.ts +56 -0
  9. package/src/agents/overlay.ts +33 -0
  10. package/src/agents/scope-detect.test.ts +190 -0
  11. package/src/agents/scope-detect.ts +146 -0
  12. package/src/agents/turn-runner.test.ts +862 -0
  13. package/src/agents/turn-runner.ts +225 -8
  14. package/src/commands/agents.ts +9 -0
  15. package/src/commands/coordinator.test.ts +127 -0
  16. package/src/commands/coordinator.ts +71 -4
  17. package/src/commands/dashboard.ts +1 -1
  18. package/src/commands/log.test.ts +131 -0
  19. package/src/commands/log.ts +37 -2
  20. package/src/commands/merge.test.ts +118 -0
  21. package/src/commands/merge.ts +51 -8
  22. package/src/commands/sling.test.ts +104 -0
  23. package/src/commands/sling.ts +95 -8
  24. package/src/commands/stop.test.ts +81 -0
  25. package/src/index.ts +5 -1
  26. package/src/insights/quality-gates.test.ts +141 -0
  27. package/src/insights/quality-gates.ts +156 -0
  28. package/src/logging/theme.ts +4 -0
  29. package/src/merge/predict.test.ts +387 -0
  30. package/src/merge/predict.ts +249 -0
  31. package/src/merge/resolver.ts +1 -1
  32. package/src/mulch/client.ts +3 -3
  33. package/src/sessions/store.test.ts +267 -5
  34. package/src/sessions/store.ts +105 -7
  35. package/src/types.ts +51 -1
  36. package/src/watchdog/daemon.test.ts +124 -2
  37. package/src/watchdog/daemon.ts +27 -12
  38. package/src/watchdog/health.test.ts +133 -8
  39. package/src/watchdog/health.ts +37 -5
  40. package/src/worktree/manager.test.ts +218 -1
  41. package/src/worktree/manager.ts +55 -0
  42. package/src/worktree/tmux.test.ts +25 -0
  43. package/src/worktree/tmux.ts +17 -0
  44. package/templates/overlay.md.tmpl +2 -0
@@ -487,12 +487,17 @@ export function startDaemon(options: DaemonOptions & { intervalMs: number }): {
487
487
  * Prefers runtime-agnostic `conn.abort()` when a RuntimeConnection is registered.
488
488
  * If abort() succeeds, returns immediately — no PID/tmux kill needed.
489
489
  * If abort() throws (e.g. process already exited), falls through to the
490
- * defense-in-depth path: PID kill for headless agents, tmux kill for TUI agents.
490
+ * defense-in-depth path below.
491
491
  *
492
- * Headless agents without a connection (tmuxSession === "" && pid !== null) are
493
- * killed via PID process tree. TUI agents are killed via their named tmux session
494
- * (only if tmuxAlive). This prevents the blast-radius bug where killSession("")
495
- * with tmux prefix matching would kill ALL tmux sessions.
492
+ * Branching after abort:
493
+ * - tmuxSession === "" (headless): never call tmux.killSession — an empty `-t`
494
+ * prefix-matches every session in the tmux server, wildcard-killing the entire
495
+ * overstory swarm (overstory-74ce). Branch by pid:
496
+ * - pid !== null → kill the process tree (long-lived headless capability).
497
+ * - pid === null → no-op (spawn-per-turn agent between turns; the in-flight
498
+ * process, if any, was already handled by the abort/connection path).
499
+ * - tmuxSession !== "" (TUI): kill the named tmux session, but only when
500
+ * `tmuxAlive` to avoid spurious "session not found" errors.
496
501
  */
497
502
  async function killAgent(ctx: {
498
503
  session: AgentSession;
@@ -503,7 +508,6 @@ async function killAgent(ctx: {
503
508
  removeConnection: (name: string) => void;
504
509
  }): Promise<void> {
505
510
  const { session, tmuxAlive, tmux, process: proc, getConnection, removeConnection } = ctx;
506
- const isHeadless = session.tmuxSession === "" && session.pid !== null;
507
511
 
508
512
  // Prefer runtime-agnostic abort() when a connection is registered.
509
513
  const conn = getConnection(session.agentName);
@@ -522,13 +526,24 @@ async function killAgent(ctx: {
522
526
  // abort() threw — fall through to PID/tmux kill below as defense-in-depth
523
527
  }
524
528
 
525
- if (isHeadless && session.pid !== null) {
526
- try {
527
- await proc.killTree(session.pid);
528
- } catch {
529
- // Already exited — not an error
529
+ // Headless agents (no tmux session) must never reach tmux.killSession.
530
+ // An empty `-t` argument is prefix-matched and would kill every overstory
531
+ // tmux session in the server (overstory-74ce).
532
+ if (session.tmuxSession === "") {
533
+ if (session.pid !== null) {
534
+ try {
535
+ await proc.killTree(session.pid);
536
+ } catch {
537
+ // Already exited — not an error
538
+ }
530
539
  }
531
- } else if (tmuxAlive) {
540
+ // pid === null: spawn-per-turn agent between turns. Any in-flight process
541
+ // was handled by abort/connection above. No-op — next dispatch will spawn fresh.
542
+ return;
543
+ }
544
+
545
+ // Named tmux session path (TUI agents).
546
+ if (tmuxAlive) {
532
547
  try {
533
548
  await tmux.killSession(session.tmuxSession);
534
549
  } catch {
@@ -502,7 +502,11 @@ describe("spawn-per-turn workers (overstory-7a34)", () => {
502
502
  // the TUI/tmux path where tmuxAlive=false → ZFC Rule 1 → zombie within
503
503
  // seconds of sling, despite being actively executing tools (overstory-7a34).
504
504
 
505
- test("freshly slung spawn-per-turn lead (booting, no pid, no tmux) → working", () => {
505
+ test("freshly slung spawn-per-turn lead (booting, no pid, no tmux) → between_turns (overstory-3087)", () => {
506
+ // Spec change: spawn-per-turn workers report `between_turns` instead
507
+ // of `working` for the healthy classification, including the booting
508
+ // → healthy transition. The turn-runner authoritatively writes
509
+ // `in_turn` once the first parser event of a turn arrives.
506
510
  const session = makeSession({
507
511
  tmuxSession: "",
508
512
  pid: null,
@@ -512,12 +516,18 @@ describe("spawn-per-turn workers (overstory-7a34)", () => {
512
516
  });
513
517
  const check = evaluateHealth(session, false, THRESHOLDS);
514
518
 
515
- expect(check.state).toBe("working");
519
+ expect(check.state).toBe("between_turns");
516
520
  expect(check.action).toBe("none");
517
521
  expect(check.reconciliationNote).toBeNull();
518
522
  });
519
523
 
520
- test("active spawn-per-turn worker (working, recent activity) stays working", () => {
524
+ test("legacy spawn-per-turn worker still at 'working' is reported as between_turns (overstory-3087)", () => {
525
+ // A row that predates the substate split (state=working) gets
526
+ // reclassified to `between_turns` by the watchdog's healthy-state
527
+ // reporter. transitionState then promotes the row forward (working
528
+ // and between_turns share rank 1 in STATE_ORDER, so the actual
529
+ // promotion happens via tryTransitionState elsewhere — here we just
530
+ // verify the check itself reports the new substate).
521
531
  const session = makeSession({
522
532
  tmuxSession: "",
523
533
  pid: null,
@@ -527,13 +537,16 @@ describe("spawn-per-turn workers (overstory-7a34)", () => {
527
537
  });
528
538
  const check = evaluateHealth(session, false, THRESHOLDS);
529
539
 
530
- expect(check.state).toBe("working");
540
+ expect(check.state).toBe("between_turns");
531
541
  expect(check.action).toBe("none");
532
542
  });
533
543
 
534
- test("spawn-per-turn worker between turns (state working, very recent) → working, NOT zombie", () => {
535
- // Repro: ov sling --capability lead any-task; within ~30s ov dashboard
536
- // previously showed state='zombie' while ov feed showed live tool calls.
544
+ test("spawn-per-turn worker between turns (recent activity) → between_turns, NOT zombie (overstory-3087)", () => {
545
+ // Repro of overstory-7a34: ov sling --capability lead any-task; within
546
+ // ~30s ov dashboard previously showed state='zombie' while ov feed
547
+ // showed live tool calls. The healthy classification now lands
548
+ // between_turns; the test still verifies that recent activity does
549
+ // not trigger zombie classification.
537
550
  const session = makeSession({
538
551
  tmuxSession: "",
539
552
  pid: null,
@@ -543,7 +556,7 @@ describe("spawn-per-turn workers (overstory-7a34)", () => {
543
556
  });
544
557
  const check = evaluateHealth(session, false, THRESHOLDS);
545
558
 
546
- expect(check.state).toBe("working");
559
+ expect(check.state).toBe("between_turns");
547
560
  expect(check.action).toBe("none");
548
561
  });
549
562
 
@@ -587,6 +600,53 @@ describe("spawn-per-turn workers (overstory-7a34)", () => {
587
600
  expect(check.state).toBe("completed");
588
601
  expect(check.action).toBe("none");
589
602
  });
603
+
604
+ test("preserves in_turn for healthy spawn-per-turn worker (overstory-3087)", () => {
605
+ // A spawn-per-turn worker the turn-runner has marked in_turn must
606
+ // have its state preserved by the health evaluation when activity is
607
+ // recent — otherwise the watchdog would stomp the substate back to
608
+ // `working` and the UI would lose the distinction between mid-turn
609
+ // and idling.
610
+ const session = makeSession({
611
+ tmuxSession: "",
612
+ pid: null,
613
+ capability: "builder",
614
+ state: "in_turn",
615
+ lastActivity: new Date().toISOString(),
616
+ });
617
+ const check = evaluateHealth(session, false, THRESHOLDS);
618
+
619
+ expect(check.state).toBe("in_turn");
620
+ expect(check.action).toBe("none");
621
+ });
622
+
623
+ test("preserves between_turns for healthy spawn-per-turn worker (overstory-3087)", () => {
624
+ const session = makeSession({
625
+ tmuxSession: "",
626
+ pid: null,
627
+ capability: "builder",
628
+ state: "between_turns",
629
+ lastActivity: new Date().toISOString(),
630
+ });
631
+ const check = evaluateHealth(session, false, THRESHOLDS);
632
+
633
+ expect(check.state).toBe("between_turns");
634
+ expect(check.action).toBe("none");
635
+ });
636
+
637
+ test("escalates an in_turn worker with stale activity to stalled (overstory-3087)", () => {
638
+ const session = makeSession({
639
+ tmuxSession: "",
640
+ pid: null,
641
+ capability: "builder",
642
+ state: "in_turn",
643
+ lastActivity: new Date(Date.now() - 60_000).toISOString(),
644
+ });
645
+ const check = evaluateHealth(session, false, THRESHOLDS);
646
+
647
+ expect(check.state).toBe("stalled");
648
+ expect(check.action).toBe("escalate");
649
+ });
590
650
  });
591
651
 
592
652
  // === transitionState ===
@@ -702,4 +762,69 @@ describe("transitionState", () => {
702
762
  // the state should NOT advance
703
763
  expect(transitionState("working", check)).toBe("working");
704
764
  });
765
+
766
+ // --- in_turn / between_turns coexist with working at the active rank (overstory-3087) ---
767
+
768
+ test("preserves in_turn when watchdog reports a healthy 'working' check", () => {
769
+ // The watchdog's healthy-classification check returns state=working;
770
+ // since in_turn shares rank 1 with working, transitionState must not
771
+ // advance and the spawn-per-turn substate the turn-runner wrote stays.
772
+ const check = {
773
+ state: "working" as const,
774
+ agentName: "a",
775
+ timestamp: "",
776
+ tmuxAlive: true,
777
+ pidAlive: true as boolean | null,
778
+ lastActivity: "",
779
+ processAlive: true,
780
+ action: "none" as const,
781
+ reconciliationNote: null,
782
+ };
783
+ expect(transitionState("in_turn", check)).toBe("in_turn");
784
+ });
785
+
786
+ test("preserves between_turns when watchdog reports a healthy 'working' check", () => {
787
+ const check = {
788
+ state: "working" as const,
789
+ agentName: "a",
790
+ timestamp: "",
791
+ tmuxAlive: true,
792
+ pidAlive: true as boolean | null,
793
+ lastActivity: "",
794
+ processAlive: true,
795
+ action: "none" as const,
796
+ reconciliationNote: null,
797
+ };
798
+ expect(transitionState("between_turns", check)).toBe("between_turns");
799
+ });
800
+
801
+ test("advances in_turn → stalled when the watchdog escalates", () => {
802
+ const check = {
803
+ state: "stalled" as const,
804
+ agentName: "a",
805
+ timestamp: "",
806
+ tmuxAlive: true,
807
+ pidAlive: true as boolean | null,
808
+ lastActivity: "",
809
+ processAlive: true,
810
+ action: "escalate" as const,
811
+ reconciliationNote: null,
812
+ };
813
+ expect(transitionState("in_turn", check)).toBe("stalled");
814
+ });
815
+
816
+ test("advances between_turns → zombie when the watchdog terminates", () => {
817
+ const check = {
818
+ state: "zombie" as const,
819
+ agentName: "a",
820
+ timestamp: "",
821
+ tmuxAlive: false,
822
+ pidAlive: false as boolean | null,
823
+ lastActivity: "",
824
+ processAlive: false,
825
+ action: "terminate" as const,
826
+ reconciliationNote: null,
827
+ };
828
+ expect(transitionState("between_turns", check)).toBe("zombie");
829
+ });
705
830
  });
@@ -33,10 +33,22 @@
33
33
  import { isPersistentCapability } from "../agents/capabilities.ts";
34
34
  import type { AgentSession, AgentState, HealthCheck } from "../types.ts";
35
35
 
36
- /** Numeric ordering for forward-only state transitions. */
36
+ /**
37
+ * Numeric ordering for forward-only state transitions.
38
+ *
39
+ * `in_turn` and `between_turns` share the `working` rank (1) because, from
40
+ * the watchdog's perspective, all three are "agent is alive and active" —
41
+ * they only differ in whether the spawn-per-turn worker is currently
42
+ * mid-execution or idling between mail batches (overstory-3087). Same rank
43
+ * means a healthy-classification check (`check.state === "working"`) will
44
+ * not stomp on the more specific in_turn/between_turns states the
45
+ * turn-runner has already written.
46
+ */
37
47
  const STATE_ORDER: Record<AgentState, number> = {
38
48
  booting: 0,
39
49
  working: 1,
50
+ in_turn: 1,
51
+ between_turns: 1,
40
52
  completed: 2,
41
53
  stalled: 3,
42
54
  zombie: 4,
@@ -145,22 +157,42 @@ function evaluateTimeBased(
145
157
  };
146
158
  }
147
159
 
148
- // booting → transition to working once there's recent activity
160
+ // Spawn-per-turn workers (overstory-3087): healthy classification reports
161
+ // `between_turns` instead of `working`, including the booting → healthy
162
+ // transition. The turn-runner authoritatively writes `in_turn` /
163
+ // `between_turns` while a turn is alive; in_turn is preserved here when
164
+ // already set so a watchdog tick mid-turn does not overwrite it.
165
+ const isSpawnPerTurn = isSpawnPerTurnSession(session);
166
+
167
+ // booting → transition to the healthy state once there's recent activity.
149
168
  if (session.state === "booting") {
150
169
  return {
151
170
  ...base,
152
171
  processAlive: true,
153
- state: "working",
172
+ state: isSpawnPerTurn ? "between_turns" : "working",
154
173
  action: "none",
155
174
  reconciliationNote: null,
156
175
  };
157
176
  }
158
177
 
159
- // Default: healthy and working
178
+ // Default: healthy active state. For spawn-per-turn workers report the
179
+ // existing in_turn/between_turns substate; for tmux/long-lived agents
180
+ // report `working`. The turn-runner is authoritative for in_turn ↔
181
+ // between_turns transitions, so the watchdog must not stomp the more
182
+ // specific state — same rank in STATE_ORDER ensures `transitionState`
183
+ // also leaves the row alone.
184
+ let healthyState: AgentState;
185
+ if (session.state === "in_turn" || session.state === "between_turns") {
186
+ healthyState = session.state;
187
+ } else if (isSpawnPerTurn) {
188
+ healthyState = "between_turns";
189
+ } else {
190
+ healthyState = "working";
191
+ }
160
192
  return {
161
193
  ...base,
162
194
  processAlive: true,
163
- state: "working",
195
+ state: healthyState,
164
196
  action: "none",
165
197
  reconciliationNote: null,
166
198
  };
@@ -1,6 +1,6 @@
1
1
  import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
2
  import { existsSync, realpathSync } from "node:fs";
3
- import { mkdir, mkdtemp } from "node:fs/promises";
3
+ import { mkdir, mkdtemp, rm } from "node:fs/promises";
4
4
  import { tmpdir } from "node:os";
5
5
  import { join } from "node:path";
6
6
  import { WorktreeError } from "../errors.ts";
@@ -9,6 +9,7 @@ import {
9
9
  commitFile,
10
10
  createTempGitRepo,
11
11
  getDefaultBranch,
12
+ runGitInDir,
12
13
  } from "../test-helpers.ts";
13
14
  import {
14
15
  createWorktree,
@@ -16,6 +17,7 @@ import {
16
17
  listWorktrees,
17
18
  removeWorktree,
18
19
  rollbackWorktree,
20
+ validateWorktreeCreation,
19
21
  } from "./manager.ts";
20
22
 
21
23
  /**
@@ -145,6 +147,61 @@ describe("createWorktree", () => {
145
147
  expect(wtErr.branchName).toBe("overstory/auth-login/bead-abc123");
146
148
  }
147
149
  });
150
+
151
+ test("rejects creation when target branch is already checked out elsewhere", async () => {
152
+ // Pre-check should fail-fast with a precise diagnostic before git
153
+ // worktree add runs, so the operator sees the actual cause rather
154
+ // than git's generic "already exists" error or, worse, a silently
155
+ // half-built worktree (overstory-6878).
156
+ const first = await createWorktree({
157
+ repoRoot: repoDir,
158
+ baseDir: worktreesDir,
159
+ agentName: "auth-login",
160
+ baseBranch: defaultBranch,
161
+ taskId: "bead-abc123",
162
+ });
163
+
164
+ try {
165
+ await createWorktree({
166
+ repoRoot: repoDir,
167
+ baseDir: worktreesDir,
168
+ agentName: "auth-login",
169
+ baseBranch: defaultBranch,
170
+ taskId: "bead-abc123",
171
+ });
172
+ expect(true).toBe(false);
173
+ } catch (err: unknown) {
174
+ expect(err).toBeInstanceOf(WorktreeError);
175
+ const wtErr = err as WorktreeError;
176
+ expect(wtErr.message).toContain("already checked out");
177
+ expect(wtErr.message).toContain(first.path);
178
+ expect(wtErr.branchName).toBe("overstory/auth-login/bead-abc123");
179
+ }
180
+
181
+ // The original worktree must remain intact — the pre-check rejected
182
+ // before any state-mutating git command ran.
183
+ expect(existsSync(first.path)).toBe(true);
184
+ const entries = await listWorktrees(repoDir);
185
+ expect(entries.some((e) => e.path === first.path)).toBe(true);
186
+ });
187
+
188
+ test("post-creation: new worktree is registered and contains tracked files", async () => {
189
+ const { path: wtPath } = await createWorktree({
190
+ repoRoot: repoDir,
191
+ baseDir: worktreesDir,
192
+ agentName: "auth-login",
193
+ baseBranch: defaultBranch,
194
+ taskId: "bead-files",
195
+ });
196
+
197
+ // Registration check — listWorktrees must include the new path
198
+ const entries = await listWorktrees(repoDir);
199
+ expect(entries.map((e) => e.path)).toContain(wtPath);
200
+
201
+ // File-presence check — git ls-files inside the worktree must be non-empty
202
+ const lsFiles = await git(wtPath, ["ls-files"]);
203
+ expect(lsFiles.trim().length).toBeGreaterThan(0);
204
+ });
148
205
  });
149
206
 
150
207
  describe("listWorktrees", () => {
@@ -501,3 +558,163 @@ describe("rollbackWorktree", () => {
501
558
  expect(branchList).toContain("overstory/auth-login/bead-abc");
502
559
  });
503
560
  });
561
+
562
+ describe("validateWorktreeCreation", () => {
563
+ let repoDir: string;
564
+ let worktreesDir: string;
565
+ let defaultBranch: string;
566
+
567
+ beforeEach(async () => {
568
+ repoDir = realpathSync(await createTempGitRepo());
569
+ defaultBranch = await getDefaultBranch(repoDir);
570
+ worktreesDir = join(repoDir, ".overstory", "worktrees");
571
+ await mkdir(worktreesDir, { recursive: true });
572
+ });
573
+
574
+ afterEach(async () => {
575
+ await cleanupTempDir(repoDir);
576
+ });
577
+
578
+ test("passes for a normally created worktree", async () => {
579
+ const { path: wtPath, branch } = await createWorktree({
580
+ repoRoot: repoDir,
581
+ baseDir: worktreesDir,
582
+ agentName: "feature-agent",
583
+ baseBranch: defaultBranch,
584
+ taskId: "bead-ok",
585
+ });
586
+
587
+ // Re-running validation against the live worktree should be a no-op
588
+ await expect(
589
+ validateWorktreeCreation({
590
+ repoRoot: repoDir,
591
+ worktreePath: wtPath,
592
+ branchName: branch,
593
+ }),
594
+ ).resolves.toBeUndefined();
595
+ });
596
+
597
+ test("throws when worktree path is not registered with git", async () => {
598
+ const fakePath = join(worktreesDir, "ghost-agent");
599
+
600
+ try {
601
+ await validateWorktreeCreation({
602
+ repoRoot: repoDir,
603
+ worktreePath: fakePath,
604
+ branchName: "overstory/ghost-agent/bead-missing",
605
+ });
606
+ expect(true).toBe(false);
607
+ } catch (err: unknown) {
608
+ expect(err).toBeInstanceOf(WorktreeError);
609
+ const wtErr = err as WorktreeError;
610
+ expect(wtErr.worktreePath).toBe(fakePath);
611
+ expect(wtErr.branchName).toBe("overstory/ghost-agent/bead-missing");
612
+ expect(wtErr.message).toContain("not registered with git");
613
+ }
614
+ });
615
+
616
+ test("rolls back the dangling branch when validation fails", async () => {
617
+ // Create a real branch that's not attached to any worktree, then ask
618
+ // validation to check a path it can't possibly be registered at.
619
+ await runGitInDir(repoDir, ["branch", "overstory/orphan-agent/bead-x", defaultBranch]);
620
+ const fakePath = join(worktreesDir, "orphan-agent");
621
+
622
+ await expect(
623
+ validateWorktreeCreation({
624
+ repoRoot: repoDir,
625
+ worktreePath: fakePath,
626
+ branchName: "overstory/orphan-agent/bead-x",
627
+ }),
628
+ ).rejects.toThrow(WorktreeError);
629
+
630
+ // rollbackWorktree should have force-deleted the orphan branch
631
+ const branchList = await git(repoDir, ["branch", "--list"]);
632
+ expect(branchList).not.toContain("overstory/orphan-agent/bead-x");
633
+ });
634
+
635
+ test("throws when worktree contains zero tracked files", async () => {
636
+ // Build a base branch that points at an empty tree, then create a
637
+ // worktree from it. git happily registers the worktree, but ls-files
638
+ // returns nothing — the exact silent-failure shape from overstory-6878.
639
+ const emptyTree = (
640
+ await runGitInDir(repoDir, ["hash-object", "-t", "tree", "/dev/null"])
641
+ ).trim();
642
+ const emptyCommit = (
643
+ await runGitInDir(repoDir, ["commit-tree", emptyTree, "-m", "empty base"])
644
+ ).trim();
645
+ await runGitInDir(repoDir, ["branch", "empty-base", emptyCommit]);
646
+
647
+ const wtPath = join(worktreesDir, "empty-agent");
648
+ const branchName = "overstory/empty-agent/bead-empty";
649
+ await runGitInDir(repoDir, ["worktree", "add", "-b", branchName, wtPath, "empty-base"]);
650
+
651
+ try {
652
+ await validateWorktreeCreation({
653
+ repoRoot: repoDir,
654
+ worktreePath: wtPath,
655
+ branchName,
656
+ });
657
+ expect(true).toBe(false);
658
+ } catch (err: unknown) {
659
+ expect(err).toBeInstanceOf(WorktreeError);
660
+ const wtErr = err as WorktreeError;
661
+ expect(wtErr.worktreePath).toBe(wtPath);
662
+ expect(wtErr.branchName).toBe(branchName);
663
+ expect(wtErr.message).toContain("zero tracked files");
664
+ }
665
+
666
+ // Rollback removed both worktree and branch
667
+ expect(existsSync(wtPath)).toBe(false);
668
+ const branchList = await git(repoDir, ["branch", "--list"]);
669
+ expect(branchList).not.toContain(branchName);
670
+ });
671
+
672
+ test("createWorktree rejects when base branch has no tracked files", async () => {
673
+ // End-to-end: createWorktree should surface the same error and clean
674
+ // up after itself, so sling never sees a half-built worktree.
675
+ const emptyTree = (
676
+ await runGitInDir(repoDir, ["hash-object", "-t", "tree", "/dev/null"])
677
+ ).trim();
678
+ const emptyCommit = (
679
+ await runGitInDir(repoDir, ["commit-tree", emptyTree, "-m", "empty base"])
680
+ ).trim();
681
+ await runGitInDir(repoDir, ["branch", "empty-base", emptyCommit]);
682
+
683
+ await expect(
684
+ createWorktree({
685
+ repoRoot: repoDir,
686
+ baseDir: worktreesDir,
687
+ agentName: "empty-agent",
688
+ baseBranch: "empty-base",
689
+ taskId: "bead-empty",
690
+ }),
691
+ ).rejects.toThrow(WorktreeError);
692
+
693
+ // Caller observes a clean repo: no worktree dir, no leaked branch
694
+ expect(existsSync(join(worktreesDir, "empty-agent"))).toBe(false);
695
+ const branchList = await git(repoDir, ["branch", "--list"]);
696
+ expect(branchList).not.toContain("overstory/empty-agent/bead-empty");
697
+ });
698
+
699
+ test("createWorktree rejects when target dir pre-exists with files", async () => {
700
+ // Simulates the witnessed scenario: a stale directory survives at the
701
+ // target path from a previous run. createWorktree must surface a
702
+ // WorktreeError rather than returning a path that points at non-git
703
+ // state — the contract that protects the agent from being trapped.
704
+ const wtPath = join(worktreesDir, "preexisting-agent");
705
+ await mkdir(wtPath, { recursive: true });
706
+ await Bun.write(join(wtPath, "stale.txt"), "leftover from a previous run");
707
+
708
+ await expect(
709
+ createWorktree({
710
+ repoRoot: repoDir,
711
+ baseDir: worktreesDir,
712
+ agentName: "preexisting-agent",
713
+ baseBranch: defaultBranch,
714
+ taskId: "bead-pre",
715
+ }),
716
+ ).rejects.toThrow(WorktreeError);
717
+
718
+ await rm(wtPath, { recursive: true, force: true });
719
+ });
720
+ });
@@ -41,6 +41,14 @@ async function runGit(
41
41
  * Creates a worktree at `{baseDir}/{agentName}` with a new branch
42
42
  * named `overstory/{agentName}/{taskId}` based on `baseBranch`.
43
43
  *
44
+ * Before running `git worktree add`, rejects when the target branch is
45
+ * already checked out in another worktree — this avoids the silent-overwrite
46
+ * class of failure entirely. After `git worktree add` returns, validates
47
+ * that the worktree is actually registered with git AND contains tracked
48
+ * files; if either check fails, rolls back and throws. sling has previously
49
+ * hit edge cases where the dir exists but git did not populate it
50
+ * (overstory-6878), trapping the agent in a non-worktree directory.
51
+ *
44
52
  * @returns The absolute worktree path and branch name.
45
53
  */
46
54
  export async function createWorktree(options: {
@@ -55,14 +63,61 @@ export async function createWorktree(options: {
55
63
  const worktreePath = join(baseDir, agentName);
56
64
  const branchName = `overstory/${agentName}/${taskId}`;
57
65
 
66
+ const existing = await listWorktrees(repoRoot);
67
+ const occupied = existing.find((entry) => entry.branch === branchName);
68
+ if (occupied !== undefined) {
69
+ throw new WorktreeError(`branch ${branchName} is already checked out at ${occupied.path}`, {
70
+ worktreePath,
71
+ branchName,
72
+ });
73
+ }
74
+
58
75
  await runGit(repoRoot, ["worktree", "add", "-b", branchName, worktreePath, baseBranch], {
59
76
  worktreePath,
60
77
  branchName,
61
78
  });
62
79
 
80
+ await validateWorktreeCreation({ repoRoot, worktreePath, branchName });
81
+
63
82
  return { path: worktreePath, branch: branchName };
64
83
  }
65
84
 
85
+ /**
86
+ * Verify that a freshly created worktree is registered with git and contains
87
+ * tracked files. Throws WorktreeError with a precise diagnostic on failure
88
+ * and rolls back the worktree + branch so callers don't leak state.
89
+ *
90
+ * Exported for direct testing of edge cases (empty base branches, racy
91
+ * cleanup) that are awkward to provoke through createWorktree end-to-end.
92
+ */
93
+ export async function validateWorktreeCreation(opts: {
94
+ repoRoot: string;
95
+ worktreePath: string;
96
+ branchName: string;
97
+ }): Promise<void> {
98
+ const { repoRoot, worktreePath, branchName } = opts;
99
+
100
+ const entries = await listWorktrees(repoRoot);
101
+ const registered = entries.some((entry) => entry.path === worktreePath);
102
+ if (!registered) {
103
+ await rollbackWorktree(repoRoot, worktreePath, branchName);
104
+ throw new WorktreeError(
105
+ `Worktree creation reported success but path is not registered with git: ${worktreePath}. Possible causes: pre-existing directory, branch already checked out elsewhere, or git worktree add failed silently.`,
106
+ { worktreePath, branchName },
107
+ );
108
+ }
109
+
110
+ const lsFiles = await runGit(worktreePath, ["ls-files"], { worktreePath, branchName });
111
+ const fileCount = lsFiles.split("\n").filter((line) => line.length > 0).length;
112
+ if (fileCount === 0) {
113
+ await rollbackWorktree(repoRoot, worktreePath, branchName);
114
+ throw new WorktreeError(
115
+ `Worktree was registered but contains zero tracked files: ${worktreePath}. The base branch may be empty or the working tree was not populated.`,
116
+ { worktreePath, branchName },
117
+ );
118
+ }
119
+ }
120
+
66
121
  /**
67
122
  * Roll back a worktree and its associated branch after a failed spawn.
68
123
  *
@@ -830,6 +830,22 @@ describe("killSession", () => {
830
830
  expect(agentErr.agentName).toBe("ghost-agent");
831
831
  }
832
832
  });
833
+
834
+ test("throws AgentError when called with empty session name", async () => {
835
+ // Defense in depth (overstory-74ce): tmux's `-t` argument prefix-matches
836
+ // every session in the server when given an empty string. Without this
837
+ // guard a regression in any caller would wildcard-kill the entire
838
+ // overstory swarm. spawn must NOT be invoked.
839
+ await expect(killSession("")).rejects.toThrow(AgentError);
840
+ expect(spawnSpy).not.toHaveBeenCalled();
841
+
842
+ try {
843
+ await killSession("");
844
+ } catch (err: unknown) {
845
+ const agentErr = err as AgentError;
846
+ expect(agentErr.message).toContain("wildcard");
847
+ }
848
+ });
833
849
  });
834
850
 
835
851
  describe("isSessionAlive", () => {
@@ -869,6 +885,15 @@ describe("isSessionAlive", () => {
869
885
  const cmd = callArgs[0] as string[];
870
886
  expect(cmd).toEqual(["tmux", "-L", "overstory", "has-session", "-t", "my-agent"]);
871
887
  });
888
+
889
+ test("returns false for empty session name without calling tmux", async () => {
890
+ // Defense in depth (overstory-74ce): an empty `-t` argument prefix-matches
891
+ // every overstory session, so `has-session` would falsely report alive
892
+ // whenever any agent is running. Short-circuit to false without invoking tmux.
893
+ const alive = await isSessionAlive("");
894
+ expect(alive).toBe(false);
895
+ expect(spawnSpy).not.toHaveBeenCalled();
896
+ });
872
897
  });
873
898
 
874
899
  describe("checkSessionState", () => {