npm - @os-eco/overstory-cli - Version diff - 0.8.2 → 0.8.3 - Mend

@os-eco/overstory-cli 0.8.2 → 0.8.3

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (20)

package/agents/builder.md +2 -2
package/agents/lead.md +2 -2
package/agents/merger.md +2 -2
package/agents/orchestrator.md +1 -1
package/agents/reviewer.md +2 -2
package/agents/scout.md +2 -2
package/agents/supervisor.md +3 -3
package/package.json +1 -1
package/src/agents/overlay.test.ts +42 -0
package/src/agents/overlay.ts +1 -0
package/src/commands/sling.test.ts +34 -10
package/src/commands/sling.ts +51 -35
package/src/commands/stop.test.ts +52 -4
package/src/commands/stop.ts +5 -3
package/src/config.test.ts +63 -0
package/src/config.ts +29 -5
package/src/index.ts +2 -2
package/src/runtimes/codex.test.ts +22 -8
package/src/runtimes/codex.ts +21 -16
package/src/types.ts +2 -0

package/agents/builder.md CHANGED Viewed

@@ -20,7 +20,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
+Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
 ## constraints
@@ -108,7 +108,7 @@ You are an implementation specialist. Given a spec and a set of files you own, y
 ## workflow
-1. **Read your overlay** at `.claude/CLAUDE.md` in your worktree. This contains your task ID, spec path, file scope, branch name, and agent name.
+1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, spec path, file scope, branch name, and agent name.
 2. **Read the task spec** at the path specified in your overlay. Understand what needs to be built.
 3. **Load expertise** via `ml prime [domain]` for domains listed in your overlay. Apply existing patterns and conventions.
 4. **Implement the changes:**

package/agents/lead.md CHANGED Viewed

@@ -43,7 +43,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (task ID, spec path, hierarchy depth, agent name, whether you can spawn) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `ov sling` and tells you WHAT to coordinate. This file tells you HOW to coordinate.
+Your task-specific context (task ID, spec path, hierarchy depth, agent name, whether you can spawn) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to coordinate. This file tells you HOW to coordinate.
 ## constraints
@@ -160,7 +160,7 @@ Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration,
 Delegate exploration to scouts so you can focus on decomposition and planning.
-1. **Read your overlay** at `.claude/CLAUDE.md` in your worktree. This contains your task ID, hierarchy depth, and agent name.
+1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, hierarchy depth, and agent name.
 2. **Load expertise** via `ml prime [domain]` for relevant domains.
 3. **Search mulch for relevant context** before decomposing. Run `ml search <task keywords>` and review failure patterns, conventions, and decisions. Factor these insights into your specs.
 4. **Load file-specific expertise** if files are known. Use `ml prime --files <file1,file2,...>` to get file-scoped context. Note: if your overlay already includes pre-loaded expertise, review it instead of re-fetching.

package/agents/merger.md CHANGED Viewed

@@ -19,7 +19,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (task ID, branches to merge, target branch, merge order, parent agent) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `overstory sling` and tells you WHAT to merge. This file tells you HOW to merge.
+Your task-specific context (task ID, branches to merge, target branch, merge order, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to merge. This file tells you HOW to merge.
 ## constraints
@@ -97,7 +97,7 @@ You are a branch integration specialist. When workers complete their tasks on se
 ## workflow
-1. **Read your overlay** at `.claude/CLAUDE.md` in your worktree. This contains your task ID, the branches to merge, the target branch, and your agent name.
+1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, the branches to merge, the target branch, and your agent name.
 2. **Read the task spec** at the path specified in your overlay. Understand which branches need merging and in what order.
 3. **Review the branches** before merging:
    - `git log <target>..<branch>` to see what each branch contains.

package/agents/orchestrator.md CHANGED Viewed

@@ -31,7 +31,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
+Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
 ## constraints

package/agents/reviewer.md CHANGED Viewed

@@ -16,7 +16,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (task ID, code to review, branch name, parent agent) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `overstory sling` and tells you WHAT to review. This file tells you HOW to review.
+Your task-specific context (task ID, code to review, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to review. This file tells you HOW to review.
 ## constraints
@@ -95,7 +95,7 @@ You are a validation specialist. Given code to review, you check it for correctn
 ## workflow
-1. **Read your overlay** at `.claude/CLAUDE.md` in your worktree. This contains your task ID, the code or branch to review, and your agent name.
+1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, the code or branch to review, and your agent name.
 2. **Read the task spec** at the path specified in your overlay. Understand what was supposed to be built.
 3. **Load expertise** via `ml prime [domain]` to understand project conventions and standards.
 4. **Review the code changes:**

package/agents/scout.md CHANGED Viewed

@@ -16,7 +16,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Your task-specific context (what to explore, who spawned you, your agent name) is in `.claude/CLAUDE.md` in your worktree. That file is generated by `overstory sling` and tells you WHAT to work on. This file tells you HOW to work.
+Your task-specific context (what to explore, who spawned you, your agent name) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to work on. This file tells you HOW to work.
 ## constraints
@@ -97,7 +97,7 @@ You perform reconnaissance. Given a research question, exploration target, or an
 ## workflow
-1. **Read your overlay** at `.claude/CLAUDE.md` in your worktree. This contains your task assignment, spec path, and agent name.
+1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task assignment, spec path, and agent name.
 2. **Read the task spec** at the path specified in your overlay.
 3. **Load relevant expertise** via `ml prime [domain]` for domains listed in your overlay.
 4. **Explore systematically:**

package/agents/supervisor.md CHANGED Viewed

@@ -31,7 +31,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 ## overlay
-Unlike the coordinator (which has no overlay), you receive your task-specific context via the overlay CLAUDE.md at `.claude/CLAUDE.md` in your worktree root. This file is generated by `ov supervisor start` (or `ov sling` with `--capability supervisor`) and provides:
+Unlike the coordinator (which has no overlay), you receive your task-specific context via the overlay CLAUDE.md at `{{INSTRUCTION_PATH}}` in your worktree root. This file is generated by `ov supervisor start` (or `ov sling` with `--capability supervisor`) and provides:
 - **Agent Name** (`$OVERSTORY_AGENT_NAME`) -- your mail address
 - **Task ID** -- the issue you are assigned to
@@ -163,7 +163,7 @@ Before spawning, check `ov status` to ensure non-overlapping file scope across a
 ## workflow
-1. **Receive the dispatch.** Your overlay (`.claude/CLAUDE.md`) contains your task ID and spec path. The coordinator sends you a `dispatch` mail with task details.
+1. **Receive the dispatch.** Your overlay (`{{INSTRUCTION_PATH}}`) contains your task ID and spec path. The coordinator sends you a `dispatch` mail with task details.
 2. **Read your task spec** at the path specified in your overlay. Understand the full scope of work assigned to you.
 3. **Load expertise** via `ml prime [domain]` for each relevant domain. Check `{{TRACKER_CLI}} show <task-id>` for task details and dependencies.
 4. **Analyze scope and decompose.** Study the codebase with Read/Glob/Grep to understand what needs to change. Determine:
@@ -418,7 +418,7 @@ You are long-lived within a project. You survive across batches and can recover
 - **Checkpoints** are saved to `.overstory/agents/$OVERSTORY_AGENT_NAME/checkpoint.json` before compaction or handoff. The checkpoint contains: agent name, assigned task ID, active worker IDs, task group ID, session ID, progress summary, and files modified.
 - **On recovery**, reload context by:
   1. Reading your checkpoint: `.overstory/agents/$OVERSTORY_AGENT_NAME/checkpoint.json`
-  2. Reading your overlay: `.claude/CLAUDE.md` (task ID, spec path, depth, parent)
+  2. Reading your overlay: `{{INSTRUCTION_PATH}}` (task ID, spec path, depth, parent)
   3. Checking active group: `ov group status <group-id>`
   4. Checking worker states: `ov status`
   5. Checking unread mail: `ov mail check --agent $OVERSTORY_AGENT_NAME`

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@os-eco/overstory-cli",
-	"version": "0.8.2",
+	"version": "0.8.3",
 	"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
 	"author": "Jaymin West",
 	"license": "MIT",

package/src/agents/overlay.test.ts CHANGED Viewed

@@ -875,6 +875,48 @@ describe("formatQualityGatesCapabilities", () => {
 	});
 });
+describe("INSTRUCTION_PATH placeholder", () => {
+	test("defaults to .claude/CLAUDE.md when instructionPath is not set", async () => {
+		const config = makeConfig({
+			baseDefinition: "Read your overlay at {{INSTRUCTION_PATH}} in your worktree.",
+		});
+		const output = await generateOverlay(config);
+		expect(output).toContain("Read your overlay at .claude/CLAUDE.md in your worktree.");
+		expect(output).not.toContain("{{INSTRUCTION_PATH}}");
+	});
+	test("uses custom instructionPath when set", async () => {
+		const config = makeConfig({
+			instructionPath: "SAPLING.md",
+			baseDefinition: "Read your overlay at {{INSTRUCTION_PATH}} in your worktree.",
+		});
+		const output = await generateOverlay(config);
+		expect(output).toContain("Read your overlay at SAPLING.md in your worktree.");
+		expect(output).not.toContain("{{INSTRUCTION_PATH}}");
+		expect(output).not.toContain(".claude/CLAUDE.md");
+	});
+	test("INSTRUCTION_PATH in base definition replaced throughout (multiple occurrences)", async () => {
+		const config = makeConfig({
+			instructionPath: "AGENTS.md",
+			baseDefinition: "Step 1: read {{INSTRUCTION_PATH}}.\nContext is in {{INSTRUCTION_PATH}}.",
+		});
+		const output = await generateOverlay(config);
+		expect(output).not.toContain("{{INSTRUCTION_PATH}}");
+		expect(output.split("AGENTS.md").length - 1).toBeGreaterThanOrEqual(2);
+	});
+	test("no unreplaced INSTRUCTION_PATH placeholders in final output", async () => {
+		const config = makeConfig({ instructionPath: "SAPLING.md" });
+		const output = await generateOverlay(config);
+		expect(output).not.toContain("{{INSTRUCTION_PATH}}");
+	});
+});
 describe("quality gate placeholders in base definitions", () => {
 	test("QUALITY_GATE_INLINE in base definition gets replaced", async () => {
 		const config = makeConfig({

package/src/agents/overlay.ts CHANGED Viewed

@@ -320,6 +320,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
 		"{{QUALITY_GATE_CAPABILITIES}}": formatQualityGatesCapabilities(config.qualityGates),
 		"{{TRACKER_CLI}}": config.trackerCli ?? "sd",
 		"{{TRACKER_NAME}}": config.trackerName ?? "seeds",
+		"{{INSTRUCTION_PATH}}": config.instructionPath ?? ".claude/CLAUDE.md",
 	};
 	let result = template;

package/src/commands/sling.test.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import {
 	checkRunSessionLimit,
 	checkTaskLock,
 	extractMulchRecordIds,
+	generateAgentName,
 	getCurrentBranch,
 	inferDomainsFromFiles,
 	isRunningAsRoot,
@@ -342,6 +343,31 @@ describe("shouldShowScoutWarning", () => {
 	});
 });
+describe("generateAgentName", () => {
+	test("returns capability-taskId when no collision", () => {
+		expect(generateAgentName("builder", "overstory-2f10", [])).toBe("builder-overstory-2f10");
+	});
+	test("returns capability-taskId when takenNames is empty", () => {
+		expect(generateAgentName("scout", "task-123", [])).toBe("scout-task-123");
+	});
+	test("appends -2 when base name is taken", () => {
+		expect(generateAgentName("builder", "overstory-2f10", ["builder-overstory-2f10"])).toBe(
+			"builder-overstory-2f10-2",
+		);
+	});
+	test("skips taken suffixes and returns -3 when -2 is also taken", () => {
+		expect(
+			generateAgentName("builder", "overstory-2f10", [
+				"builder-overstory-2f10",
+				"builder-overstory-2f10-2",
+			]),
+		).toBe("builder-overstory-2f10-3");
+	});
+});
 /**
  * Tests for hierarchy validation in sling.
  *
@@ -352,14 +378,12 @@ describe("shouldShowScoutWarning", () => {
  */
 describe("validateHierarchy", () => {
-	test("rejects builder when parentAgent is null", () => {
-		expect(() => validateHierarchy(null, "builder", "test-builder", 0, false)).toThrow(
-			HierarchyError,
-		);
+	test("allows builder when parentAgent is null", () => {
+		expect(() => validateHierarchy(null, "builder", "test-builder", 0, false)).not.toThrow();
 	});
-	test("rejects scout when parentAgent is null", () => {
-		expect(() => validateHierarchy(null, "scout", "test-scout", 0, false)).toThrow(HierarchyError);
+	test("allows scout when parentAgent is null", () => {
+		expect(() => validateHierarchy(null, "scout", "test-scout", 0, false)).not.toThrow();
 	});
 	test("rejects reviewer when parentAgent is null", () => {
@@ -404,15 +428,15 @@ describe("validateHierarchy", () => {
 	test("error has correct fields and code", () => {
 		try {
-			validateHierarchy(null, "builder", "my-builder", 0, false);
+			validateHierarchy(null, "reviewer", "my-reviewer", 0, false);
 			expect.unreachable("should have thrown");
 		} catch (err) {
 			expect(err).toBeInstanceOf(HierarchyError);
 			const he = err as HierarchyError;
 			expect(he.code).toBe("HIERARCHY_VIOLATION");
-			expect(he.agentName).toBe("my-builder");
-			expect(he.requestedCapability).toBe("builder");
-			expect(he.message).toContain("builder");
+			expect(he.agentName).toBe("my-reviewer");
+			expect(he.requestedCapability).toBe("reviewer");
+			expect(he.message).toContain("reviewer");
 			expect(he.message).toContain("lead");
 		}
 	});

package/src/commands/sling.ts CHANGED Viewed

@@ -32,7 +32,6 @@ import { printSuccess } from "../logging/color.ts";
 import { createMailClient } from "../mail/client.ts";
 import { createMailStore } from "../mail/store.ts";
 import { createMulchClient } from "../mulch/client.ts";
-import { setConnection } from "../runtimes/connections.ts";
 import { getRuntime } from "../runtimes/registry.ts";
 import { openSessionStore } from "../sessions/compat.ts";
 import { createRunStore } from "../sessions/store.ts";
@@ -78,6 +77,29 @@ export function calculateStaggerDelay(
 	return remaining > 0 ? remaining : 0;
 }
+/**
+ * Generate a unique agent name from capability and taskId.
+ * Base: capability-taskId. If that collides with takenNames,
+ * appends -2, -3, etc. up to 100. Falls back to -Date.now() for guaranteed uniqueness.
+ */
+export function generateAgentName(
+	capability: string,
+	taskId: string,
+	takenNames: readonly string[],
+): string {
+	const base = `${capability}-${taskId}`;
+	if (!takenNames.includes(base)) {
+		return base;
+	}
+	for (let i = 2; i <= 100; i++) {
+		const candidate = `${base}-${i}`;
+		if (!takenNames.includes(candidate)) {
+			return candidate;
+		}
+	}
+	return `${base}-${Date.now()}`;
+}
 /**
  * Check if the current process is running as root (UID 0).
  * Returns true if running as root, false otherwise.
@@ -348,9 +370,10 @@ export function validateHierarchy(
 		return;
 	}
-	if (parentAgent === null && capability !== "lead") {
+	const directSpawnCapabilities = ["lead", "scout", "builder"];
+	if (parentAgent === null && !directSpawnCapabilities.includes(capability)) {
 		throw new HierarchyError(
-			`Coordinator cannot spawn "${capability}" directly. Only "lead" is allowed without --parent. Use a lead as intermediary, or pass --force-hierarchy to bypass.`,
+			`Coordinator cannot spawn "${capability}" directly. Only lead, scout, and builder are allowed without --parent. Use a lead as intermediary, or pass --force-hierarchy to bypass.`,
 			{ agentName: name, requestedCapability: capability },
 		);
 	}
@@ -429,7 +452,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 	}
 	const capability = opts.capability ?? "builder";
-	const name = opts.name;
+	const rawName = opts.name?.trim() ?? "";
+	const nameWasAutoGenerated = rawName.length === 0;
+	let name = nameWasAutoGenerated ? `${capability}-${taskId}` : rawName;
 	const specPath = opts.spec ?? null;
 	const filesRaw = opts.files;
 	const parentAgent = opts.parent ?? null;
@@ -439,10 +464,6 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 	const skipScout = opts.skipScout ?? false;
 	const skipTaskCheck = opts.skipTaskCheck ?? false;
-	if (!name || name.trim().length === 0) {
-		throw new ValidationError("--name is required for sling", { field: "name" });
-	}
 	if (Number.isNaN(depth) || depth < 0) {
 		throw new ValidationError("--depth must be a non-negative integer", {
 			field: "depth",
@@ -597,11 +618,16 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			);
 		}
-		const existing = store.getByName(name);
-		if (existing && existing.state !== "zombie" && existing.state !== "completed") {
-			throw new AgentError(`Agent name "${name}" is already in use (state: ${existing.state})`, {
-				agentName: name,
-			});
+		if (nameWasAutoGenerated) {
+			const takenNames = activeSessions.map((s) => s.agentName);
+			name = generateAgentName(capability, taskId, takenNames);
+		} else {
+			const existing = store.getByName(name);
+			if (existing && existing.state !== "zombie" && existing.state !== "completed") {
+				throw new AgentError(`Agent name "${name}" is already in use (state: ${existing.state})`, {
+					agentName: name,
+				});
+			}
 		}
 		// 5d. Task-level locking: prevent concurrent agents on the same task ID.
@@ -717,6 +743,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			}
 		}
+		// Resolve runtime before overlayConfig so we can pass runtime.instructionPath
+		const runtime = getRuntime(opts.runtime, config);
 		const overlayConfig: OverlayConfig = {
 			agentName: name,
 			taskId: taskId,
@@ -742,11 +771,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			qualityGates: config.project.qualityGates,
 			trackerCli: trackerCliName(resolvedBackend),
 			trackerName: resolvedBackend,
+			instructionPath: runtime.instructionPath,
 		};
-		// Resolve runtime before writeOverlay so we can pass runtime.instructionPath
-		const runtime = getRuntime(opts.runtime, config);
 		try {
 			await writeOverlay(worktreePath, overlayConfig, config.project.root, runtime.instructionPath);
 		} catch (err) {
@@ -854,14 +881,14 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			});
 			// Create a timestamped log dir for this headless agent session.
-			// Redirecting stdout/stderr to files prevents OS pipe buffer backpressure:
-			// when nobody reads the pipe, the child blocks on write() after ~64 KB and
-			// becomes a zombie. File writes have no such limit.
+			// Always redirect stdout to a file. This prevents SIGPIPE death:
+			// ov sling exits after spawning, closing the pipe's read end.
+			// If stdout is a pipe, the agent dies on the next write (SIGPIPE).
+			// File writes have no such limit, and the agent survives the CLI exit.
 			//
-			// Exception: RPC-capable runtimes need a live stdout pipe to receive
-			// JSON-RPC 2.0 responses (getState). In that case stdoutFile is omitted
-			// and the caller consumes the stream via the RuntimeConnection.
-			const hasRpcConnect = typeof runtime.connect === "function";
+			// Note: RPC connection wiring is intentionally omitted here. The RPC pipe
+			// is only useful when the spawner stays alive to consume it. ov sling is
+			// a short-lived CLI — any connection created here dies with the process.
 			const logTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
 			const agentLogDir = join(overstoryDir, "logs", name, logTimestamp);
 			mkdirSync(agentLogDir, { recursive: true });
@@ -869,21 +896,10 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
 			const headlessProc = await spawnHeadlessAgent(argv, {
 				cwd: worktreePath,
 				env: { ...(process.env as Record<string, string>), ...directEnv },
-				stdoutFile: hasRpcConnect ? undefined : join(agentLogDir, "stdout.log"),
+				stdoutFile: join(agentLogDir, "stdout.log"),
 				stderrFile: join(agentLogDir, "stderr.log"),
 			});
-			// Wire up RPC connection for runtimes that support it (e.g., Sapling).
-			// The connection is stored in the module-level registry so the watchdog
-			// and other subsystems can call getState() for health checks.
-			if (hasRpcConnect && headlessProc.stdout && runtime.connect) {
-				const connection = runtime.connect({
-					stdin: headlessProc.stdin,
-					stdout: headlessProc.stdout,
-				});
-				setConnection(name, connection);
-			}
 			// 13. Record session with empty tmuxSession (no tmux pane for headless agents).
 			const session: AgentSession = {
 				id: `session-${Date.now()}-${name}`,

package/src/commands/stop.test.ts CHANGED Viewed

@@ -260,13 +260,61 @@ describe("stopCommand validation", () => {
 		await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(/already completed/);
 	});
-	test("throws AgentError when agent is already zombie", async () => {
+	test("succeeds when agent is zombie (cleanup, no error)", async () => {
 		const session = makeAgentSession({ state: "zombie" });
 		saveSessionsToDb([session]);
-		const { deps } = makeDeps();
-		await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(AgentError);
-		await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(/zombie/);
+		const { deps } = makeDeps({ [session.tmuxSession]: false });
+		const output = await captureStdout(() => stopCommand("my-builder", {}, deps));
+		expect(output).toContain("Agent stopped");
+		expect(output).toContain("Zombie agent cleaned up");
+		const { store } = openSessionStore(overstoryDir);
+		const updated = store.getByName("my-builder");
+		store.close();
+		expect(updated?.state).toBe("completed");
+	});
+});
+describe("stopCommand zombie cleanup", () => {
+	test("zombie + --clean-worktree removes worktree", async () => {
+		const session = makeAgentSession({ state: "zombie" });
+		saveSessionsToDb([session]);
+		const { deps, worktreeCalls } = makeDeps({ [session.tmuxSession]: false });
+		const output = await captureStdout(() =>
+			stopCommand("my-builder", { cleanWorktree: true }, deps),
+		);
+		expect(output).toContain("Agent stopped");
+		expect(output).toContain("Zombie agent cleaned up");
+		expect(output).toContain(`Worktree removed: ${session.worktreePath}`);
+		expect(worktreeCalls.remove).toHaveLength(1);
+		const { store } = openSessionStore(overstoryDir);
+		const updated = store.getByName("my-builder");
+		store.close();
+		expect(updated?.state).toBe("completed");
+	});
+	test("zombie + --json includes wasZombie: true", async () => {
+		const session = makeAgentSession({ state: "zombie" });
+		saveSessionsToDb([session]);
+		const { deps } = makeDeps({ [session.tmuxSession]: false });
+		const output = await captureStdout(() => stopCommand("my-builder", { json: true }, deps));
+		const parsed = JSON.parse(output.trim()) as Record<string, unknown>;
+		expect(parsed.success).toBe(true);
+		expect(parsed.stopped).toBe(true);
+		expect(parsed.wasZombie).toBe(true);
+		expect(parsed.agentName).toBe("my-builder");
+		const { store } = openSessionStore(overstoryDir);
+		const updated = store.getByName("my-builder");
+		store.close();
+		expect(updated?.state).toBe("completed");
 	});
 });

package/src/commands/stop.ts CHANGED Viewed

@@ -86,9 +86,7 @@ export async function stopCommand(
 			throw new AgentError(`Agent "${agentName}" is already completed`, { agentName });
 		}
-		if (session.state === "zombie") {
-			throw new AgentError(`Agent "${agentName}" is already zombie (dead)`, { agentName });
-		}
+		const isZombie = session.state === "zombie";
 		const isHeadless = session.tmuxSession === "" && session.pid !== null;
@@ -140,6 +138,7 @@ export async function stopCommand(
 				pidKilled,
 				worktreeRemoved,
 				force,
+				wasZombie: isZombie,
 			});
 		} else {
 			printSuccess("Agent stopped", agentName);
@@ -156,6 +155,9 @@ export async function stopCommand(
 					process.stdout.write(`  Tmux session was already dead\n`);
 				}
 			}
+			if (isZombie) {
+				process.stdout.write(`  Zombie agent cleaned up (state → completed)\n`);
+			}
 			if (cleanWorktree && worktreeRemoved) {
 				process.stdout.write(`  Worktree removed: ${session.worktreePath}\n`);
 			}

package/src/config.test.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import {
 	clearProjectRootOverride,
+	clearWarningsSeen,
 	DEFAULT_CONFIG,
 	DEFAULT_QUALITY_GATES,
 	loadConfig,
@@ -432,9 +433,11 @@ describe("validateConfig", () => {
 		tempDir = await mkdtemp(join(tmpdir(), "overstory-test-"));
 		const { mkdir } = await import("node:fs/promises");
 		await mkdir(join(tempDir, ".overstory"), { recursive: true });
+		clearWarningsSeen();
 	});
 	afterEach(async () => {
+		clearWarningsSeen();
 		await cleanupTempDir(tempDir);
 	});
@@ -691,6 +694,39 @@ models:
 		expect((err as ValidationError).message).toContain("provider-prefixed ref");
 	});
+	test("accepts bare model name when runtime.default is codex", async () => {
+		await writeConfig(`
+runtime:
+  default: codex
+models:
+  coordinator: gpt-5.3-codex
+`);
+		const config = await loadConfig(tempDir);
+		expect(config.models.coordinator).toBe("gpt-5.3-codex");
+	});
+	test("warns on bare non-Anthropic model in tool-heavy role when runtime.default is codex", async () => {
+		await writeConfig(`
+runtime:
+  default: codex
+models:
+  builder: gpt-5.3-codex
+`);
+		const origWrite = process.stderr.write;
+		let capturedStderr = "";
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") capturedStderr += s;
+			return true;
+		}) as typeof process.stderr.write;
+		try {
+			await loadConfig(tempDir);
+		} finally {
+			process.stderr.write = origWrite;
+		}
+		expect(capturedStderr).toContain("WARNING: models.builder uses non-Anthropic model");
+		expect(capturedStderr).toContain("gpt-5.3-codex");
+	});
 	test("warns on non-Anthropic model in tool-heavy role", async () => {
 		await writeConfig(`
 providers:
@@ -716,6 +752,33 @@ models:
 		expect(capturedStderr).toContain("openrouter/openai/gpt-4");
 	});
+	test("warns only once per role/model combination across multiple loadConfig calls", async () => {
+		await writeConfig(`
+providers:
+  openrouter:
+    type: gateway
+    baseUrl: https://openrouter.ai/api/v1
+    authTokenEnv: OPENROUTER_API_KEY
+models:
+  builder: openrouter/openai/gpt-4
+`);
+		const origWrite = process.stderr.write;
+		const stderrLines: string[] = [];
+		process.stderr.write = ((s: string | Uint8Array) => {
+			if (typeof s === "string") stderrLines.push(s);
+			return true;
+		}) as typeof process.stderr.write;
+		try {
+			await loadConfig(tempDir);
+			await loadConfig(tempDir);
+			await loadConfig(tempDir);
+		} finally {
+			process.stderr.write = origWrite;
+		}
+		const warnings = stderrLines.filter((l) => l.includes("WARNING: models.builder"));
+		expect(warnings.length).toBe(1);
+	});
 	test("does not warn for non-Anthropic model in non-tool-heavy role", async () => {
 		await writeConfig(`
 providers:

package/src/config.ts CHANGED Viewed

@@ -5,6 +5,14 @@ import type { OverstoryConfig, QualityGate, TaskTrackerBackend } from "./types.t
 // Module-level project root override (set by --project global flag)
 let _projectRootOverride: string | undefined;
+// Tracks warnings already emitted this process to avoid repeating on every loadConfig call.
+const _warnedOnce = new Set<string>();
+/** Clear the dedup warning set. Intended for tests only. */
+export function clearWarningsSeen(): void {
+	_warnedOnce.clear();
+}
 /** Override project root for all config resolution (used by --project global flag). */
 export function setProjectRootOverride(path: string): void {
 	_projectRootOverride = path;
@@ -698,9 +706,13 @@ function validateConfig(config: OverstoryConfig): void {
 		}
 	}
-	// models: validate each value — accepts aliases and provider-prefixed refs
+	// models: validate each value.
+	// - Standard runtimes: aliases (sonnet/opus/haiku) or provider-prefixed refs.
+	// - Codex runtime: also allow bare model refs (e.g. gpt-5.3-codex).
 	const validAliases = ["sonnet", "opus", "haiku"];
 	const toolHeavyRoles = ["builder", "scout"];
+	const defaultRuntime = config.runtime?.default ?? "claude";
+	const allowBareModelRefs = defaultRuntime === "codex";
 	for (const [role, model] of Object.entries(config.models)) {
 		if (model === undefined) continue;
 		if (model.includes("/")) {
@@ -716,13 +728,25 @@ function validateConfig(config: OverstoryConfig): void {
 				);
 			}
 			if (toolHeavyRoles.includes(role)) {
-				process.stderr.write(
-					`[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
-				);
+				const warnKey = `non-anthropic:${role}:${model}`;
+				if (!_warnedOnce.has(warnKey)) {
+					_warnedOnce.add(warnKey);
+					process.stderr.write(
+						`[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
+					);
+				}
 			}
 		} else {
-			// Must be a valid alias
+			// Must be a valid alias unless codex runtime is active.
 			if (!validAliases.includes(model)) {
+				if (allowBareModelRefs) {
+					if (toolHeavyRoles.includes(role)) {
+						process.stderr.write(
+							`[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
+						);
+					}
+					continue;
+				}
 				throw new ValidationError(
 					`models.${role} must be a valid alias (${validAliases.join(", ")}) or a provider-prefixed ref (e.g., openrouter/openai/gpt-4)`,
 					{

package/src/index.ts CHANGED Viewed

@@ -49,7 +49,7 @@ import { ConfigError, OverstoryError, WorktreeError } from "./errors.ts";
 import { jsonError } from "./json.ts";
 import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
-export const VERSION = "0.8.2";
+export const VERSION = "0.8.3";
 const rawArgs = process.argv.slice(2);
@@ -271,7 +271,7 @@ program
 		"Agent type: builder | scout | reviewer | lead | merger",
 		"builder",
 	)
-	.option("--name <name>", "Unique agent name")
+	.option("--name <name>", "Unique agent name (auto-generated if omitted)")
 	.option("--spec <path>", "Path to task spec file")
 	.option("--files <list>", "Exclusive file scope (comma-separated)")
 	.option("--parent <agent>", "Parent agent for hierarchy tracking")

package/src/runtimes/codex.test.ts CHANGED Viewed

@@ -20,7 +20,7 @@ describe("CodexRuntime", () => {
 	});
 	describe("buildSpawnCommand", () => {
-		test("basic command uses codex exec with --full-auto and --json", () => {
+		test("basic command uses interactive codex with --full-auto", () => {
 			const opts: SpawnOpts = {
 				model: "gpt-5-codex",
 				permissionMode: "bypass",
@@ -28,11 +28,25 @@ describe("CodexRuntime", () => {
 				env: {},
 			};
 			const cmd = runtime.buildSpawnCommand(opts);
-			expect(cmd).toContain("codex exec --full-auto --json");
+			expect(cmd).toContain("codex --full-auto");
 			expect(cmd).toContain("--model gpt-5-codex");
 			expect(cmd).toContain("Read AGENTS.md");
 		});
+		test("manifest aliases omit --model so codex uses default configured model", () => {
+			for (const alias of ["sonnet", "opus", "haiku"]) {
+				const opts: SpawnOpts = {
+					model: alias,
+					permissionMode: "bypass",
+					cwd: "/tmp/worktree",
+					env: {},
+				};
+				const cmd = runtime.buildSpawnCommand(opts);
+				expect(cmd).toContain("codex --full-auto");
+				expect(cmd).not.toContain(" --model ");
+			}
+		});
 		test("permissionMode is NOT included in command (Codex uses OS sandbox)", () => {
 			const opts: SpawnOpts = {
 				model: "gpt-5-codex",
@@ -146,7 +160,7 @@ describe("CodexRuntime", () => {
 			};
 			const cmd = runtime.buildSpawnCommand(opts);
 			expect(cmd).toBe(
-				"codex exec --full-auto --json --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
+				"codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
 			);
 		});
@@ -254,7 +268,7 @@ describe("CodexRuntime", () => {
 	});
 	describe("detectReady", () => {
-		test("returns ready for empty pane (headless — always ready)", () => {
+		test("returns ready for empty pane", () => {
 			const state = runtime.detectReady("");
 			expect(state).toEqual({ phase: "ready" });
 		});
@@ -279,7 +293,7 @@ describe("CodexRuntime", () => {
 	});
 	describe("requiresBeaconVerification", () => {
-		test("returns false (headless — no beacon needed)", () => {
+		test("returns false (no beacon verification needed)", () => {
 			expect(runtime.requiresBeaconVerification()).toBe(false);
 		});
 	});
@@ -664,7 +678,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
 			env: { OVERSTORY_AGENT_NAME: "builder-1" },
 		});
 		expect(cmd).toBe(
-			"codex exec --full-auto --json --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
+			"codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
 		);
 	});
@@ -677,7 +691,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
 			appendSystemPrompt: baseDefinition,
 			env: { OVERSTORY_AGENT_NAME: "coordinator" },
 		});
-		expect(cmd).toContain("codex exec --full-auto --json --model gpt-5-codex");
+		expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
 		expect(cmd).toContain("# Coordinator");
 		expect(cmd).toContain("You are the coordinator agent.");
 		expect(cmd).toContain("Read AGENTS.md");
@@ -691,7 +705,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
 			appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
 			env: { OVERSTORY_AGENT_NAME: "coordinator" },
 		});
-		expect(cmd).toContain("codex exec --full-auto --json --model gpt-5-codex");
+		expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
 		expect(cmd).toContain("$(cat '/project/.overstory/agent-defs/coordinator.md')");
 		expect(cmd).toContain("Read AGENTS.md");
 	});

package/src/runtimes/codex.ts CHANGED Viewed

@@ -2,10 +2,10 @@
 // Implements the AgentRuntime contract for the OpenAI `codex` CLI.
 //
 // Key differences from Claude/Pi adapters:
-// - Headless: `codex exec` exits on completion (no persistent TUI)
+// - Interactive: `codex` (without `exec`) stays alive in tmux for orchestration
 // - Instruction file: AGENTS.md (not .claude/CLAUDE.md)
 // - No hooks: Codex uses OS-level sandbox (Seatbelt/Landlock)
-// - Events: NDJSON stream to stdout (parsed for token usage)
+// - One-shot calls still use `codex exec` (buildPrintCommand)
 import { mkdir } from "node:fs/promises";
 import { dirname, join } from "node:path";
@@ -22,9 +22,9 @@ import type {
 /**
  * Codex runtime adapter.
  *
- * Implements AgentRuntime for the OpenAI `codex` CLI. Codex agents run in
- * headless mode (`codex exec`) — they process a task and exit, rather than
- * maintaining a persistent TUI like Claude Code or Pi.
+ * Implements AgentRuntime for the OpenAI `codex` CLI. Tmux-spawned Codex
+ * agents run in interactive mode (`codex`) so sessions stay alive and can be
+ * nudged via tmux.
  *
  * Security is enforced via Codex's OS-level sandbox (Seatbelt on macOS,
  * Landlock on Linux) rather than hook-based guards. The `--full-auto` flag
@@ -40,11 +40,17 @@ export class CodexRuntime implements AgentRuntime {
 	/** Relative path to the instruction file within a worktree. */
 	readonly instructionPath = "AGENTS.md";
+	/**
+	 * Anthropic aliases used by overstory manifests that Codex CLI does not
+	 * accept as --model values.
+	 */
+	private static readonly MANIFEST_ALIASES = new Set(["sonnet", "opus", "haiku"]);
 	/**
 	 * Build the shell command string to spawn a Codex agent in a tmux pane.
 	 *
-	 * Uses `codex exec` (headless mode) with `--full-auto` for workspace-write
-	 * sandbox + automatic approvals, and `--json` for NDJSON event output.
+	 * Uses interactive `codex` with `--full-auto` for workspace-write sandbox +
+	 * automatic approvals.
 	 *
 	 * The prompt directs the agent to read AGENTS.md for its full instructions.
 	 * If `appendSystemPrompt` or `appendSystemPromptFile` is provided, the
@@ -56,7 +62,12 @@ export class CodexRuntime implements AgentRuntime {
 	 * @returns Shell command string suitable for tmux new-session -c
 	 */
 	buildSpawnCommand(opts: SpawnOpts): string {
-		let cmd = `codex exec --full-auto --json --model ${opts.model}`;
+		// When model comes from default manifest aliases (sonnet/opus/haiku),
+		// omit --model so Codex uses the user's configured default model.
+		let cmd = "codex --full-auto";
+		if (!CodexRuntime.MANIFEST_ALIASES.has(opts.model)) {
+			cmd += ` --model ${opts.model}`;
+		}
 		if (opts.appendSystemPromptFile) {
 			// Read role definition from file at shell expansion time — avoids tmux
@@ -128,11 +139,7 @@ export class CodexRuntime implements AgentRuntime {
 	}
 	/**
-	 * Codex exec is headless — always ready.
-	 *
-	 * Unlike Claude Code and Pi which maintain persistent TUI sessions,
-	 * `codex exec` starts processing immediately and exits on completion.
-	 * No TUI readiness detection is needed.
+	 * Codex interactive startup is treated as ready once a pane exists.
 	 *
 	 * @param _paneContent - Captured tmux pane content (unused)
 	 * @returns Always `{ phase: "ready" }`
@@ -144,9 +151,7 @@ export class CodexRuntime implements AgentRuntime {
 	/**
 	 * Codex does not require beacon verification/resend.
 	 *
-	 * The beacon verification loop exists because Claude Code's TUI sometimes
-	 * swallows the initial Enter during late initialization. Codex exec is
-	 * headless — it processes the prompt immediately with no TUI startup delay.
+	 * Codex accepts startup input reliably once spawned.
 	 */
 	requiresBeaconVerification(): boolean {
 		return false;

package/src/types.ts CHANGED Viewed

@@ -343,6 +343,8 @@ export interface OverlayConfig {
 	trackerName?: string; // "seeds" or "beads"
 	/** Quality gate commands for the agent overlay. Falls back to defaults if undefined. */
 	qualityGates?: QualityGate[];
+	/** Relative path to the instruction file within the worktree (runtime-specific). Defaults to .claude/CLAUDE.md. */
+	instructionPath?: string;
 }
 // === Merge Queue ===