npm - @os-eco/overstory-cli - Versions diffs - 0.9.1 → 0.9.3 - Mend

@os-eco/overstory-cli 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101)

package/README.md +21 -6
package/agents/coordinator.md +34 -10
package/agents/lead.md +11 -1
package/package.json +1 -1
package/src/agents/copilot-hooks-deployer.test.ts +162 -0
package/src/agents/copilot-hooks-deployer.ts +93 -0
package/src/agents/hooks-deployer.test.ts +9 -1
package/src/agents/hooks-deployer.ts +2 -1
package/src/agents/overlay.test.ts +26 -0
package/src/agents/overlay.ts +18 -4
package/src/beads/client.ts +31 -3
package/src/commands/agents.ts +1 -1
package/src/commands/clean.test.ts +3 -0
package/src/commands/clean.ts +1 -58
package/src/commands/completions.test.ts +18 -6
package/src/commands/completions.ts +40 -1
package/src/commands/coordinator.test.ts +77 -4
package/src/commands/coordinator.ts +228 -125
package/src/commands/dashboard.ts +50 -10
package/src/commands/doctor.ts +3 -1
package/src/commands/ecosystem.test.ts +126 -1
package/src/commands/ecosystem.ts +7 -53
package/src/commands/feed.test.ts +117 -2
package/src/commands/feed.ts +46 -30
package/src/commands/group.test.ts +274 -155
package/src/commands/group.ts +11 -5
package/src/commands/init.ts +50 -0
package/src/commands/inspect.ts +8 -4
package/src/commands/log.test.ts +35 -0
package/src/commands/log.ts +10 -6
package/src/commands/logs.test.ts +423 -1
package/src/commands/logs.ts +99 -104
package/src/commands/monitor.ts +8 -2
package/src/commands/orchestrator.ts +42 -0
package/src/commands/prime.test.ts +177 -2
package/src/commands/prime.ts +4 -2
package/src/commands/sling.ts +8 -3
package/src/commands/upgrade.test.ts +2 -0
package/src/commands/upgrade.ts +1 -17
package/src/commands/watch.test.ts +67 -1
package/src/commands/watch.ts +4 -79
package/src/config.test.ts +250 -0
package/src/config.ts +43 -0
package/src/doctor/agents.test.ts +72 -5
package/src/doctor/agents.ts +10 -10
package/src/doctor/consistency.test.ts +35 -0
package/src/doctor/consistency.ts +7 -3
package/src/doctor/dependencies.test.ts +58 -1
package/src/doctor/dependencies.ts +4 -2
package/src/doctor/providers.test.ts +41 -5
package/src/doctor/types.ts +2 -1
package/src/doctor/version.test.ts +106 -2
package/src/doctor/version.ts +4 -2
package/src/doctor/watchdog.test.ts +167 -0
package/src/doctor/watchdog.ts +158 -0
package/src/e2e/init-sling-lifecycle.test.ts +2 -1
package/src/errors.test.ts +350 -0
package/src/events/tailer.test.ts +25 -0
package/src/events/tailer.ts +8 -1
package/src/index.ts +4 -1
package/src/mail/store.test.ts +110 -0
package/src/runtimes/aider.test.ts +124 -0
package/src/runtimes/aider.ts +147 -0
package/src/runtimes/amp.test.ts +164 -0
package/src/runtimes/amp.ts +154 -0
package/src/runtimes/claude.test.ts +4 -2
package/src/runtimes/codex.test.ts +38 -1
package/src/runtimes/codex.ts +22 -3
package/src/runtimes/copilot.test.ts +213 -13
package/src/runtimes/copilot.ts +93 -11
package/src/runtimes/goose.test.ts +133 -0
package/src/runtimes/goose.ts +157 -0
package/src/runtimes/pi-guards.ts +2 -1
package/src/runtimes/pi.test.ts +33 -9
package/src/runtimes/pi.ts +10 -10
package/src/runtimes/registry.test.ts +1 -1
package/src/runtimes/registry.ts +13 -4
package/src/runtimes/sapling.ts +2 -1
package/src/runtimes/types.ts +9 -2
package/src/tracker/factory.test.ts +10 -0
package/src/tracker/factory.ts +3 -2
package/src/types.ts +4 -0
package/src/utils/bin.test.ts +10 -0
package/src/utils/bin.ts +37 -0
package/src/utils/fs.test.ts +119 -0
package/src/utils/fs.ts +62 -0
package/src/utils/pid.test.ts +68 -0
package/src/utils/pid.ts +45 -0
package/src/utils/time.test.ts +43 -0
package/src/utils/time.ts +37 -0
package/src/utils/version.test.ts +33 -0
package/src/utils/version.ts +70 -0
package/src/watchdog/daemon.test.ts +255 -1
package/src/watchdog/daemon.ts +46 -9
package/src/watchdog/health.test.ts +15 -1
package/src/watchdog/health.ts +1 -1
package/src/watchdog/triage.test.ts +49 -9
package/src/watchdog/triage.ts +21 -5
package/src/worktree/tmux.test.ts +166 -49
package/src/worktree/tmux.ts +36 -37
package/templates/copilot-hooks.json.tmpl +13 -0

package/src/runtimes/aider.ts ADDED Viewed

@@ -0,0 +1,147 @@
+// Aider runtime adapter for overstory's AgentRuntime interface.
+// Implements the AgentRuntime contract for the `aider` CLI (Paul Gauthier's AI pair programming tool).
+//
+// Key differences from Claude/Pi adapters:
+// - Interactive: `aider` stays alive in tmux as a REPL-like session
+// - Instruction file: .aider.conf.yml or CONVENTIONS.md (we use CONVENTIONS.md for overlay)
+// - No hooks: Aider has no PreToolUse/PostToolUse hook system
+// - One-shot calls use `aider --message <prompt> --yes-always`
+// - Model is passed via `--model <model>` (supports litellm model strings)
+import { mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import type { ResolvedModel } from "../types.ts";
+import type {
+	AgentRuntime,
+	HooksDef,
+	OverlayContent,
+	ReadyState,
+	SpawnOpts,
+	TranscriptSummary,
+} from "./types.ts";
+/**
+ * Aider runtime adapter.
+ *
+ * Implements AgentRuntime for Paul Gauthier's `aider` CLI. Tmux-spawned
+ * Aider agents run in interactive mode with `--yes-always` for automatic
+ * confirmation of file edits.
+ *
+ * Security relies on Aider's built-in file-scope limiting — it only edits
+ * files explicitly added to its context. No OS-level sandbox or hook guards.
+ *
+ * What the adapter provides: the tmux spawn command string, the one-shot
+ * argv, an overlay writer targeting `CONVENTIONS.md`, and a pane-content
+ * readiness check. Transcript parsing and transcript directories are not
+ * supported (both methods return null).
+ */
+export class AiderRuntime implements AgentRuntime {
+	readonly id = "aider";
+	/** Experimental — community-contributed adapter, not yet battle-tested in production. */
+	readonly stability = "experimental" as const;
+	/**
+	 * Aider reads CONVENTIONS.md from the repo root for project-level instructions.
+	 * We write the overlay here so Aider picks it up natively.
+	 *
+	 * NOTE(review): Aider's documentation describes loading a conventions file
+	 * explicitly (`--read CONVENTIONS.md` or the `read:` config key). The spawn
+	 * command built below never passes `--read` for this path — confirm the file
+	 * is actually picked up without it.
+	 */
+	readonly instructionPath = "CONVENTIONS.md";
+	/**
+	 * Build the shell command string to spawn an Aider agent in a tmux pane.
+	 *
+	 * Uses `--yes-always` for automatic approval of file edits and
+	 * `--no-auto-commits` so overstory controls git operations.
+	 *
+	 * Prompt selection, in priority order:
+	 * - `opts.appendSystemPromptFile` set: the path is passed as `--read '<path>'`
+	 *   and no `--message` is added.
+	 * - `opts.appendSystemPrompt` set: the text is sent via `--message`, followed
+	 *   by a kickoff sentence pointing at CONVENTIONS.md.
+	 * - neither set: only the kickoff sentence is sent via `--message`.
+	 *
+	 * Single quotes in the path or prompt are escaped with the `'\''` idiom so the
+	 * value survives single-quoting. `opts.model` is interpolated as-is (unquoted).
+	 *
+	 * NOTE(review): Aider's options reference describes `--message` as "send one
+	 * message, process the reply, then exit" — confirm the tmux session stays
+	 * interactive when `--message` is used here.
+	 *
+	 * @param opts - Spawn options
+	 * @returns Shell command string suitable for tmux new-session
+	 */
+	buildSpawnCommand(opts: SpawnOpts): string {
+		let cmd = "aider --yes-always --no-auto-commits";
+		// Aider accepts litellm model strings: provider/model-name
+		cmd += ` --model ${opts.model}`;
+		if (opts.appendSystemPromptFile) {
+			const escaped = opts.appendSystemPromptFile.replace(/'/g, "'\\''");
+			cmd += ` --read '${escaped}'`; // file branch: no --message is appended
+		} else if (opts.appendSystemPrompt) {
+			const escaped = opts.appendSystemPrompt.replace(/'/g, "'\\''");
+			cmd += ` --message '${escaped} Read CONVENTIONS.md for your task assignment and begin.'`;
+		} else {
+			cmd += ` --message 'Read CONVENTIONS.md for your task assignment and begin immediately.'`;
+		}
+		return cmd;
+	}
+	/**
+	 * Build argv for a headless one-shot Aider invocation.
+	 *
+	 * Uses `--message` for the prompt with `--yes-always` for non-interactive mode.
+	 * `--no-auto-commits` is always included. `--model <model>` is appended at the
+	 * end only when `model` is not undefined. The prompt is a separate argv
+	 * element, so no shell quoting is applied here.
+	 *
+	 * @param prompt - The prompt to pass
+	 * @param model - Optional model override
+	 * @returns Argv array for Bun.spawn
+	 */
+	buildPrintCommand(prompt: string, model?: string): string[] {
+		const cmd = ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"];
+		if (model !== undefined) {
+			cmd.push("--model", model);
+		}
+		return cmd;
+	}
+	/**
+	 * Deploy per-agent instructions to a worktree.
+	 *
+	 * Writes the overlay to CONVENTIONS.md (Aider's native conventions file).
+	 * No hooks or guard extensions — Aider has no hook system.
+	 *
+	 * The worktree directory is created (recursively) if it does not exist yet,
+	 * then `overlay.content` is written to `<worktreePath>/CONVENTIONS.md`.
+	 * When `overlay` is undefined nothing is created or written.
+	 *
+	 * @param worktreePath - Absolute path to the agent's git worktree
+	 * @param overlay - Overlay content, or undefined for no-op
+	 * @param _hooks - Unused — Aider has no hook system
+	 */
+	async deployConfig(
+		worktreePath: string,
+		overlay: OverlayContent | undefined,
+		_hooks: HooksDef,
+	): Promise<void> {
+		if (!overlay) return;
+		await mkdir(worktreePath, { recursive: true });
+		await Bun.write(join(worktreePath, this.instructionPath), overlay.content);
+	}
+	/**
+	 * Detect Aider TUI readiness from tmux pane content.
+	 *
+	 * Aider shows a prompt like "aider>" or "> " when ready for input.
+	 *
+	 * The check is one regex anchored at the very end of the captured content
+	 * (trailing whitespace allowed). The `aider` prefix is optional in the
+	 * pattern, so any content whose last non-whitespace character is `>` is
+	 * reported as ready.
+	 *
+	 * @param paneContent - Captured tmux pane content
+	 * @returns Readiness phase: `{ phase: "ready" }` on a match, otherwise `{ phase: "loading" }`
+	 */
+	detectReady(paneContent: string): ReadyState {
+		// Aider shows its prompt when ready: "aider> " or "> "
+		if (/(?:aider)?>\s*$/.test(paneContent)) {
+			return { phase: "ready" };
+		}
+		return { phase: "loading" };
+	}
+	/** Aider does not require beacon verification — accepts input reliably. */
+	requiresBeaconVerification(): boolean {
+		return false;
+	}
+	/**
+	 * Aider does not produce machine-readable transcripts.
+	 * Returns null — cost tracking relies on provider billing.
+	 *
+	 * @param _path - Unused
+	 * @returns Always null
+	 */
+	async parseTranscript(_path: string): Promise<TranscriptSummary | null> {
+		return null;
+	}
+	buildEnv(model: ResolvedModel): Record<string, string> {
+		return model.env ?? {}; // pass the resolved model's env through; empty object when none is set
+	}
+	/** Aider logs to .aider.chat.history.md but not in a parseable transcript format. */
+	getTranscriptDir(_projectRoot: string): string | null {
+		return null;
+	}
+}

package/src/runtimes/amp.test.ts ADDED Viewed

@@ -0,0 +1,164 @@
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+import { mkdtemp, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { AmpRuntime } from "./amp.ts";
+describe("AmpRuntime", () => {
+	const runtime = new AmpRuntime(); // one adapter instance shared by every test below
+	let testDir: string; // per-test temp directory: created in beforeEach, removed in afterEach
+	beforeEach(async () => {
+		testDir = await mkdtemp(join(tmpdir(), "overstory-amp-test-"));
+	});
+	afterEach(async () => {
+		await rm(testDir, { recursive: true });
+	});
+	it("has correct id and instruction path", () => {
+		expect(runtime.id).toBe("amp");
+		expect(runtime.instructionPath).toBe(".amp/AGENT.md");
+	});
+	it("buildSpawnCommand includes --model and --yes", () => {
+		const cmd = runtime.buildSpawnCommand({
+			model: "anthropic/claude-sonnet-4-6",
+			permissionMode: "bypass",
+			cwd: "/tmp/test",
+			env: {},
+		});
+		expect(cmd).toContain("amp --model anthropic/claude-sonnet-4-6 --yes"); // model is expected verbatim and unquoted
+	});
+	it("buildSpawnCommand includes append system prompt as --prompt", () => {
+		const cmd = runtime.buildSpawnCommand({
+			model: "sonnet",
+			permissionMode: "bypass",
+			appendSystemPrompt: "You are a reviewer.",
+			cwd: "/tmp/test",
+			env: {},
+		});
+		expect(cmd).toContain("--prompt");
+		expect(cmd).toContain("You are a reviewer.");
+	});
+	it("buildSpawnCommand uses cat for appendSystemPromptFile", () => {
+		const cmd = runtime.buildSpawnCommand({
+			model: "sonnet",
+			permissionMode: "bypass",
+			appendSystemPromptFile: "/tmp/role.md",
+			cwd: "/tmp/test",
+			env: {},
+		});
+		expect(cmd).toContain("--prompt");
+		expect(cmd).toContain("cat '/tmp/role.md'"); // only the command text is checked; the file is never read here
+	});
+	it("buildSpawnCommand includes default prompt when no append", () => {
+		const cmd = runtime.buildSpawnCommand({
+			model: "sonnet",
+			permissionMode: "bypass",
+			cwd: "/tmp/test",
+			env: {},
+		});
+		expect(cmd).toContain("--prompt");
+		expect(cmd).toContain("Read .amp/AGENT.md");
+	});
+	it("buildPrintCommand returns correct argv", () => {
+		const argv = runtime.buildPrintCommand("review the diff");
+		expect(argv[0]).toBe("amp");
+		expect(argv).toContain("--prompt");
+		expect(argv).toContain("review the diff");
+		expect(argv).toContain("--no-input");
+		expect(argv).toContain("--yes");
+	});
+	it("buildPrintCommand includes model when provided", () => {
+		const argv = runtime.buildPrintCommand("review the diff", "gpt-4o");
+		expect(argv).toContain("--model");
+		expect(argv).toContain("gpt-4o");
+	});
+	it("deployConfig writes .amp/AGENT.md", async () => {
+		await runtime.deployConfig(
+			testDir,
+			{ content: "# Reviewer instructions" },
+			{
+				agentName: "reviewer-1",
+				capability: "reviewer",
+				worktreePath: testDir,
+			},
+		);
+		const content = await readFile(join(testDir, ".amp", "AGENT.md"), "utf-8");
+		expect(content).toBe("# Reviewer instructions");
+	});
+	it("deployConfig creates parent .amp directory", async () => {
+		await runtime.deployConfig(
+			testDir,
+			{ content: "# Test" },
+			{
+				agentName: "test",
+				capability: "scout",
+				worktreePath: testDir,
+			},
+		);
+		const file = Bun.file(join(testDir, ".amp", "AGENT.md"));
+		expect(await file.exists()).toBe(true);
+	});
+	it("deployConfig is no-op when overlay is undefined", async () => {
+		await runtime.deployConfig(testDir, undefined, {
+			agentName: "test",
+			capability: "scout",
+			worktreePath: testDir,
+		});
+		const file = Bun.file(join(testDir, ".amp", "AGENT.md"));
+		expect(await file.exists()).toBe(false);
+	});
+	it("detectReady requires both prompt AND branding (AND logic)", () => {
+		// Both prompt and branding → ready (in the first case "amp> " supplies both by itself)
+		expect(runtime.detectReady("some output\namp> ").phase).toBe("ready");
+		expect(runtime.detectReady("amp v1.0.0\n> ").phase).toBe("ready");
+		expect(runtime.detectReady("AMP CLI\n> ").phase).toBe("ready");
+		// Prompt only (no branding) → loading
+		expect(runtime.detectReady("some output\n> ").phase).toBe("loading");
+		// Branding only (no prompt) → loading
+		expect(runtime.detectReady("amp v1.0.0").phase).toBe("loading");
+		expect(runtime.detectReady("amp v1.2.3 starting...").phase).toBe("loading");
+		// Neither → loading
+		expect(runtime.detectReady("Initializing...").phase).toBe("loading");
+		// Substring false-positive prevention: "amp" inside other words must NOT match branding
+		expect(runtime.detectReady("this is an example output\n> ").phase).toBe("loading");
+		expect(runtime.detectReady("stamped result\n> ").phase).toBe("loading");
+	});
+	it("does not require beacon verification", () => {
+		expect(runtime.requiresBeaconVerification()).toBe(false);
+	});
+	it("parseTranscript returns null", async () => {
+		expect(await runtime.parseTranscript("/nonexistent")).toBeNull();
+	});
+	it("buildEnv returns model env vars", () => {
+		expect(runtime.buildEnv({ model: "sonnet", env: { SRC_ACCESS_TOKEN: "token" } })).toEqual({
+			SRC_ACCESS_TOKEN: "token",
+		});
+	});
+	it("buildEnv returns empty object when no env", () => {
+		expect(runtime.buildEnv({ model: "sonnet" })).toEqual({});
+	});
+	it("getTranscriptDir returns null", () => {
+		expect(runtime.getTranscriptDir("/tmp/project")).toBeNull();
+	});
+});

package/src/runtimes/amp.ts ADDED Viewed

@@ -0,0 +1,154 @@
+// Amp runtime adapter for overstory's AgentRuntime interface.
+// Implements the AgentRuntime contract for Sourcegraph's `amp` CLI (AI coding agent).
+//
+// Key differences from Claude/Pi adapters:
+// - Interactive: `amp` runs as an interactive chat session in tmux
+// - Instruction file: .amp/AGENT.md (Amp's native instruction file)
+// - No hooks: Amp manages permissions via its own approval system
+// - One-shot calls use `amp --prompt <prompt> --no-input`
+// - Model is passed via `--model <model>`
+import { mkdir } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import type { ResolvedModel } from "../types.ts";
+import type {
+	AgentRuntime,
+	HooksDef,
+	OverlayContent,
+	ReadyState,
+	SpawnOpts,
+	TranscriptSummary,
+} from "./types.ts";
+/**
+ * Amp runtime adapter.
+ *
+ * Implements AgentRuntime for Sourcegraph's `amp` CLI. Amp agents run
+ * as interactive chat sessions with configurable models and tools.
+ *
+ * Security is managed by Amp's built-in approval system for file
+ * modifications and command execution.
+ *
+ * What the adapter provides: the tmux spawn command string, the one-shot
+ * argv, an overlay writer targeting `.amp/AGENT.md`, and a pane-content
+ * readiness check. Transcript parsing and transcript directories are not
+ * supported (both methods return null).
+ */
+export class AmpRuntime implements AgentRuntime {
+	readonly id = "amp";
+	/** Experimental — community-contributed adapter, not yet battle-tested in production. */
+	readonly stability = "experimental" as const;
+	/**
+	 * Amp reads .amp/AGENT.md from the repo for project-level instructions.
+	 * The path is relative; deployConfig joins it onto the worktree path.
+	 */
+	readonly instructionPath = ".amp/AGENT.md";
+	/**
+	 * Build the shell command string to spawn an Amp agent in a tmux pane.
+	 *
+	 * Uses `amp` in interactive mode with `--model` for model selection
+	 * and `--yes` for automatic approval.
+	 *
+	 * A `--prompt` is always appended, chosen in priority order:
+	 * - `opts.appendSystemPromptFile` set: the prompt is a double-quoted string
+	 *   containing `$(cat '<path>')`, so the shell inlines the file contents at
+	 *   launch time, followed by a sentence pointing at .amp/AGENT.md.
+	 * - `opts.appendSystemPrompt` set: the text, a blank line, then the kickoff
+	 *   sentence, passed as one single-quoted argument.
+	 * - neither set: only the default kickoff sentence.
+	 *
+	 * Single quotes in the path or prompt are escaped with the `'\''` idiom.
+	 * `opts.model` is interpolated as-is (unquoted).
+	 *
+	 * @param opts - Spawn options
+	 * @returns Shell command string suitable for tmux new-session
+	 */
+	buildSpawnCommand(opts: SpawnOpts): string {
+		let cmd = `amp --model ${opts.model} --yes`;
+		if (opts.appendSystemPromptFile) {
+			const escaped = opts.appendSystemPromptFile.replace(/'/g, "'\\''");
+			cmd += ` --prompt "$(cat '${escaped}') Read .amp/AGENT.md for your task assignment."`;
+		} else if (opts.appendSystemPrompt) {
+			const escaped =
+				`${opts.appendSystemPrompt}\n\nRead .amp/AGENT.md for your task assignment and begin.`.replace(
+					/'/g,
+					"'\\''",
+				);
+			cmd += ` --prompt '${escaped}'`;
+		} else {
+			cmd += ` --prompt 'Read .amp/AGENT.md for your task assignment and begin immediately.'`;
+		}
+		return cmd;
+	}
+	/**
+	 * Build argv for a headless one-shot Amp invocation.
+	 *
+	 * Uses `amp --prompt <prompt> --no-input --yes` for non-interactive execution.
+	 * `--model <model>` is appended at the end only when `model` is not undefined.
+	 * The prompt is a separate argv element, so no shell quoting is applied here.
+	 *
+	 * @param prompt - The prompt to pass
+	 * @param model - Optional model override
+	 * @returns Argv array for Bun.spawn
+	 */
+	buildPrintCommand(prompt: string, model?: string): string[] {
+		const cmd = ["amp", "--prompt", prompt, "--no-input", "--yes"];
+		if (model !== undefined) {
+			cmd.push("--model", model);
+		}
+		return cmd;
+	}
+	/**
+	 * Deploy per-agent instructions to a worktree.
+	 *
+	 * Writes the overlay to .amp/AGENT.md (Amp's native instruction file).
+	 * No hooks — Amp manages approvals internally.
+	 *
+	 * The parent `.amp` directory is created (recursively) before the write.
+	 * When `overlay` is undefined nothing is created or written.
+	 *
+	 * @param worktreePath - Absolute path to the agent's git worktree
+	 * @param overlay - Overlay content, or undefined for no-op
+	 * @param _hooks - Unused — Amp has no hook system
+	 */
+	async deployConfig(
+		worktreePath: string,
+		overlay: OverlayContent | undefined,
+		_hooks: HooksDef,
+	): Promise<void> {
+		if (!overlay) return;
+		const agentPath = join(worktreePath, this.instructionPath);
+		await mkdir(dirname(agentPath), { recursive: true });
+		await Bun.write(agentPath, overlay.content);
+	}
+	/**
+	 * Detect Amp TUI readiness from tmux pane content.
+	 *
+	 * Amp shows a prompt indicator when ready for input.
+	 *
+	 * Two independent checks must both pass: a `>` (optionally `amp>`) at the very
+	 * end of the captured content, and the standalone word "amp" (case-insensitive)
+	 * anywhere in it. A trailing "amp> " satisfies both checks by itself.
+	 *
+	 * @param paneContent - Captured tmux pane content
+	 * @returns Readiness phase: `{ phase: "ready" }` when both checks pass, otherwise `{ phase: "loading" }`
+	 */
+	detectReady(paneContent: string): ReadyState {
+		const lower = paneContent.toLowerCase(); // lower-cased copy, used only for the branding check
+		// Prompt indicator: ">" or "amp>" at the very end of the pane content (no "m" flag; trailing whitespace allowed)
+		const hasPrompt = /(?:amp)?>\s*$/.test(paneContent);
+		// Branding indicator: "amp" as a standalone word (word boundary prevents
+		// matching inside "example", "stamp", etc. — but "&amp;" DOES match, since "&" and ";" are non-word characters)
+		const hasBranding = /\bamp\b/.test(lower);
+		// Both required (AND logic) to prevent premature ready detection
+		// during startup messages like "amp v1.2.3 starting..."
+		if (hasPrompt && hasBranding) {
+			return { phase: "ready" };
+		}
+		return { phase: "loading" };
+	}
+	/** Amp does not require beacon verification. */
+	requiresBeaconVerification(): boolean {
+		return false;
+	}
+	/** Amp does not produce machine-readable transcripts. Always resolves to null. */
+	async parseTranscript(_path: string): Promise<TranscriptSummary | null> {
+		return null;
+	}
+	buildEnv(model: ResolvedModel): Record<string, string> {
+		return model.env ?? {}; // pass the resolved model's env through; empty object when none is set
+	}
+	/** Amp does not expose a transcript directory. */
+	getTranscriptDir(_projectRoot: string): string | null {
+		return null;
+	}
+}

package/src/runtimes/claude.test.ts CHANGED Viewed

@@ -676,7 +676,9 @@ describe("ClaudeRuntime integration: registry resolves 'claude' as default", ()
 	test("getRuntime rejects unknown runtimes", async () => {
 		const { getRuntime } = await import("./registry.ts");
-		expect(() => getRuntime("aider")).toThrow('Unknown runtime: "aider"');
-		expect(() => getRuntime("nonexistent")).toThrow('Unknown runtime: "nonexistent"');
+		expect(() => getRuntime("nonexistent-runtime")).toThrow(
+			'Unknown runtime: "nonexistent-runtime"',
+		);
+		expect(() => getRuntime("does-not-exist")).toThrow('Unknown runtime: "does-not-exist"');
 	});
 });

package/src/runtimes/codex.test.ts CHANGED Viewed

@@ -203,7 +203,7 @@ describe("CodexRuntime", () => {
 			expect(cmd1).toBe(cmd2);
 		});
-		test("all model names pass through unchanged", () => {
+		test("all bare model names pass through unchanged", () => {
 			for (const model of ["gpt-5-codex", "gpt-4o", "o3", "custom-model-v2"]) {
 				const opts: SpawnOpts = {
 					model,
@@ -216,6 +216,30 @@ describe("CodexRuntime", () => {
 			}
 		});
+		test("provider-prefixed model strips prefix (openai/gpt-5.4 → gpt-5.4)", () => {
+			const opts: SpawnOpts = {
+				model: "openai/gpt-5.4",
+				permissionMode: "bypass",
+				cwd: "/tmp",
+				env: {},
+			};
+			const cmd = runtime.buildSpawnCommand(opts);
+			expect(cmd).toContain("--model gpt-5.4");
+			expect(cmd).not.toContain("openai/");
+		});
+		test("provider-prefixed model with other providers strips prefix", () => {
+			const opts: SpawnOpts = {
+				model: "azure/gpt-4o",
+				permissionMode: "bypass",
+				cwd: "/tmp",
+				env: {},
+			};
+			const cmd = runtime.buildSpawnCommand(opts);
+			expect(cmd).toContain("--model gpt-4o");
+			expect(cmd).not.toContain("azure/");
+		});
 		test("systemPrompt field is ignored", () => {
 			const opts: SpawnOpts = {
 				model: "gpt-5-codex",
@@ -248,6 +272,19 @@ describe("CodexRuntime", () => {
 			]);
 		});
+		test("provider-prefixed model strips prefix (openai/gpt-5.4 → gpt-5.4)", () => {
+			const argv = runtime.buildPrintCommand("Classify this error", "openai/gpt-5.4");
+			expect(argv).toEqual([
+				"codex",
+				"exec",
+				"--full-auto",
+				"--ephemeral",
+				"--model",
+				"gpt-5.4",
+				"Classify this error",
+			]);
+		});
 		test("model undefined omits --model flag", () => {
 			const argv = runtime.buildPrintCommand("Hello", undefined);
 			expect(argv).not.toContain("--model");

package/src/runtimes/codex.ts CHANGED Viewed

@@ -49,6 +49,21 @@ export class CodexRuntime implements AgentRuntime {
 	 */
 	private static readonly MANIFEST_ALIASES = new Set(["sonnet", "opus", "haiku"]);
+	/**
+	 * Strip a provider prefix from a model ID.
+	 *
+	 * Codex CLI expects bare model names. The orchestrator may resolve a model to
+	 * a provider-qualified form (e.g. `"openai/gpt-5.4"`) — strip the `"openai/"`
+	 * prefix before passing to the CLI.
+	 *
+	 * The prefix is not limited to `"openai/"`: whatever precedes the first `/`
+	 * is dropped (e.g. `"azure/gpt-4o"` becomes `"gpt-4o"`). Only that first
+	 * segment is removed, so `"a/b/c"` becomes `"b/c"`. A string without any `/`
+	 * is returned unchanged.
+	 *
+	 * @param model - Possibly provider-qualified model ID
+	 * @returns Bare model name (everything after the first `/`, or unchanged if no `/`)
+	 */
+	private static stripProviderPrefix(model: string): string {
+		const slashIdx = model.indexOf("/");
+		return slashIdx !== -1 ? model.slice(slashIdx + 1) : model;
+	}
 	/**
 	 * Escape a directory path for use in a single-quoted shell argument.
 	 *
@@ -75,11 +90,14 @@ export class CodexRuntime implements AgentRuntime {
 	 * @returns Shell command string suitable for tmux new-session -c
 	 */
 	buildSpawnCommand(opts: SpawnOpts): string {
+		// Strip provider prefix before alias check and model flag injection.
+		// Codex CLI expects bare model names (e.g. "gpt-5.4", not "openai/gpt-5.4").
+		const bareModel = CodexRuntime.stripProviderPrefix(opts.model);
 		// When model comes from default manifest aliases (sonnet/opus/haiku),
 		// omit --model so Codex uses the user's configured default model.
 		let cmd = "codex --full-auto";
-		if (!CodexRuntime.MANIFEST_ALIASES.has(opts.model)) {
-			cmd += ` --model ${opts.model}`;
+		if (!CodexRuntime.MANIFEST_ALIASES.has(bareModel)) {
+			cmd += ` --model ${bareModel}`;
 		}
 		for (const dir of opts.sharedWritableDirs ?? []) {
 			cmd += ` --add-dir '${CodexRuntime.shellEscape(dir)}'`;
@@ -119,7 +137,8 @@ export class CodexRuntime implements AgentRuntime {
 	buildPrintCommand(prompt: string, model?: string): string[] {
 		const cmd = ["codex", "exec", "--full-auto", "--ephemeral"];
 		if (model !== undefined) {
-			cmd.push("--model", model);
+			// Strip provider prefix — Codex CLI expects bare model names.
+			cmd.push("--model", CodexRuntime.stripProviderPrefix(model));
 		}
 		cmd.push(prompt);
 		return cmd;