@os-eco/overstory-cli 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -260,13 +260,61 @@ describe("stopCommand validation", () => {
260
260
  await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(/already completed/);
261
261
  });
262
262
 
263
- test("throws AgentError when agent is already zombie", async () => {
263
+ test("succeeds when agent is zombie (cleanup, no error)", async () => {
264
264
  const session = makeAgentSession({ state: "zombie" });
265
265
  saveSessionsToDb([session]);
266
266
 
267
- const { deps } = makeDeps();
268
- await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(AgentError);
269
- await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(/zombie/);
267
+ const { deps } = makeDeps({ [session.tmuxSession]: false });
268
+ const output = await captureStdout(() => stopCommand("my-builder", {}, deps));
269
+
270
+ expect(output).toContain("Agent stopped");
271
+ expect(output).toContain("Zombie agent cleaned up");
272
+
273
+ const { store } = openSessionStore(overstoryDir);
274
+ const updated = store.getByName("my-builder");
275
+ store.close();
276
+ expect(updated?.state).toBe("completed");
277
+ });
278
+ });
279
+
280
+ describe("stopCommand zombie cleanup", () => {
281
+ test("zombie + --clean-worktree removes worktree", async () => {
282
+ const session = makeAgentSession({ state: "zombie" });
283
+ saveSessionsToDb([session]);
284
+
285
+ const { deps, worktreeCalls } = makeDeps({ [session.tmuxSession]: false });
286
+ const output = await captureStdout(() =>
287
+ stopCommand("my-builder", { cleanWorktree: true }, deps),
288
+ );
289
+
290
+ expect(output).toContain("Agent stopped");
291
+ expect(output).toContain("Zombie agent cleaned up");
292
+ expect(output).toContain(`Worktree removed: ${session.worktreePath}`);
293
+ expect(worktreeCalls.remove).toHaveLength(1);
294
+
295
+ const { store } = openSessionStore(overstoryDir);
296
+ const updated = store.getByName("my-builder");
297
+ store.close();
298
+ expect(updated?.state).toBe("completed");
299
+ });
300
+
301
+ test("zombie + --json includes wasZombie: true", async () => {
302
+ const session = makeAgentSession({ state: "zombie" });
303
+ saveSessionsToDb([session]);
304
+
305
+ const { deps } = makeDeps({ [session.tmuxSession]: false });
306
+ const output = await captureStdout(() => stopCommand("my-builder", { json: true }, deps));
307
+
308
+ const parsed = JSON.parse(output.trim()) as Record<string, unknown>;
309
+ expect(parsed.success).toBe(true);
310
+ expect(parsed.stopped).toBe(true);
311
+ expect(parsed.wasZombie).toBe(true);
312
+ expect(parsed.agentName).toBe("my-builder");
313
+
314
+ const { store } = openSessionStore(overstoryDir);
315
+ const updated = store.getByName("my-builder");
316
+ store.close();
317
+ expect(updated?.state).toBe("completed");
270
318
  });
271
319
  });
272
320
 
@@ -86,9 +86,7 @@ export async function stopCommand(
86
86
  throw new AgentError(`Agent "${agentName}" is already completed`, { agentName });
87
87
  }
88
88
 
89
- if (session.state === "zombie") {
90
- throw new AgentError(`Agent "${agentName}" is already zombie (dead)`, { agentName });
91
- }
89
+ const isZombie = session.state === "zombie";
92
90
 
93
91
  const isHeadless = session.tmuxSession === "" && session.pid !== null;
94
92
 
@@ -140,6 +138,7 @@ export async function stopCommand(
140
138
  pidKilled,
141
139
  worktreeRemoved,
142
140
  force,
141
+ wasZombie: isZombie,
143
142
  });
144
143
  } else {
145
144
  printSuccess("Agent stopped", agentName);
@@ -156,6 +155,9 @@ export async function stopCommand(
156
155
  process.stdout.write(` Tmux session was already dead\n`);
157
156
  }
158
157
  }
158
+ if (isZombie) {
159
+ process.stdout.write(` Zombie agent cleaned up (state → completed)\n`);
160
+ }
159
161
  if (cleanWorktree && worktreeRemoved) {
160
162
  process.stdout.write(` Worktree removed: ${session.worktreePath}\n`);
161
163
  }
@@ -143,7 +143,7 @@ async function startSupervisor(opts: {
143
143
  );
144
144
  const manifest = await manifestLoader.load();
145
145
  const resolvedModel = resolveModel(config, manifest, "supervisor", "opus");
146
- const runtime = getRuntime(undefined, config);
146
+ const runtime = getRuntime(undefined, config, "supervisor");
147
147
 
148
148
  // Deploy supervisor-specific hooks to the project root's .claude/ directory.
149
149
  await runtime.deployConfig(projectRoot, undefined, {
@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
4
4
  import { join } from "node:path";
5
5
  import {
6
6
  clearProjectRootOverride,
7
+ clearWarningsSeen,
7
8
  DEFAULT_CONFIG,
8
9
  DEFAULT_QUALITY_GATES,
9
10
  loadConfig,
@@ -432,9 +433,11 @@ describe("validateConfig", () => {
432
433
  tempDir = await mkdtemp(join(tmpdir(), "overstory-test-"));
433
434
  const { mkdir } = await import("node:fs/promises");
434
435
  await mkdir(join(tempDir, ".overstory"), { recursive: true });
436
+ clearWarningsSeen();
435
437
  });
436
438
 
437
439
  afterEach(async () => {
440
+ clearWarningsSeen();
438
441
  await cleanupTempDir(tempDir);
439
442
  });
440
443
 
@@ -691,6 +694,39 @@ models:
691
694
  expect((err as ValidationError).message).toContain("provider-prefixed ref");
692
695
  });
693
696
 
697
+ test("accepts bare model name when runtime.default is codex", async () => {
698
+ await writeConfig(`
699
+ runtime:
700
+ default: codex
701
+ models:
702
+ coordinator: gpt-5.3-codex
703
+ `);
704
+ const config = await loadConfig(tempDir);
705
+ expect(config.models.coordinator).toBe("gpt-5.3-codex");
706
+ });
707
+
708
+ test("warns on bare non-Anthropic model in tool-heavy role when runtime.default is codex", async () => {
709
+ await writeConfig(`
710
+ runtime:
711
+ default: codex
712
+ models:
713
+ builder: gpt-5.3-codex
714
+ `);
715
+ const origWrite = process.stderr.write;
716
+ let capturedStderr = "";
717
+ process.stderr.write = ((s: string | Uint8Array) => {
718
+ if (typeof s === "string") capturedStderr += s;
719
+ return true;
720
+ }) as typeof process.stderr.write;
721
+ try {
722
+ await loadConfig(tempDir);
723
+ } finally {
724
+ process.stderr.write = origWrite;
725
+ }
726
+ expect(capturedStderr).toContain("WARNING: models.builder uses non-Anthropic model");
727
+ expect(capturedStderr).toContain("gpt-5.3-codex");
728
+ });
729
+
694
730
  test("warns on non-Anthropic model in tool-heavy role", async () => {
695
731
  await writeConfig(`
696
732
  providers:
@@ -716,6 +752,33 @@ models:
716
752
  expect(capturedStderr).toContain("openrouter/openai/gpt-4");
717
753
  });
718
754
 
755
+ test("warns only once per role/model combination across multiple loadConfig calls", async () => {
756
+ await writeConfig(`
757
+ providers:
758
+ openrouter:
759
+ type: gateway
760
+ baseUrl: https://openrouter.ai/api/v1
761
+ authTokenEnv: OPENROUTER_API_KEY
762
+ models:
763
+ builder: openrouter/openai/gpt-4
764
+ `);
765
+ const origWrite = process.stderr.write;
766
+ const stderrLines: string[] = [];
767
+ process.stderr.write = ((s: string | Uint8Array) => {
768
+ if (typeof s === "string") stderrLines.push(s);
769
+ return true;
770
+ }) as typeof process.stderr.write;
771
+ try {
772
+ await loadConfig(tempDir);
773
+ await loadConfig(tempDir);
774
+ await loadConfig(tempDir);
775
+ } finally {
776
+ process.stderr.write = origWrite;
777
+ }
778
+ const warnings = stderrLines.filter((l) => l.includes("WARNING: models.builder"));
779
+ expect(warnings.length).toBe(1);
780
+ });
781
+
719
782
  test("does not warn for non-Anthropic model in non-tool-heavy role", async () => {
720
783
  await writeConfig(`
721
784
  providers:
package/src/config.ts CHANGED
@@ -5,6 +5,14 @@ import type { OverstoryConfig, QualityGate, TaskTrackerBackend } from "./types.t
5
5
  // Module-level project root override (set by --project global flag)
6
6
  let _projectRootOverride: string | undefined;
7
7
 
8
+ // Tracks warnings already emitted this process to avoid repeating on every loadConfig call.
9
+ const _warnedOnce = new Set<string>();
10
+
11
+ /** Clear the dedup warning set. Intended for tests only. */
12
+ export function clearWarningsSeen(): void {
13
+ _warnedOnce.clear();
14
+ }
15
+
8
16
  /** Override project root for all config resolution (used by --project global flag). */
9
17
  export function setProjectRootOverride(path: string): void {
10
18
  _projectRootOverride = path;
@@ -698,9 +706,24 @@ function validateConfig(config: OverstoryConfig): void {
698
706
  }
699
707
  }
700
708
 
701
- // models: validate each value — accepts aliases and provider-prefixed refs
709
+ if (config.runtime?.capabilities) {
710
+ for (const [cap, runtimeName] of Object.entries(config.runtime.capabilities)) {
711
+ if (runtimeName !== undefined && (typeof runtimeName !== "string" || runtimeName === "")) {
712
+ throw new ValidationError(`runtime.capabilities.${cap} must be a non-empty string`, {
713
+ field: `runtime.capabilities.${cap}`,
714
+ value: runtimeName,
715
+ });
716
+ }
717
+ }
718
+ }
719
+
720
+ // models: validate each value.
721
+ // - Standard runtimes: aliases (sonnet/opus/haiku) or provider-prefixed refs.
722
+ // - Codex runtime: also allow bare model refs (e.g. gpt-5.3-codex).
702
723
  const validAliases = ["sonnet", "opus", "haiku"];
703
724
  const toolHeavyRoles = ["builder", "scout"];
725
+ const defaultRuntime = config.runtime?.default ?? "claude";
726
+ const allowBareModelRefs = defaultRuntime === "codex";
704
727
  for (const [role, model] of Object.entries(config.models)) {
705
728
  if (model === undefined) continue;
706
729
  if (model.includes("/")) {
@@ -716,13 +739,25 @@ function validateConfig(config: OverstoryConfig): void {
716
739
  );
717
740
  }
718
741
  if (toolHeavyRoles.includes(role)) {
719
- process.stderr.write(
720
- `[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
721
- );
742
+ const warnKey = `non-anthropic:${role}:${model}`;
743
+ if (!_warnedOnce.has(warnKey)) {
744
+ _warnedOnce.add(warnKey);
745
+ process.stderr.write(
746
+ `[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
747
+ );
748
+ }
722
749
  }
723
750
  } else {
724
- // Must be a valid alias
751
+ // Must be a valid alias unless codex runtime is active.
725
752
  if (!validAliases.includes(model)) {
753
+ if (allowBareModelRefs) {
754
+ if (toolHeavyRoles.includes(role)) {
755
+ process.stderr.write(
756
+ `[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
757
+ );
758
+ }
759
+ continue;
760
+ }
726
761
  throw new ValidationError(
727
762
  `models.${role} must be a valid alias (${validAliases.join(", ")}) or a provider-prefixed ref (e.g., openrouter/openai/gpt-4)`,
728
763
  {
package/src/index.ts CHANGED
@@ -49,7 +49,7 @@ import { ConfigError, OverstoryError, WorktreeError } from "./errors.ts";
49
49
  import { jsonError } from "./json.ts";
50
50
  import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
51
51
 
52
- export const VERSION = "0.8.2";
52
+ export const VERSION = "0.8.4";
53
53
 
54
54
  const rawArgs = process.argv.slice(2);
55
55
 
@@ -271,7 +271,7 @@ program
271
271
  "Agent type: builder | scout | reviewer | lead | merger",
272
272
  "builder",
273
273
  )
274
- .option("--name <name>", "Unique agent name")
274
+ .option("--name <name>", "Unique agent name (auto-generated if omitted)")
275
275
  .option("--spec <path>", "Path to task spec file")
276
276
  .option("--files <list>", "Exclusive file scope (comma-separated)")
277
277
  .option("--parent <agent>", "Parent agent for hierarchy tracking")
@@ -290,6 +290,105 @@ describe("createMergeResolver", () => {
290
290
  });
291
291
  });
292
292
 
293
+ describe("Dirty working tree pre-check", () => {
294
+ test("throws MergeError when unstaged changes exist on tracked files", async () => {
295
+ const repoDir = await createTempGitRepo();
296
+ try {
297
+ const defaultBranch = await getDefaultBranch(repoDir);
298
+ // Create a tracked file and then leave it modified (unstaged)
299
+ await commitFile(repoDir, "src/main.ts", "original content\n");
300
+ await runGitInDir(repoDir, ["checkout", "-b", "feature-branch"]);
301
+ await commitFile(repoDir, "src/feature.ts", "feature content\n");
302
+ await runGitInDir(repoDir, ["checkout", defaultBranch]);
303
+ // Modify a tracked file without staging
304
+ await Bun.write(`${repoDir}/src/main.ts`, "modified content\n");
305
+
306
+ const entry = makeTestEntry({
307
+ branchName: "feature-branch",
308
+ filesModified: ["src/feature.ts"],
309
+ });
310
+
311
+ const resolver = createMergeResolver({
312
+ aiResolveEnabled: false,
313
+ reimagineEnabled: false,
314
+ });
315
+
316
+ await expect(resolver.resolve(entry, defaultBranch, repoDir)).rejects.toThrow(MergeError);
317
+ } finally {
318
+ await cleanupTempDir(repoDir);
319
+ }
320
+ });
321
+
322
+ test("throws MergeError with message listing dirty files", async () => {
323
+ const repoDir = await createTempGitRepo();
324
+ try {
325
+ const defaultBranch = await getDefaultBranch(repoDir);
326
+ await commitFile(repoDir, "src/main.ts", "original content\n");
327
+ await runGitInDir(repoDir, ["checkout", "-b", "feature-branch"]);
328
+ await commitFile(repoDir, "src/feature.ts", "feature content\n");
329
+ await runGitInDir(repoDir, ["checkout", defaultBranch]);
330
+ await Bun.write(`${repoDir}/src/main.ts`, "modified content\n");
331
+
332
+ const entry = makeTestEntry({ branchName: "feature-branch" });
333
+ const resolver = createMergeResolver({ aiResolveEnabled: false, reimagineEnabled: false });
334
+
335
+ try {
336
+ await resolver.resolve(entry, defaultBranch, repoDir);
337
+ expect(true).toBe(false); // should not reach
338
+ } catch (err: unknown) {
339
+ expect(err).toBeInstanceOf(MergeError);
340
+ const mergeErr = err as MergeError;
341
+ expect(mergeErr.message).toContain("src/main.ts");
342
+ expect(mergeErr.message).toContain("Commit or stash");
343
+ }
344
+ } finally {
345
+ await cleanupTempDir(repoDir);
346
+ }
347
+ });
348
+
349
+ test("throws MergeError when staged but uncommitted changes exist", async () => {
350
+ const repoDir = await createTempGitRepo();
351
+ try {
352
+ const defaultBranch = await getDefaultBranch(repoDir);
353
+ await commitFile(repoDir, "src/main.ts", "original content\n");
354
+ await runGitInDir(repoDir, ["checkout", "-b", "feature-branch"]);
355
+ await commitFile(repoDir, "src/feature.ts", "feature content\n");
356
+ await runGitInDir(repoDir, ["checkout", defaultBranch]);
357
+ // Modify and stage (but don't commit)
358
+ await Bun.write(`${repoDir}/src/main.ts`, "staged but not committed\n");
359
+ await runGitInDir(repoDir, ["add", "src/main.ts"]);
360
+
361
+ const entry = makeTestEntry({ branchName: "feature-branch" });
362
+ const resolver = createMergeResolver({ aiResolveEnabled: false, reimagineEnabled: false });
363
+
364
+ await expect(resolver.resolve(entry, defaultBranch, repoDir)).rejects.toThrow(MergeError);
365
+ } finally {
366
+ await cleanupTempDir(repoDir);
367
+ }
368
+ });
369
+
370
+ test("clean working tree proceeds normally to Tier 1", async () => {
371
+ const repoDir = await createTempGitRepo();
372
+ try {
373
+ const defaultBranch = await getDefaultBranch(repoDir);
374
+ await setupCleanMerge(repoDir, defaultBranch);
375
+
376
+ const entry = makeTestEntry({
377
+ branchName: "feature-branch",
378
+ filesModified: ["src/feature-file.ts"],
379
+ });
380
+
381
+ const resolver = createMergeResolver({ aiResolveEnabled: false, reimagineEnabled: false });
382
+ const result = await resolver.resolve(entry, defaultBranch, repoDir);
383
+
384
+ expect(result.success).toBe(true);
385
+ expect(result.tier).toBe("clean-merge");
386
+ } finally {
387
+ await cleanupTempDir(repoDir);
388
+ }
389
+ });
390
+ });
391
+
293
392
  describe("Tier 1 fail -> Tier 2: Auto-resolve", () => {
294
393
  test("auto-resolves conflicts keeping incoming changes with correct content", async () => {
295
394
  const repoDir = await createTempGitRepo();
@@ -50,6 +50,26 @@ async function runGit(
50
50
  return { stdout, stderr, exitCode };
51
51
  }
52
52
 
53
+ /**
54
+ * Get the list of tracked files with uncommitted changes (unstaged or staged).
55
+ * Returns deduplicated list of file paths. An empty list means the working tree is clean.
56
+ */
57
+ async function checkDirtyWorkingTree(repoRoot: string): Promise<string[]> {
58
+ const { stdout: unstaged } = await runGit(repoRoot, ["diff", "--name-only"]);
59
+ const { stdout: staged } = await runGit(repoRoot, ["diff", "--name-only", "--cached"]);
60
+ const files = [
61
+ ...unstaged
62
+ .trim()
63
+ .split("\n")
64
+ .filter((l) => l.length > 0),
65
+ ...staged
66
+ .trim()
67
+ .split("\n")
68
+ .filter((l) => l.length > 0),
69
+ ];
70
+ return [...new Set(files)];
71
+ }
72
+
53
73
  /**
54
74
  * Get the list of conflicted files from `git diff --name-only --diff-filter=U`.
55
75
  */
@@ -593,6 +613,17 @@ export function createMergeResolver(options: {
593
613
  }
594
614
  }
595
615
 
616
+ // Pre-check: abort early if working tree has uncommitted changes.
617
+ // When dirty tracked files exist, git merge refuses to start (exit 1, no conflict markers),
618
+ // causing all tiers to cascade with empty conflict lists and a misleading final error.
619
+ const dirtyFiles = await checkDirtyWorkingTree(repoRoot);
620
+ if (dirtyFiles.length > 0) {
621
+ throw new MergeError(
622
+ `Working tree has uncommitted changes to tracked files: ${dirtyFiles.join(", ")}. Commit or stash changes before running ov merge.`,
623
+ { branchName: entry.branchName },
624
+ );
625
+ }
626
+
596
627
  let lastTier: ResolutionTier = "clean-merge";
597
628
  let conflictFiles: string[] = [];
598
629
 
@@ -6,7 +6,7 @@
6
6
  *
7
7
  * Coverage:
8
8
  * - parseTranscriptUsage (transcript.ts)
9
- * - estimateCost re-export (transcript.ts -> pricing.ts)
9
+ * - estimateCost (pricing.ts, imported directly)
10
10
  * - getPricingForModel (pricing.ts)
11
11
  */
12
12
 
@@ -15,8 +15,8 @@ import { mkdtemp } from "node:fs/promises";
15
15
  import { tmpdir } from "node:os";
16
16
  import { join } from "node:path";
17
17
  import { cleanupTempDir } from "../test-helpers.ts";
18
- import { getPricingForModel, estimateCost as pricingEstimateCost } from "./pricing.ts";
19
- import { estimateCost, parseTranscriptUsage } from "./transcript.ts";
18
+ import { estimateCost, getPricingForModel } from "./pricing.ts";
19
+ import { parseTranscriptUsage } from "./transcript.ts";
20
20
 
21
21
  let tempDir: string;
22
22
 
@@ -479,17 +479,5 @@ describe("getPricingForModel", () => {
479
479
  });
480
480
  });
481
481
 
482
- // === re-export parity ===
483
-
484
- describe("estimateCost re-export parity", () => {
485
- test("transcript.estimateCost and pricing.estimateCost produce same result", () => {
486
- const usage = {
487
- inputTokens: 1_000_000,
488
- outputTokens: 1_000_000,
489
- cacheReadTokens: 1_000_000,
490
- cacheCreationTokens: 1_000_000,
491
- modelUsed: "claude-opus-4-6",
492
- };
493
- expect(estimateCost(usage)).toBe(pricingEstimateCost(usage));
494
- });
495
- });
482
+ // estimateCost re-export removed from transcript.ts (overstory-aa00).
483
+ // estimateCost is now imported directly from pricing.ts everywhere.
@@ -27,8 +27,6 @@ import type { TokenUsage } from "./pricing.ts";
27
27
 
28
28
  export type TranscriptUsage = TokenUsage;
29
29
 
30
- export { estimateCost } from "./pricing.ts";
31
-
32
30
  /**
33
31
  * Narrow an unknown value to determine if it looks like a transcript assistant entry.
34
32
  * Returns the usage fields if valid, or null otherwise.
@@ -5,7 +5,8 @@
5
5
  import { mkdir } from "node:fs/promises";
6
6
  import { join } from "node:path";
7
7
  import { deployHooks } from "../agents/hooks-deployer.ts";
8
- import { estimateCost, parseTranscriptUsage } from "../metrics/transcript.ts";
8
+ import { estimateCost } from "../metrics/pricing.ts";
9
+ import { parseTranscriptUsage } from "../metrics/transcript.ts";
9
10
  import type { ResolvedModel } from "../types.ts";
10
11
  import type {
11
12
  AgentRuntime,
@@ -219,6 +220,22 @@ export class ClaudeRuntime implements AgentRuntime {
219
220
  buildEnv(model: ResolvedModel): Record<string, string> {
220
221
  return model.env ?? {};
221
222
  }
223
+
224
+ /**
225
+ * Return the Claude Code transcript directory for a given project root.
226
+ *
227
+ * Claude Code stores session transcripts at ~/.claude/projects/<projectKey>/
228
+ * where <projectKey> is the project root path with "/" replaced by "-".
229
+ *
230
+ * @param projectRoot - Absolute path to the project root
231
+ * @returns Absolute path to the transcript directory, or null if HOME is unavailable
232
+ */
233
+ getTranscriptDir(projectRoot: string): string | null {
234
+ const home = process.env.HOME ?? "";
235
+ if (home.length === 0) return null;
236
+ const projectKey = projectRoot.replace(/\//g, "-");
237
+ return join(home, ".claude", "projects", projectKey);
238
+ }
222
239
  }
223
240
 
224
241
  /** Singleton instance for use in callers that do not need DI. */
@@ -20,7 +20,7 @@ describe("CodexRuntime", () => {
20
20
  });
21
21
 
22
22
  describe("buildSpawnCommand", () => {
23
- test("basic command uses codex exec with --full-auto and --json", () => {
23
+ test("basic command uses interactive codex with --full-auto", () => {
24
24
  const opts: SpawnOpts = {
25
25
  model: "gpt-5-codex",
26
26
  permissionMode: "bypass",
@@ -28,11 +28,25 @@ describe("CodexRuntime", () => {
28
28
  env: {},
29
29
  };
30
30
  const cmd = runtime.buildSpawnCommand(opts);
31
- expect(cmd).toContain("codex exec --full-auto --json");
31
+ expect(cmd).toContain("codex --full-auto");
32
32
  expect(cmd).toContain("--model gpt-5-codex");
33
33
  expect(cmd).toContain("Read AGENTS.md");
34
34
  });
35
35
 
36
+ test("manifest aliases omit --model so codex uses default configured model", () => {
37
+ for (const alias of ["sonnet", "opus", "haiku"]) {
38
+ const opts: SpawnOpts = {
39
+ model: alias,
40
+ permissionMode: "bypass",
41
+ cwd: "/tmp/worktree",
42
+ env: {},
43
+ };
44
+ const cmd = runtime.buildSpawnCommand(opts);
45
+ expect(cmd).toContain("codex --full-auto");
46
+ expect(cmd).not.toContain(" --model ");
47
+ }
48
+ });
49
+
36
50
  test("permissionMode is NOT included in command (Codex uses OS sandbox)", () => {
37
51
  const opts: SpawnOpts = {
38
52
  model: "gpt-5-codex",
@@ -146,7 +160,7 @@ describe("CodexRuntime", () => {
146
160
  };
147
161
  const cmd = runtime.buildSpawnCommand(opts);
148
162
  expect(cmd).toBe(
149
- "codex exec --full-auto --json --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
163
+ "codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
150
164
  );
151
165
  });
152
166
 
@@ -254,7 +268,7 @@ describe("CodexRuntime", () => {
254
268
  });
255
269
 
256
270
  describe("detectReady", () => {
257
- test("returns ready for empty pane (headless — always ready)", () => {
271
+ test("returns ready for empty pane", () => {
258
272
  const state = runtime.detectReady("");
259
273
  expect(state).toEqual({ phase: "ready" });
260
274
  });
@@ -279,7 +293,7 @@ describe("CodexRuntime", () => {
279
293
  });
280
294
 
281
295
  describe("requiresBeaconVerification", () => {
282
- test("returns false (headless — no beacon needed)", () => {
296
+ test("returns false (no beacon verification needed)", () => {
283
297
  expect(runtime.requiresBeaconVerification()).toBe(false);
284
298
  });
285
299
  });
@@ -664,7 +678,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
664
678
  env: { OVERSTORY_AGENT_NAME: "builder-1" },
665
679
  });
666
680
  expect(cmd).toBe(
667
- "codex exec --full-auto --json --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
681
+ "codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
668
682
  );
669
683
  });
670
684
 
@@ -677,7 +691,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
677
691
  appendSystemPrompt: baseDefinition,
678
692
  env: { OVERSTORY_AGENT_NAME: "coordinator" },
679
693
  });
680
- expect(cmd).toContain("codex exec --full-auto --json --model gpt-5-codex");
694
+ expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
681
695
  expect(cmd).toContain("# Coordinator");
682
696
  expect(cmd).toContain("You are the coordinator agent.");
683
697
  expect(cmd).toContain("Read AGENTS.md");
@@ -691,7 +705,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
691
705
  appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
692
706
  env: { OVERSTORY_AGENT_NAME: "coordinator" },
693
707
  });
694
- expect(cmd).toContain("codex exec --full-auto --json --model gpt-5-codex");
708
+ expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
695
709
  expect(cmd).toContain("$(cat '/project/.overstory/agent-defs/coordinator.md')");
696
710
  expect(cmd).toContain("Read AGENTS.md");
697
711
  });