@os-eco/overstory-cli 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/builder.md +2 -2
- package/agents/lead.md +2 -2
- package/agents/merger.md +2 -2
- package/agents/orchestrator.md +1 -1
- package/agents/reviewer.md +2 -2
- package/agents/scout.md +2 -2
- package/agents/supervisor.md +3 -3
- package/package.json +1 -1
- package/src/agents/overlay.test.ts +42 -0
- package/src/agents/overlay.ts +1 -0
- package/src/commands/sling.test.ts +34 -10
- package/src/commands/sling.ts +51 -35
- package/src/commands/stop.test.ts +52 -4
- package/src/commands/stop.ts +5 -3
- package/src/config.test.ts +63 -0
- package/src/config.ts +29 -5
- package/src/index.ts +2 -2
- package/src/runtimes/codex.test.ts +22 -8
- package/src/runtimes/codex.ts +21 -16
- package/src/types.ts +2 -0
package/agents/builder.md
CHANGED
|
@@ -20,7 +20,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
20
20
|
|
|
21
21
|
## overlay
|
|
22
22
|
|
|
23
|
-
Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in
|
|
23
|
+
Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
|
|
24
24
|
|
|
25
25
|
## constraints
|
|
26
26
|
|
|
@@ -108,7 +108,7 @@ You are an implementation specialist. Given a spec and a set of files you own, y
|
|
|
108
108
|
|
|
109
109
|
## workflow
|
|
110
110
|
|
|
111
|
-
1. **Read your overlay** at
|
|
111
|
+
1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, spec path, file scope, branch name, and agent name.
|
|
112
112
|
2. **Read the task spec** at the path specified in your overlay. Understand what needs to be built.
|
|
113
113
|
3. **Load expertise** via `ml prime [domain]` for domains listed in your overlay. Apply existing patterns and conventions.
|
|
114
114
|
4. **Implement the changes:**
|
package/agents/lead.md
CHANGED
|
@@ -43,7 +43,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
43
43
|
|
|
44
44
|
## overlay
|
|
45
45
|
|
|
46
|
-
Your task-specific context (task ID, spec path, hierarchy depth, agent name, whether you can spawn) is in
|
|
46
|
+
Your task-specific context (task ID, spec path, hierarchy depth, agent name, whether you can spawn) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to coordinate. This file tells you HOW to coordinate.
|
|
47
47
|
|
|
48
48
|
## constraints
|
|
49
49
|
|
|
@@ -160,7 +160,7 @@ Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration,
|
|
|
160
160
|
|
|
161
161
|
Delegate exploration to scouts so you can focus on decomposition and planning.
|
|
162
162
|
|
|
163
|
-
1. **Read your overlay** at
|
|
163
|
+
1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, hierarchy depth, and agent name.
|
|
164
164
|
2. **Load expertise** via `ml prime [domain]` for relevant domains.
|
|
165
165
|
3. **Search mulch for relevant context** before decomposing. Run `ml search <task keywords>` and review failure patterns, conventions, and decisions. Factor these insights into your specs.
|
|
166
166
|
4. **Load file-specific expertise** if files are known. Use `ml prime --files <file1,file2,...>` to get file-scoped context. Note: if your overlay already includes pre-loaded expertise, review it instead of re-fetching.
|
package/agents/merger.md
CHANGED
|
@@ -19,7 +19,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
19
19
|
|
|
20
20
|
## overlay
|
|
21
21
|
|
|
22
|
-
Your task-specific context (task ID, branches to merge, target branch, merge order, parent agent) is in
|
|
22
|
+
Your task-specific context (task ID, branches to merge, target branch, merge order, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to merge. This file tells you HOW to merge.
|
|
23
23
|
|
|
24
24
|
## constraints
|
|
25
25
|
|
|
@@ -97,7 +97,7 @@ You are a branch integration specialist. When workers complete their tasks on se
|
|
|
97
97
|
|
|
98
98
|
## workflow
|
|
99
99
|
|
|
100
|
-
1. **Read your overlay** at
|
|
100
|
+
1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, the branches to merge, the target branch, and your agent name.
|
|
101
101
|
2. **Read the task spec** at the path specified in your overlay. Understand which branches need merging and in what order.
|
|
102
102
|
3. **Review the branches** before merging:
|
|
103
103
|
- `git log <target>..<branch>` to see what each branch contains.
|
package/agents/orchestrator.md
CHANGED
|
@@ -31,7 +31,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
31
31
|
|
|
32
32
|
## overlay
|
|
33
33
|
|
|
34
|
-
Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in
|
|
34
|
+
Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
|
|
35
35
|
|
|
36
36
|
## constraints
|
|
37
37
|
|
package/agents/reviewer.md
CHANGED
|
@@ -16,7 +16,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
16
16
|
|
|
17
17
|
## overlay
|
|
18
18
|
|
|
19
|
-
Your task-specific context (task ID, code to review, branch name, parent agent) is in
|
|
19
|
+
Your task-specific context (task ID, code to review, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to review. This file tells you HOW to review.
|
|
20
20
|
|
|
21
21
|
## constraints
|
|
22
22
|
|
|
@@ -95,7 +95,7 @@ You are a validation specialist. Given code to review, you check it for correctn
|
|
|
95
95
|
|
|
96
96
|
## workflow
|
|
97
97
|
|
|
98
|
-
1. **Read your overlay** at
|
|
98
|
+
1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task ID, the code or branch to review, and your agent name.
|
|
99
99
|
2. **Read the task spec** at the path specified in your overlay. Understand what was supposed to be built.
|
|
100
100
|
3. **Load expertise** via `ml prime [domain]` to understand project conventions and standards.
|
|
101
101
|
4. **Review the code changes:**
|
package/agents/scout.md
CHANGED
|
@@ -16,7 +16,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
16
16
|
|
|
17
17
|
## overlay
|
|
18
18
|
|
|
19
|
-
Your task-specific context (what to explore, who spawned you, your agent name) is in
|
|
19
|
+
Your task-specific context (what to explore, who spawned you, your agent name) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `overstory sling` and tells you WHAT to work on. This file tells you HOW to work.
|
|
20
20
|
|
|
21
21
|
## constraints
|
|
22
22
|
|
|
@@ -97,7 +97,7 @@ You perform reconnaissance. Given a research question, exploration target, or an
|
|
|
97
97
|
|
|
98
98
|
## workflow
|
|
99
99
|
|
|
100
|
-
1. **Read your overlay** at
|
|
100
|
+
1. **Read your overlay** at `{{INSTRUCTION_PATH}}` in your worktree. This contains your task assignment, spec path, and agent name.
|
|
101
101
|
2. **Read the task spec** at the path specified in your overlay.
|
|
102
102
|
3. **Load relevant expertise** via `ml prime [domain]` for domains listed in your overlay.
|
|
103
103
|
4. **Explore systematically:**
|
package/agents/supervisor.md
CHANGED
|
@@ -31,7 +31,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
31
31
|
|
|
32
32
|
## overlay
|
|
33
33
|
|
|
34
|
-
Unlike the coordinator (which has no overlay), you receive your task-specific context via the overlay CLAUDE.md at
|
|
34
|
+
Unlike the coordinator (which has no overlay), you receive your task-specific context via the overlay CLAUDE.md at `{{INSTRUCTION_PATH}}` in your worktree root. This file is generated by `ov supervisor start` (or `ov sling` with `--capability supervisor`) and provides:
|
|
35
35
|
|
|
36
36
|
- **Agent Name** (`$OVERSTORY_AGENT_NAME`) -- your mail address
|
|
37
37
|
- **Task ID** -- the issue you are assigned to
|
|
@@ -163,7 +163,7 @@ Before spawning, check `ov status` to ensure non-overlapping file scope across a
|
|
|
163
163
|
|
|
164
164
|
## workflow
|
|
165
165
|
|
|
166
|
-
1. **Receive the dispatch.** Your overlay (
|
|
166
|
+
1. **Receive the dispatch.** Your overlay (`{{INSTRUCTION_PATH}}`) contains your task ID and spec path. The coordinator sends you a `dispatch` mail with task details.
|
|
167
167
|
2. **Read your task spec** at the path specified in your overlay. Understand the full scope of work assigned to you.
|
|
168
168
|
3. **Load expertise** via `ml prime [domain]` for each relevant domain. Check `{{TRACKER_CLI}} show <task-id>` for task details and dependencies.
|
|
169
169
|
4. **Analyze scope and decompose.** Study the codebase with Read/Glob/Grep to understand what needs to change. Determine:
|
|
@@ -418,7 +418,7 @@ You are long-lived within a project. You survive across batches and can recover
|
|
|
418
418
|
- **Checkpoints** are saved to `.overstory/agents/$OVERSTORY_AGENT_NAME/checkpoint.json` before compaction or handoff. The checkpoint contains: agent name, assigned task ID, active worker IDs, task group ID, session ID, progress summary, and files modified.
|
|
419
419
|
- **On recovery**, reload context by:
|
|
420
420
|
1. Reading your checkpoint: `.overstory/agents/$OVERSTORY_AGENT_NAME/checkpoint.json`
|
|
421
|
-
2. Reading your overlay:
|
|
421
|
+
2. Reading your overlay: `{{INSTRUCTION_PATH}}` (task ID, spec path, depth, parent)
|
|
422
422
|
3. Checking active group: `ov group status <group-id>`
|
|
423
423
|
4. Checking worker states: `ov status`
|
|
424
424
|
5. Checking unread mail: `ov mail check --agent $OVERSTORY_AGENT_NAME`
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.8.2",
|
|
3
|
+
"version": "0.8.3",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
@@ -875,6 +875,48 @@ describe("formatQualityGatesCapabilities", () => {
|
|
|
875
875
|
});
|
|
876
876
|
});
|
|
877
877
|
|
|
878
|
+
describe("INSTRUCTION_PATH placeholder", () => {
|
|
879
|
+
test("defaults to .claude/CLAUDE.md when instructionPath is not set", async () => {
|
|
880
|
+
const config = makeConfig({
|
|
881
|
+
baseDefinition: "Read your overlay at {{INSTRUCTION_PATH}} in your worktree.",
|
|
882
|
+
});
|
|
883
|
+
const output = await generateOverlay(config);
|
|
884
|
+
|
|
885
|
+
expect(output).toContain("Read your overlay at .claude/CLAUDE.md in your worktree.");
|
|
886
|
+
expect(output).not.toContain("{{INSTRUCTION_PATH}}");
|
|
887
|
+
});
|
|
888
|
+
|
|
889
|
+
test("uses custom instructionPath when set", async () => {
|
|
890
|
+
const config = makeConfig({
|
|
891
|
+
instructionPath: "SAPLING.md",
|
|
892
|
+
baseDefinition: "Read your overlay at {{INSTRUCTION_PATH}} in your worktree.",
|
|
893
|
+
});
|
|
894
|
+
const output = await generateOverlay(config);
|
|
895
|
+
|
|
896
|
+
expect(output).toContain("Read your overlay at SAPLING.md in your worktree.");
|
|
897
|
+
expect(output).not.toContain("{{INSTRUCTION_PATH}}");
|
|
898
|
+
expect(output).not.toContain(".claude/CLAUDE.md");
|
|
899
|
+
});
|
|
900
|
+
|
|
901
|
+
test("INSTRUCTION_PATH in base definition replaced throughout (multiple occurrences)", async () => {
|
|
902
|
+
const config = makeConfig({
|
|
903
|
+
instructionPath: "AGENTS.md",
|
|
904
|
+
baseDefinition: "Step 1: read {{INSTRUCTION_PATH}}.\nContext is in {{INSTRUCTION_PATH}}.",
|
|
905
|
+
});
|
|
906
|
+
const output = await generateOverlay(config);
|
|
907
|
+
|
|
908
|
+
expect(output).not.toContain("{{INSTRUCTION_PATH}}");
|
|
909
|
+
expect(output.split("AGENTS.md").length - 1).toBeGreaterThanOrEqual(2);
|
|
910
|
+
});
|
|
911
|
+
|
|
912
|
+
test("no unreplaced INSTRUCTION_PATH placeholders in final output", async () => {
|
|
913
|
+
const config = makeConfig({ instructionPath: "SAPLING.md" });
|
|
914
|
+
const output = await generateOverlay(config);
|
|
915
|
+
|
|
916
|
+
expect(output).not.toContain("{{INSTRUCTION_PATH}}");
|
|
917
|
+
});
|
|
918
|
+
});
|
|
919
|
+
|
|
878
920
|
describe("quality gate placeholders in base definitions", () => {
|
|
879
921
|
test("QUALITY_GATE_INLINE in base definition gets replaced", async () => {
|
|
880
922
|
const config = makeConfig({
|
package/src/agents/overlay.ts
CHANGED
|
@@ -320,6 +320,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
|
|
|
320
320
|
"{{QUALITY_GATE_CAPABILITIES}}": formatQualityGatesCapabilities(config.qualityGates),
|
|
321
321
|
"{{TRACKER_CLI}}": config.trackerCli ?? "sd",
|
|
322
322
|
"{{TRACKER_NAME}}": config.trackerName ?? "seeds",
|
|
323
|
+
"{{INSTRUCTION_PATH}}": config.instructionPath ?? ".claude/CLAUDE.md",
|
|
323
324
|
};
|
|
324
325
|
|
|
325
326
|
let result = template;
|
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
checkRunSessionLimit,
|
|
21
21
|
checkTaskLock,
|
|
22
22
|
extractMulchRecordIds,
|
|
23
|
+
generateAgentName,
|
|
23
24
|
getCurrentBranch,
|
|
24
25
|
inferDomainsFromFiles,
|
|
25
26
|
isRunningAsRoot,
|
|
@@ -342,6 +343,31 @@ describe("shouldShowScoutWarning", () => {
|
|
|
342
343
|
});
|
|
343
344
|
});
|
|
344
345
|
|
|
346
|
+
describe("generateAgentName", () => {
|
|
347
|
+
test("returns capability-taskId when no collision", () => {
|
|
348
|
+
expect(generateAgentName("builder", "overstory-2f10", [])).toBe("builder-overstory-2f10");
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
test("returns capability-taskId when takenNames is empty", () => {
|
|
352
|
+
expect(generateAgentName("scout", "task-123", [])).toBe("scout-task-123");
|
|
353
|
+
});
|
|
354
|
+
|
|
355
|
+
test("appends -2 when base name is taken", () => {
|
|
356
|
+
expect(generateAgentName("builder", "overstory-2f10", ["builder-overstory-2f10"])).toBe(
|
|
357
|
+
"builder-overstory-2f10-2",
|
|
358
|
+
);
|
|
359
|
+
});
|
|
360
|
+
|
|
361
|
+
test("skips taken suffixes and returns -3 when -2 is also taken", () => {
|
|
362
|
+
expect(
|
|
363
|
+
generateAgentName("builder", "overstory-2f10", [
|
|
364
|
+
"builder-overstory-2f10",
|
|
365
|
+
"builder-overstory-2f10-2",
|
|
366
|
+
]),
|
|
367
|
+
).toBe("builder-overstory-2f10-3");
|
|
368
|
+
});
|
|
369
|
+
});
|
|
370
|
+
|
|
345
371
|
/**
|
|
346
372
|
* Tests for hierarchy validation in sling.
|
|
347
373
|
*
|
|
@@ -352,14 +378,12 @@ describe("shouldShowScoutWarning", () => {
|
|
|
352
378
|
*/
|
|
353
379
|
|
|
354
380
|
describe("validateHierarchy", () => {
|
|
355
|
-
test("
|
|
356
|
-
expect(() => validateHierarchy(null, "builder", "test-builder", 0, false)).toThrow(
|
|
357
|
-
HierarchyError,
|
|
358
|
-
);
|
|
381
|
+
test("allows builder when parentAgent is null", () => {
|
|
382
|
+
expect(() => validateHierarchy(null, "builder", "test-builder", 0, false)).not.toThrow();
|
|
359
383
|
});
|
|
360
384
|
|
|
361
|
-
test("
|
|
362
|
-
expect(() => validateHierarchy(null, "scout", "test-scout", 0, false)).toThrow(
|
|
385
|
+
test("allows scout when parentAgent is null", () => {
|
|
386
|
+
expect(() => validateHierarchy(null, "scout", "test-scout", 0, false)).not.toThrow();
|
|
363
387
|
});
|
|
364
388
|
|
|
365
389
|
test("rejects reviewer when parentAgent is null", () => {
|
|
@@ -404,15 +428,15 @@ describe("validateHierarchy", () => {
|
|
|
404
428
|
|
|
405
429
|
test("error has correct fields and code", () => {
|
|
406
430
|
try {
|
|
407
|
-
validateHierarchy(null, "
|
|
431
|
+
validateHierarchy(null, "reviewer", "my-reviewer", 0, false);
|
|
408
432
|
expect.unreachable("should have thrown");
|
|
409
433
|
} catch (err) {
|
|
410
434
|
expect(err).toBeInstanceOf(HierarchyError);
|
|
411
435
|
const he = err as HierarchyError;
|
|
412
436
|
expect(he.code).toBe("HIERARCHY_VIOLATION");
|
|
413
|
-
expect(he.agentName).toBe("my-
|
|
414
|
-
expect(he.requestedCapability).toBe("
|
|
415
|
-
expect(he.message).toContain("
|
|
437
|
+
expect(he.agentName).toBe("my-reviewer");
|
|
438
|
+
expect(he.requestedCapability).toBe("reviewer");
|
|
439
|
+
expect(he.message).toContain("reviewer");
|
|
416
440
|
expect(he.message).toContain("lead");
|
|
417
441
|
}
|
|
418
442
|
});
|
package/src/commands/sling.ts
CHANGED
|
@@ -32,7 +32,6 @@ import { printSuccess } from "../logging/color.ts";
|
|
|
32
32
|
import { createMailClient } from "../mail/client.ts";
|
|
33
33
|
import { createMailStore } from "../mail/store.ts";
|
|
34
34
|
import { createMulchClient } from "../mulch/client.ts";
|
|
35
|
-
import { setConnection } from "../runtimes/connections.ts";
|
|
36
35
|
import { getRuntime } from "../runtimes/registry.ts";
|
|
37
36
|
import { openSessionStore } from "../sessions/compat.ts";
|
|
38
37
|
import { createRunStore } from "../sessions/store.ts";
|
|
@@ -78,6 +77,29 @@ export function calculateStaggerDelay(
|
|
|
78
77
|
return remaining > 0 ? remaining : 0;
|
|
79
78
|
}
|
|
80
79
|
|
|
80
|
+
/**
|
|
81
|
+
* Generate a unique agent name from capability and taskId.
|
|
82
|
+
* Base: capability-taskId. If that collides with takenNames,
|
|
83
|
+
* appends -2, -3, etc. up to 100. Falls back to -Date.now() for guaranteed uniqueness.
|
|
84
|
+
*/
|
|
85
|
+
export function generateAgentName(
|
|
86
|
+
capability: string,
|
|
87
|
+
taskId: string,
|
|
88
|
+
takenNames: readonly string[],
|
|
89
|
+
): string {
|
|
90
|
+
const base = `${capability}-${taskId}`;
|
|
91
|
+
if (!takenNames.includes(base)) {
|
|
92
|
+
return base;
|
|
93
|
+
}
|
|
94
|
+
for (let i = 2; i <= 100; i++) {
|
|
95
|
+
const candidate = `${base}-${i}`;
|
|
96
|
+
if (!takenNames.includes(candidate)) {
|
|
97
|
+
return candidate;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
return `${base}-${Date.now()}`;
|
|
101
|
+
}
|
|
102
|
+
|
|
81
103
|
/**
|
|
82
104
|
* Check if the current process is running as root (UID 0).
|
|
83
105
|
* Returns true if running as root, false otherwise.
|
|
@@ -348,9 +370,10 @@ export function validateHierarchy(
|
|
|
348
370
|
return;
|
|
349
371
|
}
|
|
350
372
|
|
|
351
|
-
|
|
373
|
+
const directSpawnCapabilities = ["lead", "scout", "builder"];
|
|
374
|
+
if (parentAgent === null && !directSpawnCapabilities.includes(capability)) {
|
|
352
375
|
throw new HierarchyError(
|
|
353
|
-
`Coordinator cannot spawn "${capability}" directly. Only
|
|
376
|
+
`Coordinator cannot spawn "${capability}" directly. Only lead, scout, and builder are allowed without --parent. Use a lead as intermediary, or pass --force-hierarchy to bypass.`,
|
|
354
377
|
{ agentName: name, requestedCapability: capability },
|
|
355
378
|
);
|
|
356
379
|
}
|
|
@@ -429,7 +452,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
429
452
|
}
|
|
430
453
|
|
|
431
454
|
const capability = opts.capability ?? "builder";
|
|
432
|
-
const
|
|
455
|
+
const rawName = opts.name?.trim() ?? "";
|
|
456
|
+
const nameWasAutoGenerated = rawName.length === 0;
|
|
457
|
+
let name = nameWasAutoGenerated ? `${capability}-${taskId}` : rawName;
|
|
433
458
|
const specPath = opts.spec ?? null;
|
|
434
459
|
const filesRaw = opts.files;
|
|
435
460
|
const parentAgent = opts.parent ?? null;
|
|
@@ -439,10 +464,6 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
439
464
|
const skipScout = opts.skipScout ?? false;
|
|
440
465
|
const skipTaskCheck = opts.skipTaskCheck ?? false;
|
|
441
466
|
|
|
442
|
-
if (!name || name.trim().length === 0) {
|
|
443
|
-
throw new ValidationError("--name is required for sling", { field: "name" });
|
|
444
|
-
}
|
|
445
|
-
|
|
446
467
|
if (Number.isNaN(depth) || depth < 0) {
|
|
447
468
|
throw new ValidationError("--depth must be a non-negative integer", {
|
|
448
469
|
field: "depth",
|
|
@@ -597,11 +618,16 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
597
618
|
);
|
|
598
619
|
}
|
|
599
620
|
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
621
|
+
if (nameWasAutoGenerated) {
|
|
622
|
+
const takenNames = activeSessions.map((s) => s.agentName);
|
|
623
|
+
name = generateAgentName(capability, taskId, takenNames);
|
|
624
|
+
} else {
|
|
625
|
+
const existing = store.getByName(name);
|
|
626
|
+
if (existing && existing.state !== "zombie" && existing.state !== "completed") {
|
|
627
|
+
throw new AgentError(`Agent name "${name}" is already in use (state: ${existing.state})`, {
|
|
628
|
+
agentName: name,
|
|
629
|
+
});
|
|
630
|
+
}
|
|
605
631
|
}
|
|
606
632
|
|
|
607
633
|
// 5d. Task-level locking: prevent concurrent agents on the same task ID.
|
|
@@ -717,6 +743,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
717
743
|
}
|
|
718
744
|
}
|
|
719
745
|
|
|
746
|
+
// Resolve runtime before overlayConfig so we can pass runtime.instructionPath
|
|
747
|
+
const runtime = getRuntime(opts.runtime, config);
|
|
748
|
+
|
|
720
749
|
const overlayConfig: OverlayConfig = {
|
|
721
750
|
agentName: name,
|
|
722
751
|
taskId: taskId,
|
|
@@ -742,11 +771,9 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
742
771
|
qualityGates: config.project.qualityGates,
|
|
743
772
|
trackerCli: trackerCliName(resolvedBackend),
|
|
744
773
|
trackerName: resolvedBackend,
|
|
774
|
+
instructionPath: runtime.instructionPath,
|
|
745
775
|
};
|
|
746
776
|
|
|
747
|
-
// Resolve runtime before writeOverlay so we can pass runtime.instructionPath
|
|
748
|
-
const runtime = getRuntime(opts.runtime, config);
|
|
749
|
-
|
|
750
777
|
try {
|
|
751
778
|
await writeOverlay(worktreePath, overlayConfig, config.project.root, runtime.instructionPath);
|
|
752
779
|
} catch (err) {
|
|
@@ -854,14 +881,14 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
854
881
|
});
|
|
855
882
|
|
|
856
883
|
// Create a timestamped log dir for this headless agent session.
|
|
857
|
-
//
|
|
858
|
-
//
|
|
859
|
-
//
|
|
884
|
+
// Always redirect stdout to a file. This prevents SIGPIPE death:
|
|
885
|
+
// ov sling exits after spawning, closing the pipe's read end.
|
|
886
|
+
// If stdout is a pipe, the agent dies on the next write (SIGPIPE).
|
|
887
|
+
// File writes have no such limit, and the agent survives the CLI exit.
|
|
860
888
|
//
|
|
861
|
-
//
|
|
862
|
-
//
|
|
863
|
-
//
|
|
864
|
-
const hasRpcConnect = typeof runtime.connect === "function";
|
|
889
|
+
// Note: RPC connection wiring is intentionally omitted here. The RPC pipe
|
|
890
|
+
// is only useful when the spawner stays alive to consume it. ov sling is
|
|
891
|
+
// a short-lived CLI — any connection created here dies with the process.
|
|
865
892
|
const logTimestamp = new Date().toISOString().replace(/[:.]/g, "-");
|
|
866
893
|
const agentLogDir = join(overstoryDir, "logs", name, logTimestamp);
|
|
867
894
|
mkdirSync(agentLogDir, { recursive: true });
|
|
@@ -869,21 +896,10 @@ export async function slingCommand(taskId: string, opts: SlingOptions): Promise<
|
|
|
869
896
|
const headlessProc = await spawnHeadlessAgent(argv, {
|
|
870
897
|
cwd: worktreePath,
|
|
871
898
|
env: { ...(process.env as Record<string, string>), ...directEnv },
|
|
872
|
-
stdoutFile:
|
|
899
|
+
stdoutFile: join(agentLogDir, "stdout.log"),
|
|
873
900
|
stderrFile: join(agentLogDir, "stderr.log"),
|
|
874
901
|
});
|
|
875
902
|
|
|
876
|
-
// Wire up RPC connection for runtimes that support it (e.g., Sapling).
|
|
877
|
-
// The connection is stored in the module-level registry so the watchdog
|
|
878
|
-
// and other subsystems can call getState() for health checks.
|
|
879
|
-
if (hasRpcConnect && headlessProc.stdout && runtime.connect) {
|
|
880
|
-
const connection = runtime.connect({
|
|
881
|
-
stdin: headlessProc.stdin,
|
|
882
|
-
stdout: headlessProc.stdout,
|
|
883
|
-
});
|
|
884
|
-
setConnection(name, connection);
|
|
885
|
-
}
|
|
886
|
-
|
|
887
903
|
// 13. Record session with empty tmuxSession (no tmux pane for headless agents).
|
|
888
904
|
const session: AgentSession = {
|
|
889
905
|
id: `session-${Date.now()}-${name}`,
|
|
@@ -260,13 +260,61 @@ describe("stopCommand validation", () => {
|
|
|
260
260
|
await expect(stopCommand("my-builder", {}, deps)).rejects.toThrow(/already completed/);
|
|
261
261
|
});
|
|
262
262
|
|
|
263
|
-
test("
|
|
263
|
+
test("succeeds when agent is zombie (cleanup, no error)", async () => {
|
|
264
264
|
const session = makeAgentSession({ state: "zombie" });
|
|
265
265
|
saveSessionsToDb([session]);
|
|
266
266
|
|
|
267
|
-
const { deps } = makeDeps();
|
|
268
|
-
await
|
|
269
|
-
|
|
267
|
+
const { deps } = makeDeps({ [session.tmuxSession]: false });
|
|
268
|
+
const output = await captureStdout(() => stopCommand("my-builder", {}, deps));
|
|
269
|
+
|
|
270
|
+
expect(output).toContain("Agent stopped");
|
|
271
|
+
expect(output).toContain("Zombie agent cleaned up");
|
|
272
|
+
|
|
273
|
+
const { store } = openSessionStore(overstoryDir);
|
|
274
|
+
const updated = store.getByName("my-builder");
|
|
275
|
+
store.close();
|
|
276
|
+
expect(updated?.state).toBe("completed");
|
|
277
|
+
});
|
|
278
|
+
});
|
|
279
|
+
|
|
280
|
+
describe("stopCommand zombie cleanup", () => {
|
|
281
|
+
test("zombie + --clean-worktree removes worktree", async () => {
|
|
282
|
+
const session = makeAgentSession({ state: "zombie" });
|
|
283
|
+
saveSessionsToDb([session]);
|
|
284
|
+
|
|
285
|
+
const { deps, worktreeCalls } = makeDeps({ [session.tmuxSession]: false });
|
|
286
|
+
const output = await captureStdout(() =>
|
|
287
|
+
stopCommand("my-builder", { cleanWorktree: true }, deps),
|
|
288
|
+
);
|
|
289
|
+
|
|
290
|
+
expect(output).toContain("Agent stopped");
|
|
291
|
+
expect(output).toContain("Zombie agent cleaned up");
|
|
292
|
+
expect(output).toContain(`Worktree removed: ${session.worktreePath}`);
|
|
293
|
+
expect(worktreeCalls.remove).toHaveLength(1);
|
|
294
|
+
|
|
295
|
+
const { store } = openSessionStore(overstoryDir);
|
|
296
|
+
const updated = store.getByName("my-builder");
|
|
297
|
+
store.close();
|
|
298
|
+
expect(updated?.state).toBe("completed");
|
|
299
|
+
});
|
|
300
|
+
|
|
301
|
+
test("zombie + --json includes wasZombie: true", async () => {
|
|
302
|
+
const session = makeAgentSession({ state: "zombie" });
|
|
303
|
+
saveSessionsToDb([session]);
|
|
304
|
+
|
|
305
|
+
const { deps } = makeDeps({ [session.tmuxSession]: false });
|
|
306
|
+
const output = await captureStdout(() => stopCommand("my-builder", { json: true }, deps));
|
|
307
|
+
|
|
308
|
+
const parsed = JSON.parse(output.trim()) as Record<string, unknown>;
|
|
309
|
+
expect(parsed.success).toBe(true);
|
|
310
|
+
expect(parsed.stopped).toBe(true);
|
|
311
|
+
expect(parsed.wasZombie).toBe(true);
|
|
312
|
+
expect(parsed.agentName).toBe("my-builder");
|
|
313
|
+
|
|
314
|
+
const { store } = openSessionStore(overstoryDir);
|
|
315
|
+
const updated = store.getByName("my-builder");
|
|
316
|
+
store.close();
|
|
317
|
+
expect(updated?.state).toBe("completed");
|
|
270
318
|
});
|
|
271
319
|
});
|
|
272
320
|
|
package/src/commands/stop.ts
CHANGED
|
@@ -86,9 +86,7 @@ export async function stopCommand(
|
|
|
86
86
|
throw new AgentError(`Agent "${agentName}" is already completed`, { agentName });
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
-
|
|
90
|
-
throw new AgentError(`Agent "${agentName}" is already zombie (dead)`, { agentName });
|
|
91
|
-
}
|
|
89
|
+
const isZombie = session.state === "zombie";
|
|
92
90
|
|
|
93
91
|
const isHeadless = session.tmuxSession === "" && session.pid !== null;
|
|
94
92
|
|
|
@@ -140,6 +138,7 @@ export async function stopCommand(
|
|
|
140
138
|
pidKilled,
|
|
141
139
|
worktreeRemoved,
|
|
142
140
|
force,
|
|
141
|
+
wasZombie: isZombie,
|
|
143
142
|
});
|
|
144
143
|
} else {
|
|
145
144
|
printSuccess("Agent stopped", agentName);
|
|
@@ -156,6 +155,9 @@ export async function stopCommand(
|
|
|
156
155
|
process.stdout.write(` Tmux session was already dead\n`);
|
|
157
156
|
}
|
|
158
157
|
}
|
|
158
|
+
if (isZombie) {
|
|
159
|
+
process.stdout.write(` Zombie agent cleaned up (state → completed)\n`);
|
|
160
|
+
}
|
|
159
161
|
if (cleanWorktree && worktreeRemoved) {
|
|
160
162
|
process.stdout.write(` Worktree removed: ${session.worktreePath}\n`);
|
|
161
163
|
}
|
package/src/config.test.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
|
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import {
|
|
6
6
|
clearProjectRootOverride,
|
|
7
|
+
clearWarningsSeen,
|
|
7
8
|
DEFAULT_CONFIG,
|
|
8
9
|
DEFAULT_QUALITY_GATES,
|
|
9
10
|
loadConfig,
|
|
@@ -432,9 +433,11 @@ describe("validateConfig", () => {
|
|
|
432
433
|
tempDir = await mkdtemp(join(tmpdir(), "overstory-test-"));
|
|
433
434
|
const { mkdir } = await import("node:fs/promises");
|
|
434
435
|
await mkdir(join(tempDir, ".overstory"), { recursive: true });
|
|
436
|
+
clearWarningsSeen();
|
|
435
437
|
});
|
|
436
438
|
|
|
437
439
|
afterEach(async () => {
|
|
440
|
+
clearWarningsSeen();
|
|
438
441
|
await cleanupTempDir(tempDir);
|
|
439
442
|
});
|
|
440
443
|
|
|
@@ -691,6 +694,39 @@ models:
|
|
|
691
694
|
expect((err as ValidationError).message).toContain("provider-prefixed ref");
|
|
692
695
|
});
|
|
693
696
|
|
|
697
|
+
test("accepts bare model name when runtime.default is codex", async () => {
|
|
698
|
+
await writeConfig(`
|
|
699
|
+
runtime:
|
|
700
|
+
default: codex
|
|
701
|
+
models:
|
|
702
|
+
coordinator: gpt-5.3-codex
|
|
703
|
+
`);
|
|
704
|
+
const config = await loadConfig(tempDir);
|
|
705
|
+
expect(config.models.coordinator).toBe("gpt-5.3-codex");
|
|
706
|
+
});
|
|
707
|
+
|
|
708
|
+
test("warns on bare non-Anthropic model in tool-heavy role when runtime.default is codex", async () => {
|
|
709
|
+
await writeConfig(`
|
|
710
|
+
runtime:
|
|
711
|
+
default: codex
|
|
712
|
+
models:
|
|
713
|
+
builder: gpt-5.3-codex
|
|
714
|
+
`);
|
|
715
|
+
const origWrite = process.stderr.write;
|
|
716
|
+
let capturedStderr = "";
|
|
717
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
718
|
+
if (typeof s === "string") capturedStderr += s;
|
|
719
|
+
return true;
|
|
720
|
+
}) as typeof process.stderr.write;
|
|
721
|
+
try {
|
|
722
|
+
await loadConfig(tempDir);
|
|
723
|
+
} finally {
|
|
724
|
+
process.stderr.write = origWrite;
|
|
725
|
+
}
|
|
726
|
+
expect(capturedStderr).toContain("WARNING: models.builder uses non-Anthropic model");
|
|
727
|
+
expect(capturedStderr).toContain("gpt-5.3-codex");
|
|
728
|
+
});
|
|
729
|
+
|
|
694
730
|
test("warns on non-Anthropic model in tool-heavy role", async () => {
|
|
695
731
|
await writeConfig(`
|
|
696
732
|
providers:
|
|
@@ -716,6 +752,33 @@ models:
|
|
|
716
752
|
expect(capturedStderr).toContain("openrouter/openai/gpt-4");
|
|
717
753
|
});
|
|
718
754
|
|
|
755
|
+
test("warns only once per role/model combination across multiple loadConfig calls", async () => {
|
|
756
|
+
await writeConfig(`
|
|
757
|
+
providers:
|
|
758
|
+
openrouter:
|
|
759
|
+
type: gateway
|
|
760
|
+
baseUrl: https://openrouter.ai/api/v1
|
|
761
|
+
authTokenEnv: OPENROUTER_API_KEY
|
|
762
|
+
models:
|
|
763
|
+
builder: openrouter/openai/gpt-4
|
|
764
|
+
`);
|
|
765
|
+
const origWrite = process.stderr.write;
|
|
766
|
+
const stderrLines: string[] = [];
|
|
767
|
+
process.stderr.write = ((s: string | Uint8Array) => {
|
|
768
|
+
if (typeof s === "string") stderrLines.push(s);
|
|
769
|
+
return true;
|
|
770
|
+
}) as typeof process.stderr.write;
|
|
771
|
+
try {
|
|
772
|
+
await loadConfig(tempDir);
|
|
773
|
+
await loadConfig(tempDir);
|
|
774
|
+
await loadConfig(tempDir);
|
|
775
|
+
} finally {
|
|
776
|
+
process.stderr.write = origWrite;
|
|
777
|
+
}
|
|
778
|
+
const warnings = stderrLines.filter((l) => l.includes("WARNING: models.builder"));
|
|
779
|
+
expect(warnings.length).toBe(1);
|
|
780
|
+
});
|
|
781
|
+
|
|
719
782
|
test("does not warn for non-Anthropic model in non-tool-heavy role", async () => {
|
|
720
783
|
await writeConfig(`
|
|
721
784
|
providers:
|
package/src/config.ts
CHANGED
|
@@ -5,6 +5,14 @@ import type { OverstoryConfig, QualityGate, TaskTrackerBackend } from "./types.t
|
|
|
5
5
|
// Module-level project root override (set by --project global flag)
|
|
6
6
|
let _projectRootOverride: string | undefined;
|
|
7
7
|
|
|
8
|
+
// Tracks warnings already emitted this process to avoid repeating on every loadConfig call.
|
|
9
|
+
const _warnedOnce = new Set<string>();
|
|
10
|
+
|
|
11
|
+
/** Clear the dedup warning set. Intended for tests only. */
|
|
12
|
+
export function clearWarningsSeen(): void {
|
|
13
|
+
_warnedOnce.clear();
|
|
14
|
+
}
|
|
15
|
+
|
|
8
16
|
/** Override project root for all config resolution (used by --project global flag). */
|
|
9
17
|
export function setProjectRootOverride(path: string): void {
|
|
10
18
|
_projectRootOverride = path;
|
|
@@ -698,9 +706,13 @@ function validateConfig(config: OverstoryConfig): void {
|
|
|
698
706
|
}
|
|
699
707
|
}
|
|
700
708
|
|
|
701
|
-
// models: validate each value
|
|
709
|
+
// models: validate each value.
|
|
710
|
+
// - Standard runtimes: aliases (sonnet/opus/haiku) or provider-prefixed refs.
|
|
711
|
+
// - Codex runtime: also allow bare model refs (e.g. gpt-5.3-codex).
|
|
702
712
|
const validAliases = ["sonnet", "opus", "haiku"];
|
|
703
713
|
const toolHeavyRoles = ["builder", "scout"];
|
|
714
|
+
const defaultRuntime = config.runtime?.default ?? "claude";
|
|
715
|
+
const allowBareModelRefs = defaultRuntime === "codex";
|
|
704
716
|
for (const [role, model] of Object.entries(config.models)) {
|
|
705
717
|
if (model === undefined) continue;
|
|
706
718
|
if (model.includes("/")) {
|
|
@@ -716,13 +728,25 @@ function validateConfig(config: OverstoryConfig): void {
|
|
|
716
728
|
);
|
|
717
729
|
}
|
|
718
730
|
if (toolHeavyRoles.includes(role)) {
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
731
|
+
const warnKey = `non-anthropic:${role}:${model}`;
|
|
732
|
+
if (!_warnedOnce.has(warnKey)) {
|
|
733
|
+
_warnedOnce.add(warnKey);
|
|
734
|
+
process.stderr.write(
|
|
735
|
+
`[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
|
|
736
|
+
);
|
|
737
|
+
}
|
|
722
738
|
}
|
|
723
739
|
} else {
|
|
724
|
-
// Must be a valid alias
|
|
740
|
+
// Must be a valid alias unless codex runtime is active.
|
|
725
741
|
if (!validAliases.includes(model)) {
|
|
742
|
+
if (allowBareModelRefs) {
|
|
743
|
+
if (toolHeavyRoles.includes(role)) {
|
|
744
|
+
process.stderr.write(
|
|
745
|
+
`[overstory] WARNING: models.${role} uses non-Anthropic model '${model}'. Tool-use compatibility cannot be verified at config time.\n`,
|
|
746
|
+
);
|
|
747
|
+
}
|
|
748
|
+
continue;
|
|
749
|
+
}
|
|
726
750
|
throw new ValidationError(
|
|
727
751
|
`models.${role} must be a valid alias (${validAliases.join(", ")}) or a provider-prefixed ref (e.g., openrouter/openai/gpt-4)`,
|
|
728
752
|
{
|
package/src/index.ts
CHANGED
|
@@ -49,7 +49,7 @@ import { ConfigError, OverstoryError, WorktreeError } from "./errors.ts";
|
|
|
49
49
|
import { jsonError } from "./json.ts";
|
|
50
50
|
import { brand, chalk, muted, setQuiet } from "./logging/color.ts";
|
|
51
51
|
|
|
52
|
-
export const VERSION = "0.8.2";
|
|
52
|
+
export const VERSION = "0.8.3";
|
|
53
53
|
|
|
54
54
|
const rawArgs = process.argv.slice(2);
|
|
55
55
|
|
|
@@ -271,7 +271,7 @@ program
|
|
|
271
271
|
"Agent type: builder | scout | reviewer | lead | merger",
|
|
272
272
|
"builder",
|
|
273
273
|
)
|
|
274
|
-
.option("--name <name>", "Unique agent name")
|
|
274
|
+
.option("--name <name>", "Unique agent name (auto-generated if omitted)")
|
|
275
275
|
.option("--spec <path>", "Path to task spec file")
|
|
276
276
|
.option("--files <list>", "Exclusive file scope (comma-separated)")
|
|
277
277
|
.option("--parent <agent>", "Parent agent for hierarchy tracking")
|
|
package/src/runtimes/codex.test.ts
CHANGED
|
@@ -20,7 +20,7 @@ describe("CodexRuntime", () => {
|
|
|
20
20
|
});
|
|
21
21
|
|
|
22
22
|
describe("buildSpawnCommand", () => {
|
|
23
|
-
test("basic command uses codex
|
|
23
|
+
test("basic command uses interactive codex with --full-auto", () => {
|
|
24
24
|
const opts: SpawnOpts = {
|
|
25
25
|
model: "gpt-5-codex",
|
|
26
26
|
permissionMode: "bypass",
|
|
@@ -28,11 +28,25 @@ describe("CodexRuntime", () => {
|
|
|
28
28
|
env: {},
|
|
29
29
|
};
|
|
30
30
|
const cmd = runtime.buildSpawnCommand(opts);
|
|
31
|
-
expect(cmd).toContain("codex
|
|
31
|
+
expect(cmd).toContain("codex --full-auto");
|
|
32
32
|
expect(cmd).toContain("--model gpt-5-codex");
|
|
33
33
|
expect(cmd).toContain("Read AGENTS.md");
|
|
34
34
|
});
|
|
35
35
|
|
|
36
|
+
test("manifest aliases omit --model so codex uses default configured model", () => {
|
|
37
|
+
for (const alias of ["sonnet", "opus", "haiku"]) {
|
|
38
|
+
const opts: SpawnOpts = {
|
|
39
|
+
model: alias,
|
|
40
|
+
permissionMode: "bypass",
|
|
41
|
+
cwd: "/tmp/worktree",
|
|
42
|
+
env: {},
|
|
43
|
+
};
|
|
44
|
+
const cmd = runtime.buildSpawnCommand(opts);
|
|
45
|
+
expect(cmd).toContain("codex --full-auto");
|
|
46
|
+
expect(cmd).not.toContain(" --model ");
|
|
47
|
+
}
|
|
48
|
+
});
|
|
49
|
+
|
|
36
50
|
test("permissionMode is NOT included in command (Codex uses OS sandbox)", () => {
|
|
37
51
|
const opts: SpawnOpts = {
|
|
38
52
|
model: "gpt-5-codex",
|
|
@@ -146,7 +160,7 @@ describe("CodexRuntime", () => {
|
|
|
146
160
|
};
|
|
147
161
|
const cmd = runtime.buildSpawnCommand(opts);
|
|
148
162
|
expect(cmd).toBe(
|
|
149
|
-
"codex
|
|
163
|
+
"codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
|
|
150
164
|
);
|
|
151
165
|
});
|
|
152
166
|
|
|
@@ -254,7 +268,7 @@ describe("CodexRuntime", () => {
|
|
|
254
268
|
});
|
|
255
269
|
|
|
256
270
|
describe("detectReady", () => {
|
|
257
|
-
test("returns ready for empty pane
|
|
271
|
+
test("returns ready for empty pane", () => {
|
|
258
272
|
const state = runtime.detectReady("");
|
|
259
273
|
expect(state).toEqual({ phase: "ready" });
|
|
260
274
|
});
|
|
@@ -279,7 +293,7 @@ describe("CodexRuntime", () => {
|
|
|
279
293
|
});
|
|
280
294
|
|
|
281
295
|
describe("requiresBeaconVerification", () => {
|
|
282
|
-
test("returns false (
|
|
296
|
+
test("returns false (no beacon verification needed)", () => {
|
|
283
297
|
expect(runtime.requiresBeaconVerification()).toBe(false);
|
|
284
298
|
});
|
|
285
299
|
});
|
|
@@ -664,7 +678,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
|
|
|
664
678
|
env: { OVERSTORY_AGENT_NAME: "builder-1" },
|
|
665
679
|
});
|
|
666
680
|
expect(cmd).toBe(
|
|
667
|
-
"codex
|
|
681
|
+
"codex --full-auto --model gpt-5-codex 'Read AGENTS.md for your task assignment and begin immediately.'",
|
|
668
682
|
);
|
|
669
683
|
});
|
|
670
684
|
|
|
@@ -677,7 +691,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
|
|
|
677
691
|
appendSystemPrompt: baseDefinition,
|
|
678
692
|
env: { OVERSTORY_AGENT_NAME: "coordinator" },
|
|
679
693
|
});
|
|
680
|
-
expect(cmd).toContain("codex
|
|
694
|
+
expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
|
|
681
695
|
expect(cmd).toContain("# Coordinator");
|
|
682
696
|
expect(cmd).toContain("You are the coordinator agent.");
|
|
683
697
|
expect(cmd).toContain("Read AGENTS.md");
|
|
@@ -691,7 +705,7 @@ describe("CodexRuntime integration: spawn command structure", () => {
|
|
|
691
705
|
appendSystemPromptFile: "/project/.overstory/agent-defs/coordinator.md",
|
|
692
706
|
env: { OVERSTORY_AGENT_NAME: "coordinator" },
|
|
693
707
|
});
|
|
694
|
-
expect(cmd).toContain("codex
|
|
708
|
+
expect(cmd).toContain("codex --full-auto --model gpt-5-codex");
|
|
695
709
|
expect(cmd).toContain("$(cat '/project/.overstory/agent-defs/coordinator.md')");
|
|
696
710
|
expect(cmd).toContain("Read AGENTS.md");
|
|
697
711
|
});
|
package/src/runtimes/codex.ts
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
// Implements the AgentRuntime contract for the OpenAI `codex` CLI.
|
|
3
3
|
//
|
|
4
4
|
// Key differences from Claude/Pi adapters:
|
|
5
|
-
// -
|
|
5
|
+
// - Interactive: `codex` (without `exec`) stays alive in tmux for orchestration
|
|
6
6
|
// - Instruction file: AGENTS.md (not .claude/CLAUDE.md)
|
|
7
7
|
// - No hooks: Codex uses OS-level sandbox (Seatbelt/Landlock)
|
|
8
|
-
// -
|
|
8
|
+
// - One-shot calls still use `codex exec` (buildPrintCommand)
|
|
9
9
|
|
|
10
10
|
import { mkdir } from "node:fs/promises";
|
|
11
11
|
import { dirname, join } from "node:path";
|
|
@@ -22,9 +22,9 @@ import type {
|
|
|
22
22
|
/**
|
|
23
23
|
* Codex runtime adapter.
|
|
24
24
|
*
|
|
25
|
-
* Implements AgentRuntime for the OpenAI `codex` CLI. Codex
|
|
26
|
-
*
|
|
27
|
-
*
|
|
25
|
+
* Implements AgentRuntime for the OpenAI `codex` CLI. Tmux-spawned Codex
|
|
26
|
+
* agents run in interactive mode (`codex`) so sessions stay alive and can be
|
|
27
|
+
* nudged via tmux.
|
|
28
28
|
*
|
|
29
29
|
* Security is enforced via Codex's OS-level sandbox (Seatbelt on macOS,
|
|
30
30
|
* Landlock on Linux) rather than hook-based guards. The `--full-auto` flag
|
|
@@ -40,11 +40,17 @@ export class CodexRuntime implements AgentRuntime {
|
|
|
40
40
|
/** Relative path to the instruction file within a worktree. */
|
|
41
41
|
readonly instructionPath = "AGENTS.md";
|
|
42
42
|
|
|
43
|
+
/**
|
|
44
|
+
* Anthropic aliases used by overstory manifests that Codex CLI does not
|
|
45
|
+
* accept as --model values.
|
|
46
|
+
*/
|
|
47
|
+
private static readonly MANIFEST_ALIASES = new Set(["sonnet", "opus", "haiku"]);
|
|
48
|
+
|
|
43
49
|
/**
|
|
44
50
|
* Build the shell command string to spawn a Codex agent in a tmux pane.
|
|
45
51
|
*
|
|
46
|
-
* Uses `codex
|
|
47
|
-
*
|
|
52
|
+
* Uses interactive `codex` with `--full-auto` for workspace-write sandbox +
|
|
53
|
+
* automatic approvals.
|
|
48
54
|
*
|
|
49
55
|
* The prompt directs the agent to read AGENTS.md for its full instructions.
|
|
50
56
|
* If `appendSystemPrompt` or `appendSystemPromptFile` is provided, the
|
|
@@ -56,7 +62,12 @@ export class CodexRuntime implements AgentRuntime {
|
|
|
56
62
|
* @returns Shell command string suitable for tmux new-session -c
|
|
57
63
|
*/
|
|
58
64
|
buildSpawnCommand(opts: SpawnOpts): string {
|
|
59
|
-
|
|
65
|
+
// When model comes from default manifest aliases (sonnet/opus/haiku),
|
|
66
|
+
// omit --model so Codex uses the user's configured default model.
|
|
67
|
+
let cmd = "codex --full-auto";
|
|
68
|
+
if (!CodexRuntime.MANIFEST_ALIASES.has(opts.model)) {
|
|
69
|
+
cmd += ` --model ${opts.model}`;
|
|
70
|
+
}
|
|
60
71
|
|
|
61
72
|
if (opts.appendSystemPromptFile) {
|
|
62
73
|
// Read role definition from file at shell expansion time — avoids tmux
|
|
@@ -128,11 +139,7 @@ export class CodexRuntime implements AgentRuntime {
|
|
|
128
139
|
}
|
|
129
140
|
|
|
130
141
|
/**
|
|
131
|
-
* Codex
|
|
132
|
-
*
|
|
133
|
-
* Unlike Claude Code and Pi which maintain persistent TUI sessions,
|
|
134
|
-
* `codex exec` starts processing immediately and exits on completion.
|
|
135
|
-
* No TUI readiness detection is needed.
|
|
142
|
+
* Codex interactive startup is treated as ready once a pane exists.
|
|
136
143
|
*
|
|
137
144
|
* @param _paneContent - Captured tmux pane content (unused)
|
|
138
145
|
* @returns Always `{ phase: "ready" }`
|
|
@@ -144,9 +151,7 @@ export class CodexRuntime implements AgentRuntime {
|
|
|
144
151
|
/**
|
|
145
152
|
* Codex does not require beacon verification/resend.
|
|
146
153
|
*
|
|
147
|
-
*
|
|
148
|
-
* swallows the initial Enter during late initialization. Codex exec is
|
|
149
|
-
* headless — it processes the prompt immediately with no TUI startup delay.
|
|
154
|
+
* Codex accepts startup input reliably once spawned.
|
|
150
155
|
*/
|
|
151
156
|
requiresBeaconVerification(): boolean {
|
|
152
157
|
return false;
|
package/src/types.ts
CHANGED
|
@@ -343,6 +343,8 @@ export interface OverlayConfig {
|
|
|
343
343
|
trackerName?: string; // "seeds" or "beads"
|
|
344
344
|
/** Quality gate commands for the agent overlay. Falls back to defaults if undefined. */
|
|
345
345
|
qualityGates?: QualityGate[];
|
|
346
|
+
/** Relative path to the instruction file within the worktree (runtime-specific). Defaults to .claude/CLAUDE.md. */
|
|
347
|
+
instructionPath?: string;
|
|
346
348
|
}
|
|
347
349
|
|
|
348
350
|
// === Merge Queue ===
|