@os-eco/overstory-cli 0.7.6 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,228 @@
1
+ // Codex runtime adapter for overstory's AgentRuntime interface.
2
+ // Implements the AgentRuntime contract for the OpenAI `codex` CLI.
3
+ //
4
+ // Key differences from Claude/Pi adapters:
5
+ // - Headless: `codex exec` exits on completion (no persistent TUI)
6
+ // - Instruction file: AGENTS.md (not .claude/CLAUDE.md)
7
+ // - No hooks: Codex uses OS-level sandbox (Seatbelt/Landlock)
8
+ // - Events: NDJSON stream to stdout (parsed for token usage)
9
+
10
+ import { mkdir } from "node:fs/promises";
11
+ import { dirname, join } from "node:path";
12
+ import type { ResolvedModel } from "../types.ts";
13
+ import type {
14
+ AgentRuntime,
15
+ HooksDef,
16
+ OverlayContent,
17
+ ReadyState,
18
+ SpawnOpts,
19
+ TranscriptSummary,
20
+ } from "./types.ts";
21
+
22
/** Instruction that ends every spawn prompt; the per-agent task itself lives in AGENTS.md. */
const READ_AGENTS_INSTRUCTION = "Read AGENTS.md for your task assignment and begin immediately.";

/**
 * Words made only of these characters are read identically by a POSIX shell
 * whether or not they are quoted, so they can be emitted verbatim.
 */
const SHELL_SAFE_WORD = /^[A-Za-z0-9_@%+=:,./-]+$/;

/**
 * Wrap a string in single quotes for a POSIX shell.
 *
 * Embedded single quotes are emitted as `'\''` (close quote, escaped quote,
 * reopen quote), which is the only character that needs special handling
 * inside single quotes.
 */
function singleQuote(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`;
}

/**
 * Render a value as exactly one shell word.
 *
 * Plain values (e.g. ordinary model names) are returned unchanged so the
 * generated command stays stable; anything else — including the empty
 * string — is single-quoted so it cannot be split or interpreted by the shell.
 */
function shellWord(value: string): string {
  return SHELL_SAFE_WORD.test(value) ? value : singleQuote(value);
}

/**
 * Codex runtime adapter.
 *
 * Implements AgentRuntime for the OpenAI `codex` CLI. Codex agents run in
 * headless mode (`codex exec`) — they process a task and exit, rather than
 * maintaining a persistent TUI like Claude Code or Pi.
 *
 * Security is enforced via Codex's OS-level sandbox (Seatbelt on macOS,
 * Landlock on Linux) rather than hook-based guards. The `--full-auto` flag
 * enables `workspace-write` sandbox + automatic approvals.
 *
 * Instructions are delivered via `AGENTS.md` (Codex's native convention),
 * not `.claude/CLAUDE.md`.
 */
export class CodexRuntime implements AgentRuntime {
  /** Unique identifier for this runtime. */
  readonly id = "codex";

  /** Relative path to the instruction file within a worktree. */
  readonly instructionPath = "AGENTS.md";

  /**
   * Build the shell command string to spawn a Codex agent in a tmux pane.
   *
   * Uses `codex exec` (headless mode) with `--full-auto` for workspace-write
   * sandbox + automatic approvals, and `--json` for NDJSON event output.
   *
   * The prompt directs the agent to read AGENTS.md for its full instructions.
   * If `appendSystemPromptFile` or `appendSystemPrompt` is provided, its
   * content is placed in front of that instruction (Codex has no
   * --append-system-prompt flag — all context goes through the exec prompt
   * or AGENTS.md). When both are set, the file takes precedence.
   *
   * The model name is emitted verbatim when it is a plain word and
   * single-quoted otherwise, so an unusual or empty value can neither be
   * split into several arguments nor swallow the prompt as its value.
   *
   * @param opts - Spawn options (model, appendSystemPrompt, appendSystemPromptFile;
   *   permissionMode is accepted but not mapped — Codex enforces security via
   *   OS sandbox, not permission flags)
   * @returns Shell command string suitable for tmux new-session -c
   */
  buildSpawnCommand(opts: SpawnOpts): string {
    const base = `codex exec --full-auto --json --model ${shellWord(opts.model)}`;

    if (opts.appendSystemPromptFile) {
      // Read the role definition from the file at shell expansion time — avoids
      // tmux IPC message size limits. The double-quoted `$(cat …)` and the
      // single-quoted instruction are adjacent, so the shell joins them into
      // one prompt argument.
      const file = singleQuote(opts.appendSystemPromptFile);
      return `${base} "$(cat ${file})"' ${READ_AGENTS_INSTRUCTION}'`;
    }

    if (opts.appendSystemPrompt) {
      // Inline role definition, a blank line, then the AGENTS.md instruction.
      return `${base} ${singleQuote(`${opts.appendSystemPrompt}\n\n${READ_AGENTS_INSTRUCTION}`)}`;
    }

    return `${base} ${singleQuote(READ_AGENTS_INSTRUCTION)}`;
  }

  /**
   * Build the argv array for a headless one-shot Codex invocation.
   *
   * Returns an argv array suitable for `Bun.spawn()`. Uses `codex exec`
   * with `--full-auto` and `--ephemeral` (no session persistence).
   * Without `--json`, stdout contains the plain text final message.
   *
   * Used by merge/resolver.ts (AI-assisted conflict resolution) and
   * watchdog/triage.ts (AI-assisted failure classification).
   *
   * @param prompt - The prompt to pass as the exec argument
   * @param model - Optional model override
   * @returns Argv array for Bun.spawn
   */
  buildPrintCommand(prompt: string, model?: string): string[] {
    const argv = ["codex", "exec", "--full-auto", "--ephemeral"];
    if (model !== undefined) {
      argv.push("--model", model);
    }
    // The prompt is always the final positional argument; no shell is
    // involved, so it needs no quoting.
    argv.push(prompt);
    return argv;
  }

  /**
   * Deploy per-agent instructions to a worktree.
   *
   * Writes the overlay to `AGENTS.md` in the worktree root (Codex's native
   * instruction file convention). Unlike Claude/Pi adapters, no hooks or
   * guard extensions are deployed — Codex enforces security boundaries via
   * its OS-level sandbox (Seatbelt on macOS, Landlock on Linux).
   *
   * When overlay is undefined (hooks-only deployment for coordinator/supervisor/monitor),
   * this is a no-op since Codex has no hook system to deploy.
   *
   * @param worktreePath - Absolute path to the agent's git worktree
   * @param overlay - Overlay content to write as AGENTS.md, or undefined for no-op
   * @param _hooks - Hook definition (unused — Codex uses OS sandbox, not hooks)
   */
  async deployConfig(
    worktreePath: string,
    overlay: OverlayContent | undefined,
    _hooks: HooksDef,
  ): Promise<void> {
    if (!overlay) return;

    const agentsPath = join(worktreePath, this.instructionPath);
    // Ensure parent directory exists (AGENTS.md is in the worktree root,
    // but the worktree dir itself might not exist yet).
    await mkdir(dirname(agentsPath), { recursive: true });
    await Bun.write(agentsPath, overlay.content);
  }

  /**
   * Codex exec is headless — always ready.
   *
   * Unlike Claude Code and Pi which maintain persistent TUI sessions,
   * `codex exec` starts processing immediately and exits on completion.
   * No TUI readiness detection is needed.
   *
   * @param _paneContent - Captured tmux pane content (unused)
   * @returns Always `{ phase: "ready" }`
   */
  detectReady(_paneContent: string): ReadyState {
    return { phase: "ready" };
  }

  /**
   * Codex does not require beacon verification/resend.
   *
   * The beacon verification loop exists because Claude Code's TUI sometimes
   * swallows the initial Enter during late initialization. Codex exec is
   * headless — it processes the prompt immediately with no TUI startup delay.
   *
   * @returns Always `false`
   */
  requiresBeaconVerification(): boolean {
    return false;
  }

  /**
   * Parse a Codex NDJSON transcript file into normalized token usage.
   *
   * Codex NDJSON format (from `--json` flag) differs from Claude/Pi:
   * - Token counts are in `turn.completed` events with
   *   `usage.input_tokens` and `usage.output_tokens`
   * - Model identity may appear in `thread.started` events or item metadata
   *
   * Token counts are summed across all `turn.completed` events; the model is
   * the last top-level string `model` field seen on any event (empty string
   * if none). Lines that are not valid JSON, or that decode to something
   * other than an object, are skipped rather than failing the whole parse.
   *
   * Returns null if the file does not exist or cannot be read.
   *
   * @param path - Absolute path to the Codex NDJSON transcript file
   * @returns Aggregated token usage, or null if unavailable
   */
  async parseTranscript(path: string): Promise<TranscriptSummary | null> {
    const file = Bun.file(path);
    if (!(await file.exists())) {
      return null;
    }

    try {
      const text = await file.text();

      let inputTokens = 0;
      let outputTokens = 0;
      let model = "";

      for (const line of text.split("\n")) {
        if (line.trim().length === 0) continue;

        let parsed: unknown;
        try {
          parsed = JSON.parse(line);
        } catch {
          // Skip malformed lines — partial writes during capture.
          continue;
        }

        // `null`, numbers and strings are valid JSON but not events; reading
        // `.type` off `null` would throw and discard everything counted so far.
        if (typeof parsed !== "object" || parsed === null) continue;
        const event = parsed as Record<string, unknown>;

        if (event.type === "turn.completed") {
          const usage = event.usage;
          if (typeof usage === "object" && usage !== null) {
            const counts = usage as Record<string, unknown>;
            if (typeof counts.input_tokens === "number") {
              inputTokens += counts.input_tokens;
            }
            if (typeof counts.output_tokens === "number") {
              outputTokens += counts.output_tokens;
            }
          }
        }

        // Capture model from any event that carries it.
        if (typeof event.model === "string") {
          model = event.model;
        }
      }

      return { inputTokens, outputTokens, model };
    } catch {
      // Read failure (e.g. file removed between exists() and text()).
      return null;
    }
  }

  /**
   * Build runtime-specific environment variables for model/provider routing.
   *
   * Returns the provider environment variables from the resolved model.
   * For OpenAI native: may include OPENAI_API_KEY, OPENAI_BASE_URL.
   * For gateway providers: may include gateway-specific auth and routing vars.
   *
   * @param model - Resolved model with optional provider env vars
   * @returns Environment variable map (may be empty)
   */
  buildEnv(model: ResolvedModel): Record<string, string> {
    return model.env ?? {};
  }
}
@@ -642,7 +642,7 @@ describe("PiRuntime integration: registry resolves 'pi'", () => {
642
642
 
643
643
  test("getRuntime rejects truly unknown runtimes", async () => {
644
644
  const { getRuntime } = await import("./registry.ts");
645
- expect(() => getRuntime("codex")).toThrow('Unknown runtime: "codex"');
646
645
  expect(() => getRuntime("opencode")).toThrow('Unknown runtime: "opencode"');
646
+ expect(() => getRuntime("aider")).toThrow('Unknown runtime: "aider"');
647
647
  });
648
648
  });
@@ -1,6 +1,7 @@
1
1
  import { describe, expect, it } from "bun:test";
2
2
  import type { OverstoryConfig } from "../types.ts";
3
3
  import { ClaudeRuntime } from "./claude.ts";
4
+ import { CodexRuntime } from "./codex.ts";
4
5
  import { CopilotRuntime } from "./copilot.ts";
5
6
  import { PiRuntime } from "./pi.ts";
6
7
  import { getRuntime } from "./registry.ts";
@@ -20,7 +21,7 @@ describe("getRuntime", () => {
20
21
 
21
22
  it("throws with a helpful message for an unknown runtime", () => {
22
23
  expect(() => getRuntime("unknown-runtime")).toThrow(
23
- 'Unknown runtime: "unknown-runtime". Available: claude',
24
+ 'Unknown runtime: "unknown-runtime". Available: claude, codex, pi, copilot',
24
25
  );
25
26
  });
26
27
 
@@ -39,12 +40,11 @@ describe("getRuntime", () => {
39
40
  expect(runtime).toBeInstanceOf(ClaudeRuntime);
40
41
  });
41
42
 
42
- it("throws for unknown runtime even when config default is set", () => {
43
+ it("resolves codex runtime from config default", () => {
43
44
  const config = { runtime: { default: "codex" } } as OverstoryConfig;
44
- // No name arg — falls back to config default "codex" which is unknown.
45
- expect(() => getRuntime(undefined, config)).toThrow(
46
- 'Unknown runtime: "codex". Available: claude',
47
- );
45
+ const runtime = getRuntime(undefined, config);
46
+ expect(runtime).toBeInstanceOf(CodexRuntime);
47
+ expect(runtime.id).toBe("codex");
48
48
  });
49
49
 
50
50
  it("returns a new instance on each call (factory pattern)", () => {
@@ -3,6 +3,7 @@
3
3
 
4
4
  import type { OverstoryConfig } from "../types.ts";
5
5
  import { ClaudeRuntime } from "./claude.ts";
6
+ import { CodexRuntime } from "./codex.ts";
6
7
  import { CopilotRuntime } from "./copilot.ts";
7
8
  import { PiRuntime } from "./pi.ts";
8
9
  import type { AgentRuntime } from "./types.ts";
@@ -10,6 +11,7 @@ import type { AgentRuntime } from "./types.ts";
10
11
  /** Registry of config-independent runtime adapters (name → factory). */
11
12
  const runtimes = new Map<string, () => AgentRuntime>([
12
13
  ["claude", () => new ClaudeRuntime()],
14
+ ["codex", () => new CodexRuntime()],
13
15
  ["pi", () => new PiRuntime()],
14
16
  ["copilot", () => new CopilotRuntime()],
15
17
  ]);