@agentplate/cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/CHANGELOG.md +54 -0
  2. package/LICENSE +21 -0
  3. package/README.md +206 -0
  4. package/agents/architect.md +108 -0
  5. package/agents/builder.md +97 -0
  6. package/agents/coordinator.md +113 -0
  7. package/agents/deployer.md +117 -0
  8. package/agents/devops.md +114 -0
  9. package/agents/lead.md +107 -0
  10. package/agents/merger.md +103 -0
  11. package/agents/reviewer.md +90 -0
  12. package/agents/scout.md +95 -0
  13. package/agents/verifier.md +106 -0
  14. package/package.json +64 -0
  15. package/src/agents/guard-rules.ts +55 -0
  16. package/src/agents/identity.test.ts +161 -0
  17. package/src/agents/identity.ts +229 -0
  18. package/src/agents/manifest.test.ts +260 -0
  19. package/src/agents/manifest.ts +286 -0
  20. package/src/agents/overlay.test.ts +190 -0
  21. package/src/agents/overlay.ts +212 -0
  22. package/src/agents/system-prompt.test.ts +53 -0
  23. package/src/agents/system-prompt.ts +95 -0
  24. package/src/agents/turn-runner.ts +79 -0
  25. package/src/commands/coordinator.test.ts +75 -0
  26. package/src/commands/coordinator.ts +259 -0
  27. package/src/commands/deploy.test.ts +504 -0
  28. package/src/commands/deploy.ts +874 -0
  29. package/src/commands/doctor.test.ts +106 -0
  30. package/src/commands/doctor.ts +208 -0
  31. package/src/commands/init.ts +71 -0
  32. package/src/commands/log.ts +51 -0
  33. package/src/commands/mail.ts +197 -0
  34. package/src/commands/merge.ts +127 -0
  35. package/src/commands/model.ts +58 -0
  36. package/src/commands/prime.ts +61 -0
  37. package/src/commands/reap.ts +87 -0
  38. package/src/commands/serve.ts +61 -0
  39. package/src/commands/setup.ts +48 -0
  40. package/src/commands/ship.test.ts +106 -0
  41. package/src/commands/ship.ts +202 -0
  42. package/src/commands/skill.test.ts +458 -0
  43. package/src/commands/skill.ts +730 -0
  44. package/src/commands/sling.ts +365 -0
  45. package/src/commands/status.ts +60 -0
  46. package/src/commands/stop.ts +56 -0
  47. package/src/commands/tui.ts +199 -0
  48. package/src/commands/worktree.ts +77 -0
  49. package/src/config.test.ts +92 -0
  50. package/src/config.ts +202 -0
  51. package/src/db/sqlite.test.ts +77 -0
  52. package/src/db/sqlite.ts +102 -0
  53. package/src/deploy/audit.test.ts +233 -0
  54. package/src/deploy/audit.ts +245 -0
  55. package/src/deploy/context.test.ts +243 -0
  56. package/src/deploy/context.ts +72 -0
  57. package/src/deploy/registry.test.ts +101 -0
  58. package/src/deploy/registry.ts +86 -0
  59. package/src/deploy/secrets.test.ts +129 -0
  60. package/src/deploy/secrets.ts +69 -0
  61. package/src/deploy/targets/docker-gha.test.ts +323 -0
  62. package/src/deploy/targets/docker-gha.ts +841 -0
  63. package/src/deploy/types.ts +153 -0
  64. package/src/errors.test.ts +42 -0
  65. package/src/errors.ts +69 -0
  66. package/src/events/store.test.ts +183 -0
  67. package/src/events/store.ts +201 -0
  68. package/src/index.ts +137 -0
  69. package/src/insights/quality-gates.ts +73 -0
  70. package/src/json.test.ts +28 -0
  71. package/src/json.ts +50 -0
  72. package/src/logging/color.ts +62 -0
  73. package/src/logging/logger.ts +60 -0
  74. package/src/logging/sanitizer.test.ts +36 -0
  75. package/src/logging/sanitizer.ts +57 -0
  76. package/src/mail/client.test.ts +192 -0
  77. package/src/mail/client.ts +188 -0
  78. package/src/mail/store.test.ts +279 -0
  79. package/src/mail/store.ts +311 -0
  80. package/src/merge/lock.test.ts +88 -0
  81. package/src/merge/lock.ts +84 -0
  82. package/src/merge/queue.test.ts +136 -0
  83. package/src/merge/queue.ts +177 -0
  84. package/src/merge/resolver.test.ts +219 -0
  85. package/src/merge/resolver.ts +274 -0
  86. package/src/paths.ts +36 -0
  87. package/src/providers/apply.test.ts +90 -0
  88. package/src/providers/apply.ts +66 -0
  89. package/src/providers/registry.test.ts +74 -0
  90. package/src/providers/registry.ts +254 -0
  91. package/src/runtimes/claude.ts +313 -0
  92. package/src/runtimes/codex.ts +280 -0
  93. package/src/runtimes/cursor.ts +247 -0
  94. package/src/runtimes/gemini.ts +173 -0
  95. package/src/runtimes/mock.ts +71 -0
  96. package/src/runtimes/opencode.ts +259 -0
  97. package/src/runtimes/registry.test.ts +924 -0
  98. package/src/runtimes/registry.ts +63 -0
  99. package/src/runtimes/resolve.ts +45 -0
  100. package/src/runtimes/types.ts +97 -0
  101. package/src/scaffold.ts +68 -0
  102. package/src/secrets.test.ts +51 -0
  103. package/src/secrets.ts +78 -0
  104. package/src/serve/api.ts +667 -0
  105. package/src/serve/server.test.ts +433 -0
  106. package/src/serve/server.ts +271 -0
  107. package/src/serve/system.ts +90 -0
  108. package/src/serve/weather.ts +140 -0
  109. package/src/sessions/reaper.test.ts +162 -0
  110. package/src/sessions/reaper.ts +149 -0
  111. package/src/sessions/store.test.ts +351 -0
  112. package/src/sessions/store.ts +350 -0
  113. package/src/skills/distiller.test.ts +498 -0
  114. package/src/skills/distiller.ts +426 -0
  115. package/src/skills/feedback.test.ts +300 -0
  116. package/src/skills/feedback.ts +168 -0
  117. package/src/skills/lifecycle.ts +169 -0
  118. package/src/skills/retrieval.test.ts +421 -0
  119. package/src/skills/retrieval.ts +365 -0
  120. package/src/skills/safety.test.ts +335 -0
  121. package/src/skills/safety.ts +216 -0
  122. package/src/skills/store.test.ts +425 -0
  123. package/src/skills/store.ts +684 -0
  124. package/src/skills/types.ts +107 -0
  125. package/src/types.ts +442 -0
  126. package/src/utils/detect.test.ts +35 -0
  127. package/src/utils/detect.ts +82 -0
  128. package/src/version.test.ts +19 -0
  129. package/src/version.ts +7 -0
  130. package/src/wizard/setup.ts +254 -0
  131. package/src/worktree/manager.test.ts +181 -0
  132. package/src/worktree/manager.ts +229 -0
  133. package/templates/overlay.md.tmpl +102 -0
  134. package/ui/dist/assets/index-C7rXIMER.css +1 -0
  135. package/ui/dist/assets/index-W4kbr4by.js +4526 -0
  136. package/ui/dist/favicon.svg +21 -0
  137. package/ui/dist/index.html +16 -0
  138. package/ui/dist/logo-clay.svg +21 -0
  139. package/ui/dist/logo.svg +18 -0
@@ -0,0 +1,313 @@
1
+ /**
2
+ * Claude Code runtime adapter.
3
+ *
4
+ * Drives Anthropic's `claude` CLI in headless, spawn-per-turn mode. Each turn is
5
+ * a single `claude -p … --output-format stream-json` invocation whose stdout is a
6
+ * stream of NDJSON events; {@link ClaudeRuntime.parseEvents} normalizes those into
7
+ * {@link AgentEvent}s. The adapter is stateless — session continuity across turns
8
+ * is carried entirely by the runtime session id (`--resume`), which the caller
9
+ * extracts from the `sessionId` an event reports and threads back in via
10
+ * {@link DirectSpawnOpts.resumeSessionId}.
11
+ */
12
+
13
+ import type { ResolvedModel } from "../types.ts";
14
+ import type { AgentEvent, AgentRuntime, DirectSpawnOpts, InteractiveSpawnOpts } from "./types.ts";
15
+
16
/**
 * Native Claude Code sub-agent / orchestration tools that are switched off for
 * every session Agentplate drives.
 *
 * Teammates must be spawned exclusively through `agentplate sling` (run via
 * Bash) so that the session store, mail bus, and merge queue all see the work.
 * A sub-agent started with one of Claude Code's own tools bypasses that
 * tracking: it never appears in `ap serve` / `ap tui` and its output is never
 * merged. Bash/Read/Edit/Write and friends are left enabled, so sling itself
 * keeps working. Listing a name Claude Code does not know is harmless — it
 * ignores tools it does not have.
 */
const BLOCKED_SPAWN_TOOLS = ["Task", "Agent", "Workflow"] as const;

/**
 * Tool block-list for the INTERACTIVE coordinator session only.
 *
 * The coordinator dispatches work (via `agentplate sling` in Bash) and must not
 * implement anything itself, so on top of {@link BLOCKED_SPAWN_TOOLS} the
 * file-mutation tools are disabled too. Bash/Read/Grep/Glob remain usable for
 * slinging and surveying. Headless worker turns
 * ({@link ClaudeRuntime.buildDirectSpawn}) deliberately do NOT use this list —
 * workers have to edit files.
 */
const COORDINATOR_BLOCKED_TOOLS = [
  // Everything blocked for workers is blocked for the coordinator as well.
  ...BLOCKED_SPAWN_TOOLS,
  // File-mutation tools: the coordinator never edits code directly.
  "Edit", "Write", "MultiEdit", "NotebookEdit",
] as const;
43
+
44
export class ClaudeRuntime implements AgentRuntime {
  /** Registry id; also the value users pass to `--runtime claude`. */
  readonly id = "claude";

  /** Claude Code is the primary, fully-supported runtime. */
  readonly stability = "stable" as const;

  /**
   * Claude Code automatically reads `.claude/CLAUDE.md` from the working
   * directory, so the overlay is written there rather than passed as a flag.
   */
  readonly instructionPath = ".claude/CLAUDE.md";

  /**
   * Build argv for one headless streaming turn (run via `Bun.spawn`).
   *
   * Flag choices:
   * - `-p <prompt>` runs the prompt non-interactively and exits on completion.
   *   The prompt defaults to "" so a resume-only nudge turn (where the caller
   *   feeds the real text on stdin) still produces a well-formed argv.
   * - `--output-format stream-json` + `--verbose` emit the per-event NDJSON that
   *   {@link parseEvents} consumes; `--verbose` is required for the streaming
   *   form to include tool-use / session events rather than a single result.
   * - `--model <model>` pins the concrete model resolved upstream.
   * - `--resume <id>` is emitted ONLY on follow-up turns (a non-empty session
   *   id). The first turn omits it so Claude Code starts a fresh session.
   * - `--disallowedTools` lists {@link BLOCKED_SPAWN_TOOLS} so `agentplate sling`
   *   is the only way to spawn teammates.
   * - `--permission-mode bypassPermissions` because workers run unattended in an
   *   isolated worktree; interactive permission prompts would deadlock a
   *   headless process.
   *
   * @param opts Turn options; reads `prompt`, `model` and `resumeSessionId`.
   * @returns An argv array (never a shell string), so no value is subject to
   *   shell interpolation.
   */
  buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    return [
      "claude",
      "-p",
      opts.prompt ?? "",
      "--output-format",
      "stream-json",
      "--verbose",
      "--model",
      opts.model,
      // Only resume when a prior turn handed us a real session id. An empty
      // string is treated as "no resume" so the first turn opens a new session.
      ...(opts.resumeSessionId ? ["--resume", opts.resumeSessionId] : []),
      // Force sling-only spawning. The variadic `--disallowedTools <tools...>` is
      // placed before `--permission-mode` so that flag terminates the list and the
      // `-p` prompt (a leading flag value) is never swallowed.
      "--disallowedTools",
      ...BLOCKED_SPAWN_TOOLS,
      "--permission-mode",
      "bypassPermissions",
    ];
  }

  /**
   * Build argv for an ATTENDED interactive Claude Code session.
   *
   * Run in the foreground with inherited stdio so the operator chats directly
   * (`coordinator start`). The interactive session IS the coordinator, so it is
   * dispatch-only: {@link COORDINATOR_BLOCKED_TOOLS} disables the native sub-agent
   * tools AND file-mutation tools, leaving Bash (for `agentplate sling`) + Read.
   * The agent's role is injected via `--append-system-prompt` — a literal argv
   * value (no `$(cat …)` shell trick) since we spawn an argv array.
   *
   * @param opts Session options; reads `model`, `permissionMode`,
   *   `systemPrompt` and `initialMessage`.
   * @returns The argv array for the interactive process.
   */
  buildInteractiveSpawn(opts: InteractiveSpawnOpts): string[] {
    // Anything other than an explicit "bypass" maps to Claude Code's default mode.
    const permMode = opts.permissionMode === "bypass" ? "bypassPermissions" : "default";
    // `--disallowedTools` (variadic) sits before `--permission-mode` so that flag
    // terminates the tool list and the trailing seed message is never swallowed.
    const argv = [
      "claude",
      "--model",
      opts.model,
      "--disallowedTools",
      ...COORDINATOR_BLOCKED_TOOLS,
      "--permission-mode",
      permMode,
    ];
    if (opts.systemPrompt && opts.systemPrompt.length > 0) {
      argv.push("--append-system-prompt", opts.systemPrompt);
    }
    // A seed message becomes the first user turn (claude treats a trailing
    // positional as the initial prompt while staying interactive).
    if (opts.initialMessage && opts.initialMessage.length > 0) {
      argv.push(opts.initialMessage);
    }
    return argv;
  }

  /**
   * Provider env vars for the resolved model (API keys, base URLs).
   *
   * Auth is never hardcoded here — it is whatever the provider layer resolved
   * onto the model.
   *
   * @param model The resolved model whose `env` map (if any) is copied.
   * @returns A fresh object (never `model.env` itself), so a caller mutating the
   *   result cannot leak back into shared config. Empty when `env` is unset.
   */
  buildEnv(model: ResolvedModel): Record<string, string> {
    return { ...(model.env ?? {}) };
  }

  /**
   * Build argv for a one-shot, non-streaming call (`claude -p … --output-format
   * text`). Used by AI-assisted merge resolution and skill distillation, where
   * only the final text answer is wanted, not an event stream.
   *
   * @param prompt The prompt text passed after `-p`.
   * @param model Optional model name; `--model` is appended only when provided
   *   so the caller can defer to Claude Code's own default.
   * @returns The argv array for the one-shot process.
   */
  buildPrintCommand(prompt: string, model?: string): string[] {
    const argv = ["claude", "-p", prompt, "--output-format", "text"];
    if (model !== undefined) {
      argv.push("--model", model);
    }
    return argv;
  }

  /**
   * Parse Claude Code's stream-json stdout into normalized {@link AgentEvent}s.
   *
   * The stream is NDJSON: one JSON object per line, but TCP/pipe chunk
   * boundaries do NOT align to newlines, so a `buffer` holds the trailing
   * partial line across reads and a line is parsed only once a `\n` completes
   * it. Each complete line is handed to `parseClaudeLine`, which returns `null`
   * for blank or malformed lines (those are skipped rather than aborting the
   * turn).
   *
   * @param stream The child process's stdout as a byte stream.
   * @returns An async iterable of events, in stream order.
   */
  async *parseEvents(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent> {
    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // `stream: true` lets the decoder hold back a trailing partial
        // multi-byte sequence until the next chunk completes it.
        buffer += decoder.decode(value, { stream: true });

        const lines = buffer.split("\n");
        // The last element is an incomplete line (no terminating newline yet)
        // or "" — keep it buffered for the next read.
        buffer = lines.pop() ?? "";

        for (const line of lines) {
          const event = parseClaudeLine(line);
          if (event) yield event;
        }
      }

      // End of stream: a final argument-less decode() flushes whatever bytes the
      // streaming decoder is still holding, so they are not silently dropped.
      buffer += decoder.decode();

      // Emit any final line left after a clean stream end without a trailing
      // newline (e.g. the process exits right after writing the result event).
      const tail = parseClaudeLine(buffer);
      if (tail) yield tail;
    } finally {
      // Always release the lock so the underlying stream can be GC'd / reused
      // even if the consumer breaks out of the loop early.
      reader.releaseLock();
    }
  }
}
210
+
211
/**
 * Parse a single stream-json line into an {@link AgentEvent}, or `null` for a
 * blank line, a line that is not valid JSON, or JSON that is not an object.
 * Kept as a free function (not a closure) so it is trivially unit-testable.
 *
 * Fields populated on the returned event:
 * - `type`: the message's `type` when it is a string, otherwise `"unknown"`.
 * - `raw`: the parsed JSON value, untouched.
 * - `sessionId`: a non-empty string `session_id`, when present.
 * - `tool`: whatever `extractToolName` finds, when defined.
 * - `usage`: whatever `extractUsage` finds, when defined.
 * - `error`: a failure reason, when one can be found (see inline notes).
 *
 * @param line One line of the NDJSON stream (surrounding whitespace is ignored).
 * @returns The normalized event, or `null` when the line carries no event.
 */
function parseClaudeLine(line: string): AgentEvent | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let raw: unknown;
  try {
    raw = JSON.parse(trimmed);
  } catch {
    // Not valid JSON (partial flush, diagnostic noise) — skip, never throw.
    return null;
  }

  if (typeof raw !== "object" || raw === null) return null;
  const msg = raw as Record<string, unknown>;

  const event: AgentEvent = {
    type: typeof msg.type === "string" ? msg.type : "unknown",
    raw,
  };

  // session_id appears on the init/system event and is reused for --resume.
  if (typeof msg.session_id === "string" && msg.session_id.length > 0) {
    event.sessionId = msg.session_id;
  }

  const tool = extractToolName(msg);
  if (tool !== undefined) event.tool = tool;

  const usage = extractUsage(msg);
  if (usage !== undefined) event.usage = usage;

  // Surface a failure reason: a `result` event with `is_error` carries the
  // message in `result`; a bare `error` event carries it in `error`/`message`.
  if (msg.is_error === true && typeof msg.result === "string") {
    event.error = msg.result;
  } else if (msg.type === "error") {
    if (typeof msg.error === "string") event.error = msg.error;
    else if (typeof msg.message === "string") event.error = msg.message;
  } else if (msg.is_error === true && typeof msg.subtype === "string") {
    // A failed message without a string `result` would otherwise look like a
    // success to the caller. Fall back to `subtype` so the failure is visible.
    // NOTE(review): assumes error results carry a descriptive `subtype`
    // (e.g. `error_max_turns`) — confirm against the Claude Code version in use.
    event.error = msg.subtype;
  }

  return event;
}
258
+
259
/**
 * Read token usage and USD cost from a Claude Code `result` event.
 *
 * The token total is the sum of every numeric value among `input_tokens`,
 * `output_tokens`, `cache_creation_input_tokens` and `cache_read_input_tokens`
 * on the event's `usage` object, so cache reads/writes are counted too. The cost
 * is `total_cost_usd` when it is a number, otherwise 0.
 *
 * @param msg A parsed stream-json message.
 * @returns `{ tokens, costUsd }`, or `undefined` when `msg` is not a `result`
 *   event or when both figures are 0 (so only real usage gets aggregated).
 */
function extractUsage(
  msg: Record<string, unknown>,
): { tokens: number; costUsd: number } | undefined {
  if (msg.type !== "result") return undefined;

  // A missing or non-object `usage` behaves like an object with no counters.
  const counters: Record<string, unknown> =
    typeof msg.usage === "object" && msg.usage !== null
      ? (msg.usage as Record<string, unknown>)
      : {};

  const tokens = [
    counters.input_tokens,
    counters.output_tokens,
    counters.cache_creation_input_tokens,
    counters.cache_read_input_tokens,
  ].reduce<number>((sum, value) => (typeof value === "number" ? sum + value : sum), 0);

  const reportedCost = msg.total_cost_usd;
  const costUsd = typeof reportedCost === "number" ? reportedCost : 0;

  // Nothing spent and nothing counted: report "no usage" rather than zeros.
  return tokens === 0 && costUsd === 0 ? undefined : { tokens, costUsd };
}
288
+
289
/**
 * Find the tool name carried by an assistant message, if any.
 *
 * Claude nests tool calls as `{ type: "tool_use", name: "Edit", … }` blocks
 * inside `message.content`. The name of the FIRST block whose `type` is
 * `"tool_use"` and whose `name` is a string is returned.
 *
 * @param msg A parsed stream-json message.
 * @returns The tool name, or `undefined` when `message` is not an object,
 *   `content` is not an array, or no block qualifies.
 */
function extractToolName(msg: Record<string, unknown>): string | undefined {
  const { message } = msg;
  if (message === null || typeof message !== "object") return undefined;

  const blocks = (message as Record<string, unknown>).content;
  if (!Array.isArray(blocks)) return undefined;

  const toolUse = blocks.find(
    (candidate) =>
      typeof candidate === "object" &&
      candidate !== null &&
      candidate.type === "tool_use" &&
      typeof candidate.name === "string",
  );
  return toolUse === undefined ? undefined : (toolUse.name as string);
}
311
+
312
/**
 * Shared module-level {@link ClaudeRuntime} instance, for callers that do not
 * need dependency injection and can simply import a ready-made runtime.
 */
export const claudeRuntime = new ClaudeRuntime();
@@ -0,0 +1,280 @@
1
+ /**
2
+ * OpenAI Codex runtime adapter.
3
+ *
4
+ * Drives OpenAI's `codex` CLI in headless, spawn-per-turn mode. Like Claude Code,
5
+ * Codex authenticates with its OWN login: a ChatGPT/Codex OAuth session stored in
6
+ * `~/.codex/auth.json` (`auth_mode` + `tokens`). When the active provider uses
7
+ * `authMode: "subscription"`, the provider layer injects no key (see
8
+ * `src/runtimes/resolve.ts`) and Codex falls back to that OAuth login — the exact
9
+ * mirror of how the Anthropic provider reuses the `claude` login. An
10
+ * `api-key` / `env` provider instead flows `OPENAI_API_KEY` through
11
+ * {@link CodexRuntime.buildEnv}; auth is never hardcoded here.
12
+ *
13
+ * A headless turn is `codex exec --json …`, whose stdout is a stream of JSONL
14
+ * thread/item events (`thread.started`, `item.completed`, `turn.completed`);
15
+ * {@link CodexRuntime.parseEvents} normalizes those into {@link AgentEvent}s.
16
+ * Session continuity across turns is carried by the `thread_id` captured from the
17
+ * `thread.started` event and threaded back via `codex exec resume <id>`
18
+ * ({@link DirectSpawnOpts.resumeSessionId}).
19
+ *
20
+ * Validated against codex-cli 0.128 flag + JSON-event shapes.
21
+ */
22
+
23
+ import type { ResolvedModel } from "../types.ts";
24
+ import type { AgentEvent, AgentRuntime, DirectSpawnOpts, InteractiveSpawnOpts } from "./types.ts";
25
+
26
export class CodexRuntime implements AgentRuntime {
  /** Registry id; also the value users pass to `--runtime codex`. */
  readonly id = "codex";

  /** Beta: validated against codex-cli 0.128 flag shapes. */
  readonly stability = "beta" as const;

  /** Codex reads `AGENTS.md` from the working directory at startup. */
  readonly instructionPath = "AGENTS.md";

  /**
   * Build argv for one headless streaming turn (`codex exec --json`).
   *
   * Flag choices:
   * - `exec` runs Codex non-interactively and exits on completion.
   * - `resume <id>` (a subcommand) is emitted ONLY on follow-up turns; the first
   *   turn omits it so Codex starts a fresh session.
   * - `--json` emits the per-event JSONL that {@link parseEvents} consumes
   *   (including the `thread.started` event whose `thread_id` is the resume
   *   session id).
   * - `--model <model>` pins the concrete model resolved upstream.
   * - `--dangerously-bypass-approvals-and-sandbox` is the analog of Claude Code's
   *   `bypassPermissions`: workers run unattended in an isolated worktree, so
   *   interactive approval prompts would deadlock a headless process.
   *
   * The prompt is the trailing positional and defaults to "".
   *
   * @param opts Turn options; reads `prompt`, `model` and `resumeSessionId`.
   * @returns An argv array (never a shell string), so no value is subject to
   *   shell interpolation.
   */
  buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    // Only resume when a prior turn handed us a real session id. An empty string
    // is treated as "no resume" so the first turn opens a new session.
    const resume = opts.resumeSessionId ? ["resume", opts.resumeSessionId] : [];
    return [
      "codex",
      "exec",
      ...resume,
      "--json",
      "--model",
      opts.model,
      "--dangerously-bypass-approvals-and-sandbox",
      opts.prompt ?? "",
    ];
  }

  /**
   * Build argv for an ATTENDED interactive Codex session.
   *
   * Run in the foreground with inherited stdio so the operator chats directly
   * (`coordinator start`). Codex has no `--append-system-prompt` flag, so the
   * agent's role must be supplied via the `AGENTS.md` overlay; `systemPrompt` /
   * `permissionMode` are accepted for interface parity but not passed as flags
   * (the human is present to approve actions, so the default posture applies).
   *
   * @param opts Session options; only `model` and `initialMessage` are read.
   * @returns The argv array; a non-empty `initialMessage` becomes the trailing
   *   positional, i.e. the initial prompt while the TUI stays interactive.
   */
  buildInteractiveSpawn(opts: InteractiveSpawnOpts): string[] {
    const argv = ["codex", "--model", opts.model];
    if (opts.initialMessage && opts.initialMessage.length > 0) {
      argv.push(opts.initialMessage);
    }
    return argv;
  }

  /**
   * Provider env vars for the resolved model (API keys, base URLs).
   *
   * Auth is never hardcoded here — it is whatever the provider layer resolved
   * onto the model (empty for subscription/OAuth login, `OPENAI_API_KEY` for an
   * api-key/env provider).
   *
   * @param model The resolved model whose `env` map (if any) is copied.
   * @returns A fresh object, so a caller mutating the result cannot leak back
   *   into shared config. Empty when `env` is unset.
   */
  buildEnv(model: ResolvedModel): Record<string, string> {
    return { ...(model.env ?? {}) };
  }

  /**
   * Build argv for a one-shot, non-streaming call (`codex exec`). Used by
   * AI-assisted merge resolution and skill distillation, where only the final
   * text answer is wanted, not an event stream. The bypass flag keeps it
   * non-interactive.
   *
   * @param prompt The prompt text, passed as the trailing positional.
   * @param model Optional model name; `--model` is added only when provided so
   *   the caller can defer to Codex's own default.
   * @returns The argv array for the one-shot process.
   */
  buildPrintCommand(prompt: string, model?: string): string[] {
    const argv = ["codex", "exec", "--dangerously-bypass-approvals-and-sandbox"];
    if (model !== undefined) {
      argv.push("--model", model);
    }
    argv.push(prompt);
    return argv;
  }

  /**
   * Parse Codex's `--json` stdout into normalized {@link AgentEvent}s.
   *
   * The stream is JSONL: one JSON object per line, but pipe chunk boundaries do
   * NOT align to newlines, so a `buffer` holds the trailing partial line across
   * reads and a line is parsed only once a `\n` completes it. Each complete
   * line is handed to `parseCodexLine`, which returns `null` for blank or
   * malformed lines (those are skipped rather than aborting the turn).
   *
   * @param stream The child process's stdout as a byte stream.
   * @returns An async iterable of events, in stream order.
   */
  async *parseEvents(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent> {
    const reader = stream.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // `stream: true` lets the decoder hold back a trailing partial
        // multi-byte sequence until the next chunk completes it.
        buffer += decoder.decode(value, { stream: true });

        const lines = buffer.split("\n");
        // The last element is the still-incomplete line (or ""); keep it.
        buffer = lines.pop() ?? "";

        for (const line of lines) {
          const event = parseCodexLine(line);
          if (event) yield event;
        }
      }

      // End of stream: a final argument-less decode() flushes whatever bytes the
      // streaming decoder is still holding, so they are not silently dropped.
      buffer += decoder.decode();

      // A last line written without a trailing newline is still an event.
      const tail = parseCodexLine(buffer);
      if (tail) yield tail;
    } finally {
      // Release the lock even when the consumer stops iterating early.
      reader.releaseLock();
    }
  }
}
162
+
163
/**
 * Parse a single `codex exec --json` line into an {@link AgentEvent}, or `null`
 * for a blank line, a line that is not valid JSON, or JSON that is not an object.
 *
 * codex-cli 0.128 emits the thread/item schema, flat per line:
 *   - `{ type: "thread.started", thread_id }`            ← the resume session id
 *   - `{ type: "turn.started" | "turn.completed", usage? }`
 *   - `{ type: "item.completed", item: { type, … } }`    ← messages / tool actions
 * The older EventMsg form (`{ id, msg: { type, … } }`) is tolerated too, so the
 * parser works across codex versions.
 *
 * Fields populated on the returned event: `type` (top-level `type`, else the
 * nested `msg.type`, else `"unknown"`), `raw`, and — when available —
 * `sessionId`, `tool`, `usage` and `error`.
 *
 * @param line One line of the JSONL stream (surrounding whitespace is ignored).
 * @returns The normalized event, or `null` when the line carries no event.
 */
function parseCodexLine(line: string): AgentEvent | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let raw: unknown;
  try {
    raw = JSON.parse(trimmed);
  } catch {
    // Not valid JSON (partial flush, diagnostic noise) — skip, never throw.
    return null;
  }

  if (typeof raw !== "object" || raw === null) return null;
  const top = raw as Record<string, unknown>;

  // Thread/item form is flat; the legacy EventMsg form nests under `msg`.
  const msg =
    typeof top.msg === "object" && top.msg !== null ? (top.msg as Record<string, unknown>) : top;

  const type =
    typeof top.type === "string" ? top.type : typeof msg.type === "string" ? msg.type : "unknown";

  const event: AgentEvent = { type, raw };

  const sessionId = extractCodexSessionId(msg, top);
  if (sessionId !== undefined) event.sessionId = sessionId;

  const tool = extractCodexTool(top, msg);
  if (tool !== undefined) event.tool = tool;

  const usage = extractCodexUsage(top);
  if (usage !== undefined) event.usage = usage;

  // Surface a failure reason, as the Claude parser does, so a failed turn is
  // distinguishable from a successful one without digging through `raw`.
  // NOTE(review): assumes `error` events carry a string `message` and
  // `turn.failed` events carry `error: { message }` — confirm against the
  // codex-cli version in use.
  if (type === "error" || type === "turn.failed") {
    const nested =
      typeof msg.error === "object" && msg.error !== null
        ? (msg.error as Record<string, unknown>).message
        : undefined;
    for (const candidate of [msg.message, msg.error, nested]) {
      if (typeof candidate === "string" && candidate.length > 0) {
        event.error = candidate;
        break;
      }
    }
  }

  return event;
}
208
+
209
/**
 * Find the id to pass to `codex exec resume` on the next turn.
 *
 * Candidates are tried in priority order and the first non-empty string wins:
 *   1. top-level `thread_id` (the `thread.started` event in codex-cli 0.128),
 *   2. `session_id` on the (possibly nested) message — the legacy
 *      `session_configured` event,
 *   3. top-level `session_id`, accepted so resume keeps working across versions.
 *
 * @param msg The event payload (`top.msg` for legacy events, else `top` itself).
 * @param top The top-level parsed JSON object.
 * @returns The session id, or `undefined` when no candidate qualifies.
 */
function extractCodexSessionId(
  msg: Record<string, unknown>,
  top: Record<string, unknown>,
): string | undefined {
  const byPriority: unknown[] = [top.thread_id, msg.session_id, top.session_id];
  return byPriority.find(
    (value): value is string => typeof value === "string" && value.length > 0,
  );
}
223
+
224
/**
 * Derive a coarse tool name from a Codex event.
 *
 * Thread/item schema: tool activity is a top-level `item` whose `type` is a
 * non-empty string other than `agent_message` / `reasoning`. An
 * `mcp_tool_call` item reports its own `tool` string when it has one,
 * `command_execution` is reported as `"shell"`, and any other item type
 * (e.g. `file_change`, `web_search`) is reported as-is.
 *
 * Legacy EventMsg schema: `exec_command_begin` is `"shell"`;
 * `mcp_tool_call_begin` reports `msg.tool`, else `msg.invocation.tool`, else
 * the generic `"mcp"`.
 *
 * @param top The top-level parsed JSON object.
 * @param msg The event payload (`top.msg` for legacy events, else `top` itself).
 * @returns The tool name, or `undefined` when no tool is involved.
 */
function extractCodexTool(
  top: Record<string, unknown>,
  msg: Record<string, unknown>,
): string | undefined {
  const { item } = top;
  if (item !== null && typeof item === "object") {
    const { type: kind, tool: itemTool } = item as Record<string, unknown>;
    const isToolItem =
      typeof kind === "string" && kind !== "" && kind !== "agent_message" && kind !== "reasoning";
    if (isToolItem) {
      switch (kind) {
        case "mcp_tool_call":
          // Prefer the concrete MCP tool; otherwise fall back to the item type.
          return typeof itemTool === "string" ? itemTool : kind;
        case "command_execution":
          return "shell";
        default:
          return kind as string;
      }
    }
  }

  switch (msg.type) {
    case "exec_command_begin":
      return "shell";
    case "mcp_tool_call_begin": {
      if (typeof msg.tool === "string") return msg.tool;
      const { invocation } = msg;
      const invoked =
        invocation !== null && typeof invocation === "object"
          ? (invocation as Record<string, unknown>).tool
          : undefined;
      return typeof invoked === "string" ? invoked : "mcp";
    }
    default:
      return undefined;
  }
}
258
+
259
/**
 * Read token usage from a Codex `turn.completed` event's `usage` object.
 *
 * The token total is `input_tokens + output_tokens`, each counted only when it
 * is a number. Other counters on `usage` are ignored (the original note says
 * the cached/reasoning counts are subsets of these two, so adding them would
 * double-count). Codex reports no USD figure, so `costUsd` is always 0.
 *
 * @param top The top-level parsed JSON object.
 * @returns `{ tokens, costUsd: 0 }`, or `undefined` when the event is not
 *   `turn.completed`, has no `usage` object, or the total is 0.
 */
function extractCodexUsage(
  top: Record<string, unknown>,
): { tokens: number; costUsd: number } | undefined {
  if (top.type !== "turn.completed") return undefined;

  const { usage } = top;
  if (usage === null || typeof usage !== "object") return undefined;

  const { input_tokens: inputCount, output_tokens: outputCount } = usage as Record<string, unknown>;
  const tokens =
    (typeof inputCount === "number" ? inputCount : 0) +
    (typeof outputCount === "number" ? outputCount : 0);

  return tokens === 0 ? undefined : { tokens, costUsd: 0 };
}
278
+
279
/**
 * Shared module-level {@link CodexRuntime} instance, for callers that do not
 * need dependency injection and can simply import a ready-made runtime.
 */
export const codexRuntime = new CodexRuntime();