@agentplate/cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/agents/architect.md +108 -0
- package/agents/builder.md +97 -0
- package/agents/coordinator.md +113 -0
- package/agents/deployer.md +117 -0
- package/agents/devops.md +114 -0
- package/agents/lead.md +107 -0
- package/agents/merger.md +103 -0
- package/agents/reviewer.md +90 -0
- package/agents/scout.md +95 -0
- package/agents/verifier.md +106 -0
- package/package.json +64 -0
- package/src/agents/guard-rules.ts +55 -0
- package/src/agents/identity.test.ts +161 -0
- package/src/agents/identity.ts +229 -0
- package/src/agents/manifest.test.ts +260 -0
- package/src/agents/manifest.ts +286 -0
- package/src/agents/overlay.test.ts +190 -0
- package/src/agents/overlay.ts +212 -0
- package/src/agents/system-prompt.test.ts +53 -0
- package/src/agents/system-prompt.ts +95 -0
- package/src/agents/turn-runner.ts +79 -0
- package/src/commands/coordinator.test.ts +75 -0
- package/src/commands/coordinator.ts +259 -0
- package/src/commands/deploy.test.ts +504 -0
- package/src/commands/deploy.ts +874 -0
- package/src/commands/doctor.test.ts +106 -0
- package/src/commands/doctor.ts +208 -0
- package/src/commands/init.ts +71 -0
- package/src/commands/log.ts +51 -0
- package/src/commands/mail.ts +197 -0
- package/src/commands/merge.ts +127 -0
- package/src/commands/model.ts +58 -0
- package/src/commands/prime.ts +61 -0
- package/src/commands/reap.ts +87 -0
- package/src/commands/serve.ts +61 -0
- package/src/commands/setup.ts +48 -0
- package/src/commands/ship.test.ts +106 -0
- package/src/commands/ship.ts +202 -0
- package/src/commands/skill.test.ts +458 -0
- package/src/commands/skill.ts +730 -0
- package/src/commands/sling.ts +365 -0
- package/src/commands/status.ts +60 -0
- package/src/commands/stop.ts +56 -0
- package/src/commands/tui.ts +199 -0
- package/src/commands/worktree.ts +77 -0
- package/src/config.test.ts +92 -0
- package/src/config.ts +202 -0
- package/src/db/sqlite.test.ts +77 -0
- package/src/db/sqlite.ts +102 -0
- package/src/deploy/audit.test.ts +233 -0
- package/src/deploy/audit.ts +245 -0
- package/src/deploy/context.test.ts +243 -0
- package/src/deploy/context.ts +72 -0
- package/src/deploy/registry.test.ts +101 -0
- package/src/deploy/registry.ts +86 -0
- package/src/deploy/secrets.test.ts +129 -0
- package/src/deploy/secrets.ts +69 -0
- package/src/deploy/targets/docker-gha.test.ts +323 -0
- package/src/deploy/targets/docker-gha.ts +841 -0
- package/src/deploy/types.ts +153 -0
- package/src/errors.test.ts +42 -0
- package/src/errors.ts +69 -0
- package/src/events/store.test.ts +183 -0
- package/src/events/store.ts +201 -0
- package/src/index.ts +137 -0
- package/src/insights/quality-gates.ts +73 -0
- package/src/json.test.ts +28 -0
- package/src/json.ts +50 -0
- package/src/logging/color.ts +62 -0
- package/src/logging/logger.ts +60 -0
- package/src/logging/sanitizer.test.ts +36 -0
- package/src/logging/sanitizer.ts +57 -0
- package/src/mail/client.test.ts +192 -0
- package/src/mail/client.ts +188 -0
- package/src/mail/store.test.ts +279 -0
- package/src/mail/store.ts +311 -0
- package/src/merge/lock.test.ts +88 -0
- package/src/merge/lock.ts +84 -0
- package/src/merge/queue.test.ts +136 -0
- package/src/merge/queue.ts +177 -0
- package/src/merge/resolver.test.ts +219 -0
- package/src/merge/resolver.ts +274 -0
- package/src/paths.ts +36 -0
- package/src/providers/apply.test.ts +90 -0
- package/src/providers/apply.ts +66 -0
- package/src/providers/registry.test.ts +74 -0
- package/src/providers/registry.ts +254 -0
- package/src/runtimes/claude.ts +313 -0
- package/src/runtimes/codex.ts +280 -0
- package/src/runtimes/cursor.ts +247 -0
- package/src/runtimes/gemini.ts +173 -0
- package/src/runtimes/mock.ts +71 -0
- package/src/runtimes/opencode.ts +259 -0
- package/src/runtimes/registry.test.ts +924 -0
- package/src/runtimes/registry.ts +63 -0
- package/src/runtimes/resolve.ts +45 -0
- package/src/runtimes/types.ts +97 -0
- package/src/scaffold.ts +68 -0
- package/src/secrets.test.ts +51 -0
- package/src/secrets.ts +78 -0
- package/src/serve/api.ts +667 -0
- package/src/serve/server.test.ts +433 -0
- package/src/serve/server.ts +271 -0
- package/src/serve/system.ts +90 -0
- package/src/serve/weather.ts +140 -0
- package/src/sessions/reaper.test.ts +162 -0
- package/src/sessions/reaper.ts +149 -0
- package/src/sessions/store.test.ts +351 -0
- package/src/sessions/store.ts +350 -0
- package/src/skills/distiller.test.ts +498 -0
- package/src/skills/distiller.ts +426 -0
- package/src/skills/feedback.test.ts +300 -0
- package/src/skills/feedback.ts +168 -0
- package/src/skills/lifecycle.ts +169 -0
- package/src/skills/retrieval.test.ts +421 -0
- package/src/skills/retrieval.ts +365 -0
- package/src/skills/safety.test.ts +335 -0
- package/src/skills/safety.ts +216 -0
- package/src/skills/store.test.ts +425 -0
- package/src/skills/store.ts +684 -0
- package/src/skills/types.ts +107 -0
- package/src/types.ts +442 -0
- package/src/utils/detect.test.ts +35 -0
- package/src/utils/detect.ts +82 -0
- package/src/version.test.ts +19 -0
- package/src/version.ts +7 -0
- package/src/wizard/setup.ts +254 -0
- package/src/worktree/manager.test.ts +181 -0
- package/src/worktree/manager.ts +229 -0
- package/templates/overlay.md.tmpl +102 -0
- package/ui/dist/assets/index-C7rXIMER.css +1 -0
- package/ui/dist/assets/index-W4kbr4by.js +4526 -0
- package/ui/dist/favicon.svg +21 -0
- package/ui/dist/index.html +16 -0
- package/ui/dist/logo-clay.svg +21 -0
- package/ui/dist/logo.svg +18 -0
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude Code runtime adapter.
|
|
3
|
+
*
|
|
4
|
+
* Drives Anthropic's `claude` CLI in headless, spawn-per-turn mode. Each turn is
|
|
5
|
+
* a single `claude -p … --output-format stream-json` invocation whose stdout is a
|
|
6
|
+
* stream of NDJSON events; {@link ClaudeRuntime.parseEvents} normalizes those into
|
|
7
|
+
* {@link AgentEvent}s. The adapter is stateless — session continuity across turns
|
|
8
|
+
* is carried entirely by the runtime session id (`--resume`), which the caller
|
|
9
|
+
* extracts from the `sessionId` an event reports and threads back in via
|
|
10
|
+
* {@link DirectSpawnOpts.resumeSessionId}.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { ResolvedModel } from "../types.ts";
|
|
14
|
+
import type { AgentEvent, AgentRuntime, DirectSpawnOpts, InteractiveSpawnOpts } from "./types.ts";
|
|
15
|
+
|
|
16
|
+
/**
 * Native Claude Code sub-agent / orchestration tools, switched off for every
 * session Agentplate drives.
 *
 * Teammates are meant to be spawned exclusively with `agentplate sling` (run
 * through Bash) so the work lands in the session store, mail bus, and merge
 * queue. Sub-agents started with Claude Code's own tools bypass all of that:
 * they do not appear in `ap serve` / `ap tui` and their work is never merged.
 * Disallowing these names leaves sling as the single spawn path while
 * Bash/Read/Edit/Write etc. stay usable. Names Claude Code does not know are
 * harmless — it simply ignores tools it doesn't have.
 */
const BLOCKED_SPAWN_TOOLS = ["Task", "Agent", "Workflow"] as const;

/**
 * Tool deny-list for the INTERACTIVE coordinator session only: the spawn tools
 * above plus every file-mutation tool.
 *
 * The coordinator dispatches work (via `agentplate sling` in Bash) and must not
 * edit code itself; removing the mutation tools enforces that at the tool
 * layer. Bash/Read/Grep/Glob are left alone so slinging and surveying keep
 * working. Headless worker turns ({@link ClaudeRuntime.buildDirectSpawn}) do
 * NOT use this list, because workers have to edit.
 */
const COORDINATOR_BLOCKED_TOOLS = [
  ...BLOCKED_SPAWN_TOOLS,
  "Edit", "Write", "MultiEdit", "NotebookEdit",
] as const;
|
|
43
|
+
|
|
44
|
+
export class ClaudeRuntime implements AgentRuntime {
  /** Identifier this adapter is registered under; equals the `--runtime claude` value. */
  readonly id = "claude";

  /** Stability tier — Claude Code is the primary, fully-supported runtime. */
  readonly stability = "stable" as const;

  /**
   * Location the overlay is written to. Claude Code reads `.claude/CLAUDE.md`
   * from the working directory by itself, so the overlay is a file rather than
   * a command-line flag.
   */
  readonly instructionPath = ".claude/CLAUDE.md";

  /**
   * Assemble the argv for a single headless streaming turn (run via `Bun.spawn`).
   *
   * Layout, in order:
   * - `-p <prompt>`: non-interactive run that exits when done. A missing prompt
   *   becomes "" so a resume-only nudge turn (real text supplied on stdin by
   *   the caller) still yields a well-formed argv.
   * - `--output-format stream-json --verbose`: per-event NDJSON for
   *   {@link parseEvents}; `--verbose` is what makes the stream include
   *   tool-use / session events instead of a single result.
   * - `--model <model>`: the concrete model resolved upstream.
   * - `--resume <id>`: only when `resumeSessionId` is truthy, so a first turn
   *   (undefined or "") starts a fresh session.
   * - `--disallowedTools <tools...>`: variadic, so it is followed directly by
   *   `--permission-mode`, which terminates the tool list.
   * - `--permission-mode bypassPermissions`: workers run unattended in an
   *   isolated worktree, where a permission prompt would deadlock the process.
   *
   * @param opts Prompt, model and optional resume id for this turn.
   * @returns An argv array (never a shell string), so no value is subject to
   *   shell interpolation.
   */
  buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    const argv: string[] = [
      "claude",
      "-p",
      opts.prompt ?? "",
      "--output-format",
      "stream-json",
      "--verbose",
      "--model",
      opts.model,
    ];

    // Truthiness check on purpose: an empty id means "no prior session".
    if (opts.resumeSessionId) {
      argv.push("--resume", opts.resumeSessionId);
    }

    // The variadic tool list must be closed by another flag, hence the order.
    argv.push("--disallowedTools", ...BLOCKED_SPAWN_TOOLS);
    argv.push("--permission-mode", "bypassPermissions");
    return argv;
  }

  /**
   * Assemble the argv for an ATTENDED interactive Claude Code session
   * (`coordinator start`), run in the foreground with inherited stdio.
   *
   * The interactive session is the coordinator and is dispatch-only:
   * {@link COORDINATOR_BLOCKED_TOOLS} removes the native sub-agent tools and the
   * file-mutation tools. The role text is passed as a literal
   * `--append-system-prompt` argv value, and a non-empty seed message is added
   * as the trailing positional, which claude treats as the first user turn
   * while staying interactive.
   *
   * @param opts Model, permission mode, optional system prompt and seed message.
   * @returns The argv array; the optional parts are omitted when empty or unset.
   */
  buildInteractiveSpawn(opts: InteractiveSpawnOpts): string[] {
    const { systemPrompt, initialMessage } = opts;

    const roleArgs =
      systemPrompt && systemPrompt.length > 0 ? ["--append-system-prompt", systemPrompt] : [];
    const seedArgs = initialMessage && initialMessage.length > 0 ? [initialMessage] : [];

    return [
      "claude",
      "--model",
      opts.model,
      // Variadic list; the following `--permission-mode` flag terminates it so
      // the trailing seed message is never swallowed as a tool name.
      "--disallowedTools",
      ...COORDINATOR_BLOCKED_TOOLS,
      "--permission-mode",
      opts.permissionMode === "bypass" ? "bypassPermissions" : "default",
      ...roleArgs,
      ...seedArgs,
    ];
  }

  /**
   * Provider environment variables (API keys, base URLs) for the resolved model.
   *
   * Nothing is hardcoded here: the result is exactly what the provider layer
   * attached to the model.
   *
   * @param model The resolved model whose `env` should be exported.
   * @returns A shallow copy of `model.env` (or `{}` when it is unset), so a
   *   caller mutating the result cannot leak back into shared config.
   */
  buildEnv(model: ResolvedModel): Record<string, string> {
    const source = model.env ?? {};
    return { ...source };
  }

  /**
   * Assemble the argv for a one-shot, non-streaming call
   * (`claude -p … --output-format text`), for callers that want only the final
   * text answer rather than an event stream.
   *
   * @param prompt Prompt text passed to `-p`.
   * @param model Optional model; when `undefined` no `--model` flag is added so
   *   Claude Code's own default applies.
   * @returns The argv array.
   */
  buildPrintCommand(prompt: string, model?: string): string[] {
    const base = ["claude", "-p", prompt, "--output-format", "text"];
    return model === undefined ? base : [...base, "--model", model];
  }

  /**
   * Turn Claude Code's stream-json stdout into normalized {@link AgentEvent}s.
   *
   * The stream is NDJSON, but chunk boundaries do not line up with newlines, so
   * the text after the last `\n` is carried over between reads and a line is
   * only handed to the line parser once its newline has arrived. Lines the
   * parser rejects (blank or not JSON) produce no event. After the stream ends,
   * whatever is left without a terminating newline gets one last parse attempt.
   *
   * @param stream Raw stdout bytes of the `claude` process.
   * @returns An async iterable of events, in stream order.
   */
  async *parseEvents(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent> {
    const reader = stream.getReader();
    const utf8 = new TextDecoder();
    let pending = "";

    try {
      for (let chunk = await reader.read(); !chunk.done; chunk = await reader.read()) {
        // `stream: true` makes the decoder hold back a trailing partial
        // multi-byte sequence until the next chunk completes it.
        pending += utf8.decode(chunk.value, { stream: true });

        // Peel off every newline-terminated line; the remainder stays pending.
        let newlineAt = pending.indexOf("\n");
        while (newlineAt !== -1) {
          const line = pending.slice(0, newlineAt);
          pending = pending.slice(newlineAt + 1);

          const parsed = parseClaudeLine(line);
          if (parsed !== null) yield parsed;

          newlineAt = pending.indexOf("\n");
        }
      }

      // The process may exit right after its last event without a newline.
      const last = parseClaudeLine(pending);
      if (last !== null) yield last;
    } finally {
      // Runs on early consumer exit too, so the stream is never left locked.
      reader.releaseLock();
    }
  }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Convert one stream-json line into an {@link AgentEvent}.
 *
 * Returns `null` — and never throws — when the line is blank, is not valid
 * JSON, or parses to something other than a non-null object. Otherwise the
 * event carries:
 * - `type`: the message's string `type`, or "unknown" when it has none,
 * - `raw`: the parsed object itself,
 * - `sessionId`: a non-empty string `session_id`, needed for the next `--resume`,
 * - `tool` / `usage`: whatever the tool-name and usage extractors report,
 * - `error`: the `result` string of an `is_error` message, or — for a message
 *   of type "error" — its string `error`, falling back to its string `message`.
 *
 * A free function rather than a closure so it can be unit-tested directly.
 */
function parseClaudeLine(line: string): AgentEvent | null {
  const text = line.trim();
  if (text.length === 0) return null;

  let raw: unknown;
  try {
    raw = JSON.parse(text);
  } catch {
    // Partial flush or diagnostic noise: drop the line, keep the turn alive.
    return null;
  }
  if (raw === null || typeof raw !== "object") return null;

  const fields = raw as Record<string, unknown>;
  const event: AgentEvent = {
    type: typeof fields.type === "string" ? fields.type : "unknown",
    raw,
  };

  const sessionId = fields.session_id;
  if (typeof sessionId === "string" && sessionId !== "") {
    event.sessionId = sessionId;
  }

  const toolName = extractToolName(fields);
  if (toolName !== undefined) event.tool = toolName;

  const spend = extractUsage(fields);
  if (spend !== undefined) event.usage = spend;

  // Failure text lives in `result` for an `is_error` message and in
  // `error` / `message` for a bare `error` message.
  let failure: string | undefined;
  if (fields.is_error === true && typeof fields.result === "string") {
    failure = fields.result;
  } else if (fields.type === "error") {
    if (typeof fields.error === "string") {
      failure = fields.error;
    } else if (typeof fields.message === "string") {
      failure = fields.message;
    }
  }
  if (failure !== undefined) event.error = failure;

  return event;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Read token usage and USD cost from a Claude Code `result` message.
 *
 * Tokens are the sum of the numeric `input_tokens`, `output_tokens`,
 * `cache_creation_input_tokens` and `cache_read_input_tokens` fields of
 * `msg.usage`, so cache reads/writes are counted as well; non-numeric or
 * missing fields contribute nothing. Cost is `msg.total_cost_usd` when it is a
 * number, else 0.
 *
 * @param msg One parsed stream-json message.
 * @returns `{ tokens, costUsd }`, or `undefined` when the message is not of
 *   type "result" or when both figures are 0.
 */
function extractUsage(
  msg: Record<string, unknown>,
): { tokens: number; costUsd: number } | undefined {
  if (msg.type !== "result") return undefined;

  // A missing / non-object `usage` is treated as "no counters at all".
  const counters: Record<string, unknown> =
    typeof msg.usage === "object" && msg.usage !== null
      ? (msg.usage as Record<string, unknown>)
      : {};

  const tokens = [
    "input_tokens",
    "output_tokens",
    "cache_creation_input_tokens",
    "cache_read_input_tokens",
  ]
    .map((field) => counters[field])
    .reduce<number>((sum, count) => (typeof count === "number" ? sum + count : sum), 0);

  const reportedCost = msg.total_cost_usd;
  const costUsd = typeof reportedCost === "number" ? reportedCost : 0;

  return tokens === 0 && costUsd === 0 ? undefined : { tokens, costUsd };
}
|
|
288
|
+
|
|
289
|
+
/**
 * Find the tool name carried by an assistant message, if any.
 *
 * Claude nests tool calls as `{ type: "tool_use", name: "Edit", … }` entries in
 * the `message.content` array. The first entry that is a non-null object with
 * `type === "tool_use"` and a string `name` wins.
 *
 * @param msg One parsed stream-json message.
 * @returns That first tool name, or `undefined` when `message` is not an
 *   object, `content` is not an array, or no entry qualifies.
 */
function extractToolName(msg: Record<string, unknown>): string | undefined {
  const envelope = msg.message;
  if (envelope === null || typeof envelope !== "object") return undefined;

  const blocks: unknown = (envelope as Record<string, unknown>).content;
  if (!Array.isArray(blocks)) return undefined;

  const toolUse: unknown = blocks.find((entry: unknown) => {
    if (entry === null || typeof entry !== "object") return false;
    const candidate = entry as Record<string, unknown>;
    return candidate.type === "tool_use" && typeof candidate.name === "string";
  });

  return toolUse === undefined ? undefined : (toolUse as { name: string }).name;
}
|
|
311
|
+
|
|
312
|
+
/** Singleton for callers that do not need dependency injection. */
|
|
313
|
+
// Module-level shared instance. ClaudeRuntime declares only readonly fields,
// so the same object can be reused by every caller that imports it.
export const claudeRuntime = new ClaudeRuntime();
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Codex runtime adapter.
|
|
3
|
+
*
|
|
4
|
+
* Drives OpenAI's `codex` CLI in headless, spawn-per-turn mode. Like Claude Code,
|
|
5
|
+
* Codex authenticates with its OWN login: a ChatGPT/Codex OAuth session stored in
|
|
6
|
+
* `~/.codex/auth.json` (`auth_mode` + `tokens`). When the active provider uses
|
|
7
|
+
* `authMode: "subscription"`, the provider layer injects no key (see
|
|
8
|
+
* `src/runtimes/resolve.ts`) and Codex falls back to that OAuth login — the exact
|
|
9
|
+
* mirror of how the Anthropic provider reuses the `claude` login. An
|
|
10
|
+
* `api-key` / `env` provider instead flows `OPENAI_API_KEY` through
|
|
11
|
+
* {@link CodexRuntime.buildEnv}; auth is never hardcoded here.
|
|
12
|
+
*
|
|
13
|
+
* A headless turn is `codex exec --json …`, whose stdout is a stream of JSONL
|
|
14
|
+
* thread/item events (`thread.started`, `item.completed`, `turn.completed`);
|
|
15
|
+
* {@link CodexRuntime.parseEvents} normalizes those into {@link AgentEvent}s.
|
|
16
|
+
* Session continuity across turns is carried by the `thread_id` captured from the
|
|
17
|
+
* `thread.started` event and threaded back via `codex exec resume <id>`
|
|
18
|
+
* ({@link DirectSpawnOpts.resumeSessionId}).
|
|
19
|
+
*
|
|
20
|
+
* Validated against codex-cli 0.128 flag + JSON-event shapes.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import type { ResolvedModel } from "../types.ts";
|
|
24
|
+
import type { AgentEvent, AgentRuntime, DirectSpawnOpts, InteractiveSpawnOpts } from "./types.ts";
|
|
25
|
+
|
|
26
|
+
export class CodexRuntime implements AgentRuntime {
  /** Identifier this adapter is registered under; equals the `--runtime codex` value. */
  readonly id = "codex";

  /** Stability tier — beta: validated against codex-cli 0.128 flag shapes. */
  readonly stability = "beta" as const;

  /** Location the overlay is written to; Codex reads `AGENTS.md` from the working directory at startup. */
  readonly instructionPath = "AGENTS.md";

  /**
   * Assemble the argv for a single headless streaming turn (`codex exec --json`).
   *
   * Layout, in order:
   * - `exec`: non-interactive run that exits on completion.
   * - `resume <id>` (a subcommand): only when `resumeSessionId` is truthy, so a
   *   first turn (undefined or "") starts a fresh session.
   * - `--json`: the JSONL event stream {@link parseEvents} consumes.
   * - `--model <model>`: the concrete model resolved upstream.
   * - `--dangerously-bypass-approvals-and-sandbox`: the analog of Claude Code's
   *   `bypassPermissions`; workers run unattended in an isolated worktree, where
   *   an approval prompt would deadlock the process.
   * - the prompt as the trailing positional ("" when none was given).
   *
   * @param opts Prompt, model and optional resume id for this turn.
   * @returns An argv array (never a shell string), so no value is subject to
   *   shell interpolation.
   */
  buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    // Truthiness check on purpose: an empty id means "no prior session".
    const command = opts.resumeSessionId
      ? ["codex", "exec", "resume", opts.resumeSessionId]
      : ["codex", "exec"];

    return [
      ...command,
      "--json",
      "--model",
      opts.model,
      "--dangerously-bypass-approvals-and-sandbox",
      opts.prompt ?? "",
    ];
  }

  /**
   * Assemble the argv for an ATTENDED interactive Codex session
   * (`coordinator start`), run in the foreground with inherited stdio.
   *
   * Codex has no `--append-system-prompt` flag, so the role has to come from the
   * `AGENTS.md` overlay. `systemPrompt` and `permissionMode` exist on the options
   * for interface parity but are not turned into flags here. A non-empty seed
   * message is appended as the initial prompt while the TUI stays interactive.
   *
   * @param opts Model and optional seed message (other fields are ignored).
   * @returns The argv array.
   */
  buildInteractiveSpawn(opts: InteractiveSpawnOpts): string[] {
    const seed = opts.initialMessage;
    return seed && seed.length > 0
      ? ["codex", "--model", opts.model, seed]
      : ["codex", "--model", opts.model];
  }

  /**
   * Provider environment variables (API keys, base URLs) for the resolved model.
   *
   * Nothing is hardcoded here: the result is exactly what the provider layer
   * attached to the model (empty for a subscription/OAuth login,
   * `OPENAI_API_KEY` for an api-key/env provider).
   *
   * @param model The resolved model whose `env` should be exported.
   * @returns A shallow copy of `model.env` (or `{}` when it is unset), so a
   *   caller mutating the result cannot leak back into shared config.
   */
  buildEnv(model: ResolvedModel): Record<string, string> {
    const source = model.env ?? {};
    return { ...source };
  }

  /**
   * Assemble the argv for a one-shot, non-streaming call (`codex exec`), for
   * callers that want only the final text answer rather than an event stream.
   * The bypass flag keeps the call non-interactive.
   *
   * @param prompt Prompt text, placed last as the positional argument.
   * @param model Optional model; when `undefined` no `--model` flag is added so
   *   Codex's own default applies.
   * @returns The argv array.
   */
  buildPrintCommand(prompt: string, model?: string): string[] {
    const modelArgs = model === undefined ? [] : ["--model", model];
    return ["codex", "exec", "--dangerously-bypass-approvals-and-sandbox", ...modelArgs, prompt];
  }

  /**
   * Turn Codex's `--json` stdout into normalized {@link AgentEvent}s.
   *
   * The stream is JSONL, but chunk boundaries do not line up with newlines, so
   * the text after the last `\n` is carried over between reads and a line is
   * only handed to the line parser once its newline has arrived. Lines the
   * parser rejects (blank or not JSON) produce no event. After the stream ends,
   * whatever is left without a terminating newline gets one last parse attempt.
   *
   * @param stream Raw stdout bytes of the `codex` process.
   * @returns An async iterable of events, in stream order.
   */
  async *parseEvents(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent> {
    const reader = stream.getReader();
    const utf8 = new TextDecoder();
    let pending = "";

    try {
      for (let chunk = await reader.read(); !chunk.done; chunk = await reader.read()) {
        // `stream: true` keeps a split multi-byte sequence until it is complete.
        pending += utf8.decode(chunk.value, { stream: true });

        // Peel off every newline-terminated line; the remainder stays pending.
        let newlineAt = pending.indexOf("\n");
        while (newlineAt !== -1) {
          const line = pending.slice(0, newlineAt);
          pending = pending.slice(newlineAt + 1);

          const parsed = parseCodexLine(line);
          if (parsed !== null) yield parsed;

          newlineAt = pending.indexOf("\n");
        }
      }

      const last = parseCodexLine(pending);
      if (last !== null) yield last;
    } finally {
      // Runs on early consumer exit too, so the stream is never left locked.
      reader.releaseLock();
    }
  }
}
|
|
162
|
+
|
|
163
|
+
/**
 * Parse a single `codex exec --json` line into an {@link AgentEvent}, or `null`
 * for a blank or unparseable line (never throws).
 *
 * codex-cli 0.128 emits the thread/item schema, flat per line:
 *   - `{ type: "thread.started", thread_id }`          ← the resume session id
 *   - `{ type: "turn.started" | "turn.completed", usage? }`
 *   - `{ type: "item.completed", item: { type, … } }`  ← messages / tool actions
 * The older EventMsg form (`{ id, msg: { type, … } }`) is tolerated as well so
 * the parser works across codex versions.
 *
 * The event carries `type`, `raw`, and — when present — `sessionId`, `tool`,
 * `usage`, plus `error` for failure events (see {@link extractCodexError}), so
 * a failed Codex turn reports its reason the same way a failed Claude turn does.
 */
function parseCodexLine(line: string): AgentEvent | null {
  const trimmed = line.trim();
  if (!trimmed) return null;

  let raw: unknown;
  try {
    raw = JSON.parse(trimmed);
  } catch {
    // Partial flush or diagnostic noise: drop the line, keep the turn alive.
    return null;
  }

  if (typeof raw !== "object" || raw === null) return null;
  const top = raw as Record<string, unknown>;

  // Thread/item form is flat; the legacy EventMsg form nests under `msg`.
  const msg =
    typeof top.msg === "object" && top.msg !== null ? (top.msg as Record<string, unknown>) : top;

  const type =
    typeof top.type === "string" ? top.type : typeof msg.type === "string" ? msg.type : "unknown";

  const event: AgentEvent = { type, raw };

  const sessionId = extractCodexSessionId(msg, top);
  if (sessionId !== undefined) event.sessionId = sessionId;

  const tool = extractCodexTool(top, msg);
  if (tool !== undefined) event.tool = tool;

  const usage = extractCodexUsage(top);
  if (usage !== undefined) event.usage = usage;

  const failure = extractCodexError(type, top, msg);
  if (failure !== undefined) event.error = failure;

  return event;
}

/**
 * Pull a failure reason out of a Codex `error` or `turn.failed` event.
 *
 * Looks, on the top-level object first and then on the legacy `msg` payload,
 * for a string `error`, an `error.message` string, or a string `message`.
 * NOTE(review): the `turn.failed` → `error.message` and `error` → `message`
 * shapes follow the documented `codex exec --json` events; confirm against the
 * pinned codex-cli version.
 *
 * @returns The first non-empty reason found, or `undefined` for any other
 *   event type or when no reason text is present.
 */
function extractCodexError(
  type: string,
  top: Record<string, unknown>,
  msg: Record<string, unknown>,
): string | undefined {
  if (type !== "error" && type !== "turn.failed") return undefined;

  for (const holder of [top, msg]) {
    const nested = holder.error;
    if (typeof nested === "string" && nested.length > 0) return nested;
    if (typeof nested === "object" && nested !== null) {
      const nestedMessage = (nested as Record<string, unknown>).message;
      if (typeof nestedMessage === "string" && nestedMessage.length > 0) return nestedMessage;
    }

    const direct = holder.message;
    if (typeof direct === "string" && direct.length > 0) return direct;
  }
  return undefined;
}
|
|
208
|
+
|
|
209
|
+
/**
 * Find the id to resume a Codex session with.
 *
 * Candidates are tried in this order and the first non-empty string wins:
 * top-level `thread_id` (`thread.started`, codex 0.128), `session_id` on the
 * `msg` payload (legacy `session_configured`), then a top-level `session_id`,
 * which keeps resume working across codex versions.
 *
 * @param msg The event payload (the nested `msg` in the legacy form).
 * @param top The top-level parsed object.
 * @returns The session id, or `undefined` when no candidate qualifies.
 */
function extractCodexSessionId(
  msg: Record<string, unknown>,
  top: Record<string, unknown>,
): string | undefined {
  const inPriorityOrder: unknown[] = [top.thread_id, msg.session_id, top.session_id];
  const winner = inPriorityOrder.find((value) => typeof value === "string" && value.length > 0);
  return typeof winner === "string" ? winner : undefined;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Derive a coarse tool name from a Codex event.
 *
 * Thread/item schema: tool activity is a top-level `item` whose `type` is not
 * one of the non-tool kinds — `agent_message`, `reasoning`, or `error`. An
 * `error` item reports a failure, not a tool call, so it must not surface as a
 * tool named "error". For the remaining kinds:
 *   - `mcp_tool_call` → the item's string `tool`, else the item type itself,
 *   - `command_execution` → "shell",
 *   - anything else (e.g. `file_change`, `web_search`) → the item type.
 *
 * Legacy EventMsg schema: `exec_command_begin` → "shell";
 * `mcp_tool_call_begin` → `msg.tool`, else `msg.invocation.tool`, else "mcp".
 *
 * @param top The top-level parsed object.
 * @param msg The event payload (the nested `msg` in the legacy form).
 * @returns The tool name, or `undefined` when no tool is involved.
 */
function extractCodexTool(
  top: Record<string, unknown>,
  msg: Record<string, unknown>,
): string | undefined {
  const item = top.item;
  if (typeof item === "object" && item !== null) {
    const it = item as Record<string, unknown>;
    const itype = typeof it.type === "string" ? it.type : undefined;

    switch (itype) {
      case undefined:
      case "":
      case "agent_message":
      case "reasoning":
      case "error":
        // Not tool activity; fall through to the legacy checks below.
        break;
      case "mcp_tool_call":
        return typeof it.tool === "string" ? it.tool : itype;
      case "command_execution":
        return "shell";
      default:
        return itype;
    }
  }

  if (msg.type === "exec_command_begin") return "shell";
  if (msg.type === "mcp_tool_call_begin") {
    if (typeof msg.tool === "string") return msg.tool;
    const invocation = msg.invocation;
    if (typeof invocation === "object" && invocation !== null) {
      const tool = (invocation as Record<string, unknown>).tool;
      if (typeof tool === "string") return tool;
    }
    return "mcp";
  }
  return undefined;
}
|
|
258
|
+
|
|
259
|
+
/**
 * Read token usage from a Codex `turn.completed` event.
 *
 * Tokens are `usage.input_tokens + usage.output_tokens`, each counted as 0 when
 * it is not a number; cached/reasoning counts are subsets of those two and are
 * therefore not added again. Codex reports no USD figure, so `costUsd` is
 * always 0.
 *
 * @param top The top-level parsed event object.
 * @returns `{ tokens, costUsd: 0 }`, or `undefined` when the event is not
 *   `turn.completed`, has no `usage` object, or the token total is 0.
 */
function extractCodexUsage(
  top: Record<string, unknown>,
): { tokens: number; costUsd: number } | undefined {
  const usage = top.usage;
  if (top.type !== "turn.completed" || usage === null || typeof usage !== "object") {
    return undefined;
  }

  const counts = usage as Record<string, unknown>;
  const numberOrZero = (value: unknown): number => (typeof value === "number" ? value : 0);
  const tokens = numberOrZero(counts.input_tokens) + numberOrZero(counts.output_tokens);

  return tokens === 0 ? undefined : { tokens, costUsd: 0 };
}
|
|
278
|
+
|
|
279
|
+
/** Singleton for callers that do not need dependency injection. */
|
|
280
|
+
// Module-level shared instance. CodexRuntime declares only readonly fields,
// so the same object can be reused by every caller that imports it.
export const codexRuntime = new CodexRuntime();
|