@desplega.ai/agent-swarm 1.62.0 → 1.63.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,863 @@
1
+ /**
2
+ * Codex provider adapter.
3
+ *
4
+ * Wraps the `@openai/codex-sdk` (which drives the `codex app-server` JSON-RPC
5
+ * protocol via a child process). This file owns:
6
+ *
7
+ * Phase 1 — factory wiring + skeleton classes.
8
+ * Phase 2 — event stream normalization, CostData, AbortController, log file,
9
+ * AGENTS.md system-prompt injection, canResume via resumeThread.
10
+ * Phase 3 — per-session MCP config builder + model catalogue wiring. The
11
+ * baseline Codex config (`~/.codex/config.toml`) is written at
12
+ * Docker image build time (deferred to Phase 6). For local dev
13
+ * we pass the equivalent overrides via `new Codex({ config })`.
14
+ *
15
+ * Phases 4-5 extend this file with:
16
+ * - Skill resolution (slash-command inlining)
17
+ * - Adapter-side swarm hooks (cancellation polling, tool-loop detection, ...)
18
+ *
19
+ * ### Codex SDK `config` option
20
+ *
21
+ * `CodexOptions.config` is typed as `CodexConfigObject` — a recursive
22
+ * `Record<string, CodexConfigValue>` where values are primitives, arrays, or
23
+ * nested objects. The SDK flattens the object into dotted-path `--config`
24
+ * overrides for the underlying Codex CLI. This means we can pass a STRUCTURED
25
+ * object like `{ mcp_servers: { "agent-swarm": { url: "..." } } }` and the
26
+ * SDK handles the flattening — no pre-flattening required on our side.
27
+ * `CodexConfigObject` is NOT exported from the SDK, so we use
28
+ * `NonNullable<CodexOptions["config"]>` (or `Record<string, unknown>` for
29
+ * locally-built fragments) instead of duplicating the type.
30
+ *
31
+ * ### MCP server field names (verified against developers.openai.com/codex/mcp)
32
+ *
33
+ * Streamable HTTP transport (supported):
34
+ * url, http_headers, bearer_token_env_var, enabled, startup_timeout_sec,
35
+ * tool_timeout_sec, enabled_tools, disabled_tools
36
+ *
37
+ * Stdio transport (supported):
38
+ * command, args, env, enabled, startup_timeout_sec, tool_timeout_sec
39
+ *
40
+ * SSE transport is NOT yet supported by Codex (tracked in openai/codex#2129).
41
+ * We skip any SSE servers with a warning so the session still runs.
42
+ *
43
+ * Type discipline: every Codex-related type below is imported directly from
44
+ * `@openai/codex-sdk`. We do NOT hand-roll parallel interfaces for `Thread`,
45
+ * `Turn`, events, or items — the SDK already exports them as a tagged union.
46
+ */
47
+
48
+ import os from "node:os";
49
+ import { join } from "node:path";
50
+ import {
51
+ type AgentMessageItem,
52
+ Codex,
53
+ type CodexOptions,
54
+ type CommandExecutionItem,
55
+ type ErrorItem,
56
+ type FileChangeItem,
57
+ type McpToolCallItem,
58
+ type ReasoningItem,
59
+ type Thread,
60
+ type ThreadEvent,
61
+ type ThreadItem,
62
+ type ThreadOptions,
63
+ type TodoListItem,
64
+ type Usage,
65
+ type WebSearchItem,
66
+ } from "@openai/codex-sdk";
67
+ import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
68
+ import {
69
+ CODEX_DEFAULT_MODEL,
70
+ type CodexModel,
71
+ computeCodexCostUsd,
72
+ getCodexContextWindow,
73
+ resolveCodexModel,
74
+ } from "./codex-models";
75
+ import { resolveCodexPrompt } from "./codex-skill-resolver";
76
+ import { createCodexSwarmEventHandler } from "./codex-swarm-events";
77
+ import type {
78
+ CostData,
79
+ ProviderAdapter,
80
+ ProviderEvent,
81
+ ProviderResult,
82
+ ProviderSession,
83
+ ProviderSessionConfig,
84
+ } from "./types";
85
+
86
/**
 * Local name for the value accepted by `CodexOptions.config`. The SDK does not
 * export its `CodexConfigObject` type, so it is derived from the option itself.
 */
type CodexConfig = NonNullable<CodexOptions["config"]>;

/**
 * A single installed MCP server as listed by
 * `GET /api/agents/:id/mcp-servers?resolveSecrets=true`.
 */
interface InstalledMcpServer {
  /** Server name; used as the key under `mcp_servers` in the Codex config. */
  name: string;
  /** Transport the server speaks. */
  transport: "stdio" | "http" | "sse";
  /** Servers are only forwarded to Codex when both flags are true. */
  isActive: boolean;
  isEnabled: boolean;
  /** Executable for stdio servers. */
  command?: string | null;
  /** JSON-encoded argument list for stdio servers. */
  args?: string | null;
  /** Endpoint for http / sse servers. */
  url?: string | null;
  /** JSON-encoded header map for http servers. */
  headers?: string | null;
  /** Secret-resolved environment, injected when `resolveSecrets=true`. */
  resolvedEnv?: Record<string, string>;
  /** Secret-resolved headers, injected when `resolveSecrets=true`. */
  resolvedHeaders?: Record<string, string>;
}

/**
 * Shape returned by `GET /api/agents/:id/mcp-servers?resolveSecrets=true`.
 * Mirrors `pi-mono-adapter.ts:430-439` and `claude-adapter.ts:59-72`, plus
 * the DB handler at `src/http/mcp-servers.ts:170-210` which injects the
 * `resolvedEnv` / `resolvedHeaders` fields when `resolveSecrets=true`.
 */
interface InstalledMcpServersResponse {
  servers: InstalledMcpServer[];
  total?: number;
}
110
+
111
/** Key under `mcp_servers` reserved for the swarm's own MCP server. */
const SWARM_MCP_SERVER_NAME = "agent-swarm";

/** Timeouts (in seconds) written into every MCP server entry. */
const MCP_STARTUP_TIMEOUT_SEC = 30;
const MCP_TOOL_TIMEOUT_SEC = 120;

/**
 * Parse the JSON-encoded `args` column of a stdio MCP server.
 *
 * @returns the argument list (numbers / booleans coerced to strings), `[]`
 *   when the column is empty, or `null` when the value is not valid JSON or
 *   not an array of primitives.
 */
function parseMcpServerArgs(raw: string | null | undefined): string[] | null {
  if (!raw) return [];
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!Array.isArray(parsed)) return null;
  const args: string[] = [];
  for (const value of parsed as unknown[]) {
    if (typeof value === "string") {
      args.push(value);
    } else if (typeof value === "number" || typeof value === "boolean") {
      args.push(String(value));
    } else {
      return null;
    }
  }
  return args;
}

/**
 * Parse the JSON-encoded `headers` column of an HTTP MCP server.
 *
 * @returns the header map (numbers / booleans coerced to strings), `{}` when
 *   the column is empty, or `null` when the value is not valid JSON or not a
 *   plain object of primitives.
 */
function parseMcpServerHeaders(raw: string | null | undefined): Record<string, string> | null {
  if (!raw) return {};
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
  const headers: Record<string, string> = {};
  for (const [key, value] of Object.entries(parsed as Record<string, unknown>)) {
    if (typeof value === "string") {
      headers[key] = value;
    } else if (typeof value === "number" || typeof value === "boolean") {
      headers[key] = String(value);
    } else {
      return null;
    }
  }
  return headers;
}

/**
 * Build the per-session Codex config object, which becomes the
 * `config` option to `new Codex({ config })`. This layers on top of the
 * baseline `~/.codex/config.toml` written at Docker image build time (Phase 6).
 *
 * Includes:
 *   1. Baseline overrides (model, approval_policy, sandbox_mode, …) — repeated
 *      here (in addition to the baseline file) so local dev without the baseline
 *      file still gets the same settings.
 *   2. The swarm MCP server over Streamable HTTP, with per-task headers so the
 *      server can correlate cross-task inheritance.
 *   3. Installed MCP servers fetched from the API, mapped to Codex's MCP config
 *      shape (stdio or Streamable HTTP). SSE servers are skipped with a warning.
 *
 * Nothing in here is fatal: every problem is reported as a `raw_stderr`
 * warning through `emit` and the function still returns a usable config.
 *   - Fetch failures leave only the swarm server configured.
 *   - An installed server that reuses the reserved `agent-swarm` name is
 *     skipped so it cannot replace the swarm entry (and its auth headers).
 *   - Stored `args` / `headers` that are not valid JSON of the expected shape
 *     are replaced by an empty list / map.
 *
 * @param config Session config; `apiUrl`, `apiKey`, `agentId` and `taskId` are read.
 * @param model  Resolved Codex model, written to the `model` key.
 * @param emit   Sink for `raw_stderr` warnings produced while building.
 * @returns The structured config object to hand to the Codex SDK.
 */
export async function buildCodexConfig(
  config: ProviderSessionConfig,
  model: CodexModel,
  emit: (event: ProviderEvent) => void,
): Promise<CodexConfig> {
  const warn = (content: string): void => {
    emit({ type: "raw_stderr", content });
  };
  const mcpServers: Record<string, Record<string, unknown>> = {};

  // (2) Swarm MCP server — Streamable HTTP transport.
  // Field names verified against https://developers.openai.com/codex/mcp:
  // `url`, `http_headers`, `enabled`, `startup_timeout_sec`, `tool_timeout_sec`.
  mcpServers[SWARM_MCP_SERVER_NAME] = {
    url: `${config.apiUrl}/mcp`,
    http_headers: {
      Authorization: `Bearer ${config.apiKey}`,
      "X-Agent-ID": config.agentId,
      "X-Source-Task-Id": config.taskId ?? "",
    },
    enabled: true,
    startup_timeout_sec: MCP_STARTUP_TIMEOUT_SEC,
    tool_timeout_sec: MCP_TOOL_TIMEOUT_SEC,
  };

  // (3) Installed MCP servers — fetched from the API. Non-fatal on failure.
  if (config.apiUrl && config.apiKey && config.agentId) {
    try {
      const res = await fetch(
        // Encode the id so an unusual agent id cannot alter the request path.
        `${config.apiUrl}/api/agents/${encodeURIComponent(config.agentId)}/mcp-servers?resolveSecrets=true`,
        {
          headers: {
            Authorization: `Bearer ${config.apiKey}`,
            "X-Agent-ID": config.agentId,
          },
        },
      );
      if (!res.ok) {
        warn(
          `[codex] Failed to fetch installed MCP servers: HTTP ${res.status}. Continuing with only the swarm MCP server.\n`,
        );
      } else {
        const data = (await res.json()) as InstalledMcpServersResponse;
        for (const srv of data.servers ?? []) {
          if (!srv.isActive || !srv.isEnabled) continue;

          // Never let an installed server shadow the swarm server's entry.
          if (srv.name === SWARM_MCP_SERVER_NAME) {
            warn(
              `[codex] Skipping MCP server "${srv.name}": the name is reserved for the swarm MCP server.\n`,
            );
            continue;
          }

          switch (srv.transport) {
            case "stdio": {
              if (!srv.command) break;
              const args = parseMcpServerArgs(srv.args);
              if (args === null) {
                warn(
                  `[codex] MCP server "${srv.name}": "args" is not a JSON array of strings — starting it with no arguments.\n`,
                );
              }
              mcpServers[srv.name] = {
                command: srv.command,
                args: args ?? [],
                env: srv.resolvedEnv ?? {},
                enabled: true,
                startup_timeout_sec: MCP_STARTUP_TIMEOUT_SEC,
                tool_timeout_sec: MCP_TOOL_TIMEOUT_SEC,
              };
              break;
            }
            case "http": {
              if (!srv.url) break;
              const storedHeaders = parseMcpServerHeaders(srv.headers);
              if (storedHeaders === null) {
                warn(
                  `[codex] MCP server "${srv.name}": "headers" is not a JSON object of strings — ignoring stored headers.\n`,
                );
              }
              mcpServers[srv.name] = {
                url: srv.url,
                // Secret-resolved headers win over the stored (unresolved) ones.
                http_headers: { ...(storedHeaders ?? {}), ...(srv.resolvedHeaders ?? {}) },
                enabled: true,
                startup_timeout_sec: MCP_STARTUP_TIMEOUT_SEC,
                tool_timeout_sec: MCP_TOOL_TIMEOUT_SEC,
              };
              break;
            }
            case "sse": {
              warn(
                `[codex] Skipping MCP server "${srv.name}": SSE transport is not yet supported by Codex (tracked in openai/codex#2129).\n`,
              );
              break;
            }
          }
        }
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      warn(
        `[codex] Failed to fetch installed MCP servers: ${message}. Continuing with only the swarm MCP server.\n`,
      );
    }
  }

  // (1) Baseline overrides. Keep these aligned with the Dockerfile baseline
  // at `~/.codex/config.toml` (Phase 6). Repeating them here makes local dev
  // (no baseline file) behave identically to the Docker worker.
  return {
    model,
    approval_policy: "never",
    sandbox_mode: "danger-full-access",
    skip_git_repo_check: true,
    show_raw_agent_reasoning: false,
    mcp_servers: mcpServers as CodexConfig,
  };
}
241
+
242
/**
 * Running session backed by a Codex `Thread`.
 *
 * The constructor starts the turn immediately (`runSession`) and returns; the
 * outcome is delivered through `waitForCompletion()`. Every event is written to
 * the session log file and forwarded to listeners. Events emitted before the
 * first `onEvent` subscriber attaches are buffered and replayed to it.
 */
class CodexSession implements ProviderSession {
  private readonly thread: Thread;
  private readonly config: ProviderSessionConfig;
  private readonly agentsMdHandle: CodexAgentsMdHandle;
  private readonly resolvedModel: CodexModel;
  private readonly contextWindow: number;
  private readonly skillsDir: string;
  /** Internal swarm handler (if any) plus every `onEvent` subscriber. */
  private readonly listeners: Array<(event: ProviderEvent) => void> = [];
  /** Events emitted before the first `onEvent` subscriber attached. */
  private readonly eventQueue: ProviderEvent[] = [];
  private readonly logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
  private readonly startedAt = Date.now();
  private readonly completionPromise: Promise<ProviderResult>;
  private resolveCompletion!: (result: ProviderResult) => void;
  private abortController: AbortController | null = null;
  /**
   * Mutable holder for the current turn's `AbortController`. Shared with the
   * swarm event handler so it can trigger an abort from outside `runSession`
   * (e.g. when a tool-loop is detected or the task has been cancelled).
   */
  private readonly abortRef: { current: AbortController | null } = { current: null };
  private _sessionId: string | undefined;
  private numTurns = 0;
  private lastUsage: Usage | null = null;
  private aborted = false;
  private settled = false;
  /**
   * True once `onEvent` has been called. Until then emitted events are kept in
   * `eventQueue`, even when the internal swarm handler is already listening.
   */
  private hasExternalListener = false;

  /**
   * @param thread         Codex thread (fresh or resumed) to run the prompt on.
   * @param config         Session config (prompt, log file, API context, ...).
   * @param agentsMdHandle Handle for the managed AGENTS.md block; cleaned up
   *                       when the session finishes.
   * @param resolvedModel  Model used for cost and context-window calculations.
   * @param initialEvents  Events produced before construction (e.g. config
   *                       warnings); replayed into the normal event path.
   * @param skillsDir      Optional skills directory override.
   */
  constructor(
    thread: Thread,
    config: ProviderSessionConfig,
    agentsMdHandle: CodexAgentsMdHandle,
    resolvedModel: CodexModel,
    initialEvents: ProviderEvent[] = [],
    skillsDir?: string,
  ) {
    this.thread = thread;
    this.config = config;
    this.agentsMdHandle = agentsMdHandle;
    this.resolvedModel = resolvedModel;
    this.contextWindow = getCodexContextWindow(resolvedModel);
    // `CODEX_SKILLS_DIR` lets tests / non-Docker installs point at a custom
    // tree without polluting `~/.codex/skills` on the host. Fall back to the
    // runtime default of `${HOME}/.codex/skills`.
    this.skillsDir =
      skillsDir ?? process.env.CODEX_SKILLS_DIR ?? join(os.homedir(), ".codex", "skills");
    this.logFileHandle = Bun.file(config.logFile).writer();

    this.completionPromise = new Promise<ProviderResult>((resolve) => {
      this.resolveCompletion = resolve;
    });

    // Adapter-side swarm hooks: lower-latency cancellation poll, tool-loop
    // detection, heartbeat, activity ping, and context-usage reporting. The
    // handler reads `abortRef.current` to trigger aborts from outside
    // `runSession` (the runner-side polling at `runner.ts:2812-2841` is the
    // backstop). Skipped when there's no task or API context to talk to.
    if (config.taskId && config.apiUrl && config.apiKey) {
      this.listeners.push(
        createCodexSwarmEventHandler({
          apiUrl: config.apiUrl,
          apiKey: config.apiKey,
          agentId: config.agentId,
          taskId: config.taskId,
          abortRef: this.abortRef,
        }),
      );
    }

    // Replay any events that fired before the session was constructed
    // (e.g. warnings from `buildCodexConfig`). They take the same path as
    // events emitted during the session: written to the log file, delivered
    // to attached listeners, and buffered for the first `onEvent` subscriber.
    for (const event of initialEvents) {
      this.emit(event);
    }

    // Kick the event loop asynchronously so the constructor can return.
    void this.runSession();
  }

  /** Thread id reported by `thread.started`, else the thread handle's own id. */
  get sessionId(): string | undefined {
    return this._sessionId ?? this.thread.id ?? undefined;
  }

  /**
   * Subscribe to session events. The first subscriber also receives every
   * event emitted before it attached; later subscribers only see new events.
   */
  onEvent(listener: (event: ProviderEvent) => void): void {
    this.listeners.push(listener);
    if (this.hasExternalListener) return;
    this.hasExternalListener = true;
    // Drain before delivering so the queue is empty even if a listener throws.
    const backlog = this.eventQueue.splice(0, this.eventQueue.length);
    for (const event of backlog) {
      try {
        listener(event);
      } catch {
        // Same policy as `emit`: a bad listener must not kill the session.
      }
    }
  }

  /** Resolves once the turn has finished, failed, or been cancelled. Never rejects. */
  async waitForCompletion(): Promise<ProviderResult> {
    return this.completionPromise;
  }

  /** Request cancellation of the running turn. */
  async abort(): Promise<void> {
    this.aborted = true;
    this.abortController?.abort();
  }

  /** Log the event, fan it out to listeners, and buffer it if nobody subscribed yet. */
  private emit(event: ProviderEvent): void {
    try {
      this.logFileHandle.write(
        `${JSON.stringify({ ...event, timestamp: new Date().toISOString() })}\n`,
      );
    } catch {
      // Log writer failure must not break the event stream.
    }
    for (const listener of this.listeners) {
      try {
        listener(event);
      } catch {
        // Swallow listener errors — a bad listener must not kill the session.
      }
    }
    // The internal swarm handler counts as a listener, so "no listeners" is
    // not a reliable signal that the consumer has subscribed. Buffer until the
    // first `onEvent` call so early events are not lost to that consumer.
    if (!this.hasExternalListener) {
      this.eventQueue.push(event);
    }
  }

  /** Resolve the completion promise at most once. */
  private settle(result: ProviderResult): void {
    if (this.settled) return;
    this.settled = true;
    this.resolveCompletion(result);
  }

  /** Build CostData from the most recent turn usage. */
  private buildCostData(usage: Usage | null, isError: boolean): CostData {
    const inputTokens = usage?.input_tokens ?? 0;
    const cachedInputTokens = usage?.cached_input_tokens ?? 0;
    const outputTokens = usage?.output_tokens ?? 0;
    return {
      // Runner overrides with its own session id.
      sessionId: "",
      taskId: this.config.taskId,
      agentId: this.config.agentId,
      // Codex SDK does not report dollar cost directly. We compute it from
      // token counts × per-model pricing in `codex-models.ts`. The pricing
      // table is sourced from developers.openai.com/api/docs/pricing — bump
      // it whenever OpenAI updates published rates.
      totalCostUsd: computeCodexCostUsd(
        this.resolvedModel,
        inputTokens,
        cachedInputTokens,
        outputTokens,
      ),
      inputTokens,
      outputTokens,
      cacheReadTokens: cachedInputTokens,
      // Codex does not distinguish cache writes in its Usage payload.
      cacheWriteTokens: 0,
      durationMs: Date.now() - this.startedAt,
      numTurns: this.numTurns,
      model: this.resolvedModel,
      isError,
    };
  }

  /** Extract a human-friendly tool name for normalized `tool_start` events. */
  private toolNameForItem(item: ThreadItem): string {
    switch (item.type) {
      case "command_execution":
        return "bash";
      case "file_change": {
        const first = item.changes[0];
        if (!first) return "Edit";
        return first.kind === "add" ? "Write" : first.kind === "delete" ? "Delete" : "Edit";
      }
      case "mcp_tool_call":
        return item.tool;
      case "web_search":
        return "WebSearch";
      default:
        return item.type;
    }
  }

  /** Arguments payload for a `tool_start` event mirroring the SDK item. */
  private toolArgsForItem(item: ThreadItem): unknown {
    // `item.type` is the union's discriminant, so each case is already narrowed.
    switch (item.type) {
      case "command_execution":
        return { command: item.command };
      case "file_change":
        return { changes: item.changes };
      case "mcp_tool_call":
        return { server: item.server, tool: item.tool, arguments: item.arguments };
      case "web_search":
        return { query: item.query };
      default:
        return {};
    }
  }

  /** Whether the item variant should surface as a `tool_start`/`tool_end` pair. */
  private isToolItem(
    item: ThreadItem,
  ): item is CommandExecutionItem | FileChangeItem | McpToolCallItem | WebSearchItem {
    return (
      item.type === "command_execution" ||
      item.type === "file_change" ||
      item.type === "mcp_tool_call" ||
      item.type === "web_search"
    );
  }

  /** Translate one raw SDK event into zero or more normalized provider events. */
  private handleEvent(event: ThreadEvent): void {
    // Mirror every raw SDK event into the log as raw_log for debuggability —
    // parity with Claude's JSONL envelope.
    this.emit({ type: "raw_log", content: JSON.stringify(event) });

    switch (event.type) {
      case "thread.started": {
        this._sessionId = event.thread_id;
        this.emit({ type: "session_init", sessionId: event.thread_id });
        break;
      }
      case "turn.started": {
        this.numTurns += 1;
        break;
      }
      case "item.started": {
        if (this.isToolItem(event.item)) {
          this.emit({
            type: "tool_start",
            toolCallId: event.item.id,
            toolName: this.toolNameForItem(event.item),
            args: this.toolArgsForItem(event.item),
          });
        }
        break;
      }
      case "item.updated": {
        // Surface partial agent_message deltas as `custom` events so a future
        // UI can show streaming tokens. We deliberately use `custom` (instead
        // of new ProviderEvent variants) to avoid touching the cross-provider
        // contract — the dashboard can opt-in by listening for the event name.
        // The full text still flows through `item.completed` → `message`
        // below, so consumers that don't subscribe to deltas see no behavior
        // change.
        const updatedItem = event.item as ThreadItem;
        if (updatedItem.type === "agent_message") {
          const msg = updatedItem as AgentMessageItem;
          if (msg.text) {
            this.emit({
              type: "custom",
              name: "codex.message_delta",
              data: { itemId: updatedItem.id, text: msg.text },
            });
          }
        }
        break;
      }
      case "item.completed": {
        this.handleCompletedItem(event.item);
        break;
      }
      case "turn.completed": {
        this.lastUsage = event.usage;
        if (event.usage) {
          // The Codex SDK reports `input_tokens` as the SUM of every prompt
          // sent to the model across the entire turn (one `codex exec` call
          // can fan out to dozens of model invocations as MCP tools roundtrip
          // back and forth). For chatty turns this number routinely exceeds
          // the model's context window, even though no single model call did.
          //
          // For peak-context reporting we want a proxy for "the largest
          // single-call prompt". We approximate it as the uncached portion
          // (cached tokens are reused across calls so they count once toward
          // the actual peak), plus the output. This isn't perfect — the SDK
          // would have to expose per-call stats for that — but it's far more
          // representative than `(input + output) / window` which clamps to
          // 1.0 the moment a turn makes any meaningful tool history.
          const uncachedInput = Math.max(
            0,
            event.usage.input_tokens - event.usage.cached_input_tokens,
          );
          const peakProxy = uncachedInput + event.usage.output_tokens;
          // `contextPercent` is on a 0-100 scale across all providers — claude
          // emits `(used / total) * 100`, pi-mono passes through `usage.percent`
          // which is already 0-100. The dashboard at
          // new-ui/src/pages/tasks/[id]/page.tsx renders it via `.toFixed(0)`
          // expecting an integer percent, so a 0-1 fraction would render as
          // "0%" instead of e.g. "40%".
          // A non-positive window would make the ratio NaN/Infinity; report 0.
          const contextPercent =
            this.contextWindow > 0 ? Math.min(100, (peakProxy / this.contextWindow) * 100) : 0;
          this.emit({
            type: "context_usage",
            contextUsedTokens: peakProxy,
            contextTotalTokens: this.contextWindow,
            contextPercent,
            outputTokens: event.usage.output_tokens,
          });
        }
        break;
      }
      case "turn.failed": {
        const message = this.formatTerminalError(event.error.message);
        this.emit({ type: "error", message });
        break;
      }
      case "error": {
        const message = this.formatTerminalError(event.message);
        this.emit({ type: "error", message });
        break;
      }
    }
  }

  /** Normalize a finished thread item: `tool_end`, `message`, `custom`, or `error`. */
  private handleCompletedItem(item: ThreadItem): void {
    if (this.isToolItem(item)) {
      this.emit({
        type: "tool_end",
        toolCallId: item.id,
        toolName: this.toolNameForItem(item),
        result: item,
      });
      return;
    }
    switch (item.type) {
      case "agent_message": {
        const msg = item as AgentMessageItem;
        if (msg.text) {
          this.emit({ type: "message", role: "assistant", content: msg.text });
        }
        break;
      }
      case "reasoning": {
        // Promote Codex reasoning items to first-class `custom` events so
        // the dashboard can render them in a separate "thinking" panel
        // without conflating them with the agent's actual output. Codex
        // emits these between turns when the model produces an explicit
        // reasoning trace (gpt-5.x reasoning effort > none).
        const r = item as ReasoningItem;
        const text =
          (r as { text?: string; summary?: string }).text ??
          (r as { summary?: string }).summary ??
          "";
        if (text) {
          this.emit({
            type: "custom",
            name: "codex.reasoning",
            data: { itemId: r.id, text },
          });
        }
        break;
      }
      case "todo_list": {
        // Promote Codex todo lists to a `custom` event so a future
        // dashboard widget can render the checkbox state. The shape of
        // the items (title, status, etc.) lives in the SDK's
        // `TodoListItem` and is preserved verbatim.
        const todo = item as TodoListItem;
        this.emit({
          type: "custom",
          name: "codex.todo_list",
          data: { itemId: todo.id, items: (todo as { items?: unknown }).items ?? [] },
        });
        break;
      }
      case "error": {
        const errItem = item as ErrorItem;
        this.emit({ type: "error", message: this.formatTerminalError(errItem.message) });
        break;
      }
    }
  }

  /**
   * Detect context-window-exceeded errors from the Codex CLI / SDK and rewrite
   * them with a clearer, actionable message. Codex does not auto-compact like
   * Claude does — when context fills, the next model call hard-fails. We can't
   * compact retroactively, so we just mark the failure with a recognizable
   * `[context-overflow]` prefix that the runner can flag in dashboards. See
   * Linear DES-143 (codex auto-compaction follow-up) for the long-term fix.
   *
   * Patterns observed in the wild (case-insensitive):
   *   - "context length exceeded"
   *   - "maximum context length"
   *   - "too many tokens"
   *   - "input too long"
   *   - "request too large"
   *   - "context_length_exceeded"
   *
   * @param raw Error message as reported by the SDK.
   * @returns The rewritten message for overflow errors, otherwise `raw` unchanged.
   */
  private formatTerminalError(raw: string): string {
    const normalized = raw.toLowerCase();
    const overflowPatterns = [
      "context length exceeded",
      "maximum context length",
      "too many tokens",
      "input too long",
      "request too large",
      "context_length_exceeded",
    ];
    if (overflowPatterns.some((p) => normalized.includes(p))) {
      return `[context-overflow] Codex turn exceeded the model's context window for ${this.resolvedModel} (${this.contextWindow.toLocaleString()} tokens). Codex does not auto-compact conversation history like Claude does — start a fresh task or split the work into smaller turns. Original error: ${raw}`;
    }
    return raw;
  }

  /** Emit the terminal `result` event and settle the session as cancelled (exit 130). */
  private finishCancelled(): void {
    const cost = this.buildCostData(this.lastUsage, true);
    this.emit({ type: "result", cost, isError: true, errorCategory: "cancelled" });
    this.settle({
      exitCode: 130,
      sessionId: this.sessionId,
      cost,
      isError: true,
      failureReason: "cancelled",
    });
  }

  /**
   * Drive one Codex turn from prompt resolution to a settled result.
   *
   * Never rejects: every failure path emits a `result` event and settles the
   * completion promise, and cleanup errors are swallowed. This matters because
   * the constructor launches this method with `void`.
   */
  private async runSession(): Promise<void> {
    const controller = new AbortController();
    this.abortController = controller;
    // Expose the controller to the swarm event handler so it can trigger an
    // abort from outside this method (tool-loop detection, cancellation poll).
    this.abortRef.current = controller;
    let terminalError: string | undefined;
    let sawTurnCompleted = false;

    try {
      // Inline Codex skills if the prompt starts with a slash command. If the
      // prompt doesn't begin with a recognized slash command (or the skill
      // file is missing), this returns the prompt unchanged and emits a
      // `raw_stderr` warning in the latter case.
      const resolvedPrompt = await resolveCodexPrompt(this.config.prompt, this.skillsDir, (event) =>
        this.emit(event),
      );

      const streamed = await this.thread.runStreamed(resolvedPrompt, {
        signal: controller.signal,
      });

      for await (const event of streamed.events) {
        this.handleEvent(event);
        if (event.type === "turn.completed") {
          sawTurnCompleted = true;
        }
        if (event.type === "turn.failed" && !terminalError) {
          terminalError = this.formatTerminalError(event.error.message);
        }
        if (event.type === "error" && !terminalError) {
          terminalError = this.formatTerminalError(event.message);
        }
      }

      // The stream can end quietly after an abort instead of throwing. With no
      // completed turn and no reported failure, that is a cancellation.
      if (controller.signal.aborted && !sawTurnCompleted && !terminalError) {
        this.finishCancelled();
        return;
      }

      const isError = Boolean(terminalError) || !sawTurnCompleted;
      const cost = this.buildCostData(this.lastUsage, isError);
      this.emit({
        type: "result",
        cost,
        isError,
        errorCategory: terminalError ? "turn_failed" : undefined,
      });
      this.settle({
        exitCode: isError ? 1 : 0,
        sessionId: this.sessionId,
        cost,
        isError,
        // Always give failed sessions a reason, including a silently ended stream.
        failureReason:
          terminalError ??
          (sawTurnCompleted ? undefined : "Codex stream ended before the turn completed"),
      });
    } catch (err) {
      // An abort may surface at any await above (prompt resolution, stream
      // start, or mid-stream). It may come from `abort()` or from the swarm
      // handler via `abortRef`, so check the signal as well as the error name.
      const cancelled =
        this.aborted ||
        controller.signal.aborted ||
        (err instanceof Error && err.name === "AbortError");
      if (cancelled) {
        this.finishCancelled();
        return;
      }
      const message = err instanceof Error ? err.message : String(err);
      this.emit({ type: "raw_stderr", content: `[codex] Error: ${message}\n` });
      this.emit({ type: "error", message });
      const cost = this.buildCostData(this.lastUsage, true);
      this.emit({ type: "result", cost, isError: true, errorCategory: "exception" });
      this.settle({
        exitCode: 1,
        sessionId: this.sessionId,
        cost,
        isError: true,
        failureReason: message,
      });
    } finally {
      // Detach the abort controller now that the turn has settled.
      this.abortRef.current = null;
      try {
        await this.logFileHandle.end();
      } catch {
        // Ignore log writer cleanup failures.
      }
      try {
        await this.agentsMdHandle.cleanup();
      } catch {
        // Best-effort: the result is already settled, and a throw here would
        // become an unhandled rejection of the fire-and-forget `runSession()`.
      }
    }
  }
}
731
+
732
/**
 * Provider adapter that runs agent sessions through the Codex SDK.
 *
 * `createSession` writes the managed AGENTS.md block, builds the per-session
 * Codex config, and returns a `CodexSession` that starts running immediately.
 */
export class CodexAdapter implements ProviderAdapter {
  readonly name = "codex";

  /**
   * Optional override for the skill resolver's skills directory. When unset,
   * each `CodexSession` falls back to `CODEX_SKILLS_DIR` / `~/.codex/skills`.
   * Primarily a test hook so unit tests can point the adapter at a temp dir
   * without mutating `process.env`.
   */
  private readonly skillsDir?: string;

  constructor(opts: { skillsDir?: string } = {}) {
    this.skillsDir = opts.skillsDir;
  }

  /**
   * Options that point the SDK at the Codex CLI binary.
   *
   * The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
   * from the SDK's own module. When agent-swarm runs as a Bun single-file
   * compiled executable, the bundled SDK can't resolve `@openai/codex` at
   * runtime because it's not part of the bundle — it lives in a global
   * install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
   * image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
   * wrapper (or native binary) directly. Returns `{}` when unset so local
   * dev with `@openai/codex-sdk` installed as a regular node_modules
   * dependency keeps working via the SDK's own resolver.
   *
   * Shared by every `new Codex(...)` call in this class so they all resolve
   * the binary the same way.
   */
  private static codexPathOptions(): Pick<CodexOptions, "codexPathOverride"> {
    const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;
    return codexPathOverride ? { codexPathOverride } : {};
  }

  /**
   * Create and start a Codex-backed session.
   *
   * @param config Session config (cwd, system prompt, model, API context, ...).
   * @returns A session that is already running its turn.
   * @throws Re-throws any setup failure after attempting to remove the
   *   managed AGENTS.md block.
   */
  async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
    // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
    // (or refresh) the managed block before we spin up the thread.
    const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);

    try {
      // Resolve the model once and thread it through. Unknown values fall
      // back to `CODEX_DEFAULT_MODEL` (see `codex-models.ts`).
      const resolvedModel = resolveCodexModel(config.model);

      // Buffer warnings emitted during config-building so they're not lost
      // before `CodexSession.onEvent` attaches a listener. The buffer is
      // replayed into the session's event stream right after construction
      // via the `initialEvents` constructor parameter.
      const preSessionEvents: ProviderEvent[] = [];
      const bufferedEmit = (event: ProviderEvent) => {
        preSessionEvents.push(event);
      };

      // Warn (as a buffered event) if the caller passed a model that didn't
      // round-trip through `resolveCodexModel`. This catches typos early.
      if (
        config.model &&
        config.model.toLowerCase() !== resolvedModel &&
        !["opus", "sonnet", "haiku"].includes(config.model.toLowerCase())
      ) {
        bufferedEmit({
          type: "raw_stderr",
          content: `[codex] Unknown model "${config.model}" — falling back to ${CODEX_DEFAULT_MODEL}. See src/providers/codex-models.ts for the supported list.\n`,
        });
      }

      const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);

      // `CodexOptions.env` does NOT inherit from `process.env`. Construct a
      // minimal env explicitly so the spawned Codex CLI can still find its
      // binary (PATH), write to HOME, and authenticate (OPENAI_API_KEY).
      // Merge anything the runner passed in `config.env` on top.
      const env: Record<string, string> = {
        PATH: process.env.PATH ?? "",
        HOME: process.env.HOME ?? "",
        ...(process.env.OPENAI_API_KEY ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY } : {}),
        ...(process.env.NODE_EXTRA_CA_CERTS
          ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
          : {}),
        ...(config.env ?? {}),
      };

      const codex = new Codex({
        ...CodexAdapter.codexPathOptions(),
        env,
        config: mergedConfig,
      });

      const threadOptions: ThreadOptions = {
        workingDirectory: config.cwd,
        skipGitRepoCheck: true,
        sandboxMode: "danger-full-access",
        approvalPolicy: "never",
        model: resolvedModel,
      };

      const thread = config.resumeSessionId
        ? codex.resumeThread(config.resumeSessionId, threadOptions)
        : codex.startThread(threadOptions);

      return new CodexSession(
        thread,
        config,
        agentsMdHandle,
        resolvedModel,
        preSessionEvents,
        this.skillsDir,
      );
    } catch (err) {
      // If we failed to construct the thread, clean up the managed AGENTS.md
      // block so we don't leak state on the filesystem. A cleanup failure
      // must not replace the original error, which is the one callers need.
      try {
        await agentsMdHandle.cleanup();
      } catch {
        // Ignore — the setup error below takes precedence.
      }
      throw err;
    }
  }

  /**
   * Report whether a thread handle can be created for `sessionId`.
   *
   * @returns `false` for an empty / non-string id or when the SDK throws while
   *   building the handle; `true` otherwise.
   */
  async canResume(sessionId: string): Promise<boolean> {
    if (!sessionId || typeof sessionId !== "string") {
      return false;
    }
    try {
      // Resolve the CLI binary the same way `createSession` does. Without
      // this, a deployment that relies on `CODEX_PATH_OVERRIDE` could fail in
      // the SDK's default lookup and report every session as non-resumable.
      const codex = new Codex(CodexAdapter.codexPathOptions());
      // `resumeThread` is synchronous in 0.118.x and returns a Thread handle.
      // The runner only calls canResume when deciding whether to resume a
      // task, so we accept the (cheap) handshake cost.
      codex.resumeThread(sessionId);
      return true;
    } catch {
      return false;
    }
  }

  /** Format a command name as the `/<name>` token the skill resolver looks for. */
  formatCommand(commandName: string): string {
    // Codex has no native slash-command system. Phase 4 adds a skill resolver
    // that inlines the matching SKILL.md content into the turn prompt before
    // it reaches `thread.runStreamed()`. The leading `/<name>` token here is
    // the marker the resolver looks for (mirrors Claude's format).
    return `/${commandName}`;
  }
}