@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +1 -0
  2. package/openapi.json +1 -1
  3. package/package.json +8 -6
  4. package/src/be/db.ts +44 -0
  5. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  6. package/src/be/modelsdev-cache.json +152028 -0
  7. package/src/be/modelsdev-cache.ts +46 -0
  8. package/src/be/seed-pricing.ts +7 -44
  9. package/src/cli.tsx +12 -2
  10. package/src/commands/codex-session-runner.ts +132 -0
  11. package/src/commands/credential-wait.ts +2 -2
  12. package/src/commands/provider-credentials.ts +10 -5
  13. package/src/commands/runner.ts +3 -3
  14. package/src/prompts/base-prompt.ts +49 -3
  15. package/src/providers/claude-adapter.ts +83 -2
  16. package/src/providers/claude-managed-models.ts +18 -2
  17. package/src/providers/codex-adapter.ts +417 -97
  18. package/src/providers/codex-models.ts +9 -2
  19. package/src/providers/index.ts +28 -19
  20. package/src/providers/pricing-sources.md +7 -4
  21. package/src/providers/swarm-events-shared.ts +14 -0
  22. package/src/slack/HEURISTICS.md +5 -1
  23. package/src/slack/handlers.test.ts +35 -0
  24. package/src/slack/handlers.ts +79 -2
  25. package/src/tests/base-prompt.test.ts +46 -8
  26. package/src/tests/claude-managed-adapter.test.ts +4 -4
  27. package/src/tests/codex-adapter-otel.test.ts +4 -4
  28. package/src/tests/codex-adapter.test.ts +20 -7
  29. package/src/tests/codex-swarm-events.test.ts +35 -0
  30. package/src/tests/context-window.test.ts +1 -0
  31. package/src/tests/credential-check.test.ts +48 -29
  32. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  33. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  34. package/src/tests/migration-046-budgets.test.ts +6 -5
  35. package/src/tests/pricing-routes.test.ts +6 -5
  36. package/src/tests/provider-adapter.test.ts +10 -10
  37. package/src/tests/provider-command-format.test.ts +4 -4
  38. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  39. package/src/tools/send-task.ts +30 -9
  40. package/src/utils/context-window.ts +1 -0
  41. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  42. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  43. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  44. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  45. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  46. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  47. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  48. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  49. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  50. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  51. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  52. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  53. package/templates/schema.ts +26 -0
  54. package/templates/skills/agentmail-sending/config.json +13 -0
  55. package/templates/skills/agentmail-sending/content.md +48 -0
  56. package/templates/skills/artifacts/config.json +13 -0
  57. package/templates/skills/artifacts/content.md +87 -0
  58. package/templates/skills/browser-use-cloud/config.json +13 -0
  59. package/templates/skills/browser-use-cloud/content.md +155 -0
  60. package/templates/skills/desloppify/config.json +13 -0
  61. package/templates/skills/desloppify/content.md +201 -0
  62. package/templates/skills/exa-search/config.json +13 -0
  63. package/templates/skills/exa-search/content.md +106 -0
  64. package/templates/skills/jira-interaction/config.json +13 -0
  65. package/templates/skills/jira-interaction/content.md +252 -0
  66. package/templates/skills/kapso-whatsapp/config.json +13 -0
  67. package/templates/skills/kapso-whatsapp/content.md +369 -0
  68. package/templates/skills/kv-storage/config.json +13 -0
  69. package/templates/skills/kv-storage/content.md +111 -0
  70. package/templates/skills/linear-interaction/config.json +20 -0
  71. package/templates/skills/linear-interaction/content.md +230 -0
  72. package/templates/skills/pages/config.json +18 -0
  73. package/templates/skills/pages/content.md +85 -0
  74. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  75. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  76. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  77. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  78. package/templates/skills/sprite-cli/config.json +13 -0
  79. package/templates/skills/sprite-cli/content.md +133 -0
  80. package/templates/skills/turso-interaction/config.json +13 -0
  81. package/templates/skills/turso-interaction/content.md +192 -0
  82. package/templates/skills/workflow-iterate/config.json +18 -0
  83. package/templates/skills/workflow-iterate/content.md +399 -0
  84. package/templates/skills/workflow-structured-output/config.json +13 -0
  85. package/templates/skills/workflow-structured-output/content.md +101 -0
  86. package/templates/skills/x-api-interactions/config.json +13 -0
  87. package/templates/skills/x-api-interactions/content.md +109 -0
  88. package/templates/workflows/autopilot/config.json +13 -0
  89. package/templates/workflows/autopilot/content.md +58 -0
  90. package/templates/workflows/linear-drain-loop/config.json +21 -0
  91. package/templates/workflows/linear-drain-loop/content.md +72 -0
  92. package/templates/workflows/ralph-loop/config.json +13 -0
  93. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -66,6 +66,7 @@ import {
66
66
  type WebSearchItem,
67
67
  } from "@openai/codex-sdk";
68
68
  import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
69
+ import { getApiKey } from "../utils/api-key";
69
70
  import {
70
71
  CONTEXT_FORMULA,
71
72
  clampContextPercent,
@@ -383,7 +384,7 @@ export interface SummarizeSessionForCodexDeps {
383
384
  }
384
385
 
385
386
  /** Running session backed by a Codex `Thread`. */
386
- class CodexSession implements ProviderSession {
387
+ export class CodexSession implements ProviderSession {
387
388
  private readonly thread: Thread;
388
389
  private readonly config: ProviderSessionConfig;
389
390
  private readonly agentsMdHandle: CodexAgentsMdHandle;
@@ -1036,12 +1037,24 @@ class CodexSession implements ProviderSession {
1036
1037
  // preserve. Wrapped in its own try/catch so summary failure must NOT
1037
1038
  // block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
1038
1039
  // matches the parity convention used by the claude Stop hook + pi/opencode.
1039
- if (process.env.SKIP_SESSION_SUMMARY !== "1") {
1040
+ //
1041
+ // Skip the summary entirely when the session was aborted. The transcript
1042
+ // is incomplete, the LLM call would retry 3× through openrouter and
1043
+ // spam stderr with structured-output failures (red-herring noise we
1044
+ // saw in the templates-ui incident, 2026-05-28). Losing the summary
1045
+ // on abort is acceptable — it's cleanup, not load-bearing.
1046
+ const sessionWasAborted =
1047
+ this.aborted ||
1048
+ this.abortController?.signal.aborted === true ||
1049
+ this.pendingResult?.exitCode === 130;
1050
+ if (process.env.SKIP_SESSION_SUMMARY !== "1" && !sessionWasAborted) {
1040
1051
  try {
1041
1052
  await this.summarizeAtEnd();
1042
1053
  } catch (err) {
1043
1054
  console.error("session_summary failed (codex):", err);
1044
1055
  }
1056
+ } else if (sessionWasAborted) {
1057
+ console.debug("[codex] session aborted — skipping session_summary");
1045
1058
  }
1046
1059
 
1047
1060
  // Detach the abort controller now that the turn has settled.
@@ -1171,6 +1184,380 @@ class CodexSession implements ProviderSession {
1171
1184
  }
1172
1185
  }
1173
1186
 
1187
/**
 * Build a `CodexSession` running in the *current* process (no subprocess
 * isolation). Production sessions are spawned through
 * `CodexSubprocessSession`; this helper holds the core in-process creation
 * logic and is used by:
 *
 *  1. `CodexAdapter.createSession` when `bypassSubprocess: true`
 *     (unit tests that monkey-patch the SDK prototype).
 *  2. `runCodexSessionRunner` (the spawned subprocess entry point in
 *     `src/commands/codex-session-runner.ts`).
 *
 * Exported so the subprocess runner — which IS a fresh process — can build
 * its session via the same path the tests exercise.
 *
 * @param config Session configuration (cwd, system prompt, model, env,
 *   optional `resumeSessionId`).
 * @param opts Optional `skillsDir` and `summarizeDeps`, passed straight
 *   through to the `CodexSession` constructor (`summarizeDeps` defaults to `{}`).
 * @returns The constructed session, with any warnings raised during
 *   config/auth resolution handed over as its initial events.
 * @throws Re-throws whatever failed during construction, after attempting to
 *   remove the managed AGENTS.md block.
 */
export async function createInProcessCodexSession(
  config: ProviderSessionConfig,
  opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {},
): Promise<CodexSession> {
  // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
  // (or refresh) the managed block before we spin up the thread.
  const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);

  try {
    // Resolve the model once and thread it through. Claude shortnames map
    // to Codex equivalents; everything else passes through verbatim — the
    // SDK is the source of truth for what's valid.
    const resolvedModel = resolveCodexModel(config.model);

    // Warnings raised while building config / resolving auth happen before
    // any listener can be attached, so collect them here and hand the buffer
    // to the session constructor for replay.
    const preSessionEvents: ProviderEvent[] = [];
    const bufferedEmit = (event: ProviderEvent): void => {
      preSessionEvents.push(event);
    };

    const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);

    // Auth resolution. `codex_oauth` (in the swarm config store) wins over
    // `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
    // without it shadowing their ChatGPT login.
    const authMode = await resolveCodexAuthMode(config, bufferedEmit);

    // `CodexOptions.env` does NOT inherit from `process.env`, so build a
    // minimal env explicitly: PATH (to find the binary) and HOME (for
    // ~/.codex/auth.json). `OPENAI_API_KEY` is only forwarded when auth is
    // NOT in chatgpt mode — otherwise it would override the OAuth login at
    // the Codex CLI layer.
    const forwardOpenAiKey = authMode !== "chatgpt" && Boolean(process.env.OPENAI_API_KEY);
    const env: Record<string, string> = {
      PATH: process.env.PATH ?? "",
      HOME: process.env.HOME ?? "",
      ...(forwardOpenAiKey && process.env.OPENAI_API_KEY
        ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
        : {}),
      ...(process.env.NODE_EXTRA_CA_CERTS
        ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
        : {}),
      ...(config.env ?? {}),
      // Gated cross-service OTel linking: returns {} (no-op) when disabled;
      // spread last so the computed TRACEPARENT wins over config.env.
      ...buildOtelTraceparentEnv(config.env ?? process.env),
    };

    // The SDK's default resolver cannot find `@openai/codex` when agent-swarm
    // runs as a Bun single-file executable (the CLI lives in a global
    // install). Honor `CODEX_PATH_OVERRIDE` when set; otherwise leave the
    // option out so the SDK's own resolver is used.
    const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;

    const codex = new Codex({
      ...(codexPathOverride ? { codexPathOverride } : {}),
      env,
      config: mergedConfig,
    });

    const threadOptions: ThreadOptions = {
      workingDirectory: config.cwd,
      skipGitRepoCheck: true,
      sandboxMode: "danger-full-access",
      approvalPolicy: "never",
      model: resolvedModel,
    };

    const thread = config.resumeSessionId
      ? codex.resumeThread(config.resumeSessionId, threadOptions)
      : codex.startThread(threadOptions);

    return new CodexSession(
      thread,
      config,
      agentsMdHandle,
      resolvedModel,
      preSessionEvents,
      opts.skillsDir,
      opts.summarizeDeps ?? {},
    );
  } catch (err) {
    // Construction failed: remove the managed AGENTS.md block so we don't
    // leak state on the filesystem. A failing cleanup must not replace the
    // original error — that is the one the caller needs to see.
    try {
      await agentsMdHandle.cleanup();
    } catch (cleanupErr) {
      console.error("[codex] AGENTS.md cleanup failed after session construction error:", cleanupErr);
    }
    throw err;
  }
}
1303
+
1304
/**
 * Resolve the argv used to re-launch agent-swarm as a subprocess running the
 * `codex-session-runner` CLI subcommand.
 *
 * Resolution order:
 *
 *  1. `AGENT_SWARM_CODEX_RUNNER_ARGV` — a JSON-encoded array of strings. It is
 *     used verbatim when it parses to a non-empty string array whose first
 *     element (the executable) is non-empty. Anything else (invalid JSON,
 *     wrong shape, empty array) is ignored and resolution falls through.
 *  2. Dev mode (`bun src/cli.tsx worker ...`): when `process.argv[1]` ends in
 *     `.ts`/`.tsx`/`.js`/`.jsx`, re-launch as
 *     `[process.execPath, <script>, "codex-session-runner"]`.
 *  3. Compiled mode (`./agent-swarm worker ...`): re-launch as
 *     `[process.execPath, "codex-session-runner"]`.
 *
 * Exported for unit testing.
 *
 * @returns The full argv (executable first) to pass to the process spawner.
 */
export function resolveCodexRunnerArgv(): string[] {
  // An empty array passes `every()` vacuously, so the length and the
  // executable slot are checked explicitly: a spawner cannot run `[]`.
  const isUsableArgv = (value: unknown): value is string[] =>
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((part) => typeof part === "string") &&
    value[0] !== "";

  const override = process.env.AGENT_SWARM_CODEX_RUNNER_ARGV;
  if (override) {
    try {
      const parsed: unknown = JSON.parse(override);
      if (isUsableArgv(parsed)) {
        return parsed;
      }
    } catch {
      // Invalid JSON — fall through to inferred resolution.
    }
  }

  const execPath = process.execPath;
  const scriptArg = process.argv[1];
  // A source-file path in argv[1] means a runtime is interpreting the script,
  // so the script path has to be repeated in the re-launch argv.
  if (scriptArg && /\.(t|j)sx?$/.test(scriptArg)) {
    return [execPath, scriptArg, "codex-session-runner"];
  }
  return [execPath, "codex-session-runner"];
}
1342
+
1343
/**
 * Shape of the single JSON document the parent process writes to the codex
 * subprocess runner's stdin.
 */
interface CodexSubprocessInput {
  /** Session configuration, serialized as-is from the parent. */
  config: ProviderSessionConfig;
  /** Skills directory the adapter was constructed with, if any. */
  skillsDir?: string;
  /**
   * W3C TRACEPARENT env for the parent `worker.session.create` span.
   *
   * It is captured in the parent, where the OTel span context is live, and
   * shipped across so the subprocess can hand it to Codex via env. The
   * subprocess deliberately does NOT rebuild it with
   * `buildOtelTraceparentEnv`: its tracer is fresh and has no active span.
   * The runner is expected to merge this back into `config.env` before
   * constructing the SDK.
   */
  parentOtelEnv?: Record<string, string>;
}
1357
+
1358
/**
 * `ProviderSession` that runs the entire codex session inside a fresh
 * subprocess, so everything the session allocates (SDK state, transcript
 * buffer, JSON-RPC parser, listeners) is released when that subprocess
 * exits instead of accumulating in the long-lived runner.
 *
 * Wire protocol over the subprocess's stdout (line-delimited JSON):
 *   {"kind":"event",  "event":   <ProviderEvent>}
 *   {"kind":"result", "result":  <ProviderResult>}
 *   {"kind":"error",  "message": <string>}
 *
 * Subprocess stderr — and any stdout line that is not a JSON object — is
 * re-emitted to listeners as a `raw_stderr` event.
 */
class CodexSubprocessSession implements ProviderSession {
  /**
   * Parent environment variables copied into the subprocess when set to a
   * non-empty value. `config.env` is NOT forwarded here; it travels in the
   * stdin payload instead.
   */
  private static readonly FORWARDED_ENV_VARS = [
    "NODE_EXTRA_CA_CERTS",
    "MCP_BASE_URL",
    // Embedding / summarization paths read these:
    "OPENAI_API_KEY",
    "OPENROUTER_API_KEY",
    "ANTHROPIC_API_KEY",
    "CODEX_PATH_OVERRIDE",
    "CODEX_SKILLS_DIR",
    "SKIP_SESSION_SUMMARY",
    "MEMORY_RATERS",
  ] as const;

  private readonly proc: ReturnType<typeof Bun.spawn>;
  private readonly listeners: Array<(event: ProviderEvent) => void> = [];
  /** Events received before the first listener attached; replayed by `onEvent`. */
  private readonly eventQueue: ProviderEvent[] = [];
  private readonly completionPromise: Promise<ProviderResult>;
  private _sessionId: string | undefined;

  /**
   * Spawn the runner subprocess, write the JSON payload to its stdin, and
   * start consuming its output streams.
   *
   * @param config Session configuration, sent to the subprocess via stdin.
   * @param skillsDir Optional skills directory, sent alongside `config`.
   */
  constructor(config: ProviderSessionConfig, skillsDir: string | undefined) {
    const argv = resolveCodexRunnerArgv();
    const payload: CodexSubprocessInput = {
      config,
      skillsDir,
      // Capture TRACEPARENT here, in the parent's span context; the
      // subprocess cannot reconstruct it from its own tracer.
      parentOtelEnv: buildOtelTraceparentEnv(config.env ?? process.env),
    };

    const apiKey = getApiKey();

    // Minimal env: only what the subprocess needs to reach the API, locate
    // the codex CLI binary, and read OAuth tokens.
    const env: Record<string, string> = {
      PATH: process.env.PATH ?? "",
      HOME: process.env.HOME ?? "",
    };
    for (const name of CodexSubprocessSession.FORWARDED_ENV_VARS) {
      const value = process.env[name];
      if (value) {
        env[name] = value;
      }
    }
    if (apiKey) {
      env.AGENT_SWARM_API_KEY = apiKey;
      env.API_KEY = apiKey;
    }

    this.proc = Bun.spawn(argv, {
      env,
      stdin: "pipe",
      stdout: "pipe",
      stderr: "pipe",
    });

    // `Bun.spawn`'s `stdin` is typed as `number | FileSink`; with `stdin:
    // "pipe"` it is always a FileSink. Narrow via assertion.
    const stdin = this.proc.stdin as { write(s: string): void; end(): void };
    stdin.write(JSON.stringify(payload));
    stdin.end();

    this.completionPromise = this.processStreams();
  }

  /** Session id taken from the most recent `session_init` event, if any. */
  get sessionId(): string | undefined {
    return this._sessionId;
  }

  /**
   * Register a listener and replay every event that arrived before any
   * listener was attached. A listener that throws during replay does not
   * prevent the remaining queued events from being delivered.
   */
  onEvent(listener: (event: ProviderEvent) => void): void {
    this.listeners.push(listener);
    // Drain first so a throwing listener cannot leave stale events queued
    // for replay to a later listener.
    const pending = this.eventQueue.splice(0);
    for (const event of pending) {
      try {
        listener(event);
      } catch {
        // listener errors must not break the event stream
      }
    }
  }

  /** Resolves once the subprocess's streams have closed and it has exited. */
  async waitForCompletion(): Promise<ProviderResult> {
    return this.completionPromise;
  }

  /** Ask the subprocess to stop by sending it SIGTERM. */
  async abort(): Promise<void> {
    this.proc.kill("SIGTERM");
  }

  /**
   * Deliver an event to every listener, or queue it when none is attached
   * yet. Also records the session id carried by `session_init` events.
   */
  private emit(event: ProviderEvent): void {
    if (event.type === "session_init" && event.sessionId) {
      this._sessionId = event.sessionId;
    }
    if (this.listeners.length === 0) {
      this.eventQueue.push(event);
      return;
    }
    for (const listener of this.listeners) {
      try {
        listener(event);
      } catch {
        // listener errors must not break the event stream
      }
    }
  }

  /**
   * Consume stdout (protocol lines) and stderr (raw text) until both close,
   * then wait for the process to exit.
   *
   * @returns The structured result sent by the subprocess, or a synthesized
   *   error result (including the tail of stderr) when it exited without one.
   */
  private async processStreams(): Promise<ProviderResult> {
    const outcome: { result: ProviderResult | null } = { result: null };
    const recordResult = (r: ProviderResult): void => {
      outcome.result = r;
    };
    let stderrTail = "";

    const stdoutPromise = (async () => {
      const stdout = this.proc.stdout as ReadableStream<Uint8Array> | null;
      if (!stdout) return;
      // One streaming decoder for the whole stream: a multi-byte UTF-8
      // sequence split across two chunks must not be decoded as garbage.
      const decoder = new TextDecoder();
      let partial = "";
      for await (const chunk of stdout) {
        partial += decoder.decode(chunk, { stream: true });
        const parts = partial.split("\n");
        // The last element is an incomplete line (or "" after a newline).
        partial = parts.pop() ?? "";
        for (const line of parts) {
          const trimmed = line.trim();
          if (trimmed) {
            this.handleLine(trimmed, recordResult);
          }
        }
      }
      // Flush any bytes the decoder was still holding, then handle a final
      // line that was not newline-terminated.
      partial += decoder.decode();
      const last = partial.trim();
      if (last) {
        this.handleLine(last, recordResult);
      }
    })();

    const stderrPromise = (async () => {
      const stderr = this.proc.stderr as ReadableStream<Uint8Array> | null;
      if (!stderr) return;
      const decoder = new TextDecoder();
      const forward = (text: string): void => {
        if (!text) return;
        stderrTail = (stderrTail + text).slice(-2000);
        // Surface subprocess stderr into the parent's event stream.
        this.emit({ type: "raw_stderr", content: text });
      };
      for await (const chunk of stderr) {
        forward(decoder.decode(chunk, { stream: true }));
      }
      forward(decoder.decode());
    })();

    await Promise.all([stdoutPromise, stderrPromise]);
    const exitCode = await this.proc.exited;

    if (outcome.result) {
      return outcome.result;
    }
    // Subprocess exited before sending a structured result — synthesise one
    // so the caller doesn't hang on waitForCompletion. Include the stderr
    // tail so the actual error message reaches the failure reason.
    const stderrHint = stderrTail.trim() ? ` — stderr: ${stderrTail.trim().slice(-500)}` : "";
    return {
      exitCode: exitCode ?? 1,
      sessionId: this._sessionId,
      isError: true,
      failureReason: `codex subprocess exited (code=${exitCode ?? "?"}) without a structured result${stderrHint}`,
    };
  }

  /**
   * Interpret one non-empty stdout line from the subprocess.
   *
   * @param line Trimmed line of subprocess stdout.
   * @param setResult Callback invoked with the final result for `result`
   *   and `error` envelopes.
   */
  private handleLine(line: string, setResult: (r: ProviderResult) => void): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Not valid JSON — treat as raw stderr-equivalent.
      this.emit({ type: "raw_stderr", content: `${line}\n` });
      return;
    }
    // Valid JSON that is not an object (`null`, a number, a string, an
    // array) cannot be an envelope; reading `.kind` off `null` would throw.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      this.emit({ type: "raw_stderr", content: `${line}\n` });
      return;
    }
    const msg = parsed as {
      kind?: string;
      event?: ProviderEvent;
      result?: ProviderResult;
      message?: string;
    };
    if (msg.kind === "event" && msg.event) {
      this.emit(msg.event);
      return;
    }
    if (msg.kind === "result" && msg.result) {
      setResult(msg.result);
      return;
    }
    if (msg.kind === "error" && msg.message) {
      this.emit({ type: "error", message: msg.message });
      setResult({
        exitCode: 1,
        sessionId: this._sessionId,
        isError: true,
        failureReason: msg.message,
      });
      return;
    }
    // Unrecognized envelopes are ignored.
  }
}
1560
+
1174
1561
  export class CodexAdapter implements ProviderAdapter {
1175
1562
  readonly name = "codex";
1176
1563
  readonly traits = { hasMcp: true, hasLocalEnvironment: true };
@@ -1191,108 +1578,41 @@ export class CodexAdapter implements ProviderAdapter {
1191
1578
  */
1192
1579
  private readonly summarizeDeps: SummarizeSessionForCodexDeps;
1193
1580
 
1194
- constructor(opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {}) {
1581
+ /**
1582
+ * When true, run the codex session inside the runner process (no subprocess
1583
+ * spawn). Used by:
1584
+ * - Unit tests that monkey-patch `Codex.prototype.startThread` (the patch
1585
+ * would not survive a subprocess boundary).
1586
+ * - The spawned `codex-session-runner` subprocess itself, to avoid
1587
+ * re-spawning recursively.
1588
+ *
1589
+ * Production callers leave this `false`. Each codex session then runs in a
1590
+ * fresh subprocess and its heap dies when the task completes — keeping the
1591
+ * runner's VSZ bounded across thousands of task completions (Picateclas
1592
+ * spawn-OOM permanent fix, 2026-05-28).
1593
+ */
1594
+ private readonly bypassSubprocess: boolean;
1595
+
1596
+ constructor(
1597
+ opts: {
1598
+ skillsDir?: string;
1599
+ summarizeDeps?: SummarizeSessionForCodexDeps;
1600
+ bypassSubprocess?: boolean;
1601
+ } = {},
1602
+ ) {
1195
1603
  this.skillsDir = opts.skillsDir;
1196
1604
  this.summarizeDeps = opts.summarizeDeps ?? {};
1605
+ this.bypassSubprocess = opts.bypassSubprocess ?? false;
1197
1606
  }
1198
1607
 
1199
1608
  async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
1200
- // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
1201
- // (or refresh) the managed block before we spin up the thread.
1202
- const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);
1203
-
1204
- try {
1205
- // Resolve the model once and thread it through. Claude shortnames map
1206
- // to Codex equivalents; everything else passes through verbatim — the
1207
- // SDK is the source of truth for what's valid.
1208
- const resolvedModel = resolveCodexModel(config.model);
1209
-
1210
- // Buffer warnings emitted during config-building so they're not lost
1211
- // before `CodexSession.onEvent` attaches a listener. The buffer is
1212
- // replayed into the session's event stream right after construction
1213
- // via the `initialEvents` constructor parameter.
1214
- const preSessionEvents: ProviderEvent[] = [];
1215
- const bufferedEmit = (event: ProviderEvent) => {
1216
- preSessionEvents.push(event);
1217
- };
1218
-
1219
- const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);
1220
-
1221
- // Auth resolution. `codex_oauth` (in the swarm config store) wins over
1222
- // `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
1223
- // without it shadowing their ChatGPT login. The entrypoint already runs
1224
- // this same precedence at boot — this block handles local dev (where
1225
- // the entrypoint didn't run) and any case where auth.json is stale.
1226
- const authMode = await resolveCodexAuthMode(config, bufferedEmit);
1227
-
1228
- // `CodexOptions.env` does NOT inherit from `process.env`. Construct a
1229
- // minimal env explicitly so the spawned Codex CLI can find its binary
1230
- // (PATH) and HOME (for ~/.codex/auth.json). `OPENAI_API_KEY` is only
1231
- // forwarded when auth.json is NOT in chatgpt mode — otherwise it would
1232
- // override the OAuth login at the Codex CLI layer.
1233
- const env: Record<string, string> = {
1234
- PATH: process.env.PATH ?? "",
1235
- HOME: process.env.HOME ?? "",
1236
- ...(authMode !== "chatgpt" && process.env.OPENAI_API_KEY
1237
- ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
1238
- : {}),
1239
- ...(process.env.NODE_EXTRA_CA_CERTS
1240
- ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
1241
- : {}),
1242
- ...(config.env ?? {}),
1243
- // Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
1244
- // the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
1245
- // TRACEPARENT from the active worker span so Codex's spans nest under
1246
- // our worker.session trace. Codex's Rust OTEL SDK reads W3C trace
1247
- // context from the env via the default tracecontext propagator.
1248
- // Returns {} (no-op) when off; spread last so the computed value wins.
1249
- ...buildOtelTraceparentEnv(config.env ?? process.env),
1250
- };
1251
-
1252
- // The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
1253
- // from the SDK's own module. When agent-swarm runs as a Bun single-file
1254
- // compiled executable, the bundled SDK can't resolve `@openai/codex` at
1255
- // runtime because it's not part of the bundle — it lives in a global
1256
- // install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
1257
- // image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
1258
- // wrapper (or native binary) directly. Fall back to undefined so local
1259
- // dev with `@openai/codex-sdk` installed as a regular node_modules
1260
- // dependency keeps working via the SDK's own resolver.
1261
- const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;
1262
-
1263
- const codex = new Codex({
1264
- ...(codexPathOverride ? { codexPathOverride } : {}),
1265
- env,
1266
- config: mergedConfig,
1609
+ if (this.bypassSubprocess) {
1610
+ return createInProcessCodexSession(config, {
1611
+ skillsDir: this.skillsDir,
1612
+ summarizeDeps: this.summarizeDeps,
1267
1613
  });
1268
-
1269
- const threadOptions: ThreadOptions = {
1270
- workingDirectory: config.cwd,
1271
- skipGitRepoCheck: true,
1272
- sandboxMode: "danger-full-access",
1273
- approvalPolicy: "never",
1274
- model: resolvedModel,
1275
- };
1276
-
1277
- const thread = config.resumeSessionId
1278
- ? codex.resumeThread(config.resumeSessionId, threadOptions)
1279
- : codex.startThread(threadOptions);
1280
-
1281
- return new CodexSession(
1282
- thread,
1283
- config,
1284
- agentsMdHandle,
1285
- resolvedModel,
1286
- preSessionEvents,
1287
- this.skillsDir,
1288
- this.summarizeDeps,
1289
- );
1290
- } catch (err) {
1291
- // If we failed to construct the thread, clean up the managed AGENTS.md
1292
- // block so we don't leak state on the filesystem.
1293
- await agentsMdHandle.cleanup();
1294
- throw err;
1295
1614
  }
1615
+ return new CodexSubprocessSession(config, this.skillsDir);
1296
1616
  }
1297
1617
 
1298
1618
  async canResume(sessionId: string): Promise<boolean> {
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Codex API-addressable models, verified from https://developers.openai.com/codex/models
3
- * and https://developers.openai.com/api/docs/deprecations as of 2026-04-09.
3
+ * and https://developers.openai.com/api/docs/deprecations as of 2026-05-28.
4
4
  *
5
5
  * NOTE: `gpt-5.3-codex-spark` is intentionally excluded. It is a ChatGPT Pro
6
6
  * research preview and is NOT API-addressable via the Codex SDK at launch.
@@ -18,6 +18,7 @@
18
18
  * SDK, so new OpenAI models work without a code change.
19
19
  */
20
20
  export const CODEX_MODELS = [
21
+ "gpt-5.5", // newest frontier coding/professional-work model, 1.05M context
21
22
  "gpt-5.4", // default — mainline reasoning model w/ frontier coding
22
23
  "gpt-5.4-mini", // faster/cheaper
23
24
  "gpt-5.3-codex", // coding-specialized, 1M context
@@ -63,6 +64,7 @@ export function resolveCodexModel(modelStr: string | undefined): string {
63
64
  * Update this map whenever a model's context window changes.
64
65
  */
65
66
  export const CODEX_MODEL_CONTEXT_WINDOWS: Record<CodexModel, number> = {
67
+ "gpt-5.5": 1_050_000,
66
68
  "gpt-5.4": 200_000,
67
69
  "gpt-5.4-mini": 200_000,
68
70
  "gpt-5.3-codex": 1_000_000, // 1M context per plan Key Discoveries
@@ -80,7 +82,7 @@ export function getCodexContextWindow(model: string): number {
80
82
 
81
83
  /**
82
84
  * Per-model pricing in USD per million tokens, sourced from
83
- * https://developers.openai.com/api/docs/pricing on 2026-04-09 (Standard tier,
85
+ * https://developers.openai.com/api/docs/pricing on 2026-05-28 (Standard tier,
84
86
  * short-context column — long-context multipliers and Batch / Flex / Priority
85
87
  * tiers exist but the Codex SDK does not expose which tier was used so we
86
88
  * default to the headline rate).
@@ -103,6 +105,11 @@ export interface CodexModelPricing {
103
105
  }
104
106
 
105
107
  export const CODEX_MODEL_PRICING: Record<CodexModel, CodexModelPricing> = {
108
+ "gpt-5.5": {
109
+ inputPerMillion: 5.0,
110
+ cachedInputPerMillion: 0.5,
111
+ outputPerMillion: 30.0,
112
+ },
106
113
  "gpt-5.4": {
107
114
  inputPerMillion: 2.5,
108
115
  cachedInputPerMillion: 0.25,
@@ -1,8 +1,3 @@
1
- export {
2
- checkProviderCredentials,
3
- REQUIRED_CRED_VARS_BY_PROVIDER,
4
- type SupportedProvider,
5
- } from "../commands/provider-credentials";
6
1
  export type {
7
2
  CostData,
8
3
  CredCheckOptions,
@@ -15,29 +10,43 @@ export type {
15
10
  ProviderTraits,
16
11
  } from "./types";
17
12
 
18
- import { ClaudeAdapter } from "./claude-adapter";
19
- import { ClaudeManagedAdapter } from "./claude-managed-adapter";
20
- import { CodexAdapter } from "./codex-adapter";
21
- import { DevinAdapter } from "./devin-adapter";
22
- import { OpencodeAdapter } from "./opencode-adapter";
23
- import { PiMonoAdapter } from "./pi-mono-adapter";
24
13
  import type { ProviderAdapter } from "./types";
25
14
 
26
- /** Create a provider adapter for the given harness provider name. */
27
- export function createProviderAdapter(provider: string): ProviderAdapter {
15
+ /**
16
+ * Create a provider adapter for the given harness provider name.
17
+ *
18
+ * Adapter modules are loaded via dynamic `import()` so their transitive
19
+ * dependencies (e.g. `@earendil-works/pi-coding-agent` for the pi adapter)
20
+ * are NOT evaluated at binary startup. This prevents module-level side
21
+ * effects in third-party SDKs from crashing subcommands that don't need
22
+ * them (the codex-session-runner ENOENT at `/usr/local/bin/package.json`).
23
+ */
24
+ export async function createProviderAdapter(provider: string): Promise<ProviderAdapter> {
28
25
  switch (provider) {
29
- case "claude":
26
+ case "claude": {
27
+ const { ClaudeAdapter } = await import("./claude-adapter");
30
28
  return new ClaudeAdapter();
31
- case "pi":
29
+ }
30
+ case "pi": {
31
+ const { PiMonoAdapter } = await import("./pi-mono-adapter");
32
32
  return new PiMonoAdapter();
33
- case "codex":
33
+ }
34
+ case "codex": {
35
+ const { CodexAdapter } = await import("./codex-adapter");
34
36
  return new CodexAdapter();
35
- case "claude-managed":
37
+ }
38
+ case "claude-managed": {
39
+ const { ClaudeManagedAdapter } = await import("./claude-managed-adapter");
36
40
  return new ClaudeManagedAdapter();
37
- case "devin":
41
+ }
42
+ case "devin": {
43
+ const { DevinAdapter } = await import("./devin-adapter");
38
44
  return new DevinAdapter();
39
- case "opencode":
45
+ }
46
+ case "opencode": {
47
+ const { OpencodeAdapter } = await import("./opencode-adapter");
40
48
  return new OpencodeAdapter();
49
+ }
41
50
  default:
42
51
  throw new Error(
43
52
  `Unknown HARNESS_PROVIDER: "${provider}". Supported: claude, pi, codex, devin, claude-managed, opencode`,