@desplega.ai/agent-swarm 1.85.0 → 1.87.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +1 -0
  2. package/openapi.json +72 -1
  3. package/package.json +10 -6
  4. package/src/be/db-queries/tracker.ts +21 -0
  5. package/src/be/db.ts +279 -14
  6. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  7. package/src/be/migrations/079_task_followup_config.sql +1 -0
  8. package/src/be/modelsdev-cache.json +155618 -0
  9. package/src/be/modelsdev-cache.ts +46 -0
  10. package/src/be/seed-pricing.ts +7 -44
  11. package/src/cli.tsx +38 -2
  12. package/src/commands/codex-session-runner.ts +132 -0
  13. package/src/commands/context-preamble.ts +272 -0
  14. package/src/commands/credential-wait.ts +2 -2
  15. package/src/commands/e2b.ts +728 -0
  16. package/src/commands/provider-credentials.ts +10 -5
  17. package/src/commands/resume-session.ts +35 -78
  18. package/src/commands/runner.ts +128 -16
  19. package/src/e2b/dispatch.ts +429 -0
  20. package/src/e2b/env.ts +206 -0
  21. package/src/heartbeat/heartbeat.ts +145 -30
  22. package/src/heartbeat/templates.ts +11 -7
  23. package/src/http/session-data.ts +8 -1
  24. package/src/http/tasks.ts +152 -3
  25. package/src/jira/sync.ts +4 -4
  26. package/src/linear/sync.ts +6 -5
  27. package/src/prompts/base-prompt.ts +49 -3
  28. package/src/providers/claude-adapter.ts +76 -61
  29. package/src/providers/claude-managed-adapter.ts +61 -75
  30. package/src/providers/claude-managed-models.ts +18 -2
  31. package/src/providers/codex-adapter.ts +429 -112
  32. package/src/providers/codex-models.ts +9 -2
  33. package/src/providers/codex-oauth/auth-json.ts +18 -1
  34. package/src/providers/codex-oauth/flow.ts +24 -1
  35. package/src/providers/index.ts +28 -19
  36. package/src/providers/pricing-sources.md +7 -4
  37. package/src/providers/swarm-events-shared.ts +14 -0
  38. package/src/providers/types.ts +6 -0
  39. package/src/slack/HEURISTICS.md +5 -1
  40. package/src/slack/handlers.test.ts +35 -0
  41. package/src/slack/handlers.ts +79 -2
  42. package/src/tasks/worker-follow-up.ts +162 -2
  43. package/src/telemetry.ts +11 -1
  44. package/src/tests/base-prompt.test.ts +46 -8
  45. package/src/tests/claude-adapter.test.ts +5 -27
  46. package/src/tests/claude-managed-adapter.test.ts +42 -56
  47. package/src/tests/codex-adapter-otel.test.ts +4 -4
  48. package/src/tests/codex-adapter.test.ts +25 -37
  49. package/src/tests/codex-oauth.test.ts +149 -3
  50. package/src/tests/codex-pool.test.ts +14 -3
  51. package/src/tests/codex-swarm-events.test.ts +35 -0
  52. package/src/tests/context-window.test.ts +1 -0
  53. package/src/tests/credential-check.test.ts +48 -29
  54. package/src/tests/e2b-dispatch.test.ts +330 -0
  55. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  56. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  57. package/src/tests/heartbeat-supersede-resume.test.ts +285 -0
  58. package/src/tests/heartbeat.test.ts +26 -16
  59. package/src/tests/migration-046-budgets.test.ts +6 -5
  60. package/src/tests/pricing-routes.test.ts +6 -5
  61. package/src/tests/prompt-template-remaining.test.ts +4 -0
  62. package/src/tests/provider-adapter.test.ts +10 -10
  63. package/src/tests/provider-command-format.test.ts +4 -4
  64. package/src/tests/resume-session.test.ts +42 -50
  65. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  66. package/src/tests/structured-output.test.ts +69 -0
  67. package/src/tests/task-completion-idempotency.test.ts +185 -2
  68. package/src/tests/task-supersede-resume.test.ts +722 -0
  69. package/src/tests/telemetry-init.test.ts +69 -0
  70. package/src/tests/vcs-tracking.test.ts +39 -0
  71. package/src/tools/send-task.ts +42 -10
  72. package/src/tools/store-progress.ts +2 -2
  73. package/src/tools/templates.ts +14 -2
  74. package/src/types.ts +46 -1
  75. package/src/utils/context-window.ts +1 -0
  76. package/src/workflows/executors/agent-task.ts +3 -0
  77. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  78. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  79. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  80. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  81. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  82. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  83. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  84. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  85. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  86. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  87. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  88. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  89. package/templates/schema.ts +26 -0
  90. package/templates/skills/agentmail-sending/config.json +13 -0
  91. package/templates/skills/agentmail-sending/content.md +48 -0
  92. package/templates/skills/artifacts/config.json +13 -0
  93. package/templates/skills/artifacts/content.md +87 -0
  94. package/templates/skills/browser-use-cloud/config.json +13 -0
  95. package/templates/skills/browser-use-cloud/content.md +155 -0
  96. package/templates/skills/desloppify/config.json +13 -0
  97. package/templates/skills/desloppify/content.md +201 -0
  98. package/templates/skills/exa-search/config.json +13 -0
  99. package/templates/skills/exa-search/content.md +106 -0
  100. package/templates/skills/jira-interaction/config.json +13 -0
  101. package/templates/skills/jira-interaction/content.md +252 -0
  102. package/templates/skills/kapso-whatsapp/config.json +13 -0
  103. package/templates/skills/kapso-whatsapp/content.md +369 -0
  104. package/templates/skills/kv-storage/config.json +13 -0
  105. package/templates/skills/kv-storage/content.md +111 -0
  106. package/templates/skills/linear-interaction/config.json +20 -0
  107. package/templates/skills/linear-interaction/content.md +230 -0
  108. package/templates/skills/pages/config.json +18 -0
  109. package/templates/skills/pages/content.md +85 -0
  110. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  111. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  112. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  113. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  114. package/templates/skills/sprite-cli/config.json +13 -0
  115. package/templates/skills/sprite-cli/content.md +133 -0
  116. package/templates/skills/turso-interaction/config.json +13 -0
  117. package/templates/skills/turso-interaction/content.md +192 -0
  118. package/templates/skills/workflow-iterate/config.json +18 -0
  119. package/templates/skills/workflow-iterate/content.md +399 -0
  120. package/templates/skills/workflow-structured-output/config.json +13 -0
  121. package/templates/skills/workflow-structured-output/content.md +101 -0
  122. package/templates/skills/x-api-interactions/config.json +13 -0
  123. package/templates/skills/x-api-interactions/content.md +109 -0
  124. package/templates/workflows/autopilot/config.json +13 -0
  125. package/templates/workflows/autopilot/content.md +58 -0
  126. package/templates/workflows/linear-drain-loop/config.json +21 -0
  127. package/templates/workflows/linear-drain-loop/content.md +72 -0
  128. package/templates/workflows/ralph-loop/config.json +13 -0
  129. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -6,7 +6,8 @@
6
6
  *
7
7
  * Phase 1 — factory wiring + skeleton classes.
8
8
  * Phase 2 — event stream normalization, CostData, AbortController, log file,
9
- * AGENTS.md system-prompt injection, canResume via resumeThread.
9
+ * AGENTS.md system-prompt injection. (Native resume was removed in
10
+ * the 2026-05-28 deprecate-native-resume plan — see context-preamble.ts.)
10
11
  * Phase 3 — per-session MCP config builder + model catalogue wiring. The
11
12
  * baseline Codex config (`~/.codex/config.toml`) is written at
12
13
  * Docker image build time (deferred to Phase 6). For local dev
@@ -66,6 +67,7 @@ import {
66
67
  type WebSearchItem,
67
68
  } from "@openai/codex-sdk";
68
69
  import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
70
+ import { getApiKey } from "../utils/api-key";
69
71
  import {
70
72
  CONTEXT_FORMULA,
71
73
  clampContextPercent,
@@ -383,7 +385,7 @@ export interface SummarizeSessionForCodexDeps {
383
385
  }
384
386
 
385
387
  /** Running session backed by a Codex `Thread`. */
386
- class CodexSession implements ProviderSession {
388
+ export class CodexSession implements ProviderSession {
387
389
  private readonly thread: Thread;
388
390
  private readonly config: ProviderSessionConfig;
389
391
  private readonly agentsMdHandle: CodexAgentsMdHandle;
@@ -1036,12 +1038,24 @@ class CodexSession implements ProviderSession {
1036
1038
  // preserve. Wrapped in its own try/catch so summary failure must NOT
1037
1039
  // block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
1038
1040
  // matches the parity convention used by the claude Stop hook + pi/opencode.
1039
- if (process.env.SKIP_SESSION_SUMMARY !== "1") {
1041
+ //
1042
+ // Skip the summary entirely when the session was aborted. The transcript
1043
+ // is incomplete, the LLM call would retry 3× through openrouter and
1044
+ // spam stderr with structured-output failures (red-herring noise we
1045
+ // saw in the templates-ui incident, 2026-05-28). Losing the summary
1046
+ // on abort is acceptable — it's cleanup, not load-bearing.
1047
+ const sessionWasAborted =
1048
+ this.aborted ||
1049
+ this.abortController?.signal.aborted === true ||
1050
+ this.pendingResult?.exitCode === 130;
1051
+ if (process.env.SKIP_SESSION_SUMMARY !== "1" && !sessionWasAborted) {
1040
1052
  try {
1041
1053
  await this.summarizeAtEnd();
1042
1054
  } catch (err) {
1043
1055
  console.error("session_summary failed (codex):", err);
1044
1056
  }
1057
+ } else if (sessionWasAborted) {
1058
+ console.debug("[codex] session aborted — skipping session_summary");
1045
1059
  }
1046
1060
 
1047
1061
  // Detach the abort controller now that the turn has settled.
@@ -1171,6 +1185,386 @@ class CodexSession implements ProviderSession {
1171
1185
  }
1172
1186
  }
1173
1187
 
1188
+ /**
1189
+ * Build a `CodexSession` running in the *current* process (no subprocess
1190
+ * isolation). Production sessions are now spawned through
1191
+ * `CodexSubprocessSession` to keep the runner's heap bounded across many
1192
+ * task completions (Picateclas spawn-OOM, 2026-05-28). This helper is the
1193
+ * core in-process creation logic — used by:
1194
+ *
1195
+ * 1. `CodexAdapter.createSession` when `bypassSubprocess: true`
1196
+ * (unit tests that monkey-patch the SDK prototype).
1197
+ * 2. `runCodexSessionRunner` (the spawned subprocess entry point in
1198
+ * `src/commands/codex-session-runner.ts`).
1199
+ *
1200
+ * Exported so the subprocess runner — which IS a fresh process — can build
1201
+ * its session via the same path the tests exercise.
1202
+ */
1203
+ export async function createInProcessCodexSession(
1204
+ config: ProviderSessionConfig,
1205
+ opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {},
1206
+ ): Promise<CodexSession> {
1207
+ // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
1208
+ // (or refresh) the managed block before we spin up the thread.
1209
+ const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);
1210
+
1211
+ try {
1212
+ // Resolve the model once and thread it through. Claude shortnames map
1213
+ // to Codex equivalents; everything else passes through verbatim — the
1214
+ // SDK is the source of truth for what's valid.
1215
+ const resolvedModel = resolveCodexModel(config.model);
1216
+
1217
+ // Buffer warnings emitted during config-building so they're not lost
1218
+ // before `CodexSession.onEvent` attaches a listener. The buffer is
1219
+ // replayed into the session's event stream right after construction
1220
+ // via the `initialEvents` constructor parameter.
1221
+ const preSessionEvents: ProviderEvent[] = [];
1222
+ const bufferedEmit = (event: ProviderEvent) => {
1223
+ preSessionEvents.push(event);
1224
+ };
1225
+
1226
+ const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);
1227
+
1228
+ // Auth resolution. `codex_oauth` (in the swarm config store) wins over
1229
+ // `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
1230
+ // without it shadowing their ChatGPT login. The entrypoint already runs
1231
+ // this same precedence at boot — this block handles local dev (where
1232
+ // the entrypoint didn't run) and any case where auth.json is stale.
1233
+ const authMode = await resolveCodexAuthMode(config, bufferedEmit);
1234
+
1235
+ // `CodexOptions.env` does NOT inherit from `process.env`. Construct a
1236
+ // minimal env explicitly so the spawned Codex CLI can find its binary
1237
+ // (PATH) and HOME (for ~/.codex/auth.json). `OPENAI_API_KEY` is only
1238
+ // forwarded when auth.json is NOT in chatgpt mode — otherwise it would
1239
+ // override the OAuth login at the Codex CLI layer.
1240
+ const env: Record<string, string> = {
1241
+ PATH: process.env.PATH ?? "",
1242
+ HOME: process.env.HOME ?? "",
1243
+ ...(authMode !== "chatgpt" && process.env.OPENAI_API_KEY
1244
+ ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
1245
+ : {}),
1246
+ ...(process.env.NODE_EXTRA_CA_CERTS
1247
+ ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
1248
+ : {}),
1249
+ ...(config.env ?? {}),
1250
+ // Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
1251
+ // the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
1252
+ // TRACEPARENT from the active worker span so Codex's spans nest under
1253
+ // our worker.session trace. Codex's Rust OTEL SDK reads W3C trace
1254
+ // context from the env via the default tracecontext propagator.
1255
+ // Returns {} (no-op) when off; spread last so the computed value wins.
1256
+ ...buildOtelTraceparentEnv(config.env ?? process.env),
1257
+ };
1258
+
1259
+ // The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
1260
+ // from the SDK's own module. When agent-swarm runs as a Bun single-file
1261
+ // compiled executable, the bundled SDK can't resolve `@openai/codex` at
1262
+ // runtime because it's not part of the bundle — it lives in a global
1263
+ // install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
1264
+ // image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
1265
+ // wrapper (or native binary) directly. Fall back to undefined so local
1266
+ // dev with `@openai/codex-sdk` installed as a regular node_modules
1267
+ // dependency keeps working via the SDK's own resolver.
1268
+ const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;
1269
+
1270
+ const codex = new Codex({
1271
+ ...(codexPathOverride ? { codexPathOverride } : {}),
1272
+ env,
1273
+ config: mergedConfig,
1274
+ });
1275
+
1276
+ const threadOptions: ThreadOptions = {
1277
+ workingDirectory: config.cwd,
1278
+ skipGitRepoCheck: true,
1279
+ sandboxMode: "danger-full-access",
1280
+ approvalPolicy: "never",
1281
+ model: resolvedModel,
1282
+ };
1283
+
1284
+ // Native resume is deprecated. Follow-up continuity is delivered via the
1285
+ // context preamble (see src/commands/context-preamble.ts). Any stray
1286
+ // resumeSessionId is logged and ignored — we always start a fresh thread.
1287
+ if (config.resumeSessionId) {
1288
+ console.warn(
1289
+ "[codex-adapter] resumeSessionId ignored — native resume is disabled by deprecation plan",
1290
+ );
1291
+ }
1292
+ const thread = codex.startThread(threadOptions);
1293
+
1294
+ return new CodexSession(
1295
+ thread,
1296
+ config,
1297
+ agentsMdHandle,
1298
+ resolvedModel,
1299
+ preSessionEvents,
1300
+ opts.skillsDir,
1301
+ opts.summarizeDeps ?? {},
1302
+ );
1303
+ } catch (err) {
1304
+ // If we failed to construct the thread, clean up the managed AGENTS.md
1305
+ // block so we don't leak state on the filesystem.
1306
+ await agentsMdHandle.cleanup();
1307
+ throw err;
1308
+ }
1309
+ }
1310
+
1311
+ /**
1312
+ * Resolve the argv used to re-launch agent-swarm as a subprocess.
1313
+ *
1314
+ * The codex subprocess runner (`src/commands/codex-session-runner.ts`) is
1315
+ * invoked via the `codex-session-runner` CLI subcommand. Compiled and dev
1316
+ * modes differ in how `process.argv` is laid out:
1317
+ *
1318
+ * - Compiled (`./agent-swarm worker ...`): argv = ["./agent-swarm", "worker", ...]
1319
+ * → re-launch is just [process.execPath, "codex-session-runner"].
1320
+ * - Dev (`bun src/cli.tsx worker ...`): argv = ["bun", ".../cli.tsx", "worker", ...]
1321
+ * → re-launch is [process.execPath, ".../cli.tsx", "codex-session-runner"].
1322
+ *
1323
+ * We pick the dev path when argv[1] looks like a .ts/.tsx/.js/.jsx file (i.e.
1324
+ * a path the runtime is interpreting); otherwise we assume compiled.
1325
+ * `AGENT_SWARM_CODEX_RUNNER_ARGV` lets operators / tests override the prefix
1326
+ * (JSON-encoded string array).
1327
+ *
1328
+ * Exported for unit testing.
1329
+ */
1330
+ export function resolveCodexRunnerArgv(): string[] {
1331
+ const override = process.env.AGENT_SWARM_CODEX_RUNNER_ARGV;
1332
+ if (override) {
1333
+ try {
1334
+ const parsed = JSON.parse(override);
1335
+ if (Array.isArray(parsed) && parsed.every((s) => typeof s === "string")) {
1336
+ return parsed as string[];
1337
+ }
1338
+ } catch {
1339
+ // fall through to inferred resolution
1340
+ }
1341
+ }
1342
+ const execPath = process.execPath;
1343
+ const scriptArg = process.argv[1];
1344
+ if (scriptArg && /\.(t|j)sx?$/.test(scriptArg)) {
1345
+ return [execPath, scriptArg, "codex-session-runner"];
1346
+ }
1347
+ return [execPath, "codex-session-runner"];
1348
+ }
1349
+
1350
+ /** JSON payload passed to the codex subprocess runner via stdin. */
1351
+ interface CodexSubprocessInput {
1352
+ config: ProviderSessionConfig;
1353
+ skillsDir?: string;
1354
+ /**
1355
+ * W3C TRACEPARENT for the parent `worker.session.create` span. Captured in
1356
+ * the parent (where the OTel span context is live) and forwarded so the
1357
+ * subprocess can pass it on to Codex via env. We deliberately do NOT use
1358
+ * `buildOtelTraceparentEnv` inside the subprocess — it would build from a
1359
+ * fresh tracer with no active span. The runner forwards what the parent
1360
+ * captured here back into `config.env` before constructing the SDK.
1361
+ */
1362
+ parentOtelEnv?: Record<string, string>;
1363
+ }
1364
+
1365
+ /**
1366
+ * `ProviderSession` that runs the entire codex session inside a fresh
1367
+ * subprocess. This is the Picateclas spawn-OOM permanent fix — every codex
1368
+ * session's heap (SDK state, transcript buffer, JSON-RPC parser, listeners)
1369
+ * dies with the subprocess. The runner's own VSZ stays bounded across
1370
+ * thousands of task completions.
1371
+ *
1372
+ * Wire protocol over stdout (line-delimited JSON):
1373
+ * {"kind":"event", "event": <ProviderEvent>}
1374
+ * {"kind":"result", "result": <ProviderResult>}
1375
+ *
1376
+ * stderr is forwarded verbatim into the runner's stdout (for prod logs).
1377
+ */
1378
+ class CodexSubprocessSession implements ProviderSession {
1379
+ private readonly proc: ReturnType<typeof Bun.spawn>;
1380
+ private readonly listeners: Array<(event: ProviderEvent) => void> = [];
1381
+ private readonly eventQueue: ProviderEvent[] = [];
1382
+ private readonly completionPromise: Promise<ProviderResult>;
1383
+ private _sessionId: string | undefined;
1384
+
1385
+ constructor(config: ProviderSessionConfig, skillsDir: string | undefined) {
1386
+ const argv = resolveCodexRunnerArgv();
1387
+ const payload: CodexSubprocessInput = {
1388
+ config,
1389
+ skillsDir,
1390
+ // Capture the parent's OTel TRACEPARENT here, in the span context the
1391
+ // runner established. The subprocess can't reconstruct it on its own
1392
+ // since its OTel tracer doesn't share the parent's active-span state.
1393
+ parentOtelEnv: buildOtelTraceparentEnv(config.env ?? process.env),
1394
+ };
1395
+
1396
+ const apiKey = getApiKey();
1397
+
1398
+ this.proc = Bun.spawn(argv, {
1399
+ // Minimal env: forward what the subprocess needs to talk to the API,
1400
+ // load the codex CLI binary, and read OAuth tokens. config.env (which
1401
+ // already includes the swarm-config overlay) is delivered via stdin
1402
+ // — NOT here — so we don't repeat the same string in two places.
1403
+ env: {
1404
+ PATH: process.env.PATH ?? "",
1405
+ HOME: process.env.HOME ?? "",
1406
+ ...(process.env.NODE_EXTRA_CA_CERTS
1407
+ ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
1408
+ : {}),
1409
+ ...(process.env.MCP_BASE_URL ? { MCP_BASE_URL: process.env.MCP_BASE_URL } : {}),
1410
+ ...(apiKey ? { AGENT_SWARM_API_KEY: apiKey, API_KEY: apiKey } : {}),
1411
+ // Embedding / summarization paths read these:
1412
+ ...(process.env.OPENAI_API_KEY ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY } : {}),
1413
+ ...(process.env.OPENROUTER_API_KEY
1414
+ ? { OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY }
1415
+ : {}),
1416
+ ...(process.env.ANTHROPIC_API_KEY
1417
+ ? { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY }
1418
+ : {}),
1419
+ ...(process.env.CODEX_PATH_OVERRIDE
1420
+ ? { CODEX_PATH_OVERRIDE: process.env.CODEX_PATH_OVERRIDE }
1421
+ : {}),
1422
+ ...(process.env.CODEX_SKILLS_DIR ? { CODEX_SKILLS_DIR: process.env.CODEX_SKILLS_DIR } : {}),
1423
+ ...(process.env.SKIP_SESSION_SUMMARY
1424
+ ? { SKIP_SESSION_SUMMARY: process.env.SKIP_SESSION_SUMMARY }
1425
+ : {}),
1426
+ ...(process.env.MEMORY_RATERS ? { MEMORY_RATERS: process.env.MEMORY_RATERS } : {}),
1427
+ },
1428
+ stdin: "pipe",
1429
+ stdout: "pipe",
1430
+ stderr: "pipe",
1431
+ });
1432
+
1433
+ // `Bun.spawn`'s `stdin` is typed as `number | FileSink`; with `stdin:
1434
+ // "pipe"` it is always a FileSink. Narrow via assertion.
1435
+ const stdin = this.proc.stdin as { write(s: string): void; end(): void };
1436
+ stdin.write(JSON.stringify(payload));
1437
+ stdin.end();
1438
+
1439
+ this.completionPromise = this.processStreams();
1440
+ }
1441
+
1442
+ get sessionId(): string | undefined {
1443
+ return this._sessionId;
1444
+ }
1445
+
1446
+ onEvent(listener: (event: ProviderEvent) => void): void {
1447
+ this.listeners.push(listener);
1448
+ for (const event of this.eventQueue) {
1449
+ listener(event);
1450
+ }
1451
+ this.eventQueue.length = 0;
1452
+ }
1453
+
1454
+ async waitForCompletion(): Promise<ProviderResult> {
1455
+ return this.completionPromise;
1456
+ }
1457
+
1458
+ async abort(): Promise<void> {
1459
+ this.proc.kill("SIGTERM");
1460
+ }
1461
+
1462
+ private emit(event: ProviderEvent): void {
1463
+ if (event.type === "session_init" && event.sessionId) {
1464
+ this._sessionId = event.sessionId;
1465
+ }
1466
+ if (this.listeners.length > 0) {
1467
+ for (const listener of this.listeners) {
1468
+ try {
1469
+ listener(event);
1470
+ } catch {
1471
+ // listener errors must not break the event stream
1472
+ }
1473
+ }
1474
+ } else {
1475
+ this.eventQueue.push(event);
1476
+ }
1477
+ }
1478
+
1479
+ private async processStreams(): Promise<ProviderResult> {
1480
+ let result: ProviderResult | null = null;
1481
+ let partial = "";
1482
+ let stderrTail = "";
1483
+
1484
+ const stdoutPromise = (async () => {
1485
+ const stdout = this.proc.stdout as ReadableStream<Uint8Array> | null;
1486
+ if (!stdout) return;
1487
+ for await (const chunk of stdout) {
1488
+ partial += new TextDecoder().decode(chunk);
1489
+ const parts = partial.split("\n");
1490
+ partial = parts.pop() ?? "";
1491
+ for (const line of parts) {
1492
+ const trimmed = line.trim();
1493
+ if (!trimmed) continue;
1494
+ this.handleLine(trimmed, (r) => {
1495
+ result = r;
1496
+ });
1497
+ }
1498
+ }
1499
+ if (partial.trim()) {
1500
+ this.handleLine(partial.trim(), (r) => {
1501
+ result = r;
1502
+ });
1503
+ partial = "";
1504
+ }
1505
+ })();
1506
+
1507
+ const stderrPromise = (async () => {
1508
+ const stderr = this.proc.stderr as ReadableStream<Uint8Array> | null;
1509
+ if (!stderr) return;
1510
+ for await (const chunk of stderr) {
1511
+ const text = new TextDecoder().decode(chunk);
1512
+ stderrTail = (stderrTail + text).slice(-2000);
1513
+ // Surface subprocess stderr (codex CLI warnings, auth.json
1514
+ // restoration messages) into the parent's event stream so it lands
1515
+ // in /workspace/logs/*.jsonl the way the in-process path did.
1516
+ this.emit({ type: "raw_stderr", content: text });
1517
+ }
1518
+ })();
1519
+
1520
+ await Promise.all([stdoutPromise, stderrPromise]);
1521
+ const exitCode = await this.proc.exited;
1522
+
1523
+ if (result) {
1524
+ return result;
1525
+ }
1526
+ // Subprocess exited before sending a structured result — synthesise one
1527
+ // so the runner doesn't hang on waitForCompletion. Include stderr tail
1528
+ // so the actual error message reaches the task failure reason.
1529
+ const stderrHint = stderrTail.trim() ? ` — stderr: ${stderrTail.trim().slice(-500)}` : "";
1530
+ return {
1531
+ exitCode: exitCode ?? 1,
1532
+ sessionId: this._sessionId,
1533
+ isError: true,
1534
+ failureReason: `codex subprocess exited (code=${exitCode ?? "?"}) without a structured result${stderrHint}`,
1535
+ };
1536
+ }
1537
+
1538
+ private handleLine(line: string, setResult: (r: ProviderResult) => void): void {
1539
+ let msg: { kind?: string; event?: ProviderEvent; result?: ProviderResult; message?: string };
1540
+ try {
1541
+ msg = JSON.parse(line);
1542
+ } catch {
1543
+ // Not a valid JSON envelope — treat as raw stderr-equivalent.
1544
+ this.emit({ type: "raw_stderr", content: `${line}\n` });
1545
+ return;
1546
+ }
1547
+ if (msg.kind === "event" && msg.event) {
1548
+ this.emit(msg.event);
1549
+ return;
1550
+ }
1551
+ if (msg.kind === "result" && msg.result) {
1552
+ setResult(msg.result);
1553
+ return;
1554
+ }
1555
+ if (msg.kind === "error" && msg.message) {
1556
+ this.emit({ type: "error", message: msg.message });
1557
+ setResult({
1558
+ exitCode: 1,
1559
+ sessionId: this._sessionId,
1560
+ isError: true,
1561
+ failureReason: msg.message,
1562
+ });
1563
+ return;
1564
+ }
1565
+ }
1566
+ }
1567
+
1174
1568
  export class CodexAdapter implements ProviderAdapter {
1175
1569
  readonly name = "codex";
1176
1570
  readonly traits = { hasMcp: true, hasLocalEnvironment: true };
@@ -1191,124 +1585,47 @@ export class CodexAdapter implements ProviderAdapter {
1191
1585
  */
1192
1586
  private readonly summarizeDeps: SummarizeSessionForCodexDeps;
1193
1587
 
1194
- constructor(opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {}) {
1588
+ /**
1589
+ * When true, run the codex session inside the runner process (no subprocess
1590
+ * spawn). Used by:
1591
+ * - Unit tests that monkey-patch `Codex.prototype.startThread` (the patch
1592
+ * would not survive a subprocess boundary).
1593
+ * - The spawned `codex-session-runner` subprocess itself, to avoid
1594
+ * re-spawning recursively.
1595
+ *
1596
+ * Production callers leave this `false`. Each codex session then runs in a
1597
+ * fresh subprocess and its heap dies when the task completes — keeping the
1598
+ * runner's VSZ bounded across thousands of task completions (Picateclas
1599
+ * spawn-OOM permanent fix, 2026-05-28).
1600
+ */
1601
+ private readonly bypassSubprocess: boolean;
1602
+
1603
+ constructor(
1604
+ opts: {
1605
+ skillsDir?: string;
1606
+ summarizeDeps?: SummarizeSessionForCodexDeps;
1607
+ bypassSubprocess?: boolean;
1608
+ } = {},
1609
+ ) {
1195
1610
  this.skillsDir = opts.skillsDir;
1196
1611
  this.summarizeDeps = opts.summarizeDeps ?? {};
1612
+ this.bypassSubprocess = opts.bypassSubprocess ?? false;
1197
1613
  }
1198
1614
 
1199
1615
  async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
1200
- // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
1201
- // (or refresh) the managed block before we spin up the thread.
1202
- const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);
1203
-
1204
- try {
1205
- // Resolve the model once and thread it through. Claude shortnames map
1206
- // to Codex equivalents; everything else passes through verbatim — the
1207
- // SDK is the source of truth for what's valid.
1208
- const resolvedModel = resolveCodexModel(config.model);
1209
-
1210
- // Buffer warnings emitted during config-building so they're not lost
1211
- // before `CodexSession.onEvent` attaches a listener. The buffer is
1212
- // replayed into the session's event stream right after construction
1213
- // via the `initialEvents` constructor parameter.
1214
- const preSessionEvents: ProviderEvent[] = [];
1215
- const bufferedEmit = (event: ProviderEvent) => {
1216
- preSessionEvents.push(event);
1217
- };
1218
-
1219
- const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);
1220
-
1221
- // Auth resolution. `codex_oauth` (in the swarm config store) wins over
1222
- // `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
1223
- // without it shadowing their ChatGPT login. The entrypoint already runs
1224
- // this same precedence at boot — this block handles local dev (where
1225
- // the entrypoint didn't run) and any case where auth.json is stale.
1226
- const authMode = await resolveCodexAuthMode(config, bufferedEmit);
1227
-
1228
- // `CodexOptions.env` does NOT inherit from `process.env`. Construct a
1229
- // minimal env explicitly so the spawned Codex CLI can find its binary
1230
- // (PATH) and HOME (for ~/.codex/auth.json). `OPENAI_API_KEY` is only
1231
- // forwarded when auth.json is NOT in chatgpt mode — otherwise it would
1232
- // override the OAuth login at the Codex CLI layer.
1233
- const env: Record<string, string> = {
1234
- PATH: process.env.PATH ?? "",
1235
- HOME: process.env.HOME ?? "",
1236
- ...(authMode !== "chatgpt" && process.env.OPENAI_API_KEY
1237
- ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
1238
- : {}),
1239
- ...(process.env.NODE_EXTRA_CA_CERTS
1240
- ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
1241
- : {}),
1242
- ...(config.env ?? {}),
1243
- // Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
1244
- // the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
1245
- // TRACEPARENT from the active worker span so Codex's spans nest under
1246
- // our worker.session trace. Codex's Rust OTEL SDK reads W3C trace
1247
- // context from the env via the default tracecontext propagator.
1248
- // Returns {} (no-op) when off; spread last so the computed value wins.
1249
- ...buildOtelTraceparentEnv(config.env ?? process.env),
1250
- };
1251
-
1252
- // The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
1253
- // from the SDK's own module. When agent-swarm runs as a Bun single-file
1254
- // compiled executable, the bundled SDK can't resolve `@openai/codex` at
1255
- // runtime because it's not part of the bundle — it lives in a global
1256
- // install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
1257
- // image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
1258
- // wrapper (or native binary) directly. Fall back to undefined so local
1259
- // dev with `@openai/codex-sdk` installed as a regular node_modules
1260
- // dependency keeps working via the SDK's own resolver.
1261
- const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;
1262
-
1263
- const codex = new Codex({
1264
- ...(codexPathOverride ? { codexPathOverride } : {}),
1265
- env,
1266
- config: mergedConfig,
1616
+ if (this.bypassSubprocess) {
1617
+ return createInProcessCodexSession(config, {
1618
+ skillsDir: this.skillsDir,
1619
+ summarizeDeps: this.summarizeDeps,
1267
1620
  });
1268
-
1269
- const threadOptions: ThreadOptions = {
1270
- workingDirectory: config.cwd,
1271
- skipGitRepoCheck: true,
1272
- sandboxMode: "danger-full-access",
1273
- approvalPolicy: "never",
1274
- model: resolvedModel,
1275
- };
1276
-
1277
- const thread = config.resumeSessionId
1278
- ? codex.resumeThread(config.resumeSessionId, threadOptions)
1279
- : codex.startThread(threadOptions);
1280
-
1281
- return new CodexSession(
1282
- thread,
1283
- config,
1284
- agentsMdHandle,
1285
- resolvedModel,
1286
- preSessionEvents,
1287
- this.skillsDir,
1288
- this.summarizeDeps,
1289
- );
1290
- } catch (err) {
1291
- // If we failed to construct the thread, clean up the managed AGENTS.md
1292
- // block so we don't leak state on the filesystem.
1293
- await agentsMdHandle.cleanup();
1294
- throw err;
1295
1621
  }
1622
+ return new CodexSubprocessSession(config, this.skillsDir);
1296
1623
  }
1297
1624
 
1298
- async canResume(sessionId: string): Promise<boolean> {
1299
- if (!sessionId || typeof sessionId !== "string") {
1300
- return false;
1301
- }
1302
- try {
1303
- const codex = new Codex();
1304
- // `resumeThread` is synchronous in 0.118.x and returns a Thread handle.
1305
- // The runner only calls canResume when deciding whether to resume a
1306
- // task, so we accept the (cheap) handshake cost.
1307
- codex.resumeThread(sessionId);
1308
- return true;
1309
- } catch {
1310
- return false;
1311
- }
1625
+ async canResume(_sessionId: string): Promise<boolean> {
1626
+ // Native resume is deprecated; runner no longer threads resumeSessionId
1627
+ // to adapters. Follow-up continuity flows via the context preamble.
1628
+ return false;
1312
1629
  }
1313
1630
 
1314
1631
  formatCommand(commandName: string): string {
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Codex API-addressable models, verified from https://developers.openai.com/codex/models
3
- * and https://developers.openai.com/api/docs/deprecations as of 2026-04-09.
3
+ * and https://developers.openai.com/api/docs/deprecations as of 2026-05-28.
4
4
  *
5
5
  * NOTE: `gpt-5.3-codex-spark` is intentionally excluded. It is a ChatGPT Pro
6
6
  * research preview and is NOT API-addressable via the Codex SDK at launch.
@@ -18,6 +18,7 @@
18
18
  * SDK, so new OpenAI models work without a code change.
19
19
  */
20
20
  export const CODEX_MODELS = [
21
+ "gpt-5.5", // newest frontier coding/professional-work model, 1.05M context
21
22
  "gpt-5.4", // default — mainline reasoning model w/ frontier coding
22
23
  "gpt-5.4-mini", // faster/cheaper
23
24
  "gpt-5.3-codex", // coding-specialized, 1M context
@@ -63,6 +64,7 @@ export function resolveCodexModel(modelStr: string | undefined): string {
63
64
  * Update this map whenever a model's context window changes.
64
65
  */
65
66
  export const CODEX_MODEL_CONTEXT_WINDOWS: Record<CodexModel, number> = {
67
+ "gpt-5.5": 1_050_000,
66
68
  "gpt-5.4": 200_000,
67
69
  "gpt-5.4-mini": 200_000,
68
70
  "gpt-5.3-codex": 1_000_000, // 1M context per plan Key Discoveries
@@ -80,7 +82,7 @@ export function getCodexContextWindow(model: string): number {
80
82
 
81
83
  /**
82
84
  * Per-model pricing in USD per million tokens, sourced from
83
- * https://developers.openai.com/api/docs/pricing on 2026-04-09 (Standard tier,
85
+ * https://developers.openai.com/api/docs/pricing on 2026-05-28 (Standard tier,
84
86
  * short-context column — long-context multipliers and Batch / Flex / Priority
85
87
  * tiers exist but the Codex SDK does not expose which tier was used so we
86
88
  * default to the headline rate).
@@ -103,6 +105,11 @@ export interface CodexModelPricing {
103
105
  }
104
106
 
105
107
  export const CODEX_MODEL_PRICING: Record<CodexModel, CodexModelPricing> = {
108
+ "gpt-5.5": {
109
+ inputPerMillion: 5.0,
110
+ cachedInputPerMillion: 0.5,
111
+ outputPerMillion: 30.0,
112
+ },
106
113
  "gpt-5.4": {
107
114
  inputPerMillion: 2.5,
108
115
  cachedInputPerMillion: 0.25,
@@ -22,14 +22,31 @@
22
22
  * purposes and doesn't validate it as a separate JWT.
23
23
  */
24
24
 
25
+ import { extractChatgptUserId } from "./flow.js";
25
26
  import type { CodexAuthJson, CodexOAuthCredentials } from "./types.js";
26
27
 
27
28
  export function authJsonToCredentialSelection(auth: CodexAuthJson, slot = 0, total = 1) {
29
+ // Prefer the per-grant `chatgpt_user_id` so two slots authenticated against
30
+ // the same ChatGPT Team workspace get distinct suffixes. Fall back to
31
+ // account_id when the JWT lacks the claim — preserves boot for any
32
+ // unexpected token shape, at the cost of re-introducing the slot-collision
33
+ // bug for that specific slot only. The warn is a deliberate canary.
34
+ const userId = extractChatgptUserId(auth.tokens.access_token);
35
+ const suffixSource = userId ?? auth.tokens.account_id;
36
+ if (!userId) {
37
+ console.warn(
38
+ "[codex-oauth] No chatgpt_user_id in JWT — falling back to account_id for keySuffix derivation. " +
39
+ "If two slots share an account, their suffixes will collide.",
40
+ );
41
+ }
28
42
  return {
43
+ // `selected` satisfies the CredentialSelection interface but is never read
44
+ // for CODEX_OAUTH: creds are materialised to ~/.codex/auth.json (not env-injected),
45
+ // and all tracking flows through `keySuffix` + `index` (never `selected`).
29
46
  selected: auth.tokens.account_id,
30
47
  index: slot,
31
48
  total,
32
- keySuffix: auth.tokens.account_id.slice(-5),
49
+ keySuffix: suffixSource.slice(-5),
33
50
  keyType: "CODEX_OAUTH",
34
51
  };
35
52
  }