@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -66,6 +66,7 @@ import {
|
|
|
66
66
|
type WebSearchItem,
|
|
67
67
|
} from "@openai/codex-sdk";
|
|
68
68
|
import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
|
|
69
|
+
import { getApiKey } from "../utils/api-key";
|
|
69
70
|
import {
|
|
70
71
|
CONTEXT_FORMULA,
|
|
71
72
|
clampContextPercent,
|
|
@@ -383,7 +384,7 @@ export interface SummarizeSessionForCodexDeps {
|
|
|
383
384
|
}
|
|
384
385
|
|
|
385
386
|
/** Running session backed by a Codex `Thread`. */
|
|
386
|
-
class CodexSession implements ProviderSession {
|
|
387
|
+
export class CodexSession implements ProviderSession {
|
|
387
388
|
private readonly thread: Thread;
|
|
388
389
|
private readonly config: ProviderSessionConfig;
|
|
389
390
|
private readonly agentsMdHandle: CodexAgentsMdHandle;
|
|
@@ -1036,12 +1037,24 @@ class CodexSession implements ProviderSession {
|
|
|
1036
1037
|
// preserve. Wrapped in its own try/catch so summary failure must NOT
|
|
1037
1038
|
// block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
|
|
1038
1039
|
// matches the parity convention used by the claude Stop hook + pi/opencode.
|
|
1039
|
-
|
|
1040
|
+
//
|
|
1041
|
+
// Skip the summary entirely when the session was aborted. The transcript
|
|
1042
|
+
// is incomplete, the LLM call would retry 3× through openrouter and
|
|
1043
|
+
// spam stderr with structured-output failures (red-herring noise we
|
|
1044
|
+
// saw in the templates-ui incident, 2026-05-28). Losing the summary
|
|
1045
|
+
// on abort is acceptable — it's cleanup, not load-bearing.
|
|
1046
|
+
const sessionWasAborted =
|
|
1047
|
+
this.aborted ||
|
|
1048
|
+
this.abortController?.signal.aborted === true ||
|
|
1049
|
+
this.pendingResult?.exitCode === 130;
|
|
1050
|
+
if (process.env.SKIP_SESSION_SUMMARY !== "1" && !sessionWasAborted) {
|
|
1040
1051
|
try {
|
|
1041
1052
|
await this.summarizeAtEnd();
|
|
1042
1053
|
} catch (err) {
|
|
1043
1054
|
console.error("session_summary failed (codex):", err);
|
|
1044
1055
|
}
|
|
1056
|
+
} else if (sessionWasAborted) {
|
|
1057
|
+
console.debug("[codex] session aborted — skipping session_summary");
|
|
1045
1058
|
}
|
|
1046
1059
|
|
|
1047
1060
|
// Detach the abort controller now that the turn has settled.
|
|
@@ -1171,6 +1184,380 @@ class CodexSession implements ProviderSession {
|
|
|
1171
1184
|
}
|
|
1172
1185
|
}
|
|
1173
1186
|
|
|
1187
|
+
/**
 * Build a `CodexSession` running in the *current* process (no subprocess
 * isolation). Production sessions are spawned through
 * `CodexSubprocessSession` to keep the runner's heap bounded across many
 * task completions (Picateclas spawn-OOM, 2026-05-28). This helper is the
 * core in-process creation logic, used by:
 *
 *   1. `CodexAdapter.createSession` when `bypassSubprocess: true`
 *      (unit tests that monkey-patch the SDK prototype).
 *   2. `runCodexSessionRunner` (the spawned subprocess entry point in
 *      `src/commands/codex-session-runner.ts`).
 *
 * Exported so the subprocess runner — which IS a fresh process — can build
 * its session via the same path the tests exercise.
 *
 * @param config - Session configuration; `cwd`, `systemPrompt`, `model`,
 *   `env` and `resumeSessionId` are read here.
 * @param opts - Optional `skillsDir` and `summarizeDeps`, passed straight
 *   through to the `CodexSession` constructor (`summarizeDeps` defaults to `{}`).
 * @returns The constructed `CodexSession`.
 * @throws Rethrows any error raised while building the config, resolving
 *   auth, or creating the thread, after attempting to clean up the managed
 *   AGENTS.md block.
 */
export async function createInProcessCodexSession(
  config: ProviderSessionConfig,
  opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {},
): Promise<CodexSession> {
  // Codex ingests per-session instructions via AGENTS.md in the cwd. Write
  // (or refresh) the managed block before we spin up the thread.
  const agentsMdHandle = await writeCodexAgentsMd(config.cwd, config.systemPrompt);

  try {
    // Resolve the model once and thread it through. Claude shortnames map
    // to Codex equivalents; everything else passes through verbatim — the
    // SDK is the source of truth for what's valid.
    const resolvedModel = resolveCodexModel(config.model);

    // Buffer warnings emitted during config-building so they're not lost
    // before `CodexSession.onEvent` attaches a listener. The buffer is
    // replayed into the session's event stream right after construction
    // via the `initialEvents` constructor parameter.
    const preSessionEvents: ProviderEvent[] = [];
    const bufferedEmit = (event: ProviderEvent) => {
      preSessionEvents.push(event);
    };

    const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);

    // Auth resolution. `codex_oauth` (in the swarm config store) wins over
    // `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
    // without it shadowing their ChatGPT login. The entrypoint already runs
    // this same precedence at boot — this block handles local dev (where
    // the entrypoint didn't run) and any case where auth.json is stale.
    const authMode = await resolveCodexAuthMode(config, bufferedEmit);

    // `CodexOptions.env` does NOT inherit from `process.env`. Construct a
    // minimal env explicitly so the spawned Codex CLI can find its binary
    // (PATH) and HOME (for ~/.codex/auth.json). `OPENAI_API_KEY` is only
    // forwarded when auth.json is NOT in chatgpt mode — otherwise it would
    // override the OAuth login at the Codex CLI layer.
    const env: Record<string, string> = {
      PATH: process.env.PATH ?? "",
      HOME: process.env.HOME ?? "",
      ...(authMode !== "chatgpt" && process.env.OPENAI_API_KEY
        ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
        : {}),
      ...(process.env.NODE_EXTRA_CA_CERTS
        ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
        : {}),
      ...(config.env ?? {}),
      // Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
      // the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
      // TRACEPARENT from the active worker span so Codex's spans nest under
      // our worker.session trace. Codex's Rust OTEL SDK reads W3C trace
      // context from the env via the default tracecontext propagator.
      // Returns {} (no-op) when off; spread last so the computed value wins.
      ...buildOtelTraceparentEnv(config.env ?? process.env),
    };

    // The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
    // from the SDK's own module. When agent-swarm runs as a Bun single-file
    // compiled executable, the bundled SDK can't resolve `@openai/codex` at
    // runtime because it's not part of the bundle — it lives in a global
    // install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
    // image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
    // wrapper (or native binary) directly. Fall back to undefined so local
    // dev with `@openai/codex-sdk` installed as a regular node_modules
    // dependency keeps working via the SDK's own resolver.
    const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;

    const codex = new Codex({
      ...(codexPathOverride ? { codexPathOverride } : {}),
      env,
      config: mergedConfig,
    });

    const threadOptions: ThreadOptions = {
      workingDirectory: config.cwd,
      skipGitRepoCheck: true,
      sandboxMode: "danger-full-access",
      approvalPolicy: "never",
      model: resolvedModel,
    };

    // Resume an existing thread when the caller supplied a session id;
    // otherwise start a fresh one.
    const thread = config.resumeSessionId
      ? codex.resumeThread(config.resumeSessionId, threadOptions)
      : codex.startThread(threadOptions);

    return new CodexSession(
      thread,
      config,
      agentsMdHandle,
      resolvedModel,
      preSessionEvents,
      opts.skillsDir,
      opts.summarizeDeps ?? {},
    );
  } catch (err) {
    // If we failed to construct the thread, clean up the managed AGENTS.md
    // block so we don't leak state on the filesystem. The cleanup is
    // best-effort: a failure here is logged rather than thrown so it cannot
    // replace the original error, which is the one the caller needs to see.
    try {
      await agentsMdHandle.cleanup();
    } catch (cleanupErr) {
      console.error("[codex] AGENTS.md cleanup failed after session creation error:", cleanupErr);
    }
    throw err;
  }
}
|
|
1303
|
+
|
|
1304
|
+
/**
 * Resolve the argv used to re-launch agent-swarm as a subprocess.
 *
 * The codex subprocess runner (`src/commands/codex-session-runner.ts`) is
 * invoked via the `codex-session-runner` CLI subcommand. Compiled and dev
 * modes differ in how `process.argv` is laid out:
 *
 *   - Compiled (`./agent-swarm worker ...`): argv = ["./agent-swarm", "worker", ...]
 *     → re-launch is just [process.execPath, "codex-session-runner"].
 *   - Dev (`bun src/cli.tsx worker ...`):    argv = ["bun", ".../cli.tsx", "worker", ...]
 *     → re-launch is [process.execPath, ".../cli.tsx", "codex-session-runner"].
 *
 * We pick the dev path when argv[1] looks like a script file the runtime is
 * interpreting (`.ts`/`.tsx`/`.js`/`.jsx`, including the `.mts`/`.cts`/
 * `.mjs`/`.cjs` module variants); otherwise we assume compiled.
 *
 * `AGENT_SWARM_CODEX_RUNNER_ARGV` lets operators / tests override the whole
 * argv (JSON-encoded, non-empty string array). An override that is not valid
 * JSON, is not an array of strings, or is empty is ignored and the inferred
 * argv is used instead — an empty argv has no executable to spawn.
 *
 * Exported for unit testing.
 *
 * @returns The argv (executable first) to spawn the codex session runner with.
 */
export function resolveCodexRunnerArgv(): string[] {
  const override = process.env.AGENT_SWARM_CODEX_RUNNER_ARGV;
  if (override) {
    try {
      const parsed: unknown = JSON.parse(override);
      if (
        Array.isArray(parsed) &&
        parsed.length > 0 &&
        parsed.every((entry): entry is string => typeof entry === "string")
      ) {
        return parsed;
      }
    } catch {
      // Malformed JSON — fall through to inferred resolution.
    }
  }

  const execPath = process.execPath;
  const scriptArg = process.argv[1];
  // argv[1] is a script path only when the runtime is interpreting a source
  // file; in the compiled layout it is the first CLI argument instead.
  if (scriptArg && /\.[cm]?[jt]sx?$/.test(scriptArg)) {
    return [execPath, scriptArg, "codex-session-runner"];
  }
  return [execPath, "codex-session-runner"];
}
|
|
1342
|
+
|
|
1343
|
+
/**
 * Shape of the JSON document handed to the codex subprocess runner on its
 * stdin.
 */
interface CodexSubprocessInput {
  /** Session configuration forwarded to the subprocess. */
  config: ProviderSessionConfig;
  /** Optional skills directory forwarded alongside the config. */
  skillsDir?: string;
  /**
   * W3C TRACEPARENT env for the parent `worker.session.create` span.
   *
   * It is captured in the parent process, where the OTel span context is
   * live, and shipped across so the subprocess can hand it on to Codex via
   * env. `buildOtelTraceparentEnv` is deliberately NOT called inside the
   * subprocess: there it would build from a fresh tracer with no active
   * span. Instead the runner copies what the parent captured here back into
   * `config.env` before constructing the SDK.
   */
  parentOtelEnv?: Record<string, string>;
}
|
|
1357
|
+
|
|
1358
|
+
/**
 * `ProviderSession` that runs the entire codex session inside a fresh
 * subprocess. This is the Picateclas spawn-OOM permanent fix — every codex
 * session's heap (SDK state, transcript buffer, JSON-RPC parser, listeners)
 * dies with the subprocess. The runner's own VSZ stays bounded across
 * thousands of task completions.
 *
 * Wire protocol over stdout (line-delimited JSON):
 *   {"kind":"event",  "event":   <ProviderEvent>}
 *   {"kind":"result", "result":  <ProviderResult>}
 *   {"kind":"error",  "message": <string>}
 *
 * stdout lines that are not valid JSON, and every stderr chunk, are
 * re-emitted to listeners as `raw_stderr` events.
 */
class CodexSubprocessSession implements ProviderSession {
  private readonly proc: ReturnType<typeof Bun.spawn>;
  private readonly listeners: Array<(event: ProviderEvent) => void> = [];
  /** Events received before the first listener attached; replayed by `onEvent`. */
  private readonly eventQueue: ProviderEvent[] = [];
  private readonly completionPromise: Promise<ProviderResult>;
  private _sessionId: string | undefined;

  /**
   * Spawn the runner subprocess, send it the session payload on stdin, and
   * start consuming its output streams.
   *
   * @param config - Session configuration, serialised into the stdin payload.
   * @param skillsDir - Optional skills directory, forwarded in the payload.
   */
  constructor(config: ProviderSessionConfig, skillsDir: string | undefined) {
    const argv = resolveCodexRunnerArgv();
    const payload: CodexSubprocessInput = {
      config,
      skillsDir,
      // Capture the parent's OTel TRACEPARENT here, in the span context the
      // runner established. The subprocess can't reconstruct it on its own
      // since its OTel tracer doesn't share the parent's active-span state.
      parentOtelEnv: buildOtelTraceparentEnv(config.env ?? process.env),
    };

    const apiKey = getApiKey();

    this.proc = Bun.spawn(argv, {
      // Minimal env: forward what the subprocess needs to talk to the API,
      // load the codex CLI binary, and read OAuth tokens. config.env (which
      // already includes the swarm-config overlay) is delivered via stdin
      // — NOT here — so we don't repeat the same string in two places.
      env: {
        PATH: process.env.PATH ?? "",
        HOME: process.env.HOME ?? "",
        ...(process.env.NODE_EXTRA_CA_CERTS
          ? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
          : {}),
        ...(process.env.MCP_BASE_URL ? { MCP_BASE_URL: process.env.MCP_BASE_URL } : {}),
        ...(apiKey ? { AGENT_SWARM_API_KEY: apiKey, API_KEY: apiKey } : {}),
        // Embedding / summarization paths read these:
        ...(process.env.OPENAI_API_KEY ? { OPENAI_API_KEY: process.env.OPENAI_API_KEY } : {}),
        ...(process.env.OPENROUTER_API_KEY
          ? { OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY }
          : {}),
        ...(process.env.ANTHROPIC_API_KEY
          ? { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY }
          : {}),
        ...(process.env.CODEX_PATH_OVERRIDE
          ? { CODEX_PATH_OVERRIDE: process.env.CODEX_PATH_OVERRIDE }
          : {}),
        ...(process.env.CODEX_SKILLS_DIR ? { CODEX_SKILLS_DIR: process.env.CODEX_SKILLS_DIR } : {}),
        ...(process.env.SKIP_SESSION_SUMMARY
          ? { SKIP_SESSION_SUMMARY: process.env.SKIP_SESSION_SUMMARY }
          : {}),
        ...(process.env.MEMORY_RATERS ? { MEMORY_RATERS: process.env.MEMORY_RATERS } : {}),
      },
      stdin: "pipe",
      stdout: "pipe",
      stderr: "pipe",
    });

    // `Bun.spawn`'s `stdin` is typed as `number | FileSink`; with `stdin:
    // "pipe"` it is always a FileSink. Narrow via assertion.
    const stdin = this.proc.stdin as { write(s: string): void; end(): void };
    stdin.write(JSON.stringify(payload));
    stdin.end();

    this.completionPromise = this.processStreams();
  }

  /** Session id taken from the first `session_init` event that carries one. */
  get sessionId(): string | undefined {
    return this._sessionId;
  }

  /**
   * Register a listener and replay any events buffered before a listener
   * existed.
   *
   * The backlog is drained before replaying and each call is guarded, so a
   * throwing listener can neither leave stale events queued nor break the
   * caller — the same isolation `emit` applies to live events.
   */
  onEvent(listener: (event: ProviderEvent) => void): void {
    this.listeners.push(listener);
    const backlog = this.eventQueue.splice(0);
    for (const event of backlog) {
      try {
        listener(event);
      } catch {
        // listener errors must not break the event stream
      }
    }
  }

  /** Resolves once the subprocess streams have closed and it has exited. */
  async waitForCompletion(): Promise<ProviderResult> {
    return this.completionPromise;
  }

  /** Ask the subprocess to stop by sending it SIGTERM. */
  async abort(): Promise<void> {
    this.proc.kill("SIGTERM");
  }

  /**
   * Deliver an event to every listener, or buffer it when none is attached
   * yet. Also records the session id from `session_init` events.
   */
  private emit(event: ProviderEvent): void {
    if (event.type === "session_init" && event.sessionId) {
      this._sessionId = event.sessionId;
    }
    if (this.listeners.length === 0) {
      this.eventQueue.push(event);
      return;
    }
    for (const listener of this.listeners) {
      try {
        listener(event);
      } catch {
        // listener errors must not break the event stream
      }
    }
  }

  /**
   * Consume the subprocess's stdout (protocol lines) and stderr (raw text)
   * until both close, then wait for the process to exit.
   *
   * @returns The last structured result the subprocess sent, or a
   *   synthesised error result when it exited without sending one.
   */
  private async processStreams(): Promise<ProviderResult> {
    // Held on an object so assignments made inside the stream closures are
    // visible to the type checker at the read below.
    const outcome: { result: ProviderResult | null } = { result: null };
    const recordResult = (r: ProviderResult): void => {
      outcome.result = r;
    };
    let stderrTail = "";

    const stdoutPromise = (async () => {
      const stdout = this.proc.stdout as ReadableStream<Uint8Array> | null;
      if (!stdout) return;
      // One streaming decoder for the whole stream: a multi-byte UTF-8
      // sequence split across two chunks must be decoded as one character,
      // not as two replacement characters.
      const decoder = new TextDecoder();
      let partial = "";
      for await (const chunk of stdout) {
        partial += decoder.decode(chunk, { stream: true });
        const parts = partial.split("\n");
        // The last piece is an incomplete line; keep it for the next chunk.
        partial = parts.pop() ?? "";
        for (const line of parts) {
          const trimmed = line.trim();
          if (!trimmed) continue;
          this.handleLine(trimmed, recordResult);
        }
      }
      // Flush any bytes the decoder was still holding, then handle a final
      // line that was not newline-terminated.
      partial += decoder.decode();
      const trailing = partial.trim();
      if (trailing) {
        this.handleLine(trailing, recordResult);
      }
    })();

    const stderrPromise = (async () => {
      const stderr = this.proc.stderr as ReadableStream<Uint8Array> | null;
      if (!stderr) return;
      const decoder = new TextDecoder();
      const forward = (text: string): void => {
        if (!text) return;
        // Keep only the most recent 2000 characters for the failure reason.
        stderrTail = (stderrTail + text).slice(-2000);
        // Surface subprocess stderr (codex CLI warnings, auth.json
        // restoration messages) into the parent's event stream so it lands
        // in /workspace/logs/*.jsonl the way the in-process path did.
        this.emit({ type: "raw_stderr", content: text });
      };
      for await (const chunk of stderr) {
        forward(decoder.decode(chunk, { stream: true }));
      }
      forward(decoder.decode());
    })();

    await Promise.all([stdoutPromise, stderrPromise]);
    const exitCode = await this.proc.exited;

    if (outcome.result) {
      return outcome.result;
    }
    // Subprocess exited before sending a structured result — synthesise one
    // so the runner doesn't hang on waitForCompletion. Include stderr tail
    // so the actual error message reaches the task failure reason.
    // NOTE(review): an exit code of 0 with no result is reported here as
    // `exitCode: 0` together with `isError: true` — confirm callers key off
    // `isError` rather than the exit code.
    const stderrHint = stderrTail.trim() ? ` — stderr: ${stderrTail.trim().slice(-500)}` : "";
    return {
      exitCode: exitCode ?? 1,
      sessionId: this._sessionId,
      isError: true,
      failureReason: `codex subprocess exited (code=${exitCode ?? "?"}) without a structured result${stderrHint}`,
    };
  }

  /**
   * Interpret one non-empty stdout line from the subprocess.
   *
   * @param line - A trimmed line of subprocess stdout.
   * @param setResult - Called with the result when the line carries one
   *   (a `result` envelope, or an `error` envelope converted to a failure).
   */
  private handleLine(line: string, setResult: (r: ProviderResult) => void): void {
    let msg: { kind?: string; event?: ProviderEvent; result?: ProviderResult; message?: string };
    try {
      msg = JSON.parse(line);
    } catch {
      // Not a valid JSON envelope — treat as raw stderr-equivalent.
      this.emit({ type: "raw_stderr", content: `${line}\n` });
      return;
    }
    if (msg.kind === "event" && msg.event) {
      this.emit(msg.event);
      return;
    }
    if (msg.kind === "result" && msg.result) {
      setResult(msg.result);
      return;
    }
    if (msg.kind === "error" && msg.message) {
      // An error envelope is both surfaced to listeners and turned into the
      // session's failure result.
      this.emit({ type: "error", message: msg.message });
      setResult({
        exitCode: 1,
        sessionId: this._sessionId,
        isError: true,
        failureReason: msg.message,
      });
      return;
    }
    // Any other envelope shape is ignored.
  }
}
|
|
1560
|
+
|
|
1174
1561
|
export class CodexAdapter implements ProviderAdapter {
|
|
1175
1562
|
readonly name = "codex";
|
|
1176
1563
|
readonly traits = { hasMcp: true, hasLocalEnvironment: true };
|
|
@@ -1191,108 +1578,41 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
1191
1578
|
*/
|
|
1192
1579
|
private readonly summarizeDeps: SummarizeSessionForCodexDeps;
|
|
1193
1580
|
|
|
1194
|
-
|
|
1581
|
+
/**
|
|
1582
|
+
* When true, run the codex session inside the runner process (no subprocess
|
|
1583
|
+
* spawn). Used by:
|
|
1584
|
+
* - Unit tests that monkey-patch `Codex.prototype.startThread` (the patch
|
|
1585
|
+
* would not survive a subprocess boundary).
|
|
1586
|
+
* - The spawned `codex-session-runner` subprocess itself, to avoid
|
|
1587
|
+
* re-spawning recursively.
|
|
1588
|
+
*
|
|
1589
|
+
* Production callers leave this `false`. Each codex session then runs in a
|
|
1590
|
+
* fresh subprocess and its heap dies when the task completes — keeping the
|
|
1591
|
+
* runner's VSZ bounded across thousands of task completions (Picateclas
|
|
1592
|
+
* spawn-OOM permanent fix, 2026-05-28).
|
|
1593
|
+
*/
|
|
1594
|
+
private readonly bypassSubprocess: boolean;
|
|
1595
|
+
|
|
1596
|
+
constructor(
|
|
1597
|
+
opts: {
|
|
1598
|
+
skillsDir?: string;
|
|
1599
|
+
summarizeDeps?: SummarizeSessionForCodexDeps;
|
|
1600
|
+
bypassSubprocess?: boolean;
|
|
1601
|
+
} = {},
|
|
1602
|
+
) {
|
|
1195
1603
|
this.skillsDir = opts.skillsDir;
|
|
1196
1604
|
this.summarizeDeps = opts.summarizeDeps ?? {};
|
|
1605
|
+
this.bypassSubprocess = opts.bypassSubprocess ?? false;
|
|
1197
1606
|
}
|
|
1198
1607
|
|
|
1199
1608
|
async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
try {
|
|
1205
|
-
// Resolve the model once and thread it through. Claude shortnames map
|
|
1206
|
-
// to Codex equivalents; everything else passes through verbatim — the
|
|
1207
|
-
// SDK is the source of truth for what's valid.
|
|
1208
|
-
const resolvedModel = resolveCodexModel(config.model);
|
|
1209
|
-
|
|
1210
|
-
// Buffer warnings emitted during config-building so they're not lost
|
|
1211
|
-
// before `CodexSession.onEvent` attaches a listener. The buffer is
|
|
1212
|
-
// replayed into the session's event stream right after construction
|
|
1213
|
-
// via the `initialEvents` constructor parameter.
|
|
1214
|
-
const preSessionEvents: ProviderEvent[] = [];
|
|
1215
|
-
const bufferedEmit = (event: ProviderEvent) => {
|
|
1216
|
-
preSessionEvents.push(event);
|
|
1217
|
-
};
|
|
1218
|
-
|
|
1219
|
-
const mergedConfig = await buildCodexConfig(config, resolvedModel, bufferedEmit);
|
|
1220
|
-
|
|
1221
|
-
// Auth resolution. `codex_oauth` (in the swarm config store) wins over
|
|
1222
|
-
// `OPENAI_API_KEY` so users can keep an OpenAI key set for embeddings
|
|
1223
|
-
// without it shadowing their ChatGPT login. The entrypoint already runs
|
|
1224
|
-
// this same precedence at boot — this block handles local dev (where
|
|
1225
|
-
// the entrypoint didn't run) and any case where auth.json is stale.
|
|
1226
|
-
const authMode = await resolveCodexAuthMode(config, bufferedEmit);
|
|
1227
|
-
|
|
1228
|
-
// `CodexOptions.env` does NOT inherit from `process.env`. Construct a
|
|
1229
|
-
// minimal env explicitly so the spawned Codex CLI can find its binary
|
|
1230
|
-
// (PATH) and HOME (for ~/.codex/auth.json). `OPENAI_API_KEY` is only
|
|
1231
|
-
// forwarded when auth.json is NOT in chatgpt mode — otherwise it would
|
|
1232
|
-
// override the OAuth login at the Codex CLI layer.
|
|
1233
|
-
const env: Record<string, string> = {
|
|
1234
|
-
PATH: process.env.PATH ?? "",
|
|
1235
|
-
HOME: process.env.HOME ?? "",
|
|
1236
|
-
...(authMode !== "chatgpt" && process.env.OPENAI_API_KEY
|
|
1237
|
-
? { OPENAI_API_KEY: process.env.OPENAI_API_KEY }
|
|
1238
|
-
: {}),
|
|
1239
|
-
...(process.env.NODE_EXTRA_CA_CERTS
|
|
1240
|
-
? { NODE_EXTRA_CA_CERTS: process.env.NODE_EXTRA_CA_CERTS }
|
|
1241
|
-
: {}),
|
|
1242
|
-
...(config.env ?? {}),
|
|
1243
|
-
// Gated cross-service OTel linking: when SWARM_ENABLE_HARNESS_OTEL (or
|
|
1244
|
-
// the deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias) is on, inject
|
|
1245
|
-
// TRACEPARENT from the active worker span so Codex's spans nest under
|
|
1246
|
-
// our worker.session trace. Codex's Rust OTEL SDK reads W3C trace
|
|
1247
|
-
// context from the env via the default tracecontext propagator.
|
|
1248
|
-
// Returns {} (no-op) when off; spread last so the computed value wins.
|
|
1249
|
-
...buildOtelTraceparentEnv(config.env ?? process.env),
|
|
1250
|
-
};
|
|
1251
|
-
|
|
1252
|
-
// The SDK's default `findCodexPath()` does `require.resolve("@openai/codex")`
|
|
1253
|
-
// from the SDK's own module. When agent-swarm runs as a Bun single-file
|
|
1254
|
-
// compiled executable, the bundled SDK can't resolve `@openai/codex` at
|
|
1255
|
-
// runtime because it's not part of the bundle — it lives in a global
|
|
1256
|
-
// install (`/usr/lib/node_modules/@openai/codex` in the Docker worker
|
|
1257
|
-
// image). Honor `CODEX_PATH_OVERRIDE` so Docker can point us at the CLI
|
|
1258
|
-
// wrapper (or native binary) directly. Fall back to undefined so local
|
|
1259
|
-
// dev with `@openai/codex-sdk` installed as a regular node_modules
|
|
1260
|
-
// dependency keeps working via the SDK's own resolver.
|
|
1261
|
-
const codexPathOverride = process.env.CODEX_PATH_OVERRIDE;
|
|
1262
|
-
|
|
1263
|
-
const codex = new Codex({
|
|
1264
|
-
...(codexPathOverride ? { codexPathOverride } : {}),
|
|
1265
|
-
env,
|
|
1266
|
-
config: mergedConfig,
|
|
1609
|
+
if (this.bypassSubprocess) {
|
|
1610
|
+
return createInProcessCodexSession(config, {
|
|
1611
|
+
skillsDir: this.skillsDir,
|
|
1612
|
+
summarizeDeps: this.summarizeDeps,
|
|
1267
1613
|
});
|
|
1268
|
-
|
|
1269
|
-
const threadOptions: ThreadOptions = {
|
|
1270
|
-
workingDirectory: config.cwd,
|
|
1271
|
-
skipGitRepoCheck: true,
|
|
1272
|
-
sandboxMode: "danger-full-access",
|
|
1273
|
-
approvalPolicy: "never",
|
|
1274
|
-
model: resolvedModel,
|
|
1275
|
-
};
|
|
1276
|
-
|
|
1277
|
-
const thread = config.resumeSessionId
|
|
1278
|
-
? codex.resumeThread(config.resumeSessionId, threadOptions)
|
|
1279
|
-
: codex.startThread(threadOptions);
|
|
1280
|
-
|
|
1281
|
-
return new CodexSession(
|
|
1282
|
-
thread,
|
|
1283
|
-
config,
|
|
1284
|
-
agentsMdHandle,
|
|
1285
|
-
resolvedModel,
|
|
1286
|
-
preSessionEvents,
|
|
1287
|
-
this.skillsDir,
|
|
1288
|
-
this.summarizeDeps,
|
|
1289
|
-
);
|
|
1290
|
-
} catch (err) {
|
|
1291
|
-
// If we failed to construct the thread, clean up the managed AGENTS.md
|
|
1292
|
-
// block so we don't leak state on the filesystem.
|
|
1293
|
-
await agentsMdHandle.cleanup();
|
|
1294
|
-
throw err;
|
|
1295
1614
|
}
|
|
1615
|
+
return new CodexSubprocessSession(config, this.skillsDir);
|
|
1296
1616
|
}
|
|
1297
1617
|
|
|
1298
1618
|
async canResume(sessionId: string): Promise<boolean> {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Codex API-addressable models, verified from https://developers.openai.com/codex/models
|
|
3
|
-
* and https://developers.openai.com/api/docs/deprecations as of 2026-
|
|
3
|
+
* and https://developers.openai.com/api/docs/deprecations as of 2026-05-28.
|
|
4
4
|
*
|
|
5
5
|
* NOTE: `gpt-5.3-codex-spark` is intentionally excluded. It is a ChatGPT Pro
|
|
6
6
|
* research preview and is NOT API-addressable via the Codex SDK at launch.
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
* SDK, so new OpenAI models work without a code change.
|
|
19
19
|
*/
|
|
20
20
|
export const CODEX_MODELS = [
|
|
21
|
+
"gpt-5.5", // newest frontier coding/professional-work model, 1.05M context
|
|
21
22
|
"gpt-5.4", // default — mainline reasoning model w/ frontier coding
|
|
22
23
|
"gpt-5.4-mini", // faster/cheaper
|
|
23
24
|
"gpt-5.3-codex", // coding-specialized, 1M context
|
|
@@ -63,6 +64,7 @@ export function resolveCodexModel(modelStr: string | undefined): string {
|
|
|
63
64
|
* Update this map whenever a model's context window changes.
|
|
64
65
|
*/
|
|
65
66
|
export const CODEX_MODEL_CONTEXT_WINDOWS: Record<CodexModel, number> = {
|
|
67
|
+
"gpt-5.5": 1_050_000,
|
|
66
68
|
"gpt-5.4": 200_000,
|
|
67
69
|
"gpt-5.4-mini": 200_000,
|
|
68
70
|
"gpt-5.3-codex": 1_000_000, // 1M context per plan Key Discoveries
|
|
@@ -80,7 +82,7 @@ export function getCodexContextWindow(model: string): number {
|
|
|
80
82
|
|
|
81
83
|
/**
|
|
82
84
|
* Per-model pricing in USD per million tokens, sourced from
|
|
83
|
-
* https://developers.openai.com/api/docs/pricing on 2026-
|
|
85
|
+
* https://developers.openai.com/api/docs/pricing on 2026-05-28 (Standard tier,
|
|
84
86
|
* short-context column — long-context multipliers and Batch / Flex / Priority
|
|
85
87
|
* tiers exist but the Codex SDK does not expose which tier was used so we
|
|
86
88
|
* default to the headline rate).
|
|
@@ -103,6 +105,11 @@ export interface CodexModelPricing {
|
|
|
103
105
|
}
|
|
104
106
|
|
|
105
107
|
export const CODEX_MODEL_PRICING: Record<CodexModel, CodexModelPricing> = {
|
|
108
|
+
"gpt-5.5": {
|
|
109
|
+
inputPerMillion: 5.0,
|
|
110
|
+
cachedInputPerMillion: 0.5,
|
|
111
|
+
outputPerMillion: 30.0,
|
|
112
|
+
},
|
|
106
113
|
"gpt-5.4": {
|
|
107
114
|
inputPerMillion: 2.5,
|
|
108
115
|
cachedInputPerMillion: 0.25,
|
package/src/providers/index.ts
CHANGED
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
export {
|
|
2
|
-
checkProviderCredentials,
|
|
3
|
-
REQUIRED_CRED_VARS_BY_PROVIDER,
|
|
4
|
-
type SupportedProvider,
|
|
5
|
-
} from "../commands/provider-credentials";
|
|
6
1
|
export type {
|
|
7
2
|
CostData,
|
|
8
3
|
CredCheckOptions,
|
|
@@ -15,29 +10,43 @@ export type {
|
|
|
15
10
|
ProviderTraits,
|
|
16
11
|
} from "./types";
|
|
17
12
|
|
|
18
|
-
import { ClaudeAdapter } from "./claude-adapter";
|
|
19
|
-
import { ClaudeManagedAdapter } from "./claude-managed-adapter";
|
|
20
|
-
import { CodexAdapter } from "./codex-adapter";
|
|
21
|
-
import { DevinAdapter } from "./devin-adapter";
|
|
22
|
-
import { OpencodeAdapter } from "./opencode-adapter";
|
|
23
|
-
import { PiMonoAdapter } from "./pi-mono-adapter";
|
|
24
13
|
import type { ProviderAdapter } from "./types";
|
|
25
14
|
|
|
26
|
-
/**
|
|
27
|
-
|
|
15
|
+
/**
|
|
16
|
+
* Create a provider adapter for the given harness provider name.
|
|
17
|
+
*
|
|
18
|
+
* Adapter modules are loaded via dynamic `import()` so their transitive
|
|
19
|
+
* dependencies (e.g. `@earendil-works/pi-coding-agent` for the pi adapter)
|
|
20
|
+
* are NOT evaluated at binary startup. This prevents module-level side
|
|
21
|
+
* effects in third-party SDKs from crashing subcommands that don't need
|
|
22
|
+
* them (the codex-session-runner ENOENT at `/usr/local/bin/package.json`).
|
|
23
|
+
*/
|
|
24
|
+
export async function createProviderAdapter(provider: string): Promise<ProviderAdapter> {
|
|
28
25
|
switch (provider) {
|
|
29
|
-
case "claude":
|
|
26
|
+
case "claude": {
|
|
27
|
+
const { ClaudeAdapter } = await import("./claude-adapter");
|
|
30
28
|
return new ClaudeAdapter();
|
|
31
|
-
|
|
29
|
+
}
|
|
30
|
+
case "pi": {
|
|
31
|
+
const { PiMonoAdapter } = await import("./pi-mono-adapter");
|
|
32
32
|
return new PiMonoAdapter();
|
|
33
|
-
|
|
33
|
+
}
|
|
34
|
+
case "codex": {
|
|
35
|
+
const { CodexAdapter } = await import("./codex-adapter");
|
|
34
36
|
return new CodexAdapter();
|
|
35
|
-
|
|
37
|
+
}
|
|
38
|
+
case "claude-managed": {
|
|
39
|
+
const { ClaudeManagedAdapter } = await import("./claude-managed-adapter");
|
|
36
40
|
return new ClaudeManagedAdapter();
|
|
37
|
-
|
|
41
|
+
}
|
|
42
|
+
case "devin": {
|
|
43
|
+
const { DevinAdapter } = await import("./devin-adapter");
|
|
38
44
|
return new DevinAdapter();
|
|
39
|
-
|
|
45
|
+
}
|
|
46
|
+
case "opencode": {
|
|
47
|
+
const { OpencodeAdapter } = await import("./opencode-adapter");
|
|
40
48
|
return new OpencodeAdapter();
|
|
49
|
+
}
|
|
41
50
|
default:
|
|
42
51
|
throw new Error(
|
|
43
52
|
`Unknown HARNESS_PROVIDER: "${provider}". Supported: claude, pi, codex, devin, claude-managed, opencode`,
|