@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/README.md +1 -0
  2. package/openapi.json +1 -1
  3. package/package.json +8 -6
  4. package/src/be/db.ts +44 -0
  5. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  6. package/src/be/modelsdev-cache.json +152028 -0
  7. package/src/be/modelsdev-cache.ts +46 -0
  8. package/src/be/seed-pricing.ts +7 -44
  9. package/src/cli.tsx +12 -2
  10. package/src/commands/codex-session-runner.ts +132 -0
  11. package/src/commands/credential-wait.ts +2 -2
  12. package/src/commands/provider-credentials.ts +10 -5
  13. package/src/commands/runner.ts +3 -3
  14. package/src/prompts/base-prompt.ts +49 -3
  15. package/src/providers/claude-adapter.ts +83 -2
  16. package/src/providers/claude-managed-models.ts +18 -2
  17. package/src/providers/codex-adapter.ts +417 -97
  18. package/src/providers/codex-models.ts +9 -2
  19. package/src/providers/index.ts +28 -19
  20. package/src/providers/pricing-sources.md +7 -4
  21. package/src/providers/swarm-events-shared.ts +14 -0
  22. package/src/slack/HEURISTICS.md +5 -1
  23. package/src/slack/handlers.test.ts +35 -0
  24. package/src/slack/handlers.ts +79 -2
  25. package/src/tests/base-prompt.test.ts +46 -8
  26. package/src/tests/claude-managed-adapter.test.ts +4 -4
  27. package/src/tests/codex-adapter-otel.test.ts +4 -4
  28. package/src/tests/codex-adapter.test.ts +20 -7
  29. package/src/tests/codex-swarm-events.test.ts +35 -0
  30. package/src/tests/context-window.test.ts +1 -0
  31. package/src/tests/credential-check.test.ts +48 -29
  32. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  33. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  34. package/src/tests/migration-046-budgets.test.ts +6 -5
  35. package/src/tests/pricing-routes.test.ts +6 -5
  36. package/src/tests/provider-adapter.test.ts +10 -10
  37. package/src/tests/provider-command-format.test.ts +4 -4
  38. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  39. package/src/tools/send-task.ts +30 -9
  40. package/src/utils/context-window.ts +1 -0
  41. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  42. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  43. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  44. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  45. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  46. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  47. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  48. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  49. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  50. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  51. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  52. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  53. package/templates/schema.ts +26 -0
  54. package/templates/skills/agentmail-sending/config.json +13 -0
  55. package/templates/skills/agentmail-sending/content.md +48 -0
  56. package/templates/skills/artifacts/config.json +13 -0
  57. package/templates/skills/artifacts/content.md +87 -0
  58. package/templates/skills/browser-use-cloud/config.json +13 -0
  59. package/templates/skills/browser-use-cloud/content.md +155 -0
  60. package/templates/skills/desloppify/config.json +13 -0
  61. package/templates/skills/desloppify/content.md +201 -0
  62. package/templates/skills/exa-search/config.json +13 -0
  63. package/templates/skills/exa-search/content.md +106 -0
  64. package/templates/skills/jira-interaction/config.json +13 -0
  65. package/templates/skills/jira-interaction/content.md +252 -0
  66. package/templates/skills/kapso-whatsapp/config.json +13 -0
  67. package/templates/skills/kapso-whatsapp/content.md +369 -0
  68. package/templates/skills/kv-storage/config.json +13 -0
  69. package/templates/skills/kv-storage/content.md +111 -0
  70. package/templates/skills/linear-interaction/config.json +20 -0
  71. package/templates/skills/linear-interaction/content.md +230 -0
  72. package/templates/skills/pages/config.json +18 -0
  73. package/templates/skills/pages/content.md +85 -0
  74. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  75. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  76. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  77. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  78. package/templates/skills/sprite-cli/config.json +13 -0
  79. package/templates/skills/sprite-cli/content.md +133 -0
  80. package/templates/skills/turso-interaction/config.json +13 -0
  81. package/templates/skills/turso-interaction/content.md +192 -0
  82. package/templates/skills/workflow-iterate/config.json +18 -0
  83. package/templates/skills/workflow-iterate/content.md +399 -0
  84. package/templates/skills/workflow-structured-output/config.json +13 -0
  85. package/templates/skills/workflow-structured-output/content.md +101 -0
  86. package/templates/skills/x-api-interactions/config.json +13 -0
  87. package/templates/skills/x-api-interactions/content.md +109 -0
  88. package/templates/workflows/autopilot/config.json +13 -0
  89. package/templates/workflows/autopilot/content.md +58 -0
  90. package/templates/workflows/linear-drain-loop/config.json +21 -0
  91. package/templates/workflows/linear-drain-loop/content.md +72 -0
  92. package/templates/workflows/ralph-loop/config.json +13 -0
  93. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -0,0 +1,46 @@
1
+ import { readFileSync } from "node:fs";
2
+ import path from "node:path";
3
+
/** The `cost` object of a models.dev model entry. Every field is optional. */
export interface ModelsDevCostBlock {
  input?: number;
  output?: number;
  cache_read?: number;
  cache_write?: number;
}

/** One model entry of the models.dev snapshot: an optional id and cost block. */
export interface ModelsDevModel {
  id?: string;
  cost?: ModelsDevCostBlock;
}

/** One provider entry of the snapshot, holding its models keyed by string. */
export interface ModelsDevProvider {
  models?: Record<string, ModelsDevModel>;
}

/** Top-level shape of the vendored snapshot: provider entries keyed by string. */
export type ModelsDevCache = Record<string, ModelsDevProvider>;

/** Path segments of the vendored snapshot, relative to a base directory. */
const MODELSDEV_CACHE_SEGMENTS = ["src", "be", "modelsdev-cache.json"] as const;

/** Relative location of the vendored models.dev snapshot. */
export const MODELSDEV_CACHE_PATH = path.join(...MODELSDEV_CACHE_SEGMENTS);
23
+
/**
 * Load the vendored models.dev snapshot from the first candidate location that
 * holds a readable JSON object.
 *
 * Candidates, in order: `process.env.MODELSDEV_CACHE_PATH` when set, then
 * `MODELSDEV_CACHE_PATH` resolved against the current working directory, its
 * parent directory, and `/app` (the API Docker image copies the snapshot to
 * `/app/src/be/...`).
 *
 * The lookup is best-effort: a candidate that cannot be read or is not valid
 * JSON is skipped. A candidate whose top-level JSON value is not a plain
 * object (`null`, an array, a string, ...) is skipped as well, so callers
 * never receive such a value typed as a cache.
 *
 * @returns The parsed snapshot, or `null` when no candidate yields one.
 */
export function loadModelsDevCache(): ModelsDevCache | null {
  const explicitPath = process.env.MODELSDEV_CACHE_PATH;
  const candidates = [
    ...(explicitPath ? [explicitPath] : []),
    path.join(process.cwd(), MODELSDEV_CACHE_PATH),
    path.join(process.cwd(), "..", MODELSDEV_CACHE_PATH),
    path.join("/app", MODELSDEV_CACHE_PATH),
  ];

  for (const candidate of candidates) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(candidate, "utf-8"));
      // Only a plain JSON object can be a provider map; anything else is
      // treated like a missing file.
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        return parsed as ModelsDevCache;
      }
    } catch {
      // unreadable or not valid JSON — try next candidate
    }
  }

  return null;
}
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
3
3
  *
4
- * The vendored models.dev snapshot at `ui/src/lib/modelsdev-cache.json` is the
4
+ * The vendored models.dev snapshot at `src/be/modelsdev-cache.json` is the
5
5
  * single source of truth for per-token rates. We project it into rows keyed by
6
6
  * `(provider, model, token_class)` so the recompute path in
7
7
  * `src/http/session-data.ts` can rebuild USD from tokens regardless of which
@@ -17,30 +17,15 @@
17
17
  * admin route (`POST /api/pricing`) — we don't overwrite seed rows.
18
18
  */
19
19
 
20
- import { readFileSync } from "node:fs";
21
- import path from "node:path";
22
20
  import type { PricingProvider, PricingTokenClass } from "../types";
23
21
  import { getDb } from "./db";
22
+ import {
23
+ loadModelsDevCache,
24
+ type ModelsDevCache,
25
+ type ModelsDevCostBlock,
26
+ } from "./modelsdev-cache";
24
27
  import { normalizeModelKey } from "./pricing-normalize";
25
28
 
26
- interface ModelsDevCostBlock {
27
- input?: number;
28
- output?: number;
29
- cache_read?: number;
30
- cache_write?: number;
31
- }
32
-
33
- interface ModelsDevModel {
34
- id?: string;
35
- cost?: ModelsDevCostBlock;
36
- }
37
-
38
- interface ModelsDevProvider {
39
- models?: Record<string, ModelsDevModel>;
40
- }
41
-
42
- type ModelsDevCache = Record<string, ModelsDevProvider>;
43
-
44
29
  /**
45
30
  * Per-harness manual rates that models.dev doesn't carry. Keep the source URL
46
31
  * and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
@@ -82,33 +67,11 @@ const MANUAL_PRICING_OVERRIDES: Array<{
82
67
  * fields the models.dev snapshot doesn't index directly; we map them here.
83
68
  */
84
69
  const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
85
- opus: "claude-opus-4-7",
70
+ opus: "claude-opus-4-8",
86
71
  sonnet: "claude-sonnet-4-6",
87
72
  haiku: "claude-haiku-4-5",
88
73
  };
89
74
 
90
- /**
91
- * Resolve the path to the vendored models.dev cache. The UI copy is canonical.
92
- * We treat this as best-effort: if the file is missing (developer ran the
93
- * server without `ui/` checked out), we log and continue with manual rates
94
- * only — better than crashing the boot.
95
- */
96
- function loadModelsDevCache(): ModelsDevCache | null {
97
- const candidates = [
98
- path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
99
- path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
100
- ];
101
- for (const cand of candidates) {
102
- try {
103
- const raw = readFileSync(cand, "utf-8");
104
- return JSON.parse(raw) as ModelsDevCache;
105
- } catch {
106
- // try next candidate
107
- }
108
- }
109
- return null;
110
- }
111
-
112
75
  interface PricingSeedRow {
113
76
  provider: PricingProvider;
114
77
  model: string;
package/src/cli.tsx CHANGED
@@ -16,8 +16,12 @@ import { getApiKey, setApiKey } from "./utils/api-key.ts";
16
16
  // Get CLI name from bin field (assumes single key)
17
17
  const binName = Object.keys(pkg.bin)[0];
18
18
 
19
- // Restore cursor on exit
20
- const restoreCursor = () => process.stdout.write("\x1B[?25h");
// Re-show the terminal cursor on exit, but only when stdout is a TTY.
// Non-TTY invocations (like the codex-session-runner subprocess whose stdout
// is a JSON pipe) must not get terminal escape sequences injected into the
// byte stream.
const restoreCursor = () => {
  if (!process.stdout.isTTY) return;
  process.stdout.write("\x1B[?25h");
};
21
25
  process.on("exit", restoreCursor);
22
26
  process.on("SIGINT", () => {
23
27
  restoreCursor();
@@ -570,6 +574,12 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
570
574
  const { runCodexLogin } = await import("./commands/codex-login");
571
575
  const codexLoginArgs = process.argv.slice(process.argv.indexOf("codex-login") + 1);
572
576
  await runCodexLogin(codexLoginArgs);
577
+ } else if (args.command === "codex-session-runner") {
578
+ // Internal subcommand — invoked by CodexSubprocessSession to host a single
579
+ // codex session in a throwaway subprocess. See src/commands/codex-session-runner.ts
580
+ // for the rationale (Picateclas spawn-OOM permanent fix, 2026-05-28).
581
+ const { runCodexSessionRunner } = await import("./commands/codex-session-runner");
582
+ await runCodexSessionRunner();
573
583
  } else if (args.command === "claude-managed-setup") {
574
584
  const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
575
585
  const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Codex session subprocess runner.
3
+ *
4
+ * Entry point for the `codex-session-runner` CLI subcommand. Reads a
5
+ * `CodexSubprocessInput` payload from stdin, drives a fresh in-process
6
+ * `CodexSession`, and pipes the session's `ProviderEvent` stream + final
7
+ * `ProviderResult` back to its parent over stdout as line-delimited JSON.
8
+ *
9
+ * Why this exists: the previous architecture ran every codex session
10
+ * directly inside the long-lived worker runner. The `@openai/codex-sdk`
11
+ * leaks SDK state (parsers, transcript buffers, JSON-RPC plumbing) into
12
+ * the runner's heap, and after ~1,500 task completions on a hot worker
13
+ * (Picateclas, 2026-05-28) the runner's VSZ ballooned to 74 GB / RSS to
14
+ * 7.5 GB, causing every subsequent `fork()` to fail ENOMEM regardless of
15
+ * current RSS (the kernel reserves CoW for the full VSZ at fork time).
16
+ *
17
+ * Moving each session into its own subprocess means the SDK state dies
18
+ * with the subprocess. The runner stays at the ~234 MB baseline observed
19
+ * on Reviewer (the cohort partner that did 481 task completions without
20
+ * the OOM symptom). See task `fa0c0681` for the byte-by-byte breakdown.
21
+ *
22
+ * Wire protocol over stdout (one JSON object per line):
23
+ * {"kind":"event", "event": <ProviderEvent>}
24
+ * {"kind":"result", "result": <ProviderResult>}
25
+ * {"kind":"error", "message": "..."}
26
+ */
27
+
28
+ import { createInProcessCodexSession } from "../providers/codex-adapter";
29
+ import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
30
+
/** JSON payload the runner reads from stdin before starting a session. */
interface CodexSubprocessInput {
  /** Session configuration passed to `createInProcessCodexSession`. */
  config: ProviderSessionConfig;
  /** Forwarded as the `skillsDir` option when the session is created. */
  skillsDir?: string;
  /**
   * Environment entries captured by the parent (OTel TRACEPARENT and friends).
   * When non-empty they are merged over `config.env` before the session starts.
   */
  parentOtelEnv?: Record<string, string>;
}
36
+
/**
 * Read everything available on stdin and return it as one decoded string.
 *
 * `Bun.stdin` is a BunFile in some versions and a Web stream in others, so the
 * readable stream is used when a `stream()` method is present; otherwise the
 * text is read through `Bun.file("/dev/stdin")`.
 */
async function readAllStdin(): Promise<string> {
  const stdin = Bun.stdin as unknown as { stream?: () => ReadableStream<Uint8Array> };
  const stream = stdin.stream ? stdin.stream() : null;
  if (!stream) {
    // Fallback: read via Bun.file (file-like access works for piped stdin too)
    return await Bun.file("/dev/stdin").text();
  }

  const decoder = new TextDecoder();
  const reader = stream.getReader();
  const pieces: string[] = [];
  for (;;) {
    const chunk = await reader.read();
    if (chunk.done) break;
    // `stream: true` keeps a multi-byte sequence split across chunks intact.
    if (chunk.value) pieces.push(decoder.decode(chunk.value, { stream: true }));
  }
  // Flush whatever the decoder is still buffering.
  pieces.push(decoder.decode());
  return pieces.join("");
}
58
+
/** Emit `obj` on stdout as a single line of JSON (one wire-protocol message). */
function writeLine(obj: unknown): void {
  const serialized = JSON.stringify(obj);
  process.stdout.write(serialized + "\n");
}
62
+
/**
 * Entry point of the `codex-session-runner` subcommand.
 *
 * Runs the session driver and, if anything escapes it, logs the failure to
 * stderr, reports it to the parent as an `error` line on stdout, and exits
 * with status 1.
 */
export async function runCodexSessionRunner(): Promise<void> {
  try {
    await runCodexSessionRunnerInner();
  } catch (err) {
    let message: string;
    let stack: string | undefined;
    if (err instanceof Error) {
      message = err.message;
      stack = err.stack;
    } else {
      message = String(err);
      stack = undefined;
    }

    console.error(`[codex-session-runner] top-level crash: ${message}`);
    if (stack) {
      console.error(stack);
    }
    writeLine({ kind: "error", message: `codex-session-runner: unexpected crash: ${message}` });
    process.exit(1);
  }
}
75
+
/**
 * Drive one codex session from start to finish.
 *
 * Steps:
 *  1. Read and validate the `CodexSubprocessInput` payload from stdin.
 *  2. Merge the parent's captured OTel env into the session config.
 *  3. Create the in-process session.
 *  4. Stream every provider event, then the final result, to stdout as
 *     line-delimited JSON.
 *
 * Failures in steps 1 and 3 are logged to stderr, reported as an `error` line
 * on stdout, and end the process with status 1. On normal completion the
 * process exits with the result's `exitCode` (0 when absent). After SIGINT or
 * SIGTERM the session is aborted and the process exits with 130 or 143.
 */
async function runCodexSessionRunnerInner(): Promise<void> {
  let input: CodexSubprocessInput;
  try {
    const parsed: unknown = JSON.parse(await readAllStdin());
    // Reject structurally wrong payloads here so the parent gets a clear
    // "failed to parse stdin" error instead of a later TypeError.
    const config =
      typeof parsed === "object" && parsed !== null
        ? (parsed as { config?: unknown }).config
        : undefined;
    if (typeof config !== "object" || config === null) {
      throw new Error("payload must be a JSON object with a `config` object");
    }
    input = parsed as CodexSubprocessInput;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
    writeLine({
      kind: "error",
      message: `codex-session-runner: failed to parse stdin: ${message}`,
    });
    process.exit(1);
  }

  // Forward the parent's captured OTel TRACEPARENT (and friends) into the
  // session config's env so the spawned Codex CLI nests its spans under our
  // worker.session trace. We deliberately do NOT call
  // `buildOtelTraceparentEnv` from inside this subprocess — its tracer has
  // no active span, so it would emit nothing.
  if (input.parentOtelEnv && Object.keys(input.parentOtelEnv).length > 0) {
    input.config.env = { ...(input.config.env ?? {}), ...input.parentOtelEnv };
  }

  let session: Awaited<ReturnType<typeof createInProcessCodexSession>>;
  try {
    session = await createInProcessCodexSession(input.config, {
      skillsDir: input.skillsDir,
    });
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`[codex-session-runner] createSession failed: ${message}`);
    writeLine({ kind: "error", message: `codex-session-runner: createSession failed: ${message}` });
    process.exit(1);
  }

  // Forward SIGTERM / SIGINT to the in-process session so the runner can
  // gracefully cancel us. The parent `CodexSubprocessSession.abort()` sends
  // SIGTERM here; the session's AbortController catches it and the codex
  // CLI subprocess (a grandchild) gets cleaned up.
  let abortRequested = false;
  const onSignal = (signal: NodeJS.Signals) => {
    // A repeated signal must not re-run abort or stack a second exit timer.
    if (abortRequested) return;
    abortRequested = true;
    const exitCode = signal === "SIGINT" ? 130 : 143;
    void session
      .abort()
      .catch((err: unknown) => {
        // A failed abort must not become an unhandled rejection; log it and
        // still exit with the signal's status.
        const message = err instanceof Error ? err.message : String(err);
        console.error(`[codex-session-runner] abort failed: ${message}`);
      })
      .finally(() => {
        // give the session a beat to emit its cancellation result, then exit
        setTimeout(() => process.exit(exitCode), 250);
      });
  };
  process.on("SIGTERM", () => onSignal("SIGTERM"));
  process.on("SIGINT", () => onSignal("SIGINT"));

  session.onEvent((event: ProviderEvent) => {
    writeLine({ kind: "event", event });
  });

  const result: ProviderResult = await session.waitForCompletion();
  writeLine({ kind: "result", result });
  process.exit(result.exitCode ?? 0);
}
@@ -133,7 +133,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
133
133
 
134
134
  // Fast path: already satisfied at boot.
135
135
  let currentProvider = readProvider();
136
- let status = checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
136
+ let status = await checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
137
137
  if (status.ready) {
138
138
  log(
139
139
  `[boot] credentials ready (provider=${currentProvider}, satisfiedBy=${status.satisfiedBy})`,
@@ -183,7 +183,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
183
183
  currentProvider = nextProvider;
184
184
  }
185
185
 
186
- status = checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
186
+ status = await checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
187
187
 
188
188
  if (!status.ready) {
189
189
  // Exponential backoff with cap.
@@ -23,7 +23,6 @@ import { checkClaudeManagedCredentials } from "../providers/claude-managed-adapt
23
23
  import { checkCodexCredentials } from "../providers/codex-adapter";
24
24
  import { checkDevinCredentials } from "../providers/devin-adapter";
25
25
  import { checkOpencodeCredentials } from "../providers/opencode-adapter";
26
- import { checkPiMonoCredentials } from "../providers/pi-mono-adapter";
27
26
  import type { CredCheckOptions, CredStatus } from "../providers/types";
28
27
  import type { AgentCredStatus, AgentLatestModel, ProviderName } from "../types";
29
28
  import { scrubSecrets } from "../utils/secret-scrubber";
@@ -55,12 +54,16 @@ export const REQUIRED_CRED_VARS_BY_PROVIDER: Record<SupportedProvider, readonly
55
54
  /**
56
55
  * Run the predicate for `provider`. Unknown providers throw — call sites
57
56
  * should treat that as a configuration bug, not a user-correctable state.
57
+ *
58
+ * The `pi` case uses a dynamic import so `@earendil-works/pi-coding-agent`
59
+ * (which has module-level side effects that crash in the Bun compiled
60
+ * binary) is only loaded when the pi provider is actually selected.
58
61
  */
59
- export function checkProviderCredentials(
62
+ export async function checkProviderCredentials(
60
63
  provider: string,
61
64
  env: Record<string, string | undefined>,
62
65
  opts?: CredCheckOptions,
63
- ): CredStatus {
66
+ ): Promise<CredStatus> {
64
67
  switch (provider) {
65
68
  case "claude":
66
69
  return checkClaudeCredentials(env);
@@ -72,8 +75,10 @@ export function checkProviderCredentials(
72
75
  return checkDevinCredentials(env);
73
76
  case "opencode":
74
77
  return checkOpencodeCredentials(env, opts);
75
- case "pi":
78
+ case "pi": {
79
+ const { checkPiMonoCredentials } = await import("../providers/pi-mono-adapter");
76
80
  return checkPiMonoCredentials(env, opts);
81
+ }
77
82
  default:
78
83
  throw new Error(
79
84
  `checkProviderCredentials: unknown provider "${provider}". Supported: claude, claude-managed, codex, devin, opencode, pi.`,
@@ -386,7 +391,7 @@ export async function buildCredStatusReport(
386
391
  opts: CredCheckOptions = {},
387
392
  kind: AgentCredStatus["reportKind"],
388
393
  ): Promise<AgentCredStatus> {
389
- const presence = checkProviderCredentials(provider, env, opts);
394
+ const presence = await checkProviderCredentials(provider, env, opts);
390
395
  let liveTest: AgentCredStatus["liveTest"] = null;
391
396
  if (presence.ready) {
392
397
  const live = await validateProviderCredentials(provider);
@@ -2182,7 +2182,7 @@ export function implicitCloseActiveToolSpans(
2182
2182
  }
2183
2183
 
2184
2184
  async function spawnProviderProcess(
2185
- adapter: ReturnType<typeof createProviderAdapter>,
2185
+ adapter: Awaited<ReturnType<typeof createProviderAdapter>>,
2186
2186
  opts: {
2187
2187
  prompt: string;
2188
2188
  logFile: string;
@@ -3121,7 +3121,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
3121
3121
  // Create provider adapter using the resolved value. `let` so the poll-loop
3122
3122
  // reconciliation block (Section 4) can swap it live when an operator changes
3123
3123
  // HARNESS_PROVIDER in swarm_config — call sites read the current binding.
3124
- let adapter = createProviderAdapter(bootProvider);
3124
+ let adapter = await createProviderAdapter(bootProvider);
3125
3125
 
3126
3126
  // Configure HTTP-based template resolution (workers resolve via API, not local DB)
3127
3127
  if (apiKey) {
@@ -3368,7 +3368,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
3368
3368
  const previous = state.harnessProvider;
3369
3369
  console.log(`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`);
3370
3370
  try {
3371
- adapter = createProviderAdapter(resolvedProvider);
3371
+ adapter = await createProviderAdapter(resolvedProvider);
3372
3372
  state.harnessProvider = resolvedProvider;
3373
3373
  basePrompt = await buildSystemPrompt();
3374
3374
  resolvedSystemPrompt = additionalSystemPrompt
@@ -16,8 +16,27 @@ import "./session-templates";
16
16
  /** Max characters per individual injected section before truncation */
17
17
  const BOOTSTRAP_MAX_CHARS = 20_000;
18
18
 
19
- /** Max total characters across all injected sections combined */
20
- const BOOTSTRAP_TOTAL_MAX_CHARS = 150_000;
/**
 * Max total characters across all injected sections combined.
 *
 * Sized to stay below Linux's `MAX_ARG_STRLEN = 131,072` bytes — the
 * per-argv-element kernel limit that bit Picateclas attempts 4-6
 * (2026-05-28). The base-prompt becomes one argv element when the claude
 * adapter passes `--append-system-prompt <prompt>`, so the prompt MUST stay
 * under MAX_ARG_STRLEN even with a few KB of growth. The claude-adapter
 * also stages the prompt to a file (`--append-system-prompt-file`) as a
 * belt-and-braces fix, but the budget cap is the cheap insurance for any
 * code path that ever passes the prompt inline.
 *
 * NOTE(review): this cap counts characters, while MAX_ARG_STRLEN counts
 * bytes. The margin only holds for mostly-ASCII prompts, since one character
 * can occupy several bytes in UTF-8 — confirm the inline path never carries a
 * heavily non-ASCII prompt, or measure the encoded size instead.
 */
const BOOTSTRAP_TOTAL_MAX_CHARS = 120_000;

/**
 * Per-section cap, in characters, applied to the *repo* CLAUDE.md (the
 * agent-swarm OSS one is ~18 KB and the biggest volatile component of the
 * system prompt). 12,000 characters leaves room for the static prompt
 * scaffold + identity + tools + agent CLAUDE.md under the total budget above.
 */
const REPO_CLAUDE_MD_MAX_CHARS = 12_000;
21
40
 
22
41
  /** Truncation notice appended when a section is cut */
23
42
  const truncationNotice = (file: string) =>
@@ -150,7 +169,16 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
150
169
  prompt += `The following CLAUDE.md is from the repository cloned at \`${args.repoContext.clonePath}\`. `;
151
170
  prompt += `**IMPORTANT: These instructions apply ONLY when working within the \`${args.repoContext.clonePath}\` directory.** `;
152
171
  prompt += `Do NOT apply these rules to files outside that directory.\n\n`;
153
- prompt += `${args.repoContext.claudeMd}\n`;
172
+ // Cap the repo CLAUDE.md so it can't blow the bootstrap budget on its
173
+ // own. Pre-cap, the agent-swarm OSS CLAUDE.md was 17,856 B — the
174
+ // single biggest volatile component of the system prompt and the
175
+ // direct driver of the Picateclas argv-E2BIG saga (2026-05-28).
176
+ // Truncation footer points readers at the on-disk copy in the cwd.
177
+ prompt += `${truncateRepoClaudeMd(
178
+ args.repoContext.claudeMd,
179
+ args.repoContext.clonePath,
180
+ REPO_CLAUDE_MD_MAX_CHARS,
181
+ )}\n`;
154
182
  } else if (!args.repoContext.warning) {
155
183
  prompt += `Repository is cloned at \`${args.repoContext.clonePath}\` but has no CLAUDE.md file.\n`;
156
184
  }
@@ -267,6 +295,24 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
267
295
  return prompt;
268
296
  };
269
297
 
/**
 * Truncate the repo CLAUDE.md to a character budget so it can't blow the
 * bootstrap argv ceiling on its own (Picateclas spawn-OOM, 2026-05-28).
 *
 * The budget is measured in UTF-16 code units (`String.prototype.length`),
 * not bytes. Content at or under the budget is returned unchanged. Longer
 * content is cut so that content plus footer fits the budget; the cut never
 * lands between the two halves of a surrogate pair.
 *
 * The footer is structured as a `[...truncated — see <path>/CLAUDE.md for full
 * content]` notice so anyone reading the system prompt knows exactly where
 * the dropped content lives on disk.
 *
 * Exported only for testing.
 *
 * @param content   Full text of the repo CLAUDE.md.
 * @param clonePath Directory of the clone; used to build the footer path.
 * @param budget    Maximum length of the returned string, in code units.
 * @returns The original content, or a truncated prefix followed by the
 *          footer. When the budget is too small to hold the footer, the
 *          footer alone is returned (and may exceed the budget).
 */
export function truncateRepoClaudeMd(content: string, clonePath: string, budget: number): string {
  if (content.length <= budget) return content;
  const notice = `\n\n[...truncated — see ${clonePath}/CLAUDE.md for full content]\n`;
  const contentBudget = budget - notice.length;
  if (contentBudget <= 0) return notice.trimStart();

  // Back off by one unit when the cut would keep a high surrogate without its
  // low half, which would leave an unpaired surrogate in the prompt.
  let cut = contentBudget;
  const lastKept = content.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;

  return content.slice(0, cut) + notice;
}
315
+
270
316
  /** Truncate a section to fit within a character budget, appending a notice if cut */
271
317
  function truncateSection(
272
318
  content: string | undefined,
@@ -318,6 +318,30 @@ export function buildClaudeCodeOtelEnv(
318
318
  return otelEnv;
319
319
  }
320
320
 
/**
 * Build the on-disk location where a task's system prompt is staged.
 *
 * Passing the prompt as `--append-system-prompt <value>` turns the whole
 * prompt into a single argv element. Linux caps one argument at
 * `MAX_ARG_STRLEN = 131072` bytes, and the system prompt (CLAUDE.md +
 * TOOLS.md + identity files + repo CLAUDE.md) routinely runs 50–80 KB. A few
 * growth nudges push it over the limit, `posix_spawn` returns E2BIG, and the
 * worker dies (Picateclas attempts 4-6, 2026-05-28).
 *
 * `claude --append-system-prompt-file <path>` reads the prompt from disk
 * instead, so the argv stays bounded by the filename length and the prompt
 * size is decoupled from the kernel's argv ceiling.
 *
 * Exported for unit testing.
 *
 * @param taskId Task identifier embedded in the file name.
 * @returns Path of the form `/tmp/agent-swarm-system-prompt-<taskId>.txt`.
 */
export function getSystemPromptFilePath(taskId: string): string {
  // The taskId is a UUID, so it is safe to embed in a /tmp filename. The name
  // follows the existing /tmp/agent-swarm-task-${pid}.json and
  // /tmp/mcp-${taskId}.json convention so a janitor sweeping /tmp can find
  // all session-scoped state under the same prefix.
  const prefix = "/tmp/agent-swarm-system-prompt-";
  const extension = ".txt";
  return prefix + taskId + extension;
}
344
+
321
345
  class ClaudeSession implements ProviderSession {
322
346
  private proc: ReturnType<typeof Bun.spawn>;
323
347
  private listeners: Array<(event: ProviderEvent) => void> = [];
@@ -327,6 +351,8 @@ class ClaudeSession implements ProviderSession {
327
351
  private errorTracker = new SessionErrorTracker();
328
352
  private taskFilePid: number;
329
353
  private contextWindowSize: number;
354
+ /** Path to the system-prompt temp file when one was staged for this session. */
355
+ private systemPromptFile: string | null;
330
356
 
331
357
  constructor(
332
358
  private config: ProviderSessionConfig,
@@ -335,9 +361,11 @@ class ClaudeSession implements ProviderSession {
335
361
  taskFilePid: number,
336
362
  private sessionMcpConfig: string | null = null,
337
363
  private claudeBinaryArgv: readonly string[] = ["claude"],
364
+ systemPromptFile: string | null = null,
338
365
  ) {
339
366
  this.taskFilePid = taskFilePid;
340
367
  this.contextWindowSize = getContextWindowSize(model);
368
+ this.systemPromptFile = systemPromptFile;
341
369
  const cmd = this.buildCommand();
342
370
 
343
371
  console.log(
@@ -403,7 +431,13 @@ class ClaudeSession implements ProviderSession {
403
431
  cmd.push(...this.config.additionalArgs);
404
432
  }
405
433
 
406
- if (this.config.systemPrompt) {
434
+ // System prompt is staged on disk and read via the file-flag — see
435
+ // `getSystemPromptFilePath` for the rationale (argv E2BIG hardening,
436
+ // Picateclas spawn-OOM, 2026-05-28). The legacy inline form is kept as
437
+ // a fallback for the (unlikely) case where the file couldn't be staged.
438
+ if (this.systemPromptFile) {
439
+ cmd.push("--append-system-prompt-file", this.systemPromptFile);
440
+ } else if (this.config.systemPrompt) {
407
441
  cmd.push("--append-system-prompt", this.config.systemPrompt);
408
442
  }
409
443
 
@@ -490,7 +524,7 @@ class ClaudeSession implements ProviderSession {
490
524
  await logFileHandle.end();
491
525
  const exitCode = await this.proc.exited;
492
526
 
493
- // Cleanup task file and per-session MCP config
527
+ // Cleanup task file, per-session MCP config, and per-task system prompt
494
528
  await cleanupTaskFile(this.taskFilePid);
495
529
  if (this.sessionMcpConfig) {
496
530
  try {
@@ -499,6 +533,13 @@ class ClaudeSession implements ProviderSession {
499
533
  // ignore — temp file may already be gone
500
534
  }
501
535
  }
536
+ if (this.systemPromptFile) {
537
+ try {
538
+ await unlink(this.systemPromptFile);
539
+ } catch {
540
+ // ignore — temp file may already be gone
541
+ }
542
+ }
502
543
 
503
544
  if (exitCode !== 0 && stderrOutput) {
504
545
  console.error(
@@ -723,6 +764,22 @@ class ClaudeSession implements ProviderSession {
723
764
  startedAt: new Date().toISOString(),
724
765
  });
725
766
 
767
+ // Re-stage the system prompt for the retry — the original was unlinked
768
+ // when the first session finished. Same soft-fail semantics: null
769
+ // falls back to the inline --append-system-prompt argv.
770
+ let retrySystemPromptFile: string | null = null;
771
+ if (retryConfig.systemPrompt) {
772
+ const candidate = getSystemPromptFilePath(retryConfig.taskId);
773
+ try {
774
+ await writeFile(candidate, retryConfig.systemPrompt);
775
+ retrySystemPromptFile = candidate;
776
+ } catch (err) {
777
+ console.warn(
778
+ `\x1b[33m[claude]\x1b[0m Failed to stage retry system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv.`,
779
+ );
780
+ }
781
+ }
782
+
726
783
  const retrySession = new ClaudeSession(
727
784
  retryConfig,
728
785
  this.model,
@@ -730,6 +787,7 @@ class ClaudeSession implements ProviderSession {
730
787
  this.taskFilePid,
731
788
  null,
732
789
  this.claudeBinaryArgv,
790
+ retrySystemPromptFile,
733
791
  );
734
792
 
735
793
  // Forward events from retry to our listeners
@@ -832,6 +890,28 @@ export class ClaudeAdapter implements ProviderAdapter {
832
890
  installedServers,
833
891
  );
834
892
 
893
+ // Stage the system prompt on disk so it can be passed as a file path
894
+ // instead of one giant argv element. This is the structural fix for
895
+ // posix_spawn E2BIG once the prompt grows past MAX_ARG_STRLEN (131,072
896
+ // bytes) — see `getSystemPromptFilePath` and PR description for the
897
+ // Picateclas spawn-OOM saga. Soft-fail (`systemPromptFile = null`) makes
898
+ // the session fall back to the inline `--append-system-prompt` argv;
899
+ // good enough since `BOOTSTRAP_TOTAL_MAX_CHARS` (now 120,000) already
900
+ // caps the worst-case argv element below the kernel limit even without
901
+ // the file path.
902
+ let systemPromptFile: string | null = null;
903
+ if (config.systemPrompt) {
904
+ const candidate = getSystemPromptFilePath(config.taskId);
905
+ try {
906
+ await writeFile(candidate, config.systemPrompt);
907
+ systemPromptFile = candidate;
908
+ } catch (err) {
909
+ console.warn(
910
+ `\x1b[33m[claude]\x1b[0m Failed to stage system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv. Argv may approach MAX_ARG_STRLEN if the prompt is large.`,
911
+ );
912
+ }
913
+ }
914
+
835
915
  return new ClaudeSession(
836
916
  config,
837
917
  model,
@@ -839,6 +919,7 @@ export class ClaudeAdapter implements ProviderAdapter {
839
919
  taskFilePid,
840
920
  sessionMcpConfig,
841
921
  claudeBinaryArgv,
922
+ systemPromptFile,
842
923
  );
843
924
  }
844
925
 
@@ -26,7 +26,9 @@
26
26
  /** Models supported by the managed-agents surface for the swarm worker. */
27
27
  export const CLAUDE_MANAGED_MODELS = [
28
28
  "claude-sonnet-4-6",
29
+ "claude-opus-4-8",
29
30
  "claude-opus-4-7",
31
+ "claude-opus-4-6",
30
32
  "claude-haiku-4-5",
31
33
  ] as const;
32
34
 
@@ -45,11 +47,13 @@ export interface ClaudeManagedModelPricing {
45
47
  }
46
48
 
47
49
  /**
48
- * Anthropic public list pricing as of 2026-04-28. Source:
50
+ * Anthropic public list pricing. Source:
49
51
  * https://platform.claude.com/docs/en/about-claude/pricing
50
52
  *
51
53
  * - claude-sonnet-4-6: $3 / $15 / $0.30 / $3.75 (in / out / cache-read / cache-write)
52
- * - claude-opus-4-7: $15 / $75 / $1.50 / $18.75
54
+ * - claude-opus-4-8: $5 / $25 / $0.50 / $6.25 (verified 2026-05-28)
55
+ * - claude-opus-4-7: $15 / $75 / $1.50 / $18.75 (STALE — was correct at launch, Anthropic has since dropped Opus to $5/$25)
56
+ * - claude-opus-4-6: $5 / $25 / $0.50 / $6.25 (verified 2026-05-28)
53
57
  * - claude-haiku-4-5: $1 / $5 / $0.10 / $1.25
54
58
  */
55
59
  export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeManagedModelPricing> = {
@@ -59,12 +63,24 @@ export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeMana
59
63
  cacheReadPerMillion: 0.3,
60
64
  cacheWritePerMillion: 3.75,
61
65
  },
66
+ "claude-opus-4-8": {
67
+ inputPerMillion: 5.0,
68
+ outputPerMillion: 25.0,
69
+ cacheReadPerMillion: 0.5,
70
+ cacheWritePerMillion: 6.25,
71
+ },
62
72
  "claude-opus-4-7": {
63
73
  inputPerMillion: 15.0,
64
74
  outputPerMillion: 75.0,
65
75
  cacheReadPerMillion: 1.5,
66
76
  cacheWritePerMillion: 18.75,
67
77
  },
78
+ "claude-opus-4-6": {
79
+ inputPerMillion: 5.0,
80
+ outputPerMillion: 25.0,
81
+ cacheReadPerMillion: 0.5,
82
+ cacheWritePerMillion: 6.25,
83
+ },
68
84
  "claude-haiku-4-5": {
69
85
  inputPerMillion: 1.0,
70
86
  outputPerMillion: 5.0,