npm - @desplega.ai/agent-swarm - Versions diffs - 1.85.0 → 1.86.0 - Mend

@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/README.md +1 -0
package/openapi.json +1 -1
package/package.json +8 -6
package/src/be/db.ts +44 -0
package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
package/src/be/modelsdev-cache.json +152028 -0
package/src/be/modelsdev-cache.ts +46 -0
package/src/be/seed-pricing.ts +7 -44
package/src/cli.tsx +12 -2
package/src/commands/codex-session-runner.ts +132 -0
package/src/commands/credential-wait.ts +2 -2
package/src/commands/provider-credentials.ts +10 -5
package/src/commands/runner.ts +3 -3
package/src/prompts/base-prompt.ts +49 -3
package/src/providers/claude-adapter.ts +83 -2
package/src/providers/claude-managed-models.ts +18 -2
package/src/providers/codex-adapter.ts +417 -97
package/src/providers/codex-models.ts +9 -2
package/src/providers/index.ts +28 -19
package/src/providers/pricing-sources.md +7 -4
package/src/providers/swarm-events-shared.ts +14 -0
package/src/slack/HEURISTICS.md +5 -1
package/src/slack/handlers.test.ts +35 -0
package/src/slack/handlers.ts +79 -2
package/src/tests/base-prompt.test.ts +46 -8
package/src/tests/claude-managed-adapter.test.ts +4 -4
package/src/tests/codex-adapter-otel.test.ts +4 -4
package/src/tests/codex-adapter.test.ts +20 -7
package/src/tests/codex-swarm-events.test.ts +35 -0
package/src/tests/context-window.test.ts +1 -0
package/src/tests/credential-check.test.ts +48 -29
package/src/tests/entrypoint-config-env-export.test.ts +81 -0
package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
package/src/tests/migration-046-budgets.test.ts +6 -5
package/src/tests/pricing-routes.test.ts +6 -5
package/src/tests/provider-adapter.test.ts +10 -10
package/src/tests/provider-command-format.test.ts +4 -4
package/src/tests/session-costs-codex-recompute.test.ts +25 -0
package/src/tools/send-task.ts +30 -9
package/src/utils/context-window.ts +1 -0
package/templates/schedules/daily-blocker-digest/config.json +13 -0
package/templates/schedules/daily-blocker-digest/content.md +150 -0
package/templates/schedules/daily-compounding-reflection/config.json +21 -0
package/templates/schedules/daily-compounding-reflection/content.md +210 -0
package/templates/schedules/daily-hn-briefing/config.json +13 -0
package/templates/schedules/daily-hn-briefing/content.md +97 -0
package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
package/templates/schedules/gtm-weekly-review/config.json +13 -0
package/templates/schedules/gtm-weekly-review/content.md +58 -0
package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
package/templates/schema.ts +26 -0
package/templates/skills/agentmail-sending/config.json +13 -0
package/templates/skills/agentmail-sending/content.md +48 -0
package/templates/skills/artifacts/config.json +13 -0
package/templates/skills/artifacts/content.md +87 -0
package/templates/skills/browser-use-cloud/config.json +13 -0
package/templates/skills/browser-use-cloud/content.md +155 -0
package/templates/skills/desloppify/config.json +13 -0
package/templates/skills/desloppify/content.md +201 -0
package/templates/skills/exa-search/config.json +13 -0
package/templates/skills/exa-search/content.md +106 -0
package/templates/skills/jira-interaction/config.json +13 -0
package/templates/skills/jira-interaction/content.md +252 -0
package/templates/skills/kapso-whatsapp/config.json +13 -0
package/templates/skills/kapso-whatsapp/content.md +369 -0
package/templates/skills/kv-storage/config.json +13 -0
package/templates/skills/kv-storage/content.md +111 -0
package/templates/skills/linear-interaction/config.json +20 -0
package/templates/skills/linear-interaction/content.md +230 -0
package/templates/skills/pages/config.json +18 -0
package/templates/skills/pages/content.md +85 -0
package/templates/skills/profile-corruption-escalation/config.json +13 -0
package/templates/skills/profile-corruption-escalation/content.md +105 -0
package/templates/skills/scheduled-task-resilience/config.json +13 -0
package/templates/skills/scheduled-task-resilience/content.md +95 -0
package/templates/skills/sprite-cli/config.json +13 -0
package/templates/skills/sprite-cli/content.md +133 -0
package/templates/skills/turso-interaction/config.json +13 -0
package/templates/skills/turso-interaction/content.md +192 -0
package/templates/skills/workflow-iterate/config.json +18 -0
package/templates/skills/workflow-iterate/content.md +399 -0
package/templates/skills/workflow-structured-output/config.json +13 -0
package/templates/skills/workflow-structured-output/content.md +101 -0
package/templates/skills/x-api-interactions/config.json +13 -0
package/templates/skills/x-api-interactions/content.md +109 -0
package/templates/workflows/autopilot/config.json +13 -0
package/templates/workflows/autopilot/content.md +58 -0
package/templates/workflows/linear-drain-loop/config.json +21 -0
package/templates/workflows/linear-drain-loop/content.md +72 -0
package/templates/workflows/ralph-loop/config.json +13 -0
package/templates/workflows/ralph-loop/content.md +75 -0

package/src/be/modelsdev-cache.ts ADDED Viewed

@@ -0,0 +1,46 @@
+import { readFileSync } from "node:fs";
+import path from "node:path";
+/**
+ * Cost block attached to a model entry in the vendored models.dev snapshot.
+ * Every field is optional, so consumers must cope with missing rates.
+ *
+ * NOTE(review): the unit of these numbers is not visible in this file
+ * (presumably USD per million tokens, one rate per token class) — confirm
+ * against the snapshot before doing arithmetic on them.
+ */
+export interface ModelsDevCostBlock {
+  input?: number;
+  output?: number;
+  cache_read?: number;
+  cache_write?: number;
+}
+/**
+ * A single model entry in the snapshot: an optional identifier plus an
+ * optional cost block. Either (or both) may be absent.
+ */
+export interface ModelsDevModel {
+  id?: string;
+  cost?: ModelsDevCostBlock;
+}
+/**
+ * A provider entry in the snapshot. `models` is an optional string-keyed map
+ * of model entries (presumably keyed by model id — confirm against the JSON).
+ */
+export interface ModelsDevProvider {
+  models?: Record<string, ModelsDevModel>;
+}
+/**
+ * Top-level shape of the snapshot: a string-keyed map of provider entries
+ * (presumably keyed by provider id — confirm against the JSON).
+ */
+export type ModelsDevCache = Record<string, ModelsDevProvider>;
+/**
+ * Relative location of the vendored snapshot (`src`/`be`/`modelsdev-cache.json`
+ * joined with the platform separator). `loadModelsDevCache` resolves it
+ * against several base directories. Note that an environment variable with
+ * the same name is read separately by `loadModelsDevCache` and is treated as
+ * a complete path, not as a replacement for this constant.
+ */
+export const MODELSDEV_CACHE_PATH = path.join("src", "be", "modelsdev-cache.json");
+/**
+ * Resolve the vendored models.dev cache from source checkouts and compiled
+ * Docker images. The API image copies the snapshot to `/app/src/be/...`.
+ *
+ * Candidates are tried in order; the first one that can be read and parsed
+ * as JSON wins:
+ *   1. the `MODELSDEV_CACHE_PATH` environment variable, when set to a
+ *      non-empty value (used verbatim as the file path);
+ *   2. `<cwd>/src/be/modelsdev-cache.json`;
+ *   3. `<cwd>/../src/be/modelsdev-cache.json`;
+ *   4. `/app/src/be/modelsdev-cache.json`.
+ *
+ * Any failure on a candidate — missing file, unreadable file, or malformed
+ * JSON — is swallowed and the next candidate is tried. In particular, a
+ * broken explicit path falls through silently to the default locations.
+ *
+ * @returns The parsed snapshot, or `null` when no candidate could be loaded.
+ *   The parsed value is cast to `ModelsDevCache` without shape validation.
+ */
+export function loadModelsDevCache(): ModelsDevCache | null {
+  const explicitPath = process.env.MODELSDEV_CACHE_PATH;
+  const candidates = [
+    // An empty-string override is falsy and therefore skipped.
+    ...(explicitPath ? [explicitPath] : []),
+    path.join(process.cwd(), MODELSDEV_CACHE_PATH),
+    path.join(process.cwd(), "..", MODELSDEV_CACHE_PATH),
+    path.join("/app", MODELSDEV_CACHE_PATH),
+  ];
+  for (const candidate of candidates) {
+    try {
+      return JSON.parse(readFileSync(candidate, "utf-8")) as ModelsDevCache;
+    } catch {
+      // Read or parse failed for this candidate — try the next one.
+    }
+  }
+  return null;
+}

package/src/be/seed-pricing.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Phase 2 of the cost-tracking plan — seed the `pricing` table at server boot.
  *
- * The vendored models.dev snapshot at `ui/src/lib/modelsdev-cache.json` is the
+ * The vendored models.dev snapshot at `src/be/modelsdev-cache.json` is the
  * single source of truth for per-token rates. We project it into rows keyed by
  * `(provider, model, token_class)` so the recompute path in
  * `src/http/session-data.ts` can rebuild USD from tokens regardless of which
@@ -17,30 +17,15 @@
  * admin route (`POST /api/pricing`) — we don't overwrite seed rows.
  */
-import { readFileSync } from "node:fs";
-import path from "node:path";
 import type { PricingProvider, PricingTokenClass } from "../types";
 import { getDb } from "./db";
+import {
+  loadModelsDevCache,
+  type ModelsDevCache,
+  type ModelsDevCostBlock,
+} from "./modelsdev-cache";
 import { normalizeModelKey } from "./pricing-normalize";
-interface ModelsDevCostBlock {
-  input?: number;
-  output?: number;
-  cache_read?: number;
-  cache_write?: number;
-}
-interface ModelsDevModel {
-  id?: string;
-  cost?: ModelsDevCostBlock;
-}
-interface ModelsDevProvider {
-  models?: Record<string, ModelsDevModel>;
-}
-type ModelsDevCache = Record<string, ModelsDevProvider>;
 /**
  * Per-harness manual rates that models.dev doesn't carry. Keep the source URL
  * and a verification date next to each entry so {@link MANUAL_PRICING_OVERRIDES}
@@ -82,33 +67,11 @@ const MANUAL_PRICING_OVERRIDES: Array<{
  * fields the models.dev snapshot doesn't index directly; we map them here.
  */
 const ANTHROPIC_SHORTNAME_TO_MODELSDEV: Record<string, string> = {
-  opus: "claude-opus-4-7",
+  opus: "claude-opus-4-8",
   sonnet: "claude-sonnet-4-6",
   haiku: "claude-haiku-4-5",
 };
-/**
- * Resolve the path to the vendored models.dev cache. The UI copy is canonical.
- * We treat this as best-effort: if the file is missing (developer ran the
- * server without `ui/` checked out), we log and continue with manual rates
- * only — better than crashing the boot.
- */
-function loadModelsDevCache(): ModelsDevCache | null {
-  const candidates = [
-    path.join(process.cwd(), "ui", "src", "lib", "modelsdev-cache.json"),
-    path.join(process.cwd(), "..", "ui", "src", "lib", "modelsdev-cache.json"),
-  ];
-  for (const cand of candidates) {
-    try {
-      const raw = readFileSync(cand, "utf-8");
-      return JSON.parse(raw) as ModelsDevCache;
-    } catch {
-      // try next candidate
-    }
-  }
-  return null;
-}
 interface PricingSeedRow {
   provider: PricingProvider;
   model: string;

package/src/cli.tsx CHANGED Viewed

@@ -16,8 +16,12 @@ import { getApiKey, setApiKey } from "./utils/api-key.ts";
 // Get CLI name from bin field (assumes single key)
 const binName = Object.keys(pkg.bin)[0];
-// Restore cursor on exit
-const restoreCursor = () => process.stdout.write("\x1B[?25h");
+// Restore cursor on exit — only when stdout is a TTY.  Non-TTY invocations
+// (like the codex-session-runner subprocess whose stdout is a JSON pipe)
+// must not inject terminal escape sequences into the byte stream.
+const restoreCursor = () => {
+  if (process.stdout.isTTY) process.stdout.write("\x1B[?25h");
+};
 process.on("exit", restoreCursor);
 process.on("SIGINT", () => {
   restoreCursor();
@@ -570,6 +574,12 @@ if (args.showHelp || args.command === "help" || args.command === undefined) {
   const { runCodexLogin } = await import("./commands/codex-login");
   const codexLoginArgs = process.argv.slice(process.argv.indexOf("codex-login") + 1);
   await runCodexLogin(codexLoginArgs);
+} else if (args.command === "codex-session-runner") {
+  // Internal subcommand — invoked by CodexSubprocessSession to host a single
+  // codex session in a throwaway subprocess. See src/commands/codex-session-runner.ts
+  // for the rationale (Picateclas spawn-OOM permanent fix, 2026-05-28).
+  const { runCodexSessionRunner } = await import("./commands/codex-session-runner");
+  await runCodexSessionRunner();
 } else if (args.command === "claude-managed-setup") {
   const { runClaudeManagedSetup } = await import("./commands/claude-managed-setup");
   const setupArgs = process.argv.slice(process.argv.indexOf("claude-managed-setup") + 1);

package/src/commands/codex-session-runner.ts ADDED Viewed

@@ -0,0 +1,132 @@
+/**
+ * Codex session subprocess runner.
+ *
+ * Entry point for the `codex-session-runner` CLI subcommand. Reads a
+ * `CodexSubprocessInput` payload from stdin, drives a fresh in-process
+ * `CodexSession`, and pipes the session's `ProviderEvent` stream + final
+ * `ProviderResult` back to its parent over stdout as line-delimited JSON.
+ *
+ * Why this exists: the previous architecture ran every codex session
+ * directly inside the long-lived worker runner. The `@openai/codex-sdk`
+ * leaks SDK state (parsers, transcript buffers, JSON-RPC plumbing) into
+ * the runner's heap, and after ~1,500 task completions on a hot worker
+ * (Picateclas, 2026-05-28) the runner's VSZ ballooned to 74 GB / RSS to
+ * 7.5 GB, causing every subsequent `fork()` to fail ENOMEM regardless of
+ * current RSS (the kernel reserves CoW for the full VSZ at fork time).
+ *
+ * Moving each session into its own subprocess means the SDK state dies
+ * with the subprocess. The runner stays at the ~234 MB baseline observed
+ * on Reviewer (the cohort partner that did 481 task completions without
+ * the OOM symptom). See task `fa0c0681` for the byte-by-byte breakdown.
+ *
+ * Wire protocol over stdout (one JSON object per line):
+ *   {"kind":"event", "event": <ProviderEvent>}
+ *   {"kind":"result", "result": <ProviderResult>}
+ *   {"kind":"error", "message": "..."}
+ */
+import { createInProcessCodexSession } from "../providers/codex-adapter";
+import type { ProviderEvent, ProviderResult, ProviderSessionConfig } from "../providers/types";
+/**
+ * JSON payload the runner reads from stdin.
+ *
+ * - `config`: passed to `createInProcessCodexSession` as the session config.
+ * - `skillsDir`: forwarded to `createInProcessCodexSession` as an option.
+ * - `parentOtelEnv`: when non-empty, merged over `config.env` before the
+ *   session is created (its keys win on conflict).
+ */
+interface CodexSubprocessInput {
+  config: ProviderSessionConfig;
+  skillsDir?: string;
+  parentOtelEnv?: Record<string, string>;
+}
+/**
+ * Read the whole of stdin and return it as a single string.
+ *
+ * When `Bun.stdin` exposes a `stream()` method, the stream is drained chunk
+ * by chunk through a `TextDecoder`. Otherwise the function falls back to
+ * reading `/dev/stdin` through `Bun.file`.
+ *
+ * @returns Everything received on stdin until end-of-stream.
+ */
+async function readAllStdin(): Promise<string> {
+  // Bun.stdin is a BunFile in some versions, Web stream in others.
+  // The safest path is to read the readable stream directly.
+  const decoder = new TextDecoder();
+  let out = "";
+  const stream = (Bun.stdin as unknown as { stream?: () => ReadableStream<Uint8Array> }).stream
+    ? (Bun.stdin as unknown as { stream: () => ReadableStream<Uint8Array> }).stream()
+    : null;
+  if (stream) {
+    const reader = stream.getReader();
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      // `stream: true` keeps incomplete multi-byte sequences buffered in the
+      // decoder so a character split across two chunks is not corrupted.
+      if (value) out += decoder.decode(value, { stream: true });
+    }
+    // Final call without input flushes whatever the decoder still buffers.
+    out += decoder.decode();
+    return out;
+  }
+  // Fallback: read via Bun.file (file-like access works for piped stdin too)
+  return await Bun.file("/dev/stdin").text();
+}
+/**
+ * Emit one wire-protocol message: `obj` serialized with `JSON.stringify`,
+ * followed by a newline, written to stdout.
+ *
+ * Callers in this file always pass object literals. A value that
+ * `JSON.stringify` cannot represent (e.g. `undefined`) would be written as
+ * the literal text `undefined`, which is not valid JSON.
+ */
+function writeLine(obj: unknown): void {
+  process.stdout.write(`${JSON.stringify(obj)}\n`);
+}
+/**
+ * Entry point for the `codex-session-runner` subcommand.
+ *
+ * Delegates to `runCodexSessionRunnerInner` and acts as the last-resort
+ * crash handler: anything thrown from the inner function is logged to stderr
+ * (message, plus stack when available), reported to the parent as a
+ * `{"kind":"error"}` line on stdout, and the process exits with code 1.
+ */
+export async function runCodexSessionRunner(): Promise<void> {
+  try {
+    await runCodexSessionRunnerInner();
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    const stack = err instanceof Error ? err.stack : undefined;
+    // stderr carries the human-readable diagnostics; stdout carries only
+    // protocol lines.
+    console.error(`[codex-session-runner] top-level crash: ${message}`);
+    if (stack) console.error(stack);
+    writeLine({ kind: "error", message: `codex-session-runner: unexpected crash: ${message}` });
+    process.exit(1);
+  }
+}
+/**
+ * Body of the subprocess runner. In order:
+ *
+ *   1. Read stdin and parse it as a `CodexSubprocessInput`. On failure, log
+ *      to stderr, emit an error line, and exit 1.
+ *   2. Merge `parentOtelEnv` (when non-empty) over `config.env`.
+ *   3. Create the in-process codex session. On failure, log, emit an error
+ *      line, and exit 1.
+ *   4. Install SIGTERM / SIGINT handlers that abort the session and then
+ *      exit with 143 / 130 after a 250 ms delay.
+ *   5. Forward every session event to stdout as an event line.
+ *   6. Await completion, emit the result line, and exit with
+ *      `result.exitCode`, or 0 when that is null/undefined.
+ *
+ * The parsed payload is cast to `CodexSubprocessInput` without validation;
+ * a structurally wrong payload surfaces later as a thrown error, which the
+ * caller's top-level handler reports.
+ */
+async function runCodexSessionRunnerInner(): Promise<void> {
+  let input: CodexSubprocessInput;
+  try {
+    const raw = await readAllStdin();
+    input = JSON.parse(raw) as CodexSubprocessInput;
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error(`[codex-session-runner] stdin parse failed: ${message}`);
+    writeLine({
+      kind: "error",
+      message: `codex-session-runner: failed to parse stdin: ${message}`,
+    });
+    // Exiting here is what guarantees `input` is assigned on every path that
+    // continues past this try/catch.
+    process.exit(1);
+  }
+  // Forward the parent's captured OTel TRACEPARENT (and friends) into the
+  // session config's env so the spawned Codex CLI nests its spans under our
+  // worker.session trace. We deliberately do NOT call
+  // `buildOtelTraceparentEnv` from inside this subprocess — its tracer has
+  // no active span, so it would emit nothing.
+  if (input.parentOtelEnv && Object.keys(input.parentOtelEnv).length > 0) {
+    // Spread order: keys from parentOtelEnv override same-named keys already
+    // present in config.env.
+    input.config.env = { ...(input.config.env ?? {}), ...input.parentOtelEnv };
+  }
+  let session: Awaited<ReturnType<typeof createInProcessCodexSession>>;
+  try {
+    session = await createInProcessCodexSession(input.config, {
+      skillsDir: input.skillsDir,
+    });
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error(`[codex-session-runner] createSession failed: ${message}`);
+    writeLine({ kind: "error", message: `codex-session-runner: createSession failed: ${message}` });
+    process.exit(1);
+  }
+  // Forward SIGTERM / SIGINT to the in-process session so the runner can
+  // gracefully cancel us. The parent `CodexSubprocessSession.abort()` sends
+  // SIGTERM here; the session's AbortController catches it and the codex
+  // CLI subprocess (a grandchild) gets cleaned up.
+  const onSignal = (signal: NodeJS.Signals) => {
+    void session.abort().finally(() => {
+      // give the session a beat to emit its cancellation result, then exit
+      // (130 / 143 follow the 128 + signal-number convention). If
+      // `waitForCompletion()` below resolves within that window, its
+      // `process.exit` runs first and determines the exit code instead.
+      setTimeout(() => process.exit(signal === "SIGINT" ? 130 : 143), 250);
+    });
+  };
+  process.on("SIGTERM", () => onSignal("SIGTERM"));
+  process.on("SIGINT", () => onSignal("SIGINT"));
+  session.onEvent((event: ProviderEvent) => {
+    writeLine({ kind: "event", event });
+  });
+  const result: ProviderResult = await session.waitForCompletion();
+  writeLine({ kind: "result", result });
+  // NOTE(review): the process exits immediately after the final stdout
+  // write. Confirm that writes to a piped stdout are flushed before exit on
+  // the runtime in use, otherwise the parent could miss the result line.
+  process.exit(result.exitCode ?? 0);
+}

package/src/commands/credential-wait.ts CHANGED Viewed

@@ -133,7 +133,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
   // Fast path: already satisfied at boot.
   let currentProvider = readProvider();
-  let status = checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
+  let status = await checkProviderCredentials(currentProvider, initialEnv, opts.credCheckOptions);
   if (status.ready) {
     log(
       `[boot] credentials ready (provider=${currentProvider}, satisfiedBy=${status.satisfiedBy})`,
@@ -183,7 +183,7 @@ export async function awaitCredentials(opts: AwaitCredentialsOptions): Promise<C
       currentProvider = nextProvider;
     }
-    status = checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
+    status = await checkProviderCredentials(currentProvider, process.env, opts.credCheckOptions);
     if (!status.ready) {
       // Exponential backoff with cap.

package/src/commands/provider-credentials.ts CHANGED Viewed

@@ -23,7 +23,6 @@ import { checkClaudeManagedCredentials } from "../providers/claude-managed-adapt
 import { checkCodexCredentials } from "../providers/codex-adapter";
 import { checkDevinCredentials } from "../providers/devin-adapter";
 import { checkOpencodeCredentials } from "../providers/opencode-adapter";
-import { checkPiMonoCredentials } from "../providers/pi-mono-adapter";
 import type { CredCheckOptions, CredStatus } from "../providers/types";
 import type { AgentCredStatus, AgentLatestModel, ProviderName } from "../types";
 import { scrubSecrets } from "../utils/secret-scrubber";
@@ -55,12 +54,16 @@ export const REQUIRED_CRED_VARS_BY_PROVIDER: Record<SupportedProvider, readonly
 /**
  * Run the predicate for `provider`. Unknown providers throw — call sites
  * should treat that as a configuration bug, not a user-correctable state.
+ *
+ * The `pi` case uses a dynamic import so `@earendil-works/pi-coding-agent`
+ * (which has module-level side effects that crash in the Bun compiled
+ * binary) is only loaded when the pi provider is actually selected.
  */
-export function checkProviderCredentials(
+export async function checkProviderCredentials(
   provider: string,
   env: Record<string, string | undefined>,
   opts?: CredCheckOptions,
-): CredStatus {
+): Promise<CredStatus> {
   switch (provider) {
     case "claude":
       return checkClaudeCredentials(env);
@@ -72,8 +75,10 @@ export function checkProviderCredentials(
       return checkDevinCredentials(env);
     case "opencode":
       return checkOpencodeCredentials(env, opts);
-    case "pi":
+    case "pi": {
+      const { checkPiMonoCredentials } = await import("../providers/pi-mono-adapter");
       return checkPiMonoCredentials(env, opts);
+    }
     default:
       throw new Error(
         `checkProviderCredentials: unknown provider "${provider}". Supported: claude, claude-managed, codex, devin, opencode, pi.`,
@@ -386,7 +391,7 @@ export async function buildCredStatusReport(
   opts: CredCheckOptions = {},
   kind: AgentCredStatus["reportKind"],
 ): Promise<AgentCredStatus> {
-  const presence = checkProviderCredentials(provider, env, opts);
+  const presence = await checkProviderCredentials(provider, env, opts);
   let liveTest: AgentCredStatus["liveTest"] = null;
   if (presence.ready) {
     const live = await validateProviderCredentials(provider);

package/src/commands/runner.ts CHANGED Viewed

@@ -2182,7 +2182,7 @@ export function implicitCloseActiveToolSpans(
 }
 async function spawnProviderProcess(
-  adapter: ReturnType<typeof createProviderAdapter>,
+  adapter: Awaited<ReturnType<typeof createProviderAdapter>>,
   opts: {
     prompt: string;
     logFile: string;
@@ -3121,7 +3121,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
   // Create provider adapter using the resolved value. `let` so the poll-loop
   // reconciliation block (Section 4) can swap it live when an operator changes
   // HARNESS_PROVIDER in swarm_config — call sites read the current binding.
-  let adapter = createProviderAdapter(bootProvider);
+  let adapter = await createProviderAdapter(bootProvider);
   // Configure HTTP-based template resolution (workers resolve via API, not local DB)
   if (apiKey) {
@@ -3368,7 +3368,7 @@ export async function runAgent(config: RunnerConfig, opts: RunnerOptions) {
       const previous = state.harnessProvider;
       console.log(`[${role}] [harness] Reconciling adapter: ${previous} → ${resolvedProvider}`);
       try {
-        adapter = createProviderAdapter(resolvedProvider);
+        adapter = await createProviderAdapter(resolvedProvider);
         state.harnessProvider = resolvedProvider;
         basePrompt = await buildSystemPrompt();
         resolvedSystemPrompt = additionalSystemPrompt

package/src/prompts/base-prompt.ts CHANGED Viewed

@@ -16,8 +16,27 @@ import "./session-templates";
 /** Max characters per individual injected section before truncation */
 const BOOTSTRAP_MAX_CHARS = 20_000;
-/** Max total characters across all injected sections combined */
-const BOOTSTRAP_TOTAL_MAX_CHARS = 150_000;
+/**
+ * Max total characters across all injected sections combined.
+ *
+ * Sized to stay safely below Linux's `MAX_ARG_STRLEN = 131,072` bytes — the
+ * per-argv-element kernel limit that bit Picateclas attempts 4-6
+ * (2026-05-28). The base-prompt becomes one argv element when the claude
+ * adapter passes `--append-system-prompt <prompt>`, so the prompt MUST stay
+ * under MAX_ARG_STRLEN even with a few KB of growth. The claude-adapter
+ * also stages the prompt to a file (`--append-system-prompt-file`) as a
+ * belt-and-braces fix, but the budget cap is the cheap insurance for any
+ * code path that ever passes the prompt inline.
+ */
+const BOOTSTRAP_TOTAL_MAX_CHARS = 120_000;
+/**
+ * Per-section cap applied to the *repo* CLAUDE.md (the agent-swarm OSS
+ * one is ~18 KB and the biggest volatile component of the system prompt).
+ * 12 KB leaves room for the static prompt scaffold + identity + tools +
+ * agent CLAUDE.md without ever crossing MAX_ARG_STRLEN.
+ */
+const REPO_CLAUDE_MD_MAX_CHARS = 12_000;
 /** Truncation notice appended when a section is cut */
 const truncationNotice = (file: string) =>
@@ -150,7 +169,16 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
         prompt += `The following CLAUDE.md is from the repository cloned at \`${args.repoContext.clonePath}\`. `;
         prompt += `**IMPORTANT: These instructions apply ONLY when working within the \`${args.repoContext.clonePath}\` directory.** `;
         prompt += `Do NOT apply these rules to files outside that directory.\n\n`;
-        prompt += `${args.repoContext.claudeMd}\n`;
+        // Cap the repo CLAUDE.md so it can't blow the bootstrap budget on its
+        // own. Pre-cap, the agent-swarm OSS CLAUDE.md was 17,856 B — the
+        // single biggest volatile component of the system prompt and the
+        // direct driver of the Picateclas argv-E2BIG saga (2026-05-28).
+        // Truncation footer points readers at the on-disk copy in the cwd.
+        prompt += `${truncateRepoClaudeMd(
+          args.repoContext.claudeMd,
+          args.repoContext.clonePath,
+          REPO_CLAUDE_MD_MAX_CHARS,
+        )}\n`;
       } else if (!args.repoContext.warning) {
         prompt += `Repository is cloned at \`${args.repoContext.clonePath}\` but has no CLAUDE.md file.\n`;
       }
@@ -267,6 +295,24 @@ export const getBasePrompt = async (args: BasePromptArgs): Promise<string> => {
   return prompt;
 };
+/**
+ * Truncate the repo CLAUDE.md to a character budget so it can't blow the
+ * bootstrap argv ceiling on its own (Picateclas spawn-OOM, 2026-05-28).
+ *
+ * The budget is compared against `String.length` (UTF-16 code units), not
+ * bytes: content with non-ASCII characters can occupy more bytes than
+ * `budget` once encoded.
+ *
+ * When the content is cut, a footer of the form
+ * `[...truncated — see <clonePath>/CLAUDE.md for full content]` is appended
+ * so anyone reading the system prompt knows exactly where the dropped
+ * content lives on disk. The footer is counted against the budget, so a
+ * truncated result is exactly `budget` characters long. The one exception is
+ * a budget too small to hold the footer: the footer alone is returned
+ * (leading blank lines trimmed), and it may then exceed `budget`.
+ *
+ * Exported only for testing.
+ *
+ * @param content   Full text of the repo CLAUDE.md.
+ * @param clonePath Directory of the clone; used only to build the footer.
+ * @param budget    Maximum length of the result, in characters.
+ * @returns `content` unchanged when it fits, otherwise the truncated text
+ *   followed by the footer.
+ */
+export function truncateRepoClaudeMd(content: string, clonePath: string, budget: number): string {
+  if (content.length <= budget) return content;
+  const notice = `\n\n[...truncated — see ${clonePath}/CLAUDE.md for full content]\n`;
+  // Reserve room for the footer inside the budget.
+  const contentBudget = budget - notice.length;
+  // No room for any content: return the footer by itself.
+  if (contentBudget <= 0) return notice.trimStart();
+  return content.slice(0, contentBudget) + notice;
+}
 /** Truncate a section to fit within a character budget, appending a notice if cut */
 function truncateSection(
   content: string | undefined,

package/src/providers/claude-adapter.ts CHANGED Viewed

@@ -318,6 +318,30 @@ export function buildClaudeCodeOtelEnv(
   return otelEnv;
 }
+/**
+ * Resolve the path at which the per-task system prompt is staged on disk.
+ * This function only builds the path string; it does not create, write, or
+ * check the file.
+ *
+ * Pushing the prompt as `--append-system-prompt <value>` makes the entire
+ * prompt one argv element. Linux's per-arg limit is `MAX_ARG_STRLEN = 131072`
+ * bytes — and the system prompt (CLAUDE.md + TOOLS.md + identity files +
+ * repo CLAUDE.md) routinely runs 50–80 KB. A few growth nudges push us
+ * across the cliff and `posix_spawn` returns E2BIG, killing the worker
+ * (Picateclas attempts 4-6, 2026-05-28).
+ *
+ * `claude --append-system-prompt-file <path>` reads the prompt from disk,
+ * so the argv stays bounded by the filename length and the system prompt
+ * size is decoupled from the kernel's argv ceiling.
+ *
+ * Exported for unit testing.
+ *
+ * @param taskId Task identifier embedded verbatim in the filename.
+ * @returns `/tmp/agent-swarm-system-prompt-<taskId>.txt`.
+ */
+export function getSystemPromptFilePath(taskId: string): string {
+  // The taskId is a UUID; safe to embed in a /tmp filename. Mirrors the
+  // existing /tmp/agent-swarm-task-${pid}.json + /tmp/mcp-${taskId}.json
+  // convention so a janitor sweeping /tmp can find all session-scoped state
+  // under the same prefix.
+  // NOTE(review): the UUID shape is assumed, not checked here — a taskId
+  // containing path separators would escape /tmp. Confirm callers only pass
+  // generated ids.
+  return `/tmp/agent-swarm-system-prompt-${taskId}.txt`;
+}
 class ClaudeSession implements ProviderSession {
   private proc: ReturnType<typeof Bun.spawn>;
   private listeners: Array<(event: ProviderEvent) => void> = [];
@@ -327,6 +351,8 @@ class ClaudeSession implements ProviderSession {
   private errorTracker = new SessionErrorTracker();
   private taskFilePid: number;
   private contextWindowSize: number;
+  /** Path to the system-prompt temp file when one was staged for this session. */
+  private systemPromptFile: string | null;
   constructor(
     private config: ProviderSessionConfig,
@@ -335,9 +361,11 @@ class ClaudeSession implements ProviderSession {
     taskFilePid: number,
     private sessionMcpConfig: string | null = null,
     private claudeBinaryArgv: readonly string[] = ["claude"],
+    systemPromptFile: string | null = null,
   ) {
     this.taskFilePid = taskFilePid;
     this.contextWindowSize = getContextWindowSize(model);
+    this.systemPromptFile = systemPromptFile;
     const cmd = this.buildCommand();
     console.log(
@@ -403,7 +431,13 @@ class ClaudeSession implements ProviderSession {
       cmd.push(...this.config.additionalArgs);
     }
-    if (this.config.systemPrompt) {
+    // System prompt is staged on disk and read via the file-flag — see
+    // `getSystemPromptFilePath` for the rationale (argv E2BIG hardening,
+    // Picateclas spawn-OOM, 2026-05-28). The legacy inline form is kept as
+    // a fallback for the (unlikely) case where the file couldn't be staged.
+    if (this.systemPromptFile) {
+      cmd.push("--append-system-prompt-file", this.systemPromptFile);
+    } else if (this.config.systemPrompt) {
       cmd.push("--append-system-prompt", this.config.systemPrompt);
     }
@@ -490,7 +524,7 @@ class ClaudeSession implements ProviderSession {
     await logFileHandle.end();
     const exitCode = await this.proc.exited;
-    // Cleanup task file and per-session MCP config
+    // Cleanup task file, per-session MCP config, and per-task system prompt
     await cleanupTaskFile(this.taskFilePid);
     if (this.sessionMcpConfig) {
       try {
@@ -499,6 +533,13 @@ class ClaudeSession implements ProviderSession {
         // ignore — temp file may already be gone
       }
     }
+    if (this.systemPromptFile) {
+      try {
+        await unlink(this.systemPromptFile);
+      } catch {
+        // ignore — temp file may already be gone
+      }
+    }
     if (exitCode !== 0 && stderrOutput) {
       console.error(
@@ -723,6 +764,22 @@ class ClaudeSession implements ProviderSession {
           startedAt: new Date().toISOString(),
         });
+        // Re-stage the system prompt for the retry — the original was unlinked
+        // when the first session finished. Same soft-fail semantics: null
+        // falls back to the inline --append-system-prompt argv.
+        let retrySystemPromptFile: string | null = null;
+        if (retryConfig.systemPrompt) {
+          const candidate = getSystemPromptFilePath(retryConfig.taskId);
+          try {
+            await writeFile(candidate, retryConfig.systemPrompt);
+            retrySystemPromptFile = candidate;
+          } catch (err) {
+            console.warn(
+              `\x1b[33m[claude]\x1b[0m Failed to stage retry system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv.`,
+            );
+          }
+        }
         const retrySession = new ClaudeSession(
           retryConfig,
           this.model,
@@ -730,6 +787,7 @@ class ClaudeSession implements ProviderSession {
           this.taskFilePid,
           null,
           this.claudeBinaryArgv,
+          retrySystemPromptFile,
         );
         // Forward events from retry to our listeners
@@ -832,6 +890,28 @@ export class ClaudeAdapter implements ProviderAdapter {
       installedServers,
     );
+    // Stage the system prompt on disk so it can be passed as a file path
+    // instead of one giant argv element. This is the structural fix for
+    // posix_spawn E2BIG once the prompt grows past MAX_ARG_STRLEN (131,072
+    // bytes) — see `getSystemPromptFilePath` and PR description for the
+    // Picateclas spawn-OOM saga. Soft-fail (`systemPromptFile = null`) makes
+    // the session fall back to the inline `--append-system-prompt` argv;
+    // good enough since `BOOTSTRAP_TOTAL_MAX_CHARS` (now 120,000) already
+    // caps the worst-case argv element below the kernel limit even without
+    // the file path.
+    let systemPromptFile: string | null = null;
+    if (config.systemPrompt) {
+      const candidate = getSystemPromptFilePath(config.taskId);
+      try {
+        await writeFile(candidate, config.systemPrompt);
+        systemPromptFile = candidate;
+      } catch (err) {
+        console.warn(
+          `\x1b[33m[claude]\x1b[0m Failed to stage system prompt to ${candidate} (${err}); falling back to --append-system-prompt argv. Argv may approach MAX_ARG_STRLEN if the prompt is large.`,
+        );
+      }
+    }
     return new ClaudeSession(
       config,
       model,
@@ -839,6 +919,7 @@ export class ClaudeAdapter implements ProviderAdapter {
       taskFilePid,
       sessionMcpConfig,
       claudeBinaryArgv,
+      systemPromptFile,
     );
   }

package/src/providers/claude-managed-models.ts CHANGED Viewed

@@ -26,7 +26,9 @@
 /** Models supported by the managed-agents surface for the swarm worker. */
 export const CLAUDE_MANAGED_MODELS = [
   "claude-sonnet-4-6",
+  "claude-opus-4-8",
   "claude-opus-4-7",
+  "claude-opus-4-6",
   "claude-haiku-4-5",
 ] as const;
@@ -45,11 +47,13 @@ export interface ClaudeManagedModelPricing {
 }
 /**
- * Anthropic public list pricing as of 2026-04-28. Source:
+ * Anthropic public list pricing. Source:
  * https://platform.claude.com/docs/en/about-claude/pricing
  *
  * - claude-sonnet-4-6: $3 / $15 / $0.30 / $3.75    (in / out / cache-read / cache-write)
- * - claude-opus-4-7:   $15 / $75 / $1.50 / $18.75
+ * - claude-opus-4-8:   $5 / $25 / $0.50 / $6.25    (verified 2026-05-28)
+ * - claude-opus-4-7:   $15 / $75 / $1.50 / $18.75  (STALE — was correct at launch, Anthropic has since dropped Opus to $5/$25)
+ * - claude-opus-4-6:   $5 / $25 / $0.50 / $6.25    (verified 2026-05-28)
  * - claude-haiku-4-5:  $1 / $5 / $0.10 / $1.25
  */
 export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeManagedModelPricing> = {
@@ -59,12 +63,24 @@ export const CLAUDE_MANAGED_MODEL_PRICING: Record<ClaudeManagedModel, ClaudeMana
     cacheReadPerMillion: 0.3,
     cacheWritePerMillion: 3.75,
   },
+  "claude-opus-4-8": {
+    inputPerMillion: 5.0,
+    outputPerMillion: 25.0,
+    cacheReadPerMillion: 0.5,
+    cacheWritePerMillion: 6.25,
+  },
   "claude-opus-4-7": {
     inputPerMillion: 15.0,
     outputPerMillion: 75.0,
     cacheReadPerMillion: 1.5,
     cacheWritePerMillion: 18.75,
   },
+  "claude-opus-4-6": {
+    inputPerMillion: 5.0,
+    outputPerMillion: 25.0,
+    cacheReadPerMillion: 0.5,
+    cacheWritePerMillion: 6.25,
+  },
   "claude-haiku-4-5": {
     inputPerMillion: 1.0,
     outputPerMillion: 5.0,