npm - @desplega.ai/agent-swarm - Versions diffs - 1.85.0 → 1.86.0 - Mend

@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/README.md +1 -0
package/openapi.json +1 -1
package/package.json +8 -6
package/src/be/db.ts +44 -0
package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
package/src/be/modelsdev-cache.json +152028 -0
package/src/be/modelsdev-cache.ts +46 -0
package/src/be/seed-pricing.ts +7 -44
package/src/cli.tsx +12 -2
package/src/commands/codex-session-runner.ts +132 -0
package/src/commands/credential-wait.ts +2 -2
package/src/commands/provider-credentials.ts +10 -5
package/src/commands/runner.ts +3 -3
package/src/prompts/base-prompt.ts +49 -3
package/src/providers/claude-adapter.ts +83 -2
package/src/providers/claude-managed-models.ts +18 -2
package/src/providers/codex-adapter.ts +417 -97
package/src/providers/codex-models.ts +9 -2
package/src/providers/index.ts +28 -19
package/src/providers/pricing-sources.md +7 -4
package/src/providers/swarm-events-shared.ts +14 -0
package/src/slack/HEURISTICS.md +5 -1
package/src/slack/handlers.test.ts +35 -0
package/src/slack/handlers.ts +79 -2
package/src/tests/base-prompt.test.ts +46 -8
package/src/tests/claude-managed-adapter.test.ts +4 -4
package/src/tests/codex-adapter-otel.test.ts +4 -4
package/src/tests/codex-adapter.test.ts +20 -7
package/src/tests/codex-swarm-events.test.ts +35 -0
package/src/tests/context-window.test.ts +1 -0
package/src/tests/credential-check.test.ts +48 -29
package/src/tests/entrypoint-config-env-export.test.ts +81 -0
package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
package/src/tests/migration-046-budgets.test.ts +6 -5
package/src/tests/pricing-routes.test.ts +6 -5
package/src/tests/provider-adapter.test.ts +10 -10
package/src/tests/provider-command-format.test.ts +4 -4
package/src/tests/session-costs-codex-recompute.test.ts +25 -0
package/src/tools/send-task.ts +30 -9
package/src/utils/context-window.ts +1 -0
package/templates/schedules/daily-blocker-digest/config.json +13 -0
package/templates/schedules/daily-blocker-digest/content.md +150 -0
package/templates/schedules/daily-compounding-reflection/config.json +21 -0
package/templates/schedules/daily-compounding-reflection/content.md +210 -0
package/templates/schedules/daily-hn-briefing/config.json +13 -0
package/templates/schedules/daily-hn-briefing/content.md +97 -0
package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
package/templates/schedules/gtm-weekly-review/config.json +13 -0
package/templates/schedules/gtm-weekly-review/content.md +58 -0
package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
package/templates/schema.ts +26 -0
package/templates/skills/agentmail-sending/config.json +13 -0
package/templates/skills/agentmail-sending/content.md +48 -0
package/templates/skills/artifacts/config.json +13 -0
package/templates/skills/artifacts/content.md +87 -0
package/templates/skills/browser-use-cloud/config.json +13 -0
package/templates/skills/browser-use-cloud/content.md +155 -0
package/templates/skills/desloppify/config.json +13 -0
package/templates/skills/desloppify/content.md +201 -0
package/templates/skills/exa-search/config.json +13 -0
package/templates/skills/exa-search/content.md +106 -0
package/templates/skills/jira-interaction/config.json +13 -0
package/templates/skills/jira-interaction/content.md +252 -0
package/templates/skills/kapso-whatsapp/config.json +13 -0
package/templates/skills/kapso-whatsapp/content.md +369 -0
package/templates/skills/kv-storage/config.json +13 -0
package/templates/skills/kv-storage/content.md +111 -0
package/templates/skills/linear-interaction/config.json +20 -0
package/templates/skills/linear-interaction/content.md +230 -0
package/templates/skills/pages/config.json +18 -0
package/templates/skills/pages/content.md +85 -0
package/templates/skills/profile-corruption-escalation/config.json +13 -0
package/templates/skills/profile-corruption-escalation/content.md +105 -0
package/templates/skills/scheduled-task-resilience/config.json +13 -0
package/templates/skills/scheduled-task-resilience/content.md +95 -0
package/templates/skills/sprite-cli/config.json +13 -0
package/templates/skills/sprite-cli/content.md +133 -0
package/templates/skills/turso-interaction/config.json +13 -0
package/templates/skills/turso-interaction/content.md +192 -0
package/templates/skills/workflow-iterate/config.json +18 -0
package/templates/skills/workflow-iterate/content.md +399 -0
package/templates/skills/workflow-structured-output/config.json +13 -0
package/templates/skills/workflow-structured-output/content.md +101 -0
package/templates/skills/x-api-interactions/config.json +13 -0
package/templates/skills/x-api-interactions/content.md +109 -0
package/templates/workflows/autopilot/config.json +13 -0
package/templates/workflows/autopilot/content.md +58 -0
package/templates/workflows/linear-drain-loop/config.json +21 -0
package/templates/workflows/linear-drain-loop/content.md +72 -0
package/templates/workflows/ralph-loop/config.json +13 -0
package/templates/workflows/ralph-loop/content.md +75 -0

package/src/providers/pricing-sources.md CHANGED Viewed

@@ -5,8 +5,11 @@ Operators bumping a rate by hand should also update this file.
 ## Primary: vendored models.dev snapshot
-- **Path**: `ui/src/lib/modelsdev-cache.json`
-- **Loaded by**: `src/be/seed-pricing.ts` → `seedPricingFromModelsDev()`,
+- **Source-of-truth path**: `src/be/modelsdev-cache.json`
+- **UI compatibility path**: `ui/src/lib/modelsdev-cache.json` symlinks to the
+  backend snapshot so existing UI imports keep working.
+- **Loaded by**: `src/be/modelsdev-cache.ts` → `src/be/seed-pricing.ts` →
+  `seedPricingFromModelsDev()`,
   called from `src/server.ts` after `initDb`.
 - **Projection rules** (see the same module for code-level detail):
   - Anthropic models → rows under `provider='claude'` AND `provider='claude-managed'`.
@@ -23,7 +26,7 @@ Operators bumping a rate by hand should also update this file.
   - Run `bun run scripts/refresh-modelsdev-pricing.ts` (Phase 2 — adds the
     script). It fetches the latest snapshot from models.dev, diffs against
     the vendored copy, prints a summary, and writes the new file.
-  - Commit the regenerated `modelsdev-cache.json` together with a bump
+  - Commit the regenerated `src/be/modelsdev-cache.json` together with a bump
     note in the PR description.
 ## Manual overrides
@@ -47,6 +50,6 @@ no input/output pricing rows at the lookup time, the row is persisted with
 `costSource='unpriced'` (rather than 'harness'). The UI surfaces this as a
 yellow badge.
-To fix: either add the model to `modelsdev-cache.json` (preferred — the
+To fix: either add the model to `src/be/modelsdev-cache.json` (preferred — the
 upstream snapshot probably needs refreshing) or add a manual override row via
 the existing admin route `POST /api/pricing`.

package/src/providers/swarm-events-shared.ts CHANGED Viewed

@@ -119,6 +119,13 @@ export function createSwarmEventHandler(
         };
         const isCancelled = data.cancelled?.some((t) => t.id === taskId);
         if (isCancelled) {
+          // Log BEFORE aborting so the reason is visible in the worker
+          // transcript even when the abort propagates immediately. Without
+          // this, cancellations originating from this poll were invisible
+          // (only the runner-side poll in runner.ts logs them).
+          console.log(
+            `[swarm-events] aborting task ${taskId}: cancelled via /cancelled-tasks poll`,
+          );
           opts.abortRef.current?.abort();
           if (opts.onCancel) {
             try {
@@ -141,6 +148,13 @@ export function createSwarmEventHandler(
     void checkToolLoop(taskId, toolName, argRecord)
       .then((result) => {
         if (result.blocked) {
+          // Surface the loop-detector's reason. Without this log, the abort
+          // was indistinguishable from a /cancelled-tasks abort or a runner
+          // SIGTERM. `result.reason` already carries the diagnostic detail
+          // ("Tool X called 15 times…", "ping-pong between A and B…").
+          console.log(
+            `[swarm-events] aborting task ${taskId}: tool-loop detected — ${result.reason ?? "unknown reason"}`,
+          );
           opts.abortRef.current?.abort();
         }
       })

package/src/slack/HEURISTICS.md CHANGED Viewed

@@ -29,9 +29,13 @@ When someone @mentions the bot in a thread, the router checks whether a worker a
 When enabled, thread replies that do NOT @mention the bot are captured, buffered, and batched into a single follow-up task. This allows humans to give multi-message feedback in a thread without needing to @mention the bot each time.
+A thread counts as having swarm activity if **either**:
+- a Slack task is already linked to it via `slackChannelId` + `slackThreadTs` (someone started it by @mentioning the bot), **or**
+- the swarm itself posted the thread's **root message** — i.e. the swarm started the thread with a proactive/standalone message (a notification, status update, or an agent posting unprompted). In this case there is no task row yet, so without this rule the human's reply would require an @mention. The root author is resolved via a one-time `conversations.replies` lookup (cached per thread) that matches our bot specifically — by `user` for normal posts and by `bot_id` for persona-override (username/icon) posts — so threads started by *other* bots are not picked up. The lookup is skipped when a linked task already matches.
 ### How it works
-1. A human sends a non-@mention message in a thread where the swarm is already active (has existing tasks)
+1. A human sends a non-@mention message in a thread where the swarm is already active (see definition above)
 2. The message enters an in-memory buffer keyed by `channelId:threadTs`
 3. A debounce timer starts (default 10 seconds)
 4. Additional messages within the window are appended to the buffer, resetting the timer each time

package/src/slack/handlers.test.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import {
   checkUserAccess,
   formatFileSize,
   isBotMessage,
+  isSwarmThreadRoot,
   type UserFilterConfig,
 } from "./handlers";
@@ -359,3 +360,37 @@ describe("isBotMessage", () => {
     });
   });
 });
+describe("isSwarmThreadRoot", () => {
+  test("matches our bot by user ID (non-persona post)", () => {
+    expect(isSwarmThreadRoot({ user: "UBOT123" }, "UBOT123", "B_SWARM")).toBe(true);
+  });
+  test("matches our bot by bot_id (persona post omits user)", () => {
+    // Posts with username/icon_emoji override carry bot_id but no user field.
+    expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, "UBOT123", "B_SWARM")).toBe(true);
+  });
+  test("does NOT match a different bot in the workspace", () => {
+    // A thread started by some OTHER bot must not be treated as swarm-started.
+    expect(isSwarmThreadRoot({ bot_id: "B_OTHER", user: "UOTHER" }, "UBOT123", "B_SWARM")).toBe(
+      false,
+    );
+  });
+  test("does NOT match a human-started thread", () => {
+    expect(isSwarmThreadRoot({ user: "UHUMAN456" }, "UBOT123", "B_SWARM")).toBe(false);
+  });
+  test("returns false for an undefined root message", () => {
+    expect(isSwarmThreadRoot(undefined, "UBOT123", "B_SWARM")).toBe(false);
+  });
+  test("returns false when our bot identity is unknown", () => {
+    expect(isSwarmThreadRoot({ bot_id: "B_SWARM", user: "UBOT123" }, null, null)).toBe(false);
+  });
+  test("falls back to bot_id when bot user ID is unknown but bot_id is", () => {
+    expect(isSwarmThreadRoot({ bot_id: "B_SWARM" }, null, "B_SWARM")).toBe(true);
+  });
+});

package/src/slack/handlers.ts CHANGED Viewed

@@ -178,6 +178,75 @@ interface ThreadMessage {
 // Cache for bot's own user ID (avoids redundant auth.test calls)
 let cachedBotUserId: string | null = null;
+// Cache for bot's own bot_id (auth.test). Persona-override messages
+// (username/icon_emoji) carry `bot_id` but not `user`, so this is needed to
+// recognize swarm-authored messages that the `cachedBotUserId` check would miss.
+let cachedBotId: string | null = null;
+// Cache: `${channelId}:${threadTs}` → whether our swarm bot authored the thread
+// root. A thread's root author never changes, so caching is permanently correct.
+// Bounded to avoid unbounded growth in long-running processes.
+const swarmThreadRootCache = new Map<string, boolean>();
+const SWARM_THREAD_ROOT_CACHE_MAX = 1000;
+/**
+ * Pure check: does the given thread-root message belong to our own swarm bot?
+ * Exported for testing.
+ *
+ * Matches OUR bot specifically (not any bot in the workspace):
+ * - non-persona posts carry `user === <our bot user id>`
+ * - persona posts (username/icon_emoji override) carry `bot_id === <our bot id>`
+ *   but typically omit `user`
+ *
+ * @param root - Thread-root message (only `user` / `bot_id` are read); `undefined` yields false.
+ * @param botUserId - Our bot's user ID, or null if unknown (the `user` comparison is then skipped).
+ * @param botId - Our bot's `bot_id`, or null if unknown (the `bot_id` comparison is then skipped).
+ * @returns true when either known identifier matches the root; false otherwise.
+ */
+export function isSwarmThreadRoot(
+  root: { bot_id?: string; user?: string } | undefined,
+  botUserId: string | null,
+  botId: string | null,
+): boolean {
+  if (!root) return false;
+  if (botUserId && root.user === botUserId) return true;
+  if (botId && root.bot_id === botId) return true;
+  return false;
+}
+/**
+ * Returns true if the root message of the given thread was posted by our own
+ * swarm bot (a proactive/standalone message the swarm started). Used to treat
+ * human replies to swarm-initiated threads as follow-ups that don't require an
+ * @mention.
+ *
+ * Only definitive answers are cached per thread. If the bot identity is not
+ * known yet, or the Slack lookup throws, this returns false WITHOUT caching so
+ * a transient failure cannot permanently mark a swarm-started thread as foreign.
+ *
+ * @param client - Slack Web API client used for the `conversations.replies` lookup.
+ * @param channelId - Channel containing the thread.
+ * @param threadTs - Timestamp of the thread's root message.
+ * @returns Whether our bot authored the thread root (false when undeterminable).
+ */
+async function wasThreadStartedBySwarm(
+  client: WebClient,
+  channelId: string,
+  threadTs: string,
+): Promise<boolean> {
+  const key = `${channelId}:${threadTs}`;
+  const cached = swarmThreadRootCache.get(key);
+  if (cached !== undefined) return cached;
+  // Without our own identity every root compares as "not ours"; skip the API
+  // call and do not cache, so the thread is re-checked once the identity is known.
+  if (!cachedBotUserId && !cachedBotId) return false;
+  let startedBySwarm: boolean;
+  try {
+    const resp = await client.conversations.replies({
+      channel: channelId,
+      ts: threadTs,
+      limit: 1,
+      inclusive: true,
+    });
+    const root = resp.messages?.[0] as { bot_id?: string; user?: string } | undefined;
+    startedBySwarm = isSwarmThreadRoot(root, cachedBotUserId, cachedBotId);
+  } catch (error) {
+    console.error("[Slack] Failed to check whether thread was started by swarm:", error);
+    // Not cached: the next reply in this thread retries the lookup.
+    return false;
+  }
+  // Evict oldest entry (insertion-ordered Map) once the cap is reached.
+  if (swarmThreadRootCache.size >= SWARM_THREAD_ROOT_CACHE_MAX) {
+    const oldest = swarmThreadRootCache.keys().next().value;
+    if (oldest !== undefined) swarmThreadRootCache.delete(oldest);
+  }
+  swarmThreadRootCache.set(key, startedBySwarm);
+  return startedBySwarm;
+}
 // Cache for user display names
 const userNameCache = new Map<string, string>();
@@ -346,6 +415,7 @@ export function registerMessageHandler(app: App): void {
       try {
         const authResult = await client.auth.test();
         cachedBotUserId = authResult.user_id as string;
+        cachedBotId = (authResult.bot_id as string | undefined) ?? null;
       } catch (error) {
         console.error("[Slack] Failed to cache bot user ID:", error);
       }
@@ -458,8 +528,15 @@ export function registerMessageHandler(app: App): void {
         );
         return;
       }
-      // Check if this thread has any swarm activity (existing tasks)
-      const hasSwarmActivity = getAgentWorkingOnThread(msg.channel, msg.thread_ts) !== null;
+      // Treat the thread as having swarm activity if either:
+      //  - a Slack task is already linked to it (someone started it via @mention), or
+      //  - the swarm itself posted the thread's root message (a proactive/standalone
+      //    message the swarm started). In the latter case there is no task row yet,
+      //    so the human's reply would otherwise require an @mention. The Slack lookup
+      //    is skipped when a task already matches.
+      const hasSwarmActivity =
+        getAgentWorkingOnThread(msg.channel, msg.thread_ts) !== null ||
+        (await wasThreadStartedBySwarm(client, msg.channel, msg.thread_ts));
       if (hasSwarmActivity) {
         const threadKey = `${msg.channel}:${msg.thread_ts}`;

package/src/tests/base-prompt.test.ts CHANGED Viewed

@@ -328,16 +328,17 @@ describe("getBasePrompt — truncation", () => {
   });
   test("total budget respected — tools truncated before claudeMd", async () => {
-    // Use soulMd to eat up most of the 150k total budget so that
-    // truncatable sections (claudeMd, toolsMd) must compete for the remainder.
+    // Use soulMd to eat up most of the 120k total budget (lowered from 150k
+    // in the Picateclas spawn-OOM fix, 2026-05-28) so that truncatable
+    // sections (claudeMd, toolsMd) must compete for the remainder.
     // soulMd is part of `prompt` which counts toward protectedLength.
     const baseResult = await getBasePrompt(minimalArgs);
     const staticLength = baseResult.length; // ~12-13k for static content
     // Leave exactly enough budget for claudeMd but not toolsMd.
-    // Total budget = 150k - protectedLength.
-    // We want: protectedLength ≈ 150k - 18k = 132k, so claudeMd (15k) fits but toolsMd doesn't.
-    const soulSize = 132_000 - staticLength;
+    // Total budget = 120k - protectedLength.
+    // We want: protectedLength ≈ 120k - 18k = 102k, so claudeMd (15k) fits but toolsMd doesn't.
+    const soulSize = 102_000 - staticLength;
     const result = await getBasePrompt({
       ...minimalArgs,
       soulMd: bigString(Math.max(0, soulSize)),
@@ -354,7 +355,29 @@ describe("getBasePrompt — truncation", () => {
     expect(hasToolsTruncation || !hasToolsHeader).toBe(true);
   });
-  test("repo context never truncated", async () => {
+  test("Picateclas spawn-OOM hardening — total prompt stays below MAX_ARG_STRLEN", async () => {
+    // Even at the worst-case where every truncatable section maxes out its
+    // budget and the repo CLAUDE.md is huge, the final prompt must stay
+    // safely below Linux's `MAX_ARG_STRLEN = 131,072` bytes (the per-argv-
+    // element kernel limit that bit Picateclas attempts 4-6, 2026-05-28).
+    const result = await getBasePrompt({
+      ...minimalArgs,
+      soulMd: bigString(40_000),
+      claudeMd: bigString(40_000),
+      toolsMd: bigString(40_000),
+      repoContext: {
+        claudeMd: bigString(60_000),
+        clonePath: "/workspace/repos/big-repo",
+      },
+    });
+    expect(result.length).toBeLessThan(131_072);
+  });
+  test("repo CLAUDE.md is capped at REPO_CLAUDE_MD_MAX_CHARS (12 KB) with on-disk pointer", async () => {
+    // Picateclas spawn-OOM permanent fix (2026-05-28): repo CLAUDE.md was the
+    // single biggest volatile component of the bootstrap argv. It is now
+    // truncated to ~12 KB with a footer pointing at the on-disk file, mirroring
+    // the same shape as the agent claudeMd / toolsMd caps.
     const hugeRepoClaudeMd = bigString(30_000);
     const result = await getBasePrompt({
       ...minimalArgs,
@@ -363,8 +386,23 @@ describe("getBasePrompt — truncation", () => {
         clonePath: "/workspace/big-repo",
       },
     });
-    // The full repo content should be present (never truncated)
-    expect(result).toContain(hugeRepoClaudeMd);
+    // The full 30 KB content should NOT survive — capped at ~12 KB.
+    expect(result).not.toContain(hugeRepoClaudeMd);
+    // The truncation footer points at the on-disk path so readers can find
+    // the full content.
+    expect(result).toContain("[...truncated — see /workspace/big-repo/CLAUDE.md");
+  });
+  test("repo CLAUDE.md under the cap is preserved verbatim", async () => {
+    const smallRepoClaudeMd = bigString(5_000);
+    const result = await getBasePrompt({
+      ...minimalArgs,
+      repoContext: {
+        claudeMd: smallRepoClaudeMd,
+        clonePath: "/workspace/small-repo",
+      },
+    });
+    expect(result).toContain(smallRepoClaudeMd);
     expect(result).not.toContain("[...truncated");
   });
 });

package/src/tests/claude-managed-adapter.test.ts CHANGED Viewed

@@ -40,14 +40,14 @@ describe("ClaudeManagedAdapter (Phase 1 skeleton)", () => {
     }
   });
-  test("factory returns ClaudeManagedAdapter for 'claude-managed'", () => {
-    const adapter = createProviderAdapter("claude-managed");
+  test("factory returns ClaudeManagedAdapter for 'claude-managed'", async () => {
+    const adapter = await createProviderAdapter("claude-managed");
     expect(adapter).toBeInstanceOf(ClaudeManagedAdapter);
     expect(adapter.name).toBe("claude-managed");
   });
-  test("factory still rejects unknown providers and lists claude-managed", () => {
-    expect(() => createProviderAdapter("nope")).toThrow(
+  test("factory still rejects unknown providers and lists claude-managed", async () => {
+    // Await the assertion; otherwise the test can finish before the rejection is checked.
+    await expect(createProviderAdapter("nope")).rejects.toThrow(
       'Unknown HARNESS_PROVIDER: "nope". Supported: claude, pi, codex, devin, claude-managed',
     );
   });

package/src/tests/codex-adapter-otel.test.ts CHANGED Viewed

@@ -85,7 +85,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
   });
   test("gate on (SWARM_ENABLE_HARNESS_OTEL) → spawn env carries TRACEPARENT", async () => {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
     expect(capturedEnv).toBeDefined();
@@ -93,14 +93,14 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
   });
   test("gate on via deprecated SWARM_ENABLE_CLAUDE_CODE_OTEL alias → TRACEPARENT injected", async () => {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     await adapter.createSession(testConfig({ env: { SWARM_ENABLE_CLAUDE_CODE_OTEL: "1" } }));
     expect(capturedEnv?.TRACEPARENT).toBe(`00-${TRACE_ID}-${SPAN_ID}-01`);
   });
   test("gate off → no TRACEPARENT, existing env wiring intact", async () => {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     await adapter.createSession(testConfig({ env: {} }));
     expect(capturedEnv).toBeDefined();
@@ -112,7 +112,7 @@ describe("CodexAdapter spawn env — harness OTEL gate", () => {
   test("gate on but unsampled active span → no TRACEPARENT", async () => {
     getActiveSpanSpy.mockReturnValue(makeSpan({ sampled: false }));
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     await adapter.createSession(testConfig({ env: { SWARM_ENABLE_HARNESS_OTEL: "1" } }));
     expect(capturedEnv?.TRACEPARENT).toBeUndefined();

package/src/tests/codex-adapter.test.ts CHANGED Viewed

@@ -96,7 +96,7 @@ async function runSessionWithThrowingThread(
     };
   try {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     const session = await adapter.createSession(config);
     const emitted: ProviderEvent[] = [];
     session.onEvent((e) => emitted.push(e));
@@ -175,7 +175,7 @@ async function runSessionWithFakeThread(
   };
   try {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     const session = await adapter.createSession(config);
     const emitted: ProviderEvent[] = [];
@@ -575,7 +575,7 @@ describe("CodexSession event mapping", () => {
     };
     try {
-      const adapter = new CodexAdapter();
+      const adapter = new CodexAdapter({ bypassSubprocess: true });
       const config = testConfig({
         logFile: join(tmpLogDir, "abort.log"),
         cwd: "",
@@ -612,7 +612,7 @@ describe("CodexSession event mapping", () => {
 describe("CodexAdapter.canResume", () => {
   test("returns false for empty / non-string session ids", async () => {
-    const adapter = new CodexAdapter();
+    const adapter = new CodexAdapter({ bypassSubprocess: true });
     expect(await adapter.canResume("")).toBe(false);
     // @ts-expect-error: deliberate runtime check for non-string input
     expect(await adapter.canResume(undefined)).toBe(false);
@@ -631,7 +631,7 @@ describe("CodexAdapter.canResume", () => {
       ).resumeThread = function resumeThread(): unknown {
         return { id: "thread-resumed" };
       };
-      const adapter = new CodexAdapter();
+      const adapter = new CodexAdapter({ bypassSubprocess: true });
       expect(await adapter.canResume("thread-resumed")).toBe(true);
       // Failure path
@@ -789,6 +789,10 @@ describe("resolveCodexModel", () => {
     expect(resolveCodexModel("gpt-5.4-mini")).toBe("gpt-5.4-mini");
   });
+  test("passthrough 'gpt-5.5' → gpt-5.5", () => {
+    expect(resolveCodexModel("gpt-5.5")).toBe("gpt-5.5");
+  });
   test("passthrough 'gpt-5.3-codex' → gpt-5.3-codex", () => {
     expect(resolveCodexModel("gpt-5.3-codex")).toBe("gpt-5.3-codex");
   });
@@ -816,6 +820,10 @@ describe("getCodexContextWindow", () => {
     expect(getCodexContextWindow("gpt-5.4-mini")).toBe(200_000);
   });
+  test("gpt-5.5 → 1_050_000", () => {
+    expect(getCodexContextWindow("gpt-5.5")).toBe(1_050_000);
+  });
   test("gpt-5.3-codex → 1_000_000 (1M context)", () => {
     expect(getCodexContextWindow("gpt-5.3-codex")).toBe(1_000_000);
   });
@@ -833,6 +841,11 @@ describe("computeCodexCostUsd", () => {
     expect(cost).toBeCloseTo(17.5, 4);
   });
+  test("gpt-5.5 with 1M uncached input + 1M output = $5 + $30 = $35", () => {
+    const cost = computeCodexCostUsd("gpt-5.5", 1_000_000, 0, 1_000_000);
+    expect(cost).toBeCloseTo(35, 4);
+  });
   test("gpt-5.4 with cached input applies the cached discount", () => {
     // 1M input, 800k cached → 200k uncached.
     // 200_000 × $2.50/M = $0.50
@@ -1136,7 +1149,7 @@ async function runSessionWithFakeThreadAndDeps(
     };
   try {
-    const adapter = new CodexAdapter({ summarizeDeps });
+    const adapter = new CodexAdapter({ summarizeDeps, bypassSubprocess: true });
     const session = await adapter.createSession(config);
     const emitted: ProviderEvent[] = [];
     session.onEvent((e) => emitted.push(e));
@@ -1626,7 +1639,7 @@ describe("CodexSession — rate-limit error preservation", () => {
     };
     try {
-      const adapter = new CodexAdapter();
+      const adapter = new CodexAdapter({ bypassSubprocess: true });
       const config = testConfig({
         logFile: join(tmpLogDir, "abort-guard.log"),
         cwd: "",

package/src/tests/codex-swarm-events.test.ts CHANGED Viewed

@@ -97,6 +97,41 @@ describe("createCodexSwarmEventHandler", () => {
       expect(controller.signal.aborted).toBe(true);
     });
+    test("logs the abort reason when /cancelled-tasks reports the task", async () => {
+      installFetchStub((url) => {
+        if (url.includes("/cancelled-tasks")) {
+          return new Response(
+            JSON.stringify({ cancelled: [{ id: "task-1", failureReason: "user request" }] }),
+            { status: 200 },
+          );
+        }
+        return new Response("{}", { status: 200 });
+      });
+      const logs: string[] = [];
+      const origLog = console.log;
+      console.log = (...args: unknown[]) => logs.push(args.map(String).join(" "));
+      try {
+        const controller = new AbortController();
+        const handler = createCodexSwarmEventHandler(
+          buildOpts({ abortRef: { current: controller }, taskId: "task-1" }),
+        );
+        handler({
+          type: "tool_start",
+          toolCallId: "call-1",
+          toolName: "bash",
+          args: { command: "sleep 9999" },
+        });
+        await new Promise((resolve) => setTimeout(resolve, 30));
+      } finally {
+        console.log = origLog;
+      }
+      // The log MUST include the literal taskId (not the `${taskId}` template).
+      const abortLog = logs.find((l) =>
+        l.includes("aborting task task-1: cancelled via /cancelled-tasks poll"),
+      );
+      expect(abortLog).toBeDefined();
+    });
     test("throttles the cancellation check across rapid tool_start events", async () => {
       const { calls } = installFetchStub(
         () => new Response(JSON.stringify({ cancelled: [] }), { status: 200 }),

package/src/tests/context-window.test.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import {
 describe("getContextWindowSize", () => {
   test("returns 1M for opus models", () => {
+    expect(getContextWindowSize("claude-opus-4-8")).toBe(1_000_000);
     expect(getContextWindowSize("claude-opus-4-7")).toBe(1_000_000);
     expect(getContextWindowSize("claude-opus-4-6")).toBe(1_000_000);
     expect(getContextWindowSize("opus")).toBe(1_000_000);

package/src/tests/credential-check.test.ts CHANGED Viewed

@@ -315,48 +315,64 @@ describe("checkOpencodeCredentials", () => {
 describe("checkProviderCredentials dispatcher", () => {
   const HOME = "/home/worker";
-  test("dispatches to the right adapter for every supported provider", () => {
-    expect(checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" }).ready).toBe(true);
-    expect(checkProviderCredentials("claude", {}).ready).toBe(false);
+  test("dispatches to the right adapter for every supported provider", async () => {
+    expect((await checkProviderCredentials("claude", { CLAUDE_CODE_OAUTH_TOKEN: "x" })).ready).toBe(
+      true,
+    );
+    expect((await checkProviderCredentials("claude", {})).ready).toBe(false);
     expect(
-      checkProviderCredentials(
-        "claude-managed",
-        {
-          ANTHROPIC_API_KEY: "x",
-          MANAGED_AGENT_ID: "a",
-          MANAGED_ENVIRONMENT_ID: "e",
-          MCP_BASE_URL: "https://x",
-        },
-        { homeDir: HOME, fs: noFiles },
+      (
+        await checkProviderCredentials(
+          "claude-managed",
+          {
+            ANTHROPIC_API_KEY: "x",
+            MANAGED_AGENT_ID: "a",
+            MANAGED_ENVIRONMENT_ID: "e",
+            MCP_BASE_URL: "https://x",
+          },
+          { homeDir: HOME, fs: noFiles },
+        )
       ).ready,
     ).toBe(true);
-    expect(checkProviderCredentials("devin", { DEVIN_API_KEY: "x", DEVIN_ORG_ID: "y" }).ready).toBe(
-      true,
-    );
+    expect(
+      (await checkProviderCredentials("devin", { DEVIN_API_KEY: "x", DEVIN_ORG_ID: "y" })).ready,
+    ).toBe(true);
     expect(
-      checkProviderCredentials("codex", { OPENAI_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
-        .ready,
+      (
+        await checkProviderCredentials(
+          "codex",
+          { OPENAI_API_KEY: "x" },
+          { homeDir: HOME, fs: noFiles },
+        )
+      ).ready,
     ).toBe(true);
     expect(
-      checkProviderCredentials("pi", { ANTHROPIC_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
-        .ready,
+      (
+        await checkProviderCredentials(
+          "pi",
+          { ANTHROPIC_API_KEY: "x" },
+          { homeDir: HOME, fs: noFiles },
+        )
+      ).ready,
     ).toBe(true);
     expect(
-      checkProviderCredentials(
-        "opencode",
-        { OPENROUTER_API_KEY: "x" },
-        { homeDir: HOME, fs: noFiles },
+      (
+        await checkProviderCredentials(
+          "opencode",
+          { OPENROUTER_API_KEY: "x" },
+          { homeDir: HOME, fs: noFiles },
+        )
       ).ready,
     ).toBe(true);
   });
-  test("throws on unknown provider", () => {
-    expect(() => checkProviderCredentials("nope", {})).toThrow(/unknown provider/i);
+  test("throws on unknown provider", async () => {
+    // Await the assertion; otherwise the test can finish before the rejection is checked.
+    await expect(checkProviderCredentials("nope", {})).rejects.toThrow(/unknown provider/i);
   });
 });
@@ -366,16 +382,16 @@ describe("snapshot: every provider", () => {
   const HOME = "/home/worker";
   const providers = ["claude", "claude-managed", "codex", "devin", "opencode", "pi"] as const;
-  test("fully unset env → ready=false with non-empty missing[] and hint", () => {
+  test("fully unset env → ready=false with non-empty missing[] and hint", async () => {
     for (const p of providers) {
-      const status = checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
+      const status = await checkProviderCredentials(p, {}, { homeDir: HOME, fs: noFiles });
       expect(status.ready).toBe(false);
       expect(status.missing.length).toBeGreaterThan(0);
       expect(status.hint).toBeTruthy();
     }
   });
-  test("minimum sufficient env → ready=true", () => {
+  test("minimum sufficient env → ready=true", async () => {
     const minimums: Record<string, Record<string, string>> = {
       claude: { CLAUDE_CODE_OAUTH_TOKEN: "x" },
       "claude-managed": {
@@ -390,7 +406,10 @@ describe("snapshot: every provider", () => {
       pi: { ANTHROPIC_API_KEY: "x" },
     };
     for (const p of providers) {
-      const status = checkProviderCredentials(p, minimums[p]!, { homeDir: HOME, fs: noFiles });
+      const status = await checkProviderCredentials(p, minimums[p]!, {
+        homeDir: HOME,
+        fs: noFiles,
+      });
       expect(status.ready).toBe(true);
     }
   });