npm - @desplega.ai/agent-swarm - Versions diffs - 1.77.2 → 1.78.0 - Mend

@desplega.ai/agent-swarm 1.77.2 → 1.78.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/openapi.json +1 -1
package/package.json +1 -1
package/src/be/db.ts +14 -4
package/src/commands/runner.ts +8 -2
package/src/providers/codex-adapter.ts +78 -21
package/src/providers/pi-mono-adapter.ts +77 -20
package/src/server.ts +6 -5
package/src/tests/codex-adapter.test.ts +82 -0
package/src/tests/credential-check.test.ts +23 -2
package/src/tests/pi-mono-adapter.test.ts +63 -1
package/src/tests/workflow-executors.test.ts +15 -6
package/src/tests/workflow-wait-event.test.ts +5 -2
package/src/workflows/engine.ts +5 -0
package/src/workflows/executors/script.ts +13 -3
package/src/x402/README.md +67 -0
package/src/x402/index.ts +7 -1

package/openapi.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "openapi": "3.1.0",
   "info": {
     "title": "Agent Swarm API",
-    "version": "1.77.2",
+    "version": "1.78.0",
     "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
   },
   "servers": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@desplega.ai/agent-swarm",
-  "version": "1.77.2",
+  "version": "1.78.0",
   "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
   "license": "MIT",
   "author": "desplega.sh <contact@desplega.sh>",

package/src/be/db.ts CHANGED Viewed

@@ -1411,9 +1411,14 @@ export function getAllTasks(filters?: TaskFilters): AgentTask[] {
     params.push(filters.createdAfter);
   }
-  // Exclude heartbeat tasks by default
+  // Exclude system/heartbeat tasks by default. The flag is still called
+  // `includeHeartbeat` for backward compat with existing API callers, but we
+  // also gate boot-triage + heartbeat-checklist behind it since those are
+  // equally noisy in the dashboard task list.
   if (!filters?.includeHeartbeat) {
-    conditions.push("(IFNULL(taskType, '') != 'heartbeat' AND tags NOT LIKE '%\"heartbeat\"%')");
+    conditions.push(
+      "(IFNULL(taskType, '') NOT IN ('heartbeat', 'heartbeat-checklist', 'boot-triage') AND tags NOT LIKE '%\"heartbeat\"%')",
+    );
   }
   const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
@@ -1509,9 +1514,14 @@ export function getTasksCount(filters?: Omit<TaskFilters, "limit" | "readyOnly">
     params.push(filters.createdAfter);
   }
-  // Exclude heartbeat tasks by default
+  // Exclude system/heartbeat tasks by default. The flag is still called
+  // `includeHeartbeat` for backward compat with existing API callers, but we
+  // also gate boot-triage + heartbeat-checklist behind it since those are
+  // equally noisy in the dashboard task list.
   if (!filters?.includeHeartbeat) {
-    conditions.push("(IFNULL(taskType, '') != 'heartbeat' AND tags NOT LIKE '%\"heartbeat\"%')");
+    conditions.push(
+      "(IFNULL(taskType, '') NOT IN ('heartbeat', 'heartbeat-checklist', 'boot-triage') AND tags NOT LIKE '%\"heartbeat\"%')",
+    );
   }
   const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";

package/src/commands/runner.ts CHANGED Viewed

@@ -2257,8 +2257,14 @@ async function checkCompletedProcesses(
         failureReason = result.failureReason;
         console.log(`[${role}] Detected error for task ${taskId.slice(0, 8)}: ${failureReason}`);
-        // If rate-limited and we know which key was used, report it
-        if (credentialInfo && /rate.?limit|hit your limit/i.test(failureReason)) {
+        // If rate-limited and we know which key was used, report it.
+        // Codex adapter prefixes failure reasons with `[rate-limit]` /
+        // `[usage-limit]` (see codex-adapter.formatTerminalError); Claude
+        // surfaces "rate limit" / "hit your limit" via SessionErrorTracker.
+        if (
+          credentialInfo &&
+          /rate.?limit|hit your limit|usage[ _-]?limit|too many requests/i.test(failureReason)
+        ) {
           // Try to extract reset time from the error message (e.g. "resets 3pm (UTC)")
           const parsedResetTime = parseRateLimitResetTime(failureReason);
           const defaultCooldownMs = 5 * 60 * 1000;

package/src/providers/codex-adapter.ts CHANGED Viewed

@@ -748,7 +748,10 @@ class CodexSession implements ProviderSession {
           }
           case "error": {
             const errItem = item as ErrorItem;
-            this.emit({ type: "error", message: this.formatTerminalError(errItem.message) });
+            this.emit({
+              type: "error",
+              message: this.formatTerminalError(errItem.message).message,
+            });
             break;
           }
         }
@@ -792,12 +795,12 @@ class CodexSession implements ProviderSession {
         break;
       }
       case "turn.failed": {
-        const message = this.formatTerminalError(event.error.message);
+        const { message } = this.formatTerminalError(event.error.message);
         this.emit({ type: "error", message });
         break;
       }
       case "error": {
-        const message = this.formatTerminalError(event.message);
+        const { message } = this.formatTerminalError(event.message);
         this.emit({ type: "error", message });
         break;
       }
@@ -805,22 +808,27 @@ class CodexSession implements ProviderSession {
   }
   /**
-   * Detect context-window-exceeded errors from the Codex CLI / SDK and rewrite
-   * them with a clearer, actionable message. Codex does not auto-compact like
-   * Claude does — when context fills, the next model call hard-fails. We can't
-   * compact retroactively, so we just mark the failure with a recognizable
-   * `[context-overflow]` prefix that the runner can flag in dashboards. See
-   * Linear DES-143 (codex auto-compaction follow-up) for the long-term fix.
+   * Categorize a terminal error from the Codex SDK and rewrite with a clearer
+   * prefix that the runner / dashboard can key on. The Codex app-server emits a
+   * structured `codexErrorInfo` discriminator
+   * (https://developers.openai.com/codex/app-server#errors) with values like
+   * `ContextWindowExceeded`, `UsageLimitExceeded`, `Unauthorized`, etc. — but
+   * `@openai/codex-sdk`'s `ThreadError` only surfaces the flat `message`
+   * string, so we still detect by pattern. Patterns below match the canonical
+   * `codexErrorInfo` name (which sometimes appears literally in the message)
+   * AND the human-readable text Codex puts in `error.message`.
    *
-   * Patterns observed in the wild (case-insensitive):
-   *   - "context length exceeded"
-   *   - "maximum context length"
-   *   - "too many tokens"
-   *   - "input too long"
-   *   - "request too large"
+   * Categories returned are consumed two ways:
+   *   1. `errorCategory` on the `result` event (dashboard surfacing).
+   *   2. The bracketed prefix in `failureReason` (`[usage-limit]` etc.) is
+   *      what runner.ts pattern-matches to flag the credential as
+   *      rate-limited in the rotation pool.
    */
-  private formatTerminalError(raw: string): string {
+  private formatTerminalError(raw: string): { message: string; category?: string } {
     const normalized = raw.toLowerCase();
+    // Context window exceeded — Codex has no auto-compact like Claude.
+    // See Linear DES-143 for the long-term fix.
     const overflowPatterns = [
       "context length exceeded",
       "maximum context length",
@@ -828,11 +836,60 @@ class CodexSession implements ProviderSession {
       "input too long",
       "request too large",
       "context_length_exceeded",
+      "contextwindowexceeded",
     ];
     if (overflowPatterns.some((p) => normalized.includes(p))) {
-      return `[context-overflow] Codex turn exceeded the model's context window for ${this.resolvedModel} (${this.contextWindow.toLocaleString()} tokens). Codex does not auto-compact conversation history like Claude does — start a fresh task or split the work into smaller turns. Original error: ${raw}`;
+      return {
+        message: `[context-overflow] Codex turn exceeded the model's context window for ${this.resolvedModel} (${this.contextWindow.toLocaleString()} tokens). Codex does not auto-compact conversation history like Claude does — start a fresh task or split the work into smaller turns. Original error: ${raw}`,
+        category: "context_overflow",
+      };
+    }
+    // Pro / business quota exhausted — codexErrorInfo: "UsageLimitExceeded".
+    // Message text typically reads "You've hit your usage limit. Upgrade to Pro …".
+    const usageLimitPatterns = ["usage limit", "upgrade to pro", "usagelimitexceeded"];
+    if (usageLimitPatterns.some((p) => normalized.includes(p))) {
+      return {
+        message: `[usage-limit] Codex account quota exhausted — upgrade plan or wait for monthly reset. Original error: ${raw}`,
+        category: "usage_limit",
+      };
+    }
+    // Per-minute / per-hour API rate limiting (HTTP 429).
+    const rateLimitPatterns = [
+      "rate limit",
+      "rate_limit",
+      "ratelimit",
+      "too many requests",
+      "http 429",
+      " 429 ",
+    ];
+    if (rateLimitPatterns.some((p) => normalized.includes(p))) {
+      return {
+        message: `[rate-limit] Codex API rate limit hit. Original error: ${raw}`,
+        category: "rate_limit",
+      };
+    }
+    // Bad / missing / invalid API key — codexErrorInfo: "Unauthorized".
+    const authPatterns = [
+      "unauthorized",
+      "http 401",
+      " 401 ",
+      "invalid api key",
+      "invalid_api_key",
+      "missing api key",
+      "no api key",
+      "authentication failed",
+    ];
+    if (authPatterns.some((p) => normalized.includes(p))) {
+      return {
+        message: `[auth-error] Codex authentication failed — check OPENAI_API_KEY or ChatGPT login. Original error: ${raw}`,
+        category: "authentication_failed",
+      };
     }
-    return raw;
+    return { message: raw };
   }
   private async runSession(): Promise<void> {
@@ -840,7 +897,7 @@ class CodexSession implements ProviderSession {
     // Expose the controller to the swarm event handler so it can trigger an
     // abort from outside this method (tool-loop detection, cancellation poll).
     this.abortRef.current = this.abortController;
-    let terminalError: string | undefined;
+    let terminalError: { message: string; category?: string } | undefined;
     let sawTurnCompleted = false;
     try {
@@ -897,14 +954,14 @@ class CodexSession implements ProviderSession {
         type: "result",
         cost,
         isError,
-        errorCategory: terminalError ? "turn_failed" : undefined,
+        errorCategory: terminalError ? (terminalError.category ?? "turn_failed") : undefined,
       });
       this.settle({
         exitCode: isError ? 1 : 0,
         sessionId: this._sessionId,
         cost,
         isError,
-        failureReason: terminalError,
+        failureReason: terminalError?.message,
       });
     } catch (err) {
       const message = err instanceof Error ? err.message : String(err);

package/src/providers/pi-mono-adapter.ts CHANGED Viewed

@@ -38,24 +38,31 @@ import type {
 } from "./types";
 /**
- * Map a `MODEL_OVERRIDE` string to the env var that satisfies it. Mirrors
- * `resolveModel` above (shortname → anthropic, `provider/model-id` → that
- * provider). Returns `null` when the override is empty (boot-loop should
- * treat it as the permissive case) or the provider can't be inferred.
+ * Map a `MODEL_OVERRIDE` string to the env var(s) that can satisfy it.
+ *
+ * Anthropic shortnames (`sonnet` / `haiku` / `opus`) accept EITHER
+ * `ANTHROPIC_API_KEY` (preferred — talks to Anthropic directly) OR
+ * `OPENROUTER_API_KEY` — in the latter case `resolveModel` swaps to the
+ * OpenRouter mirror of the same model so pi-ai's anthropic-provider env
+ * lookup (which only checks `ANTHROPIC_*`) doesn't fail with "No API key
+ * found for anthropic". Provider-prefixed model IDs only accept that one
+ * provider's key. Returns `null` for the permissive case (no MODEL_OVERRIDE
+ * or bare unprefixed model name).
  */
-function modelToCredKey(modelStr: string | undefined): string | null {
+function modelToCredKeys(modelStr: string | undefined): string[] | null {
   if (!modelStr) return null;
   const lower = modelStr.toLowerCase();
-  // Hard-coded shortnames map straight to anthropic.
+  // Hard-coded shortnames: anthropic-shape but pi-mono can route through
+  // OpenRouter (see `resolveModel`) when only an OR key is available.
   if (lower === "opus" || lower === "sonnet" || lower === "haiku") {
-    return "ANTHROPIC_API_KEY";
+    return ["ANTHROPIC_API_KEY", "OPENROUTER_API_KEY"];
   }
   if (modelStr.includes("/")) {
     const provider = modelStr.slice(0, modelStr.indexOf("/")).toLowerCase();
-    if (provider === "anthropic") return "ANTHROPIC_API_KEY";
-    if (provider === "openrouter") return "OPENROUTER_API_KEY";
-    if (provider === "openai") return "OPENAI_API_KEY";
-    if (provider === "google") return "GOOGLE_API_KEY";
+    if (provider === "anthropic") return ["ANTHROPIC_API_KEY"];
+    if (provider === "openrouter") return ["OPENROUTER_API_KEY"];
+    if (provider === "openai") return ["OPENAI_API_KEY"];
+    if (provider === "google") return ["GOOGLE_API_KEY"];
   }
   // Bare model name with no provider prefix — adapter falls through to a
   // best-effort resolution against multiple providers, so the boot loop
@@ -83,15 +90,16 @@ export function checkPiMonoCredentials(
     return { ready: true, missing: [], satisfiedBy: "file" };
   }
-  const requiredKey = modelToCredKey(env.MODEL_OVERRIDE);
-  if (requiredKey) {
-    if (env[requiredKey]) {
+  const requiredKeys = modelToCredKeys(env.MODEL_OVERRIDE);
+  if (requiredKeys) {
+    if (requiredKeys.some((k) => env[k])) {
       return { ready: true, missing: [], satisfiedBy: "env" };
     }
+    const keyList = requiredKeys.join(" / ");
     return {
       ready: false,
-      missing: [requiredKey, authFile],
-      hint: `MODEL_OVERRIDE=${env.MODEL_OVERRIDE} requires ${requiredKey}; or run \`pi auth login\` to create ${authFile}.`,
+      missing: [...requiredKeys, authFile],
+      hint: `MODEL_OVERRIDE=${env.MODEL_OVERRIDE} requires one of ${keyList}; or run \`pi auth login\` to create ${authFile}.`,
     };
   }
@@ -136,18 +144,67 @@ function mcpToolsToDefinitions(
   }));
 }
-/** Resolve a model string to a pi-ai Model object */
-function resolveModel(modelStr: string) {
+/**
+ * Anthropic-shortname → OpenRouter-mirror model IDs. Used by `resolveModel`
+ * when the worker only has `OPENROUTER_API_KEY` so pi-ai's anthropic
+ * provider env lookup (`ANTHROPIC_OAUTH_TOKEN` / `ANTHROPIC_API_KEY` only)
+ * doesn't fail with "No API key found for anthropic".
+ *
+ * The mirror IDs match pi-ai's generated OpenRouter model catalog
+ * (`anthropic/claude-{opus,sonnet,haiku}-*`).
+ */
+const ANTHROPIC_SHORTNAME_OPENROUTER_MIRROR: Record<string, string> = {
+  opus: "anthropic/claude-opus-4",
+  sonnet: "anthropic/claude-sonnet-4",
+  haiku: "anthropic/claude-haiku-4.5",
+};
+function envHasAnthropicCred(env: Record<string, string | undefined>): boolean {
+  return !!(env.ANTHROPIC_API_KEY || env.ANTHROPIC_OAUTH_TOKEN);
+}
+/**
+ * Resolve a model string to a pi-ai Model object.
+ *
+ * When `modelStr` is an anthropic shortname (`sonnet`/`haiku`/`opus`) AND
+ * the env only has `OPENROUTER_API_KEY` (no `ANTHROPIC_API_KEY` /
+ * `ANTHROPIC_OAUTH_TOKEN`), the shortname is rerouted through the
+ * OpenRouter mirror of the same model. This prevents pi-ai's
+ * anthropic-provider env lookup from failing at session-start with
+ * "No API key found for anthropic" — see task 37a4a87a and the chronic
+ * weekly-fire pattern (2026-04-13 → 2026-05-11) tracked in HEARTBEAT.md.
+ */
+export function resolveModel(
+  modelStr: string,
+  env: Record<string, string | undefined> = process.env,
+) {
   if (!modelStr) return undefined;
-  // Map common shortnames to provider/model pairs
+  const lower = modelStr.toLowerCase();
+  const isAnthropicShortname = lower === "opus" || lower === "sonnet" || lower === "haiku";
+  // Reroute anthropic shortnames through OpenRouter when no anthropic cred
+  // is available. The OpenRouter mirror IDs (`anthropic/claude-sonnet-4`,
+  // etc.) are present in pi-ai's model catalog.
+  if (isAnthropicShortname && !envHasAnthropicCred(env) && env.OPENROUTER_API_KEY) {
+    const orModelId = ANTHROPIC_SHORTNAME_OPENROUTER_MIRROR[lower];
+    if (orModelId) {
+      try {
+        return getModel("openrouter" as "anthropic", orModelId as never);
+      } catch {
+        // Fall through to native anthropic mapping below.
+      }
+    }
+  }
+  // Map common shortnames to provider/model pairs (native anthropic path).
   const shortnames: Record<string, [string, string]> = {
     opus: ["anthropic", "claude-opus-4-20250514"],
     sonnet: ["anthropic", "claude-sonnet-4-20250514"],
     haiku: ["anthropic", "claude-haiku-4-5-20251001"],
   };
-  const mapping = shortnames[modelStr.toLowerCase()];
+  const mapping = shortnames[lower];
   if (mapping) {
     try {
       return getModel(mapping[0] as "anthropic", mapping[1] as never);

package/src/server.ts CHANGED Viewed

@@ -120,8 +120,7 @@ import {
 // Capability-based feature flags
 // Default: all capabilities enabled
-const DEFAULT_CAPABILITIES =
-  "core,task-pool,messaging,profiles,services,scheduling,memory,workflows";
+const DEFAULT_CAPABILITIES = "core,task-pool,profiles,services,scheduling,memory,workflows";
 const CAPABILITIES = new Set(
   (process.env.CAPABILITIES || DEFAULT_CAPABILITIES).split(",").map((s) => s.trim()),
 );
@@ -204,13 +203,15 @@ export function createServer() {
     registerTaskActionTool(server);
   }
-  // Messaging capability - channel-based communication
+  // Core messaging tools - always registered (post/read are CORE_TOOLS)
+  registerPostMessageTool(server);
+  registerReadMessagesTool(server);
+  // Messaging capability - channel management (CRUD on channels)
   if (hasCapability("messaging")) {
     registerListChannelsTool(server);
     registerCreateChannelTool(server);
     registerDeleteChannelTool(server);
-    registerPostMessageTool(server);
-    registerReadMessagesTool(server);
   }
   // Profiles capability - agent profile management

package/src/tests/codex-adapter.test.ts CHANGED Viewed

@@ -389,6 +389,88 @@ describe("CodexSession event mapping", () => {
     expect(result.failureReason).toContain("[context-overflow]");
   });
+  test("turn.failed with usage-limit message rewrites + sets errorCategory=usage_limit", async () => {
+    // Codex Pro-quota exhausted: codexErrorInfo: "UsageLimitExceeded".
+    // Adapter must prefix `[usage-limit]` so runner.ts marks the credential
+    // as rate-limited in the rotation pool.
+    const events: ThreadEvent[] = [
+      { type: "thread.started", thread_id: "thread-usage" },
+      { type: "turn.started" },
+      {
+        type: "turn.failed",
+        error: {
+          message: "You've hit your usage limit. Upgrade to Pro (https://chatgpt.com/pricing).",
+        },
+      },
+    ];
+    const { emitted, result } = await runSessionWithFakeThread(
+      events,
+      testConfig({ logFile: join(tmpLogDir, "usage.log"), cwd: "" }),
+    );
+    const errorEvent = emitted.find((e) => e.type === "error");
+    expect(errorEvent?.type === "error" && errorEvent.message).toContain("[usage-limit]");
+    const resultEvent = emitted.findLast((e) => e.type === "result");
+    expect(resultEvent?.type === "result" && resultEvent.errorCategory).toBe("usage_limit");
+    expect(result.isError).toBe(true);
+    expect(result.failureReason).toContain("[usage-limit]");
+  });
+  test("turn.failed with rate-limit message rewrites + sets errorCategory=rate_limit", async () => {
+    const events: ThreadEvent[] = [
+      { type: "thread.started", thread_id: "thread-rate" },
+      { type: "turn.started" },
+      {
+        type: "turn.failed",
+        error: { message: "Request failed: 429 Too Many Requests — rate_limit_exceeded." },
+      },
+    ];
+    const { emitted, result } = await runSessionWithFakeThread(
+      events,
+      testConfig({ logFile: join(tmpLogDir, "rate.log"), cwd: "" }),
+    );
+    const errorEvent = emitted.find((e) => e.type === "error");
+    expect(errorEvent?.type === "error" && errorEvent.message).toContain("[rate-limit]");
+    const resultEvent = emitted.findLast((e) => e.type === "result");
+    expect(resultEvent?.type === "result" && resultEvent.errorCategory).toBe("rate_limit");
+    expect(result.isError).toBe(true);
+    expect(result.failureReason).toContain("[rate-limit]");
+  });
+  test("turn.failed with auth error rewrites + sets errorCategory=authentication_failed", async () => {
+    const events: ThreadEvent[] = [
+      { type: "thread.started", thread_id: "thread-auth" },
+      { type: "turn.started" },
+      {
+        type: "turn.failed",
+        error: { message: "Request failed: HTTP 401 Unauthorized — Invalid API key provided." },
+      },
+    ];
+    const { emitted, result } = await runSessionWithFakeThread(
+      events,
+      testConfig({ logFile: join(tmpLogDir, "auth.log"), cwd: "" }),
+    );
+    const errorEvent = emitted.find((e) => e.type === "error");
+    expect(errorEvent?.type === "error" && errorEvent.message).toContain("[auth-error]");
+    const resultEvent = emitted.findLast((e) => e.type === "result");
+    expect(resultEvent?.type === "result" && resultEvent.errorCategory).toBe(
+      "authentication_failed",
+    );
+    expect(result.isError).toBe(true);
+    expect(result.failureReason).toContain("[auth-error]");
+  });
   test("abort() resolves the session with cancelled result", async () => {
     // Patch startThread with a fake whose runStreamed yields a long stream
     // that respects the AbortSignal — yields one event, awaits, and only

package/src/tests/credential-check.test.ts CHANGED Viewed

@@ -192,7 +192,13 @@ describe("checkPiMonoCredentials", () => {
     ).toBe(false);
   });
-  test("strict: shortname `sonnet` resolves to anthropic", () => {
+  test("shortname `sonnet` accepts ANTHROPIC_API_KEY *or* OPENROUTER_API_KEY", () => {
+    // Anthropic-shortname models (sonnet/haiku/opus) prefer the native
+    // ANTHROPIC_* credential, but pi-mono-adapter reroutes through the
+    // OpenRouter mirror when only OPENROUTER_API_KEY is available — so the
+    // boot-time cred check must accept either key. See task 37a4a87a and
+    // the chronic pi-mono → "No API key found for anthropic" recurrence
+    // tracked in HEARTBEAT.md (2026-04-13 → 2026-05-11).
     const env = { MODEL_OVERRIDE: "sonnet" };
     expect(
       checkPiMonoCredentials({ ...env, ANTHROPIC_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
@@ -201,7 +207,22 @@ describe("checkPiMonoCredentials", () => {
     expect(
       checkPiMonoCredentials({ ...env, OPENROUTER_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
         .ready,
-    ).toBe(false);
+    ).toBe(true);
+    // Neither key set → still not ready, and missing includes both options.
+    const empty = checkPiMonoCredentials(env, { homeDir: HOME, fs: noFiles });
+    expect(empty.ready).toBe(false);
+    expect(empty.missing).toContain("ANTHROPIC_API_KEY");
+    expect(empty.missing).toContain("OPENROUTER_API_KEY");
+  });
+  test("haiku and opus shortnames also accept OPENROUTER_API_KEY", () => {
+    for (const model of ["haiku", "opus"]) {
+      const env = { MODEL_OVERRIDE: model };
+      expect(
+        checkPiMonoCredentials({ ...env, OPENROUTER_API_KEY: "x" }, { homeDir: HOME, fs: noFiles })
+          .ready,
+      ).toBe(true);
+    }
   });
 });

package/src/tests/pi-mono-adapter.test.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { afterAll, beforeAll, describe, expect, test } from "bun:test";
 import { existsSync, mkdirSync, readFileSync, rmSync, symlinkSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
-import { PiMonoAdapter } from "../providers/pi-mono-adapter";
+import { PiMonoAdapter, resolveModel } from "../providers/pi-mono-adapter";
 describe("PiMonoAdapter", () => {
   test("name is 'pi'", () => {
@@ -115,6 +115,68 @@ describe("Model name mapping", () => {
   });
 });
+describe("resolveModel — OpenRouter reroute for anthropic shortnames", () => {
+  // Regression coverage for task 37a4a87a: workers spawned with
+  // `provider: pi` + `OPENROUTER_API_KEY` (no ANTHROPIC_API_KEY) and a task
+  // model of `sonnet` / `haiku` / `opus` previously crashed at
+  // session-start with "No API key found for anthropic" because pi-ai's
+  // anthropic provider only checks ANTHROPIC_OAUTH_TOKEN / ANTHROPIC_API_KEY.
+  // The adapter now reroutes the shortname through the OpenRouter mirror.
+  test("sonnet → openrouter/anthropic/claude-sonnet-4 when only OPENROUTER_API_KEY is set", () => {
+    const env = { OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("sonnet", env);
+    expect(model).toBeDefined();
+    expect(model?.provider).toBe("openrouter");
+    expect(model?.id).toBe("anthropic/claude-sonnet-4");
+  });
+  test("haiku → openrouter/anthropic/claude-haiku-4.5 when only OPENROUTER_API_KEY is set", () => {
+    const env = { OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("haiku", env);
+    expect(model).toBeDefined();
+    expect(model?.provider).toBe("openrouter");
+    expect(model?.id).toBe("anthropic/claude-haiku-4.5");
+  });
+  test("opus → openrouter/anthropic/claude-opus-4 when only OPENROUTER_API_KEY is set", () => {
+    const env = { OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("opus", env);
+    expect(model).toBeDefined();
+    expect(model?.provider).toBe("openrouter");
+    expect(model?.id).toBe("anthropic/claude-opus-4");
+  });
+  test("anthropic native path wins when ANTHROPIC_API_KEY is set (even alongside OPENROUTER_API_KEY)", () => {
+    const env = { ANTHROPIC_API_KEY: "sk-ant-...", OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("sonnet", env);
+    expect(model).toBeDefined();
+    expect(model?.provider).toBe("anthropic");
+    expect(model?.id).toBe("claude-sonnet-4-20250514");
+  });
+  test("ANTHROPIC_OAUTH_TOKEN alone also wins over OPENROUTER reroute", () => {
+    const env = { ANTHROPIC_OAUTH_TOKEN: "sk-ant-oat-...", OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("sonnet", env);
+    expect(model).toBeDefined();
+    expect(model?.provider).toBe("anthropic");
+  });
+  test("no rerouting for non-shortname `anthropic/<model>` strings", () => {
+    // Explicit provider prefix should not be silently swapped — that path is
+    // the caller's explicit choice, surface as-is.
+    const env = { OPENROUTER_API_KEY: "sk-or-..." };
+    const model = resolveModel("anthropic/claude-sonnet-4-20250514", env);
+    expect(model?.provider).toBe("anthropic");
+  });
+  test("default env arg falls back to process.env (smoke test — no creds set)", () => {
+    // Just confirm the default parameter doesn't throw — the actual model
+    // resolution depends on the test runner's env.
+    expect(() => resolveModel("unknown-model-id")).not.toThrow();
+  });
+});
 describe("Pi-mono event normalization", () => {
   test("message_update with text content produces raw_log-style data", () => {
     // Simulates what PiMonoSession.handleAgentEvent does

package/src/tests/workflow-executors.test.ts CHANGED Viewed

@@ -475,22 +475,31 @@ describe("ScriptExecutor", () => {
     expect(result.nextPort).toBe("success");
   });
-  test("captures stderr on failure", async () => {
+  test("marks step failed and captures stderr on non-zero exit", async () => {
     const result = await executor.run(
       input({ runtime: "bash", script: "echo err >&2; exit 1" }, {}),
     );
-    expect(result.status).toBe("success"); // executor succeeds, script fails
+    expect(result.status).toBe("failed");
+    expect(result.error).toBe("err");
     const out = result.output as { exitCode: number; stdout: string; stderr: string };
     expect(out.exitCode).toBe(1);
     expect(out.stderr).toBe("err");
-    expect(result.nextPort).toBe("failure");
   });
-  test("returns failure port on non-zero exit code", async () => {
+  test("marks step failed on non-zero exit code (exit 1)", async () => {
+    const result = await executor.run(input({ runtime: "bash", script: "exit 1" }, {}));
+    expect(result.status).toBe("failed");
+    expect(result.error).toBe("Script exited with code 1");
+    const out = result.output as { exitCode: number };
+    expect(out?.exitCode).toBe(1);
+  });
+  test("marks step failed with exit code in error when no stderr (exit 42)", async () => {
     const result = await executor.run(input({ runtime: "bash", script: "exit 42" }, {}));
-    expect(result.nextPort).toBe("failure");
+    expect(result.status).toBe("failed");
+    expect(result.error).toBe("Script exited with code 42");
     const out = result.output as { exitCode: number };
-    expect(out.exitCode).toBe(42);
+    expect(out?.exitCode).toBe(42);
   });
   test("runs TypeScript script via bun", async () => {

package/src/tests/workflow-wait-event.test.ts CHANGED Viewed

@@ -261,8 +261,11 @@ describe("WaitExecutor — event mode end-to-end", () => {
     // Skip the 5s poller — fast-forward by directly calling the resume helper
     // with status='timeout' (the poller would do exactly this once expiresAt
-    // passes).
-    await new Promise((r) => setTimeout(r, 1100));
+    // passes). Sleep relative to the *actual* expiresAt so we don't race
+    // when startWorkflowExecution overhead eats the cushion on slow CI.
+    const expiresAtMs = new Date(ws!.expiresAt!).getTime();
+    const sleepMs = Math.max(0, expiresAtMs - Date.now()) + 250;
+    await new Promise((r) => setTimeout(r, sleepMs));
     const due = getDueWaitStates();
     expect(due.find((d) => d.id === ws!.id)).toBeDefined();

package/src/workflows/engine.ts CHANGED Viewed

@@ -532,6 +532,11 @@ async function executeStep(
       retryPolicy,
     );
+    // Persist output for observability even on failure (e.g. script nodes keep {exitCode, stdout, stderr})
+    if (result.output !== undefined) {
+      updateWorkflowRunStep(stepId, { output: result.output });
+    }
     if (!shouldRetry) {
       throw new Error(result.error || "Step execution failed");
     }

package/src/workflows/executors/script.ts CHANGED Viewed

@@ -41,11 +41,21 @@ export class ScriptExecutor extends BaseExecutor<
     try {
       const result = await Promise.race([this.runScript(config), this.timeoutPromise(timeoutMs)]);
+      // Non-zero exit code is a hard failure — mark the step failed so the
+      // workflow engine stops the branch and operators can see what went wrong.
+      if (result.exitCode !== 0) {
+        return {
+          status: "failed",
+          error: result.stderr || `Script exited with code ${result.exitCode}`,
+          output: result as unknown as z.infer<typeof ScriptOutputSchema>,
+        };
+      }
       // If stdout is valid JSON object, merge parsed fields into output
       // so downstream nodes can access them via {{myScript.field}} interpolation
       // (mirrors how agent-task nodes parse JSON in resume.ts)
       let output: Record<string, unknown> = result;
-      if (result.exitCode === 0 && result.stdout) {
+      if (result.stdout) {
         try {
           const parsed = JSON.parse(result.stdout);
           if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
@@ -58,8 +68,8 @@ export class ScriptExecutor extends BaseExecutor<
       return {
         status: "success",
-        output: output as typeof result,
-        nextPort: result.exitCode === 0 ? "success" : "failure",
+        output: output as z.infer<typeof ScriptOutputSchema>,
+        nextPort: "success",
       };
     } catch (err) {
       return {

package/src/x402/README.md ADDED Viewed

@@ -0,0 +1,67 @@
+# x402 Payment Module
+> **Alpha / Opt-in** — This module is experimental and not wired into any core swarm path. Import it explicitly if you need x402 payment support.
+Gives agents the ability to make [x402](https://github.com/coinbase/x402) payments when calling external APIs that return HTTP 402 responses. Uses USDC on Base (or Base Sepolia for testing) with automatic payment handling.
+## Status
+This module is **not imported by any core swarm code**. It is an opt-in integration — include it only when you need automatic micropayment support in an agent task.
+Knip and other dead-code scanners will flag this directory as unused because there are no production-code imports; that is expected.
+## Signer backends
+| Backend | When to use | Required env vars |
+|---------|-------------|-------------------|
+| Openfort (default) | Managed wallet, keys in TEE | `OPENFORT_API_KEY`, `OPENFORT_WALLET_SECRET` |
+| viem | Raw EVM private key (local/dev) | `EVM_PRIVATE_KEY` |
+## Opt-in usage
+```typescript
+import { createX402Fetch, createX402Client } from "@/x402";
+// Simple: drop-in replacement for fetch
+const paidFetch = await createX402Fetch();
+const response = await paidFetch("https://api.example.com/paid-endpoint");
+// Advanced: full client with spending tracking
+const client = await createX402Client();
+const response = await client.fetch("https://api.example.com/paid-endpoint");
+console.log(client.getSpendingSummary());
+```
+## Env vars
+| Variable | Required | Description |
+|----------|----------|-------------|
+| `X402_SIGNER_TYPE` | No | `openfort` (default) or `viem` |
+| `X402_NETWORK` | No | `eip155:8453` (Base mainnet, default) or `eip155:84532` (Base Sepolia) |
+| `X402_MAX_AUTO_APPROVE_USD` | No | Per-request auto-approve ceiling in USD |
+| `X402_DAILY_LIMIT_USD` | No | Daily spending cap in USD |
+| `OPENFORT_API_KEY` | Openfort only | Openfort API key |
+| `OPENFORT_WALLET_SECRET` | Openfort only | Openfort wallet secret |
+| `OPENFORT_WALLET_ADDRESS` | No | Pre-existing wallet address (optional) |
+| `EVM_PRIVATE_KEY` | viem only | Raw 32-byte hex private key |
+## Architecture
+```
+src/x402/
+  index.ts            # Public exports
+  client.ts           # X402PaymentClient — wraps fetch with payment handling
+  config.ts           # Env-var loader and config types
+  openfort-signer.ts  # Openfort managed-wallet signer adapter
+  spending-tracker.ts # Per-request and daily spending limits
+  cli.ts              # CLI helper for inspecting wallet / config
+```
+## Dependencies
+`@x402/core`, `@x402/evm`, `@x402/fetch`, `viem`, `@openfort/openfort-node` are intentionally kept in `package.json` — they belong to this opt-in module.
+## References
+- [x402 protocol](https://github.com/coinbase/x402)
+- [Openfort docs](https://openfort.xyz/docs)

package/src/x402/index.ts CHANGED Viewed

@@ -1,5 +1,11 @@
 /**
- * x402 Payment Module
+ * x402 Payment Module — **Alpha / Opt-in**
+ *
+ * @alpha
+ *
+ * This module is experimental and not imported by any core swarm path.
+ * Include it explicitly when you need automatic x402 micropayment support.
+ * See `src/x402/README.md` for setup, env vars, and usage examples.
  *
  * Gives agents the ability to make x402 payments when calling external APIs.
  * Uses USDC on Base (or Base Sepolia for testing) with automatic 402 handling.