npm - @desplega.ai/agent-swarm - Versions diffs - 1.63.0 → 1.64.0 - Mend

@desplega.ai/agent-swarm 1.63.0 → 1.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/openapi.json +1 -1
package/package.json +1 -1
package/src/cli.tsx +20 -0
package/src/commands/codex-login.ts +263 -0
package/src/commands/runner.ts +86 -2
package/src/http/index.ts +12 -1
package/src/http/poll.ts +12 -0
package/src/http/tasks.ts +27 -0
package/src/providers/codex-adapter.ts +42 -0
package/src/providers/codex-oauth/auth-json.ts +58 -0
package/src/providers/codex-oauth/flow.ts +368 -0
package/src/providers/codex-oauth/pkce.ts +26 -0
package/src/providers/codex-oauth/storage.ts +121 -0
package/src/providers/codex-oauth/types.ts +37 -0
package/src/telemetry.ts +109 -0
package/src/tests/codex-login.test.ts +155 -0
package/src/tests/codex-oauth-storage.test.ts +306 -0
package/src/tests/codex-oauth.test.ts +307 -0
package/src/tests/error-tracker.test.ts +49 -0
package/src/tests/workflow-engine-v2.test.ts +98 -2
package/src/utils/credentials.ts +3 -1
package/src/utils/error-tracker.ts +6 -1
package/src/workflows/checkpoint.ts +10 -6
package/src/workflows/engine.ts +43 -11

package/src/tests/codex-oauth.test.ts ADDED Viewed

@@ -0,0 +1,307 @@
+import { afterEach, describe, expect, it } from "bun:test";
+import {
+  authJsonToCredentialSelection,
+  authJsonToCredentials,
+  credentialsToAuthJson,
+} from "../providers/codex-oauth/auth-json.js";
+import {
+  AUTHORIZE_URL,
+  CLIENT_ID,
+  createAuthorizationFlow,
+  createState,
+  decodeJwt,
+  exchangeAuthorizationCode,
+  getAccountId,
+  JWT_CLAIM_PATH,
+  parseAuthorizationInput,
+  REDIRECT_URI,
+  refreshAccessToken,
+  resetFetchForTesting,
+  SCOPE,
+  setFetchForTesting,
+  TOKEN_URL,
+} from "../providers/codex-oauth/flow.js";
+import { generatePKCE } from "../providers/codex-oauth/pkce.js";
+import type { CodexOAuthCredentials } from "../providers/codex-oauth/types.js";
+// generatePKCE must hand back a fresh PKCE (RFC 7636) verifier/challenge pair on every call.
+describe("generatePKCE", () => {
+  // Unpadded base64url alphabet. 43 characters is the unpadded base64url length of 32 bytes.
+  const BASE64URL = /^[A-Za-z0-9_-]+$/;
+  it("produces distinct verifier/challenge pairs", async () => {
+    const first = await generatePKCE();
+    const second = await generatePKCE();
+    expect(first.verifier).not.toEqual(second.verifier);
+    expect(first.challenge).not.toEqual(second.challenge);
+  });
+  it("verifier is base64url (43 chars, URL-safe)", async () => {
+    const { verifier } = await generatePKCE();
+    expect(verifier.length).toBe(43);
+    expect(verifier).toMatch(BASE64URL);
+  });
+  it("challenge is base64url (43 chars, URL-safe)", async () => {
+    const { challenge } = await generatePKCE();
+    expect(challenge.length).toBe(43);
+    expect(challenge).toMatch(BASE64URL);
+  });
+  it("challenge is the S256 transform of the verifier", async () => {
+    // The authorize URL is asserted to carry code_challenge_method=S256, so the
+    // challenge has to be BASE64URL(SHA-256(ASCII(verifier))); shape checks alone
+    // would not catch a challenge derived from something other than the verifier.
+    const { verifier, challenge } = await generatePKCE();
+    const digest = new Uint8Array(
+      await crypto.subtle.digest("SHA-256", new TextEncoder().encode(verifier)),
+    );
+    // btoa yields standard base64; convert to the unpadded URL-safe variant.
+    const expected = btoa(String.fromCharCode(...digest))
+      .replace(/\+/g, "-")
+      .replace(/\//g, "_")
+      .replace(/=+$/, "");
+    expect(challenge).toBe(expected);
+  });
+});
+// Pins the exported OAuth configuration values so an accidental edit is caught.
+describe("OAuth constants", () => {
+  it("has the correct public client ID", () => {
+    const expectedClientId = "app_EMoamEEZ73f0CkXaXp7hrann";
+    expect(CLIENT_ID).toBe(expectedClientId);
+  });
+  it("has the correct OAuth URLs", () => {
+    // Same order as before: authorize, token, then the local redirect target.
+    const expectations: Array<[string, string]> = [
+      [AUTHORIZE_URL, "https://auth.openai.com/oauth/authorize"],
+      [TOKEN_URL, "https://auth.openai.com/oauth/token"],
+      [REDIRECT_URI, "http://localhost:1455/auth/callback"],
+    ];
+    for (const [actual, expected] of expectations) {
+      expect(actual).toBe(expected);
+    }
+  });
+  it("has the correct scope", () => {
+    const expectedScope = "openid profile email offline_access";
+    expect(SCOPE).toBe(expectedScope);
+  });
+  it("has the correct JWT claim path", () => {
+    const expectedClaimPath = "https://api.openai.com/auth";
+    expect(JWT_CLAIM_PATH).toBe(expectedClaimPath);
+  });
+});
+// createState should return a 32-character lowercase-hex value that differs between calls.
+describe("createState", () => {
+  it("produces a 32-char hex string", () => {
+    const value = createState();
+    expect(value).toHaveLength(32);
+    expect(value).toMatch(/^[0-9a-f]+$/);
+  });
+  it("produces different values each call", () => {
+    const first = createState();
+    const second = createState();
+    expect(first).not.toEqual(second);
+  });
+});
+// parseAuthorizationInput accepts several pasted formats; each case pins one of them.
+describe("parseAuthorizationInput", () => {
+  it("parses bare code", () => {
+    const parsed = parseAuthorizationInput("abc123");
+    expect(parsed).toEqual({ code: "abc123" });
+  });
+  it("parses code=X&state=Y", () => {
+    const parsed = parseAuthorizationInput("code=abc&state=def");
+    expect(parsed).toEqual({ code: "abc", state: "def" });
+  });
+  it("parses full redirect URL", () => {
+    const pasted = "http://localhost:1455/auth/callback?code=abc&state=def";
+    const parsed = parseAuthorizationInput(pasted);
+    expect(parsed).toEqual({ code: "abc", state: "def" });
+  });
+  it("parses code#state format", () => {
+    const parsed = parseAuthorizationInput("abc123#def456");
+    expect(parsed).toEqual({ code: "abc123", state: "def456" });
+  });
+  it("returns empty for empty string", () => {
+    const parsed = parseAuthorizationInput("");
+    expect(parsed).toEqual({});
+  });
+  it("returns empty for whitespace", () => {
+    const parsed = parseAuthorizationInput("   ");
+    expect(parsed).toEqual({});
+  });
+});
+// decodeJwt should return the parsed payload segment, or null when the token is malformed.
+describe("decodeJwt", () => {
+  // Builds a three-segment token whose middle segment is the base64-encoded JSON payload.
+  const tokenWithPayload = (payload: unknown): string => {
+    const encoded = btoa(JSON.stringify(payload));
+    return `header.${encoded}.signature`;
+  };
+  it("extracts chatgpt_account_id from a JWT", () => {
+    const decoded = decodeJwt(
+      tokenWithPayload({ "https://api.openai.com/auth": { chatgpt_account_id: "acc-123" } }),
+    );
+    expect(decoded).not.toBeNull();
+    const accountId = decoded?.["https://api.openai.com/auth"]?.chatgpt_account_id;
+    expect(accountId).toBe("acc-123");
+  });
+  it("returns null for invalid JWT", () => {
+    // Neither input has the three dot-separated segments of a JWT.
+    for (const malformed of ["not-a-jwt", "a.b"]) {
+      expect(decodeJwt(malformed)).toBeNull();
+    }
+  });
+});
+// getAccountId reads chatgpt_account_id from the token payload and yields null when it is absent or empty.
+describe("getAccountId", () => {
+  // Wraps a payload in a fake JWT: fixed header/signature around the base64 JSON body.
+  const makeToken = (payload: unknown): string => {
+    const encoded = btoa(JSON.stringify(payload));
+    return `header.${encoded}.signature`;
+  };
+  it("extracts account ID from access token", () => {
+    const token = makeToken({
+      "https://api.openai.com/auth": { chatgpt_account_id: "c724a178-abc" },
+    });
+    expect(getAccountId(token)).toBe("c724a178-abc");
+  });
+  it("returns null for JWT without claim", () => {
+    const token = makeToken({ sub: "user123" });
+    expect(getAccountId(token)).toBeNull();
+  });
+  it("returns null for empty string claim", () => {
+    const token = makeToken({ "https://api.openai.com/auth": { chatgpt_account_id: "" } });
+    expect(getAccountId(token)).toBeNull();
+  });
+});
+// Exercises the authorization-code -> token exchange with fetch replaced by a stub.
+describe("exchangeAuthorizationCode", () => {
+  // Undo the fetch override after every test so a stub cannot leak into later cases.
+  afterEach(() => {
+    resetFetchForTesting();
+  });
+  it("constructs expected POST body", async () => {
+    // Capture through a holder object instead of a `let` initialised to null:
+    // TypeScript does not track assignments made inside the stub closure, so a
+    // bare `let` stays narrowed to `null` (and `value!` to `never`) at the asserts.
+    const captured: { body: URLSearchParams | null } = { body: null };
+    setFetchForTesting(async (_input: RequestInfo | URL, init?: RequestInit) => {
+      captured.body = init?.body as URLSearchParams;
+      return new Response(
+        JSON.stringify({
+          access_token: "at_123",
+          refresh_token: "rt_456",
+          expires_in: 3600,
+        }),
+        { headers: { "Content-Type": "application/json" } },
+      );
+    });
+    const result = await exchangeAuthorizationCode("code-abc", "verifier-xyz");
+    expect(result.type).toBe("success");
+    // toBeInstanceOf also rejects `undefined`, so a missing body fails here with a
+    // readable assertion rather than a TypeError on `.get` below.
+    const body = captured.body;
+    expect(body).toBeInstanceOf(URLSearchParams);
+    expect(body!.get("grant_type")).toBe("authorization_code");
+    expect(body!.get("client_id")).toBe("app_EMoamEEZ73f0CkXaXp7hrann");
+    expect(body!.get("code")).toBe("code-abc");
+    expect(body!.get("code_verifier")).toBe("verifier-xyz");
+  });
+  it("returns failed on HTTP error", async () => {
+    setFetchForTesting(() => new Response("Bad Request", { status: 400 }));
+    const result = await exchangeAuthorizationCode("code-abc", "verifier-xyz");
+    expect(result.type).toBe("failed");
+  });
+  it("returns failed on missing fields", async () => {
+    // Successful HTTP status, but the payload lacks refresh_token and expires_in.
+    setFetchForTesting(
+      () =>
+        new Response(JSON.stringify({ access_token: "at" }), {
+          headers: { "Content-Type": "application/json" },
+        }),
+    );
+    const result = await exchangeAuthorizationCode("code-abc", "verifier-xyz");
+    expect(result.type).toBe("failed");
+  });
+});
+// Exercises the refresh-token grant with fetch replaced by a stub.
+describe("refreshAccessToken", () => {
+  // Undo the fetch override after every test so a stub cannot leak into later cases.
+  afterEach(() => {
+    resetFetchForTesting();
+  });
+  it("calls token endpoint with grant_type=refresh_token", async () => {
+    // Capture through a holder object instead of `let` variables initialised to
+    // null: TypeScript does not track assignments made inside the stub closure,
+    // so a bare `let` stays narrowed to `null` at the assertions below.
+    const captured: { url: string | null; body: URLSearchParams | null } = {
+      url: null,
+      body: null,
+    };
+    setFetchForTesting(async (input: RequestInfo | URL, init?: RequestInit) => {
+      // fetch accepts a string, a URL or a Request; normalise all three to a string.
+      captured.url =
+        typeof input === "string" ? input : input instanceof URL ? input.href : input.url;
+      captured.body = init?.body as URLSearchParams;
+      return new Response(
+        JSON.stringify({
+          access_token: "at_new",
+          refresh_token: "rt_new",
+          expires_in: 3600,
+        }),
+        { headers: { "Content-Type": "application/json" } },
+      );
+    });
+    const result = await refreshAccessToken("rt_old");
+    expect(result.type).toBe("success");
+    // The test name promises the token endpoint is hit, so verify the URL too.
+    expect(captured.url).toBe(TOKEN_URL);
+    // toBeInstanceOf also rejects `undefined`, so a missing body fails here with a
+    // readable assertion rather than a TypeError on `.get` below.
+    const body = captured.body;
+    expect(body).toBeInstanceOf(URLSearchParams);
+    expect(body!.get("grant_type")).toBe("refresh_token");
+    expect(body!.get("refresh_token")).toBe("rt_old");
+    expect(body!.get("client_id")).toBe("app_EMoamEEZ73f0CkXaXp7hrann");
+  });
+  it("returns failed on HTTP error", async () => {
+    setFetchForTesting(() => new Response("Unauthorized", { status: 401 }));
+    const result = await refreshAccessToken("rt_old");
+    expect(result.type).toBe("failed");
+  });
+});
+// The generated authorize URL must carry every parameter listed below.
+describe("createAuthorizationFlow", () => {
+  it("includes required query parameters", async () => {
+    const flow = await createAuthorizationFlow("agent-swarm");
+    expect(flow.verifier).toBeTruthy();
+    expect(flow.state).toBeTruthy();
+    // Checked as substrings, in the same order as the individual assertions they replace.
+    const requiredFragments = [
+      AUTHORIZE_URL,
+      "response_type=code",
+      "client_id=app_EMoamEEZ73f0CkXaXp7hrann",
+      "code_challenge_method=S256",
+      "id_token_add_organizations=true",
+      "codex_cli_simplified_flow=true",
+      "originator=agent-swarm",
+    ];
+    for (const fragment of requiredFragments) {
+      expect(flow.url).toContain(fragment);
+    }
+  });
+});
+// Checks the auth.json shape produced from in-memory credentials, field by field.
+describe("credentialsToAuthJson", () => {
+  it("produces exact format matching observed ~/.codex/auth.json", () => {
+    const creds: CodexOAuthCredentials = {
+      access: "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.signature",
+      refresh: "rt_abc123",
+      expires: 1712678400000,
+      accountId: "c724a178-abc",
+    };
+    const authJson = credentialsToAuthJson(creds);
+    const { tokens } = authJson;
+    expect(authJson.auth_mode).toBe("chatgpt");
+    expect(authJson.OPENAI_API_KEY).toBeNull();
+    expect(tokens.access_token).toBe(creds.access);
+    expect(tokens.refresh_token).toBe(creds.refresh);
+    expect(tokens.account_id).toBe(creds.accountId);
+    // The access token is expected to be reused as the id_token.
+    expect(tokens.id_token).toBe(creds.access);
+    // last_refresh is expected to be the ISO-8601 rendering of the expiry timestamp.
+    const expectedLastRefresh = new Date(creds.expires).toISOString();
+    expect(authJson.last_refresh).toBe(expectedLastRefresh);
+  });
+});
+// Converting credentials to auth.json and back must preserve every field.
+describe("authJsonToCredentials", () => {
+  it("round-trips correctly", () => {
+    const creds: CodexOAuthCredentials = {
+      access: "at_123",
+      refresh: "rt_456",
+      expires: Date.now() + 3600000,
+      accountId: "acc-789",
+    };
+    const restored = authJsonToCredentials(credentialsToAuthJson(creds));
+    expect(restored.access).toBe(creds.access);
+    expect(restored.refresh).toBe(creds.refresh);
+    expect(restored.accountId).toBe(creds.accountId);
+    // The expiry is allowed to drift by under a second across the round trip.
+    const expiryDriftMs = Math.abs(restored.expires - creds.expires);
+    expect(expiryDriftMs).toBeLessThan(1000);
+  });
+});
+// Verifies the credential-selection record derived from a chatgpt-mode auth.json.
+describe("authJsonToCredentialSelection", () => {
+  it("maps chatgpt auth.json to CODEX_OAUTH tracking info", () => {
+    const creds: CodexOAuthCredentials = {
+      access: "at_123",
+      refresh: "rt_456",
+      expires: Date.now() + 3600000,
+      accountId: "c724a178-3621-41bb-bdb5-7b6ca848c965",
+    };
+    const authJson = credentialsToAuthJson(creds);
+    const selection = authJsonToCredentialSelection(authJson);
+    expect(selection.keyType).toBe("CODEX_OAUTH");
+    // A single OAuth credential: first slot of a pool of one.
+    expect(selection.index).toBe(0);
+    expect(selection.total).toBe(1);
+    // "8c965" is the last five characters of the account id above.
+    expect(selection.keySuffix).toBe("8c965");
+    expect(selection.selected).toBe(creds.accountId);
+  });
+});
+// Confirms the exported CLIENT_ID equals the public client id literal.
+describe("no secrets in source", () => {
+  it("CLIENT_ID is the public OpenAI client id", () => {
+    const publicClientId = "app_EMoamEEZ73f0CkXaXp7hrann";
+    expect(CLIENT_ID).toBe(publicClientId);
+  });
+});

package/src/tests/error-tracker.test.ts CHANGED Viewed

@@ -295,6 +295,24 @@ describe("parseStderrForErrors", () => {
     expect(tracker.hasErrors()).toBe(true);
   });
+  // A "hit your limit" message must be recorded as a single stderr error.
+  test("detects 'hit your limit' as rate limit error", () => {
+    const stderrText = "You've hit your limit for the day";
+    const tracker = new SessionErrorTracker();
+    parseStderrForErrors(stderrText, tracker);
+    expect(tracker.hasErrors()).toBe(true);
+    const recorded = tracker.getErrors();
+    expect(recorded).toHaveLength(1);
+    expect(recorded[0]!.type).toBe("stderr_error");
+    expect(recorded[0]!.message).toBe(stderrText);
+  });
+  // Detection ignores case; the stored message keeps the original casing.
+  test("detects 'hit your limit' case-insensitively", () => {
+    const stderrText = "Hit Your Limit · resets 3pm (UTC)";
+    const tracker = new SessionErrorTracker();
+    parseStderrForErrors(stderrText, tracker);
+    expect(tracker.hasErrors()).toBe(true);
+    const [firstError] = tracker.getErrors();
+    expect(firstError!.message).toBe(stderrText);
+  });
   test("detects authentication errors", () => {
     const tracker = new SessionErrorTracker();
     parseStderrForErrors("Authentication failed: invalid key", tracker);
@@ -368,6 +386,37 @@ describe("parseStderrForErrors", () => {
   });
 });
+describe("rate limit detection regex (runner)", () => {
+  // Local copy of the pattern runner.ts uses to detect rate-limited failures from
+  // credential errors. NOTE(review): it is not imported, so keep it in sync by hand.
+  const rateLimitRegex = /rate.?limit|hit your limit/i;
+  // One [test title, sample message] pair per positive case.
+  const matchingCases: Array<[string, string]> = [
+    ["matches 'rate limit' with space", "Rate limit hit: Too many requests"],
+    ["matches 'rate_limit' with underscore", "rate_limit exceeded"],
+    ["matches 'ratelimit' without separator", "ratelimit error"],
+    ["matches 'hit your limit' message", "You've hit your limit · resets 3pm (UTC)"],
+    ["matches 'Hit Your Limit' case-insensitively", "Hit Your Limit"],
+  ];
+  for (const [title, sample] of matchingCases) {
+    test(title, () => {
+      expect(rateLimitRegex.test(sample)).toBe(true);
+    });
+  }
+  test("does not match unrelated errors", () => {
+    const unrelated = ["Authentication failed", "Server error 500", "Connection timeout"];
+    for (const sample of unrelated) {
+      expect(rateLimitRegex.test(sample)).toBe(false);
+    }
+  });
+});
 describe("parseRateLimitResetTime", () => {
   test("parses 'resets 3pm (UTC)' format", () => {
     const result = parseRateLimitResetTime(

package/src/tests/workflow-engine-v2.test.ts CHANGED Viewed

@@ -432,7 +432,7 @@ describe("Workflow Engine v2 (Phase 3)", () => {
       expect(steps).toHaveLength(2);
     });
-    test("validation halt (mustPass) fails the run", async () => {
+    test("validation halt (mustPass) fails the run when all branches fail", async () => {
       const registry = createTestRegistry();
       const def: WorkflowDefinition = {
         nodes: [
@@ -456,13 +456,109 @@ describe("Workflow Engine v2 (Phase 3)", () => {
       const run = getWorkflowRun(runId);
       expect(run!.status).toBe("failed");
-      expect(run!.error).toContain("Validation failed");
+      expect(run!.error).toContain("Failed nodes: step1");
       const steps = getWorkflowRunStepsByRunId(runId);
       const nodeIds = steps.map((s) => s.nodeId);
       expect(nodeIds).not.toContain("step2");
     });
+    test("linear workflow: mustPass failure on non-entry node marks run as failed", async () => {
+      // Regression: when the failing mustPass node is NOT the entry node, the
+      // entry node's "completed" status must not count toward hasCompletedSteps,
+      // otherwise the run is incorrectly marked as partial-failure instead of failed.
+      const registry = createTestRegistry();
+      // Linear chain: trigger -> validator (mustPass validation) -> action.
+      const def: WorkflowDefinition = {
+        nodes: [
+          {
+            id: "trigger",
+            type: "echo",
+            config: { message: "entry node completes" },
+            next: "validator",
+          },
+          {
+            id: "validator",
+            type: "echo",
+            config: { message: "will fail validation" },
+            // presumably the test registry's "validate" executor fails when
+            // shouldFail is true — confirm against createTestRegistry
+            validation: {
+              executor: "validate",
+              config: { shouldFail: true },
+              mustPass: true,
+            },
+            next: "action",
+          },
+          { id: "action", type: "echo", config: { message: "never reached" } },
+        ],
+      };
+      const workflow = makeWorkflow(def);
+      const runId = await startWorkflowExecution(workflow, {}, registry);
+      const run = getWorkflowRun(runId);
+      // Run should be failed — no non-entry step completed
+      expect(run!.status).toBe("failed");
+      expect(run!.error).toContain("Failed nodes: validator");
+      const steps = getWorkflowRunStepsByRunId(runId);
+      const nodeIds = steps.map((s) => s.nodeId);
+      // Both the entry node and the validator have step records; the successor
+      // of the failed validator must have none.
+      expect(nodeIds).toContain("trigger");
+      expect(nodeIds).toContain("validator");
+      expect(nodeIds).not.toContain("action");
+    });
+    test("mustPass failure cancels only the failed branch, not parallel branches", async () => {
+      const registry = createTestRegistry();
+      // Fan-out graph: start -> [branchA -> afterA, branchB -> afterB].
+      // Only branchA carries a mustPass validation configured to fail.
+      const def: WorkflowDefinition = {
+        nodes: [
+          {
+            id: "start",
+            type: "echo",
+            config: { message: "begin" },
+            next: ["branchA", "branchB"],
+          },
+          {
+            id: "branchA",
+            type: "echo",
+            config: { message: "branch A will fail validation" },
+            validation: {
+              executor: "validate",
+              config: { shouldFail: true },
+              mustPass: true,
+            },
+            next: "afterA",
+          },
+          { id: "afterA", type: "echo", config: { message: "after A — should NOT execute" } },
+          {
+            id: "branchB",
+            type: "echo",
+            config: { message: "branch B succeeds" },
+            next: "afterB",
+          },
+          { id: "afterB", type: "echo", config: { message: "after B — should execute" } },
+        ],
+      };
+      const workflow = makeWorkflow(def);
+      const runId = await startWorkflowExecution(workflow, {}, registry);
+      const run = getWorkflowRun(runId);
+      // Run should complete (not fail) because branchB succeeded
+      expect(run!.status).toBe("completed");
+      // Should note partial failure
+      expect(run!.error).toContain("Partial failure");
+      expect(run!.error).toContain("branchA");
+      const steps = getWorkflowRunStepsByRunId(runId);
+      const nodeIds = steps.map((s) => s.nodeId);
+      // branchA's successor should NOT have executed
+      expect(nodeIds).not.toContain("afterA");
+      // branchB's successor SHOULD have executed
+      expect(nodeIds).toContain("afterB");
+      // branchA step should be marked as failed
+      const branchAStep = steps.find((s) => s.nodeId === "branchA");
+      expect(branchAStep!.status).toBe("failed");
+    });
     test("validation failure without mustPass is advisory (allows completion)", async () => {
       const registry = createTestRegistry();
       const def: WorkflowDefinition = {

package/src/utils/credentials.ts CHANGED Viewed

@@ -4,6 +4,7 @@ export const CREDENTIAL_POOL_VARS = [
   "ANTHROPIC_API_KEY",
   "OPENROUTER_API_KEY",
   "OPENAI_API_KEY",
+  "CODEX_OAUTH",
 ] as const;
 /**
@@ -21,7 +22,7 @@ export const PROVIDER_CREDENTIAL_VARS: Record<string, readonly string[]> = {
   claude: ["CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_API_KEY"],
   // pi-mono accepts either router or anthropic keys
   pi: ["OPENROUTER_API_KEY", "ANTHROPIC_API_KEY"],
-  codex: ["OPENAI_API_KEY"],
+  codex: ["OPENAI_API_KEY", "CODEX_OAUTH"],
 };
 /**
@@ -40,6 +41,7 @@ export function deriveProviderFromKeyType(keyType: string): string {
     case "OPENROUTER_API_KEY":
       return "pi";
     case "OPENAI_API_KEY":
+    case "CODEX_OAUTH":
       return "codex";
     default:
       return "claude";

package/src/utils/error-tracker.ts CHANGED Viewed

@@ -226,7 +226,12 @@ export function parseStderrForErrors(stderr: string, tracker: SessionErrorTracke
   const lower = stderr.toLowerCase();
   const firstLine = stderr.trim().split("\n")[0] ?? stderr.trim();
-  if (lower.includes("rate limit") || lower.includes("rate_limit") || lower.includes("429")) {
+  if (
+    lower.includes("rate limit") ||
+    lower.includes("rate_limit") ||
+    lower.includes("429") ||
+    lower.includes("hit your limit")
+  ) {
     tracker.addStderrError(firstLine);
   } else if (
     lower.includes("authentication") ||

package/src/workflows/checkpoint.ts CHANGED Viewed

@@ -37,6 +37,7 @@ export function checkpointStepFailure(
   error: string,
   retryCount: number,
   retryPolicy?: RetryPolicy,
+  options?: { markRunFailed?: boolean },
 ): { shouldRetry: boolean } {
   const now = new Date().toISOString();
@@ -55,7 +56,7 @@ export function checkpointStepFailure(
     return { shouldRetry: true };
   }
-  // No retries left — mark step and run failed
+  // No retries left — mark step failed, and optionally the run too
   // Clear nextRetryAt so the poller stops picking this step up
   updateWorkflowRunStep(stepId, {
     status: "failed",
@@ -64,11 +65,14 @@ export function checkpointStepFailure(
     nextRetryAt: null,
   });
-  updateWorkflowRun(runId, {
-    status: "failed",
-    error: `Step failed: ${error}`,
-    finishedAt: now,
-  });
+  const markRunFailed = options?.markRunFailed ?? true;
+  if (markRunFailed) {
+    updateWorkflowRun(runId, {
+      status: "failed",
+      error: `Step failed: ${error}`,
+      finishedAt: now,
+    });
+  }
   return { shouldRetry: false };
 }

package/src/workflows/engine.ts CHANGED Viewed

@@ -244,13 +244,16 @@ export async function walkGraph(
     // Collect successors and check for errors/pauses
     const nextBatch = new Map<string, WorkflowNode>();
     let hasWaiting = false;
-    let hasFailed = false;
     for (let i = 0; i < results.length; i++) {
       const result = results[i]!;
       if (result.outcome === "failed") {
-        hasFailed = true;
-        break;
+        // Check if the run was already marked failed in DB (e.g., executor error).
+        // If so, stop immediately. If not (mustPass validation), skip this
+        // node's successors but continue processing other branches.
+        const currentRun = getWorkflowRun(runId);
+        if (currentRun?.status === "failed") return;
+        continue;
       }
       if (result.outcome === "waiting") {
         hasWaiting = true;
@@ -267,7 +270,6 @@ export async function walkGraph(
       }
     }
-    if (hasFailed) return; // Run already marked failed in executeStep
     if (hasWaiting) return; // Run paused, will be resumed by event
     // Convergence check — only wait for predecessors with active edges to
@@ -302,16 +304,46 @@ export async function walkGraph(
     const hasPendingRetries = finalSteps.some(
       (s) => s.status === "failed" && s.nextRetryAt != null,
     );
+    const failedSteps = finalSteps.filter((s) => s.status === "failed" && s.nextRetryAt == null);
+    // Exclude entry/trigger nodes when checking for completed steps — a trigger
+    // completing doesn't mean a meaningful branch succeeded. Without this filter,
+    // a linear workflow (trigger → mustPass validator → action) would be marked
+    // as partial-failure instead of failed when the validator fails.
+    const entryNodeIds = new Set(findEntryNodes(def).map((n) => n.id));
+    const hasCompletedSteps = finalSteps.some(
+      (s) => s.status === "completed" && !entryNodeIds.has(s.nodeId),
+    );
     if (hasWaitingSteps) {
       // Async tasks still in progress — set back to waiting for next event
       updateWorkflowRun(runId, { status: "waiting" });
     } else if (!hasPendingRetries) {
-      updateWorkflowRun(runId, {
-        status: "completed",
-        context: ctx,
-        finishedAt: new Date().toISOString(),
-      });
+      if (failedSteps.length > 0 && !hasCompletedSteps) {
+        // All branches failed — mark run as failed
+        const failedNodeIds = failedSteps.map((s) => s.nodeId).join(", ");
+        updateWorkflowRun(runId, {
+          status: "failed",
+          error: `All branches failed. Failed nodes: ${failedNodeIds}`,
+          context: ctx,
+          finishedAt: new Date().toISOString(),
+        });
+      } else if (failedSteps.length > 0) {
+        // Partial failure — some branches succeeded, some failed.
+        // Mark as completed with error noting partial failure.
+        const failedNodeIds = failedSteps.map((s) => s.nodeId).join(", ");
+        updateWorkflowRun(runId, {
+          status: "completed",
+          error: `Partial failure: nodes [${failedNodeIds}] failed (mustPass validation), but other branches completed successfully`,
+          context: ctx,
+          finishedAt: new Date().toISOString(),
+        });
+      } else {
+        updateWorkflowRun(runId, {
+          status: "completed",
+          context: ctx,
+          finishedAt: new Date().toISOString(),
+        });
+      }
     }
   }
 }
@@ -532,8 +564,8 @@ async function executeStep(
     if (validationResult.outcome === "halt") {
       const errorMsg = "Validation failed (mustPass)";
-      checkpointStepFailure(runId, stepId, errorMsg, 0);
-      throw new Error(errorMsg);
+      checkpointStepFailure(runId, stepId, errorMsg, 0, undefined, { markRunFailed: false });
+      return { outcome: "failed", successors: [] };
     }
     if (validationResult.outcome === "retry") {