npm - agent-relay - Versions diffs - 3.2.18 → 3.2.22 - Mend

agent-relay 3.2.18 → 3.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76)

package/bin/agent-relay-broker-darwin-arm64 +0 -0
package/bin/agent-relay-broker-darwin-x64 +0 -0
package/bin/agent-relay-broker-linux-arm64 +0 -0
package/bin/agent-relay-broker-linux-x64 +0 -0
package/dist/index.cjs +233 -55
package/dist/src/cli/commands/cloud.d.ts +1 -9
package/dist/src/cli/commands/cloud.d.ts.map +1 -1
package/dist/src/cli/commands/cloud.js +326 -323
package/dist/src/cli/commands/cloud.js.map +1 -1
package/dist/src/cli/commands/connect.d.ts.map +1 -1
package/dist/src/cli/commands/connect.js +6 -10
package/dist/src/cli/commands/connect.js.map +1 -1
package/package.json +16 -10
package/packages/acp-bridge/package.json +2 -2
package/packages/brand/README.md +36 -0
package/packages/brand/brand.css +226 -0
package/packages/brand/package.json +20 -0
package/packages/cloud/dist/api-client.d.ts +33 -0
package/packages/cloud/dist/api-client.d.ts.map +1 -0
package/packages/cloud/dist/api-client.js +123 -0
package/packages/cloud/dist/api-client.js.map +1 -0
package/packages/cloud/dist/auth.d.ts +13 -0
package/packages/cloud/dist/auth.d.ts.map +1 -0
package/packages/cloud/dist/auth.js +248 -0
package/packages/cloud/dist/auth.js.map +1 -0
package/packages/cloud/dist/index.d.ts +5 -0
package/packages/cloud/dist/index.d.ts.map +1 -0
package/packages/cloud/dist/index.js +5 -0
package/packages/cloud/dist/index.js.map +1 -0
package/packages/cloud/dist/types.d.ts +73 -0
package/packages/cloud/dist/types.d.ts.map +1 -0
package/packages/cloud/dist/types.js +19 -0
package/packages/cloud/dist/types.js.map +1 -0
package/packages/cloud/dist/workflows.d.ts +34 -0
package/packages/cloud/dist/workflows.d.ts.map +1 -0
package/packages/cloud/dist/workflows.js +389 -0
package/packages/cloud/dist/workflows.js.map +1 -0
package/packages/cloud/package.json +44 -0
package/packages/cloud/src/api-client.ts +169 -0
package/packages/cloud/src/auth.ts +314 -0
package/packages/cloud/src/index.ts +41 -0
package/packages/cloud/src/types.ts +97 -0
package/packages/cloud/src/workflows.ts +539 -0
package/packages/cloud/tsconfig.json +21 -0
package/packages/config/package.json +1 -1
package/packages/hooks/package.json +4 -4
package/packages/memory/package.json +2 -2
package/packages/openclaw/package.json +2 -2
package/packages/policy/package.json +2 -2
package/packages/sdk/dist/workflows/__tests__/e2big-and-verify.test.d.ts +2 -0
package/packages/sdk/dist/workflows/__tests__/e2big-and-verify.test.d.ts.map +1 -0
package/packages/sdk/dist/workflows/__tests__/e2big-and-verify.test.js +62 -0
package/packages/sdk/dist/workflows/__tests__/e2big-and-verify.test.js.map +1 -0
package/packages/sdk/dist/workflows/cli.js +46 -2
package/packages/sdk/dist/workflows/cli.js.map +1 -1
package/packages/sdk/dist/workflows/file-db.d.ts +2 -0
package/packages/sdk/dist/workflows/file-db.d.ts.map +1 -1
package/packages/sdk/dist/workflows/file-db.js +20 -3
package/packages/sdk/dist/workflows/file-db.js.map +1 -1
package/packages/sdk/dist/workflows/runner.d.ts +10 -1
package/packages/sdk/dist/workflows/runner.d.ts.map +1 -1
package/packages/sdk/dist/workflows/runner.js +233 -50
package/packages/sdk/dist/workflows/runner.js.map +1 -1
package/packages/sdk/package.json +2 -2
package/packages/sdk/src/__tests__/resume-fallback.test.ts +415 -0
package/packages/sdk/src/__tests__/workflow-runner.test.ts +73 -2
package/packages/sdk/src/workflows/__tests__/e2big-and-verify.test.ts +117 -0
package/packages/sdk/src/workflows/cli.ts +53 -2
package/packages/sdk/src/workflows/file-db.ts +22 -3
package/packages/sdk/src/workflows/runner.ts +283 -49
package/packages/sdk-py/pyproject.toml +1 -1
package/packages/sdk-swift/Sources/AgentRelaySDK/RelayObserver.swift +2 -0
package/packages/telemetry/package.json +1 -1
package/packages/trajectory/package.json +2 -2
package/packages/user-directory/package.json +2 -2
package/packages/utils/package.json +2 -2

package/packages/sdk/src/workflows/cli.ts CHANGED Viewed

@@ -52,6 +52,21 @@ type ExecuteOptions = {
   previousRunId?: string;
 };
+/** Flags whose value is the argument that follows them. Single source of truth for CLI parsing. */
+const FLAGS_WITH_VALUES = new Set(['--resume', '--workflow', '--start-from', '--previous-run-id']);
+/**
+ * Locate the workflow YAML path among the raw CLI arguments.
+ *
+ * Returns the first argument that is neither a `--flag` nor the value consumed
+ * by one of FLAGS_WITH_VALUES, or `undefined` when no such argument exists.
+ */
+function getYamlPathArg(args: string[]): string | undefined {
+  let index = 0;
+  while (index < args.length) {
+    const current = args[index];
+    if (!current.startsWith('--')) {
+      return current;
+    }
+    // Step over the flag itself, plus its value when the flag takes one.
+    index += FLAGS_WITH_VALUES.has(current) ? 2 : 1;
+  }
+  return undefined;
+}
 interface RenderableTask {
   output?: string;
   title: string;
@@ -302,6 +317,7 @@ async function runWithListr(
 async function main(): Promise<void> {
   const args = process.argv.slice(2);
+  const yamlPath = getYamlPathArg(args);
   if (args.length === 0 || args.includes('--help')) {
     printUsage();
@@ -358,7 +374,37 @@ async function main(): Promise<void> {
           break;
       }
     });
-    const result = await runner.resume(runId);
+    let result: RunnerResult;
+    try {
+      const resumeConfig = yamlPath ? await runner.parseYamlFile(yamlPath) : undefined;
+      if (resumeConfig) {
+        console.warn(
+          chalk.yellow(
+            '[workflow] warning: resuming with current config from disk — ' +
+              'if the workflow YAML changed since the original run, behaviour may differ'
+          )
+        );
+      }
+      result = await runner.resume(runId, undefined, resumeConfig);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      const isRunNotFound = message.startsWith(`Run "${runId}" not found`);
+      if (isRunNotFound) {
+        if (fileDb.hasStepOutputs(runId)) {
+          console.error(
+            chalk.red(
+              `Error: ${message}. Step outputs exist for this run, but persisted run state is missing from ${dbPath}. ` +
+                `Use --start-from with --previous-run-id ${runId} to recover from the cached step outputs instead.`
+            )
+          );
+        } else {
+          console.error(chalk.red(`Error: ${message}`));
+        }
+      } else {
+        console.error(chalk.red(`Error: ${message}`));
+      }
+      process.exit(1);
+    }
     if (result.status === 'completed') {
       console.log(chalk.green('\nWorkflow completed successfully.'));
@@ -371,7 +417,6 @@ async function main(): Promise<void> {
   }
   // ── Normal / validate / dry-run mode ──────────────────────────────────────
-  const yamlPath = args[0];
   let workflowName: string | undefined;
   const workflowIdx = args.indexOf('--workflow');
@@ -391,6 +436,12 @@ async function main(): Promise<void> {
     previousRunId = args[prevRunIdx + 1];
   }
+  if (!yamlPath) {
+    console.error(chalk.red('Error: workflow YAML path is required'));
+    printUsage();
+    process.exit(1);
+  }
   const isValidate = args.includes('--validate');
   const isDryRun = !!process.env.DRY_RUN;

package/packages/sdk/src/workflows/file-db.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { appendFileSync, mkdirSync, readFileSync } from 'node:fs';
+import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync } from 'node:fs';
 import path from 'node:path';
 import type { WorkflowRunRow, WorkflowStepRow } from './types.js';
@@ -24,6 +24,7 @@ export class JsonFileWorkflowDb implements WorkflowDb {
   /** Whether the storage directory is writable. False = silent no-op mode. */
   private readonly writable: boolean;
+  private appendFailedOnce = false;
   constructor(filePath: string) {
     this.filePath = filePath;
@@ -43,14 +44,32 @@ export class JsonFileWorkflowDb implements WorkflowDb {
     return this.writable;
   }
+  /**
+   * Report whether the `step-outputs/<runId>` directory next to the database
+   * file exists and contains at least one entry. Any error raised while
+   * checking is treated as "no outputs".
+   */
+  hasStepOutputs(runId: string): boolean {
+    try {
+      const outputsDir = path.join(path.dirname(this.filePath), 'step-outputs', runId);
+      if (!existsSync(outputsDir)) {
+        return false;
+      }
+      const cachedEntries = readdirSync(outputsDir);
+      return cachedEntries.length > 0;
+    } catch {
+      return false;
+    }
+  }
   // ── Private helpers ─────────────────────────────────────────────────────
   private append(entry: DbEntry): void {
     if (!this.writable) return;
     try {
       appendFileSync(this.filePath, JSON.stringify(entry) + '\n', 'utf8');
-    } catch {
-      // Non-critical — workflow execution continues; resume won't be available.
+    } catch (err) {
+      if (!this.appendFailedOnce) {
+        this.appendFailedOnce = true;
+        console.warn(
+          '[workflow] warning: failed to write run state to ' +
+            this.filePath +
+            ' — --resume will not be available for this run. Use --start-from instead. ' +
+            'Error: ' +
+            (err instanceof Error ? err.message : String(err))
+        );
+      }
     }
   }

package/packages/sdk/src/workflows/runner.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import { randomBytes } from 'node:crypto';
 import {
   createWriteStream,
   existsSync,
+  mkdtempSync,
   mkdirSync,
   readFileSync,
   readdirSync,
@@ -17,7 +18,8 @@ import {
   writeFileSync,
 } from 'node:fs';
 import type { Dirent, WriteStream } from 'node:fs';
-import { readFile, writeFile, mkdir } from 'node:fs/promises';
+import { readFile, writeFile, mkdir, unlink } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
 import path from 'node:path';
 import chalk from 'chalk';
@@ -97,6 +99,7 @@ interface SpawnResult {
   output: string;
   exitCode?: number;
   exitSignal?: string;
+  promptTaskText?: string;
 }
 /** Error carrying exit code/signal from a failed subprocess spawn. */
@@ -364,6 +367,7 @@ export class WorkflowRunner {
   private readonly activeReviewers = new Map<string, number>();
   /** Structured CLI session reports captured during the current run, keyed by step name. */
   private readonly agentReports = new Map<string, CliSessionReport>();
+  private static readonly PTY_TASK_ARG_SIZE_LIMIT = 2 * 1024 * 1024; // 2 MB
   constructor(options: WorkflowRunnerOptions = {}) {
     this.db = options.db ?? new InMemoryWorkflowDb();
@@ -1948,14 +1952,25 @@ export class WorkflowRunner {
   }
   /** Resume a previously paused or partially completed run. */
-  async resume(runId: string, vars?: VariableContext): Promise<WorkflowRunRow> {
+  async resume(runId: string, vars?: VariableContext, config?: RelayYamlConfig): Promise<WorkflowRunRow> {
     // Set up abort controller early so callers can abort() even during setup
     this.abortController = new AbortController();
     this.paused = false;
-    const run = await this.db.getRun(runId);
+    let run = await this.db.getRun(runId);
+    let stepStates = new Map<string, StepState>();
     if (!run) {
-      throw new Error(`Run "${runId}" not found`);
+      const reconstructed = this.reconstructRunFromCache(runId, config);
+      if (!reconstructed) {
+        throw new Error(`Run "${runId}" not found (no database entry or cached step outputs)`);
+      }
+      this.log('[resume] Reconstructing run from cached step outputs (workflow-runs.jsonl missing)');
+      run = reconstructed.run;
+      stepStates = reconstructed.stepStates;
+      await this.db.insertRun(run);
+      for (const [, state] of stepStates) {
+        await this.db.insertStep(state.row);
+      }
     }
     this.persistRunIdHint(runId);
@@ -1963,25 +1978,26 @@ export class WorkflowRunner {
       throw new Error(`Run "${runId}" is in status "${run.status}" and cannot be resumed`);
     }
-    const config = vars ? this.resolveVariables(run.config, vars) : run.config;
+    const resolvedConfig = vars ? this.resolveVariables(run.config, vars) : run.config;
     // Resolve path definitions (same as execute()) so workdir lookups work on resume
-    const pathResult = this.resolvePathDefinitions(config.paths, this.cwd);
+    const pathResult = this.resolvePathDefinitions(resolvedConfig.paths, this.cwd);
     if (pathResult.errors.length > 0) {
       throw new Error(`Path validation failed:\n  ${pathResult.errors.join('\n  ')}`);
     }
     this.resolvedPaths = pathResult.resolved;
-    const workflows = config.workflows ?? [];
+    const workflows = resolvedConfig.workflows ?? [];
     const workflow = workflows.find((w) => w.name === run.workflowName);
     if (!workflow) {
       throw new Error(`Workflow "${run.workflowName}" not found in stored config`);
     }
-    const existingSteps = await this.db.getStepsByRunId(runId);
-    const stepStates = new Map<string, StepState>();
-    for (const stepRow of existingSteps) {
-      stepStates.set(stepRow.stepName, { row: stepRow });
+    if (stepStates.size === 0) {
+      const existingSteps = await this.db.getStepsByRunId(runId);
+      for (const stepRow of existingSteps) {
+        stepStates.set(stepRow.stepName, { row: stepRow });
+      }
     }
     // Reset failed steps to pending for retry
@@ -2002,7 +2018,7 @@ export class WorkflowRunner {
     return this.runWorkflowCore({
       run,
       workflow,
-      config,
+      config: resolvedConfig,
       stepStates,
       isResume: true,
     });
@@ -3539,6 +3555,7 @@ export class WorkflowRunner {
         let ownerOutput: string;
         let ownerElapsed: number;
         let completionReason: WorkflowStepCompletionReason | undefined;
+        let promptTaskText: string | undefined;
         if (usesDedicatedOwner) {
           const result = await this.executeSupervisedAgentStep(
@@ -3592,6 +3609,12 @@ export class WorkflowRunner {
                   : undefined,
               });
           const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
+          promptTaskText =
+            typeof spawnResult === 'string'
+              ? effectiveOwner.interactive === false
+                ? undefined
+                : ownerTask
+              : spawnResult.promptTaskText ?? ownerTask;
           lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
           lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
           ownerElapsed = Date.now() - ownerStartTime;
@@ -3602,8 +3625,8 @@ export class WorkflowRunner {
                 step,
                 output,
                 output,
-                ownerTask,
-                resolvedTask
+                promptTaskText ?? ownerTask,
+                promptTaskText ?? ownerTask
               );
               completionReason = completionDecision.completionReason;
             } catch (error) {
@@ -3654,7 +3677,7 @@ export class WorkflowRunner {
             step.verification,
             specialistOutput,
             step.name,
-            effectiveOwner.interactive === false ? undefined : resolvedTask
+            promptTaskText
           );
           completionReason = verificationResult.completionReason;
         }
@@ -4028,7 +4051,14 @@ export class WorkflowRunner {
           detail: `Worker ${workerRuntimeName} exited`,
           raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
         });
-        if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
+        if (
+          step.verification?.type === 'output_contains' &&
+          this.outputContainsVerificationToken(
+            result.output,
+            step.verification.value,
+            result.promptTaskText
+          )
+        ) {
           this.log(
             `[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`
           );
@@ -4079,13 +4109,14 @@ export class WorkflowRunner {
       const ownerElapsed = Date.now() - ownerStartTime;
       const ownerOutput = ownerResultObj.output;
       this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
-      const specialistOutput = (await workerPromise).output;
+      const workerResultObj = await workerPromise;
+      const specialistOutput = workerResultObj.output;
       const completionDecision = this.resolveOwnerCompletionDecision(
         step,
         ownerOutput,
         specialistOutput,
-        supervisorTask,
-        resolvedTask
+        ownerResultObj.promptTaskText ?? supervisorTask,
+        workerResultObj.promptTaskText ?? specialistTask
       );
       return {
         specialistOutput,
@@ -4359,6 +4390,10 @@ export class WorkflowRunner {
     injectedTaskText: string
   ): boolean {
     const marker = `STEP_COMPLETE:${step.name}`;
+    const strippedOutput = this.stripInjectedTaskEcho(output, injectedTaskText);
+    if (strippedOutput.includes(marker)) {
+      return true;
+    }
     const taskHasMarker = injectedTaskText.includes(marker);
     const first = output.indexOf(marker);
     if (first === -1) {
@@ -4448,6 +4483,65 @@ export class WorkflowRunner {
       .join('\n');
   }
+  /**
+   * Remove the first verbatim echo of the injected task text from `output`.
+   *
+   * The task text is searched for as given, then with CRLF collapsed to LF,
+   * then with every LF expanded to CRLF; the first variant found is cut out.
+   * Returns `output` unchanged when no task text is supplied or none matches.
+   */
+  private stripInjectedTaskEcho(output: string, injectedTaskText?: string): string {
+    if (!injectedTaskText) {
+      return output;
+    }
+    // A Set keeps insertion order and drops duplicate variants.
+    const variants = new Set<string>([
+      injectedTaskText,
+      injectedTaskText.replace(/\r\n/g, '\n'),
+      injectedTaskText.replace(/\n/g, '\r\n'),
+    ]);
+    for (const variant of variants) {
+      if (variant.length === 0) {
+        continue;
+      }
+      const echoStart = output.indexOf(variant);
+      if (echoStart === -1) {
+        continue;
+      }
+      const beforeEcho = output.slice(0, echoStart);
+      const afterEcho = output.slice(echoStart + variant.length);
+      return beforeEcho + afterEcho;
+    }
+    return output;
+  }
+  /**
+   * Check whether `token` appears in `output` once the echoed task text has
+   * been stripped. An empty token never matches.
+   */
+  private outputContainsVerificationToken(
+    output: string,
+    token: string,
+    injectedTaskText?: string
+  ): boolean {
+    if (token) {
+      const echoFreeOutput = this.stripInjectedTaskEcho(output, injectedTaskText);
+      return echoFreeOutput.includes(token);
+    }
+    return false;
+  }
+  /**
+   * Decide how an interactively spawned agent receives its task text.
+   *
+   * Tasks whose UTF-8 size is within PTY_TASK_ARG_SIZE_LIMIT are passed through
+   * unchanged. Larger tasks are written to a fresh temp file (mode 0600,
+   * exclusive create) and replaced by a short prompt that points at that file.
+   *
+   * @param agentName Used only to label the temp file; sanitised before use.
+   * @param taskText Full task text for the agent.
+   * @returns `spawnTaskText` to hand to the spawn call, `promptTaskText` as the
+   *   text the agent is actually prompted with, and `taskTmpFile` when a file
+   *   was written.
+   */
+  private prepareInteractiveSpawnTask(
+    agentName: string,
+    taskText: string
+  ): { spawnTaskText: string; promptTaskText: string; taskTmpFile?: string } {
+    if (Buffer.byteLength(taskText, 'utf8') <= WorkflowRunner.PTY_TASK_ARG_SIZE_LIMIT) {
+      return {
+        spawnTaskText: taskText,
+        promptTaskText: taskText,
+      };
+    }
+    // Keep only filename-safe characters in the label: a name containing a path
+    // separator would otherwise make the exclusive write fail on a missing
+    // sub-directory or place the file outside the temp directory created below.
+    const safeAgentName = agentName.replace(/[^A-Za-z0-9._-]/g, '_');
+    const taskTmpDir = mkdtempSync(path.join(tmpdir(), 'relay-pty-task-'));
+    const taskTmpFile = path.join(taskTmpDir, `${safeAgentName}-${Date.now()}.txt`);
+    // NOTE(review): the spawn cleanup shown in this diff unlinks taskTmpFile only;
+    // taskTmpDir appears to be left behind — confirm and remove it there if so.
+    writeFileSync(taskTmpFile, taskText, { encoding: 'utf8', mode: 0o600, flag: 'wx' });
+    const promptTaskText =
+      `TASK_FILE:${taskTmpFile}\n` +
+      'Read that file completely before taking any action.\n' +
+      'Treat the file contents as the full workflow task and follow them exactly.\n' +
+      'Do not ask for the task again.';
+    return {
+      spawnTaskText: promptTaskText,
+      promptTaskText,
+      taskTmpFile,
+    };
+  }
   private firstMeaningfulLine(output: string): string | undefined {
     return output
       .split('\n')
@@ -5218,6 +5312,7 @@ export class WorkflowRunner {
       '(b) outputting the exact text "/exit" on its own line as a fallback. ' +
       'Do not wait for further input — terminate immediately after finishing. ' +
       'Do NOT spawn sub-agents unless the task explicitly requires it.';
+    const preparedTask = this.prepareInteractiveSpawnTask(agentName, taskWithExit);
     // Register PTY output listener before spawning so we capture everything
     this.ptyOutputBuffers.set(agentName, []);
@@ -5257,7 +5352,7 @@ export class WorkflowRunner {
         model: agentDef.constraints?.model,
         args: interactiveSpawnPolicy.args,
         channels: agentChannels,
-        task: taskWithExit,
+        task: preparedTask.spawnTaskText,
         idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
         cwd: agentCwd,
       });
@@ -5368,6 +5463,7 @@ export class WorkflowRunner {
         agentDef,
         step,
         timeoutMs,
+        preparedTask.promptTaskText,
         options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole)
       );
@@ -5385,7 +5481,7 @@ export class WorkflowRunner {
             step.verification,
             ptyOutput,
             step.name,
-            undefined,
+            preparedTask.promptTaskText,
             { allowFailure: true }
           );
           if (verificationResult.passed) {
@@ -5446,6 +5542,9 @@ export class WorkflowRunner {
       this.unregisterWorker(agentName);
       this.supervisedRuntimeAgents.delete(agentName);
       this.runtimeStepAgents.delete(agentName);
+      if (preparedTask.taskTmpFile) {
+        await unlink(preparedTask.taskTmpFile).catch(() => undefined);
+      }
     }
     let output: string;
@@ -5480,6 +5579,7 @@ export class WorkflowRunner {
       output,
       exitCode: agent?.exitCode,
       exitSignal: agent?.exitSignal,
+      promptTaskText: preparedTask.promptTaskText,
     };
   }
@@ -5547,6 +5647,7 @@ export class WorkflowRunner {
     agentDef: AgentDefinition,
     step: WorkflowStep,
     timeoutMs?: number,
+    promptTaskText?: string,
     preserveIdleSupervisor = false
   ): Promise<'exited' | 'timeout' | 'released' | 'force-released'> {
     const nudgeConfig = this.currentConfig?.swarm.idleNudge;
@@ -5572,21 +5673,14 @@ export class WorkflowRunner {
         ]);
         if (result.kind === 'idle' && result.result === 'idle') {
           // Check verification before treating idle as complete.
-          // Mirror runVerification's double-occurrence guard: if the task text
-          // contains the token (from the prompt instruction), require a second
-          // occurrence from the agent's actual output to avoid false positives.
           if (step.verification && step.verification.type === 'output_contains') {
             const token = step.verification.value;
             const ptyOutput = (this.ptyOutputBuffers.get(agent.name) ?? []).join('');
-            const taskText = step.task ?? '';
-            const taskHasToken = taskText.includes(token);
-            let verificationPassed = true;
-            if (taskHasToken) {
-              const first = ptyOutput.indexOf(token);
-              verificationPassed = first !== -1 && ptyOutput.includes(token, first + token.length);
-            } else {
-              verificationPassed = ptyOutput.includes(token);
-            }
+            const verificationPassed = this.outputContainsVerificationToken(
+              ptyOutput,
+              token,
+              promptTaskText
+            );
             if (!verificationPassed) {
               // The broker fires agent_idle only once per idle transition.
               // If the agent is still working (will produce output then idle again),
@@ -5798,23 +5892,8 @@ export class WorkflowRunner {
     switch (check.type) {
       case 'output_contains': {
-        // Guard against false positives: the PTY captures the injected task text
-        // verbatim, so if the verification token appears in the task itself the
-        // check would pass immediately without the agent doing any real work.
-        // When the task contains the token, require a SECOND occurrence — one
-        // from the task injection and one from the agent's actual response.
         const token = check.value;
-        const taskHasToken = injectedTaskText ? injectedTaskText.includes(token) : false;
-        if (taskHasToken) {
-          const first = output.indexOf(token);
-          const hasSecond = first !== -1 && output.includes(token, first + token.length);
-          if (!hasSecond) {
-            return fail(
-              `Verification failed for "${stepName}": output does not contain "${token}" ` +
-                `(token found only in task injection — agent must output it explicitly)`
-            );
-          }
-        } else if (!output.includes(token)) {
+        if (!this.outputContainsVerificationToken(output, token, injectedTaskText)) {
           return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
         }
         break;
@@ -6480,8 +6559,16 @@ export class WorkflowRunner {
       .slice(0, 32);
   }
+  /**
+   * Guard against run IDs that could escape the step-output directory once
+   * joined into a path. Throws when the ID contains a slash or backslash, is
+   * exactly "." or "..", or contains ".." anywhere.
+   */
+  private validateRunId(runId: string): void {
+    const hasSeparator = runId.includes('/') || runId.includes('\\');
+    const isDotSegment = runId === '.' || runId === '..';
+    const hasParentReference = runId.includes('..');
+    if (hasSeparator || isDotSegment || hasParentReference) {
+      throw new Error(`Invalid runId: "${runId}" contains path traversal characters`);
+    }
+  }
   /** Directory for persisted step outputs: .agent-relay/step-outputs/{runId}/ */
   private getStepOutputDir(runId: string): string {
+    this.validateRunId(runId);
     return path.join(this.cwd, '.agent-relay', 'step-outputs', runId);
   }
@@ -6571,6 +6658,153 @@ export class WorkflowRunner {
     }
   }
+  /**
+   * Pick the workflow that best explains a set of cached step names.
+   *
+   * A lone workflow is returned unconditionally. With several workflows and an
+   * empty cache the choice is ambiguous and `null` is returned. Otherwise only
+   * workflows that define every cached step are eligible, and the one with the
+   * most matching steps wins (earliest in the list on a tie).
+   */
+  private matchWorkflowFromCache(
+    workflows: WorkflowDefinition[],
+    cachedStepNames: Set<string>
+  ): WorkflowDefinition | null {
+    if (workflows.length === 1) {
+      return workflows[0];
+    }
+    if (cachedStepNames.size === 0) {
+      // Nothing to disambiguate with when more than one workflow exists.
+      this.log('[resume] Multiple workflows in config with empty cache — cannot disambiguate');
+      return null;
+    }
+    let bestWorkflow: WorkflowDefinition | null = null;
+    let bestMatchCount = -1;
+    for (const candidate of workflows) {
+      // A cached step the candidate does not define rules the candidate out.
+      const definesEveryCachedStep = [...cachedStepNames].every((name) =>
+        candidate.steps.some((step) => step.name === name)
+      );
+      if (!definesEveryCachedStep) {
+        continue;
+      }
+      const matchCount = candidate.steps.filter((step) => cachedStepNames.has(step.name)).length;
+      // Strictly-greater keeps the earliest candidate on ties.
+      if (matchCount > bestMatchCount) {
+        bestWorkflow = candidate;
+        bestMatchCount = matchCount;
+      }
+    }
+    return bestWorkflow;
+  }
+  /**
+   * Rebuild a run row and its step states from the cached step-output files of
+   * `runId`, for use when no database entry for the run exists.
+   *
+   * Config is taken from the `config` argument, else `this.currentConfig`, else
+   * `relay.yaml` in `this.cwd`. Returns `null` when the step-output directory
+   * is missing or unreadable, no config can be obtained, the config has no
+   * workflows, or no workflow matches the cached step names.
+   *
+   * The reconstructed run has status 'failed'. Steps with a cached `.md` output
+   * are 'completed'; the first remaining step whose dependencies are all
+   * completed is 'failed'; every other step is 'pending'.
+   */
+  private reconstructRunFromCache(
+    runId: string,
+    config?: RelayYamlConfig
+  ): { run: WorkflowRunRow; stepStates: Map<string, StepState> } | null {
+    const stepOutputDir = this.getStepOutputDir(runId);
+    if (!existsSync(stepOutputDir)) return null;
+    let resumeConfig = config ?? this.currentConfig;
+    if (!resumeConfig) {
+      // Attempt to load config from relay.yaml on disk (resume() may call before runWorkflowCore sets currentConfig)
+      const yamlPath = path.join(this.cwd, 'relay.yaml');
+      if (existsSync(yamlPath)) {
+        try {
+          const raw = readFileSync(yamlPath, 'utf-8');
+          resumeConfig = this.parseYamlString(raw, yamlPath);
+        } catch {
+          return null;
+        }
+      } else {
+        return null;
+      }
+    }
+    let entries: Dirent[];
+    try {
+      entries = readdirSync(stepOutputDir, { withFileTypes: true });
+    } catch {
+      return null;
+    }
+    // Cached step names are the base names of the regular `.md` files in the directory.
+    const cachedStepNames = new Set(
+      entries
+        .filter((entry) => entry.isFile() && entry.name.endsWith('.md'))
+        .map((entry) => entry.name.slice(0, -3))
+        .filter(Boolean)
+    );
+    const workflows = resumeConfig.workflows ?? [];
+    if (workflows.length === 0) return null;
+    // Empty cache directory is valid — all steps will be re-run
+    const workflow = this.matchWorkflowFromCache(workflows, cachedStepNames);
+    if (!workflow) return null;
+    // Use actual file modification times from cached outputs instead of synthetic timestamps
+    const stepMtimes = new Map<string, string>();
+    // Starts at "now" and is lowered to the oldest cached-output mtime found below.
+    let earliestMtime = Date.now();
+    for (const stepName of cachedStepNames) {
+      try {
+        const mdPath = path.join(stepOutputDir, `${stepName}.md`);
+        const reportPath = path.join(stepOutputDir, `${stepName}.report.json`);
+        const mdStat = existsSync(mdPath) ? statSync(mdPath) : null;
+        const reportStat = existsSync(reportPath) ? statSync(reportPath) : null;
+        // Use the latest mtime between .md and .report.json
+        const mtime = Math.max(mdStat?.mtimeMs ?? 0, reportStat?.mtimeMs ?? 0);
+        if (mtime > 0) {
+          stepMtimes.set(stepName, new Date(mtime).toISOString());
+          if (mtime < earliestMtime) earliestMtime = mtime;
+        }
+      } catch {
+        // Fall back to current time if stat fails
+      }
+    }
+    const fallbackTime = new Date().toISOString();
+    // Only cached names that belong to the chosen workflow count as completed.
+    const completedSteps = new Set(workflow.steps.filter((step) => cachedStepNames.has(step.name)).map((step) => step.name));
+    // Heuristic: mark the first eligible non-completed step as failed (the likely failure point)
+    const failedStepName = workflow.steps.find(
+      (step) => !completedSteps.has(step.name) && (step.dependsOn ?? []).every((dep) => completedSteps.has(dep))
+    )?.name;
+    const runStartedAt = new Date(earliestMtime).toISOString();
+    const run: WorkflowRunRow = {
+      id: runId,
+      workspaceId: this.workspaceId,
+      workflowName: workflow.name,
+      pattern: resumeConfig.swarm.pattern,
+      status: 'failed',
+      config: resumeConfig,
+      startedAt: runStartedAt,
+      createdAt: runStartedAt,
+      updatedAt: fallbackTime,
+    };
+    // Build one step row per workflow step, in workflow order.
+    const stepStates = new Map<string, StepState>();
+    for (const step of workflow.steps) {
+      const isNonAgent = step.type === 'deterministic' || step.type === 'worktree' || step.type === 'integration';
+      const cachedOutput = completedSteps.has(step.name) ? this.loadStepOutput(runId, step.name) : undefined;
+      const status: WorkflowStepStatus =
+        completedSteps.has(step.name) ? 'completed' : step.name === failedStepName ? 'failed' : 'pending';
+      const stepRow: WorkflowStepRow = {
+        id: this.generateId(),
+        runId,
+        stepName: step.name,
+        agentName: isNonAgent ? null : (step.agent ?? null),
+        stepType: isNonAgent ? (step.type as 'deterministic' | 'worktree' | 'integration') : 'agent',
+        status,
+        // The stored task text depends on the step type: command, branch, integration action, or agent task.
+        task:
+          step.type === 'deterministic'
+            ? (step.command ?? '')
+            : step.type === 'worktree'
+              ? (step.branch ?? '')
+              : step.type === 'integration'
+                ? (`${step.integration}.${step.action}`)
+                : (step.task ?? ''),
+        dependsOn: step.dependsOn ?? [],
+        output: cachedOutput,
+        error: status === 'failed' ? 'Recovered from cached step outputs' : undefined,
+        completedAt: status === 'completed' ? (stepMtimes.get(step.name) ?? fallbackTime) : undefined,
+        retryCount: 0,
+        createdAt: stepMtimes.get(step.name) ?? fallbackTime,
+        updatedAt: stepMtimes.get(step.name) ?? fallbackTime,
+      };
+      stepStates.set(step.name, { row: stepRow });
+    }
+    return { run, stepStates };
+  }
   /** Get or create the worker logs directory (.agent-relay/team/worker-logs) */
   private getWorkerLogsDir(): string {
     const logsDir = path.join(this.cwd, '.agent-relay', 'team', 'worker-logs');

package/packages/sdk-py/pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "agent-relay-sdk"
-version = "3.2.18"
+version = "3.2.22"
 description = "Python SDK for Agent Relay workflows"
 readme = "README.md"
 license = "Apache-2.0"

package/packages/sdk-swift/Sources/AgentRelaySDK/RelayObserver.swift CHANGED Viewed

@@ -218,6 +218,8 @@ public final class RelayObserver: NSObject, URLSessionWebSocketDelegate, @unchec
     }
     private func _handleSocketError(_ error: Error) {
+        isConnectionReady = false
         guard reconnectAttempts < maxReconnectAttempts else {
             _connectionState = .disconnected
             let delegate = self.delegate