npm - keystone-cli - Versions diffs - 0.7.2 → 1.0.0 - Mend

keystone-cli 0.7.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

package/README.md +486 -54
package/package.json +8 -2
package/src/__fixtures__/index.ts +100 -0
package/src/cli.ts +841 -91
package/src/db/memory-db.ts +35 -1
package/src/db/workflow-db.test.ts +24 -0
package/src/db/workflow-db.ts +484 -14
package/src/expression/evaluator.ts +68 -4
package/src/parser/agent-parser.ts +6 -3
package/src/parser/config-schema.ts +38 -2
package/src/parser/schema.ts +192 -7
package/src/parser/test-schema.ts +29 -0
package/src/parser/workflow-parser.test.ts +54 -0
package/src/parser/workflow-parser.ts +153 -7
package/src/runner/aggregate-error.test.ts +57 -0
package/src/runner/aggregate-error.ts +46 -0
package/src/runner/audit-verification.test.ts +2 -2
package/src/runner/auto-heal.test.ts +1 -1
package/src/runner/blueprint-executor.test.ts +63 -0
package/src/runner/blueprint-executor.ts +157 -0
package/src/runner/concurrency-limit.test.ts +82 -0
package/src/runner/debug-repl.ts +18 -3
package/src/runner/durable-timers.test.ts +200 -0
package/src/runner/engine-executor.test.ts +464 -0
package/src/runner/engine-executor.ts +491 -0
package/src/runner/foreach-executor.ts +30 -12
package/src/runner/llm-adapter.test.ts +282 -5
package/src/runner/llm-adapter.ts +581 -8
package/src/runner/llm-clarification.test.ts +79 -21
package/src/runner/llm-errors.ts +83 -0
package/src/runner/llm-executor.test.ts +258 -219
package/src/runner/llm-executor.ts +226 -29
package/src/runner/mcp-client.ts +70 -3
package/src/runner/mcp-manager.test.ts +52 -52
package/src/runner/mcp-manager.ts +12 -5
package/src/runner/mcp-server.test.ts +117 -78
package/src/runner/mcp-server.ts +13 -4
package/src/runner/optimization-runner.ts +48 -31
package/src/runner/reflexion.test.ts +1 -1
package/src/runner/resource-pool.test.ts +113 -0
package/src/runner/resource-pool.ts +164 -0
package/src/runner/shell-executor.ts +130 -32
package/src/runner/standard-tools-execution.test.ts +39 -0
package/src/runner/standard-tools-integration.test.ts +36 -36
package/src/runner/standard-tools.test.ts +18 -0
package/src/runner/standard-tools.ts +174 -93
package/src/runner/step-executor.test.ts +176 -16
package/src/runner/step-executor.ts +534 -83
package/src/runner/stream-utils.test.ts +14 -0
package/src/runner/subflow-outputs.test.ts +103 -0
package/src/runner/test-harness.ts +161 -0
package/src/runner/tool-integration.test.ts +73 -79
package/src/runner/workflow-runner.test.ts +549 -15
package/src/runner/workflow-runner.ts +1448 -79
package/src/runner/workflow-subflows.test.ts +255 -0
package/src/templates/agents/keystone-architect.md +17 -12
package/src/templates/agents/tester.md +21 -0
package/src/templates/child-rollback.yaml +11 -0
package/src/templates/decompose-implement.yaml +53 -0
package/src/templates/decompose-problem.yaml +159 -0
package/src/templates/decompose-research.yaml +52 -0
package/src/templates/decompose-review.yaml +51 -0
package/src/templates/dev.yaml +134 -0
package/src/templates/engine-example.yaml +33 -0
package/src/templates/fan-out-fan-in.yaml +61 -0
package/src/templates/memory-service.yaml +1 -1
package/src/templates/parent-rollback.yaml +16 -0
package/src/templates/robust-automation.yaml +1 -1
package/src/templates/scaffold-feature.yaml +29 -27
package/src/templates/scaffold-generate.yaml +41 -0
package/src/templates/scaffold-plan.yaml +53 -0
package/src/types/status.ts +3 -0
package/src/ui/dashboard.tsx +4 -3
package/src/utils/assets.macro.ts +36 -0
package/src/utils/auth-manager.ts +585 -8
package/src/utils/blueprint-utils.test.ts +49 -0
package/src/utils/blueprint-utils.ts +80 -0
package/src/utils/circuit-breaker.test.ts +177 -0
package/src/utils/circuit-breaker.ts +160 -0
package/src/utils/config-loader.test.ts +100 -13
package/src/utils/config-loader.ts +44 -17
package/src/utils/constants.ts +62 -0
package/src/utils/error-renderer.test.ts +267 -0
package/src/utils/error-renderer.ts +320 -0
package/src/utils/json-parser.test.ts +4 -0
package/src/utils/json-parser.ts +18 -1
package/src/utils/mermaid.ts +4 -0
package/src/utils/paths.test.ts +46 -0
package/src/utils/paths.ts +70 -0
package/src/utils/process-sandbox.test.ts +128 -0
package/src/utils/process-sandbox.ts +293 -0
package/src/utils/rate-limiter.test.ts +143 -0
package/src/utils/rate-limiter.ts +221 -0
package/src/utils/redactor.test.ts +23 -15
package/src/utils/redactor.ts +65 -25
package/src/utils/resource-loader.test.ts +54 -0
package/src/utils/resource-loader.ts +158 -0
package/src/utils/sandbox.test.ts +69 -4
package/src/utils/sandbox.ts +69 -6
package/src/utils/schema-validator.ts +65 -0
package/src/utils/workflow-registry.test.ts +57 -0
package/src/utils/workflow-registry.ts +45 -25
/package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
/package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0

package/src/runner/step-executor.ts (changed)

@@ -3,6 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 // Removed synchronous file I/O imports - using Bun's async file API instead
 import type {
+  BlueprintStep,
+  EngineStep,
   FileStep,
   HumanStep,
   MemoryStep,
@@ -14,13 +16,17 @@ import type {
   WorkflowStep,
 } from '../parser/schema.ts';
 import { ConsoleLogger, type Logger } from '../utils/logger.ts';
+import { executeBlueprintStep } from './blueprint-executor.ts';
+import { executeEngineStep } from './engine-executor.ts';
 import { getAdapter } from './llm-adapter.ts';
 import { detectShellInjectionRisk, executeShell } from './shell-executor.ts';
 import * as fs from 'node:fs';
+import { createRequire } from 'node:module';
 import * as os from 'node:os';
 import * as path from 'node:path';
 import * as readline from 'node:readline/promises';
+import { LIMITS, TIMEOUTS } from '../utils/constants.ts';
 import { SafeSandbox } from '../utils/sandbox.ts';
 import { executeLlmStep } from './llm-executor.ts';
 import { validateRemoteUrl } from './mcp-client.ts';
@@ -37,9 +43,20 @@ export class WorkflowSuspendedError extends Error {
   }
 }
+/**
+ * Error carrying the id of a step the workflow is waiting on and, optionally,
+ * the time at which it should wake up.
+ *
+ * NOTE(review): nothing in the visible hunks throws this error; presumably the
+ * runner raises it when a step reports status 'waiting' — confirm against the caller.
+ */
+export class WorkflowWaitingError extends Error {
+  constructor(
+    public readonly message: string,
+    public readonly stepId: string,
+    // presumably an ISO-8601 timestamp like the sleep step's `wakeAt` output — TODO confirm
+    public readonly wakeAt?: string
+  ) {
+    super(message);
+    // Set explicitly; `name` would otherwise be inherited as 'Error'.
+    this.name = 'WorkflowWaitingError';
+  }
+}
 export interface StepResult {
   output: unknown;
-  status: 'success' | 'failed' | 'suspended';
+  status: 'success' | 'failed' | 'suspended' | 'skipped' | 'waiting';
   error?: string;
   usage?: {
     prompt_tokens: number;
@@ -57,11 +74,100 @@ export interface StepExecutorOptions {
   memoryDb?: MemoryDb;
   workflowDir?: string;
   dryRun?: boolean;
+  abortSignal?: AbortSignal;
+  runId?: string;
+  stepExecutionId?: string;
+  artifactRoot?: string;
+  redactForStorage?: (value: unknown) => unknown;
+  debug?: boolean;
+  allowInsecure?: boolean;
   // Dependency injection for testing
   getAdapter?: typeof getAdapter;
+  executeStep?: typeof executeStep;
+  executeLlmStep?: typeof executeLlmStep;
   sandbox?: typeof SafeSandbox;
 }
+import type { JoinStep } from '../parser/schema.ts';
+/**
+ * Execute a join step
+ *
+ * Collects the output and status of every dependency listed in `step.needs`
+ * from `context.steps`, then evaluates `step.condition` against the number of
+ * dependencies that genuinely succeeded:
+ *   - 'all'    : every entry in `step.needs` succeeded
+ *   - 'any'    : at least one dependency succeeded
+ *   - a number : at least that many dependencies succeeded
+ * Any other condition value leaves the join failed.
+ *
+ * A dependency counts as a success only when its status is exactly 'success'
+ * and it carries no error. A dependency that has no entry in `context.steps`
+ * is left out of the collected inputs and statuses but still counts towards
+ * the total, so it can never satisfy 'all'.
+ *
+ * @param step - Join step providing `needs` and `condition`.
+ * @param context - Expression context whose `steps` map holds dependency results.
+ * @param _logger - Unused.
+ * @returns A result whose output is `{ inputs, status }` in both outcomes;
+ *   status is 'failed' with a descriptive error when the condition is not met,
+ *   otherwise 'success'.
+ */
+async function executeJoinStep(
+  step: JoinStep,
+  context: ExpressionContext,
+  _logger: Logger
+): Promise<StepResult> {
+  // Join step logic:
+  // It aggregates outputs from its 'needs'.
+  // Since the runner ensures dependencies are met (or processed),
+  // we just need to collect the results from context.steps.
+  const inputs: Record<string, unknown> = {};
+  const statusMap: Record<string, string> = {};
+  const realStatusMap: Record<string, 'success' | 'failed'> = {}; // Status considering allowFailure errors
+  const errors: string[] = [];
+  for (const depId of step.needs) {
+    const depContext = context.steps?.[depId];
+    if (depContext) {
+      inputs[depId] = depContext.output;
+      if (depContext.status) {
+        statusMap[depId] = depContext.status;
+      }
+      // Determine effective status:
+      // If status is success but error exists (allowFailure), treat as failed for the join condition
+      const isRealSuccess = depContext.status === 'success' && !depContext.error;
+      realStatusMap[depId] = isRealSuccess ? 'success' : 'failed';
+      if (depContext.error) {
+        errors.push(`Dependency ${depId} failed: ${depContext.error}`);
+      }
+    }
+  }
+  // Validate condition
+  const condition = step.condition;
+  const total = step.needs.length;
+  // Use realStatusMap to count successes/failures
+  const successCount = Object.values(realStatusMap).filter((s) => s === 'success').length;
+  // Note: We use the strict success count.
+  // Any dependency whose status is not exactly 'success' (e.g. 'skipped') is
+  // recorded as 'failed' in realStatusMap, and a dependency missing from
+  // context.steps is not recorded at all, so neither kind ever counts
+  // towards successCount.
+  let passed = false;
+  if (condition === 'all') {
+    passed = successCount === total;
+  } else if (condition === 'any') {
+    passed = successCount > 0;
+  } else if (typeof condition === 'number') {
+    passed = successCount >= condition;
+  }
+  // NOTE: True "any" or "quorum" (partial completion) requires Runner support to schedule the join
+  // before all dependencies are done. Currently, the runner waits for ALL dependencies.
+  // So this logic works for 'all' or 'any' (if others failed but allowFailure was true).
+  // Use allowFailure on branches to support "best effort" joins with the current runner.
+  if (!passed) {
+    return {
+      output: { inputs, status: statusMap },
+      status: 'failed',
+      error: `Join condition '${condition}' not met. Success: ${successCount}/${total}. Errors: ${errors.join('; ')}`,
+    };
+  }
+  return {
+    output: { inputs, status: statusMap },
+    status: 'success',
+  };
+}
 /**
  * Execute a single step based on its type
  */
@@ -77,36 +183,62 @@ export async function executeStep(
     memoryDb,
     workflowDir,
     dryRun,
+    abortSignal,
+    runId,
+    stepExecutionId,
+    artifactRoot,
+    redactForStorage,
     getAdapter: injectedGetAdapter,
+    executeStep: injectedExecuteStep,
+    executeLlmStep: injectedExecuteLlmStep,
     sandbox: injectedSandbox,
   } = options;
   try {
+    if (abortSignal?.aborted) {
+      throw new Error('Step canceled');
+    }
+    if (dryRun && step.type !== 'shell') {
+      logger.log(`[DRY RUN] Skipping ${step.type} step: ${step.id}`);
+      return {
+        output: null,
+        status: 'skipped',
+      };
+    }
     let result: StepResult;
     switch (step.type) {
       case 'shell':
-        result = await executeShellStep(step, context, logger, dryRun);
+        result = await executeShellStep(step, context, logger, dryRun, abortSignal);
         break;
       case 'file':
         result = await executeFileStep(step, context, logger, dryRun);
         break;
       case 'request':
-        result = await executeRequestStep(step, context, logger);
+        result = await executeRequestStep(step, context, logger, abortSignal);
         break;
       case 'human':
-        result = await executeHumanStep(step, context, logger);
+        result = await executeHumanStep(step, context, logger, abortSignal);
         break;
       case 'sleep':
-        result = await executeSleepStep(step, context, logger);
+        result = await executeSleepStep(step, context, logger, abortSignal);
         break;
       case 'llm':
-        result = await executeLlmStep(
+        result = await (injectedExecuteLlmStep || executeLlmStep)(
           step,
           context,
-          (s, c) => executeStep(s, c, logger, options),
+          (s, c) => {
+            const exec = injectedExecuteStep || executeStep;
+            return exec(s, c, logger, {
+              ...options,
+              stepExecutionId: undefined,
+            });
+          },
           logger,
           mcpManager,
-          workflowDir
+          workflowDir,
+          abortSignal,
+          injectedGetAdapter
         );
         break;
       case 'memory':
@@ -119,7 +251,41 @@ export async function executeStep(
         result = await executeWorkflowFn(step, context);
         break;
       case 'script':
-        result = await executeScriptStep(step, context, logger, injectedSandbox);
+        result = await executeScriptStep(step, context, logger, injectedSandbox, abortSignal);
+        break;
+      case 'engine':
+        result = await executeEngineStepWrapper(step, context, logger, {
+          abortSignal,
+          runId,
+          stepExecutionId,
+          artifactRoot,
+          redactForStorage,
+        });
+        break;
+      case 'blueprint':
+        result = await executeBlueprintStep(
+          step,
+          context,
+          (s, c) => executeStep(s, c, logger, options),
+          logger,
+          {
+            mcpManager,
+            workflowDir,
+            abortSignal,
+            runId,
+            artifactRoot,
+          }
+        );
+        break;
+      case 'join':
+        // Join is handled by the runner logic for aggregation, but we need a placeholder here
+        // or logic to aggregate results from dependencies.
+        // Actually, for 'all', 'any', 'quorum', the step *itself* should process the inputs.
+        // By the time executeStep is called, dependencies are met (for 'all').
+        // But for 'any', the runner must schedule it early.
+        // Assuming the runner handles scheduling, here we just return the aggregated output.
+        // We will assume 'context.steps' contains the dependency outputs.
+        result = await executeJoinStep(step, context, logger);
         break;
       default:
         throw new Error(`Unknown step type: ${(step as Step).type}`);
@@ -167,8 +333,12 @@ async function executeShellStep(
   step: ShellStep,
   context: ExpressionContext,
   logger: Logger,
-  dryRun?: boolean
+  dryRun?: boolean,
+  abortSignal?: AbortSignal
 ): Promise<StepResult> {
+  if (abortSignal?.aborted) {
+    throw new Error('Step canceled');
+  }
   if (dryRun) {
     const command = ExpressionEvaluator.evaluateString(step.run, context);
     logger.log(`[DRY RUN] Would execute shell command: ${command}`);
@@ -187,7 +357,7 @@ async function executeShellStep(
     );
   }
-  const result = await executeShell(step, context, logger);
+  const result = await executeShell(step, context, logger, abortSignal);
   if (result.stdout) {
     logger.log(result.stdout.trim());
@@ -199,6 +369,8 @@ async function executeShellStep(
         stdout: result.stdout,
         stderr: result.stderr,
         exitCode: result.exitCode,
+        stdoutTruncated: result.stdoutTruncated,
+        stderrTruncated: result.stderrTruncated,
       },
       status: 'failed',
       error: `Shell command exited with code ${result.exitCode}: ${result.stderr}`,
@@ -210,11 +382,76 @@ async function executeShellStep(
       stdout: result.stdout,
       stderr: result.stderr,
       exitCode: result.exitCode,
+      stdoutTruncated: result.stdoutTruncated,
+      stderrTruncated: result.stderrTruncated,
     },
     status: 'success',
   };
 }
+/**
+ * Run an engine step via `executeEngineStep` and convert its result into a `StepResult`.
+ *
+ * The step is reported as failed when the engine exits with a non-zero code,
+ * when `summaryError` is set, or when `summary` is `null`. The checks run in
+ * that order and the first match determines the error message. The same
+ * `output` object is attached to failed and successful results alike.
+ *
+ * @param step - Engine step definition, passed through to `executeEngineStep`.
+ * @param context - Expression context, passed through to `executeEngineStep`.
+ * @param logger - Logger forwarded to the engine executor.
+ * @param options - Optional abort signal, run and step-execution ids, artifact
+ *   root and storage redaction callback; each is forwarded unchanged.
+ * @returns The step result with status 'success' or 'failed'.
+ */
+async function executeEngineStepWrapper(
+  step: EngineStep,
+  context: ExpressionContext,
+  logger: Logger,
+  options: {
+    abortSignal?: AbortSignal;
+    runId?: string;
+    stepExecutionId?: string;
+    artifactRoot?: string;
+    redactForStorage?: (value: unknown) => unknown;
+  }
+): Promise<StepResult> {
+  const engineResult = await executeEngineStep(step, context, {
+    logger,
+    abortSignal: options.abortSignal,
+    runId: options.runId,
+    stepExecutionId: options.stepExecutionId,
+    artifactRoot: options.artifactRoot,
+    redactForStorage: options.redactForStorage,
+  });
+  // A missing summary is normalised to null; every other field is copied as-is.
+  const output = {
+    summary: engineResult.summary ?? null,
+    stdout: engineResult.stdout,
+    stderr: engineResult.stderr,
+    exitCode: engineResult.exitCode,
+    stdoutTruncated: engineResult.stdoutTruncated,
+    stderrTruncated: engineResult.stderrTruncated,
+    summarySource: engineResult.summarySource,
+    summaryFormat: engineResult.summaryFormat,
+    artifactPath: engineResult.artifactPath,
+  };
+  if (engineResult.exitCode !== 0) {
+    return {
+      output,
+      status: 'failed',
+      error: `Engine exited with code ${engineResult.exitCode}: ${engineResult.stderr}`,
+    };
+  }
+  if (engineResult.summaryError) {
+    return {
+      output,
+      status: 'failed',
+      error: `Engine summary parse failed: ${engineResult.summaryError}`,
+    };
+  }
+  // NOTE(review): strict `=== null` — an `undefined` summary would pass this check
+  // even though `output.summary` was normalised to null above. Confirm that
+  // executeEngineStep never returns `undefined` here.
+  if (engineResult.summary === null) {
+    return {
+      output,
+      status: 'failed',
+      error: `Engine step "${step.id}" did not produce a structured summary`,
+    };
+  }
+  return {
+    output,
+    status: 'success',
+  };
+}
 /**
  * Execute a file step (read, write, append)
  */
@@ -281,6 +518,12 @@ async function executeFileStep(
       if (!(await file.exists())) {
         throw new Error(`File not found: ${targetPath}`);
       }
+      const stat = fs.statSync(targetPath);
+      if (stat.size > LIMITS.MAX_FILE_READ_BYTES) {
+        throw new Error(
+          `File exceeds maximum read size of ${LIMITS.MAX_FILE_READ_BYTES} bytes: ${targetPath}`
+        );
+      }
       const content = await file.text();
       return {
         output: content,
@@ -289,7 +532,7 @@ async function executeFileStep(
     }
     case 'write': {
-      if (!step.content) {
+      if (step.content === undefined) {
         throw new Error('Content is required for write operation');
       }
       const content = ExpressionEvaluator.evaluateString(step.content, context);
@@ -308,7 +551,7 @@ async function executeFileStep(
     }
     case 'append': {
-      if (!step.content) {
+      if (step.content === undefined) {
         throw new Error('Content is required for append operation');
       }
       const content = ExpressionEvaluator.evaluateString(step.content, context);
@@ -319,7 +562,7 @@ async function executeFileStep(
         fs.mkdirSync(dir, { recursive: true });
       }
-      fs.appendFileSync(targetPath, content);
+      await fs.promises.appendFile(targetPath, content);
       return {
         output: { path: targetPath, bytes: content.length },
@@ -332,94 +575,246 @@ async function executeFileStep(
   }
 }
+/**
+ * Read a response body as text, consuming at most `maxBytes` bytes.
+ *
+ * Chunks are decoded with a default `TextDecoder` as they arrive. When the next
+ * chunk would push the total past `maxBytes`, only the bytes that still fit are
+ * decoded, the reader is cancelled, and the result is flagged as truncated.
+ *
+ * @param response - Response whose body stream is read; a missing body yields
+ *   an empty, non-truncated result.
+ * @param maxBytes - Byte budget. A body of exactly `maxBytes` bytes is not
+ *   reported as truncated.
+ * @returns The decoded text and whether the body was cut off at the limit.
+ */
+async function readResponseTextWithLimit(
+  response: Response,
+  maxBytes: number
+): Promise<{ text: string; truncated: boolean }> {
+  if (!response.body) {
+    return { text: '', truncated: false };
+  }
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let text = '';
+  let bytesRead = 0;
+  while (true) {
+    const { value, done } = await reader.read();
+    if (done) break;
+    // Skip reads that deliver no chunk.
+    if (!value) continue;
+    if (bytesRead + value.byteLength > maxBytes) {
+      // This chunk would exceed the budget: keep only the bytes that still fit.
+      const allowed = maxBytes - bytesRead;
+      if (allowed > 0) {
+        text += decoder.decode(value.slice(0, allowed), { stream: true });
+      }
+      // Flush the decoder. NOTE(review): a multi-byte character split at the cut is
+      // presumably emitted as U+FFFD by this flush — confirm that is acceptable.
+      text += decoder.decode();
+      // Best effort: stop reading the rest of the body; cancel errors are ignored.
+      try {
+        await reader.cancel();
+      } catch {}
+      return { text, truncated: true };
+    }
+    bytesRead += value.byteLength;
+    // `stream: true` keeps an incomplete multi-byte sequence buffered for the next chunk.
+    text += decoder.decode(value, { stream: true });
+  }
+  // Flush whatever the decoder still holds after the final chunk.
+  text += decoder.decode();
+  return { text, truncated: false };
+}
 /**
  * Execute an HTTP request step
  */
 async function executeRequestStep(
   step: RequestStep,
   context: ExpressionContext,
-  _logger: Logger
+  _logger: Logger,
+  abortSignal?: AbortSignal
 ): Promise<StepResult> {
+  if (abortSignal?.aborted) {
+    throw new Error('Step canceled');
+  }
   const url = ExpressionEvaluator.evaluateString(step.url, context);
+  const requestTimeoutMs = step.timeout ?? TIMEOUTS.DEFAULT_HTTP_TIMEOUT_MS;
+  const controller = new AbortController();
+  const onAbort = () => controller.abort(new Error('Step canceled'));
+  if (abortSignal) {
+    if (abortSignal.aborted) {
+      onAbort();
+    } else {
+      abortSignal.addEventListener('abort', onAbort, { once: true });
+    }
+  }
+  const timeoutId = setTimeout(() => {
+    controller.abort(new Error(`Request timed out after ${requestTimeoutMs}ms`));
+  }, requestTimeoutMs);
-  // Validate URL to prevent SSRF
-  await validateRemoteUrl(url);
+  try {
+    // Validate URL to prevent SSRF
+    await validateRemoteUrl(url, { allowInsecure: step.allowInsecure });
-  // Evaluate headers
-  const headers: Record<string, string> = {};
-  if (step.headers) {
-    for (const [key, value] of Object.entries(step.headers)) {
-      headers[key] = ExpressionEvaluator.evaluateString(value, context);
+    // Evaluate headers
+    const headers: Record<string, string> = {};
+    if (step.headers) {
+      for (const [key, value] of Object.entries(step.headers)) {
+        headers[key] = ExpressionEvaluator.evaluateString(value, context);
+      }
     }
-  }
-  // Evaluate body
-  let body: string | undefined;
-  if (step.body) {
-    const evaluatedBody = ExpressionEvaluator.evaluateObject(step.body, context);
+    // Evaluate body
+    let body: string | undefined;
+    if (step.body !== undefined) {
+      const evaluatedBody = ExpressionEvaluator.evaluateObject(step.body, context);
-    const contentType = Object.entries(headers).find(
-      ([k]) => k.toLowerCase() === 'content-type'
-    )?.[1];
+      const contentType = Object.entries(headers).find(
+        ([k]) => k.toLowerCase() === 'content-type'
+      )?.[1];
-    if (contentType?.includes('application/x-www-form-urlencoded')) {
-      if (typeof evaluatedBody === 'object' && evaluatedBody !== null) {
-        const params = new URLSearchParams();
-        for (const [key, value] of Object.entries(evaluatedBody)) {
-          params.append(key, String(value));
+      if (contentType?.includes('application/x-www-form-urlencoded')) {
+        if (typeof evaluatedBody === 'object' && evaluatedBody !== null) {
+          const params = new URLSearchParams();
+          for (const [key, value] of Object.entries(evaluatedBody)) {
+            params.append(key, String(value));
+          }
+          body = params.toString();
+        } else {
+          body = String(evaluatedBody);
         }
-        body = params.toString();
       } else {
-        body = String(evaluatedBody);
+        // Default to JSON if not form-encoded and not already a string
+        body = typeof evaluatedBody === 'string' ? evaluatedBody : JSON.stringify(evaluatedBody);
+        // Auto-set Content-Type to application/json if not already set and body is an object
+        if (!contentType && typeof evaluatedBody === 'object' && evaluatedBody !== null) {
+          headers['Content-Type'] = 'application/json';
+        }
       }
-    } else {
-      // Default to JSON if not form-encoded and not already a string
-      body = typeof evaluatedBody === 'string' ? evaluatedBody : JSON.stringify(evaluatedBody);
+    }
+    const maxRedirects = 5;
+    let response: Response | undefined;
+    let currentUrl = url;
+    let currentMethod = step.method;
+    let currentBody = body;
+    const currentHeaders: Record<string, string> = { ...headers };
+    const safeCrossOriginHeaders = new Set(['accept', 'accept-language', 'user-agent']);
+    const removeHeader = (name: string) => {
+      const target = name.toLowerCase();
+      for (const key of Object.keys(currentHeaders)) {
+        if (key.toLowerCase() === target) {
+          delete currentHeaders[key];
+        }
+      }
+    };
+    const stripCrossOriginHeaders = () => {
+      for (const key of Object.keys(currentHeaders)) {
+        if (!safeCrossOriginHeaders.has(key.toLowerCase())) {
+          delete currentHeaders[key];
+        }
+      }
+    };
+    for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
+      response = await fetch(currentUrl, {
+        method: currentMethod,
+        headers: currentHeaders,
+        body: currentBody,
+        redirect: 'manual',
+        signal: controller.signal,
+      });
+      if (response.status >= 300 && response.status < 400) {
+        const location = response.headers.get('location');
+        if (!location) {
+          break;
+        }
+        if (redirectCount >= maxRedirects) {
+          throw new Error(`Request exceeded maximum redirects (${maxRedirects})`);
+        }
+        const nextUrl = new URL(location, currentUrl).href;
+        await validateRemoteUrl(nextUrl, { allowInsecure: step.allowInsecure });
+        let nextMethod = currentMethod;
+        let nextBody = currentBody;
+        if (
+          response.status === 303 ||
+          ((response.status === 301 || response.status === 302) &&
+            currentMethod !== 'GET' &&
+            currentMethod !== 'HEAD')
+        ) {
+          nextMethod = 'GET';
+          nextBody = undefined;
+          removeHeader('content-type');
+        }
-      // Auto-set Content-Type to application/json if not already set and body is an object
-      if (!contentType && typeof evaluatedBody === 'object' && evaluatedBody !== null) {
-        headers['Content-Type'] = 'application/json';
+        const fromOrigin = new URL(currentUrl).origin;
+        const toOrigin = new URL(nextUrl).origin;
+        if (fromOrigin !== toOrigin) {
+          removeHeader('authorization');
+          removeHeader('proxy-authorization');
+          removeHeader('cookie');
+          if (!step.allowInsecure) {
+            if (nextMethod !== 'GET' && nextMethod !== 'HEAD') {
+              throw new Error(
+                `Cross-origin redirect blocked for ${nextMethod} request. Set allowInsecure to true to override.`
+              );
+            }
+            stripCrossOriginHeaders();
+          }
+        }
+        currentMethod = nextMethod;
+        currentBody = nextBody;
+        currentUrl = nextUrl;
+        continue;
       }
+      break;
     }
-  }
-  const response = await fetch(url, {
-    method: step.method,
-    headers,
-    body,
-  });
+    if (!response) {
+      throw new Error('Request failed: No response received');
+    }
-  const responseText = await response.text();
-  let responseData: unknown;
+    const maxResponseBytes = LIMITS.MAX_HTTP_RESPONSE_BYTES;
+    const { text: responseText, truncated } = await readResponseTextWithLimit(
+      response,
+      maxResponseBytes
+    );
+    let responseData: unknown;
-  try {
-    responseData = JSON.parse(responseText);
-  } catch {
-    responseData = responseText;
-  }
+    try {
+      responseData = JSON.parse(responseText);
+    } catch {
+      responseData = responseText;
+    }
-  return {
-    output: {
-      status: response.status,
-      statusText: response.statusText,
-      headers: (() => {
-        const h: Record<string, string> = {};
-        response.headers.forEach((v, k) => {
-          h[k] = v;
-        });
-        return h;
-      })(),
-      data: responseData,
-    },
-    status: response.ok ? 'success' : 'failed',
-    error: response.ok
-      ? undefined
-      : `HTTP ${response.status}: ${response.statusText}${
-          responseText
-            ? `\nResponse Body: ${responseText.substring(0, 500)}${responseText.length > 500 ? '...' : ''}`
-            : ''
-        }`,
-  };
+    return {
+      output: {
+        status: response.status,
+        statusText: response.statusText,
+        headers: (() => {
+          const h: Record<string, string> = {};
+          response.headers.forEach((v, k) => {
+            h[k] = v;
+          });
+          return h;
+        })(),
+        data: responseData,
+        truncated,
+        maxBytes: maxResponseBytes,
+      },
+      status: response.ok ? 'success' : 'failed',
+      error: response.ok
+        ? undefined
+        : `HTTP ${response.status}: ${response.statusText}${
+            responseText
+              ? `\nResponse Body: ${responseText.substring(0, 500)}${responseText.length > 500 ? '...' : ''}${
+                  truncated ? ' [truncated]' : ''
+                }`
+              : truncated
+                ? '\nResponse Body: [truncated]'
+                : ''
+          }`,
+    };
+  } finally {
+    clearTimeout(timeoutId);
+    if (abortSignal) {
+      abortSignal.removeEventListener('abort', onAbort);
+    }
+  }
 }
 /**
@@ -428,8 +823,12 @@ async function executeRequestStep(
 async function executeHumanStep(
   step: HumanStep,
   context: ExpressionContext,
-  logger: Logger
+  logger: Logger,
+  abortSignal?: AbortSignal
 ): Promise<StepResult> {
+  if (abortSignal?.aborted) {
+    throw new Error('Step canceled');
+  }
   const message = ExpressionEvaluator.evaluateString(step.message, context);
   // Check if we have a resume answer
@@ -510,8 +909,12 @@ async function executeHumanStep(
 async function executeSleepStep(
   step: SleepStep,
   context: ExpressionContext,
-  _logger: Logger
+  _logger: Logger,
+  abortSignal?: AbortSignal
 ): Promise<StepResult> {
+  if (abortSignal?.aborted) {
+    throw new Error('Step canceled');
+  }
   const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
   const duration = Number(evaluated);
@@ -519,7 +922,39 @@ async function executeSleepStep(
     throw new Error(`Invalid sleep duration: ${evaluated}`);
   }
-  await new Promise((resolve) => setTimeout(resolve, duration));
+  // For durable sleeps, return waiting status with wake time
+  // Threshold: 60s (60000ms) - only durably wait if requested AND long enough
+  if (step.durable && duration >= 60000) {
+    const wakeAt = new Date(Date.now() + duration).toISOString();
+    return {
+      output: { durable: true, wakeAt, durationMs: duration },
+      status: 'waiting',
+    };
+  }
+  await new Promise((resolve, reject) => {
+    const onAbort = () => {
+      clearTimeout(timeoutId);
+      reject(new Error('Step canceled'));
+    };
+    const cleanup = () => {
+      if (abortSignal) {
+        abortSignal.removeEventListener('abort', onAbort);
+      }
+    };
+    const timeoutId = setTimeout(() => {
+      cleanup();
+      resolve(undefined);
+    }, duration);
+    if (abortSignal) {
+      if (abortSignal.aborted) {
+        onAbort();
+        cleanup();
+        return;
+      }
+      abortSignal.addEventListener('abort', onAbort, { once: true });
+    }
+  });
   return {
     output: { slept: duration },
@@ -533,9 +968,13 @@ async function executeScriptStep(
   step: ScriptStep,
   context: ExpressionContext,
   _logger: Logger,
-  sandbox = SafeSandbox
+  sandbox = SafeSandbox,
+  abortSignal?: AbortSignal
 ): Promise<StepResult> {
   try {
+    if (abortSignal?.aborted) {
+      throw new Error('Step canceled');
+    }
     if (!step.allowInsecure) {
       throw new Error(
         'Script execution is disabled by default because Bun uses an insecure VM sandbox. ' +
@@ -543,6 +982,8 @@ async function executeScriptStep(
       );
     }
+    const requireFn = createRequire(import.meta.url);
     const result = await sandbox.execute(
       step.run,
       {
@@ -550,9 +991,13 @@ async function executeScriptStep(
         secrets: context.secrets,
         steps: context.steps,
         env: context.env,
+        // biome-ignore lint/suspicious/noExplicitAny: args is dynamic
+        args: (context as any).args,
+        require: requireFn,
       },
       {
         timeout: step.timeout,
+        logger: _logger,
       }
     );
@@ -584,7 +1029,13 @@ async function executeMemoryStep(
   }
   try {
-    const { adapter, resolvedModel } = getAdapterFn(step.model || 'local');
+    const requestedModel = step.model || 'local';
+    if (requestedModel !== 'local' && !requestedModel.startsWith('local:')) {
+      throw new Error(
+        'Memory steps only support local embeddings. Use model: local (or local:<model>) or omit the model.'
+      );
+    }
+    const { adapter, resolvedModel } = getAdapterFn(requestedModel);
     if (!adapter.embed) {
       throw new Error(`Provider for model ${step.model || 'local'} does not support embeddings`);
     }