npm - keystone-cli - Versions diffs - 0.5.1 → 0.6.0 - Mend

keystone-cli 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/README.md +55 -8
package/package.json +8 -17
package/src/cli.ts +33 -192
package/src/db/memory-db.test.ts +54 -0
package/src/db/memory-db.ts +122 -0
package/src/db/sqlite-setup.ts +49 -0
package/src/db/workflow-db.test.ts +41 -10
package/src/db/workflow-db.ts +84 -28
package/src/expression/evaluator.test.ts +19 -0
package/src/expression/evaluator.ts +134 -39
package/src/parser/schema.ts +41 -0
package/src/runner/audit-verification.test.ts +23 -0
package/src/runner/auto-heal.test.ts +64 -0
package/src/runner/debug-repl.test.ts +74 -0
package/src/runner/debug-repl.ts +225 -0
package/src/runner/foreach-executor.ts +327 -0
package/src/runner/llm-adapter.test.ts +27 -14
package/src/runner/llm-adapter.ts +90 -112
package/src/runner/llm-executor.test.ts +47 -6
package/src/runner/llm-executor.ts +18 -3
package/src/runner/mcp-client.audit.test.ts +69 -0
package/src/runner/mcp-client.test.ts +12 -3
package/src/runner/mcp-client.ts +199 -19
package/src/runner/mcp-manager.ts +19 -8
package/src/runner/mcp-server.test.ts +8 -5
package/src/runner/mcp-server.ts +31 -17
package/src/runner/optimization-runner.ts +305 -0
package/src/runner/reflexion.test.ts +87 -0
package/src/runner/shell-executor.test.ts +12 -0
package/src/runner/shell-executor.ts +9 -6
package/src/runner/step-executor.test.ts +46 -1
package/src/runner/step-executor.ts +154 -60
package/src/runner/stream-utils.test.ts +65 -0
package/src/runner/stream-utils.ts +186 -0
package/src/runner/workflow-runner.test.ts +4 -4
package/src/runner/workflow-runner.ts +436 -251
package/src/templates/agents/keystone-architect.md +6 -4
package/src/templates/full-feature-demo.yaml +4 -4
package/src/types/assets.d.ts +14 -0
package/src/types/status.ts +1 -1
package/src/ui/dashboard.tsx +38 -26
package/src/utils/auth-manager.ts +3 -1
package/src/utils/logger.test.ts +76 -0
package/src/utils/logger.ts +39 -0
package/src/utils/prompt.ts +75 -0
package/src/utils/redactor.test.ts +86 -4
package/src/utils/redactor.ts +48 -13

package/src/runner/workflow-runner.ts (changed)

@@ -1,22 +1,23 @@
 import { randomUUID } from 'node:crypto';
-import { dirname } from 'node:path';
+import { dirname, join } from 'node:path';
+import { MemoryDb } from '../db/memory-db.ts';
 import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 import type { Step, Workflow, WorkflowStep } from '../parser/schema.ts';
 import { WorkflowParser } from '../parser/workflow-parser.ts';
+import { StepStatus, type StepStatusType, WorkflowStatus } from '../types/status.ts';
+import { extractJson } from '../utils/json-parser.ts';
 import { Redactor } from '../utils/redactor.ts';
 import { WorkflowRegistry } from '../utils/workflow-registry.ts';
+import { ForeachExecutor } from './foreach-executor.ts';
+import { type LLMMessage, getAdapter } from './llm-adapter.ts';
 import { MCPManager } from './mcp-manager.ts';
 import { withRetry } from './retry.ts';
 import { type StepResult, WorkflowSuspendedError, executeStep } from './step-executor.ts';
 import { withTimeout } from './timeout.ts';
-export interface Logger {
-  log: (msg: string) => void;
-  error: (msg: string) => void;
-  warn: (msg: string) => void;
-}
+import { ConsoleLogger, type Logger } from '../utils/logger.ts';
 /**
  * A logger wrapper that redacts secrets from all log messages
@@ -38,11 +39,22 @@ class RedactingLogger implements Logger {
   warn(msg: string): void {
     this.inner.warn(this.redactor.redact(msg));
   }
+  info(msg: string): void {
+    this.inner.info(this.redactor.redact(msg));
+  }
+  debug(msg: string): void {
+    if (this.inner.debug) {
+      this.inner.debug(this.redactor.redact(msg));
+    }
+  }
 }
 export interface RunOptions {
   inputs?: Record<string, unknown>;
   dbPath?: string;
+  memoryDbPath?: string;
   resumeRunId?: string;
   logger?: Logger;
   mcpManager?: MCPManager;
@@ -50,12 +62,15 @@ export interface RunOptions {
   workflowDir?: string;
   resumeInputs?: Record<string, unknown>;
   dryRun?: boolean;
+  debug?: boolean;
+  getAdapter?: typeof getAdapter;
+  depth?: number;
 }
 export interface StepContext {
   output?: unknown;
   outputs?: Record<string, unknown>;
-  status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
+  status: StepStatusType;
   error?: string;
   usage?: {
     prompt_tokens: number;
@@ -78,6 +93,7 @@ export interface ForeachStepContext extends StepContext {
 export class WorkflowRunner {
   private workflow: Workflow;
   private db: WorkflowDb;
+  private memoryDb: MemoryDb;
   private runId: string;
   private stepContexts: Map<string, StepContext | ForeachStepContext> = new Map();
   private inputs: Record<string, unknown>;
@@ -92,15 +108,26 @@ export class WorkflowRunner {
   private isStopping = false;
   private hasWarnedMemory = false;
   private static readonly MEMORY_WARNING_THRESHOLD = 1000;
+  private static readonly MAX_RECURSION_DEPTH = 10;
+  private depth = 0;
   constructor(workflow: Workflow, options: RunOptions = {}) {
     this.workflow = workflow;
     this.options = options;
+    this.depth = options.depth || 0;
+    if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
+      throw new Error(
+        `Maximum workflow recursion depth (${WorkflowRunner.MAX_RECURSION_DEPTH}) exceeded.`
+      );
+    }
     this.db = new WorkflowDb(options.dbPath);
+    this.memoryDb = new MemoryDb(options.memoryDbPath);
     this.secrets = this.loadSecrets();
     this.redactor = new Redactor(this.secrets);
     // Wrap the logger with a redactor to prevent secret leakage in logs
-    const rawLogger = options.logger || console;
+    const rawLogger = options.logger || new ConsoleLogger();
     this.logger = new RedactingLogger(rawLogger, this.redactor);
     this.mcpManager = options.mcpManager || new MCPManager();
@@ -129,13 +156,13 @@ export class WorkflowRunner {
    * Restore state from a previous run (for resume functionality)
    */
   private async restoreState(): Promise<void> {
-    const run = this.db.getRun(this.runId);
+    const run = await this.db.getRun(this.runId);
     if (!run) {
       throw new Error(`Run ${this.runId} not found`);
     }
     // Only allow resuming failed or paused runs
-    if (run.status !== 'failed' && run.status !== 'paused') {
+    if (run.status !== WorkflowStatus.FAILED && run.status !== WorkflowStatus.PAUSED) {
       throw new Error(
         `Cannot resume run with status '${run.status}'. Only 'failed' or 'paused' runs can be resumed.`
       );
@@ -144,18 +171,22 @@ export class WorkflowRunner {
     // Restore inputs from the previous run to ensure consistency
     // Merge with any resumeInputs provided (e.g. answers to human steps)
     try {
-      const storedInputs = JSON.parse(run.inputs);
-      this.inputs = { ...storedInputs, ...this.inputs };
+      if (!run.inputs || run.inputs === 'null' || run.inputs === '') {
+        this.logger.warn(`Run ${this.runId} has no persisted inputs`);
+        // Keep existing inputs
+      } else {
+        const storedInputs = JSON.parse(run.inputs);
+        this.inputs = { ...storedInputs, ...this.inputs };
+      }
     } catch (error) {
-      // Log warning but continue with default empty inputs instead of crashing
-      this.logger.warn(
-        `Failed to parse inputs from run ${this.runId}, using defaults: ${error instanceof Error ? error.message : String(error)}`
+      this.logger.error(
+        `CRITICAL: Failed to parse inputs from run ${this.runId}. Data may be corrupted. Using default/resume inputs. Error: ${error instanceof Error ? error.message : String(error)}`
       );
-      // Keep existing inputs (from resumeInputs or empty)
+      // Fallback: preserve existing inputs from resume options
     }
     // Load all step executions for this run
-    const steps = this.db.getStepsByRun(this.runId);
+    const steps = await this.db.getStepsByRun(this.runId);
     // Group steps by step_id to handle foreach loops (multiple executions per step_id)
     const stepExecutionsByStepId = new Map<string, typeof steps>();
@@ -194,7 +225,7 @@ export class WorkflowRunner {
         for (const exec of sortedExecs) {
           if (exec.iteration_index === null) continue; // Skip parent step record
-          if (exec.status === 'success' || exec.status === 'skipped') {
+          if (exec.status === StepStatus.SUCCESS || exec.status === StepStatus.SKIPPED) {
             let output: unknown = null;
             try {
               output = exec.output ? JSON.parse(exec.output) : null;
@@ -210,7 +241,7 @@ export class WorkflowRunner {
                 typeof output === 'object' && output !== null && !Array.isArray(output)
                   ? (output as Record<string, unknown>)
                   : {},
-              status: exec.status as 'success' | 'skipped',
+              status: exec.status as typeof StepStatus.SUCCESS | typeof StepStatus.SKIPPED,
             };
             outputs[exec.iteration_index] = output;
           } else {
@@ -219,7 +250,7 @@ export class WorkflowRunner {
             items[exec.iteration_index] = {
               output: null,
               outputs: {},
-              status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
+              status: exec.status as StepStatusType,
             };
           }
         }
@@ -263,17 +294,17 @@ export class WorkflowRunner {
           !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
         // Determine overall status based on iterations
-        let status: StepContext['status'] = 'success';
+        let status: StepContext['status'] = StepStatus.SUCCESS;
         if (allSuccess && hasAllItems) {
-          status = 'success';
-        } else if (items.some((item) => item?.status === 'suspended')) {
-          status = 'suspended';
+          status = StepStatus.SUCCESS;
+        } else if (items.some((item) => item?.status === StepStatus.SUSPENDED)) {
+          status = StepStatus.SUSPENDED;
         } else {
-          status = 'failed';
+          status = StepStatus.FAILED;
         }
         // Always restore what we have to allow partial expression evaluation
-        const mappedOutputs = this.aggregateOutputs(outputs);
+        const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
         this.stepContexts.set(stepId, {
           output: outputs,
           outputs: mappedOutputs,
@@ -282,13 +313,17 @@ export class WorkflowRunner {
         } as ForeachStepContext);
         // Only mark as fully completed if all iterations completed successfully AND we have all items
-        if (status === 'success') {
+        if (status === StepStatus.SUCCESS) {
           completedStepIds.add(stepId);
         }
       } else {
         // Single execution step
         const exec = stepExecutions[0];
-        if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
+        if (
+          exec.status === StepStatus.SUCCESS ||
+          exec.status === StepStatus.SKIPPED ||
+          exec.status === StepStatus.SUSPENDED
+        ) {
           let output: unknown = null;
           try {
             output = exec.output ? JSON.parse(exec.output) : null;
@@ -304,7 +339,7 @@ export class WorkflowRunner {
                 : {},
             status: exec.status as StepContext['status'],
           });
-          if (exec.status !== 'suspended') {
+          if (exec.status !== StepStatus.SUSPENDED) {
             completedStepIds.add(stepId);
           }
         }
@@ -322,7 +357,7 @@ export class WorkflowRunner {
     const handler = async (signal: string) => {
       if (this.isStopping) return;
       this.logger.log(`\n\n🛑 Received ${signal}. Cleaning up...`);
-      await this.stop('failed', `Cancelled by user (${signal})`);
+      await this.stop(WorkflowStatus.FAILED, `Cancelled by user (${signal})`);
       // Only exit if not embedded
       if (!this.options.preventExit) {
@@ -339,7 +374,7 @@ export class WorkflowRunner {
   /**
    * Stop the runner and cleanup resources
    */
-  public async stop(status: RunStatus = 'failed', error?: string): Promise<void> {
+  public async stop(status: RunStatus = WorkflowStatus.FAILED, error?: string): Promise<void> {
     if (this.isStopping) return;
     this.isStopping = true;
@@ -353,6 +388,7 @@ export class WorkflowRunner {
       await this.mcpManager.stopAll();
       this.db.close();
+      this.memoryDb.close();
     } catch (err) {
       this.logger.error(`Error during stop/cleanup: ${err}`);
     }
@@ -389,9 +425,7 @@ export class WorkflowRunner {
       '_',
       'SHLVL',
       'LC_ALL',
-      'OLDPWD',
       'DISPLAY',
-      'TMPDIR',
       'SSH_AUTH_SOCK',
       'XPC_FLAGS',
       'XPC_SERVICE_NAME',
@@ -400,6 +434,20 @@ export class WorkflowRunner {
       'TERM_PROGRAM',
       'TERM_PROGRAM_VERSION',
       'COLORTERM',
+      'LC_TERMINAL',
+      'LC_TERMINAL_VERSION',
+      'PWD',
+      'OLDPWD',
+      'HOME',
+      'USER',
+      'SHELL',
+      'PATH',
+      'LOGNAME',
+      'TMPDIR',
+      'XDG_CONFIG_HOME',
+      'XDG_DATA_HOME',
+      'XDG_CACHE_HOME',
+      'XDG_RUNTIME_DIR',
     ]);
     // Bun automatically loads .env file
@@ -411,31 +459,6 @@ export class WorkflowRunner {
     return secrets;
   }
-  /**
-   * Aggregate outputs from multiple iterations of a foreach step
-   */
-  private aggregateOutputs(outputs: unknown[]): Record<string, unknown> {
-    const mappedOutputs: Record<string, unknown> = { length: outputs.length };
-    const allKeys = new Set<string>();
-    for (const output of outputs) {
-      if (output && typeof output === 'object' && !Array.isArray(output)) {
-        for (const key of Object.keys(output)) {
-          allKeys.add(key);
-        }
-      }
-    }
-    for (const key of allKeys) {
-      mappedOutputs[key] = outputs.map((o) =>
-        o && typeof o === 'object' && !Array.isArray(o) && key in (o as Record<string, unknown>)
-          ? (o as Record<string, unknown>)[key]
-          : null
-      );
-    }
-    return mappedOutputs;
-  }
   /**
    * Apply workflow defaults to inputs and validate types
    */
@@ -541,6 +564,39 @@ export class WorkflowRunner {
     }
   }
+  /**
+   * Retrieve past successful runs and format them as few-shot examples
+   */
+  private async getFewShotExamples(workflowName: string): Promise<string> {
+    try {
+      const runs = await this.db.getSuccessfulRuns(workflowName, 3);
+      if (!runs || runs.length === 0) return '';
+      let examples = 'Here are examples of how you successfully handled this task in the past:\n';
+      for (const [index, run] of runs.entries()) {
+        examples += `\nExample ${index + 1}:\n`;
+        try {
+          // Pretty print JSON inputs/outputs
+          const inputs = JSON.stringify(JSON.parse(run.inputs), null, 2);
+          const outputs = run.outputs ? JSON.stringify(JSON.parse(run.outputs), null, 2) : '{}';
+          examples += `Input: ${inputs}\n`;
+          examples += `Output: ${outputs}\n`;
+        } catch (e) {
+          // Fallback for raw strings if parsing fails
+          examples += `Input: ${run.inputs}\n`;
+          examples += `Output: ${run.outputs || '{}'}\n`;
+        }
+      }
+      return examples;
+    } catch (error) {
+      this.logger.warn(`Failed to retrieve few-shot examples: ${error}`);
+      return '';
+    }
+  }
   /**
    * Execute a single step instance and return the result
    * Does NOT update global stepContexts
@@ -550,15 +606,37 @@ export class WorkflowRunner {
     context: ExpressionContext,
     stepExecId: string
   ): Promise<StepContext> {
-    await this.db.startStep(stepExecId);
+    let stepToExecute = step;
+    // Inject few-shot examples if enabled
+    if (step.type === 'llm' && step.learn) {
+      const examples = await this.getFewShotExamples(this.workflow.name);
+      if (examples) {
+        stepToExecute = {
+          ...step,
+          prompt: `${examples}\n\n${step.prompt}`,
+        };
+        this.logger.log(
+          `  🧠 Injected few-shot examples from ${examples.split('Example').length - 1} past runs`
+        );
+      }
+    }
+    const isRecursion =
+      (context.reflexionAttempts as number) > 0 || (context.autoHealAttempts as number) > 0;
+    if (!isRecursion) {
+      await this.db.startStep(stepExecId);
+    }
     const operation = async () => {
       const result = await executeStep(
-        step,
+        stepToExecute,
         context,
         this.logger,
         this.executeSubWorkflow.bind(this),
         this.mcpManager,
+        this.memoryDb,
         this.options.workflowDir,
         this.options.dryRun
       );
@@ -581,10 +659,10 @@ export class WorkflowRunner {
         await this.db.incrementRetry(stepExecId);
       });
-      if (result.status === 'suspended') {
+      if (result.status === StepStatus.SUSPENDED) {
         await this.db.completeStep(
           stepExecId,
-          'suspended',
+          StepStatus.SUSPENDED,
           result.output,
           'Waiting for interaction',
           result.usage
@@ -600,6 +678,17 @@ export class WorkflowRunner {
         result.usage
       );
+      // Auto-Learning logic
+      if (step.learn && result.status === StepStatus.SUCCESS) {
+        try {
+          await this.learnFromStep(step, result, context);
+        } catch (error) {
+          this.logger.warn(
+            `  ⚠️ Failed to learn from step ${step.id}: ${error instanceof Error ? error.message : String(error)}`
+          );
+        }
+      }
       // Ensure outputs is always an object for consistent access
       let outputs: Record<string, unknown>;
       if (
@@ -621,6 +710,104 @@ export class WorkflowRunner {
         usage: result.usage,
       };
     } catch (error) {
+      // Reflexion (Self-Correction) logic
+      if (step.reflexion) {
+        const { limit = 3, hint } = step.reflexion;
+        const currentAttempt = (context.reflexionAttempts as number) || 0;
+        if (currentAttempt < limit) {
+          const errorMsg = error instanceof Error ? error.message : String(error);
+          this.logger.log(
+            `  🔧 Reflexion triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${limit})`
+          );
+          try {
+            // Get corrected command from Mechanic
+            const fixedStep = await this.getFixFromReflexion(step, errorMsg, hint);
+            // Merge fixed properties
+            const newStep = { ...step, ...fixedStep };
+            // Retry with new step definition
+            const nextContext = {
+              ...context,
+              reflexionAttempts: currentAttempt + 1,
+            };
+            return this.executeStepInternal(newStep, nextContext, stepExecId);
+          } catch (healError) {
+            this.logger.error(
+              `  ✗ Reflexion failed: ${healError instanceof Error ? healError.message : String(healError)}`
+            );
+            // Fall through to auto-heal or failure
+          }
+        }
+      }
+      // Auto-heal logic
+      if (step.auto_heal && typeof step.auto_heal === 'object') {
+        const autoHeal = step.auto_heal;
+        // Limit recursion/loops
+        const maxAttempts = autoHeal.maxAttempts || 1;
+        const currentAttempt = (context.autoHealAttempts as number) || 0;
+        if (currentAttempt < maxAttempts) {
+          const errorMsg = error instanceof Error ? error.message : String(error);
+          this.logger.log(
+            `  🩹 Auto-healing triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${maxAttempts})`
+          );
+          try {
+            // Get fix from agent
+            const fixedStep = await this.getFixFromAgent(step, errorMsg, context);
+            // Merge fixed properties into the step
+            const newStep = { ...step, ...fixedStep };
+            // Retry with new step definition
+            const nextContext = {
+              ...context,
+              autoHealAttempts: currentAttempt + 1,
+            };
+            return this.executeStepInternal(newStep, nextContext, stepExecId);
+          } catch (healError) {
+            this.logger.error(
+              `  ✗ Auto-heal failed: ${healError instanceof Error ? healError.message : String(healError)}`
+            );
+            // Fall through to normal failure
+          }
+        }
+      }
+      // Debug REPL logic
+      if (this.options.debug) {
+        try {
+          const { DebugRepl } = await import('./debug-repl.ts');
+          const repl = new DebugRepl(context, step, error, this.logger);
+          const action = await repl.start();
+          if (action.type === 'retry') {
+            this.logger.log(`  ↻ Retrying step ${step.id} after manual intervention`);
+            // We use the modified step if provided, else original
+            const stepToRun = action.modifiedStep || step;
+            return this.executeStepInternal(stepToRun, context, stepExecId);
+          }
+          if (action.type === 'skip') {
+            this.logger.log(`  ⏭️ Skipping step ${step.id} manually`);
+            await this.db.completeStep(stepExecId, StepStatus.SKIPPED, null, undefined, undefined);
+            return {
+              output: null,
+              outputs: {},
+              status: StepStatus.SKIPPED,
+            };
+          }
+          // if 'continue_failure', fall through
+        } catch (replError) {
+          this.logger.error(`  ✗ Debug REPL error: ${replError}`);
+        }
+      }
       const errorMsg = error instanceof Error ? error.message : String(error);
       const redactedErrorMsg = this.redactor.redact(errorMsg);
       this.logger.error(`  ✗ Step ${step.id} failed: ${redactedErrorMsg}`);
@@ -636,210 +823,202 @@ export class WorkflowRunner {
   }
   /**
-   * Execute a step (handles foreach if present)
+   * Consult an agent to fix a failing step
    */
-  private async executeStepWithForeach(step: Step): Promise<void> {
-    const baseContext = this.buildContext();
+  private async getFixFromAgent(
+    step: Step,
+    error: string,
+    context: ExpressionContext
+  ): Promise<Partial<Step>> {
+    const { auto_heal } = step;
+    if (!auto_heal) throw new Error('Auto-heal not configured');
+    const prompt = `
+The following step failed during execution:
+\`\`\`json
+${JSON.stringify(step, null, 2)}
+\`\`\`
+Error:
+${error}
+Please analyze the error and provide a fixed version of the step configuration.
+Return ONLY a valid JSON object containing the fields that need to be changed.
+For example, if the command was wrong, return:
+{ "run": "correct command" }
+Do not change the 'id' or 'type' or 'auto_heal' fields.
+`;
+    // Create a synthetic step to invoke the agent
+    const agentStep: Step = {
+      id: `${step.id}-healer`,
+      type: 'llm',
+      agent: auto_heal.agent,
+      model: auto_heal.model,
+      prompt,
+      schema: {
+        type: 'object',
+        description: 'Partial step configuration with fixed values',
+        additionalProperties: true,
+      },
+    } as import('../parser/schema.ts').LlmStep;
+    this.logger.log(`  🚑 Consulting agent ${auto_heal.agent} for a fix...`);
+    // Execute the agent step
+    // We use a fresh context but share secrets/env
+    const result = await executeStep(
+      agentStep,
+      context,
+      this.logger,
+      this.executeSubWorkflow.bind(this),
+      this.mcpManager,
+      this.memoryDb,
+      this.options.workflowDir,
+      this.options.dryRun
+    );
-    if (this.shouldSkipStep(step, baseContext)) {
-      this.logger.log(`  ⊘ Skipping step ${step.id} (condition not met)`);
-      const stepExecId = randomUUID();
-      await this.db.createStep(stepExecId, this.runId, step.id);
-      await this.db.completeStep(stepExecId, 'skipped', null);
-      this.stepContexts.set(step.id, { status: 'skipped' });
-      return;
+    if (result.status !== 'success' || !result.output) {
+      throw new Error(`Healer agent failed: ${result.error || 'No output'}`);
     }
-    if (step.foreach) {
-      const items = ExpressionEvaluator.evaluate(step.foreach, baseContext);
-      if (!Array.isArray(items)) {
-        throw new Error(`foreach expression must evaluate to an array: ${step.foreach}`);
-      }
-      this.logger.log(`  ⤷ Executing step ${step.id} for ${items.length} items`);
-      if (items.length > WorkflowRunner.MEMORY_WARNING_THRESHOLD && !this.hasWarnedMemory) {
-        this.logger.warn(
-          `  ⚠️  Warning: Large foreach loop detected (${items.length} items). This may consume significant memory and lead to instability.`
-        );
-        this.hasWarnedMemory = true;
-      }
-      // Evaluate concurrency if it's an expression, otherwise use the number directly
-      let concurrencyLimit = items.length;
-      if (step.concurrency !== undefined) {
-        if (typeof step.concurrency === 'string') {
-          concurrencyLimit = Number(ExpressionEvaluator.evaluate(step.concurrency, baseContext));
-          if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
-            throw new Error(
-              `concurrency must evaluate to a positive integer, got: ${concurrencyLimit}`
-            );
-          }
-        } else {
-          concurrencyLimit = step.concurrency;
-        }
-      }
-      // Create parent step record in DB
-      const parentStepExecId = randomUUID();
-      await this.db.createStep(parentStepExecId, this.runId, step.id);
-      await this.db.startStep(parentStepExecId);
-      // Persist the foreach items in parent step for deterministic resume
-      // This ensures resume uses the same array even if expression would evaluate differently
-      await this.db.completeStep(parentStepExecId, 'pending', { __foreachItems: items });
+    return result.output as Partial<Step>;
+  }
-      try {
-        // Initialize results array with existing context or empty slots
-        const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
-        const itemResults: StepContext[] = existingContext?.items || new Array(items.length);
+  /**
+   * Automatically learn from a successful step outcome
+   */
+  private async learnFromStep(
+    step: Step,
+    result: StepResult,
+    _context: ExpressionContext
+  ): Promise<void> {
+    const getAdapterFn = this.options.getAdapter || getAdapter;
+    const { adapter } = getAdapterFn('local'); // Default for embedding
+    if (!adapter.embed) return;
+    // Combine input context (if relevant) and output
+    // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
+    // We can try to construct a summary of what happened
+    let textToEmbed = `Step ID: ${step.id} (${step.type})\n`;
+    if (step.type === 'llm') {
+      // biome-ignore lint/suspicious/noExplicitAny: generic access
+      textToEmbed += `Task Context/Prompt:\n${(step as any).prompt}\n\n`;
+    } else if (step.type === 'shell') {
+      // biome-ignore lint/suspicious/noExplicitAny: generic access
+      textToEmbed += `Command:\n${(step as any).run}\n\n`;
+    }
-        // Ensure array is correct length if items changed (unlikely in resume but safe)
-        if (itemResults.length !== items.length) {
-          itemResults.length = items.length;
-        }
+    textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
-        // Worker pool implementation for true concurrency
-        let currentIndex = 0;
-        let aborted = false;
-        const workers = new Array(Math.min(concurrencyLimit, items.length))
-          .fill(null)
-          .map(async () => {
-            while (currentIndex < items.length && !aborted) {
-              const i = currentIndex++; // Capture index atomically
-              const item = items[i];
-              // Skip if already successful or skipped in previous run or by another worker
-              if (
-                itemResults[i] &&
-                (itemResults[i].status === 'success' || itemResults[i].status === 'skipped')
-              ) {
-                continue;
-              }
+    const embedding = await adapter.embed(textToEmbed, 'local');
+    await this.memoryDb.store(textToEmbed, embedding, {
+      stepId: step.id,
+      workflow: this.workflow.name,
+      timestamp: new Date().toISOString(),
+    });
-              const itemContext = this.buildContext(item, i);
-              // Check DB again for robustness (in case itemResults wasn't fully restored)
-              const existingExec = this.db.getStepByIteration(this.runId, step.id, i);
-              if (
-                existingExec &&
-                (existingExec.status === 'success' || existingExec.status === 'skipped')
-              ) {
-                let output: unknown = null;
-                try {
-                  output = existingExec.output ? JSON.parse(existingExec.output) : null;
-                } catch (error) {
-                  this.logger.warn(
-                    `Failed to parse output for step ${step.id} iteration ${i}: ${error}`
-                  );
-                  output = { error: 'Failed to parse output' };
-                }
-                itemResults[i] = {
-                  output,
-                  outputs:
-                    typeof output === 'object' && output !== null && !Array.isArray(output)
-                      ? (output as Record<string, unknown>)
-                      : {},
-                  status: existingExec.status as 'success' | 'skipped',
-                };
-                continue;
-              }
+    this.logger.log(`  ✨ Learned from step ${step.id}`);
+  }
-              const stepExecId = randomUUID();
-              await this.db.createStep(stepExecId, this.runId, step.id, i);
-              // Execute and store result at correct index
-              try {
-                this.logger.log(`  ⤷ [${i + 1}/${items.length}] Executing iteration...`);
-                itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
-                if (itemResults[i].status === 'failed') {
-                  aborted = true;
-                }
-              } catch (error) {
-                aborted = true;
-                throw error;
-              }
-            }
-          });
+  /**
+   * Consult the built-in "Mechanic" agent to fix a failing step
+   */
+  private async getFixFromReflexion(
+    step: Step,
+    error: string,
+    hint?: string
+  ): Promise<Partial<Step>> {
+    const systemPrompt = `You are the "Mechanic", an expert coding assistant built into the Keystone CLI.
+Your job is to fix failing shell commands or scripts by analyzing the error output and the user's original intent.
+Rules:
+1. Analyze the failing command and the error message which comes from stdout/stderr.
+2. If a "Hint" is provided, prioritize it as the primary strategy for the fix.
+3. Return ONLY a valid JSON object containing the fields that need to be changed in the step configuration.
+4. Do NOT verify the fix yourself; just provide the corrected configuration.
+5. Common fixes include:
+   - Installing missing dependencies (e.g. pip install, npm install)
+   - Fixing syntax errors
+   - Creating missing directories
+   - Adjusting flags or arguments`;
+    // biome-ignore lint/suspicious/noExplicitAny: generic access
+    const runCommand = (step as any).run;
+    const userContent = `The following step failed:
+\`\`\`json
+${JSON.stringify({ type: step.type, run: runCommand }, null, 2)}
+\`\`\`
+Error Output:
+${error}
+${hint ? `Hint from User: "${hint}"` : ''}
+Please provide the fixed step configuration as JSON.`;
+    const messages: LLMMessage[] = [
+      { role: 'system', content: systemPrompt },
+      { role: 'user', content: userContent },
+    ];
-        await Promise.all(workers);
-        // Aggregate results to match Spec requirements
-        // This allows:
-        // 1. ${{ steps.id.output }} -> array of output values
-        // 2. ${{ steps.id.items[0].status }} -> 'success'
-        // 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
-        const outputs = itemResults.map((r) => r.output);
-        const allSuccess = itemResults.every((r) => r.status === 'success');
-        const anySuspended = itemResults.some((r) => r.status === 'suspended');
-        // Aggregate usage from all items
-        const aggregatedUsage = itemResults.reduce(
-          (acc, r) => {
-            if (r.usage) {
-              acc.prompt_tokens += r.usage.prompt_tokens;
-              acc.completion_tokens += r.usage.completion_tokens;
-              acc.total_tokens += r.usage.total_tokens;
-            }
-            return acc;
-          },
-          { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
-        );
+    try {
+      // Use the default model (gpt-4o) or configured default for the Mechanic
+      // We'll use gpt-4o as a strong default for this reasoning task
+      const getAdapterFn = this.options.getAdapter || getAdapter;
+      const { adapter, resolvedModel } = getAdapterFn('gpt-4o');
+      this.logger.log(`  🤖 Mechanic is analyzing the failure using ${resolvedModel}...`);
+      const response = await adapter.chat(messages, {
+        model: resolvedModel,
+      });
-        // Map child properties for easier access
-        // If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
-        const mappedOutputs = this.aggregateOutputs(outputs);
+      const content = response.message.content;
+      if (!content) {
+        throw new Error('Mechanic returned empty response');
+      }
-        // Determine final status
-        let finalStatus: StepContext['status'] = 'failed';
-        if (allSuccess) {
-          finalStatus = 'success';
-        } else if (anySuspended) {
-          finalStatus = 'suspended';
-        }
+      try {
+        const fixedConfig = extractJson(content) as Partial<Step>;
+        return fixedConfig;
+      } catch (e) {
+        throw new Error(`Failed to parse Mechanic's response as JSON: ${content}`);
+      }
+    } catch (err) {
+      throw new Error(`Mechanic unavailable: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
-        // Use proper object structure that serializes correctly
-        const aggregatedContext: ForeachStepContext = {
-          output: outputs,
-          outputs: mappedOutputs,
-          status: finalStatus,
-          items: itemResults,
-          usage: aggregatedUsage,
-        };
+  /**
+   * Execute a step (handles foreach if present)
+   */
+  private async executeStepWithForeach(step: Step): Promise<void> {
+    const baseContext = this.buildContext();
-        this.stepContexts.set(step.id, aggregatedContext);
+    if (this.shouldSkipStep(step, baseContext)) {
+      this.logger.log(`  ⊘ Skipping step ${step.id} (condition not met)`);
+      const stepExecId = randomUUID();
+      await this.db.createStep(stepExecId, this.runId, step.id);
+      await this.db.completeStep(stepExecId, 'skipped', null);
+      this.stepContexts.set(step.id, { status: 'skipped' });
+      return;
+    }
-        // Update parent step record with aggregated status
-        await this.db.completeStep(
-          parentStepExecId,
-          finalStatus,
-          aggregatedContext,
-          finalStatus === 'failed' ? 'One or more iterations failed' : undefined
-        );
+    if (step.foreach) {
+      const { ForeachExecutor } = await import('./foreach-executor.ts');
+      const executor = new ForeachExecutor(
+        this.db,
+        this.logger,
+        this.executeStepInternal.bind(this)
+      );
-        if (finalStatus === 'suspended') {
-          // If any iteration suspended, the whole step is suspended
-          // We assume for now that only human steps can suspend, and we'll use the first one's input type
-          const suspendedItem = itemResults.find((r) => r.status === 'suspended');
-          throw new WorkflowSuspendedError(
-            suspendedItem?.error || 'Iteration suspended',
-            step.id,
-            'text'
-          );
-        }
+      const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
+      const result = await executor.execute(step, baseContext, this.runId, existingContext);
-        if (finalStatus === 'failed') {
-          throw new Error(`Step ${step.id} failed: one or more iterations failed`);
-        }
-      } catch (error) {
-        if (error instanceof WorkflowSuspendedError) {
-          throw error;
-        }
-        // Mark parent step as failed
-        const errorMsg = error instanceof Error ? error.message : String(error);
-        await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
-        throw error;
-      }
+      this.stepContexts.set(step.id, result);
     } else {
       // Single execution
       const stepExecId = randomUUID();
@@ -888,6 +1067,7 @@ export class WorkflowRunner {
       logger: this.logger,
       mcpManager: this.mcpManager,
       workflowDir: subWorkflowDir,
+      depth: this.depth + 1,
     });
     try {
@@ -960,7 +1140,7 @@ export class WorkflowRunner {
         this.logger.log('All steps already completed. Nothing to resume.\n');
         // Evaluate outputs from completed state
         const outputs = this.evaluateOutputs();
-        await this.db.updateRunStatus(this.runId, 'completed', outputs);
+        await this.db.updateRunStatus(this.runId, 'success', outputs);
         this.logger.log('✨ Workflow already completed!\n');
         return outputs;
       }
@@ -986,6 +1166,11 @@ export class WorkflowRunner {
           globalConcurrencyLimit = this.workflow.concurrency;
         }
       }
+      if (!Number.isInteger(globalConcurrencyLimit) || globalConcurrencyLimit <= 0) {
+        throw new Error(
+          `workflow.concurrency must be a positive integer, got: ${globalConcurrencyLimit}`
+        );
+      }
       // Execute steps in parallel where possible (respecting dependencies and global concurrency)
       const pendingSteps = new Set(remainingSteps);
@@ -1049,7 +1234,7 @@ export class WorkflowRunner {
       const outputs = this.evaluateOutputs();
       // Mark run as complete
-      await this.db.updateRunStatus(this.runId, 'completed', outputs);
+      await this.db.updateRunStatus(this.runId, 'success', outputs);
       this.logger.log('✨ Workflow completed successfully!\n');