keystone-cli 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +55 -8
  2. package/package.json +5 -3
  3. package/src/cli.ts +33 -192
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +122 -0
  6. package/src/db/sqlite-setup.ts +49 -0
  7. package/src/db/workflow-db.test.ts +41 -10
  8. package/src/db/workflow-db.ts +84 -28
  9. package/src/expression/evaluator.test.ts +19 -0
  10. package/src/expression/evaluator.ts +134 -39
  11. package/src/parser/schema.ts +41 -0
  12. package/src/runner/audit-verification.test.ts +23 -0
  13. package/src/runner/auto-heal.test.ts +64 -0
  14. package/src/runner/debug-repl.test.ts +74 -0
  15. package/src/runner/debug-repl.ts +225 -0
  16. package/src/runner/foreach-executor.ts +327 -0
  17. package/src/runner/llm-adapter.test.ts +27 -14
  18. package/src/runner/llm-adapter.ts +90 -112
  19. package/src/runner/llm-executor.test.ts +91 -6
  20. package/src/runner/llm-executor.ts +26 -6
  21. package/src/runner/mcp-client.audit.test.ts +69 -0
  22. package/src/runner/mcp-client.test.ts +12 -3
  23. package/src/runner/mcp-client.ts +199 -19
  24. package/src/runner/mcp-manager.ts +19 -8
  25. package/src/runner/mcp-server.test.ts +8 -5
  26. package/src/runner/mcp-server.ts +31 -17
  27. package/src/runner/optimization-runner.ts +305 -0
  28. package/src/runner/reflexion.test.ts +87 -0
  29. package/src/runner/shell-executor.test.ts +12 -0
  30. package/src/runner/shell-executor.ts +9 -6
  31. package/src/runner/step-executor.test.ts +46 -1
  32. package/src/runner/step-executor.ts +154 -60
  33. package/src/runner/stream-utils.test.ts +65 -0
  34. package/src/runner/stream-utils.ts +186 -0
  35. package/src/runner/workflow-runner.test.ts +4 -4
  36. package/src/runner/workflow-runner.ts +436 -251
  37. package/src/templates/agents/keystone-architect.md +6 -4
  38. package/src/templates/full-feature-demo.yaml +4 -4
  39. package/src/types/assets.d.ts +14 -0
  40. package/src/types/status.ts +1 -1
  41. package/src/ui/dashboard.tsx +38 -26
  42. package/src/utils/auth-manager.ts +3 -1
  43. package/src/utils/logger.test.ts +76 -0
  44. package/src/utils/logger.ts +39 -0
  45. package/src/utils/prompt.ts +75 -0
  46. package/src/utils/redactor.test.ts +86 -4
  47. package/src/utils/redactor.ts +48 -13
@@ -1,22 +1,23 @@
1
1
  import { randomUUID } from 'node:crypto';
2
- import { dirname } from 'node:path';
2
+ import { dirname, join } from 'node:path';
3
+ import { MemoryDb } from '../db/memory-db.ts';
3
4
  import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
4
5
  import type { ExpressionContext } from '../expression/evaluator.ts';
5
6
  import { ExpressionEvaluator } from '../expression/evaluator.ts';
6
7
  import type { Step, Workflow, WorkflowStep } from '../parser/schema.ts';
7
8
  import { WorkflowParser } from '../parser/workflow-parser.ts';
9
+ import { StepStatus, type StepStatusType, WorkflowStatus } from '../types/status.ts';
10
+ import { extractJson } from '../utils/json-parser.ts';
8
11
  import { Redactor } from '../utils/redactor.ts';
9
12
  import { WorkflowRegistry } from '../utils/workflow-registry.ts';
13
+ import { ForeachExecutor } from './foreach-executor.ts';
14
+ import { type LLMMessage, getAdapter } from './llm-adapter.ts';
10
15
  import { MCPManager } from './mcp-manager.ts';
11
16
  import { withRetry } from './retry.ts';
12
17
  import { type StepResult, WorkflowSuspendedError, executeStep } from './step-executor.ts';
13
18
  import { withTimeout } from './timeout.ts';
14
19
 
15
- export interface Logger {
16
- log: (msg: string) => void;
17
- error: (msg: string) => void;
18
- warn: (msg: string) => void;
19
- }
20
+ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
20
21
 
21
22
  /**
22
23
  * A logger wrapper that redacts secrets from all log messages
@@ -38,11 +39,22 @@ class RedactingLogger implements Logger {
38
39
  warn(msg: string): void {
39
40
  this.inner.warn(this.redactor.redact(msg));
40
41
  }
42
+
43
/**
 * Forward an informational message to the wrapped logger.
 *
 * The message is run through the redactor first, so the wrapped logger only
 * ever receives the redacted text.
 *
 * @param msg - Raw message to redact and forward.
 */
info(msg: string): void {
  const sanitized = this.redactor.redact(msg);
  this.inner.info(sanitized);
}
46
+
47
/**
 * Forward a debug message to the wrapped logger, if it supports debug output.
 *
 * `debug` is treated as optional on the wrapped logger: when it is missing
 * the message is dropped without being redacted or logged.
 *
 * @param msg - Raw message to redact and forward.
 */
debug(msg: string): void {
  const { inner } = this;
  if (!inner.debug) {
    return;
  }
  inner.debug(this.redactor.redact(msg));
}
41
52
  }
42
53
 
43
54
  export interface RunOptions {
44
55
  inputs?: Record<string, unknown>;
45
56
  dbPath?: string;
57
+ memoryDbPath?: string;
46
58
  resumeRunId?: string;
47
59
  logger?: Logger;
48
60
  mcpManager?: MCPManager;
@@ -50,12 +62,15 @@ export interface RunOptions {
50
62
  workflowDir?: string;
51
63
  resumeInputs?: Record<string, unknown>;
52
64
  dryRun?: boolean;
65
+ debug?: boolean;
66
+ getAdapter?: typeof getAdapter;
67
+ depth?: number;
53
68
  }
54
69
 
55
70
  export interface StepContext {
56
71
  output?: unknown;
57
72
  outputs?: Record<string, unknown>;
58
- status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
73
+ status: StepStatusType;
59
74
  error?: string;
60
75
  usage?: {
61
76
  prompt_tokens: number;
@@ -78,6 +93,7 @@ export interface ForeachStepContext extends StepContext {
78
93
  export class WorkflowRunner {
79
94
  private workflow: Workflow;
80
95
  private db: WorkflowDb;
96
+ private memoryDb: MemoryDb;
81
97
  private runId: string;
82
98
  private stepContexts: Map<string, StepContext | ForeachStepContext> = new Map();
83
99
  private inputs: Record<string, unknown>;
@@ -92,15 +108,26 @@ export class WorkflowRunner {
92
108
  private isStopping = false;
93
109
  private hasWarnedMemory = false;
94
110
  private static readonly MEMORY_WARNING_THRESHOLD = 1000;
111
+ private static readonly MAX_RECURSION_DEPTH = 10;
112
+ private depth = 0;
95
113
 
96
114
  constructor(workflow: Workflow, options: RunOptions = {}) {
97
115
  this.workflow = workflow;
98
116
  this.options = options;
117
+ this.depth = options.depth || 0;
118
+
119
+ if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
120
+ throw new Error(
121
+ `Maximum workflow recursion depth (${WorkflowRunner.MAX_RECURSION_DEPTH}) exceeded.`
122
+ );
123
+ }
124
+
99
125
  this.db = new WorkflowDb(options.dbPath);
126
+ this.memoryDb = new MemoryDb(options.memoryDbPath);
100
127
  this.secrets = this.loadSecrets();
101
128
  this.redactor = new Redactor(this.secrets);
102
129
  // Wrap the logger with a redactor to prevent secret leakage in logs
103
- const rawLogger = options.logger || console;
130
+ const rawLogger = options.logger || new ConsoleLogger();
104
131
  this.logger = new RedactingLogger(rawLogger, this.redactor);
105
132
  this.mcpManager = options.mcpManager || new MCPManager();
106
133
 
@@ -129,13 +156,13 @@ export class WorkflowRunner {
129
156
  * Restore state from a previous run (for resume functionality)
130
157
  */
131
158
  private async restoreState(): Promise<void> {
132
- const run = this.db.getRun(this.runId);
159
+ const run = await this.db.getRun(this.runId);
133
160
  if (!run) {
134
161
  throw new Error(`Run ${this.runId} not found`);
135
162
  }
136
163
 
137
164
  // Only allow resuming failed or paused runs
138
- if (run.status !== 'failed' && run.status !== 'paused') {
165
+ if (run.status !== WorkflowStatus.FAILED && run.status !== WorkflowStatus.PAUSED) {
139
166
  throw new Error(
140
167
  `Cannot resume run with status '${run.status}'. Only 'failed' or 'paused' runs can be resumed.`
141
168
  );
@@ -144,18 +171,22 @@ export class WorkflowRunner {
144
171
  // Restore inputs from the previous run to ensure consistency
145
172
  // Merge with any resumeInputs provided (e.g. answers to human steps)
146
173
  try {
147
- const storedInputs = JSON.parse(run.inputs);
148
- this.inputs = { ...storedInputs, ...this.inputs };
174
+ if (!run.inputs || run.inputs === 'null' || run.inputs === '') {
175
+ this.logger.warn(`Run ${this.runId} has no persisted inputs`);
176
+ // Keep existing inputs
177
+ } else {
178
+ const storedInputs = JSON.parse(run.inputs);
179
+ this.inputs = { ...storedInputs, ...this.inputs };
180
+ }
149
181
  } catch (error) {
150
- // Log warning but continue with default empty inputs instead of crashing
151
- this.logger.warn(
152
- `Failed to parse inputs from run ${this.runId}, using defaults: ${error instanceof Error ? error.message : String(error)}`
182
+ this.logger.error(
183
+ `CRITICAL: Failed to parse inputs from run ${this.runId}. Data may be corrupted. Using default/resume inputs. Error: ${error instanceof Error ? error.message : String(error)}`
153
184
  );
154
- // Keep existing inputs (from resumeInputs or empty)
185
+ // Fallback: preserve existing inputs from resume options
155
186
  }
156
187
 
157
188
  // Load all step executions for this run
158
- const steps = this.db.getStepsByRun(this.runId);
189
+ const steps = await this.db.getStepsByRun(this.runId);
159
190
 
160
191
  // Group steps by step_id to handle foreach loops (multiple executions per step_id)
161
192
  const stepExecutionsByStepId = new Map<string, typeof steps>();
@@ -194,7 +225,7 @@ export class WorkflowRunner {
194
225
  for (const exec of sortedExecs) {
195
226
  if (exec.iteration_index === null) continue; // Skip parent step record
196
227
 
197
- if (exec.status === 'success' || exec.status === 'skipped') {
228
+ if (exec.status === StepStatus.SUCCESS || exec.status === StepStatus.SKIPPED) {
198
229
  let output: unknown = null;
199
230
  try {
200
231
  output = exec.output ? JSON.parse(exec.output) : null;
@@ -210,7 +241,7 @@ export class WorkflowRunner {
210
241
  typeof output === 'object' && output !== null && !Array.isArray(output)
211
242
  ? (output as Record<string, unknown>)
212
243
  : {},
213
- status: exec.status as 'success' | 'skipped',
244
+ status: exec.status as typeof StepStatus.SUCCESS | typeof StepStatus.SKIPPED,
214
245
  };
215
246
  outputs[exec.iteration_index] = output;
216
247
  } else {
@@ -219,7 +250,7 @@ export class WorkflowRunner {
219
250
  items[exec.iteration_index] = {
220
251
  output: null,
221
252
  outputs: {},
222
- status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
253
+ status: exec.status as StepStatusType,
223
254
  };
224
255
  }
225
256
  }
@@ -263,17 +294,17 @@ export class WorkflowRunner {
263
294
  !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
264
295
 
265
296
  // Determine overall status based on iterations
266
- let status: StepContext['status'] = 'success';
297
+ let status: StepContext['status'] = StepStatus.SUCCESS;
267
298
  if (allSuccess && hasAllItems) {
268
- status = 'success';
269
- } else if (items.some((item) => item?.status === 'suspended')) {
270
- status = 'suspended';
299
+ status = StepStatus.SUCCESS;
300
+ } else if (items.some((item) => item?.status === StepStatus.SUSPENDED)) {
301
+ status = StepStatus.SUSPENDED;
271
302
  } else {
272
- status = 'failed';
303
+ status = StepStatus.FAILED;
273
304
  }
274
305
 
275
306
  // Always restore what we have to allow partial expression evaluation
276
- const mappedOutputs = this.aggregateOutputs(outputs);
307
+ const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
277
308
  this.stepContexts.set(stepId, {
278
309
  output: outputs,
279
310
  outputs: mappedOutputs,
@@ -282,13 +313,17 @@ export class WorkflowRunner {
282
313
  } as ForeachStepContext);
283
314
 
284
315
  // Only mark as fully completed if all iterations completed successfully AND we have all items
285
- if (status === 'success') {
316
+ if (status === StepStatus.SUCCESS) {
286
317
  completedStepIds.add(stepId);
287
318
  }
288
319
  } else {
289
320
  // Single execution step
290
321
  const exec = stepExecutions[0];
291
- if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
322
+ if (
323
+ exec.status === StepStatus.SUCCESS ||
324
+ exec.status === StepStatus.SKIPPED ||
325
+ exec.status === StepStatus.SUSPENDED
326
+ ) {
292
327
  let output: unknown = null;
293
328
  try {
294
329
  output = exec.output ? JSON.parse(exec.output) : null;
@@ -304,7 +339,7 @@ export class WorkflowRunner {
304
339
  : {},
305
340
  status: exec.status as StepContext['status'],
306
341
  });
307
- if (exec.status !== 'suspended') {
342
+ if (exec.status !== StepStatus.SUSPENDED) {
308
343
  completedStepIds.add(stepId);
309
344
  }
310
345
  }
@@ -322,7 +357,7 @@ export class WorkflowRunner {
322
357
  const handler = async (signal: string) => {
323
358
  if (this.isStopping) return;
324
359
  this.logger.log(`\n\n🛑 Received ${signal}. Cleaning up...`);
325
- await this.stop('failed', `Cancelled by user (${signal})`);
360
+ await this.stop(WorkflowStatus.FAILED, `Cancelled by user (${signal})`);
326
361
 
327
362
  // Only exit if not embedded
328
363
  if (!this.options.preventExit) {
@@ -339,7 +374,7 @@ export class WorkflowRunner {
339
374
  /**
340
375
  * Stop the runner and cleanup resources
341
376
  */
342
- public async stop(status: RunStatus = 'failed', error?: string): Promise<void> {
377
+ public async stop(status: RunStatus = WorkflowStatus.FAILED, error?: string): Promise<void> {
343
378
  if (this.isStopping) return;
344
379
  this.isStopping = true;
345
380
 
@@ -353,6 +388,7 @@ export class WorkflowRunner {
353
388
  await this.mcpManager.stopAll();
354
389
 
355
390
  this.db.close();
391
+ this.memoryDb.close();
356
392
  } catch (err) {
357
393
  this.logger.error(`Error during stop/cleanup: ${err}`);
358
394
  }
@@ -389,9 +425,7 @@ export class WorkflowRunner {
389
425
  '_',
390
426
  'SHLVL',
391
427
  'LC_ALL',
392
- 'OLDPWD',
393
428
  'DISPLAY',
394
- 'TMPDIR',
395
429
  'SSH_AUTH_SOCK',
396
430
  'XPC_FLAGS',
397
431
  'XPC_SERVICE_NAME',
@@ -400,6 +434,20 @@ export class WorkflowRunner {
400
434
  'TERM_PROGRAM',
401
435
  'TERM_PROGRAM_VERSION',
402
436
  'COLORTERM',
437
+ 'LC_TERMINAL',
438
+ 'LC_TERMINAL_VERSION',
439
+ 'PWD',
440
+ 'OLDPWD',
441
+ 'HOME',
442
+ 'USER',
443
+ 'SHELL',
444
+ 'PATH',
445
+ 'LOGNAME',
446
+ 'TMPDIR',
447
+ 'XDG_CONFIG_HOME',
448
+ 'XDG_DATA_HOME',
449
+ 'XDG_CACHE_HOME',
450
+ 'XDG_RUNTIME_DIR',
403
451
  ]);
404
452
 
405
453
  // Bun automatically loads .env file
@@ -411,31 +459,6 @@ export class WorkflowRunner {
411
459
  return secrets;
412
460
  }
413
461
 
414
- /**
415
- * Aggregate outputs from multiple iterations of a foreach step
416
- */
417
- private aggregateOutputs(outputs: unknown[]): Record<string, unknown> {
418
- const mappedOutputs: Record<string, unknown> = { length: outputs.length };
419
- const allKeys = new Set<string>();
420
-
421
- for (const output of outputs) {
422
- if (output && typeof output === 'object' && !Array.isArray(output)) {
423
- for (const key of Object.keys(output)) {
424
- allKeys.add(key);
425
- }
426
- }
427
- }
428
-
429
- for (const key of allKeys) {
430
- mappedOutputs[key] = outputs.map((o) =>
431
- o && typeof o === 'object' && !Array.isArray(o) && key in (o as Record<string, unknown>)
432
- ? (o as Record<string, unknown>)[key]
433
- : null
434
- );
435
- }
436
- return mappedOutputs;
437
- }
438
-
439
462
  /**
440
463
  * Apply workflow defaults to inputs and validate types
441
464
  */
@@ -541,6 +564,39 @@ export class WorkflowRunner {
541
564
  }
542
565
  }
543
566
 
567
/**
 * Build a few-shot preamble from earlier successful runs of a workflow.
 *
 * Requests up to three successful runs of `workflowName` from the workflow
 * database and renders each one as a numbered "Example" with its inputs and
 * outputs. Stored values are pretty-printed when they parse as JSON; if
 * either one fails to parse, both are emitted as the raw stored strings.
 *
 * @param workflowName - Workflow name used for the database lookup.
 * @returns The formatted examples, or an empty string when no runs are found
 *   or the lookup throws (in which case a warning is logged).
 */
private async getFewShotExamples(workflowName: string): Promise<string> {
  const prettyPrint = (raw: string): string => JSON.stringify(JSON.parse(raw), null, 2);

  try {
    const pastRuns = await this.db.getSuccessfulRuns(workflowName, 3);
    if (!pastRuns || pastRuns.length === 0) {
      return '';
    }

    const sections: string[] = [
      'Here are examples of how you successfully handled this task in the past:\n',
    ];

    for (const [position, pastRun] of pastRuns.entries()) {
      sections.push(`\nExample ${position + 1}:\n`);
      try {
        // Both values are rendered before anything is pushed, so a parse
        // failure on either one falls back to the raw form for both.
        const inputs = prettyPrint(pastRun.inputs);
        const outputs = pastRun.outputs ? prettyPrint(pastRun.outputs) : '{}';
        sections.push(`Input: ${inputs}\n`, `Output: ${outputs}\n`);
      } catch {
        // Stored value is not valid JSON: emit it verbatim.
        sections.push(`Input: ${pastRun.inputs}\n`, `Output: ${pastRun.outputs || '{}'}\n`);
      }
    }

    return sections.join('');
  } catch (error) {
    this.logger.warn(`Failed to retrieve few-shot examples: ${error}`);
    return '';
  }
}
599
+
544
600
  /**
545
601
  * Execute a single step instance and return the result
546
602
  * Does NOT update global stepContexts
@@ -550,15 +606,37 @@ export class WorkflowRunner {
550
606
  context: ExpressionContext,
551
607
  stepExecId: string
552
608
  ): Promise<StepContext> {
553
- await this.db.startStep(stepExecId);
609
+ let stepToExecute = step;
610
+
611
+ // Inject few-shot examples if enabled
612
+ if (step.type === 'llm' && step.learn) {
613
+ const examples = await this.getFewShotExamples(this.workflow.name);
614
+ if (examples) {
615
+ stepToExecute = {
616
+ ...step,
617
+ prompt: `${examples}\n\n${step.prompt}`,
618
+ };
619
+ this.logger.log(
620
+ ` 🧠 Injected few-shot examples from ${examples.split('Example').length - 1} past runs`
621
+ );
622
+ }
623
+ }
624
+
625
+ const isRecursion =
626
+ (context.reflexionAttempts as number) > 0 || (context.autoHealAttempts as number) > 0;
627
+
628
+ if (!isRecursion) {
629
+ await this.db.startStep(stepExecId);
630
+ }
554
631
 
555
632
  const operation = async () => {
556
633
  const result = await executeStep(
557
- step,
634
+ stepToExecute,
558
635
  context,
559
636
  this.logger,
560
637
  this.executeSubWorkflow.bind(this),
561
638
  this.mcpManager,
639
+ this.memoryDb,
562
640
  this.options.workflowDir,
563
641
  this.options.dryRun
564
642
  );
@@ -581,10 +659,10 @@ export class WorkflowRunner {
581
659
  await this.db.incrementRetry(stepExecId);
582
660
  });
583
661
 
584
- if (result.status === 'suspended') {
662
+ if (result.status === StepStatus.SUSPENDED) {
585
663
  await this.db.completeStep(
586
664
  stepExecId,
587
- 'suspended',
665
+ StepStatus.SUSPENDED,
588
666
  result.output,
589
667
  'Waiting for interaction',
590
668
  result.usage
@@ -600,6 +678,17 @@ export class WorkflowRunner {
600
678
  result.usage
601
679
  );
602
680
 
681
+ // Auto-Learning logic
682
+ if (step.learn && result.status === StepStatus.SUCCESS) {
683
+ try {
684
+ await this.learnFromStep(step, result, context);
685
+ } catch (error) {
686
+ this.logger.warn(
687
+ ` ⚠️ Failed to learn from step ${step.id}: ${error instanceof Error ? error.message : String(error)}`
688
+ );
689
+ }
690
+ }
691
+
603
692
  // Ensure outputs is always an object for consistent access
604
693
  let outputs: Record<string, unknown>;
605
694
  if (
@@ -621,6 +710,104 @@ export class WorkflowRunner {
621
710
  usage: result.usage,
622
711
  };
623
712
  } catch (error) {
713
+ // Reflexion (Self-Correction) logic
714
+ if (step.reflexion) {
715
+ const { limit = 3, hint } = step.reflexion;
716
+ const currentAttempt = (context.reflexionAttempts as number) || 0;
717
+
718
+ if (currentAttempt < limit) {
719
+ const errorMsg = error instanceof Error ? error.message : String(error);
720
+ this.logger.log(
721
+ ` 🔧 Reflexion triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${limit})`
722
+ );
723
+
724
+ try {
725
+ // Get corrected command from Mechanic
726
+ const fixedStep = await this.getFixFromReflexion(step, errorMsg, hint);
727
+
728
+ // Merge fixed properties
729
+ const newStep = { ...step, ...fixedStep };
730
+
731
+ // Retry with new step definition
732
+ const nextContext = {
733
+ ...context,
734
+ reflexionAttempts: currentAttempt + 1,
735
+ };
736
+
737
+ return this.executeStepInternal(newStep, nextContext, stepExecId);
738
+ } catch (healError) {
739
+ this.logger.error(
740
+ ` ✗ Reflexion failed: ${healError instanceof Error ? healError.message : String(healError)}`
741
+ );
742
+ // Fall through to auto-heal or failure
743
+ }
744
+ }
745
+ }
746
+
747
+ // Auto-heal logic
748
+ if (step.auto_heal && typeof step.auto_heal === 'object') {
749
+ const autoHeal = step.auto_heal;
750
+ // Limit recursion/loops
751
+ const maxAttempts = autoHeal.maxAttempts || 1;
752
+ const currentAttempt = (context.autoHealAttempts as number) || 0;
753
+
754
+ if (currentAttempt < maxAttempts) {
755
+ const errorMsg = error instanceof Error ? error.message : String(error);
756
+ this.logger.log(
757
+ ` 🩹 Auto-healing triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${maxAttempts})`
758
+ );
759
+
760
+ try {
761
+ // Get fix from agent
762
+ const fixedStep = await this.getFixFromAgent(step, errorMsg, context);
763
+
764
+ // Merge fixed properties into the step
765
+ const newStep = { ...step, ...fixedStep };
766
+
767
+ // Retry with new step definition
768
+ const nextContext = {
769
+ ...context,
770
+ autoHealAttempts: currentAttempt + 1,
771
+ };
772
+
773
+ return this.executeStepInternal(newStep, nextContext, stepExecId);
774
+ } catch (healError) {
775
+ this.logger.error(
776
+ ` ✗ Auto-heal failed: ${healError instanceof Error ? healError.message : String(healError)}`
777
+ );
778
+ // Fall through to normal failure
779
+ }
780
+ }
781
+ }
782
+
783
+ // Debug REPL logic
784
+ if (this.options.debug) {
785
+ try {
786
+ const { DebugRepl } = await import('./debug-repl.ts');
787
+ const repl = new DebugRepl(context, step, error, this.logger);
788
+ const action = await repl.start();
789
+
790
+ if (action.type === 'retry') {
791
+ this.logger.log(` ↻ Retrying step ${step.id} after manual intervention`);
792
+ // We use the modified step if provided, else original
793
+ const stepToRun = action.modifiedStep || step;
794
+ return this.executeStepInternal(stepToRun, context, stepExecId);
795
+ }
796
+ if (action.type === 'skip') {
797
+ this.logger.log(` ⏭️ Skipping step ${step.id} manually`);
798
+ await this.db.completeStep(stepExecId, StepStatus.SKIPPED, null, undefined, undefined);
799
+ return {
800
+ output: null,
801
+ outputs: {},
802
+ status: StepStatus.SKIPPED,
803
+ };
804
+ }
805
+ // if 'continue_failure', fall through
806
+ } catch (replError) {
807
+ this.logger.error(` ✗ Debug REPL error: ${replError}`);
808
+ }
809
+ }
810
+
624
811
  const errorMsg = error instanceof Error ? error.message : String(error);
625
812
  const redactedErrorMsg = this.redactor.redact(errorMsg);
626
813
  this.logger.error(` ✗ Step ${step.id} failed: ${redactedErrorMsg}`);
@@ -636,210 +823,202 @@ export class WorkflowRunner {
636
823
  }
637
824
 
638
825
  /**
639
- * Execute a step (handles foreach if present)
826
+ * Consult an agent to fix a failing step
640
827
  */
641
- private async executeStepWithForeach(step: Step): Promise<void> {
642
- const baseContext = this.buildContext();
828
+ private async getFixFromAgent(
829
+ step: Step,
830
+ error: string,
831
+ context: ExpressionContext
832
+ ): Promise<Partial<Step>> {
833
+ const { auto_heal } = step;
834
+ if (!auto_heal) throw new Error('Auto-heal not configured');
835
+
836
+ const prompt = `
837
+ The following step failed during execution:
838
+ \`\`\`json
839
+ ${JSON.stringify(step, null, 2)}
840
+ \`\`\`
841
+
842
+ Error:
843
+ ${error}
844
+
845
+ Please analyze the error and provide a fixed version of the step configuration.
846
+ Return ONLY a valid JSON object containing the fields that need to be changed.
847
+ For example, if the command was wrong, return:
848
+ { "run": "correct command" }
849
+
850
+ Do not change the 'id' or 'type' or 'auto_heal' fields.
851
+ `;
852
+
853
+ // Create a synthetic step to invoke the agent
854
+ const agentStep: Step = {
855
+ id: `${step.id}-healer`,
856
+ type: 'llm',
857
+ agent: auto_heal.agent,
858
+ model: auto_heal.model,
859
+ prompt,
860
+ schema: {
861
+ type: 'object',
862
+ description: 'Partial step configuration with fixed values',
863
+ additionalProperties: true,
864
+ },
865
+ } as import('../parser/schema.ts').LlmStep;
866
+
867
+ this.logger.log(` 🚑 Consulting agent ${auto_heal.agent} for a fix...`);
868
+
869
+ // Execute the agent step
870
+ // We use a fresh context but share secrets/env
871
+ const result = await executeStep(
872
+ agentStep,
873
+ context,
874
+ this.logger,
875
+ this.executeSubWorkflow.bind(this),
876
+ this.mcpManager,
877
+ this.memoryDb,
878
+ this.options.workflowDir,
879
+ this.options.dryRun
880
+ );
643
881
 
644
- if (this.shouldSkipStep(step, baseContext)) {
645
- this.logger.log(` Skipping step ${step.id} (condition not met)`);
646
- const stepExecId = randomUUID();
647
- await this.db.createStep(stepExecId, this.runId, step.id);
648
- await this.db.completeStep(stepExecId, 'skipped', null);
649
- this.stepContexts.set(step.id, { status: 'skipped' });
650
- return;
882
+ if (result.status !== 'success' || !result.output) {
883
+ throw new Error(`Healer agent failed: ${result.error || 'No output'}`);
651
884
  }
652
885
 
653
- if (step.foreach) {
654
- const items = ExpressionEvaluator.evaluate(step.foreach, baseContext);
655
- if (!Array.isArray(items)) {
656
- throw new Error(`foreach expression must evaluate to an array: ${step.foreach}`);
657
- }
658
-
659
- this.logger.log(` ⤷ Executing step ${step.id} for ${items.length} items`);
660
-
661
- if (items.length > WorkflowRunner.MEMORY_WARNING_THRESHOLD && !this.hasWarnedMemory) {
662
- this.logger.warn(
663
- ` ⚠️ Warning: Large foreach loop detected (${items.length} items). This may consume significant memory and lead to instability.`
664
- );
665
- this.hasWarnedMemory = true;
666
- }
667
-
668
- // Evaluate concurrency if it's an expression, otherwise use the number directly
669
- let concurrencyLimit = items.length;
670
- if (step.concurrency !== undefined) {
671
- if (typeof step.concurrency === 'string') {
672
- concurrencyLimit = Number(ExpressionEvaluator.evaluate(step.concurrency, baseContext));
673
- if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
674
- throw new Error(
675
- `concurrency must evaluate to a positive integer, got: ${concurrencyLimit}`
676
- );
677
- }
678
- } else {
679
- concurrencyLimit = step.concurrency;
680
- }
681
- }
682
-
683
- // Create parent step record in DB
684
- const parentStepExecId = randomUUID();
685
- await this.db.createStep(parentStepExecId, this.runId, step.id);
686
- await this.db.startStep(parentStepExecId);
687
-
688
- // Persist the foreach items in parent step for deterministic resume
689
- // This ensures resume uses the same array even if expression would evaluate differently
690
- await this.db.completeStep(parentStepExecId, 'pending', { __foreachItems: items });
886
+ return result.output as Partial<Step>;
887
+ }
691
888
 
692
- try {
693
- // Initialize results array with existing context or empty slots
694
- const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
695
- const itemResults: StepContext[] = existingContext?.items || new Array(items.length);
889
/**
 * Automatically learn from a successful step outcome.
 *
 * Builds a text summary of the step (its id and type, the prompt for `llm`
 * steps or the command for `shell` steps, and the JSON-serialised output),
 * embeds it with the `local` adapter and stores the text and embedding in
 * the memory database, tagged with the step id, workflow name and an ISO
 * timestamp. Returns without doing anything when the adapter has no `embed`
 * function.
 *
 * The summary is passed through the redactor before it is embedded or
 * stored, so known secret values are not written to the memory database.
 *
 * @param step - The step that just completed.
 * @param result - The step's result; `result.output` is what gets recorded.
 * @param _context - Expression context of the execution (currently unused).
 */
private async learnFromStep(
  step: Step,
  result: StepResult,
  _context: ExpressionContext
): Promise<void> {
  const getAdapterFn = this.options.getAdapter || getAdapter;
  const { adapter } = getAdapterFn('local'); // Default for embedding
  if (!adapter.embed) return;

  // Summarise what happened: "Step ID / task or command / outcome".
  let summary = `Step ID: ${step.id} (${step.type})\n`;

  if (step.type === 'llm') {
    // The `type` check narrows the step, so `prompt` needs no cast.
    summary += `Task Context/Prompt:\n${step.prompt}\n\n`;
  } else if (step.type === 'shell') {
    // biome-ignore lint/suspicious/noExplicitAny: generic access
    summary += `Command:\n${(step as any).run}\n\n`;
  }

  summary += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;

  // Prompts, commands and outputs can contain secret values; scrub them
  // before the text leaves the process or is persisted.
  const textToEmbed = this.redactor.redact(summary);

  const embedding = await adapter.embed(textToEmbed, 'local');
  await this.memoryDb.store(textToEmbed, embedding, {
    stepId: step.id,
    workflow: this.workflow.name,
    timestamp: new Date().toISOString(),
  });

  this.logger.log(` ✨ Learned from step ${step.id}`);
}
747
926
 
748
- const stepExecId = randomUUID();
749
- await this.db.createStep(stepExecId, this.runId, step.id, i);
750
-
751
- // Execute and store result at correct index
752
- try {
753
- this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
754
- itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
755
- if (itemResults[i].status === 'failed') {
756
- aborted = true;
757
- }
758
- } catch (error) {
759
- aborted = true;
760
- throw error;
761
- }
762
- }
763
- });
927
+ /**
928
+ * Consult the built-in "Mechanic" agent to fix a failing step
929
+ */
930
+ private async getFixFromReflexion(
931
+ step: Step,
932
+ error: string,
933
+ hint?: string
934
+ ): Promise<Partial<Step>> {
935
+ const systemPrompt = `You are the "Mechanic", an expert coding assistant built into the Keystone CLI.
936
+ Your job is to fix failing shell commands or scripts by analyzing the error output and the user's original intent.
937
+
938
+ Rules:
939
+ 1. Analyze the failing command and the error message which comes from stdout/stderr.
940
+ 2. If a "Hint" is provided, prioritize it as the primary strategy for the fix.
941
+ 3. Return ONLY a valid JSON object containing the fields that need to be changed in the step configuration.
942
+ 4. Do NOT verify the fix yourself; just provide the corrected configuration.
943
+ 5. Common fixes include:
944
+ - Installing missing dependencies (e.g. pip install, npm install)
945
+ - Fixing syntax errors
946
+ - Creating missing directories
947
+ - Adjusting flags or arguments`;
948
+
949
+ // biome-ignore lint/suspicious/noExplicitAny: generic access
950
+ const runCommand = (step as any).run;
951
+ const userContent = `The following step failed:
952
+ \`\`\`json
953
+ ${JSON.stringify({ type: step.type, run: runCommand }, null, 2)}
954
+ \`\`\`
955
+
956
+ Error Output:
957
+ ${error}
958
+
959
+ ${hint ? `Hint from User: "${hint}"` : ''}
960
+
961
+ Please provide the fixed step configuration as JSON.`;
962
+
963
+ const messages: LLMMessage[] = [
964
+ { role: 'system', content: systemPrompt },
965
+ { role: 'user', content: userContent },
966
+ ];
764
967
 
765
- await Promise.all(workers);
766
-
767
- // Aggregate results to match Spec requirements
768
- // This allows:
769
- // 1. ${{ steps.id.output }} -> array of output values
770
- // 2. ${{ steps.id.items[0].status }} -> 'success'
771
- // 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
772
- const outputs = itemResults.map((r) => r.output);
773
- const allSuccess = itemResults.every((r) => r.status === 'success');
774
- const anySuspended = itemResults.some((r) => r.status === 'suspended');
775
-
776
- // Aggregate usage from all items
777
- const aggregatedUsage = itemResults.reduce(
778
- (acc, r) => {
779
- if (r.usage) {
780
- acc.prompt_tokens += r.usage.prompt_tokens;
781
- acc.completion_tokens += r.usage.completion_tokens;
782
- acc.total_tokens += r.usage.total_tokens;
783
- }
784
- return acc;
785
- },
786
- { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
787
- );
968
+ try {
969
+ // Use the default model (gpt-4o) or configured default for the Mechanic
970
+ // We'll use gpt-4o as a strong default for this reasoning task
971
+ const getAdapterFn = this.options.getAdapter || getAdapter;
972
+ const { adapter, resolvedModel } = getAdapterFn('gpt-4o');
973
+ this.logger.log(` 🤖 Mechanic is analyzing the failure using ${resolvedModel}...`);
974
+
975
+ const response = await adapter.chat(messages, {
976
+ model: resolvedModel,
977
+ });
788
978
 
789
- // Map child properties for easier access
790
- // If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
791
- const mappedOutputs = this.aggregateOutputs(outputs);
979
+ const content = response.message.content;
980
+ if (!content) {
981
+ throw new Error('Mechanic returned empty response');
982
+ }
792
983
 
793
- // Determine final status
794
- let finalStatus: StepContext['status'] = 'failed';
795
- if (allSuccess) {
796
- finalStatus = 'success';
797
- } else if (anySuspended) {
798
- finalStatus = 'suspended';
799
- }
984
+ try {
985
+ const fixedConfig = extractJson(content) as Partial<Step>;
986
+ return fixedConfig;
987
+ } catch (e) {
988
+ throw new Error(`Failed to parse Mechanic's response as JSON: ${content}`);
989
+ }
990
+ } catch (err) {
991
+ throw new Error(`Mechanic unavailable: ${err instanceof Error ? err.message : String(err)}`);
992
+ }
993
+ }
800
994
 
801
- // Use proper object structure that serializes correctly
802
- const aggregatedContext: ForeachStepContext = {
803
- output: outputs,
804
- outputs: mappedOutputs,
805
- status: finalStatus,
806
- items: itemResults,
807
- usage: aggregatedUsage,
808
- };
995
+ /**
996
+ * Execute a step (handles foreach if present)
997
+ */
998
+ private async executeStepWithForeach(step: Step): Promise<void> {
999
+ const baseContext = this.buildContext();
809
1000
 
810
- this.stepContexts.set(step.id, aggregatedContext);
1001
+ if (this.shouldSkipStep(step, baseContext)) {
1002
+ this.logger.log(` ⊘ Skipping step ${step.id} (condition not met)`);
1003
+ const stepExecId = randomUUID();
1004
+ await this.db.createStep(stepExecId, this.runId, step.id);
1005
+ await this.db.completeStep(stepExecId, 'skipped', null);
1006
+ this.stepContexts.set(step.id, { status: 'skipped' });
1007
+ return;
1008
+ }
811
1009
 
812
- // Update parent step record with aggregated status
813
- await this.db.completeStep(
814
- parentStepExecId,
815
- finalStatus,
816
- aggregatedContext,
817
- finalStatus === 'failed' ? 'One or more iterations failed' : undefined
818
- );
1010
+ if (step.foreach) {
1011
+ const { ForeachExecutor } = await import('./foreach-executor.ts');
1012
+ const executor = new ForeachExecutor(
1013
+ this.db,
1014
+ this.logger,
1015
+ this.executeStepInternal.bind(this)
1016
+ );
819
1017
 
820
- if (finalStatus === 'suspended') {
821
- // If any iteration suspended, the whole step is suspended
822
- // We assume for now that only human steps can suspend, and we'll use the first one's input type
823
- const suspendedItem = itemResults.find((r) => r.status === 'suspended');
824
- throw new WorkflowSuspendedError(
825
- suspendedItem?.error || 'Iteration suspended',
826
- step.id,
827
- 'text'
828
- );
829
- }
1018
+ const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
1019
+ const result = await executor.execute(step, baseContext, this.runId, existingContext);
830
1020
 
831
- if (finalStatus === 'failed') {
832
- throw new Error(`Step ${step.id} failed: one or more iterations failed`);
833
- }
834
- } catch (error) {
835
- if (error instanceof WorkflowSuspendedError) {
836
- throw error;
837
- }
838
- // Mark parent step as failed
839
- const errorMsg = error instanceof Error ? error.message : String(error);
840
- await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
841
- throw error;
842
- }
1021
+ this.stepContexts.set(step.id, result);
843
1022
  } else {
844
1023
  // Single execution
845
1024
  const stepExecId = randomUUID();
@@ -888,6 +1067,7 @@ export class WorkflowRunner {
888
1067
  logger: this.logger,
889
1068
  mcpManager: this.mcpManager,
890
1069
  workflowDir: subWorkflowDir,
1070
+ depth: this.depth + 1,
891
1071
  });
892
1072
 
893
1073
  try {
@@ -960,7 +1140,7 @@ export class WorkflowRunner {
960
1140
  this.logger.log('All steps already completed. Nothing to resume.\n');
961
1141
  // Evaluate outputs from completed state
962
1142
  const outputs = this.evaluateOutputs();
963
- await this.db.updateRunStatus(this.runId, 'completed', outputs);
1143
+ await this.db.updateRunStatus(this.runId, 'success', outputs);
964
1144
  this.logger.log('✨ Workflow already completed!\n');
965
1145
  return outputs;
966
1146
  }
@@ -986,6 +1166,11 @@ export class WorkflowRunner {
986
1166
  globalConcurrencyLimit = this.workflow.concurrency;
987
1167
  }
988
1168
  }
1169
+ if (!Number.isInteger(globalConcurrencyLimit) || globalConcurrencyLimit <= 0) {
1170
+ throw new Error(
1171
+ `workflow.concurrency must be a positive integer, got: ${globalConcurrencyLimit}`
1172
+ );
1173
+ }
989
1174
 
990
1175
  // Execute steps in parallel where possible (respecting dependencies and global concurrency)
991
1176
  const pendingSteps = new Set(remainingSteps);
@@ -1049,7 +1234,7 @@ export class WorkflowRunner {
1049
1234
  const outputs = this.evaluateOutputs();
1050
1235
 
1051
1236
  // Mark run as complete
1052
- await this.db.updateRunStatus(this.runId, 'completed', outputs);
1237
+ await this.db.updateRunStatus(this.runId, 'success', outputs);
1053
1238
 
1054
1239
  this.logger.log('✨ Workflow completed successfully!\n');
1055
1240