keystone-cli 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +55 -8
  2. package/package.json +8 -17
  3. package/src/cli.ts +219 -166
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +128 -0
  6. package/src/db/sqlite-setup.test.ts +47 -0
  7. package/src/db/sqlite-setup.ts +49 -0
  8. package/src/db/workflow-db.test.ts +41 -10
  9. package/src/db/workflow-db.ts +90 -28
  10. package/src/expression/evaluator.test.ts +19 -0
  11. package/src/expression/evaluator.ts +134 -39
  12. package/src/parser/schema.ts +41 -0
  13. package/src/runner/audit-verification.test.ts +23 -0
  14. package/src/runner/auto-heal.test.ts +64 -0
  15. package/src/runner/debug-repl.test.ts +308 -0
  16. package/src/runner/debug-repl.ts +225 -0
  17. package/src/runner/foreach-executor.ts +327 -0
  18. package/src/runner/llm-adapter.test.ts +37 -18
  19. package/src/runner/llm-adapter.ts +90 -112
  20. package/src/runner/llm-executor.test.ts +47 -6
  21. package/src/runner/llm-executor.ts +18 -3
  22. package/src/runner/mcp-client.audit.test.ts +69 -0
  23. package/src/runner/mcp-client.test.ts +12 -3
  24. package/src/runner/mcp-client.ts +199 -19
  25. package/src/runner/mcp-manager.ts +19 -8
  26. package/src/runner/mcp-server.test.ts +8 -5
  27. package/src/runner/mcp-server.ts +31 -17
  28. package/src/runner/optimization-runner.ts +305 -0
  29. package/src/runner/reflexion.test.ts +87 -0
  30. package/src/runner/shell-executor.test.ts +12 -0
  31. package/src/runner/shell-executor.ts +9 -6
  32. package/src/runner/step-executor.test.ts +240 -2
  33. package/src/runner/step-executor.ts +183 -68
  34. package/src/runner/stream-utils.test.ts +171 -0
  35. package/src/runner/stream-utils.ts +186 -0
  36. package/src/runner/workflow-runner.test.ts +4 -4
  37. package/src/runner/workflow-runner.ts +438 -259
  38. package/src/templates/agents/keystone-architect.md +6 -4
  39. package/src/templates/full-feature-demo.yaml +4 -4
  40. package/src/types/assets.d.ts +14 -0
  41. package/src/types/status.ts +1 -1
  42. package/src/ui/dashboard.tsx +38 -26
  43. package/src/utils/auth-manager.ts +3 -1
  44. package/src/utils/logger.test.ts +76 -0
  45. package/src/utils/logger.ts +39 -0
  46. package/src/utils/prompt.ts +75 -0
  47. package/src/utils/redactor.test.ts +86 -4
  48. package/src/utils/redactor.ts +48 -13
@@ -1,22 +1,23 @@
1
1
  import { randomUUID } from 'node:crypto';
2
- import { dirname } from 'node:path';
2
+ import { dirname, join } from 'node:path';
3
+ import { MemoryDb } from '../db/memory-db.ts';
3
4
  import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
4
5
  import type { ExpressionContext } from '../expression/evaluator.ts';
5
6
  import { ExpressionEvaluator } from '../expression/evaluator.ts';
6
7
  import type { Step, Workflow, WorkflowStep } from '../parser/schema.ts';
7
8
  import { WorkflowParser } from '../parser/workflow-parser.ts';
9
+ import { StepStatus, type StepStatusType, WorkflowStatus } from '../types/status.ts';
10
+ import { extractJson } from '../utils/json-parser.ts';
8
11
  import { Redactor } from '../utils/redactor.ts';
9
12
  import { WorkflowRegistry } from '../utils/workflow-registry.ts';
13
+ import { ForeachExecutor } from './foreach-executor.ts';
14
+ import { type LLMMessage, getAdapter } from './llm-adapter.ts';
10
15
  import { MCPManager } from './mcp-manager.ts';
11
16
  import { withRetry } from './retry.ts';
12
17
  import { type StepResult, WorkflowSuspendedError, executeStep } from './step-executor.ts';
13
18
  import { withTimeout } from './timeout.ts';
14
19
 
15
- export interface Logger {
16
- log: (msg: string) => void;
17
- error: (msg: string) => void;
18
- warn: (msg: string) => void;
19
- }
20
+ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
20
21
 
21
22
  /**
22
23
  * A logger wrapper that redacts secrets from all log messages
@@ -38,11 +39,22 @@ class RedactingLogger implements Logger {
38
39
  warn(msg: string): void {
39
40
  this.inner.warn(this.redactor.redact(msg));
40
41
  }
42
+
43
+ info(msg: string): void {
44
+ this.inner.info(this.redactor.redact(msg));
45
+ }
46
+
47
+ debug(msg: string): void {
48
+ if (this.inner.debug) {
49
+ this.inner.debug(this.redactor.redact(msg));
50
+ }
51
+ }
41
52
  }
42
53
 
43
54
  export interface RunOptions {
44
55
  inputs?: Record<string, unknown>;
45
56
  dbPath?: string;
57
+ memoryDbPath?: string;
46
58
  resumeRunId?: string;
47
59
  logger?: Logger;
48
60
  mcpManager?: MCPManager;
@@ -50,12 +62,15 @@ export interface RunOptions {
50
62
  workflowDir?: string;
51
63
  resumeInputs?: Record<string, unknown>;
52
64
  dryRun?: boolean;
65
+ debug?: boolean;
66
+ getAdapter?: typeof getAdapter;
67
+ depth?: number;
53
68
  }
54
69
 
55
70
  export interface StepContext {
56
71
  output?: unknown;
57
72
  outputs?: Record<string, unknown>;
58
- status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
73
+ status: StepStatusType;
59
74
  error?: string;
60
75
  usage?: {
61
76
  prompt_tokens: number;
@@ -78,6 +93,7 @@ export interface ForeachStepContext extends StepContext {
78
93
  export class WorkflowRunner {
79
94
  private workflow: Workflow;
80
95
  private db: WorkflowDb;
96
+ private memoryDb: MemoryDb;
81
97
  private runId: string;
82
98
  private stepContexts: Map<string, StepContext | ForeachStepContext> = new Map();
83
99
  private inputs: Record<string, unknown>;
@@ -92,15 +108,26 @@ export class WorkflowRunner {
92
108
  private isStopping = false;
93
109
  private hasWarnedMemory = false;
94
110
  private static readonly MEMORY_WARNING_THRESHOLD = 1000;
111
+ private static readonly MAX_RECURSION_DEPTH = 10;
112
+ private depth = 0;
95
113
 
96
114
  constructor(workflow: Workflow, options: RunOptions = {}) {
97
115
  this.workflow = workflow;
98
116
  this.options = options;
117
+ this.depth = options.depth || 0;
118
+
119
+ if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
120
+ throw new Error(
121
+ `Maximum workflow recursion depth (${WorkflowRunner.MAX_RECURSION_DEPTH}) exceeded.`
122
+ );
123
+ }
124
+
99
125
  this.db = new WorkflowDb(options.dbPath);
126
+ this.memoryDb = new MemoryDb(options.memoryDbPath);
100
127
  this.secrets = this.loadSecrets();
101
128
  this.redactor = new Redactor(this.secrets);
102
129
  // Wrap the logger with a redactor to prevent secret leakage in logs
103
- const rawLogger = options.logger || console;
130
+ const rawLogger = options.logger || new ConsoleLogger();
104
131
  this.logger = new RedactingLogger(rawLogger, this.redactor);
105
132
  this.mcpManager = options.mcpManager || new MCPManager();
106
133
 
@@ -129,13 +156,13 @@ export class WorkflowRunner {
129
156
  * Restore state from a previous run (for resume functionality)
130
157
  */
131
158
  private async restoreState(): Promise<void> {
132
- const run = this.db.getRun(this.runId);
159
+ const run = await this.db.getRun(this.runId);
133
160
  if (!run) {
134
161
  throw new Error(`Run ${this.runId} not found`);
135
162
  }
136
163
 
137
164
  // Only allow resuming failed or paused runs
138
- if (run.status !== 'failed' && run.status !== 'paused') {
165
+ if (run.status !== WorkflowStatus.FAILED && run.status !== WorkflowStatus.PAUSED) {
139
166
  throw new Error(
140
167
  `Cannot resume run with status '${run.status}'. Only 'failed' or 'paused' runs can be resumed.`
141
168
  );
@@ -144,18 +171,22 @@ export class WorkflowRunner {
144
171
  // Restore inputs from the previous run to ensure consistency
145
172
  // Merge with any resumeInputs provided (e.g. answers to human steps)
146
173
  try {
147
- const storedInputs = JSON.parse(run.inputs);
148
- this.inputs = { ...storedInputs, ...this.inputs };
174
+ if (!run.inputs || run.inputs === 'null' || run.inputs === '') {
175
+ this.logger.warn(`Run ${this.runId} has no persisted inputs`);
176
+ // Keep existing inputs
177
+ } else {
178
+ const storedInputs = JSON.parse(run.inputs);
179
+ this.inputs = { ...storedInputs, ...this.inputs };
180
+ }
149
181
  } catch (error) {
150
- // Log warning but continue with default empty inputs instead of crashing
151
- this.logger.warn(
152
- `Failed to parse inputs from run ${this.runId}, using defaults: ${error instanceof Error ? error.message : String(error)}`
182
+ this.logger.error(
183
+ `CRITICAL: Failed to parse inputs from run ${this.runId}. Data may be corrupted. Using default/resume inputs. Error: ${error instanceof Error ? error.message : String(error)}`
153
184
  );
154
- // Keep existing inputs (from resumeInputs or empty)
185
+ // Fallback: preserve existing inputs from resume options
155
186
  }
156
187
 
157
188
  // Load all step executions for this run
158
- const steps = this.db.getStepsByRun(this.runId);
189
+ const steps = await this.db.getStepsByRun(this.runId);
159
190
 
160
191
  // Group steps by step_id to handle foreach loops (multiple executions per step_id)
161
192
  const stepExecutionsByStepId = new Map<string, typeof steps>();
@@ -194,7 +225,7 @@ export class WorkflowRunner {
194
225
  for (const exec of sortedExecs) {
195
226
  if (exec.iteration_index === null) continue; // Skip parent step record
196
227
 
197
- if (exec.status === 'success' || exec.status === 'skipped') {
228
+ if (exec.status === StepStatus.SUCCESS || exec.status === StepStatus.SKIPPED) {
198
229
  let output: unknown = null;
199
230
  try {
200
231
  output = exec.output ? JSON.parse(exec.output) : null;
@@ -210,7 +241,7 @@ export class WorkflowRunner {
210
241
  typeof output === 'object' && output !== null && !Array.isArray(output)
211
242
  ? (output as Record<string, unknown>)
212
243
  : {},
213
- status: exec.status as 'success' | 'skipped',
244
+ status: exec.status as typeof StepStatus.SUCCESS | typeof StepStatus.SKIPPED,
214
245
  };
215
246
  outputs[exec.iteration_index] = output;
216
247
  } else {
@@ -219,7 +250,7 @@ export class WorkflowRunner {
219
250
  items[exec.iteration_index] = {
220
251
  output: null,
221
252
  outputs: {},
222
- status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
253
+ status: exec.status as StepStatusType,
223
254
  };
224
255
  }
225
256
  }
@@ -263,17 +294,17 @@ export class WorkflowRunner {
263
294
  !Array.from({ length: expectedCount }).some((_, i) => !items[i]);
264
295
 
265
296
  // Determine overall status based on iterations
266
- let status: StepContext['status'] = 'success';
297
+ let status: StepContext['status'] = StepStatus.SUCCESS;
267
298
  if (allSuccess && hasAllItems) {
268
- status = 'success';
269
- } else if (items.some((item) => item?.status === 'suspended')) {
270
- status = 'suspended';
299
+ status = StepStatus.SUCCESS;
300
+ } else if (items.some((item) => item?.status === StepStatus.SUSPENDED)) {
301
+ status = StepStatus.SUSPENDED;
271
302
  } else {
272
- status = 'failed';
303
+ status = StepStatus.FAILED;
273
304
  }
274
305
 
275
306
  // Always restore what we have to allow partial expression evaluation
276
- const mappedOutputs = this.aggregateOutputs(outputs);
307
+ const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
277
308
  this.stepContexts.set(stepId, {
278
309
  output: outputs,
279
310
  outputs: mappedOutputs,
@@ -282,13 +313,17 @@ export class WorkflowRunner {
282
313
  } as ForeachStepContext);
283
314
 
284
315
  // Only mark as fully completed if all iterations completed successfully AND we have all items
285
- if (status === 'success') {
316
+ if (status === StepStatus.SUCCESS) {
286
317
  completedStepIds.add(stepId);
287
318
  }
288
319
  } else {
289
320
  // Single execution step
290
321
  const exec = stepExecutions[0];
291
- if (exec.status === 'success' || exec.status === 'skipped' || exec.status === 'suspended') {
322
+ if (
323
+ exec.status === StepStatus.SUCCESS ||
324
+ exec.status === StepStatus.SKIPPED ||
325
+ exec.status === StepStatus.SUSPENDED
326
+ ) {
292
327
  let output: unknown = null;
293
328
  try {
294
329
  output = exec.output ? JSON.parse(exec.output) : null;
@@ -304,7 +339,7 @@ export class WorkflowRunner {
304
339
  : {},
305
340
  status: exec.status as StepContext['status'],
306
341
  });
307
- if (exec.status !== 'suspended') {
342
+ if (exec.status !== StepStatus.SUSPENDED) {
308
343
  completedStepIds.add(stepId);
309
344
  }
310
345
  }
@@ -322,7 +357,7 @@ export class WorkflowRunner {
322
357
  const handler = async (signal: string) => {
323
358
  if (this.isStopping) return;
324
359
  this.logger.log(`\n\n🛑 Received ${signal}. Cleaning up...`);
325
- await this.stop('failed', `Cancelled by user (${signal})`);
360
+ await this.stop(WorkflowStatus.FAILED, `Cancelled by user (${signal})`);
326
361
 
327
362
  // Only exit if not embedded
328
363
  if (!this.options.preventExit) {
@@ -339,7 +374,7 @@ export class WorkflowRunner {
339
374
  /**
340
375
  * Stop the runner and cleanup resources
341
376
  */
342
- public async stop(status: RunStatus = 'failed', error?: string): Promise<void> {
377
+ public async stop(status: RunStatus = WorkflowStatus.FAILED, error?: string): Promise<void> {
343
378
  if (this.isStopping) return;
344
379
  this.isStopping = true;
345
380
 
@@ -353,6 +388,7 @@ export class WorkflowRunner {
353
388
  await this.mcpManager.stopAll();
354
389
 
355
390
  this.db.close();
391
+ this.memoryDb.close();
356
392
  } catch (err) {
357
393
  this.logger.error(`Error during stop/cleanup: ${err}`);
358
394
  }
@@ -389,9 +425,7 @@ export class WorkflowRunner {
389
425
  '_',
390
426
  'SHLVL',
391
427
  'LC_ALL',
392
- 'OLDPWD',
393
428
  'DISPLAY',
394
- 'TMPDIR',
395
429
  'SSH_AUTH_SOCK',
396
430
  'XPC_FLAGS',
397
431
  'XPC_SERVICE_NAME',
@@ -400,6 +434,20 @@ export class WorkflowRunner {
400
434
  'TERM_PROGRAM',
401
435
  'TERM_PROGRAM_VERSION',
402
436
  'COLORTERM',
437
+ 'LC_TERMINAL',
438
+ 'LC_TERMINAL_VERSION',
439
+ 'PWD',
440
+ 'OLDPWD',
441
+ 'HOME',
442
+ 'USER',
443
+ 'SHELL',
444
+ 'PATH',
445
+ 'LOGNAME',
446
+ 'TMPDIR',
447
+ 'XDG_CONFIG_HOME',
448
+ 'XDG_DATA_HOME',
449
+ 'XDG_CACHE_HOME',
450
+ 'XDG_RUNTIME_DIR',
403
451
  ]);
404
452
 
405
453
  // Bun automatically loads .env file
@@ -411,31 +459,6 @@ export class WorkflowRunner {
411
459
  return secrets;
412
460
  }
413
461
 
414
- /**
415
- * Aggregate outputs from multiple iterations of a foreach step
416
- */
417
- private aggregateOutputs(outputs: unknown[]): Record<string, unknown> {
418
- const mappedOutputs: Record<string, unknown> = { length: outputs.length };
419
- const allKeys = new Set<string>();
420
-
421
- for (const output of outputs) {
422
- if (output && typeof output === 'object' && !Array.isArray(output)) {
423
- for (const key of Object.keys(output)) {
424
- allKeys.add(key);
425
- }
426
- }
427
- }
428
-
429
- for (const key of allKeys) {
430
- mappedOutputs[key] = outputs.map((o) =>
431
- o && typeof o === 'object' && !Array.isArray(o) && key in (o as Record<string, unknown>)
432
- ? (o as Record<string, unknown>)[key]
433
- : null
434
- );
435
- }
436
- return mappedOutputs;
437
- }
438
-
439
462
  /**
440
463
  * Apply workflow defaults to inputs and validate types
441
464
  */
@@ -541,6 +564,39 @@ export class WorkflowRunner {
541
564
  }
542
565
  }
543
566
 
567
+ /**
568
+ * Retrieve past successful runs and format them as few-shot examples
569
+ */
570
+ private async getFewShotExamples(workflowName: string): Promise<string> {
571
+ try {
572
+ const runs = await this.db.getSuccessfulRuns(workflowName, 3);
573
+ if (!runs || runs.length === 0) return '';
574
+
575
+ let examples = 'Here are examples of how you successfully handled this task in the past:\n';
576
+
577
+ for (const [index, run] of runs.entries()) {
578
+ examples += `\nExample ${index + 1}:\n`;
579
+ try {
580
+ // Pretty print JSON inputs/outputs
581
+ const inputs = JSON.stringify(JSON.parse(run.inputs), null, 2);
582
+ const outputs = run.outputs ? JSON.stringify(JSON.parse(run.outputs), null, 2) : '{}';
583
+
584
+ examples += `Input: ${inputs}\n`;
585
+ examples += `Output: ${outputs}\n`;
586
+ } catch (e) {
587
+ // Fallback for raw strings if parsing fails
588
+ examples += `Input: ${run.inputs}\n`;
589
+ examples += `Output: ${run.outputs || '{}'}\n`;
590
+ }
591
+ }
592
+
593
+ return examples;
594
+ } catch (error) {
595
+ this.logger.warn(`Failed to retrieve few-shot examples: ${error}`);
596
+ return '';
597
+ }
598
+ }
599
+
544
600
  /**
545
601
  * Execute a single step instance and return the result
546
602
  * Does NOT update global stepContexts
@@ -550,18 +606,37 @@ export class WorkflowRunner {
550
606
  context: ExpressionContext,
551
607
  stepExecId: string
552
608
  ): Promise<StepContext> {
553
- await this.db.startStep(stepExecId);
609
+ let stepToExecute = step;
610
+
611
+ // Inject few-shot examples if enabled
612
+ if (step.type === 'llm' && step.learn) {
613
+ const examples = await this.getFewShotExamples(this.workflow.name);
614
+ if (examples) {
615
+ stepToExecute = {
616
+ ...step,
617
+ prompt: `${examples}\n\n${step.prompt}`,
618
+ };
619
+ this.logger.log(
620
+ ` 🧠 Injected few-shot examples from ${examples.split('Example').length - 1} past runs`
621
+ );
622
+ }
623
+ }
624
+
625
+ const isRecursion =
626
+ (context.reflexionAttempts as number) > 0 || (context.autoHealAttempts as number) > 0;
627
+
628
+ if (!isRecursion) {
629
+ await this.db.startStep(stepExecId);
630
+ }
554
631
 
555
632
  const operation = async () => {
556
- const result = await executeStep(
557
- step,
558
- context,
559
- this.logger,
560
- this.executeSubWorkflow.bind(this),
561
- this.mcpManager,
562
- this.options.workflowDir,
563
- this.options.dryRun
564
- );
633
+ const result = await executeStep(stepToExecute, context, this.logger, {
634
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
635
+ mcpManager: this.mcpManager,
636
+ memoryDb: this.memoryDb,
637
+ workflowDir: this.options.workflowDir,
638
+ dryRun: this.options.dryRun,
639
+ });
565
640
  if (result.status === 'failed') {
566
641
  throw new Error(result.error || 'Step failed');
567
642
  }
@@ -581,10 +656,10 @@ export class WorkflowRunner {
581
656
  await this.db.incrementRetry(stepExecId);
582
657
  });
583
658
 
584
- if (result.status === 'suspended') {
659
+ if (result.status === StepStatus.SUSPENDED) {
585
660
  await this.db.completeStep(
586
661
  stepExecId,
587
- 'suspended',
662
+ StepStatus.SUSPENDED,
588
663
  result.output,
589
664
  'Waiting for interaction',
590
665
  result.usage
@@ -600,6 +675,17 @@ export class WorkflowRunner {
600
675
  result.usage
601
676
  );
602
677
 
678
+ // Auto-Learning logic
679
+ if (step.learn && result.status === StepStatus.SUCCESS) {
680
+ try {
681
+ await this.learnFromStep(step, result, context);
682
+ } catch (error) {
683
+ this.logger.warn(
684
+ ` ⚠️ Failed to learn from step ${step.id}: ${error instanceof Error ? error.message : String(error)}`
685
+ );
686
+ }
687
+ }
688
+
603
689
  // Ensure outputs is always an object for consistent access
604
690
  let outputs: Record<string, unknown>;
605
691
  if (
@@ -621,6 +707,104 @@ export class WorkflowRunner {
621
707
  usage: result.usage,
622
708
  };
623
709
  } catch (error) {
710
+ // Reflexion (Self-Correction) logic
711
+ if (step.reflexion) {
712
+ const { limit = 3, hint } = step.reflexion;
713
+ const currentAttempt = (context.reflexionAttempts as number) || 0;
714
+
715
+ if (currentAttempt < limit) {
716
+ const errorMsg = error instanceof Error ? error.message : String(error);
717
+ this.logger.log(
718
+ ` 🔧 Reflexion triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${limit})`
719
+ );
720
+
721
+ try {
722
+ // Get corrected command from Mechanic
723
+ const fixedStep = await this.getFixFromReflexion(step, errorMsg, hint);
724
+
725
+ // Merge fixed properties
726
+ const newStep = { ...step, ...fixedStep };
727
+
728
+ // Retry with new step definition
729
+ const nextContext = {
730
+ ...context,
731
+ reflexionAttempts: currentAttempt + 1,
732
+ };
733
+
734
+ return this.executeStepInternal(newStep, nextContext, stepExecId);
735
+ } catch (healError) {
736
+ this.logger.error(
737
+ ` ✗ Reflexion failed: ${healError instanceof Error ? healError.message : String(healError)}`
738
+ );
739
+ // Fall through to auto-heal or failure
740
+ }
741
+ }
742
+ }
743
+
744
+ // Auto-heal logic
745
+ if (step.auto_heal && typeof step.auto_heal === 'object') {
746
+ const autoHeal = step.auto_heal;
747
+ // Limit recursion/loops
748
+ const maxAttempts = autoHeal.maxAttempts || 1;
749
+ const currentAttempt = (context.autoHealAttempts as number) || 0;
750
+
751
+ if (currentAttempt < maxAttempts) {
752
+ const errorMsg = error instanceof Error ? error.message : String(error);
753
+ this.logger.log(
754
+ ` 🩹 Auto-healing triggered for step ${step.id} (Attempt ${currentAttempt + 1}/${maxAttempts})`
755
+ );
756
+
757
+ try {
758
+ // Get fix from agent
759
+ const fixedStep = await this.getFixFromAgent(step, errorMsg, context);
760
+
761
+ // Merge fixed properties into the step
762
+ const newStep = { ...step, ...fixedStep };
763
+
764
+ // Retry with new step definition
765
+ const nextContext = {
766
+ ...context,
767
+ autoHealAttempts: currentAttempt + 1,
768
+ };
769
+
770
+ return this.executeStepInternal(newStep, nextContext, stepExecId);
771
+ } catch (healError) {
772
+ this.logger.error(
773
+ ` ✗ Auto-heal failed: ${healError instanceof Error ? healError.message : String(healError)}`
774
+ );
775
+ // Fall through to normal failure
776
+ }
777
+ }
778
+ }
779
+
780
+ // Debug REPL logic
781
+ if (this.options.debug) {
782
+ try {
783
+ const { DebugRepl } = await import('./debug-repl.ts');
784
+ const repl = new DebugRepl(context, step, error, this.logger);
785
+ const action = await repl.start();
786
+
787
+ if (action.type === 'retry') {
788
+ this.logger.log(` ↻ Retrying step ${step.id} after manual intervention`);
789
+ // We use the modified step if provided, else original
790
+ const stepToRun = action.modifiedStep || step;
791
+ return this.executeStepInternal(stepToRun, context, stepExecId);
792
+ }
793
+ if (action.type === 'skip') {
794
+ this.logger.log(` ⏭️ Skipping step ${step.id} manually`);
795
+ await this.db.completeStep(stepExecId, StepStatus.SKIPPED, null, undefined, undefined);
796
+ return {
797
+ output: null,
798
+ outputs: {},
799
+ status: StepStatus.SKIPPED,
800
+ };
801
+ }
802
+ // if 'continue_failure', fall through
803
+ } catch (replError) {
804
+ this.logger.error(` ✗ Debug REPL error: ${replError}`);
805
+ }
806
+ }
807
+
624
808
  const errorMsg = error instanceof Error ? error.message : String(error);
625
809
  const redactedErrorMsg = this.redactor.redact(errorMsg);
626
810
  this.logger.error(` ✗ Step ${step.id} failed: ${redactedErrorMsg}`);
@@ -636,210 +820,199 @@ export class WorkflowRunner {
636
820
  }
637
821
 
638
822
  /**
639
- * Execute a step (handles foreach if present)
823
+ * Consult an agent to fix a failing step
640
824
  */
641
- private async executeStepWithForeach(step: Step): Promise<void> {
642
- const baseContext = this.buildContext();
825
+ private async getFixFromAgent(
826
+ step: Step,
827
+ error: string,
828
+ context: ExpressionContext
829
+ ): Promise<Partial<Step>> {
830
+ const { auto_heal } = step;
831
+ if (!auto_heal) throw new Error('Auto-heal not configured');
832
+
833
+ const prompt = `
834
+ The following step failed during execution:
835
+ \`\`\`json
836
+ ${JSON.stringify(step, null, 2)}
837
+ \`\`\`
838
+
839
+ Error:
840
+ ${error}
841
+
842
+ Please analyze the error and provide a fixed version of the step configuration.
843
+ Return ONLY a valid JSON object containing the fields that need to be changed.
844
+ For example, if the command was wrong, return:
845
+ { "run": "correct command" }
846
+
847
+ Do not change the 'id' or 'type' or 'auto_heal' fields.
848
+ `;
849
+
850
+ // Create a synthetic step to invoke the agent
851
+ const agentStep: Step = {
852
+ id: `${step.id}-healer`,
853
+ type: 'llm',
854
+ agent: auto_heal.agent,
855
+ model: auto_heal.model,
856
+ prompt,
857
+ schema: {
858
+ type: 'object',
859
+ description: 'Partial step configuration with fixed values',
860
+ additionalProperties: true,
861
+ },
862
+ } as import('../parser/schema.ts').LlmStep;
863
+
864
+ this.logger.log(` 🚑 Consulting agent ${auto_heal.agent} for a fix...`);
865
+
866
+ // Execute the agent step
867
+ // We use a fresh context but share secrets/env
868
+ const result = await executeStep(agentStep, context, this.logger, {
869
+ executeWorkflowFn: this.executeSubWorkflow.bind(this),
870
+ mcpManager: this.mcpManager,
871
+ memoryDb: this.memoryDb,
872
+ workflowDir: this.options.workflowDir,
873
+ dryRun: this.options.dryRun,
874
+ });
643
875
 
644
- if (this.shouldSkipStep(step, baseContext)) {
645
- this.logger.log(` Skipping step ${step.id} (condition not met)`);
646
- const stepExecId = randomUUID();
647
- await this.db.createStep(stepExecId, this.runId, step.id);
648
- await this.db.completeStep(stepExecId, 'skipped', null);
649
- this.stepContexts.set(step.id, { status: 'skipped' });
650
- return;
876
+ if (result.status !== 'success' || !result.output) {
877
+ throw new Error(`Healer agent failed: ${result.error || 'No output'}`);
651
878
  }
652
879
 
653
- if (step.foreach) {
654
- const items = ExpressionEvaluator.evaluate(step.foreach, baseContext);
655
- if (!Array.isArray(items)) {
656
- throw new Error(`foreach expression must evaluate to an array: ${step.foreach}`);
657
- }
658
-
659
- this.logger.log(` ⤷ Executing step ${step.id} for ${items.length} items`);
660
-
661
- if (items.length > WorkflowRunner.MEMORY_WARNING_THRESHOLD && !this.hasWarnedMemory) {
662
- this.logger.warn(
663
- ` ⚠️ Warning: Large foreach loop detected (${items.length} items). This may consume significant memory and lead to instability.`
664
- );
665
- this.hasWarnedMemory = true;
666
- }
667
-
668
- // Evaluate concurrency if it's an expression, otherwise use the number directly
669
- let concurrencyLimit = items.length;
670
- if (step.concurrency !== undefined) {
671
- if (typeof step.concurrency === 'string') {
672
- concurrencyLimit = Number(ExpressionEvaluator.evaluate(step.concurrency, baseContext));
673
- if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
674
- throw new Error(
675
- `concurrency must evaluate to a positive integer, got: ${concurrencyLimit}`
676
- );
677
- }
678
- } else {
679
- concurrencyLimit = step.concurrency;
680
- }
681
- }
682
-
683
- // Create parent step record in DB
684
- const parentStepExecId = randomUUID();
685
- await this.db.createStep(parentStepExecId, this.runId, step.id);
686
- await this.db.startStep(parentStepExecId);
687
-
688
- // Persist the foreach items in parent step for deterministic resume
689
- // This ensures resume uses the same array even if expression would evaluate differently
690
- await this.db.completeStep(parentStepExecId, 'pending', { __foreachItems: items });
880
+ return result.output as Partial<Step>;
881
+ }
691
882
 
692
- try {
693
- // Initialize results array with existing context or empty slots
694
- const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
695
- const itemResults: StepContext[] = existingContext?.items || new Array(items.length);
883
+ /**
884
+ * Automatically learn from a successful step outcome
885
+ */
886
+ private async learnFromStep(
887
+ step: Step,
888
+ result: StepResult,
889
+ _context: ExpressionContext
890
+ ): Promise<void> {
891
+ const getAdapterFn = this.options.getAdapter || getAdapter;
892
+ const { adapter } = getAdapterFn('local'); // Default for embedding
893
+ if (!adapter.embed) return;
894
+
895
+ // Combine input context (if relevant) and output
896
+ // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
897
+
898
+ // We can try to construct a summary of what happened
899
+ let textToEmbed = `Step ID: ${step.id} (${step.type})\n`;
900
+
901
+ if (step.type === 'llm') {
902
+ // biome-ignore lint/suspicious/noExplicitAny: generic access
903
+ textToEmbed += `Task Context/Prompt:\n${(step as any).prompt}\n\n`;
904
+ } else if (step.type === 'shell') {
905
+ // biome-ignore lint/suspicious/noExplicitAny: generic access
906
+ textToEmbed += `Command:\n${(step as any).run}\n\n`;
907
+ }
696
908
 
697
- // Ensure array is correct length if items changed (unlikely in resume but safe)
698
- if (itemResults.length !== items.length) {
699
- itemResults.length = items.length;
700
- }
909
+ textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
701
910
 
702
- // Worker pool implementation for true concurrency
703
- let currentIndex = 0;
704
- let aborted = false;
705
- const workers = new Array(Math.min(concurrencyLimit, items.length))
706
- .fill(null)
707
- .map(async () => {
708
- while (currentIndex < items.length && !aborted) {
709
- const i = currentIndex++; // Capture index atomically
710
- const item = items[i];
711
-
712
- // Skip if already successful or skipped in previous run or by another worker
713
- if (
714
- itemResults[i] &&
715
- (itemResults[i].status === 'success' || itemResults[i].status === 'skipped')
716
- ) {
717
- continue;
718
- }
911
+ const embedding = await adapter.embed(textToEmbed, 'local');
912
+ await this.memoryDb.store(textToEmbed, embedding, {
913
+ stepId: step.id,
914
+ workflow: this.workflow.name,
915
+ timestamp: new Date().toISOString(),
916
+ });
719
917
 
720
- const itemContext = this.buildContext(item, i);
721
-
722
- // Check DB again for robustness (in case itemResults wasn't fully restored)
723
- const existingExec = this.db.getStepByIteration(this.runId, step.id, i);
724
- if (
725
- existingExec &&
726
- (existingExec.status === 'success' || existingExec.status === 'skipped')
727
- ) {
728
- let output: unknown = null;
729
- try {
730
- output = existingExec.output ? JSON.parse(existingExec.output) : null;
731
- } catch (error) {
732
- this.logger.warn(
733
- `Failed to parse output for step ${step.id} iteration ${i}: ${error}`
734
- );
735
- output = { error: 'Failed to parse output' };
736
- }
737
- itemResults[i] = {
738
- output,
739
- outputs:
740
- typeof output === 'object' && output !== null && !Array.isArray(output)
741
- ? (output as Record<string, unknown>)
742
- : {},
743
- status: existingExec.status as 'success' | 'skipped',
744
- };
745
- continue;
746
- }
918
+ this.logger.log(` ✨ Learned from step ${step.id}`);
919
+ }
747
920
 
748
- const stepExecId = randomUUID();
749
- await this.db.createStep(stepExecId, this.runId, step.id, i);
750
-
751
- // Execute and store result at correct index
752
- try {
753
- this.logger.log(` ⤷ [${i + 1}/${items.length}] Executing iteration...`);
754
- itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
755
- if (itemResults[i].status === 'failed') {
756
- aborted = true;
757
- }
758
- } catch (error) {
759
- aborted = true;
760
- throw error;
761
- }
762
- }
763
- });
921
+ /**
922
+ * Consult the built-in "Mechanic" agent to fix a failing step
923
+ */
924
+ private async getFixFromReflexion(
925
+ step: Step,
926
+ error: string,
927
+ hint?: string
928
+ ): Promise<Partial<Step>> {
929
+ const systemPrompt = `You are the "Mechanic", an expert coding assistant built into the Keystone CLI.
930
+ Your job is to fix failing shell commands or scripts by analyzing the error output and the user's original intent.
931
+
932
+ Rules:
933
+ 1. Analyze the failing command and the error message which comes from stdout/stderr.
934
+ 2. If a "Hint" is provided, prioritize it as the primary strategy for the fix.
935
+ 3. Return ONLY a valid JSON object containing the fields that need to be changed in the step configuration.
936
+ 4. Do NOT verify the fix yourself; just provide the corrected configuration.
937
+ 5. Common fixes include:
938
+ - Installing missing dependencies (e.g. pip install, npm install)
939
+ - Fixing syntax errors
940
+ - Creating missing directories
941
+ - Adjusting flags or arguments`;
942
+
943
+ // biome-ignore lint/suspicious/noExplicitAny: generic access
944
+ const runCommand = (step as any).run;
945
+ const userContent = `The following step failed:
946
+ \`\`\`json
947
+ ${JSON.stringify({ type: step.type, run: runCommand }, null, 2)}
948
+ \`\`\`
949
+
950
+ Error Output:
951
+ ${error}
952
+
953
+ ${hint ? `Hint from User: "${hint}"` : ''}
954
+
955
+ Please provide the fixed step configuration as JSON.`;
956
+
957
+ const messages: LLMMessage[] = [
958
+ { role: 'system', content: systemPrompt },
959
+ { role: 'user', content: userContent },
960
+ ];
764
961
 
765
- await Promise.all(workers);
766
-
767
- // Aggregate results to match Spec requirements
768
- // This allows:
769
- // 1. ${{ steps.id.output }} -> array of output values
770
- // 2. ${{ steps.id.items[0].status }} -> 'success'
771
- // 3. ${{ steps.id.items.every(s => s.status == 'success') }} -> works via items array
772
- const outputs = itemResults.map((r) => r.output);
773
- const allSuccess = itemResults.every((r) => r.status === 'success');
774
- const anySuspended = itemResults.some((r) => r.status === 'suspended');
775
-
776
- // Aggregate usage from all items
777
- const aggregatedUsage = itemResults.reduce(
778
- (acc, r) => {
779
- if (r.usage) {
780
- acc.prompt_tokens += r.usage.prompt_tokens;
781
- acc.completion_tokens += r.usage.completion_tokens;
782
- acc.total_tokens += r.usage.total_tokens;
783
- }
784
- return acc;
785
- },
786
- { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
787
- );
962
+ try {
963
+ // Use the default model (gpt-4o) or configured default for the Mechanic
964
+ // We'll use gpt-4o as a strong default for this reasoning task
965
+ const getAdapterFn = this.options.getAdapter || getAdapter;
966
+ const { adapter, resolvedModel } = getAdapterFn('gpt-4o');
967
+ this.logger.log(` 🤖 Mechanic is analyzing the failure using ${resolvedModel}...`);
968
+
969
+ const response = await adapter.chat(messages, {
970
+ model: resolvedModel,
971
+ });
788
972
 
789
- // Map child properties for easier access
790
- // If outputs are [{ id: 1 }, { id: 2 }], then outputs.id = [1, 2]
791
- const mappedOutputs = this.aggregateOutputs(outputs);
973
+ const content = response.message.content;
974
+ if (!content) {
975
+ throw new Error('Mechanic returned empty response');
976
+ }
792
977
 
793
- // Determine final status
794
- let finalStatus: StepContext['status'] = 'failed';
795
- if (allSuccess) {
796
- finalStatus = 'success';
797
- } else if (anySuspended) {
798
- finalStatus = 'suspended';
799
- }
978
+ try {
979
+ const fixedConfig = extractJson(content) as Partial<Step>;
980
+ return fixedConfig;
981
+ } catch (e) {
982
+ throw new Error(`Failed to parse Mechanic's response as JSON: ${content}`);
983
+ }
984
+ } catch (err) {
985
+ throw new Error(`Mechanic unavailable: ${err instanceof Error ? err.message : String(err)}`);
986
+ }
987
+ }
800
988
 
801
- // Use proper object structure that serializes correctly
802
- const aggregatedContext: ForeachStepContext = {
803
- output: outputs,
804
- outputs: mappedOutputs,
805
- status: finalStatus,
806
- items: itemResults,
807
- usage: aggregatedUsage,
808
- };
989
+ /**
990
+ * Execute a step (handles foreach if present)
991
+ */
992
+ private async executeStepWithForeach(step: Step): Promise<void> {
993
+ const baseContext = this.buildContext();
809
994
 
810
- this.stepContexts.set(step.id, aggregatedContext);
995
+ if (this.shouldSkipStep(step, baseContext)) {
996
+ this.logger.log(` ⊘ Skipping step ${step.id} (condition not met)`);
997
+ const stepExecId = randomUUID();
998
+ await this.db.createStep(stepExecId, this.runId, step.id);
999
+ await this.db.completeStep(stepExecId, 'skipped', null);
1000
+ this.stepContexts.set(step.id, { status: 'skipped' });
1001
+ return;
1002
+ }
811
1003
 
812
- // Update parent step record with aggregated status
813
- await this.db.completeStep(
814
- parentStepExecId,
815
- finalStatus,
816
- aggregatedContext,
817
- finalStatus === 'failed' ? 'One or more iterations failed' : undefined
818
- );
1004
+ if (step.foreach) {
1005
+ const { ForeachExecutor } = await import('./foreach-executor.ts');
1006
+ const executor = new ForeachExecutor(
1007
+ this.db,
1008
+ this.logger,
1009
+ this.executeStepInternal.bind(this)
1010
+ );
819
1011
 
820
- if (finalStatus === 'suspended') {
821
- // If any iteration suspended, the whole step is suspended
822
- // We assume for now that only human steps can suspend, and we'll use the first one's input type
823
- const suspendedItem = itemResults.find((r) => r.status === 'suspended');
824
- throw new WorkflowSuspendedError(
825
- suspendedItem?.error || 'Iteration suspended',
826
- step.id,
827
- 'text'
828
- );
829
- }
1012
+ const existingContext = this.stepContexts.get(step.id) as ForeachStepContext;
1013
+ const result = await executor.execute(step, baseContext, this.runId, existingContext);
830
1014
 
831
- if (finalStatus === 'failed') {
832
- throw new Error(`Step ${step.id} failed: one or more iterations failed`);
833
- }
834
- } catch (error) {
835
- if (error instanceof WorkflowSuspendedError) {
836
- throw error;
837
- }
838
- // Mark parent step as failed
839
- const errorMsg = error instanceof Error ? error.message : String(error);
840
- await this.db.completeStep(parentStepExecId, 'failed', null, errorMsg);
841
- throw error;
842
- }
1015
+ this.stepContexts.set(step.id, result);
843
1016
  } else {
844
1017
  // Single execution
845
1018
  const stepExecId = randomUUID();
@@ -888,6 +1061,7 @@ export class WorkflowRunner {
888
1061
  logger: this.logger,
889
1062
  mcpManager: this.mcpManager,
890
1063
  workflowDir: subWorkflowDir,
1064
+ depth: this.depth + 1,
891
1065
  });
892
1066
 
893
1067
  try {
@@ -960,7 +1134,7 @@ export class WorkflowRunner {
960
1134
  this.logger.log('All steps already completed. Nothing to resume.\n');
961
1135
  // Evaluate outputs from completed state
962
1136
  const outputs = this.evaluateOutputs();
963
- await this.db.updateRunStatus(this.runId, 'completed', outputs);
1137
+ await this.db.updateRunStatus(this.runId, 'success', outputs);
964
1138
  this.logger.log('✨ Workflow already completed!\n');
965
1139
  return outputs;
966
1140
  }
@@ -986,6 +1160,11 @@ export class WorkflowRunner {
986
1160
  globalConcurrencyLimit = this.workflow.concurrency;
987
1161
  }
988
1162
  }
1163
+ if (!Number.isInteger(globalConcurrencyLimit) || globalConcurrencyLimit <= 0) {
1164
+ throw new Error(
1165
+ `workflow.concurrency must be a positive integer, got: ${globalConcurrencyLimit}`
1166
+ );
1167
+ }
989
1168
 
990
1169
  // Execute steps in parallel where possible (respecting dependencies and global concurrency)
991
1170
  const pendingSteps = new Set(remainingSteps);
@@ -1049,7 +1228,7 @@ export class WorkflowRunner {
1049
1228
  const outputs = this.evaluateOutputs();
1050
1229
 
1051
1230
  // Mark run as complete
1052
- await this.db.updateRunStatus(this.runId, 'completed', outputs);
1231
+ await this.db.updateRunStatus(this.runId, 'success', outputs);
1053
1232
 
1054
1233
  this.logger.log('✨ Workflow completed successfully!\n');
1055
1234