keystone-cli 1.0.3 → 1.1.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (153)
  1. package/README.md +276 -32
  2. package/package.json +8 -4
  3. package/src/cli.ts +350 -416
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/workflow-db.test.ts +314 -0
  14. package/src/db/workflow-db.ts +810 -210
  15. package/src/expression/evaluator-audit.test.ts +4 -2
  16. package/src/expression/evaluator.test.ts +14 -1
  17. package/src/expression/evaluator.ts +166 -19
  18. package/src/parser/config-schema.ts +18 -0
  19. package/src/parser/schema.ts +153 -22
  20. package/src/parser/test-schema.ts +6 -6
  21. package/src/parser/workflow-parser.test.ts +24 -0
  22. package/src/parser/workflow-parser.ts +65 -3
  23. package/src/runner/auto-heal.test.ts +5 -6
  24. package/src/runner/blueprint-executor.test.ts +2 -2
  25. package/src/runner/debug-repl.test.ts +5 -8
  26. package/src/runner/debug-repl.ts +59 -16
  27. package/src/runner/durable-timers.test.ts +11 -2
  28. package/src/runner/engine-executor.test.ts +1 -1
  29. package/src/runner/events.ts +57 -0
  30. package/src/runner/executors/artifact-executor.ts +166 -0
  31. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  32. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  33. package/src/runner/executors/file-executor.test.ts +48 -0
  34. package/src/runner/executors/file-executor.ts +324 -0
  35. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  36. package/src/runner/executors/human-executor.ts +144 -0
  37. package/src/runner/executors/join-executor.ts +75 -0
  38. package/src/runner/executors/llm-executor.ts +1266 -0
  39. package/src/runner/executors/memory-executor.ts +71 -0
  40. package/src/runner/executors/plan-executor.ts +104 -0
  41. package/src/runner/executors/request-executor.ts +265 -0
  42. package/src/runner/executors/script-executor.ts +43 -0
  43. package/src/runner/executors/shell-executor.ts +403 -0
  44. package/src/runner/executors/subworkflow-executor.ts +114 -0
  45. package/src/runner/executors/types.ts +69 -0
  46. package/src/runner/executors/wait-executor.ts +59 -0
  47. package/src/runner/join-scheduling.test.ts +197 -0
  48. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  49. package/src/runner/llm-adapter.test.ts +419 -24
  50. package/src/runner/llm-adapter.ts +130 -26
  51. package/src/runner/llm-clarification.test.ts +2 -1
  52. package/src/runner/llm-executor.test.ts +532 -17
  53. package/src/runner/mcp-client-audit.test.ts +1 -2
  54. package/src/runner/mcp-client.ts +136 -46
  55. package/src/runner/mcp-manager.test.ts +4 -0
  56. package/src/runner/mcp-server.test.ts +58 -0
  57. package/src/runner/mcp-server.ts +26 -0
  58. package/src/runner/memoization.test.ts +190 -0
  59. package/src/runner/optimization-runner.ts +4 -9
  60. package/src/runner/quality-gate.test.ts +69 -0
  61. package/src/runner/reflexion.test.ts +6 -17
  62. package/src/runner/resource-pool.ts +102 -14
  63. package/src/runner/services/context-builder.ts +144 -0
  64. package/src/runner/services/secret-manager.ts +105 -0
  65. package/src/runner/services/workflow-validator.ts +131 -0
  66. package/src/runner/shell-executor.test.ts +28 -4
  67. package/src/runner/standard-tools-ast.test.ts +196 -0
  68. package/src/runner/standard-tools-execution.test.ts +27 -0
  69. package/src/runner/standard-tools-integration.test.ts +6 -10
  70. package/src/runner/standard-tools.ts +339 -102
  71. package/src/runner/step-executor.test.ts +216 -4
  72. package/src/runner/step-executor.ts +69 -941
  73. package/src/runner/stream-utils.ts +7 -3
  74. package/src/runner/test-harness.ts +20 -1
  75. package/src/runner/timeout.test.ts +10 -0
  76. package/src/runner/timeout.ts +11 -2
  77. package/src/runner/tool-integration.test.ts +1 -1
  78. package/src/runner/wait-step.test.ts +102 -0
  79. package/src/runner/workflow-runner.test.ts +208 -15
  80. package/src/runner/workflow-runner.ts +890 -818
  81. package/src/runner/workflow-scheduler.ts +75 -0
  82. package/src/runner/workflow-state.ts +269 -0
  83. package/src/runner/workflow-subflows.test.ts +13 -12
  84. package/src/scripts/generate-schemas.ts +16 -0
  85. package/src/templates/agents/explore.md +1 -0
  86. package/src/templates/agents/general.md +1 -0
  87. package/src/templates/agents/handoff-router.md +14 -0
  88. package/src/templates/agents/handoff-specialist.md +15 -0
  89. package/src/templates/agents/keystone-architect.md +13 -44
  90. package/src/templates/agents/my-agent.md +1 -0
  91. package/src/templates/agents/software-engineer.md +1 -0
  92. package/src/templates/agents/summarizer.md +1 -0
  93. package/src/templates/agents/test-agent.md +1 -0
  94. package/src/templates/agents/tester.md +1 -0
  95. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  96. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
  97. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  98. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  99. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  100. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  101. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  102. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  103. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  104. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  105. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  106. package/src/templates/features/artifact-example.yaml +39 -0
  107. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  108. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  109. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  110. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  111. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  112. package/src/templates/features/script-example.yaml +27 -0
  113. package/src/templates/patterns/agent-handoff.yaml +53 -0
  114. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  115. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  116. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
  117. package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
  118. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  119. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  120. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  121. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  122. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  123. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  124. package/src/templates/scaffolding/review-loop.yaml +97 -0
  125. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  126. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  127. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  128. package/src/templates/testing/invalid.yaml +6 -0
  129. package/src/ui/dashboard.tsx +191 -33
  130. package/src/utils/auth-manager.test.ts +337 -0
  131. package/src/utils/auth-manager.ts +157 -61
  132. package/src/utils/blueprint-utils.ts +4 -6
  133. package/src/utils/config-loader.test.ts +2 -0
  134. package/src/utils/config-loader.ts +12 -3
  135. package/src/utils/constants.ts +76 -0
  136. package/src/utils/container.ts +63 -0
  137. package/src/utils/context-injector.test.ts +200 -0
  138. package/src/utils/context-injector.ts +244 -0
  139. package/src/utils/doc-generator.ts +85 -0
  140. package/src/utils/env-filter.ts +45 -0
  141. package/src/utils/json-parser.test.ts +12 -0
  142. package/src/utils/json-parser.ts +30 -5
  143. package/src/utils/logger.ts +12 -1
  144. package/src/utils/mermaid.ts +4 -0
  145. package/src/utils/paths.ts +52 -1
  146. package/src/utils/process-sandbox-worker.test.ts +46 -0
  147. package/src/utils/process-sandbox.ts +227 -14
  148. package/src/utils/redactor.test.ts +11 -6
  149. package/src/utils/redactor.ts +25 -9
  150. package/src/utils/sandbox.ts +3 -0
  151. package/src/runner/llm-executor.ts +0 -638
  152. package/src/runner/shell-executor.ts +0 -366
  153. package/src/templates/invalid.yaml +0 -5
@@ -59,8 +59,10 @@ describe('ExpressionEvaluator Audit Fixes', () => {
59
59
 
60
60
  it('should support level 3 array nesting', () => {
61
61
  // ${{ [ [ [ 1 ] ] ] }}
62
- // biome-ignore lint/suspicious/noExplicitAny: generic loose validation for test
63
- const res = ExpressionEvaluator.evaluate('${{ [ [ [ 1 ] ] ] }}', nestedContext) as any;
62
+ const res = ExpressionEvaluator.evaluate(
63
+ '${{ [ [ [ 1 ] ] ] }}',
64
+ nestedContext
65
+ ) as number[][][];
64
66
  expect(res[0][0][0]).toBe(1);
65
67
  });
66
68
  });
@@ -1,4 +1,4 @@
1
- import { describe, expect, test } from 'bun:test';
1
+ import { afterEach, describe, expect, test } from 'bun:test';
2
2
  import { ExpressionEvaluator } from './evaluator';
3
3
 
4
4
  describe('ExpressionEvaluator', () => {
@@ -21,6 +21,10 @@ describe('ExpressionEvaluator', () => {
21
21
  my_val: 123,
22
22
  };
23
23
 
24
+ afterEach(() => {
25
+ ExpressionEvaluator.setStrictMode(false);
26
+ });
27
+
24
28
  test('should evaluate simple literals', () => {
25
29
  expect(ExpressionEvaluator.evaluate("${{ 'hello' }}", context)).toBe('hello');
26
30
  expect(ExpressionEvaluator.evaluate('${{ 123 }}', context)).toBe(123);
@@ -98,6 +102,15 @@ describe('ExpressionEvaluator', () => {
98
102
  expect(ExpressionEvaluator.hasExpression('has ${{ expr }}')).toBe(true);
99
103
  });
100
104
 
105
+ test('should fail fast on malformed templates in strict mode', () => {
106
+ ExpressionEvaluator.setStrictMode(true);
107
+ expect(() => ExpressionEvaluator.evaluate('Hello ${{ inputs.name', context)).toThrow(
108
+ /Unclosed expression/
109
+ );
110
+ expect(() => ExpressionEvaluator.evaluate('Hello }}', context)).toThrow(/Unexpected/);
111
+ expect(ExpressionEvaluator.evaluate('Hello ${{ inputs.name }}', context)).toBe('Hello World');
112
+ });
113
+
101
114
  test('should handle evaluateObject', () => {
102
115
  const obj = {
103
116
  name: 'Hello ${{ inputs.name }}',
@@ -1,7 +1,7 @@
1
1
  import jsepArrow from '@jsep-plugin/arrow';
2
2
  import jsepObject from '@jsep-plugin/object';
3
3
  import jsep from 'jsep';
4
- import { escapeShellArg } from '../runner/shell-executor.ts';
4
+ import { escapeShellArg } from '../runner/executors/shell-executor.ts';
5
5
 
6
6
  // Register plugins
7
7
  jsep.plugins.register(jsepArrow);
@@ -14,6 +14,7 @@ jsep.plugins.register(jsepObject);
14
14
  * - secrets.KEY
15
15
  * - steps.step_id.output
16
16
  * - steps.step_id.outputs.field
17
+ * - memory.key
17
18
  * - item (for foreach)
18
19
  * - Basic JS expressions (arithmetic, comparisons, logical operators)
19
20
  * - Array access, method calls (map, filter, every, etc.)
@@ -36,10 +37,13 @@ export interface ExpressionContext {
36
37
  args?: unknown;
37
38
  index?: number;
38
39
  env?: Record<string, string>;
40
+ envOverrides?: Record<string, string>;
41
+ memory?: Record<string, unknown>;
39
42
  output?: unknown;
40
43
  autoHealAttempts?: number;
41
44
  reflexionAttempts?: number;
42
45
  outputRepairAttempts?: number;
46
+ qualityGateAttempts?: number;
43
47
  last_failed_step?: { id: string; error: string };
44
48
  }
45
49
 
@@ -90,6 +94,77 @@ export class ExpressionEvaluator {
90
94
  private static readonly MAX_TOTAL_NODES = 10000;
91
95
  // Maximum arrow function nesting depth
92
96
  private static readonly MAX_ARROW_DEPTH = 3;
97
+ private static strictMode = false;
98
+ private static jsepCache = new Map<string, ASTNode>();
99
+ private static maxCacheSize = 1000;
100
+
101
+ /**
102
+ * Set strict mode for template validation
103
+ */
104
+ static setStrictMode(strict: boolean): void {
105
+ ExpressionEvaluator.strictMode = strict;
106
+ }
107
+
108
+ /**
109
+ * Set the maximum cache size for parsed expressions.
110
+ * Default is 1000, which is suitable for most workflows.
111
+ * Increase for workflows with many unique expressions.
112
+ *
113
+ * @param size Maximum number of parsed expressions to cache
114
+ */
115
+ static setCacheSize(size: number): void {
116
+ if (size < 0) throw new Error('Cache size must be non-negative');
117
+ ExpressionEvaluator.maxCacheSize = size;
118
+ // Prune cache if it's now too large
119
+ while (ExpressionEvaluator.jsepCache.size > size) {
120
+ const firstKey = ExpressionEvaluator.jsepCache.keys().next().value;
121
+ if (firstKey !== undefined) ExpressionEvaluator.jsepCache.delete(firstKey);
122
+ }
123
+ }
124
+
125
+ /**
126
+ * Clear the expression cache. Useful for testing or memory management.
127
+ */
128
+ static clearCache(): void {
129
+ ExpressionEvaluator.jsepCache.clear();
130
+ }
131
+
132
+ private static validateTemplate(template: string): void {
133
+ let i = 0;
134
+ while (i < template.length) {
135
+ if (template.substring(i, i + 3) === '${{') {
136
+ let depth = 0;
137
+ let j = i + 3;
138
+ let closed = false;
139
+
140
+ while (j < template.length) {
141
+ if (template.substring(j, j + 2) === '}}' && depth === 0) {
142
+ closed = true;
143
+ i = j + 2;
144
+ break;
145
+ }
146
+
147
+ if (template[j] === '{') {
148
+ depth++;
149
+ } else if (template[j] === '}') {
150
+ if (depth > 0) depth--;
151
+ }
152
+ j++;
153
+ }
154
+
155
+ if (!closed) {
156
+ throw new Error(`Unclosed expression starting at index ${i}`);
157
+ }
158
+ continue;
159
+ }
160
+
161
+ if (template.substring(i, i + 2) === '}}') {
162
+ throw new Error(`Unexpected "}}" at index ${i}`);
163
+ }
164
+
165
+ i++;
166
+ }
167
+ }
93
168
 
94
169
  /**
95
170
  * Helper to scan string for matches of ${{ ... }} handling nested braces manually
@@ -141,6 +216,10 @@ export class ExpressionEvaluator {
141
216
  * Strict equality (===) is preserved for '==='.
142
217
  */
143
218
  static evaluate(template: string, context: ExpressionContext): unknown {
219
+ if (ExpressionEvaluator.strictMode && (template.includes('${{') || template.includes('}}'))) {
220
+ ExpressionEvaluator.validateTemplate(template);
221
+ }
222
+
144
223
  const hasExpr = ExpressionEvaluator.hasExpression(template);
145
224
 
146
225
  // Prevent excessive length
@@ -229,8 +308,37 @@ export class ExpressionEvaluator {
229
308
  * Evaluate a string and ensure the result is a string.
230
309
  * Objects and arrays are stringified to JSON.
231
310
  * null and undefined return an empty string.
311
+ *
312
+ * @throws TypeError if template is an object with a custom toString() method
232
313
  */
233
- static evaluateString(template: string, context: ExpressionContext): string {
314
+ static evaluateString(template: unknown, context: ExpressionContext): string {
315
+ if (typeof template !== 'string') {
316
+ if (template === null || template === undefined) return '';
317
+
318
+ // Security: Reject objects with custom toString() to prevent code execution
319
+ // during string conversion. Only allow primitives.
320
+ if (typeof template === 'object') {
321
+ // Check if this is an object with a custom toString (not Object.prototype.toString)
322
+ const proto = Object.getPrototypeOf(template);
323
+ if (proto !== null && proto !== Object.prototype && proto !== Array.prototype) {
324
+ // Has custom prototype - could have malicious toString
325
+ if (
326
+ typeof (template as { toString?: unknown }).toString === 'function' &&
327
+ (template as { toString: () => string }).toString !== Object.prototype.toString
328
+ ) {
329
+ throw new TypeError(
330
+ 'Security: Cannot evaluate object with custom toString() method. ' +
331
+ 'Pass a string template instead.'
332
+ );
333
+ }
334
+ }
335
+ // Safe to serialize as JSON
336
+ return JSON.stringify(template, null, 2);
337
+ }
338
+
339
+ // Primitives are safe to convert
340
+ return String(template);
341
+ }
234
342
  const result = ExpressionEvaluator.evaluate(template, context);
235
343
 
236
344
  if (result === null || result === undefined) {
@@ -250,7 +358,22 @@ export class ExpressionEvaluator {
250
358
  */
251
359
  static evaluateExpression(expr: string, context: ExpressionContext): unknown {
252
360
  try {
253
- const ast = jsep(expr);
361
+ let ast = ExpressionEvaluator.jsepCache.get(expr);
362
+ if (!ast) {
363
+ ast = jsep(expr);
364
+ // Only cache if maxCacheSize > 0 (caching enabled)
365
+ if (ExpressionEvaluator.maxCacheSize > 0) {
366
+ // Manage cache size with incremental eviction to reduce GC pressure
367
+ if (ExpressionEvaluator.jsepCache.size >= ExpressionEvaluator.maxCacheSize) {
368
+ const firstKey = ExpressionEvaluator.jsepCache.keys().next().value;
369
+ if (firstKey !== undefined) {
370
+ ExpressionEvaluator.jsepCache.delete(firstKey);
371
+ }
372
+ }
373
+ ExpressionEvaluator.jsepCache.set(expr, ast);
374
+ }
375
+ }
376
+
254
377
  // Track total nodes evaluated to prevent DoS
255
378
  const nodeCounter = { count: 0 };
256
379
  return ExpressionEvaluator.evaluateNode(ast, context, 0, nodeCounter);
@@ -357,6 +480,7 @@ export class ExpressionEvaluator {
357
480
  args: context.args,
358
481
  index: context.index,
359
482
  env: context.env || {},
483
+ memory: context.memory || {},
360
484
  stdout: contextAsRecord.stdout, // For transform expressions
361
485
  last_failed_step: context.last_failed_step,
362
486
  };
@@ -399,8 +523,9 @@ export class ExpressionEvaluator {
399
523
  if (
400
524
  ExpressionEvaluator.FORBIDDEN_PROPERTIES.has(property) ||
401
525
  ExpressionEvaluator.FORBIDDEN_PROPERTIES.has(propertyLower) ||
402
- normalizedProperty.includes('proto') ||
403
- normalizedProperty.includes('constructor')
526
+ normalizedProperty === '__proto__' ||
527
+ normalizedProperty === 'constructor' ||
528
+ normalizedProperty === 'prototype'
404
529
  ) {
405
530
  throw new Error(`Access to property "${property}" is forbidden for security reasons`);
406
531
  }
@@ -425,15 +550,19 @@ export class ExpressionEvaluator {
425
550
 
426
551
  switch (binaryNode.operator) {
427
552
  case '+':
428
- return (left as number) + (right as number);
553
+ // Support both string concatenation and numeric addition
554
+ if (typeof left === 'string' || typeof right === 'string') {
555
+ return String(left ?? '') + String(right ?? '');
556
+ }
557
+ return Number(left) + Number(right);
429
558
  case '-':
430
- return (left as number) - (right as number);
559
+ return Number(left) - Number(right);
431
560
  case '*':
432
- return (left as number) * (right as number);
561
+ return Number(left) * Number(right);
433
562
  case '/':
434
- return (left as number) / (right as number);
563
+ return Number(left) / Number(right);
435
564
  case '%':
436
- return (left as number) % (right as number);
565
+ return Number(left) % Number(right);
437
566
  case '==':
438
567
  // Use loose equality to match non-programmer expectations (e.g. "5" == 5)
439
568
  // Strict equality is available via ===
@@ -447,13 +576,13 @@ export class ExpressionEvaluator {
447
576
  case '!==':
448
577
  return left !== right;
449
578
  case '<':
450
- return (left as number) < (right as number);
579
+ return Number(left) < Number(right);
451
580
  case '<=':
452
- return (left as number) <= (right as number);
581
+ return Number(left) <= Number(right);
453
582
  case '>':
454
- return (left as number) > (right as number);
583
+ return Number(left) > Number(right);
455
584
  case '>=':
456
- return (left as number) >= (right as number);
585
+ return Number(left) >= Number(right);
457
586
  default:
458
587
  throw new Error(`Unsupported binary operator: ${binaryNode.operator}`);
459
588
  }
@@ -518,6 +647,17 @@ export class ExpressionEvaluator {
518
647
  prop.key.type === 'Identifier' && !prop.computed
519
648
  ? (prop.key as jsep.Identifier).name
520
649
  : ExpressionEvaluator.evaluateNode(prop.key, context);
650
+ if (typeof key === 'string') {
651
+ const normalizedKey = key.normalize('NFKC').toLowerCase();
652
+ if (
653
+ ExpressionEvaluator.FORBIDDEN_PROPERTIES.has(key) ||
654
+ ExpressionEvaluator.FORBIDDEN_PROPERTIES.has(normalizedKey) ||
655
+ normalizedKey.includes('proto') ||
656
+ normalizedKey.includes('constructor')
657
+ ) {
658
+ throw new Error(`Access to property "${key}" is forbidden for security reasons`);
659
+ }
660
+ }
521
661
  result[key as string] = ExpressionEvaluator.evaluateNode(prop.value, context);
522
662
  }
523
663
  return result;
@@ -544,7 +684,8 @@ export class ExpressionEvaluator {
544
684
  if (arg.type === 'ArrowFunctionExpression') {
545
685
  return ExpressionEvaluator.createArrowFunction(
546
686
  arg as ArrowFunctionExpression,
547
- context
687
+ context,
688
+ nodeCounter
548
689
  );
549
690
  }
550
691
  return ExpressionEvaluator.evaluateNode(arg, context);
@@ -653,7 +794,8 @@ export class ExpressionEvaluator {
653
794
  if (arg.type === 'ArrowFunctionExpression') {
654
795
  return ExpressionEvaluator.createArrowFunction(
655
796
  arg as ArrowFunctionExpression,
656
- context
797
+ context,
798
+ nodeCounter
657
799
  );
658
800
  }
659
801
  return ExpressionEvaluator.evaluateNode(arg, context);
@@ -668,7 +810,11 @@ export class ExpressionEvaluator {
668
810
  case 'ArrowFunctionExpression': {
669
811
  // Arrow functions should be handled in the context of CallExpression
670
812
  // If we reach here, it means they're being used outside of a method call
671
- return ExpressionEvaluator.createArrowFunction(node as ArrowFunctionExpression, context);
813
+ return ExpressionEvaluator.createArrowFunction(
814
+ node as ArrowFunctionExpression,
815
+ context,
816
+ nodeCounter
817
+ );
672
818
  }
673
819
 
674
820
  default:
@@ -681,7 +827,8 @@ export class ExpressionEvaluator {
681
827
  */
682
828
  private static createArrowFunction(
683
829
  arrowNode: ArrowFunctionExpression,
684
- context: ExpressionContext
830
+ context: ExpressionContext,
831
+ nodeCounter: { count: number }
685
832
  ): (...args: unknown[]) => unknown {
686
833
  return (...args: unknown[]) => {
687
834
  // Create a new context with arrow function parameters
@@ -695,7 +842,7 @@ export class ExpressionEvaluator {
695
842
  });
696
843
 
697
844
  // Evaluate the body with the new context
698
- return ExpressionEvaluator.evaluateNode(arrowNode.body, arrowContext);
845
+ return ExpressionEvaluator.evaluateNode(arrowNode.body, arrowContext, 0, nodeCounter);
699
846
  };
700
847
  }
701
848
 
@@ -100,6 +100,24 @@ export const ConfigSchema = z.object({
100
100
  }),
101
101
  })
102
102
  .default({}),
103
+ expression: z
104
+ .object({
105
+ strict: z.boolean().default(false),
106
+ })
107
+ .default({}),
108
+ features: z
109
+ .object({
110
+ context_injection: z
111
+ .object({
112
+ enabled: z.boolean().default(false),
113
+ search_depth: z.number().default(3),
114
+ sources: z
115
+ .array(z.enum(['readme', 'agents_md', 'cursor_rules']))
116
+ .default(['readme', 'agents_md', 'cursor_rules']),
117
+ })
118
+ .optional(),
119
+ })
120
+ .optional(),
103
121
  });
104
122
 
105
123
  export type Config = z.infer<typeof ConfigSchema>;
@@ -104,13 +104,19 @@ const ReflexionSchema = z.object({
104
104
  hint: z.string().optional(),
105
105
  });
106
106
 
107
+ // ===== Matrix Strategy Schema =====
108
+
109
+ const StrategySchema = z.object({
110
+ matrix: z.record(z.array(z.union([z.string(), z.number(), z.boolean()]))),
111
+ });
112
+
107
113
  // ===== Base Step Schema =====
108
114
 
109
- const BaseStepSchema = z.object({
115
+ export const BaseStepSchema = z.object({
110
116
  id: z.string(),
111
117
  type: z.string(),
112
118
  needs: z.array(z.string()).optional().default([]),
113
- if: z.string().optional(),
119
+ if: z.union([z.string(), z.boolean()]).optional(),
114
120
  timeout: z.number().int().positive().optional(),
115
121
  retry: RetrySchema.optional(),
116
122
  auto_heal: AutoHealSchema.optional(),
@@ -123,8 +129,12 @@ const BaseStepSchema = z.object({
123
129
  // Accept both number and string (for expressions or YAML number-as-string)
124
130
  concurrency: z.union([z.number().int().positive(), z.string()]).optional(),
125
131
  pool: z.string().optional(), // Resource pool to use for this step
132
+ breakpoint: z.boolean().optional(),
133
+ strategy: StrategySchema.optional(),
126
134
  transform: z.string().optional(),
127
135
  learn: z.boolean().optional(),
136
+ memoize: z.boolean().optional(),
137
+ memoizeTtlSeconds: z.number().int().positive().optional(),
128
138
  inputSchema: z.any().optional(),
129
139
  outputSchema: z.any().optional(),
130
140
  outputRetries: z.number().int().min(0).optional(), // Max retries for output validation failures
@@ -136,9 +146,11 @@ const BaseStepSchema = z.object({
136
146
 
137
147
  const ShellStepSchema = BaseStepSchema.extend({
138
148
  type: z.literal('shell'),
139
- run: z.string(),
149
+ run: z.string().optional(),
150
+ args: z.array(z.string()).optional(),
140
151
  dir: z.string().optional(),
141
152
  env: z.record(z.string()).optional(),
153
+ allowOutsideCwd: z.boolean().optional(),
142
154
  allowInsecure: z.boolean().optional(),
143
155
  });
144
156
 
@@ -176,6 +188,14 @@ const EngineHandoffSchema = z.object({
176
188
  }),
177
189
  });
178
190
 
191
+ const QualityGateSchema = z.object({
192
+ agent: z.string(),
193
+ prompt: z.string().optional(),
194
+ provider: z.string().optional(),
195
+ model: z.string().optional(),
196
+ maxAttempts: z.number().int().min(1).default(1),
197
+ });
198
+
179
199
  const LlmStepSchema = BaseStepSchema.extend({
180
200
  type: z.literal('llm'),
181
201
  agent: z.string(),
@@ -183,8 +203,51 @@ const LlmStepSchema = BaseStepSchema.extend({
183
203
  model: z.string().optional(),
184
204
  prompt: z.string(),
185
205
  tools: z.array(AgentToolSchema).optional(),
206
+ allowedHandoffs: z.array(z.string()).optional(),
186
207
  maxIterations: z.number().int().positive().default(10),
187
208
  maxMessageHistory: z.number().int().positive().optional(), // Max messages to keep in conversation history
209
+ contextStrategy: z.enum(['truncate', 'summary', 'auto']).optional(),
210
+ qualityGate: QualityGateSchema.optional(),
211
+ useGlobalMcp: z.boolean().optional(),
212
+ allowClarification: z.boolean().optional(),
213
+ mcpServers: z
214
+ .array(
215
+ z.union([
216
+ z.string(),
217
+ z.object({
218
+ name: z.string(),
219
+ type: z.enum(['local', 'remote']).optional(),
220
+ command: z.string().optional(),
221
+ args: z.array(z.string()).optional(),
222
+ env: z.record(z.string()).optional(),
223
+ url: z.string().optional(),
224
+ headers: z.record(z.string()).optional(),
225
+ timeout: z.number().int().positive().optional(),
226
+ }),
227
+ ])
228
+ )
229
+ .optional(),
230
+ useStandardTools: z.boolean().optional(),
231
+ allowOutsideCwd: z.boolean().optional(),
232
+ allowInsecure: z.boolean().optional(),
233
+ handoff: EngineHandoffSchema.optional(),
234
+ });
235
+
236
+ const PlanStepSchema = BaseStepSchema.extend({
237
+ type: z.literal('plan'),
238
+ goal: z.string(),
239
+ context: z.string().optional(),
240
+ constraints: z.string().optional(),
241
+ prompt: z.string().optional(),
242
+ agent: z.string().optional().default('keystone-architect'),
243
+ provider: z.string().optional(),
244
+ model: z.string().optional(),
245
+ tools: z.array(AgentToolSchema).optional(),
246
+ allowedHandoffs: z.array(z.string()).optional(),
247
+ maxIterations: z.number().int().positive().default(10),
248
+ maxMessageHistory: z.number().int().positive().optional(),
249
+ contextStrategy: z.enum(['truncate', 'summary', 'auto']).optional(),
250
+ qualityGate: QualityGateSchema.optional(),
188
251
  useGlobalMcp: z.boolean().optional(),
189
252
  allowClarification: z.boolean().optional(),
190
253
  mcpServers: z
@@ -229,7 +292,7 @@ const FileStepSchema = BaseStepSchema.extend({
229
292
  type: z.literal('file'),
230
293
  path: z.string(),
231
294
  content: z.string().optional(),
232
- op: z.enum(['read', 'write', 'append']),
295
+ op: z.enum(['read', 'write', 'append', 'patch']),
233
296
  allowOutsideCwd: z.boolean().optional(),
234
297
  });
235
298
 
@@ -250,13 +313,15 @@ const HumanStepSchema = BaseStepSchema.extend({
250
313
 
251
314
  const SleepStepSchema = BaseStepSchema.extend({
252
315
  type: z.literal('sleep'),
253
- duration: z.union([z.number().int().positive(), z.string()]),
316
+ duration: z.union([z.number().int().positive(), z.string()]).optional(),
317
+ until: z.string().optional(),
254
318
  durable: z.boolean().optional(), // Persist across restarts for long sleeps
255
319
  });
256
320
 
257
321
  const ScriptStepSchema = BaseStepSchema.extend({
258
322
  type: z.literal('script'),
259
323
  run: z.string(),
324
+ allowOutsideCwd: z.boolean().optional(),
260
325
  allowInsecure: z.boolean().optional().default(false),
261
326
  });
262
327
 
@@ -321,13 +386,29 @@ const MemoryStepSchema = BaseStepSchema.extend({
321
386
  limit: z.number().int().positive().optional().default(5),
322
387
  });
323
388
 
389
+ const ArtifactStepSchema = BaseStepSchema.extend({
390
+ type: z.literal('artifact'),
391
+ op: z.enum(['upload', 'download']),
392
+ name: z.string(),
393
+ paths: z.array(z.string()).optional(),
394
+ path: z.string().optional(),
395
+ allowOutsideCwd: z.boolean().optional(),
396
+ });
397
+
398
+ const WaitStepSchema = BaseStepSchema.extend({
399
+ type: z.literal('wait'),
400
+ event: z.string(),
401
+ oneShot: z.boolean().optional().default(true),
402
+ // timeout is already in BaseStepSchema, but let's make it explicit here if needed
403
+ });
404
+
324
405
  // ===== Discriminated Union for Steps =====
325
406
 
326
- // biome-ignore lint/suspicious/noExplicitAny: Recursive Zod type
327
- export const StepSchema: z.ZodType<any> = z.lazy(() =>
407
+ export const StepSchema: z.ZodType<unknown> = z.lazy(() =>
328
408
  z.discriminatedUnion('type', [
329
409
  ShellStepSchema,
330
410
  LlmStepSchema,
411
+ PlanStepSchema,
331
412
  WorkflowStepSchema,
332
413
  FileStepSchema,
333
414
  RequestStepSchema,
@@ -338,6 +419,8 @@ export const StepSchema: z.ZodType<any> = z.lazy(() =>
338
419
  MemoryStepSchema,
339
420
  JoinStepSchema,
340
421
  BlueprintStepSchema,
422
+ ArtifactStepSchema,
423
+ WaitStepSchema,
341
424
  ])
342
425
  );
343
426
 
@@ -354,21 +437,40 @@ const EvalSchema = z.object({
354
437
 
355
438
  // ===== Workflow Schema =====
356
439
 
357
- export const WorkflowSchema = z.object({
358
- name: z.string(),
359
- description: z.string().optional(),
360
- inputs: z.record(InputSchema).optional(),
361
- outputs: z.record(z.string()).optional(),
362
- outputSchema: z.any().optional(), // JSON Schema for final workflow outputs
363
- env: z.record(z.string()).optional(),
364
- concurrency: z.union([z.number().int().positive(), z.string()]).optional(),
365
- pools: z.record(z.union([z.number().int().positive(), z.string()])).optional(), // Resource pool overrides
366
- steps: z.array(StepSchema),
367
- errors: z.array(StepSchema).optional(),
368
- finally: z.array(StepSchema).optional(),
369
- compensate: z.lazy(() => StepSchema).optional(), // Top-level compensation for the entire workflow
370
- eval: EvalSchema.optional(),
371
- });
440
+ export const WorkflowSchema = z
441
+ .object({
442
+ name: z.string(),
443
+ description: z.string().optional(),
444
+ inputs: z.record(InputSchema).optional(),
445
+ outputs: z.record(z.string()).optional(),
446
+ outputSchema: z.any().optional(), // JSON Schema for final workflow outputs
447
+ env: z.record(z.string()).optional(),
448
+ concurrency: z.union([z.number().int().positive(), z.string()]).optional(),
449
+ pools: z.record(z.union([z.number().int().positive(), z.string()])).optional(), // Resource pool overrides
450
+ steps: z.array(StepSchema),
451
+ errors: z.array(StepSchema).optional(),
452
+ finally: z.array(StepSchema).optional(),
453
+ compensate: z.lazy(() => StepSchema).optional(), // Top-level compensation for the entire workflow
454
+ eval: EvalSchema.optional(),
455
+ })
456
+ .superRefine((data, ctx) => {
457
+ const checkShellSteps = (steps: Step[] | undefined, pathPrefix: (string | number)[]) => {
458
+ if (!steps) return;
459
+ steps.forEach((step, index) => {
460
+ if (step.type === 'shell' && !step.run && !step.args) {
461
+ ctx.addIssue({
462
+ code: z.ZodIssueCode.custom,
463
+ message: 'Shell step must have either "run" or "args"',
464
+ path: [...pathPrefix, index],
465
+ });
466
+ }
467
+ });
468
+ };
469
+
470
+ checkShellSteps(data.steps, ['steps']);
471
+ checkShellSteps(data.errors, ['errors']);
472
+ checkShellSteps(data.finally, ['finally']);
473
+ });
372
474
 
373
475
  // ===== Agent Schema =====
374
476
 
@@ -388,6 +490,7 @@ export type RetryConfig = z.infer<typeof RetrySchema>;
388
490
  export type Step = z.infer<typeof StepSchema>;
389
491
  export type ShellStep = z.infer<typeof ShellStepSchema>;
390
492
  export type LlmStep = z.infer<typeof LlmStepSchema>;
493
+ export type PlanStep = z.infer<typeof PlanStepSchema>;
391
494
  export type WorkflowStep = z.infer<typeof WorkflowStepSchema>;
392
495
  export type FileStep = z.infer<typeof FileStepSchema>;
393
496
  export type RequestStep = z.infer<typeof RequestStepSchema>;
@@ -398,7 +501,35 @@ export type MemoryStep = z.infer<typeof MemoryStepSchema>;
398
501
  export type EngineStep = z.infer<typeof EngineStepSchema>;
399
502
  export type JoinStep = z.infer<typeof JoinStepSchema>;
400
503
  export type BlueprintStep = z.infer<typeof BlueprintStepSchema>;
504
+ export type ArtifactStep = z.infer<typeof ArtifactStepSchema>;
401
505
  export type Blueprint = z.infer<typeof BlueprintSchema>;
402
506
  export type Workflow = z.infer<typeof WorkflowSchema>;
403
507
  export type AgentTool = z.infer<typeof AgentToolSchema>;
508
+ export type WaitStep = z.infer<typeof WaitStepSchema>;
509
+
510
+ // ===== Helper Schemas =====
511
+ export {
512
+ InputSchema,
513
+ RetrySchema,
514
+ AutoHealSchema,
515
+ ReflexionSchema,
516
+ StrategySchema,
517
+ EngineConfigSchema,
518
+ EngineHandoffSchema,
519
+ BlueprintSchema,
520
+ WaitStepSchema,
521
+ ShellStepSchema,
522
+ LlmStepSchema,
523
+ PlanStepSchema,
524
+ WorkflowStepSchema,
525
+ FileStepSchema,
526
+ RequestStepSchema,
527
+ HumanStepSchema,
528
+ SleepStepSchema,
529
+ ScriptStepSchema,
530
+ EngineStepSchema,
531
+ BlueprintStepSchema,
532
+ MemoryStepSchema,
533
+ ArtifactStepSchema,
534
+ };
404
535
  export type Agent = z.infer<typeof AgentSchema>;
@@ -9,21 +9,21 @@ export interface TestDefinition {
9
9
  step?: string;
10
10
  type?: string;
11
11
  prompt?: string;
12
- // biome-ignore lint/suspicious/noExplicitAny: Mock responses can be any type
13
- response: any;
12
+ response: unknown;
14
13
  }>;
15
14
  };
15
+ options?: {
16
+ allowSideEffects?: boolean;
17
+ };
16
18
  snapshot?: {
17
19
  steps: Record<
18
20
  string,
19
21
  {
20
22
  status: string;
21
- // biome-ignore lint/suspicious/noExplicitAny: Step outputs can be any type
22
- output: any;
23
+ output: unknown;
23
24
  error?: string;
24
25
  }
25
26
  >;
26
- // biome-ignore lint/suspicious/noExplicitAny: Workflow outputs can be any type
27
- outputs: Record<string, any>;
27
+ outputs: Record<string, unknown>;
28
28
  };
29
29
  }