keystone-cli 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/README.md +288 -24
  2. package/package.json +8 -4
  3. package/src/cli.ts +538 -419
  4. package/src/commands/doc.ts +31 -0
  5. package/src/commands/event.ts +29 -0
  6. package/src/commands/graph.ts +37 -0
  7. package/src/commands/index.ts +14 -0
  8. package/src/commands/init.ts +185 -0
  9. package/src/commands/run.ts +124 -0
  10. package/src/commands/schema.ts +40 -0
  11. package/src/commands/utils.ts +78 -0
  12. package/src/commands/validate.ts +111 -0
  13. package/src/db/memory-db.ts +50 -2
  14. package/src/db/workflow-db.test.ts +314 -0
  15. package/src/db/workflow-db.ts +810 -210
  16. package/src/expression/evaluator-audit.test.ts +4 -2
  17. package/src/expression/evaluator.test.ts +14 -1
  18. package/src/expression/evaluator.ts +166 -19
  19. package/src/parser/config-schema.ts +18 -0
  20. package/src/parser/schema.ts +153 -22
  21. package/src/parser/test-schema.ts +6 -6
  22. package/src/parser/workflow-parser.test.ts +24 -0
  23. package/src/parser/workflow-parser.ts +65 -3
  24. package/src/runner/auto-heal.test.ts +5 -6
  25. package/src/runner/blueprint-executor.test.ts +2 -2
  26. package/src/runner/debug-repl.test.ts +5 -8
  27. package/src/runner/debug-repl.ts +59 -16
  28. package/src/runner/durable-timers.test.ts +11 -2
  29. package/src/runner/engine-executor.test.ts +1 -1
  30. package/src/runner/events.ts +57 -0
  31. package/src/runner/executors/artifact-executor.ts +166 -0
  32. package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
  33. package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
  34. package/src/runner/executors/file-executor.test.ts +48 -0
  35. package/src/runner/executors/file-executor.ts +324 -0
  36. package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
  37. package/src/runner/executors/human-executor.ts +144 -0
  38. package/src/runner/executors/join-executor.ts +75 -0
  39. package/src/runner/executors/llm-executor.ts +1266 -0
  40. package/src/runner/executors/memory-executor.ts +71 -0
  41. package/src/runner/executors/plan-executor.ts +104 -0
  42. package/src/runner/executors/request-executor.ts +265 -0
  43. package/src/runner/executors/script-executor.ts +43 -0
  44. package/src/runner/executors/shell-executor.ts +403 -0
  45. package/src/runner/executors/subworkflow-executor.ts +114 -0
  46. package/src/runner/executors/types.ts +69 -0
  47. package/src/runner/executors/wait-executor.ts +59 -0
  48. package/src/runner/join-scheduling.test.ts +197 -0
  49. package/src/runner/llm-adapter-runtime.test.ts +209 -0
  50. package/src/runner/llm-adapter.test.ts +419 -24
  51. package/src/runner/llm-adapter.ts +414 -17
  52. package/src/runner/llm-clarification.test.ts +2 -1
  53. package/src/runner/llm-executor.test.ts +532 -17
  54. package/src/runner/mcp-client-audit.test.ts +1 -2
  55. package/src/runner/mcp-client.ts +136 -46
  56. package/src/runner/mcp-manager.test.ts +4 -0
  57. package/src/runner/mcp-server.test.ts +58 -0
  58. package/src/runner/mcp-server.ts +26 -0
  59. package/src/runner/memoization.test.ts +190 -0
  60. package/src/runner/optimization-runner.ts +4 -9
  61. package/src/runner/quality-gate.test.ts +69 -0
  62. package/src/runner/reflexion.test.ts +6 -17
  63. package/src/runner/resource-pool.ts +102 -14
  64. package/src/runner/services/context-builder.ts +144 -0
  65. package/src/runner/services/secret-manager.ts +105 -0
  66. package/src/runner/services/workflow-validator.ts +131 -0
  67. package/src/runner/shell-executor.test.ts +28 -4
  68. package/src/runner/standard-tools-ast.test.ts +196 -0
  69. package/src/runner/standard-tools-execution.test.ts +27 -0
  70. package/src/runner/standard-tools-integration.test.ts +6 -10
  71. package/src/runner/standard-tools.ts +339 -102
  72. package/src/runner/step-executor.test.ts +216 -4
  73. package/src/runner/step-executor.ts +69 -941
  74. package/src/runner/stream-utils.ts +7 -3
  75. package/src/runner/test-harness.ts +20 -1
  76. package/src/runner/timeout.test.ts +10 -0
  77. package/src/runner/timeout.ts +11 -2
  78. package/src/runner/tool-integration.test.ts +1 -1
  79. package/src/runner/wait-step.test.ts +102 -0
  80. package/src/runner/workflow-runner.test.ts +208 -15
  81. package/src/runner/workflow-runner.ts +890 -818
  82. package/src/runner/workflow-scheduler.ts +75 -0
  83. package/src/runner/workflow-state.ts +269 -0
  84. package/src/runner/workflow-subflows.test.ts +13 -12
  85. package/src/scripts/generate-schemas.ts +16 -0
  86. package/src/templates/agents/explore.md +1 -0
  87. package/src/templates/agents/general.md +1 -0
  88. package/src/templates/agents/handoff-router.md +14 -0
  89. package/src/templates/agents/handoff-specialist.md +15 -0
  90. package/src/templates/agents/keystone-architect.md +13 -44
  91. package/src/templates/agents/my-agent.md +1 -0
  92. package/src/templates/agents/software-engineer.md +1 -0
  93. package/src/templates/agents/summarizer.md +1 -0
  94. package/src/templates/agents/test-agent.md +1 -0
  95. package/src/templates/agents/tester.md +1 -0
  96. package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
  97. package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
  98. package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
  99. package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
  100. package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
  101. package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
  102. package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
  103. package/src/templates/control-flow/idempotency-example.yaml +30 -0
  104. package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
  105. package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
  106. package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
  107. package/src/templates/features/artifact-example.yaml +39 -0
  108. package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
  109. package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
  110. package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
  111. package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
  112. package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
  113. package/src/templates/features/script-example.yaml +27 -0
  114. package/src/templates/patterns/agent-handoff.yaml +53 -0
  115. package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
  116. package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
  117. package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
  118. package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
  119. package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
  120. package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
  121. package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
  122. package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
  123. package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
  124. package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
  125. package/src/templates/scaffolding/review-loop.yaml +97 -0
  126. package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
  127. package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
  128. package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
  129. package/src/templates/testing/invalid.yaml +6 -0
  130. package/src/ui/dashboard.tsx +191 -33
  131. package/src/utils/auth-manager.test.ts +337 -0
  132. package/src/utils/auth-manager.ts +157 -61
  133. package/src/utils/blueprint-utils.ts +4 -6
  134. package/src/utils/config-loader.test.ts +2 -0
  135. package/src/utils/config-loader.ts +12 -3
  136. package/src/utils/constants.ts +76 -0
  137. package/src/utils/container.ts +63 -0
  138. package/src/utils/context-injector.test.ts +200 -0
  139. package/src/utils/context-injector.ts +244 -0
  140. package/src/utils/doc-generator.ts +85 -0
  141. package/src/utils/env-filter.ts +45 -0
  142. package/src/utils/json-parser.test.ts +12 -0
  143. package/src/utils/json-parser.ts +30 -5
  144. package/src/utils/logger.ts +12 -1
  145. package/src/utils/mermaid.ts +4 -0
  146. package/src/utils/paths.ts +52 -1
  147. package/src/utils/process-sandbox-worker.test.ts +46 -0
  148. package/src/utils/process-sandbox.ts +227 -14
  149. package/src/utils/redactor.test.ts +11 -6
  150. package/src/utils/redactor.ts +25 -9
  151. package/src/utils/sandbox.ts +3 -0
  152. package/src/utils/workflow-registry.test.ts +2 -2
  153. package/src/runner/llm-executor.ts +0 -638
  154. package/src/runner/shell-executor.ts +0 -366
  155. package/src/templates/invalid.yaml +0 -5
@@ -26,6 +26,8 @@ export async function processOpenAIStream(
26
26
  const toolCalls: LLMToolCall[] = [];
27
27
  let buffer = '';
28
28
 
29
+ let usage: LLMResponse['usage'];
30
+
29
31
  try {
30
32
  while (true) {
31
33
  const { done, value } = await reader.read();
@@ -48,9 +50,10 @@ export async function processOpenAIStream(
48
50
  try {
49
51
  const data = JSON.parse(trimmedLine.slice(6));
50
52
 
51
- // Handle Copilot's occasional 'choices' missing or different structure if needed,
52
- // but generally they match OpenAI.
53
- // Some proxies might return null delta.
53
+ if (data.usage) {
54
+ usage = data.usage;
55
+ }
56
+
54
57
  const delta = data.choices?.[0]?.delta;
55
58
  if (!delta) continue;
56
59
 
@@ -182,5 +185,6 @@ export async function processOpenAIStream(
182
185
  content: fullContent || null,
183
186
  tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
184
187
  },
188
+ usage,
185
189
  };
186
190
  }
@@ -32,6 +32,10 @@ export interface TestSnapshot {
32
32
  outputs: Record<string, unknown>;
33
33
  }
34
34
 
35
+ export interface TestOptions {
36
+ allowSideEffects?: boolean;
37
+ }
38
+
35
39
  export class TestHarness {
36
40
  private stepResults: Map<string, { status: string; output: unknown; error?: string }> = new Map();
37
41
  private mockResponses: Map<string, unknown> = new Map();
@@ -39,7 +43,8 @@ export class TestHarness {
39
43
 
40
44
  constructor(
41
45
  private workflow: Workflow,
42
- private fixture: TestFixture = {}
46
+ private fixture: TestFixture = {},
47
+ private options: TestOptions = {}
43
48
  ) {
44
49
  if (fixture.mocks) {
45
50
  for (const mock of fixture.mocks) {
@@ -116,6 +121,12 @@ export class TestHarness {
116
121
  return result;
117
122
  }
118
123
 
124
+ if (!this.options.allowSideEffects && this.isSideEffectStep(step)) {
125
+ throw new Error(
126
+ `🛑 Safety Violation: Step "${step.id}" of type "${step.type}" attempts to execute a side-effect.\nTo allow this, set 'options.allowSideEffects: true' in your test file.\nOtherwise, provide a mock response in 'fixture.mocks'.`
127
+ );
128
+ }
129
+
119
130
  // Default to real execution but capture snapshot
120
131
  const result = await executeStep(step, context, logger, {
121
132
  ...options,
@@ -132,6 +143,14 @@ export class TestHarness {
132
143
  return result;
133
144
  }
134
145
 
146
+ private isSideEffectStep(step: Step): boolean {
147
+ if (['shell', 'script', 'engine', 'request', 'artifact'].includes(step.type)) return true;
148
+ if (step.type === 'file' && (step as any).op !== 'read') return true;
149
+ // LLM is generally considered "safe" (no system modification) but costly.
150
+ // For now we allow LLM unless mocked, as users might want to test prompt logic.
151
+ return false;
152
+ }
153
+
135
154
  private getMockAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
136
155
  return {
137
156
  resolvedModel: model,
@@ -17,4 +17,14 @@ describe('timeout', () => {
17
17
  const promise = new Promise((resolve) => setTimeout(() => resolve('ok'), 100));
18
18
  await expect(withTimeout(promise, 10, 'MyStep')).rejects.toThrow(/MyStep timed out/);
19
19
  });
20
+
21
+ it('should abort the controller when the timeout triggers', async () => {
22
+ const controller = new AbortController();
23
+ const promise = new Promise(() => {});
24
+
25
+ await expect(
26
+ withTimeout(promise, 10, 'SlowOp', { abortController: controller })
27
+ ).rejects.toThrow(TimeoutError);
28
+ expect(controller.signal.aborted).toBe(true);
29
+ });
20
30
  });
@@ -9,16 +9,25 @@ export class TimeoutError extends Error {
9
9
  }
10
10
  }
11
11
 
12
+ export interface TimeoutOptions {
13
+ abortController?: AbortController;
14
+ }
15
+
12
16
  export async function withTimeout<T>(
13
17
  promise: Promise<T>,
14
18
  timeoutMs: number,
15
- operation = 'Operation'
19
+ operation = 'Operation',
20
+ options: TimeoutOptions = {}
16
21
  ): Promise<T> {
17
22
  let timeoutId: Timer | undefined;
18
23
 
19
24
  const timeoutPromise = new Promise<never>((_, reject) => {
20
25
  timeoutId = setTimeout(() => {
21
- reject(new TimeoutError(`${operation} timed out after ${timeoutMs}ms`));
26
+ const timeoutError = new TimeoutError(`${operation} timed out after ${timeoutMs}ms`);
27
+ if (options.abortController && !options.abortController.signal.aborted) {
28
+ options.abortController.abort(timeoutError);
29
+ }
30
+ reject(timeoutError);
22
31
  }, timeoutMs);
23
32
  });
24
33
 
@@ -3,8 +3,8 @@ import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
3
3
  import { join } from 'node:path';
4
4
  import type { ExpressionContext } from '../expression/evaluator';
5
5
  import type { LlmStep, Step } from '../parser/schema';
6
+ import { executeLlmStep } from './executors/llm-executor.ts';
6
7
  import type { LLMAdapter } from './llm-adapter';
7
- import { executeLlmStep } from './llm-executor';
8
8
  import type { StepResult } from './step-executor';
9
9
 
10
10
  interface MockToolCall {
@@ -0,0 +1,102 @@
1
+ import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test';
2
+ import { WorkflowDb } from '../db/workflow-db';
3
+ import type { WaitStep } from '../parser/schema';
4
+ import { container } from '../utils/container';
5
+ import { ConsoleLogger } from '../utils/logger';
6
+ import { executeStep } from './step-executor';
7
+
8
+ describe('Wait Step', () => {
9
+ let db: WorkflowDb;
10
+ const logger = new ConsoleLogger();
11
+ const context = { inputs: {}, steps: {} };
12
+
13
+ beforeEach(() => {
14
+ db = new WorkflowDb(':memory:');
15
+ container.register('db', db);
16
+ container.register('logger', logger);
17
+ });
18
+
19
+ afterEach(() => {
20
+ db.close();
21
+ });
22
+
23
+ it('should succeed when event exists and consume it by default (oneShot: true)', async () => {
24
+ const eventName = 'test-event';
25
+ const eventData = { foo: 'bar' };
26
+ await db.storeEvent(eventName, eventData);
27
+
28
+ const step: WaitStep = {
29
+ id: 'wait1',
30
+ type: 'wait',
31
+ event: eventName,
32
+ needs: [],
33
+ };
34
+
35
+ const result = await executeStep(step, context, logger, { db });
36
+ expect(result.status).toBe('success');
37
+ expect(result.output).toEqual(eventData);
38
+
39
+ // Verify event is consumed
40
+ const eventAfter = await db.getEvent(eventName);
41
+ expect(eventAfter).toBeNull();
42
+ });
43
+
44
+ it('should suspend when event does not exist', async () => {
45
+ const eventName = 'non-existent';
46
+ const step: WaitStep = {
47
+ id: 'wait1',
48
+ type: 'wait',
49
+ event: eventName,
50
+ needs: [],
51
+ };
52
+
53
+ const result = await executeStep(step, context, logger, { db });
54
+ expect(result.status).toBe('suspended');
55
+ expect(result.output).toEqual({ event: eventName });
56
+ });
57
+
58
+ it('should NOT consume event when oneShot is false', async () => {
59
+ const eventName = 'persistent-event';
60
+ const eventData = { hello: 'world' };
61
+ await db.storeEvent(eventName, eventData);
62
+
63
+ const step: WaitStep = {
64
+ id: 'wait1',
65
+ type: 'wait',
66
+ event: eventName,
67
+ oneShot: false,
68
+ needs: [],
69
+ };
70
+
71
+ const result = await executeStep(step, context, logger, { db });
72
+ expect(result.status).toBe('success');
73
+ expect(result.output).toEqual(eventData);
74
+
75
+ // Verify event STILL exists
76
+ const eventAfter = await db.getEvent(eventName);
77
+ expect(eventAfter).not.toBeNull();
78
+ if (eventAfter) {
79
+ expect(JSON.parse(eventAfter.data)).toEqual(eventData);
80
+ }
81
+ });
82
+
83
+ it('should handle sequential wait steps for the same one-shot event', async () => {
84
+ const eventName = 'seq-event';
85
+ await db.storeEvent(eventName, { count: 1 });
86
+
87
+ const step: WaitStep = {
88
+ id: 'wait1',
89
+ type: 'wait',
90
+ event: eventName,
91
+ needs: [],
92
+ };
93
+
94
+ // First wait succeeds and consumes
95
+ const result1 = await executeStep(step, context, logger, { db });
96
+ expect(result1.status).toBe('success');
97
+
98
+ // Second wait suspends because event is gone
99
+ const result2 = await executeStep(step, context, logger, { db });
100
+ expect(result2.status).toBe('suspended');
101
+ });
102
+ });
@@ -1,14 +1,23 @@
1
1
  import { afterAll, afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
2
+ import { randomUUID } from 'node:crypto';
2
3
  import { existsSync, rmSync } from 'node:fs';
4
+ import { MemoryDb } from '../db/memory-db';
3
5
  import { WorkflowDb } from '../db/workflow-db';
4
6
  import type { Workflow } from '../parser/schema';
5
7
  import { WorkflowParser } from '../parser/workflow-parser';
6
8
  import { ConfigLoader } from '../utils/config-loader';
9
+ import { container } from '../utils/container';
10
+ import { ConsoleLogger } from '../utils/logger';
7
11
  import { WorkflowRegistry } from '../utils/workflow-registry';
8
12
  import { WorkflowRunner } from './workflow-runner';
9
13
 
10
14
  describe('WorkflowRunner', () => {
11
15
  const dbPath = ':memory:';
16
+
17
+ // Setup DI container for tests
18
+ container.register('logger', new ConsoleLogger());
19
+ container.register('db', new WorkflowDb(dbPath));
20
+ container.register('memoryDb', new MemoryDb());
12
21
  const activeSpies: Array<{ mockRestore: () => void }> = [];
13
22
  const trackSpy = <T extends { mockRestore: () => void }>(spy: T): T => {
14
23
  activeSpies.push(spy);
@@ -45,6 +54,7 @@ describe('WorkflowRunner', () => {
45
54
  id: 'step2',
46
55
  type: 'shell',
47
56
  run: 'echo "${{ steps.step1.output.stdout.trim() }} world"',
57
+ allowInsecure: true,
48
58
  needs: ['step1'],
49
59
  },
50
60
  ],
@@ -73,6 +83,7 @@ describe('WorkflowRunner', () => {
73
83
  id: 'print',
74
84
  type: 'shell',
75
85
  run: 'echo $TOKEN',
86
+ allowInsecure: true,
76
87
  needs: [],
77
88
  },
78
89
  ],
@@ -124,6 +135,7 @@ describe('WorkflowRunner', () => {
124
135
  id: 'gen',
125
136
  type: 'shell',
126
137
  run: 'echo "[1, 2, 3]"',
138
+ allowInsecure: true,
127
139
  transform: 'JSON.parse(output.stdout)',
128
140
  needs: [],
129
141
  },
@@ -131,6 +143,7 @@ describe('WorkflowRunner', () => {
131
143
  id: 'process',
132
144
  type: 'shell',
133
145
  run: 'echo "item-${{ item }}"',
146
+ allowInsecure: true,
134
147
  foreach: '${{ steps.gen.output }}',
135
148
  needs: ['gen'],
136
149
  },
@@ -209,6 +222,7 @@ describe('WorkflowRunner', () => {
209
222
  id: 's1',
210
223
  type: 'shell',
211
224
  run: 'echo "${{ inputs.name }} ${{ inputs.count }}"',
225
+ allowInsecure: true,
212
226
  needs: [],
213
227
  },
214
228
  ],
@@ -223,7 +237,7 @@ describe('WorkflowRunner', () => {
223
237
  });
224
238
 
225
239
  it('should validate step input schema', async () => {
226
- const schemaDbPath = 'test-step-input-schema.db';
240
+ const schemaDbPath = `test-step-input-schema-${randomUUID()}.db`;
227
241
  const workflowWithInputSchema: Workflow = {
228
242
  name: 'step-input-schema-wf',
229
243
  steps: [
@@ -254,7 +268,7 @@ describe('WorkflowRunner', () => {
254
268
  });
255
269
 
256
270
  it('should validate step output schema', async () => {
257
- const schemaDbPath = 'test-step-output-schema.db';
271
+ const schemaDbPath = `test-step-output-schema-${randomUUID()}.db`;
258
272
  const workflowWithOutputSchema: Workflow = {
259
273
  name: 'step-output-schema-wf',
260
274
  steps: [
@@ -368,7 +382,7 @@ describe('WorkflowRunner', () => {
368
382
  });
369
383
 
370
384
  it('should deduplicate steps using idempotencyKey within a run', async () => {
371
- const idempotencyDbPath = 'test-idempotency.db';
385
+ const idempotencyDbPath = `test-idempotency-${randomUUID()}.db`;
372
386
  if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
373
387
 
374
388
  let idempotencyHitCount = 0;
@@ -421,7 +435,7 @@ describe('WorkflowRunner', () => {
421
435
  });
422
436
 
423
437
  it('should allow disabling idempotency deduplication', async () => {
424
- const idempotencyDbPath = 'test-idempotency-disabled.db';
438
+ const idempotencyDbPath = `test-idempotency-disabled-${randomUUID()}.db`;
425
439
  if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
426
440
 
427
441
  const idempotencyWorkflow: Workflow = {
@@ -460,7 +474,7 @@ describe('WorkflowRunner', () => {
460
474
  });
461
475
 
462
476
  it('should detect in-flight idempotency keys', async () => {
463
- const idempotencyDbPath = 'test-idempotency-inflight.db';
477
+ const idempotencyDbPath = `test-idempotency-inflight-${randomUUID()}.db`;
464
478
  if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
465
479
 
466
480
  const idempotencyWorkflow: Workflow = {
@@ -469,14 +483,14 @@ describe('WorkflowRunner', () => {
469
483
  {
470
484
  id: 's1',
471
485
  type: 'sleep',
472
- duration: 50,
486
+ duration: '50ms',
473
487
  needs: [],
474
488
  idempotencyKey: '"same-key"',
475
489
  },
476
490
  {
477
491
  id: 's2',
478
492
  type: 'sleep',
479
- duration: 50,
493
+ duration: '50ms',
480
494
  needs: [],
481
495
  idempotencyKey: '"same-key"',
482
496
  },
@@ -489,12 +503,92 @@ describe('WorkflowRunner', () => {
489
503
  if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
490
504
  });
491
505
 
506
+ it('should memoize deterministic steps across runs', async () => {
507
+ const memoizeDbPath = `test-memoize-${randomUUID()}.db`;
508
+ if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
509
+
510
+ const memoizeWorkflow: Workflow = {
511
+ name: 'memoize-wf',
512
+ steps: [
513
+ {
514
+ id: 's1',
515
+ type: 'shell',
516
+ run: 'bun -e "console.log(Date.now())"',
517
+ allowInsecure: true,
518
+ memoize: true,
519
+ needs: [],
520
+ },
521
+ ],
522
+ outputs: {
523
+ out: '${{ steps.s1.output.stdout.trim() }}',
524
+ },
525
+ } as unknown as Workflow;
526
+
527
+ const runner1 = new WorkflowRunner(memoizeWorkflow, { dbPath: memoizeDbPath });
528
+ const outputs1 = await runner1.run();
529
+ await Bun.sleep(5);
530
+
531
+ const runner2 = new WorkflowRunner(memoizeWorkflow, { dbPath: memoizeDbPath });
532
+ const outputs2 = await runner2.run();
533
+
534
+ expect(outputs2.out).toBe(outputs1.out);
535
+
536
+ if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
537
+ });
538
+
539
+ it('should redact memoized outputs at rest', async () => {
540
+ const memoizeDbPath = `test-memoize-redact-${randomUUID()}.db`;
541
+ if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
542
+
543
+ const secret = 'supersecret';
544
+ const memoizeWorkflow: Workflow = {
545
+ name: 'memoize-redact-wf',
546
+ steps: [
547
+ {
548
+ id: 's1',
549
+ type: 'shell',
550
+ run: `echo "${secret}"`,
551
+ memoize: true,
552
+ needs: [],
553
+ },
554
+ ],
555
+ outputs: {
556
+ out: '${{ steps.s1.output.stdout.trim() }}',
557
+ },
558
+ } as unknown as Workflow;
559
+
560
+ const runner = new WorkflowRunner(memoizeWorkflow, {
561
+ dbPath: memoizeDbPath,
562
+ secrets: { TOKEN: secret },
563
+ });
564
+ await runner.run();
565
+
566
+ const db = new WorkflowDb(memoizeDbPath);
567
+ const step = memoizeWorkflow.steps[0] as Workflow['steps'][number];
568
+ const stepInputs = { run: (step as { run: string }).run };
569
+ const cacheKey = Bun.hash(
570
+ JSON.stringify({
571
+ type: step.type,
572
+ inputs: stepInputs, // shell steps put 'run' in inputs
573
+ env: (step as { env?: Record<string, string> }).env,
574
+ version: 2,
575
+ })
576
+ ).toString(16);
577
+ const cached = await db.getStepCache(cacheKey);
578
+ expect(cached).not.toBeNull();
579
+ expect(cached?.output).not.toContain(secret);
580
+ expect(JSON.parse(cached?.output).stdout).toContain('***REDACTED***');
581
+ db.close();
582
+
583
+ if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
584
+ });
585
+
492
586
  it('should execute steps in parallel', async () => {
493
587
  const parallelWorkflow: Workflow = {
494
588
  name: 'parallel-wf',
495
589
  steps: [
496
- { id: 's1', type: 'sleep', duration: 100, needs: [] },
497
- { id: 's2', type: 'sleep', duration: 100, needs: [] },
590
+ { id: 's1', type: 'sleep', duration: '100ms', needs: [] },
591
+ { id: 's2', type: 'sleep', duration: '100ms', needs: [] },
498
592
  ],
499
593
  outputs: {
500
594
  done: 'true',
@@ -518,7 +612,15 @@ describe('WorkflowRunner', () => {
518
612
  inputs: {
519
613
  val: { type: 'string' },
520
614
  },
521
- steps: [{ id: 'cs1', type: 'shell', run: 'echo "child-${{ inputs.val }}"', needs: [] }],
615
+ steps: [
616
+ {
617
+ id: 'cs1',
618
+ type: 'shell',
619
+ run: 'echo "child-${{ inputs.val }}"',
620
+ allowInsecure: true,
621
+ needs: [],
622
+ },
623
+ ],
522
624
  outputs: {
523
625
  out: '${{ steps.cs1.output.stdout.trim() }}',
524
626
  },
@@ -549,7 +651,7 @@ describe('WorkflowRunner', () => {
549
651
  });
550
652
 
551
653
  it('should resume a failed workflow', async () => {
552
- const resumeDbPath = 'test-resume.db';
654
+ const resumeDbPath = `test-resume-${randomUUID()}.db`;
553
655
  if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
554
656
 
555
657
  const workflow: Workflow = {
@@ -590,6 +692,8 @@ describe('WorkflowRunner', () => {
590
692
  },
591
693
  error: () => {},
592
694
  warn: () => {},
695
+ info: () => {},
696
+ debug: () => {},
593
697
  };
594
698
 
595
699
  const runner2 = new WorkflowRunner(fixedWorkflow, {
@@ -605,7 +709,7 @@ describe('WorkflowRunner', () => {
605
709
  });
606
710
 
607
711
  it('should merge resumeInputs with stored inputs on resume', async () => {
608
- const resumeDbPath = 'test-merge-inputs.db';
712
+ const resumeDbPath = `test-merge-inputs-${randomUUID()}.db`;
609
713
  if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
610
714
 
611
715
  const workflow: Workflow = {
@@ -669,7 +773,7 @@ describe('WorkflowRunner', () => {
669
773
  });
670
774
 
671
775
  it('should redact secret inputs at rest', async () => {
672
- const dbFile = 'test-secret-at-rest.db';
776
+ const dbFile = `test-secret-at-rest-${randomUUID()}.db`;
673
777
  const workflow: Workflow = {
674
778
  name: 'secret-input-wf',
675
779
  inputs: {
@@ -688,6 +792,7 @@ describe('WorkflowRunner', () => {
688
792
  mcp_servers: {},
689
793
  engines: { allowlist: {}, denylist: [] },
690
794
  concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
795
+ expression: { strict: false },
691
796
  });
692
797
 
693
798
  const runner = new WorkflowRunner(workflow, {
@@ -724,6 +829,8 @@ describe('WorkflowRunner', () => {
724
829
  }
725
830
  },
726
831
  warn: () => {},
832
+ info: () => {},
833
+ debug: () => {},
727
834
  };
728
835
 
729
836
  const failFinallyWorkflow: Workflow = {
@@ -747,6 +854,8 @@ describe('WorkflowRunner', () => {
747
854
  },
748
855
  error: () => {},
749
856
  warn: () => {},
857
+ info: () => {},
858
+ debug: () => {},
750
859
  };
751
860
 
752
861
  const retryWorkflow: Workflow = {
@@ -782,6 +891,7 @@ describe('WorkflowRunner', () => {
782
891
  id: 'gen',
783
892
  type: 'shell',
784
893
  run: 'echo "[1, 2]"',
894
+ allowInsecure: true,
785
895
  transform: 'JSON.parse(output.stdout)',
786
896
  needs: [],
787
897
  },
@@ -854,6 +964,64 @@ describe('WorkflowRunner', () => {
854
964
  if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
855
965
  });
856
966
 
967
+ it('should reuse persisted foreach items on resume even if inputs change', async () => {
968
+ const resumeDbPath = 'test-foreach-resume-items.db';
969
+ if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
970
+
971
+ const workflow: Workflow = {
972
+ name: 'foreach-resume-items',
973
+ steps: [
974
+ {
975
+ id: 'process',
976
+ type: 'human',
977
+ message: 'Item ${{ item }}',
978
+ foreach: '${{ inputs.items }}',
979
+ needs: [],
980
+ },
981
+ ],
982
+ outputs: {
983
+ results: '${{ steps.process.output }}',
984
+ },
985
+ } as unknown as Workflow;
986
+
987
+ const originalIsTTY = process.stdin.isTTY;
988
+ process.stdin.isTTY = false;
989
+
990
+ const runner1 = new WorkflowRunner(workflow, {
991
+ dbPath: resumeDbPath,
992
+ inputs: { items: [1, 2] },
993
+ });
994
+ let suspendedError: unknown;
995
+ try {
996
+ await runner1.run();
997
+ } catch (e) {
998
+ suspendedError = e;
999
+ } finally {
1000
+ process.stdin.isTTY = originalIsTTY;
1001
+ }
1002
+
1003
+ expect(suspendedError).toBeDefined();
1004
+ expect(
1005
+ typeof suspendedError === 'object' && suspendedError !== null && 'name' in suspendedError
1006
+ ? (suspendedError as { name: string }).name
1007
+ : undefined
1008
+ ).toBe('WorkflowSuspendedError');
1009
+
1010
+ const runner2 = new WorkflowRunner(workflow, {
1011
+ dbPath: resumeDbPath,
1012
+ resumeRunId: runner1.runId,
1013
+ resumeInputs: {
1014
+ process: { __answer: 'ok' },
1015
+ items: [1, 2, 3],
1016
+ },
1017
+ });
1018
+
1019
+ const outputs = await runner2.run();
1020
+ expect(outputs.results).toEqual(['ok', 'ok']);
1021
+
1022
+ if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
1023
+ });
1024
+
857
1025
  it('should resume a workflow marked as running (crashed process)', async () => {
858
1026
  const resumeDbPath = 'test-running-resume.db';
859
1027
  if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
@@ -943,8 +1111,8 @@ describe('WorkflowRunner', () => {
943
1111
  const workflow: Workflow = {
944
1112
  name: 'cancel-wf',
945
1113
  steps: [
946
- { id: 's1', type: 'sleep', duration: 10, needs: [] },
947
- { id: 's2', type: 'sleep', duration: 10, needs: ['s1'] },
1114
+ { id: 's1', type: 'sleep', duration: '10ms', needs: [] },
1115
+ { id: 's2', type: 'sleep', duration: '10ms', needs: ['s1'] },
948
1116
  ],
949
1117
  } as unknown as Workflow;
950
1118
 
@@ -1021,4 +1189,29 @@ describe('WorkflowRunner', () => {
1021
1189
 
1022
1190
  if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
1023
1191
  });
1192
+
1193
+ it('should support safe direct shell execution via args', async () => {
1194
+ const argsWorkflow: Workflow = {
1195
+ name: 'args-wf',
1196
+ inputs: {
1197
+ val: { type: 'string', default: 'foo "bar" baz' },
1198
+ },
1199
+ steps: [
1200
+ {
1201
+ id: 's1',
1202
+ type: 'shell',
1203
+ args: ['echo', '${{ inputs.val }}'],
1204
+ needs: [],
1205
+ },
1206
+ ],
1207
+ outputs: {
1208
+ out: '${{ steps.s1.output.stdout.trim() }}',
1209
+ },
1210
+ } as unknown as Workflow;
1211
+
1212
+ const runner = new WorkflowRunner(argsWorkflow, { dbPath });
1213
+ const outputs = await runner.run();
1214
+ // Bun.spawn with args array should preserve quotes and spaces without needing escape()
1215
+ expect(outputs.out).toBe('foo "bar" baz');
1216
+ });
1024
1217
  });