npm - keystone-cli - Versions diffs - 1.0.2 → 1.1.0 - Mend

keystone-cli 1.0.2 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (155)

package/README.md +288 -24
package/package.json +8 -4
package/src/cli.ts +538 -419
package/src/commands/doc.ts +31 -0
package/src/commands/event.ts +29 -0
package/src/commands/graph.ts +37 -0
package/src/commands/index.ts +14 -0
package/src/commands/init.ts +185 -0
package/src/commands/run.ts +124 -0
package/src/commands/schema.ts +40 -0
package/src/commands/utils.ts +78 -0
package/src/commands/validate.ts +111 -0
package/src/db/memory-db.ts +50 -2
package/src/db/workflow-db.test.ts +314 -0
package/src/db/workflow-db.ts +810 -210
package/src/expression/evaluator-audit.test.ts +4 -2
package/src/expression/evaluator.test.ts +14 -1
package/src/expression/evaluator.ts +166 -19
package/src/parser/config-schema.ts +18 -0
package/src/parser/schema.ts +153 -22
package/src/parser/test-schema.ts +6 -6
package/src/parser/workflow-parser.test.ts +24 -0
package/src/parser/workflow-parser.ts +65 -3
package/src/runner/auto-heal.test.ts +5 -6
package/src/runner/blueprint-executor.test.ts +2 -2
package/src/runner/debug-repl.test.ts +5 -8
package/src/runner/debug-repl.ts +59 -16
package/src/runner/durable-timers.test.ts +11 -2
package/src/runner/engine-executor.test.ts +1 -1
package/src/runner/events.ts +57 -0
package/src/runner/executors/artifact-executor.ts +166 -0
package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
package/src/runner/executors/file-executor.test.ts +48 -0
package/src/runner/executors/file-executor.ts +324 -0
package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
package/src/runner/executors/human-executor.ts +144 -0
package/src/runner/executors/join-executor.ts +75 -0
package/src/runner/executors/llm-executor.ts +1266 -0
package/src/runner/executors/memory-executor.ts +71 -0
package/src/runner/executors/plan-executor.ts +104 -0
package/src/runner/executors/request-executor.ts +265 -0
package/src/runner/executors/script-executor.ts +43 -0
package/src/runner/executors/shell-executor.ts +403 -0
package/src/runner/executors/subworkflow-executor.ts +114 -0
package/src/runner/executors/types.ts +69 -0
package/src/runner/executors/wait-executor.ts +59 -0
package/src/runner/join-scheduling.test.ts +197 -0
package/src/runner/llm-adapter-runtime.test.ts +209 -0
package/src/runner/llm-adapter.test.ts +419 -24
package/src/runner/llm-adapter.ts +414 -17
package/src/runner/llm-clarification.test.ts +2 -1
package/src/runner/llm-executor.test.ts +532 -17
package/src/runner/mcp-client-audit.test.ts +1 -2
package/src/runner/mcp-client.ts +136 -46
package/src/runner/mcp-manager.test.ts +4 -0
package/src/runner/mcp-server.test.ts +58 -0
package/src/runner/mcp-server.ts +26 -0
package/src/runner/memoization.test.ts +190 -0
package/src/runner/optimization-runner.ts +4 -9
package/src/runner/quality-gate.test.ts +69 -0
package/src/runner/reflexion.test.ts +6 -17
package/src/runner/resource-pool.ts +102 -14
package/src/runner/services/context-builder.ts +144 -0
package/src/runner/services/secret-manager.ts +105 -0
package/src/runner/services/workflow-validator.ts +131 -0
package/src/runner/shell-executor.test.ts +28 -4
package/src/runner/standard-tools-ast.test.ts +196 -0
package/src/runner/standard-tools-execution.test.ts +27 -0
package/src/runner/standard-tools-integration.test.ts +6 -10
package/src/runner/standard-tools.ts +339 -102
package/src/runner/step-executor.test.ts +216 -4
package/src/runner/step-executor.ts +69 -941
package/src/runner/stream-utils.ts +7 -3
package/src/runner/test-harness.ts +20 -1
package/src/runner/timeout.test.ts +10 -0
package/src/runner/timeout.ts +11 -2
package/src/runner/tool-integration.test.ts +1 -1
package/src/runner/wait-step.test.ts +102 -0
package/src/runner/workflow-runner.test.ts +208 -15
package/src/runner/workflow-runner.ts +890 -818
package/src/runner/workflow-scheduler.ts +75 -0
package/src/runner/workflow-state.ts +269 -0
package/src/runner/workflow-subflows.test.ts +13 -12
package/src/scripts/generate-schemas.ts +16 -0
package/src/templates/agents/explore.md +1 -0
package/src/templates/agents/general.md +1 -0
package/src/templates/agents/handoff-router.md +14 -0
package/src/templates/agents/handoff-specialist.md +15 -0
package/src/templates/agents/keystone-architect.md +13 -44
package/src/templates/agents/my-agent.md +1 -0
package/src/templates/agents/software-engineer.md +1 -0
package/src/templates/agents/summarizer.md +1 -0
package/src/templates/agents/test-agent.md +1 -0
package/src/templates/agents/tester.md +1 -0
package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
package/src/templates/control-flow/idempotency-example.yaml +30 -0
package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
package/src/templates/features/artifact-example.yaml +39 -0
package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
package/src/templates/features/script-example.yaml +27 -0
package/src/templates/patterns/agent-handoff.yaml +53 -0
package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
package/src/templates/scaffolding/review-loop.yaml +97 -0
package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
package/src/templates/testing/invalid.yaml +6 -0
package/src/ui/dashboard.tsx +191 -33
package/src/utils/auth-manager.test.ts +337 -0
package/src/utils/auth-manager.ts +157 -61
package/src/utils/blueprint-utils.ts +4 -6
package/src/utils/config-loader.test.ts +2 -0
package/src/utils/config-loader.ts +12 -3
package/src/utils/constants.ts +76 -0
package/src/utils/container.ts +63 -0
package/src/utils/context-injector.test.ts +200 -0
package/src/utils/context-injector.ts +244 -0
package/src/utils/doc-generator.ts +85 -0
package/src/utils/env-filter.ts +45 -0
package/src/utils/json-parser.test.ts +12 -0
package/src/utils/json-parser.ts +30 -5
package/src/utils/logger.ts +12 -1
package/src/utils/mermaid.ts +4 -0
package/src/utils/paths.ts +52 -1
package/src/utils/process-sandbox-worker.test.ts +46 -0
package/src/utils/process-sandbox.ts +227 -14
package/src/utils/redactor.test.ts +11 -6
package/src/utils/redactor.ts +25 -9
package/src/utils/sandbox.ts +3 -0
package/src/utils/workflow-registry.test.ts +2 -2
package/src/runner/llm-executor.ts +0 -638
package/src/runner/shell-executor.ts +0 -366
package/src/templates/invalid.yaml +0 -5

package/src/runner/stream-utils.ts CHANGED Viewed

@@ -26,6 +26,8 @@ export async function processOpenAIStream(
   const toolCalls: LLMToolCall[] = [];
   let buffer = '';
+  let usage: LLMResponse['usage'];
   try {
     while (true) {
       const { done, value } = await reader.read();
@@ -48,9 +50,10 @@ export async function processOpenAIStream(
         try {
           const data = JSON.parse(trimmedLine.slice(6));
-          // Handle Copilot's occasional 'choices' missing or different structure if needed,
-          // but generally they match OpenAI.
-          // Some proxies might return null delta.
+          if (data.usage) {
+            usage = data.usage;
+          }
           const delta = data.choices?.[0]?.delta;
           if (!delta) continue;
@@ -182,5 +185,6 @@ export async function processOpenAIStream(
       content: fullContent || null,
       tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
     },
+    usage,
   };
 }

package/src/runner/test-harness.ts CHANGED Viewed

@@ -32,6 +32,10 @@ export interface TestSnapshot {
   outputs: Record<string, unknown>;
 }
+export interface TestOptions {
+  allowSideEffects?: boolean;
+}
 export class TestHarness {
   private stepResults: Map<string, { status: string; output: unknown; error?: string }> = new Map();
   private mockResponses: Map<string, unknown> = new Map();
@@ -39,7 +43,8 @@ export class TestHarness {
   constructor(
     private workflow: Workflow,
-    private fixture: TestFixture = {}
+    private fixture: TestFixture = {},
+    private options: TestOptions = {}
   ) {
     if (fixture.mocks) {
       for (const mock of fixture.mocks) {
@@ -116,6 +121,12 @@ export class TestHarness {
       return result;
     }
+    if (!this.options.allowSideEffects && this.isSideEffectStep(step)) {
+      throw new Error(
+        `🛑 Safety Violation: Step "${step.id}" of type "${step.type}" attempts to execute a side-effect.\nTo allow this, set 'options.allowSideEffects: true' in your test file.\nOtherwise, provide a mock response in 'fixture.mocks'.`
+      );
+    }
     // Default to real execution but capture snapshot
     const result = await executeStep(step, context, logger, {
       ...options,
@@ -132,6 +143,14 @@ export class TestHarness {
     return result;
   }
+  private isSideEffectStep(step: Step): boolean {
+    if (['shell', 'script', 'engine', 'request', 'artifact'].includes(step.type)) return true;
+    if (step.type === 'file' && (step as any).op !== 'read') return true;
+    // LLM is generally considered "safe" (no system modification) but costly.
+    // For now we allow LLM unless mocked, as users might want to test prompt logic.
+    return false;
+  }
   private getMockAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
     return {
       resolvedModel: model,

package/src/runner/timeout.test.ts CHANGED Viewed

@@ -17,4 +17,14 @@ describe('timeout', () => {
     const promise = new Promise((resolve) => setTimeout(() => resolve('ok'), 100));
     await expect(withTimeout(promise, 10, 'MyStep')).rejects.toThrow(/MyStep timed out/);
   });
+  it('should abort the controller when the timeout triggers', async () => {
+    const controller = new AbortController();
+    const promise = new Promise(() => {});
+    await expect(
+      withTimeout(promise, 10, 'SlowOp', { abortController: controller })
+    ).rejects.toThrow(TimeoutError);
+    expect(controller.signal.aborted).toBe(true);
+  });
 });

package/src/runner/timeout.ts CHANGED Viewed

@@ -9,16 +9,25 @@ export class TimeoutError extends Error {
   }
 }
+export interface TimeoutOptions {
+  abortController?: AbortController;
+}
 export async function withTimeout<T>(
   promise: Promise<T>,
   timeoutMs: number,
-  operation = 'Operation'
+  operation = 'Operation',
+  options: TimeoutOptions = {}
 ): Promise<T> {
   let timeoutId: Timer | undefined;
   const timeoutPromise = new Promise<never>((_, reject) => {
     timeoutId = setTimeout(() => {
-      reject(new TimeoutError(`${operation} timed out after ${timeoutMs}ms`));
+      const timeoutError = new TimeoutError(`${operation} timed out after ${timeoutMs}ms`);
+      if (options.abortController && !options.abortController.signal.aborted) {
+        options.abortController.abort(timeoutError);
+      }
+      reject(timeoutError);
     }, timeoutMs);
   });

package/src/runner/tool-integration.test.ts CHANGED Viewed

@@ -3,8 +3,8 @@ import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import type { ExpressionContext } from '../expression/evaluator';
 import type { LlmStep, Step } from '../parser/schema';
+import { executeLlmStep } from './executors/llm-executor.ts';
 import type { LLMAdapter } from './llm-adapter';
-import { executeLlmStep } from './llm-executor';
 import type { StepResult } from './step-executor';
 interface MockToolCall {

package/src/runner/wait-step.test.ts ADDED Viewed

@@ -0,0 +1,102 @@
+import { afterEach, beforeEach, describe, expect, it, mock } from 'bun:test';
+import { WorkflowDb } from '../db/workflow-db';
+import type { WaitStep } from '../parser/schema';
+import { container } from '../utils/container';
+import { ConsoleLogger } from '../utils/logger';
+import { executeStep } from './step-executor';
+describe('Wait Step', () => {
+  let db: WorkflowDb;
+  const logger = new ConsoleLogger();
+  const context = { inputs: {}, steps: {} };
+  beforeEach(() => {
+    db = new WorkflowDb(':memory:');
+    container.register('db', db);
+    container.register('logger', logger);
+  });
+  afterEach(() => {
+    db.close();
+  });
+  it('should succeed when event exists and consume it by default (oneShot: true)', async () => {
+    const eventName = 'test-event';
+    const eventData = { foo: 'bar' };
+    await db.storeEvent(eventName, eventData);
+    const step: WaitStep = {
+      id: 'wait1',
+      type: 'wait',
+      event: eventName,
+      needs: [],
+    };
+    const result = await executeStep(step, context, logger, { db });
+    expect(result.status).toBe('success');
+    expect(result.output).toEqual(eventData);
+    // Verify event is consumed
+    const eventAfter = await db.getEvent(eventName);
+    expect(eventAfter).toBeNull();
+  });
+  it('should suspend when event does not exist', async () => {
+    const eventName = 'non-existent';
+    const step: WaitStep = {
+      id: 'wait1',
+      type: 'wait',
+      event: eventName,
+      needs: [],
+    };
+    const result = await executeStep(step, context, logger, { db });
+    expect(result.status).toBe('suspended');
+    expect(result.output).toEqual({ event: eventName });
+  });
+  it('should NOT consume event when oneShot is false', async () => {
+    const eventName = 'persistent-event';
+    const eventData = { hello: 'world' };
+    await db.storeEvent(eventName, eventData);
+    const step: WaitStep = {
+      id: 'wait1',
+      type: 'wait',
+      event: eventName,
+      oneShot: false,
+      needs: [],
+    };
+    const result = await executeStep(step, context, logger, { db });
+    expect(result.status).toBe('success');
+    expect(result.output).toEqual(eventData);
+    // Verify event STILL exists
+    const eventAfter = await db.getEvent(eventName);
+    expect(eventAfter).not.toBeNull();
+    if (eventAfter) {
+      expect(JSON.parse(eventAfter.data)).toEqual(eventData);
+    }
+  });
+  it('should handle sequential wait steps for the same one-shot event', async () => {
+    const eventName = 'seq-event';
+    await db.storeEvent(eventName, { count: 1 });
+    const step: WaitStep = {
+      id: 'wait1',
+      type: 'wait',
+      event: eventName,
+      needs: [],
+    };
+    // First wait succeeds and consumes
+    const result1 = await executeStep(step, context, logger, { db });
+    expect(result1.status).toBe('success');
+    // Second wait suspends because event is gone
+    const result2 = await executeStep(step, context, logger, { db });
+    expect(result2.status).toBe('suspended');
+  });
+});

package/src/runner/workflow-runner.test.ts CHANGED Viewed

@@ -1,14 +1,23 @@
 import { afterAll, afterEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import { randomUUID } from 'node:crypto';
 import { existsSync, rmSync } from 'node:fs';
+import { MemoryDb } from '../db/memory-db';
 import { WorkflowDb } from '../db/workflow-db';
 import type { Workflow } from '../parser/schema';
 import { WorkflowParser } from '../parser/workflow-parser';
 import { ConfigLoader } from '../utils/config-loader';
+import { container } from '../utils/container';
+import { ConsoleLogger } from '../utils/logger';
 import { WorkflowRegistry } from '../utils/workflow-registry';
 import { WorkflowRunner } from './workflow-runner';
 describe('WorkflowRunner', () => {
   const dbPath = ':memory:';
+  // Setup DI container for tests
+  container.register('logger', new ConsoleLogger());
+  container.register('db', new WorkflowDb(dbPath));
+  container.register('memoryDb', new MemoryDb());
   const activeSpies: Array<{ mockRestore: () => void }> = [];
   const trackSpy = <T extends { mockRestore: () => void }>(spy: T): T => {
     activeSpies.push(spy);
@@ -45,6 +54,7 @@ describe('WorkflowRunner', () => {
         id: 'step2',
         type: 'shell',
         run: 'echo "${{ steps.step1.output.stdout.trim() }} world"',
+        allowInsecure: true,
         needs: ['step1'],
       },
     ],
@@ -73,6 +83,7 @@ describe('WorkflowRunner', () => {
           id: 'print',
           type: 'shell',
           run: 'echo $TOKEN',
+          allowInsecure: true,
           needs: [],
         },
       ],
@@ -124,6 +135,7 @@ describe('WorkflowRunner', () => {
           id: 'gen',
           type: 'shell',
           run: 'echo "[1, 2, 3]"',
+          allowInsecure: true,
           transform: 'JSON.parse(output.stdout)',
           needs: [],
         },
@@ -131,6 +143,7 @@ describe('WorkflowRunner', () => {
           id: 'process',
           type: 'shell',
           run: 'echo "item-${{ item }}"',
+          allowInsecure: true,
           foreach: '${{ steps.gen.output }}',
           needs: ['gen'],
         },
@@ -209,6 +222,7 @@ describe('WorkflowRunner', () => {
           id: 's1',
           type: 'shell',
           run: 'echo "${{ inputs.name }} ${{ inputs.count }}"',
+          allowInsecure: true,
           needs: [],
         },
       ],
@@ -223,7 +237,7 @@ describe('WorkflowRunner', () => {
   });
   it('should validate step input schema', async () => {
-    const schemaDbPath = 'test-step-input-schema.db';
+    const schemaDbPath = `test-step-input-schema-${randomUUID()}.db`;
     const workflowWithInputSchema: Workflow = {
       name: 'step-input-schema-wf',
       steps: [
@@ -254,7 +268,7 @@ describe('WorkflowRunner', () => {
   });
   it('should validate step output schema', async () => {
-    const schemaDbPath = 'test-step-output-schema.db';
+    const schemaDbPath = `test-step-output-schema-${randomUUID()}.db`;
     const workflowWithOutputSchema: Workflow = {
       name: 'step-output-schema-wf',
       steps: [
@@ -368,7 +382,7 @@ describe('WorkflowRunner', () => {
   });
   it('should deduplicate steps using idempotencyKey within a run', async () => {
-    const idempotencyDbPath = 'test-idempotency.db';
+    const idempotencyDbPath = `test-idempotency-${randomUUID()}.db`;
     if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
     let idempotencyHitCount = 0;
@@ -421,7 +435,7 @@ describe('WorkflowRunner', () => {
   });
   it('should allow disabling idempotency deduplication', async () => {
-    const idempotencyDbPath = 'test-idempotency-disabled.db';
+    const idempotencyDbPath = `test-idempotency-disabled-${randomUUID()}.db`;
     if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
     const idempotencyWorkflow: Workflow = {
@@ -460,7 +474,7 @@ describe('WorkflowRunner', () => {
   });
   it('should detect in-flight idempotency keys', async () => {
-    const idempotencyDbPath = 'test-idempotency-inflight.db';
+    const idempotencyDbPath = `test-idempotency-inflight-${randomUUID()}.db`;
     if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
     const idempotencyWorkflow: Workflow = {
@@ -469,14 +483,14 @@ describe('WorkflowRunner', () => {
         {
           id: 's1',
           type: 'sleep',
-          duration: 50,
+          duration: '50ms',
           needs: [],
           idempotencyKey: '"same-key"',
         },
         {
           id: 's2',
           type: 'sleep',
-          duration: 50,
+          duration: '50ms',
           needs: [],
           idempotencyKey: '"same-key"',
         },
@@ -489,12 +503,92 @@ describe('WorkflowRunner', () => {
     if (existsSync(idempotencyDbPath)) rmSync(idempotencyDbPath);
   });
+  it('should memoize deterministic steps across runs', async () => {
+    const memoizeDbPath = `test-memoize-${randomUUID()}.db`;
+    if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
+    const memoizeWorkflow: Workflow = {
+      name: 'memoize-wf',
+      steps: [
+        {
+          id: 's1',
+          type: 'shell',
+          run: 'bun -e "console.log(Date.now())"',
+          allowInsecure: true,
+          memoize: true,
+          needs: [],
+        },
+      ],
+      outputs: {
+        out: '${{ steps.s1.output.stdout.trim() }}',
+      },
+    } as unknown as Workflow;
+    const runner1 = new WorkflowRunner(memoizeWorkflow, { dbPath: memoizeDbPath });
+    const outputs1 = await runner1.run();
+    await Bun.sleep(5);
+    const runner2 = new WorkflowRunner(memoizeWorkflow, { dbPath: memoizeDbPath });
+    const outputs2 = await runner2.run();
+    expect(outputs2.out).toBe(outputs1.out);
+    if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
+  });
+  it('should redact memoized outputs at rest', async () => {
+    const memoizeDbPath = `test-memoize-redact-${randomUUID()}.db`;
+    if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
+    const secret = 'supersecret';
+    const memoizeWorkflow: Workflow = {
+      name: 'memoize-redact-wf',
+      steps: [
+        {
+          id: 's1',
+          type: 'shell',
+          run: `echo "${secret}"`,
+          memoize: true,
+          needs: [],
+        },
+      ],
+      outputs: {
+        out: '${{ steps.s1.output.stdout.trim() }}',
+      },
+    } as unknown as Workflow;
+    const runner = new WorkflowRunner(memoizeWorkflow, {
+      dbPath: memoizeDbPath,
+      secrets: { TOKEN: secret },
+    });
+    await runner.run();
+    const db = new WorkflowDb(memoizeDbPath);
+    const step = memoizeWorkflow.steps[0] as Workflow['steps'][number];
+    const stepInputs = { run: (step as { run: string }).run };
+    const cacheKey = Bun.hash(
+      JSON.stringify({
+        type: step.type,
+        inputs: stepInputs, // shell steps put 'run' in inputs
+        env: (step as { env?: Record<string, string> }).env,
+        version: 2,
+      })
+    ).toString(16);
+    const cached = await db.getStepCache(cacheKey);
+    expect(cached).not.toBeNull();
+    expect(cached?.output).not.toContain(secret);
+    expect(JSON.parse(cached?.output).stdout).toContain('***REDACTED***');
+    db.close();
+    if (existsSync(memoizeDbPath)) rmSync(memoizeDbPath);
+  });
   it('should execute steps in parallel', async () => {
     const parallelWorkflow: Workflow = {
       name: 'parallel-wf',
       steps: [
-        { id: 's1', type: 'sleep', duration: 100, needs: [] },
-        { id: 's2', type: 'sleep', duration: 100, needs: [] },
+        { id: 's1', type: 'sleep', duration: '100ms', needs: [] },
+        { id: 's2', type: 'sleep', duration: '100ms', needs: [] },
       ],
       outputs: {
         done: 'true',
@@ -518,7 +612,15 @@ describe('WorkflowRunner', () => {
       inputs: {
         val: { type: 'string' },
       },
-      steps: [{ id: 'cs1', type: 'shell', run: 'echo "child-${{ inputs.val }}"', needs: [] }],
+      steps: [
+        {
+          id: 'cs1',
+          type: 'shell',
+          run: 'echo "child-${{ inputs.val }}"',
+          allowInsecure: true,
+          needs: [],
+        },
+      ],
       outputs: {
         out: '${{ steps.cs1.output.stdout.trim() }}',
       },
@@ -549,7 +651,7 @@ describe('WorkflowRunner', () => {
   });
   it('should resume a failed workflow', async () => {
-    const resumeDbPath = 'test-resume.db';
+    const resumeDbPath = `test-resume-${randomUUID()}.db`;
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
     const workflow: Workflow = {
@@ -590,6 +692,8 @@ describe('WorkflowRunner', () => {
       },
       error: () => {},
       warn: () => {},
+      info: () => {},
+      debug: () => {},
     };
     const runner2 = new WorkflowRunner(fixedWorkflow, {
@@ -605,7 +709,7 @@ describe('WorkflowRunner', () => {
   });
   it('should merge resumeInputs with stored inputs on resume', async () => {
-    const resumeDbPath = 'test-merge-inputs.db';
+    const resumeDbPath = `test-merge-inputs-${randomUUID()}.db`;
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
     const workflow: Workflow = {
@@ -669,7 +773,7 @@ describe('WorkflowRunner', () => {
   });
   it('should redact secret inputs at rest', async () => {
-    const dbFile = 'test-secret-at-rest.db';
+    const dbFile = `test-secret-at-rest-${randomUUID()}.db`;
     const workflow: Workflow = {
       name: 'secret-input-wf',
       inputs: {
@@ -688,6 +792,7 @@ describe('WorkflowRunner', () => {
       mcp_servers: {},
       engines: { allowlist: {}, denylist: [] },
       concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
+      expression: { strict: false },
     });
     const runner = new WorkflowRunner(workflow, {
@@ -724,6 +829,8 @@ describe('WorkflowRunner', () => {
         }
       },
       warn: () => {},
+      info: () => {},
+      debug: () => {},
     };
     const failFinallyWorkflow: Workflow = {
@@ -747,6 +854,8 @@ describe('WorkflowRunner', () => {
       },
       error: () => {},
       warn: () => {},
+      info: () => {},
+      debug: () => {},
     };
     const retryWorkflow: Workflow = {
@@ -782,6 +891,7 @@ describe('WorkflowRunner', () => {
           id: 'gen',
           type: 'shell',
           run: 'echo "[1, 2]"',
+          allowInsecure: true,
           transform: 'JSON.parse(output.stdout)',
           needs: [],
         },
@@ -854,6 +964,64 @@ describe('WorkflowRunner', () => {
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
   });
+  it('should reuse persisted foreach items on resume even if inputs change', async () => {
+    const resumeDbPath = 'test-foreach-resume-items.db';
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+    const workflow: Workflow = {
+      name: 'foreach-resume-items',
+      steps: [
+        {
+          id: 'process',
+          type: 'human',
+          message: 'Item ${{ item }}',
+          foreach: '${{ inputs.items }}',
+          needs: [],
+        },
+      ],
+      outputs: {
+        results: '${{ steps.process.output }}',
+      },
+    } as unknown as Workflow;
+    const originalIsTTY = process.stdin.isTTY;
+    process.stdin.isTTY = false;
+    const runner1 = new WorkflowRunner(workflow, {
+      dbPath: resumeDbPath,
+      inputs: { items: [1, 2] },
+    });
+    let suspendedError: unknown;
+    try {
+      await runner1.run();
+    } catch (e) {
+      suspendedError = e;
+    } finally {
+      process.stdin.isTTY = originalIsTTY;
+    }
+    expect(suspendedError).toBeDefined();
+    expect(
+      typeof suspendedError === 'object' && suspendedError !== null && 'name' in suspendedError
+        ? (suspendedError as { name: string }).name
+        : undefined
+    ).toBe('WorkflowSuspendedError');
+    const runner2 = new WorkflowRunner(workflow, {
+      dbPath: resumeDbPath,
+      resumeRunId: runner1.runId,
+      resumeInputs: {
+        process: { __answer: 'ok' },
+        items: [1, 2, 3],
+      },
+    });
+    const outputs = await runner2.run();
+    expect(outputs.results).toEqual(['ok', 'ok']);
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+  });
   it('should resume a workflow marked as running (crashed process)', async () => {
     const resumeDbPath = 'test-running-resume.db';
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
@@ -943,8 +1111,8 @@ describe('WorkflowRunner', () => {
     const workflow: Workflow = {
       name: 'cancel-wf',
       steps: [
-        { id: 's1', type: 'sleep', duration: 10, needs: [] },
-        { id: 's2', type: 'sleep', duration: 10, needs: ['s1'] },
+        { id: 's1', type: 'sleep', duration: '10ms', needs: [] },
+        { id: 's2', type: 'sleep', duration: '10ms', needs: ['s1'] },
       ],
     } as unknown as Workflow;
@@ -1021,4 +1189,29 @@ describe('WorkflowRunner', () => {
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
   });
+  it('should support safe direct shell execution via args', async () => {
+    const argsWorkflow: Workflow = {
+      name: 'args-wf',
+      inputs: {
+        val: { type: 'string', default: 'foo "bar" baz' },
+      },
+      steps: [
+        {
+          id: 's1',
+          type: 'shell',
+          args: ['echo', '${{ inputs.val }}'],
+          needs: [],
+        },
+      ],
+      outputs: {
+        out: '${{ steps.s1.output.stdout.trim() }}',
+      },
+    } as unknown as Workflow;
+    const runner = new WorkflowRunner(argsWorkflow, { dbPath });
+    const outputs = await runner.run();
+    // Bun.spawn with args array should preserve quotes and spaces without needing escape()
+    expect(outputs.out).toBe('foo "bar" baz');
+  });
 });