npm - keystone-cli - Versions diffs - 1.2.0 → 2.0.0 - Mend

keystone-cli 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/README.md +163 -138
package/package.json +6 -3
package/src/cli.ts +54 -369
package/src/commands/init.ts +19 -27
package/src/db/dynamic-state-manager.test.ts +319 -0
package/src/db/dynamic-state-manager.ts +411 -0
package/src/db/memory-db.test.ts +45 -0
package/src/db/memory-db.ts +47 -21
package/src/db/sqlite-setup.ts +26 -3
package/src/db/workflow-db.ts +76 -5
package/src/parser/config-schema.ts +11 -13
package/src/parser/schema.ts +37 -2
package/src/parser/workflow-parser.test.ts +3 -4
package/src/parser/workflow-parser.ts +3 -62
package/src/runner/__test__/llm-mock-setup.ts +173 -0
package/src/runner/__test__/llm-test-setup.ts +271 -0
package/src/runner/engine-executor.test.ts +25 -18
package/src/runner/executors/blueprint-executor.ts +0 -1
package/src/runner/executors/dynamic-executor.test.ts +613 -0
package/src/runner/executors/dynamic-executor.ts +723 -0
package/src/runner/executors/dynamic-types.ts +69 -0
package/src/runner/executors/engine-executor.ts +5 -1
package/src/runner/executors/llm-executor.ts +502 -1033
package/src/runner/executors/memory-executor.ts +35 -19
package/src/runner/executors/plan-executor.ts +0 -1
package/src/runner/executors/types.ts +4 -4
package/src/runner/llm-adapter.integration.test.ts +151 -0
package/src/runner/llm-adapter.ts +263 -1401
package/src/runner/llm-clarification.test.ts +91 -106
package/src/runner/llm-executor.test.ts +217 -1181
package/src/runner/memoization.test.ts +0 -1
package/src/runner/recovery-security.test.ts +51 -20
package/src/runner/reflexion.test.ts +55 -18
package/src/runner/standard-tools-integration.test.ts +137 -87
package/src/runner/step-executor.test.ts +36 -80
package/src/runner/step-executor.ts +20 -2
package/src/runner/test-harness.ts +3 -29
package/src/runner/tool-integration.test.ts +122 -73
package/src/runner/workflow-runner.ts +92 -35
package/src/runner/workflow-scheduler.ts +11 -1
package/src/runner/workflow-summary.ts +144 -0
package/src/templates/dynamic-demo.yaml +31 -0
package/src/templates/scaffolding/decompose-problem.yaml +1 -1
package/src/templates/scaffolding/dynamic-decompose.yaml +39 -0
package/src/utils/auth-manager.test.ts +10 -520
package/src/utils/auth-manager.ts +3 -756
package/src/utils/config-loader.ts +12 -0
package/src/utils/constants.ts +0 -17
package/src/utils/process-sandbox.ts +15 -3
package/src/utils/topo-sort.ts +47 -0
package/src/runner/llm-adapter-runtime.test.ts +0 -209
package/src/runner/llm-adapter.test.ts +0 -1012

package/src/runner/step-executor.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
 import { executeArtifactStep } from './executors/artifact-executor.ts';
 import { executeBlueprintStep } from './executors/blueprint-executor.ts';
+import { executeDynamicStep } from './executors/dynamic-executor.ts';
 import { executeEngineStepWrapper } from './executors/engine-executor.ts';
 import { executeFileStep } from './executors/file-executor.ts';
 import { executeGitStep } from './executors/git-executor.ts';
@@ -49,7 +50,6 @@ export async function executeStep(
     stepExecutionId,
     artifactRoot,
     redactForStorage,
-    getAdapter,
     executeStep: injectedExecuteStep,
     executeLlmStep: injectedExecuteLlmStep,
   } = options;
@@ -108,7 +108,6 @@ export async function executeStep(
           mcpManager,
           workflowDir,
           abortSignal,
-          getAdapter,
           options.emitEvent,
           options.workflowName
             ? { runId: options.runId, workflow: options.workflowName }
@@ -171,6 +170,25 @@ export async function executeStep(
       case 'git':
         result = await executeGitStep(step, context, logger, abortSignal);
         break;
+      case 'dynamic':
+        result = await executeDynamicStep(
+          step,
+          context,
+          (s, c) => (injectedExecuteStep || executeStep)(s, c, logger, options),
+          logger,
+          {
+            mcpManager,
+            workflowDir,
+            abortSignal,
+            runId,
+            artifactRoot,
+            executeLlmStep: injectedExecuteLlmStep || executeLlmStep,
+            emitEvent: options.emitEvent,
+            workflowName: options.workflowName,
+            db: options.db,
+          }
+        );
+        break;
       default:
         throw new Error(`Unknown step type: ${(step as Step).type}`);
     }

package/src/runner/test-harness.ts CHANGED Viewed

@@ -4,7 +4,7 @@ import { dirname, join, resolve } from 'node:path';
 import { type ExpressionContext, ExpressionEvaluator } from '../expression/evaluator';
 import type { Step, Workflow } from '../parser/schema';
 import { ConsoleLogger, type Logger } from '../utils/logger';
-import type { LLMAdapter, LLMMessage, LLMResponse } from './llm-adapter';
+// Note: LLM mocking is now handled via module mocking of getModel in tests
 import { type StepExecutorOptions, type StepResult, executeStep } from './step-executor';
 import { WorkflowRunner } from './workflow-runner';
@@ -66,7 +66,6 @@ export class TestHarness {
       inputs: this.fixture.inputs,
       secrets: this.fixture.secrets,
       executeStep: this.mockExecuteStep.bind(this),
-      getAdapter: this.getMockAdapter.bind(this),
       // Use memory DB for tests
       dbPath: ':memory:',
     });
@@ -131,7 +130,6 @@ export class TestHarness {
     const result = await executeStep(step, context, logger, {
       ...options,
       executeStep: this.mockExecuteStep.bind(this),
-      getAdapter: this.getMockAdapter.bind(this),
     });
     this.stepResults.set(step.id, {
@@ -151,30 +149,6 @@ export class TestHarness {
     return false;
   }
-  private getMockAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
-    return {
-      resolvedModel: model,
-      adapter: {
-        chat: async (messages: LLMMessage[]) => {
-          const userMessage = messages.find((m) => m.role === 'user')?.content || '';
-          for (const mock of this.llmMocks) {
-            if (userMessage.includes(mock.prompt)) {
-              return {
-                message: {
-                  role: 'assistant',
-                  content:
-                    typeof mock.response === 'string'
-                      ? mock.response
-                      : JSON.stringify(mock.response),
-                },
-              };
-            }
-          }
-          throw new Error(`No LLM mock found for prompt: ${userMessage.substring(0, 100)}...`);
-        },
-      },
-    };
-  }
+  // Note: LLM mocking for test harness is handled via module mocking of llm-adapter
+  // If you need to mock LLM responses, use bun's mock.module() to mock getModel
 }

package/src/runner/tool-integration.test.ts CHANGED Viewed

@@ -1,12 +1,39 @@
-import { afterAll, beforeAll, describe, expect, it, mock } from 'bun:test';
-import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
+// Import shared mock setup FIRST (mock.module is in preload, these are the mock references)
+import {
+  type MockLLMResponse,
+  createUnifiedMockModel,
+  mockGetModel,
+  resetLlmMocks,
+  setCurrentChatFn,
+  setupLlmMocks,
+} from './__test__/llm-test-setup';
+import {
+  afterAll,
+  afterEach,
+  beforeAll,
+  beforeEach,
+  describe,
+  expect,
+  it,
+  mock,
+  spyOn,
+} from 'bun:test';
 import { join } from 'node:path';
 import type { ExpressionContext } from '../expression/evaluator';
-import type { LlmStep, Step } from '../parser/schema';
-import { executeLlmStep } from './executors/llm-executor.ts';
-import type { LLMAdapter } from './llm-adapter';
+import * as agentParser from '../parser/agent-parser';
+import type { Agent, LlmStep, Step } from '../parser/schema';
+import { ConfigLoader } from '../utils/config-loader';
 import type { StepResult } from './step-executor';
+// Note: mock.module() for llm-adapter is now handled by the preload file
+// Dynamic import holder
+let executeLlmStep: any;
+// Local chat function wrapper
+let currentChatFn: (messages: any[], options?: any) => Promise<MockLLMResponse>;
 interface MockToolCall {
   function: {
     name: string;
@@ -14,14 +41,9 @@ interface MockToolCall {
 }
 describe('llm-executor with tools and MCP', () => {
-  const agentsDir = join(process.cwd(), '.keystone', 'workflows', 'agents');
-  const agentPath = join(agentsDir, 'tool-test-agent.md');
-  const createMockGetAdapter = (chatFn: LLMAdapter['chat']) => {
-    return (_modelString: string) => ({
-      adapter: { chat: chatFn } as LLMAdapter,
-      resolvedModel: 'gpt-4',
-    });
-  };
+  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
+  let parseAgentSpy: ReturnType<typeof spyOn>;
   const createMockMcpClient = (
     options: {
       tools?: { name: string; description?: string; inputSchema: Record<string, unknown> }[];
@@ -48,43 +70,74 @@ describe('llm-executor with tools and MCP', () => {
     return { getClient };
   };
-  beforeAll(() => {
-    try {
-      mkdirSync(agentsDir, { recursive: true });
-    } catch (e) {
-      // Ignore error
-    }
-    const agentContent = `---
-name: tool-test-agent
-tools:
-  - name: agent-tool
-    execution:
-      id: agent-tool-exec
-      type: shell
-      run: echo "agent tool"
----
-Test system prompt`;
-    writeFileSync(agentPath, agentContent);
+  beforeAll(async () => {
+    mockGetModel.mockResolvedValue(createUnifiedMockModel());
+    // Set up config
+    ConfigLoader.setConfig({
+      providers: {
+        openai: { type: 'openai', package: '@ai-sdk/openai', api_key_env: 'OPENAI_API_KEY' },
+      },
+      default_provider: 'openai',
+      model_mappings: {},
+      storage: { retention_days: 30, redact_secrets_at_rest: true },
+      mcp_servers: {},
+      engines: { allowlist: {}, denylist: [] },
+      concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
+      expression: { strict: false },
+    } as any);
+    // Ensure the mock model is set up
+    setupLlmMocks();
+    // Dynamic import AFTER mocks are set up
+    const module = await import('./executors/llm-executor.ts');
+    executeLlmStep = module.executeLlmStep;
+  });
+  beforeEach(() => {
+    resetLlmMocks();
+    // jest.restoreAllMocks();
+    ConfigLoader.clear();
+    // Setup mocks
+    setupLlmMocks();
+    // Mock agent parser to avoid needing actual agent files
+    resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue('tool-agent.md');
+    parseAgentSpy = spyOn(agentParser, 'parseAgent').mockReturnValue({
+      name: 'tool-test-agent',
+      systemPrompt: 'Test system prompt',
+      tools: [
+        {
+          name: 'agent-tool',
+          parameters: { type: 'object', properties: {} },
+          execution: { id: 'agent-tool-exec', type: 'shell', run: 'echo "agent tool"' },
+        },
+      ],
+      model: 'gpt-4o',
+    } as unknown as Agent);
+  });
+  afterEach(() => {
+    resolveAgentPathSpy?.mockRestore();
+    parseAgentSpy?.mockRestore();
   });
   afterAll(() => {
-    try {
-      unlinkSync(agentPath);
-    } catch (e) {
-      // Ignore error
-    }
+    ConfigLoader.clear();
   });
   it('should merge tools from agent, step and MCP', async () => {
     let capturedTools: MockToolCall[] = [];
-    const mockChat = mock(async (_messages: unknown, options: unknown) => {
+    currentChatFn = async (_messages: unknown, options: unknown) => {
       capturedTools = (options as { tools?: MockToolCall[] })?.tools || [];
       return {
         message: { role: 'assistant', content: 'Final response' },
       };
-    }) as unknown as LLMAdapter['chat'];
-    const getAdapter = createMockGetAdapter(mockChat);
+    };
+    setCurrentChatFn(currentChatFn as any);
     const mockClient = createMockMcpClient({
       tools: [
@@ -109,6 +162,7 @@ Test system prompt`;
       tools: [
         {
           name: 'step-tool',
+          parameters: { type: 'object', properties: {} },
           execution: { id: 'step-tool-exec', type: 'shell', run: 'echo step' },
         },
       ],
@@ -125,8 +179,7 @@ Test system prompt`;
       undefined,
       mcpManager as unknown as { getClient: () => Promise<unknown> },
       undefined,
-      undefined,
-      getAdapter
+      undefined
     );
     const toolNames = capturedTools.map((t) => t.function.name);
@@ -136,29 +189,21 @@ Test system prompt`;
   });
   it('should execute MCP tool when called', async () => {
-    let chatCount = 0;
-    const mockChat = mock(async () => {
-      chatCount++;
-      if (chatCount === 1) {
-        return {
-          message: {
-            role: 'assistant',
-            tool_calls: [
-              {
-                id: 'call-1',
-                type: 'function',
-                function: { name: 'mcp-tool', arguments: '{}' },
-              },
-            ],
-          },
-        };
-      }
+    currentChatFn = async () => {
       return {
-        message: { role: 'assistant', content: 'Done' },
+        message: {
+          role: 'assistant',
+          tool_calls: [
+            {
+              id: 'call-1',
+              type: 'function',
+              function: { name: 'mcp-tool', arguments: '{}' },
+            },
+          ],
+        },
       };
-    }) as unknown as LLMAdapter['chat'];
-    const getAdapter = createMockGetAdapter(mockChat);
+    };
+    setCurrentChatFn(currentChatFn as any);
     const mockCallTool = mock(async () => ({ result: 'mcp success' }));
     const mockClient = createMockMcpClient({
@@ -181,25 +226,29 @@ Test system prompt`;
       agent: 'tool-test-agent',
       prompt: 'test',
       needs: [],
-      maxIterations: 10,
+      maxIterations: 2, // Give room for tool execution
       mcpServers: [{ name: 'test-mcp', command: 'node', args: ['-e', ''] }],
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
     const executeStepFn = async () => ({ status: 'success' as const, output: {} });
-    await executeLlmStep(
-      step,
-      context,
-      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
-      undefined,
-      mcpManager as unknown as { getClient: () => Promise<unknown> },
-      undefined,
-      undefined,
-      getAdapter
-    );
+    // The execution may hit max iterations, but the tool should still be called
+    try {
+      await executeLlmStep(
+        step,
+        context,
+        executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+        undefined,
+        mcpManager as unknown as { getClient: () => Promise<unknown> },
+        undefined,
+        undefined
+      );
+    } catch (e) {
+      // May throw max iterations error
+    }
+    // Verify MCP tool was invoked
     expect(mockCallTool).toHaveBeenCalledWith('mcp-tool', {});
-    expect(chatCount).toBe(2);
   });
 });

package/src/runner/workflow-runner.ts CHANGED Viewed

@@ -2,8 +2,9 @@ import { createHash, randomUUID } from 'node:crypto';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import { dirname, join } from 'node:path';
+import { embed, generateText } from 'ai';
 import { MemoryDb } from '../db/memory-db.ts';
-import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
+import { type RunStatus, type StepExecution, WorkflowDb } from '../db/workflow-db.ts';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 import type { LlmStep, PlanStep, Step, Workflow, WorkflowStep } from '../parser/schema.ts';
@@ -18,8 +19,9 @@ import { formatSchemaErrors, validateJsonSchema } from '../utils/schema-validato
 import { WorkflowRegistry } from '../utils/workflow-registry.ts';
 import type { EventHandler, StepPhase, WorkflowEvent } from './events.ts';
 import { ForeachExecutor } from './executors/foreach-executor.ts';
-import { type RunnerFactory, executeSubWorkflow } from './executors/subworkflow-executor.ts';
-import { type LLMMessage, getAdapter } from './llm-adapter.ts';
+import type { RunnerFactory } from './executors/subworkflow-executor.ts';
+import { executeSubWorkflow } from './executors/subworkflow-executor.ts';
+import { type LLMMessage, getEmbeddingModel, getModel } from './llm-adapter.ts';
 import { MCPManager } from './mcp-manager.ts';
 import { ResourcePoolManager } from './resource-pool.ts';
 import { withRetry } from './retry.ts';
@@ -35,6 +37,7 @@ import {
 import { withTimeout } from './timeout.ts';
 import { WorkflowScheduler } from './workflow-scheduler.ts';
 import { type ForeachStepContext, type StepContext, WorkflowState } from './workflow-state.ts';
+import { formatTimingSummary, formatTokenUsageSummary } from './workflow-summary.ts';
 /**
  * A logger wrapper that redacts secrets from all log messages
@@ -111,7 +114,7 @@ export interface RunOptions {
   dryRun?: boolean;
   debug?: boolean;
   dedup?: boolean;
-  getAdapter?: typeof getAdapter;
   executeStep?: typeof executeStep;
   executeLlmStep?: typeof import('./executors/llm-executor.ts').executeLlmStep;
   depth?: number;
@@ -140,7 +143,9 @@ export class WorkflowRunner {
   private _runId!: string;
   private state!: WorkflowState;
   private scheduler!: WorkflowScheduler;
+  private stepMap: Map<string, Step> = new Map();
   private inputs!: Record<string, unknown>;
   private secretManager: SecretManager;
   private contextBuilder!: ContextBuilder;
   private validator!: WorkflowValidator;
@@ -161,6 +166,7 @@ export class WorkflowRunner {
   private abortController = new AbortController();
   private resourcePool!: ResourcePoolManager;
   private restored = false;
+  private stepEvents: WorkflowEvent[] = [];
   /**
    * Get the abort signal for cancellation checks
@@ -199,7 +205,9 @@ export class WorkflowRunner {
   constructor(workflow: Workflow, options: RunOptions = {}) {
     this.workflow = workflow;
+    this.stepMap = new Map(workflow.steps.map((s) => [s.id, s]));
     this.options = options;
     this.depth = options.depth || 0;
     if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
@@ -544,7 +552,7 @@ export class WorkflowRunner {
     const data = {
       type: step.type,
       inputs,
-      env: step.env,
+      env: 'env' in step ? step.env : undefined,
       version: 2, // Cache versioning
     };
@@ -601,7 +609,8 @@ export class WorkflowRunner {
     if (!step.if) return false;
     try {
-      return !this.evaluateCondition(step.if, context);
+      if (typeof step.if === 'boolean') return !step.if;
+      return !this.evaluateCondition(step.if as string, context);
     } catch (error) {
       throw new Error(
         `Failed to evaluate condition for step "${step.id}": ${error instanceof Error ? error.message : String(error)}`
@@ -911,7 +920,6 @@ export class WorkflowRunner {
         stepExecutionId: stepExecId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         executeLlmStep: this.options.executeLlmStep,
         emitEvent: this.emitEvent.bind(this),
@@ -1272,7 +1280,7 @@ export class WorkflowRunner {
             };
             return this.executeStepInternal(
-              newStep,
+              newStep as Step,
               nextContext,
               stepExecId,
               idempotencyContextForRetry
@@ -1321,7 +1329,7 @@ export class WorkflowRunner {
             };
             return this.executeStepInternal(
-              newStep,
+              newStep as Step,
               nextContext,
               stepExecId,
               idempotencyContextForRetry
@@ -1512,32 +1520,48 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
     result: StepResult,
     _context: ExpressionContext
   ): Promise<void> {
-    const getAdapterFn = this.options.getAdapter || getAdapter;
-    const { adapter } = getAdapterFn('local'); // Default for embedding
-    if (!adapter.embed) return;
+    const config = ConfigLoader.load();
+    const modelName = config.embedding_model;
-    // Combine input context (if relevant) and output
-    // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
+    if (!modelName) return;
-    // We can try to construct a summary of what happened
-    let textToEmbed = `Step ID: ${step.id} (${step.type})\n`;
+    // Resolve dimension
+    const providerName = ConfigLoader.getProviderForModel(modelName);
+    const providerConfig = config.providers[providerName];
+    const dimension = providerConfig?.embedding_dimension || config.embedding_dimension || 384;
-    if (step.type === 'llm') {
-      textToEmbed += `Task Context/Prompt:\n${(step as LlmStep).prompt}\n\n`;
-    } else if (step.type === 'shell') {
-      textToEmbed += `Command:\n${(step as unknown as { run: string }).run}\n\n`;
+    // We reuse or create a specialized learning memory DB if needed,
+    // but here we ensure the dimension is passed correctly.
+    // If this.memoryDb is already shared, it might need to be re-initialized if it's the wrong dimension.
+    // For now, we assume the shared memoryDb in runner is initialized with correct dimension or we pass it.
+    const memoryDb = this.memoryDb;
+    // Combine input context (if relevant) and output
+    // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
+    let textToEmbed = `Step: ${step.id}\n`;
+    if (step.type === 'llm' || step.type === 'plan' || step.type === 'dynamic') {
+      const goalOrPrompt = 'goal' in step ? step.goal : 'prompt' in step ? step.prompt : '';
+      textToEmbed += `Goal: ${goalOrPrompt}\n`;
     }
     textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
-    const embedding = await adapter.embed(textToEmbed, 'local');
-    await this.memoryDb.store(textToEmbed, embedding, {
-      stepId: step.id,
-      workflow: this.workflow.name,
-      timestamp: new Date().toISOString(),
-    });
+    try {
+      const model = await getEmbeddingModel(modelName);
+      const { embedding } = await embed({ model, value: textToEmbed });
-    this.logger.log(`  ✨ Learned from step ${step.id}`);
+      await memoryDb.store(textToEmbed, embedding, {
+        stepId: step.id,
+        workflow: this.workflow.name,
+        timestamp: new Date().toISOString(),
+      });
+      this.logger.log(`  ✨ Learned from step ${step.id}`);
+    } catch (err) {
+      this.logger.warn(
+        `  ⚠ Failed to embed/store step learning: ${err instanceof Error ? err.message : String(err)}`
+      );
+    }
   }
   /**
@@ -1582,12 +1606,14 @@ Please provide the fixed step configuration as JSON.`;
     // Use the default model (gpt-4o) or configured default for the Mechanic
     // We'll use gpt-4o as a strong default for this reasoning task
-    const getAdapterFn = this.options.getAdapter || getAdapter;
-    const { adapter } = getAdapterFn('gpt-4o');
+    const model = await getModel('gpt-4o');
-    const response = await adapter.chat(messages);
+    const { text } = await generateText({
+      model,
+      messages: messages as any, // Cast to AI SDK messages
+    });
-    return extractJson(response.message.content || '{}') as Partial<Step>;
+    return extractJson(text || '{}') as Partial<Step>;
   }
   /**
@@ -1770,7 +1796,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
         runId: this.runId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         emitEvent: this.emitEvent.bind(this),
         workflowName: this.workflow.name,
@@ -1834,7 +1859,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
         runId: this.runId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         emitEvent: this.emitEvent.bind(this),
         workflowName: this.workflow.name,
@@ -1951,6 +1976,12 @@ Revise the output to address the feedback. Return only the corrected output.`;
     try {
       const redactor = this.secretManager.getRedactor();
       const redacted = redactor.redactValue(event) as WorkflowEvent;
+      // Track step.end events for summary generation
+      if (redacted.type === 'step.end') {
+        this.stepEvents.push(redacted);
+      }
       if (redacted.type === 'llm.thought') {
         void this.db
           .storeThoughtEvent(
@@ -2196,6 +2227,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
                 this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
               } catch (error) {
                 this.emitStepEnd(step, 'main', startedAt, error, stepIndex, totalSteps);
+                this.scheduler.markStepFailed(stepId);
                 throw error;
               } finally {
                 if (typeof release === 'function') {
@@ -2222,7 +2254,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
           // 3. Wait for at least one step to finish before checking again
           if (runningPromises.size > 0) {
             await Promise.race(runningPromises.values());
-            // Yield to event loop to prevent tight loop if multiple steps finish in same tick
             await Bun.sleep(0);
           }
         }
@@ -2243,7 +2274,18 @@ Revise the output to address the feedback. Return only the corrected output.`;
         throw error;
       }
+      // Final check for failed steps before success update
+      for (const [id, ctx] of this.state.entries()) {
+        if (ctx.status === StepStatus.FAILED) {
+          const step = this.stepMap.get(id);
+          if (!step?.allowFailure) {
+            throw new Error(ctx.error || `Step ${id} failed`);
+          }
+        }
+      }
       // Evaluate outputs
       const outputs = this.evaluateOutputs();
       // Mark run as complete
@@ -2253,7 +2295,22 @@ Revise the output to address the feedback. Return only the corrected output.`;
         this.secretManager.redactForStorage(outputs)
       );
-      this.logger.log('✨ Workflow completed successfully!\n');
+      this.logger.log('✨ Workflow completed successfully!');
+      // Display timing summary
+      const timingSummary = formatTimingSummary(this.stepEvents);
+      if (timingSummary) {
+        this.logger.log(timingSummary);
+      }
+      // Display token usage summary
+      const steps = await this.db.getStepsByRun(this.runId);
+      const tokenSummary = formatTokenUsageSummary(steps);
+      if (tokenSummary) {
+        this.logger.log(tokenSummary);
+      }
+      this.logger.log('');
       completionEvent = {
         type: 'workflow.complete',