npm - keystone-cli - Versions diffs - 1.3.0 → 2.0.1 - Mend

keystone-cli 1.3.0 → 2.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (42) hide show

package/README.md +127 -140
package/package.json +6 -3
package/src/cli.ts +54 -369
package/src/commands/init.ts +15 -29
package/src/db/memory-db.test.ts +45 -0
package/src/db/memory-db.ts +47 -21
package/src/db/sqlite-setup.ts +26 -3
package/src/db/workflow-db.ts +12 -5
package/src/parser/config-schema.ts +17 -13
package/src/parser/schema.ts +4 -2
package/src/runner/__test__/llm-mock-setup.ts +173 -0
package/src/runner/__test__/llm-test-setup.ts +271 -0
package/src/runner/engine-executor.test.ts +25 -18
package/src/runner/executors/blueprint-executor.ts +0 -1
package/src/runner/executors/dynamic-executor.ts +11 -6
package/src/runner/executors/engine-executor.ts +5 -1
package/src/runner/executors/llm-executor.ts +502 -1033
package/src/runner/executors/memory-executor.ts +35 -19
package/src/runner/executors/plan-executor.ts +0 -1
package/src/runner/executors/types.ts +4 -4
package/src/runner/llm-adapter.integration.test.ts +151 -0
package/src/runner/llm-adapter.ts +270 -1398
package/src/runner/llm-clarification.test.ts +91 -106
package/src/runner/llm-executor.test.ts +217 -1181
package/src/runner/memoization.test.ts +0 -1
package/src/runner/recovery-security.test.ts +51 -20
package/src/runner/reflexion.test.ts +55 -18
package/src/runner/standard-tools-integration.test.ts +137 -87
package/src/runner/step-executor.test.ts +36 -80
package/src/runner/step-executor.ts +0 -2
package/src/runner/test-harness.ts +3 -29
package/src/runner/tool-integration.test.ts +122 -73
package/src/runner/workflow-runner.ts +110 -49
package/src/runner/workflow-scheduler.ts +11 -1
package/src/runner/workflow-summary.ts +144 -0
package/src/utils/auth-manager.test.ts +10 -520
package/src/utils/auth-manager.ts +3 -756
package/src/utils/config-loader.ts +12 -0
package/src/utils/constants.ts +0 -17
package/src/utils/process-sandbox.ts +15 -3
package/src/runner/llm-adapter-runtime.test.ts +0 -209
package/src/runner/llm-adapter.test.ts +0 -1012

package/src/runner/workflow-runner.ts CHANGED Viewed

@@ -2,8 +2,9 @@ import { createHash, randomUUID } from 'node:crypto';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import { dirname, join } from 'node:path';
+import { embed, generateText } from 'ai';
 import { MemoryDb } from '../db/memory-db.ts';
-import { type RunStatus, WorkflowDb } from '../db/workflow-db.ts';
+import { type RunStatus, type StepExecution, WorkflowDb } from '../db/workflow-db.ts';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 import type { LlmStep, PlanStep, Step, Workflow, WorkflowStep } from '../parser/schema.ts';
@@ -18,8 +19,9 @@ import { formatSchemaErrors, validateJsonSchema } from '../utils/schema-validato
 import { WorkflowRegistry } from '../utils/workflow-registry.ts';
 import type { EventHandler, StepPhase, WorkflowEvent } from './events.ts';
 import { ForeachExecutor } from './executors/foreach-executor.ts';
-import { type RunnerFactory, executeSubWorkflow } from './executors/subworkflow-executor.ts';
-import { type LLMMessage, getAdapter } from './llm-adapter.ts';
+import type { RunnerFactory } from './executors/subworkflow-executor.ts';
+import { executeSubWorkflow } from './executors/subworkflow-executor.ts';
+import { type LLMMessage, getEmbeddingModel, getModel } from './llm-adapter.ts';
 import { MCPManager } from './mcp-manager.ts';
 import { ResourcePoolManager } from './resource-pool.ts';
 import { withRetry } from './retry.ts';
@@ -35,6 +37,7 @@ import {
 import { withTimeout } from './timeout.ts';
 import { WorkflowScheduler } from './workflow-scheduler.ts';
 import { type ForeachStepContext, type StepContext, WorkflowState } from './workflow-state.ts';
+import { formatTimingSummary, formatTokenUsageSummary } from './workflow-summary.ts';
 /**
  * A logger wrapper that redacts secrets from all log messages
@@ -43,7 +46,7 @@ class RedactingLogger implements Logger {
   constructor(
     private inner: Logger,
     private redactor: Redactor
-  ) {}
+  ) { }
   log(msg: string): void {
     this.inner.log(this.redactor.redact(msg));
@@ -111,7 +114,7 @@ export interface RunOptions {
   dryRun?: boolean;
   debug?: boolean;
   dedup?: boolean;
-  getAdapter?: typeof getAdapter;
   executeStep?: typeof executeStep;
   executeLlmStep?: typeof import('./executors/llm-executor.ts').executeLlmStep;
   depth?: number;
@@ -140,7 +143,9 @@ export class WorkflowRunner {
   private _runId!: string;
   private state!: WorkflowState;
   private scheduler!: WorkflowScheduler;
+  private stepMap: Map<string, Step> = new Map();
   private inputs!: Record<string, unknown>;
   private secretManager: SecretManager;
   private contextBuilder!: ContextBuilder;
   private validator!: WorkflowValidator;
@@ -161,6 +166,7 @@ export class WorkflowRunner {
   private abortController = new AbortController();
   private resourcePool!: ResourcePoolManager;
   private restored = false;
+  private stepEvents: WorkflowEvent[] = [];
   /**
    * Get the abort signal for cancellation checks
@@ -187,7 +193,7 @@ export class WorkflowRunner {
     if (parentSignal.aborted) {
       controller.abort();
-      return { controller, cleanup: () => {} };
+      return { controller, cleanup: () => { } };
     }
     parentSignal.addEventListener('abort', onAbort, { once: true });
@@ -199,7 +205,9 @@ export class WorkflowRunner {
   constructor(workflow: Workflow, options: RunOptions = {}) {
     this.workflow = workflow;
+    this.stepMap = new Map(workflow.steps.map((s) => [s.id, s]));
     this.options = options;
     this.depth = options.depth || 0;
     if (this.depth > WorkflowRunner.MAX_RECURSION_DEPTH) {
@@ -544,7 +552,7 @@ export class WorkflowRunner {
     const data = {
       type: step.type,
       inputs,
-      env: step.env,
+      env: 'env' in step ? step.env : undefined,
       version: 2, // Cache versioning
     };
@@ -601,7 +609,8 @@ export class WorkflowRunner {
     if (!step.if) return false;
     try {
-      return !this.evaluateCondition(step.if, context);
+      if (typeof step.if === 'boolean') return !step.if;
+      return !this.evaluateCondition(step.if as string, context);
     } catch (error) {
       throw new Error(
         `Failed to evaluate condition for step "${step.id}": ${error instanceof Error ? error.message : String(error)}`
@@ -807,11 +816,11 @@ export class WorkflowRunner {
     const idempotencyContextForRetry =
       idempotencyClaimed && scopedIdempotencyKey
         ? {
-            rawKey: idempotencyKey || scopedIdempotencyKey,
-            scopedKey: scopedIdempotencyKey,
-            ttlSeconds: idempotencyTtlSeconds,
-            claimed: true,
-          }
+          rawKey: idempotencyKey || scopedIdempotencyKey,
+          scopedKey: scopedIdempotencyKey,
+          ttlSeconds: idempotencyTtlSeconds,
+          claimed: true,
+        }
         : undefined;
     let stepToExecute = step;
@@ -911,7 +920,6 @@ export class WorkflowRunner {
         stepExecutionId: stepExecId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         executeLlmStep: this.options.executeLlmStep,
         emitEvent: this.emitEvent.bind(this),
@@ -924,9 +932,9 @@ export class WorkflowRunner {
         try {
           const outputForValidation =
             stepToExecute.type === 'engine' &&
-            result.output &&
-            typeof result.output === 'object' &&
-            'summary' in result.output
+              result.output &&
+              typeof result.output === 'object' &&
+              'summary' in result.output
               ? (result.output as { summary?: unknown }).summary
               : result.output;
           this.validator.validateSchema(
@@ -1272,7 +1280,7 @@ export class WorkflowRunner {
             };
             return this.executeStepInternal(
-              newStep,
+              newStep as Step,
               nextContext,
               stepExecId,
               idempotencyContextForRetry
@@ -1321,7 +1329,7 @@ export class WorkflowRunner {
             };
             return this.executeStepInternal(
-              newStep,
+              newStep as Step,
               nextContext,
               stepExecId,
               idempotencyContextForRetry
@@ -1512,32 +1520,48 @@ Do not change the 'id' or 'type' or 'auto_heal' fields.
     result: StepResult,
     _context: ExpressionContext
   ): Promise<void> {
-    const getAdapterFn = this.options.getAdapter || getAdapter;
-    const { adapter } = getAdapterFn('local'); // Default for embedding
-    if (!adapter.embed) return;
+    const config = ConfigLoader.load();
+    const modelName = config.embedding_model;
-    // Combine input context (if relevant) and output
-    // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
+    if (!modelName) return;
+    // Resolve dimension
+    const providerName = ConfigLoader.getProviderForModel(modelName);
+    const providerConfig = config.providers[providerName];
+    const dimension = providerConfig?.embedding_dimension || config.embedding_dimension || 384;
-    // We can try to construct a summary of what happened
-    let textToEmbed = `Step ID: ${step.id} (${step.type})\n`;
+    // We reuse or create a specialized learning memory DB if needed,
+    // but here we ensure the dimension is passed correctly.
+    // If this.memoryDb is already shared, it might need to be re-initialized if it's the wrong dimension.
+    // For now, we assume the shared memoryDb in runner is initialized with correct dimension or we pass it.
+    const memoryDb = this.memoryDb;
-    if (step.type === 'llm') {
-      textToEmbed += `Task Context/Prompt:\n${(step as LlmStep).prompt}\n\n`;
-    } else if (step.type === 'shell') {
-      textToEmbed += `Command:\n${(step as unknown as { run: string }).run}\n\n`;
+    // Combine input context (if relevant) and output
+    // For now, let's keep it simple: "Step: ID\nGoal: description\nOutput: result"
+    let textToEmbed = `Step: ${step.id}\n`;
+    if (step.type === 'llm' || step.type === 'plan' || step.type === 'dynamic') {
+      const goalOrPrompt = 'goal' in step ? step.goal : 'prompt' in step ? step.prompt : '';
+      textToEmbed += `Goal: ${goalOrPrompt}\n`;
     }
     textToEmbed += `Successful Outcome:\n${JSON.stringify(result.output, null, 2)}`;
-    const embedding = await adapter.embed(textToEmbed, 'local');
-    await this.memoryDb.store(textToEmbed, embedding, {
-      stepId: step.id,
-      workflow: this.workflow.name,
-      timestamp: new Date().toISOString(),
-    });
+    try {
+      const model = await getEmbeddingModel(modelName);
+      const { embedding } = await embed({ model, value: textToEmbed });
+      await memoryDb.store(textToEmbed, embedding, {
+        stepId: step.id,
+        workflow: this.workflow.name,
+        timestamp: new Date().toISOString(),
+      });
-    this.logger.log(`  ✨ Learned from step ${step.id}`);
+      this.logger.log(`  ✨ Learned from step ${step.id}`);
+    } catch (err) {
+      this.logger.warn(
+        `  ⚠ Failed to embed/store step learning: ${err instanceof Error ? err.message : String(err)}`
+      );
+    }
   }
   /**
@@ -1582,12 +1606,14 @@ Please provide the fixed step configuration as JSON.`;
     // Use the default model (gpt-4o) or configured default for the Mechanic
     // We'll use gpt-4o as a strong default for this reasoning task
-    const getAdapterFn = this.options.getAdapter || getAdapter;
-    const { adapter } = getAdapterFn('gpt-4o');
+    const model = await getModel('gpt-4o');
-    const response = await adapter.chat(messages);
+    const { text } = await generateText({
+      model,
+      messages: messages as any, // Cast to AI SDK messages
+    });
-    return extractJson(response.message.content || '{}') as Partial<Step>;
+    return extractJson(text || '{}') as Partial<Step>;
   }
   /**
@@ -1770,7 +1796,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
         runId: this.runId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         emitEvent: this.emitEvent.bind(this),
         workflowName: this.workflow.name,
@@ -1834,7 +1859,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
         runId: this.runId,
         artifactRoot: this.options.artifactRoot,
         redactForStorage: this.secretManager.redactForStorage.bind(this.secretManager),
-        getAdapter: this.options.getAdapter,
         executeStep: this.options.executeStep || executeStep,
         emitEvent: this.emitEvent.bind(this),
         workflowName: this.workflow.name,
@@ -1951,6 +1976,12 @@ Revise the output to address the feedback. Return only the corrected output.`;
     try {
       const redactor = this.secretManager.getRedactor();
       const redacted = redactor.redactValue(event) as WorkflowEvent;
+      // Track step.end events for summary generation
+      if (redacted.type === 'step.end') {
+        this.stepEvents.push(redacted);
+      }
       if (redacted.type === 'llm.thought') {
         void this.db
           .storeThoughtEvent(
@@ -2045,12 +2076,16 @@ Revise the output to address the feedback. Return only the corrected output.`;
     this.logger.log(`\n🏛️  ${isResume ? 'Resuming' : 'Running'} workflow: ${this.workflow.name}`);
     this.logger.log(`Run ID: ${this.runId}`);
-    this.logger.log(
-      '\n⚠️  Security Warning: Only run workflows from trusted sources.\n' +
+    const config = ConfigLoader.load();
+    if (!config.logging?.suppress_security_warning) {
+      this.logger.log(
+        '\n⚠️  Security Warning: Only run workflows from trusted sources.\n' +
         '   Workflows can execute arbitrary shell commands and access your environment.\n'
-    );
+      );
+    }
-    this.secretManager.redactAtRest = ConfigLoader.load().storage?.redact_secrets_at_rest ?? true;
+    this.secretManager.redactAtRest = config.storage?.redact_secrets_at_rest ?? true;
     // Apply defaults and validate inputs
     const validated = this.validator.applyDefaultsAndValidate();
@@ -2196,6 +2231,7 @@ Revise the output to address the feedback. Return only the corrected output.`;
                 this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
               } catch (error) {
                 this.emitStepEnd(step, 'main', startedAt, error, stepIndex, totalSteps);
+                this.scheduler.markStepFailed(stepId);
                 throw error;
               } finally {
                 if (typeof release === 'function') {
@@ -2222,7 +2258,6 @@ Revise the output to address the feedback. Return only the corrected output.`;
           // 3. Wait for at least one step to finish before checking again
           if (runningPromises.size > 0) {
             await Promise.race(runningPromises.values());
-            // Yield to event loop to prevent tight loop if multiple steps finish in same tick
             await Bun.sleep(0);
           }
         }
@@ -2243,7 +2278,18 @@ Revise the output to address the feedback. Return only the corrected output.`;
         throw error;
       }
+      // Final check for failed steps before success update
+      for (const [id, ctx] of this.state.entries()) {
+        if (ctx.status === StepStatus.FAILED) {
+          const step = this.stepMap.get(id);
+          if (!step?.allowFailure) {
+            throw new Error(ctx.error || `Step ${id} failed`);
+          }
+        }
+      }
       // Evaluate outputs
       const outputs = this.evaluateOutputs();
       // Mark run as complete
@@ -2253,7 +2299,22 @@ Revise the output to address the feedback. Return only the corrected output.`;
         this.secretManager.redactForStorage(outputs)
       );
-      this.logger.log('✨ Workflow completed successfully!\n');
+      this.logger.log('✨ Workflow completed successfully!');
+      // Display timing summary
+      const timingSummary = formatTimingSummary(this.stepEvents);
+      if (timingSummary) {
+        this.logger.log(timingSummary);
+      }
+      // Display token usage summary
+      const steps = await this.db.getStepsByRun(this.runId);
+      const tokenSummary = formatTokenUsageSummary(steps);
+      if (tokenSummary) {
+        this.logger.log(tokenSummary);
+      }
+      this.logger.log('');
       completionEvent = {
         type: 'workflow.complete',

package/src/runner/workflow-scheduler.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { WorkflowParser } from '../parser/workflow-parser.ts';
 export class WorkflowScheduler {
   private executionOrder: string[];
   private pendingSteps: Set<string>;
+  private runningSteps: Set<string>;
   private completedSteps: Set<string>;
   private stepMap: Map<string, Step>;
@@ -20,6 +21,7 @@ export class WorkflowScheduler {
     // Remaining steps to execute
     const remaining = this.executionOrder.filter((id) => !this.completedSteps.has(id));
     this.pendingSteps = new Set(remaining);
+    this.runningSteps = new Set();
   }
   public getExecutionOrder(): string[] {
@@ -31,12 +33,13 @@ export class WorkflowScheduler {
   }
   public isComplete(): boolean {
-    return this.pendingSteps.size === 0;
+    return this.pendingSteps.size === 0 && this.runningSteps.size === 0;
   }
   public markStepComplete(stepId: string): void {
     this.completedSteps.add(stepId);
     this.pendingSteps.delete(stepId);
+    this.runningSteps.delete(stepId);
   }
   public getRunnableSteps(runningCount: number, globalConcurrencyLimit: number): Step[] {
@@ -60,6 +63,13 @@ export class WorkflowScheduler {
   public startStep(stepId: string): void {
     this.pendingSteps.delete(stepId);
+    this.runningSteps.add(stepId);
+  }
+  public markStepFailed(stepId: string): void {
+    this.runningSteps.delete(stepId);
+    // Note: We don't add back to pending; it's failed.
+    // Resume will handle restoring state and scheduler will see it's not completed.
   }
   private isStepReady(step: Step): boolean {

package/src/runner/workflow-summary.ts ADDED Viewed

@@ -0,0 +1,144 @@
+import type { StepExecution } from '../db/workflow-db';
+import type { WorkflowEvent } from './events';
+/**
+ * Format a duration in milliseconds to a human-readable string.
+ *
+ * Values below 1000 are rounded to the nearest whole millisecond and suffixed
+ * with `ms`; all other values are divided by 1000 and printed with one decimal
+ * place and an `s` suffix.
+ *
+ * @param ms - Duration in milliseconds.
+ * @returns The formatted duration, e.g. `250ms` or `1.5s`.
+ */
+function formatDuration(ms: number): string {
+  if (ms < 1000) {
+    return `${Math.round(ms)}ms`; // rounding can yield "1000ms" for inputs such as 999.6
+  }
+  return `${(ms / 1000).toFixed(1)}s`;
+}
+/**
+ * Format a number with comma separators.
+ *
+ * Delegates to `toLocaleString` with the fixed `en-US` locale, so the grouping
+ * does not depend on the locale of the host running the code.
+ *
+ * @param num - The number to format.
+ * @returns The formatted number, e.g. `1,234,567`.
+ */
+function formatNumber(num: number): string {
+  return num.toLocaleString('en-US');
+}
+interface StepTiming { // duration record for one step, as produced by extractStepTimings
+  stepId: string; // copied from the event's stepId
+  stepType: string; // copied from the event's stepType
+  durationMs: number; // copied from the event's durationMs (milliseconds, per the field name)
+}
+/**
+ * Extract timing information from step.end events.
+ *
+ * An event contributes a timing only when its type is `step.end`, its phase is
+ * `main`, and its `durationMs` is defined; every other event is skipped.
+ * Timings are returned in the order the matching events appear in `events`.
+ *
+ * @param events - Workflow events to scan.
+ * @returns One `StepTiming` per matching event; an empty array when none match.
+ */
+export function extractStepTimings(events: WorkflowEvent[]): StepTiming[] {
+  const timings: StepTiming[] = [];
+  for (const event of events) {
+    if (event.type === 'step.end' && event.phase === 'main' && event.durationMs !== undefined) {
+      timings.push({
+        stepId: event.stepId,
+        stepType: event.stepType,
+        durationMs: event.durationMs,
+      });
+    }
+  }
+  return timings;
+}
+/**
+ * Format timing summary from step events.
+ *
+ * Builds a multi-line block: a header with the total duration, followed by one
+ * bullet per step showing its duration and its rounded share of the total,
+ * ordered from slowest to fastest. The returned string starts with a newline.
+ *
+ * The total is the sum of the individual step durations.
+ * NOTE(review): if steps can run concurrently, this sum may exceed the
+ * wall-clock time of the run — confirm whether that is the intended figure.
+ * Percentages are rounded independently, so they may not add up to exactly 100.
+ *
+ * @param events - Workflow events; only those accepted by `extractStepTimings` are used.
+ * @returns The formatted summary, or `null` when there are no timings or the
+ *   total duration is 0 (which also avoids dividing by zero for the percentages).
+ */
+export function formatTimingSummary(events: WorkflowEvent[]): string | null {
+  const timings = extractStepTimings(events);
+  if (timings.length === 0) {
+    return null;
+  }
+  const totalMs = timings.reduce((sum, t) => sum + t.durationMs, 0);
+  if (totalMs === 0) {
+    return null;
+  }
+  // Sort by duration descending (sorts the local `timings` array in place; `sorted` is that same array)
+  const sorted = timings.sort((a, b) => b.durationMs - a.durationMs);
+  const lines: string[] = [];
+  lines.push(`\n⏱️  Timing Summary (total: ${formatDuration(totalMs)})`);
+  for (const timing of sorted) {
+    const percentage = Math.round((timing.durationMs / totalMs) * 100);
+    lines.push(`  • ${timing.stepId}: ${formatDuration(timing.durationMs)} (${percentage}%)`);
+  }
+  return lines.join('\n');
+}
+interface TokenUsage { // token totals summed across step executions by aggregateTokenUsage
+  promptTokens: number; // sum of the parsed `prompt_tokens` values
+  completionTokens: number; // sum of the parsed `completion_tokens` values
+  totalTokens: number; // sum of the parsed `total_tokens` values (not recomputed from the other two)
+}
+/**
+ * Extract and aggregate token usage from step executions.
+ *
+ * Each step's `usage` value is parsed with `JSON.parse` (presumably a JSON
+ * string — verify against `StepExecution`). A parsed record is counted only
+ * when it has a defined `prompt_tokens` field; for a counted record, missing or
+ * falsy `prompt_tokens`, `completion_tokens` and `total_tokens` values are
+ * treated as 0. Steps without `usage`, and records that fail to parse, are
+ * skipped silently.
+ *
+ * @param steps - Step executions to aggregate.
+ * @returns The summed token counts, or `null` when no step contributed a
+ *   usage record.
+ */
+export function aggregateTokenUsage(steps: StepExecution[]): TokenUsage | null {
+  let promptTokens = 0;
+  let completionTokens = 0;
+  let totalTokens = 0;
+  let hasUsage = false;
+  for (const step of steps) {
+    if (step.usage) {
+      try {
+        const usage = JSON.parse(step.usage);
+        if (usage.prompt_tokens !== undefined) {
+          promptTokens += usage.prompt_tokens || 0;
+          completionTokens += usage.completion_tokens || 0;
+          totalTokens += usage.total_tokens || 0;
+          hasUsage = true;
+        }
+      } catch {
+        // Ignore parse errors (this also swallows the TypeError thrown when the parsed value is null)
+      }
+    }
+  }
+  return hasUsage ? { promptTokens, completionTokens, totalTokens } : null;
+}
+/**
+ * Estimate cost based on token usage
+ * Uses rough estimates for common models (GPT-4o pricing as baseline)
+ *
+ * The same hard-coded rates are applied to every token, regardless of which
+ * model actually produced the usage, so the result is only an approximation.
+ * Only `promptTokens` and `completionTokens` are used; `totalTokens` is ignored.
+ *
+ * @param usage - Aggregated token counts.
+ * @returns `<$0.01` when the estimate is below one cent, otherwise the
+ *   estimate prefixed with `~$` and shown with two decimal places.
+ */
+function estimateCost(usage: TokenUsage): string {
+  // Rough estimate: $2.50 per 1M input tokens, $10 per 1M output tokens (GPT-4o)
+  const inputCost = (usage.promptTokens / 1_000_000) * 2.5;
+  const outputCost = (usage.completionTokens / 1_000_000) * 10;
+  const total = inputCost + outputCost;
+  if (total < 0.01) {
+    return '<$0.01';
+  }
+  return `~$${total.toFixed(2)}`;
+}
+/**
+ * Format token usage summary from step executions.
+ *
+ * Aggregates usage with `aggregateTokenUsage` and renders a three-line block:
+ * a header, a line with the input / output / total token counts formatted with
+ * comma separators, and a line with the estimated cost from `estimateCost`.
+ * The returned string starts with a newline.
+ *
+ * @param steps - Step executions whose usage should be summarised.
+ * @returns The formatted summary, or `null` when no usage was recorded.
+ */
+export function formatTokenUsageSummary(steps: StepExecution[]): string | null {
+  const usage = aggregateTokenUsage(steps);
+  if (!usage) {
+    return null;
+  }
+  const lines: string[] = [];
+  lines.push('\n📊 Token Usage');
+  lines.push(
+    `  • Input: ${formatNumber(usage.promptTokens)} | Output: ${formatNumber(usage.completionTokens)} | Total: ${formatNumber(usage.totalTokens)}`
+  );
+  lines.push(`  • Estimated cost: ${estimateCost(usage)}`);
+  return lines.join('\n');
+}