npm - keystone-cli - Versions diffs - 0.5.1 → 0.6.1 - Mend

keystone-cli 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/README.md +55 -8
package/package.json +8 -17
package/src/cli.ts +219 -166
package/src/db/memory-db.test.ts +54 -0
package/src/db/memory-db.ts +128 -0
package/src/db/sqlite-setup.test.ts +47 -0
package/src/db/sqlite-setup.ts +49 -0
package/src/db/workflow-db.test.ts +41 -10
package/src/db/workflow-db.ts +90 -28
package/src/expression/evaluator.test.ts +19 -0
package/src/expression/evaluator.ts +134 -39
package/src/parser/schema.ts +41 -0
package/src/runner/audit-verification.test.ts +23 -0
package/src/runner/auto-heal.test.ts +64 -0
package/src/runner/debug-repl.test.ts +308 -0
package/src/runner/debug-repl.ts +225 -0
package/src/runner/foreach-executor.ts +327 -0
package/src/runner/llm-adapter.test.ts +37 -18
package/src/runner/llm-adapter.ts +90 -112
package/src/runner/llm-executor.test.ts +47 -6
package/src/runner/llm-executor.ts +18 -3
package/src/runner/mcp-client.audit.test.ts +69 -0
package/src/runner/mcp-client.test.ts +12 -3
package/src/runner/mcp-client.ts +199 -19
package/src/runner/mcp-manager.ts +19 -8
package/src/runner/mcp-server.test.ts +8 -5
package/src/runner/mcp-server.ts +31 -17
package/src/runner/optimization-runner.ts +305 -0
package/src/runner/reflexion.test.ts +87 -0
package/src/runner/shell-executor.test.ts +12 -0
package/src/runner/shell-executor.ts +9 -6
package/src/runner/step-executor.test.ts +240 -2
package/src/runner/step-executor.ts +183 -68
package/src/runner/stream-utils.test.ts +171 -0
package/src/runner/stream-utils.ts +186 -0
package/src/runner/workflow-runner.test.ts +4 -4
package/src/runner/workflow-runner.ts +438 -259
package/src/templates/agents/keystone-architect.md +6 -4
package/src/templates/full-feature-demo.yaml +4 -4
package/src/types/assets.d.ts +14 -0
package/src/types/status.ts +1 -1
package/src/ui/dashboard.tsx +38 -26
package/src/utils/auth-manager.ts +3 -1
package/src/utils/logger.test.ts +76 -0
package/src/utils/logger.ts +39 -0
package/src/utils/prompt.ts +75 -0
package/src/utils/redactor.test.ts +86 -4
package/src/utils/redactor.ts +48 -13

package/src/runner/foreach-executor.ts ADDED Viewed

@@ -0,0 +1,327 @@
+import { randomUUID } from 'node:crypto';
+import type { WorkflowDb } from '../db/workflow-db.ts';
+import { type ExpressionContext, ExpressionEvaluator } from '../expression/evaluator.ts';
+import type { Step } from '../parser/schema.ts';
+import { StepStatus, WorkflowStatus } from '../types/status.ts';
+import type { Logger } from '../utils/logger.ts';
+import { WorkflowSuspendedError } from './step-executor.ts';
+import type { ForeachStepContext, StepContext } from './workflow-runner.ts';
+export type ExecuteStepCallback = (
+  step: Step,
+  context: ExpressionContext,
+  stepExecId: string
+) => Promise<StepContext>;
+export class ForeachExecutor {
+  private static readonly MEMORY_WARNING_THRESHOLD = 1000;
+  private hasWarnedMemory = false;
+  constructor(
+    private db: WorkflowDb,
+    private logger: Logger,
+    private executeStepFn: ExecuteStepCallback
+  ) {}
+  /**
+   * Aggregate outputs from multiple iterations of a foreach step
+   */
+  public static aggregateOutputs(outputs: unknown[]): Record<string, unknown> {
+    const parentOutputs: Record<string, unknown> = {};
+    const validOutputs = outputs.filter((o) => o !== undefined);
+    if (validOutputs.length === 0) return parentOutputs;
+    // We can only aggregate objects, and we assume all outputs have similar shape
+    const firstOutput = validOutputs[0];
+    if (typeof firstOutput !== 'object' || firstOutput === null) {
+      return parentOutputs;
+    }
+    // Collect all keys from all outputs
+    const keys = new Set<string>();
+    for (const output of validOutputs) {
+      if (typeof output === 'object' && output !== null) {
+        for (const key of Object.keys(output)) {
+          keys.add(key);
+        }
+      }
+    }
+    // For each key, create an array of values
+    for (const key of keys) {
+      parentOutputs[key] = outputs.map((output) => {
+        if (typeof output === 'object' && output !== null) {
+          return (output as Record<string, unknown>)[key];
+        }
+        return undefined;
+      });
+    }
+    return parentOutputs;
+  }
+  /**
+   * Execute a step with foreach logic
+   */
+  async execute(
+    step: Step,
+    baseContext: ExpressionContext,
+    runId: string,
+    existingContext?: ForeachStepContext
+  ): Promise<ForeachStepContext> {
+    if (!step.foreach) {
+      throw new Error('Step is not a foreach step');
+    }
+    const items = ExpressionEvaluator.evaluate(step.foreach, baseContext);
+    if (!Array.isArray(items)) {
+      throw new Error(`foreach expression must evaluate to an array: ${step.foreach}`);
+    }
+    this.logger.log(`  ⤷ Executing step ${step.id} for ${items.length} items`);
+    if (items.length > ForeachExecutor.MEMORY_WARNING_THRESHOLD && !this.hasWarnedMemory) {
+      this.logger.warn(
+        `  ⚠️  Warning: Large foreach loop detected (${items.length} items). This may consume significant memory and lead to instability.`
+      );
+      this.hasWarnedMemory = true;
+    }
+    // Evaluate concurrency
+    let concurrencyLimit = items.length;
+    if (step.concurrency !== undefined) {
+      if (typeof step.concurrency === 'string') {
+        concurrencyLimit = Number(ExpressionEvaluator.evaluate(step.concurrency, baseContext));
+        if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
+          throw new Error(
+            `concurrency must evaluate to a positive integer, got: ${concurrencyLimit}`
+          );
+        }
+      } else {
+        concurrencyLimit = step.concurrency;
+        if (!Number.isInteger(concurrencyLimit) || concurrencyLimit <= 0) {
+          throw new Error(`concurrency must be a positive integer, got: ${concurrencyLimit}`);
+        }
+      }
+    }
+    // Create parent step record in DB
+    const parentStepExecId = randomUUID();
+    await this.db.createStep(parentStepExecId, runId, step.id);
+    await this.db.startStep(parentStepExecId);
+    // Persist the foreach items
+    await this.db.completeStep(parentStepExecId, StepStatus.PENDING, { __foreachItems: items });
+    try {
+      // Initialize results array
+      const itemResults: StepContext[] = existingContext?.items || new Array(items.length);
+      const shouldCheckDb = !!existingContext;
+      // Ensure array is correct length
+      if (itemResults.length !== items.length) {
+        itemResults.length = items.length;
+      }
+      // Worker pool implementation
+      let currentIndex = 0;
+      let aborted = false;
+      const workers = new Array(Math.min(concurrencyLimit, items.length))
+        .fill(null)
+        .map(async () => {
+          const nextIndex = () => {
+            if (aborted) return null;
+            if (currentIndex >= items.length) return null;
+            const i = currentIndex;
+            currentIndex += 1;
+            return i;
+          };
+          while (true) {
+            const i = nextIndex();
+            if (i === null) break;
+            if (aborted) break;
+            const item = items[i];
+            // Skip if already successful or skipped
+            if (
+              itemResults[i] &&
+              (itemResults[i].status === StepStatus.SUCCESS ||
+                itemResults[i].status === StepStatus.SKIPPED)
+            ) {
+              continue;
+            }
+            // Build item-specific context
+            const itemContext = {
+              ...baseContext,
+              item,
+              index: i,
+            };
+            // Check DB again for robustness (resume flows only)
+            const existingExec = shouldCheckDb
+              ? await this.db.getStepByIteration(runId, step.id, i)
+              : undefined;
+            if (
+              existingExec &&
+              (existingExec.status === StepStatus.SUCCESS ||
+                existingExec.status === StepStatus.SKIPPED)
+            ) {
+              let output: unknown = null;
+              let itemStatus = existingExec.status as
+                | typeof StepStatus.SUCCESS
+                | typeof StepStatus.SKIPPED
+                | typeof StepStatus.FAILED;
+              try {
+                output = existingExec.output ? JSON.parse(existingExec.output) : null;
+              } catch (error) {
+                this.logger.warn(
+                  `Failed to parse output for step ${step.id} iteration ${i}: ${error}`
+                );
+                output = { error: 'Failed to parse output' };
+                itemStatus = StepStatus.FAILED;
+                aborted = true; // Fail fast if we find corrupted data
+                try {
+                  await this.db.completeStep(
+                    existingExec.id,
+                    StepStatus.FAILED,
+                    output,
+                    'Failed to parse output'
+                  );
+                } catch (dbError) {
+                  this.logger.warn(
+                    `Failed to update DB for corrupted output on step ${step.id} iteration ${i}: ${dbError}`
+                  );
+                }
+              }
+              itemResults[i] = {
+                output,
+                outputs:
+                  typeof output === 'object' && output !== null && !Array.isArray(output)
+                    ? (output as Record<string, unknown>)
+                    : {},
+                status: itemStatus,
+              } as StepContext;
+              continue;
+            }
+            if (aborted) break;
+            const stepExecId = randomUUID();
+            await this.db.createStep(stepExecId, runId, step.id, i);
+            // Execute and store result
+            try {
+              if (aborted) break;
+              this.logger.log(`  ⤷ [${i + 1}/${items.length}] Executing iteration...`);
+              itemResults[i] = await this.executeStepFn(step, itemContext, stepExecId);
+              if (
+                itemResults[i].status === StepStatus.FAILED ||
+                itemResults[i].status === StepStatus.SUSPENDED
+              ) {
+                aborted = true;
+              }
+            } catch (error) {
+              aborted = true;
+              throw error;
+            }
+          }
+        });
+      const workerResults = await Promise.allSettled(workers);
+      // Check if any worker rejected (this would be due to an unexpected throw)
+      const firstError = workerResults.find((r) => r.status === 'rejected') as
+        | PromiseRejectedResult
+        | undefined;
+      if (firstError) {
+        throw firstError.reason;
+      }
+      // Aggregate results
+      const outputs = itemResults.map((r) => r?.output);
+      const allSuccess = itemResults.every((r) => r?.status === StepStatus.SUCCESS);
+      const anyFailed = itemResults.some((r) => r?.status === StepStatus.FAILED);
+      const anySuspended = itemResults.some((r) => r?.status === StepStatus.SUSPENDED);
+      // Aggregate usage
+      const aggregatedUsage = itemResults.reduce(
+        (acc, r) => {
+          if (r?.usage) {
+            acc.prompt_tokens += r.usage.prompt_tokens;
+            acc.completion_tokens += r.usage.completion_tokens;
+            acc.total_tokens += r.usage.total_tokens;
+          }
+          return acc;
+        },
+        { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
+      );
+      // Map child properties
+      const mappedOutputs = ForeachExecutor.aggregateOutputs(outputs);
+      // Determine final status
+      let finalStatus: (typeof StepStatus)[keyof typeof StepStatus] = StepStatus.FAILED;
+      if (allSuccess) {
+        finalStatus = StepStatus.SUCCESS;
+      } else if (anyFailed) {
+        finalStatus = StepStatus.FAILED;
+      } else if (anySuspended) {
+        finalStatus = StepStatus.SUSPENDED;
+      }
+      const aggregatedContext: ForeachStepContext = {
+        output: outputs,
+        outputs: mappedOutputs,
+        status: finalStatus,
+        items: itemResults,
+        usage: aggregatedUsage,
+      };
+      const persistedContext = {
+        ...aggregatedContext,
+        __foreachItems: items,
+      };
+      // Update parent step record
+      await this.db.completeStep(
+        parentStepExecId,
+        finalStatus,
+        persistedContext,
+        finalStatus === StepStatus.FAILED ? 'One or more iterations failed' : undefined
+      );
+      if (finalStatus === StepStatus.SUSPENDED) {
+        const suspendedItem = itemResults.find((r) => r.status === StepStatus.SUSPENDED);
+        throw new WorkflowSuspendedError(
+          suspendedItem?.error || 'Iteration suspended',
+          step.id,
+          'text'
+        );
+      }
+      if (finalStatus === StepStatus.FAILED) {
+        throw new Error(`Step ${step.id} failed: one or more iterations failed`);
+      }
+      return aggregatedContext;
+    } catch (error) {
+      if (error instanceof WorkflowSuspendedError) {
+        throw error;
+      }
+      // Mark parent step as failed (if not already handled)
+      const errorMsg = error instanceof Error ? error.message : String(error);
+      try {
+        await this.db.completeStep(parentStepExecId, StepStatus.FAILED, null, errorMsg);
+      } catch (dbError) {
+        this.logger.error(`Failed to update DB on foreach error: ${dbError}`);
+      }
+      throw error;
+    }
+  }
+}

package/src/runner/llm-adapter.test.ts CHANGED Viewed

@@ -105,7 +105,9 @@ describe('AnthropicAdapter', () => {
     // @ts-ignore
     const fetchMock = global.fetch as MockFetch;
     // @ts-ignore
-    const [url, init] = fetchMock.mock.calls[0];
+    // @ts-ignore
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const [url, init] = fetchMock.mock.calls[0] as [string, any];
     expect(url).toBe('https://api.anthropic.com/v1/messages');
     expect(init.headers['x-api-key']).toBe('fake-anthropic-key');
@@ -179,7 +181,8 @@ describe('AnthropicAdapter', () => {
     ]);
     // @ts-ignore
-    const init = global.fetch.mock.calls[0][1];
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const init = global.fetch.mock.calls[0][1] as any;
     const body = JSON.parse(init.body);
     expect(body.messages[0].role).toBe('assistant');
     expect(body.messages[0].content).toHaveLength(2);
@@ -208,7 +211,8 @@ describe('AnthropicAdapter', () => {
     ]);
     // @ts-ignore
-    const init = global.fetch.mock.calls[0][1];
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const init = global.fetch.mock.calls[0][1] as any;
     const body = JSON.parse(init.body);
     expect(body.messages[0].role).toBe('user');
     expect(body.messages[0].content[0]).toEqual({
@@ -255,7 +259,9 @@ describe('CopilotAdapter', () => {
     // @ts-ignore
     const fetchMock = global.fetch as MockFetch;
     // @ts-ignore
-    const [url, init] = fetchMock.mock.calls[0];
+    // @ts-ignore
+    // biome-ignore lint/suspicious/noExplicitAny: mock fetch init
+    const [url, init] = fetchMock.mock.calls[0] as [string, any];
     expect(url).toBe('https://api.githubcopilot.com/chat/completions');
     expect(init.headers.Authorization).toBe('Bearer mock-token');
     spy.mockRestore();
@@ -272,33 +278,41 @@ describe('CopilotAdapter', () => {
 describe('getAdapter', () => {
   beforeEach(() => {
-    spyOn(ConfigLoader, 'getProviderForModel').mockImplementation((model: string) => {
-      if (model.startsWith('claude')) return 'anthropic';
-      if (model.startsWith('gpt')) return 'openai';
-      if (model.startsWith('copilot')) return 'copilot';
-      return 'openai';
-    });
-    // @ts-ignore
-    spyOn(ConfigLoader, 'load').mockReturnValue({
+    // Setup a clean config for each test
+    ConfigLoader.setConfig({
+      default_provider: 'openai',
       providers: {
         openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
         anthropic: { type: 'anthropic', api_key_env: 'ANTHROPIC_API_KEY' },
         copilot: { type: 'copilot' },
       },
+      model_mappings: {
+        'claude-*': 'anthropic',
+        'gpt-*': 'openai',
+        'copilot:*': 'copilot',
+      },
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
+      mcp_servers: {},
     });
   });
   afterEach(() => {
-    mock.restore();
+    ConfigLoader.clear();
   });
   it('should return OpenAIAdapter for gpt models', () => {
+    // ConfigLoader.getProviderForModel logic will handle this
     const { adapter, resolvedModel } = getAdapter('gpt-4');
     expect(adapter).toBeInstanceOf(OpenAIAdapter);
     expect(resolvedModel).toBe('gpt-4');
   });
   it('should return AnthropicAdapter for claude models', () => {
+    // Explicit mapping in our mock config above covers this if ConfigLoader logic works
+    // Or we rely on model name prefix if ConfigLoader has that default logic
+    // Let's ensure the mapping exists if we removed the spy
+    // ConfigLoader.getProviderForModel uses: explicit mapping OR default provider
     const { adapter, resolvedModel } = getAdapter('claude-3');
     expect(adapter).toBeInstanceOf(AnthropicAdapter);
     expect(resolvedModel).toBe('claude-3');
@@ -311,11 +325,16 @@ describe('getAdapter', () => {
   });
   it('should throw error for unknown provider', () => {
-    // @ts-ignore
-    ConfigLoader.getProviderForModel.mockReturnValue('unknown');
-    // @ts-ignore
-    ConfigLoader.load.mockReturnValue({ providers: {} });
+    // Set config with empty providers to force error
+    ConfigLoader.setConfig({
+      default_provider: 'unknown',
+      providers: {}, // No providers configured
+      model_mappings: {},
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
+      mcp_servers: {},
+    });
-    expect(() => getAdapter('unknown-model')).toThrow(/Provider configuration not found/);
+    expect(() => getAdapter('unknown-model')).toThrow();
   });
 });