npm - keystone-cli - Versions diffs - 0.5.0 → 0.6.0 - Mend

keystone-cli 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/README.md +55 -8
package/package.json +5 -3
package/src/cli.ts +33 -192
package/src/db/memory-db.test.ts +54 -0
package/src/db/memory-db.ts +122 -0
package/src/db/sqlite-setup.ts +49 -0
package/src/db/workflow-db.test.ts +41 -10
package/src/db/workflow-db.ts +84 -28
package/src/expression/evaluator.test.ts +19 -0
package/src/expression/evaluator.ts +134 -39
package/src/parser/schema.ts +41 -0
package/src/runner/audit-verification.test.ts +23 -0
package/src/runner/auto-heal.test.ts +64 -0
package/src/runner/debug-repl.test.ts +74 -0
package/src/runner/debug-repl.ts +225 -0
package/src/runner/foreach-executor.ts +327 -0
package/src/runner/llm-adapter.test.ts +27 -14
package/src/runner/llm-adapter.ts +90 -112
package/src/runner/llm-executor.test.ts +91 -6
package/src/runner/llm-executor.ts +26 -6
package/src/runner/mcp-client.audit.test.ts +69 -0
package/src/runner/mcp-client.test.ts +12 -3
package/src/runner/mcp-client.ts +199 -19
package/src/runner/mcp-manager.ts +19 -8
package/src/runner/mcp-server.test.ts +8 -5
package/src/runner/mcp-server.ts +31 -17
package/src/runner/optimization-runner.ts +305 -0
package/src/runner/reflexion.test.ts +87 -0
package/src/runner/shell-executor.test.ts +12 -0
package/src/runner/shell-executor.ts +9 -6
package/src/runner/step-executor.test.ts +46 -1
package/src/runner/step-executor.ts +154 -60
package/src/runner/stream-utils.test.ts +65 -0
package/src/runner/stream-utils.ts +186 -0
package/src/runner/workflow-runner.test.ts +4 -4
package/src/runner/workflow-runner.ts +436 -251
package/src/templates/agents/keystone-architect.md +6 -4
package/src/templates/full-feature-demo.yaml +4 -4
package/src/types/assets.d.ts +14 -0
package/src/types/status.ts +1 -1
package/src/ui/dashboard.tsx +38 -26
package/src/utils/auth-manager.ts +3 -1
package/src/utils/logger.test.ts +76 -0
package/src/utils/logger.ts +39 -0
package/src/utils/prompt.ts +75 -0
package/src/utils/redactor.test.ts +86 -4
package/src/utils/redactor.ts +48 -13

package/src/runner/step-executor.ts CHANGED Viewed

@@ -1,9 +1,11 @@
+import type { MemoryDb } from '../db/memory-db.ts';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 // Removed synchronous file I/O imports - using Bun's async file API instead
 import type {
   FileStep,
   HumanStep,
+  MemoryStep,
   RequestStep,
   ScriptStep,
   ShellStep,
@@ -11,12 +13,17 @@ import type {
   Step,
   WorkflowStep,
 } from '../parser/schema.ts';
+import { ConsoleLogger, type Logger } from '../utils/logger.ts';
+import { getAdapter } from './llm-adapter.ts';
 import { detectShellInjectionRisk, executeShell } from './shell-executor.ts';
-import type { Logger } from './workflow-runner.ts';
+import * as fs from 'node:fs';
+import * as os from 'node:os';
+import * as path from 'node:path';
 import * as readline from 'node:readline/promises';
 import { SafeSandbox } from '../utils/sandbox.ts';
 import { executeLlmStep } from './llm-executor.ts';
+import { validateRemoteUrl } from './mcp-client.ts';
 import type { MCPManager } from './mcp-manager.ts';
 export class WorkflowSuspendedError extends Error {
@@ -47,9 +54,10 @@ export interface StepResult {
 export async function executeStep(
   step: Step,
   context: ExpressionContext,
-  logger: Logger = console,
+  logger: Logger = new ConsoleLogger(),
   executeWorkflowFn?: (step: WorkflowStep, context: ExpressionContext) => Promise<StepResult>,
   mcpManager?: MCPManager,
+  memoryDb?: MemoryDb,
   workflowDir?: string,
   dryRun?: boolean
 ): Promise<StepResult> {
@@ -75,12 +83,16 @@ export async function executeStep(
         result = await executeLlmStep(
           step,
           context,
-          (s, c) => executeStep(s, c, logger, executeWorkflowFn, mcpManager, workflowDir, dryRun),
+          (s, c) =>
+            executeStep(s, c, logger, executeWorkflowFn, mcpManager, memoryDb, workflowDir, dryRun),
           logger,
           mcpManager,
           workflowDir
         );
         break;
+      case 'memory':
+        result = await executeMemoryStep(step, context, logger, memoryDb);
+        break;
       case 'workflow':
         if (!executeWorkflowFn) {
           throw new Error('Workflow executor not provided');
@@ -150,44 +162,10 @@ async function executeShellStep(
   const command = ExpressionEvaluator.evaluateString(step.run, context);
   const isRisky = detectShellInjectionRisk(command);
-  if (isRisky) {
-    // Check if we have a resume approval
-    const stepInputs = context.inputs
-      ? (context.inputs as Record<string, unknown>)[step.id]
-      : undefined;
-    if (
-      stepInputs &&
-      typeof stepInputs === 'object' &&
-      '__approved' in stepInputs &&
-      stepInputs.__approved === true
-    ) {
-      // Already approved, proceed
-    } else {
-      const message = `Potentially risky shell command detected: ${command}`;
-      if (!process.stdin.isTTY) {
-        return {
-          output: null,
-          status: 'suspended',
-          error: `APPROVAL_REQUIRED: ${message}`,
-        };
-      }
-      const rl = readline.createInterface({
-        input: process.stdin,
-        output: process.stdout,
-      });
-      try {
-        logger.warn(`\n⚠️  ${message}`);
-        const answer = (await rl.question('Do you want to execute this command? (y/N): ')).trim();
-        if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
-          throw new Error('Command execution denied by user');
-        }
-      } finally {
-        rl.close();
-      }
-    }
+  if (isRisky && !step.allowInsecure) {
+    throw new Error(
+      `Security Error: Command contains shell metacharacters that may indicate injection risk.\n   Command: ${command.substring(0, 100)}${command.length > 100 ? '...' : ''}\n   To execute this command, set 'allowInsecure: true' on the step definition.`
+    );
   }
   const result = await executeShell(step, context, logger);
@@ -227,22 +205,62 @@ async function executeFileStep(
   _logger: Logger,
   dryRun?: boolean
 ): Promise<StepResult> {
-  const path = ExpressionEvaluator.evaluateString(step.path, context);
+  const rawPath = ExpressionEvaluator.evaluateString(step.path, context);
+  // Security: Prevent path traversal
+  const cwd = process.cwd();
+  const resolvedPath = path.resolve(cwd, rawPath);
+  const realCwd = fs.realpathSync(cwd);
+  const isWithin = (target: string) => {
+    const relativePath = path.relative(realCwd, target);
+    return !(relativePath.startsWith('..') || path.isAbsolute(relativePath));
+  };
+  const getExistingAncestorRealPath = (start: string) => {
+    let current = start;
+    while (!fs.existsSync(current)) {
+      const parent = path.dirname(current);
+      if (parent === current) {
+        break;
+      }
+      current = parent;
+    }
+    if (!fs.existsSync(current)) {
+      return realCwd;
+    }
+    return fs.realpathSync(current);
+  };
+  if (!step.allowOutsideCwd) {
+    if (fs.existsSync(resolvedPath)) {
+      const realTarget = fs.realpathSync(resolvedPath);
+      if (!isWithin(realTarget)) {
+        throw new Error(`Access denied: Path '${rawPath}' resolves outside the working directory.`);
+      }
+    } else {
+      const realParent = getExistingAncestorRealPath(path.dirname(resolvedPath));
+      if (!isWithin(realParent)) {
+        throw new Error(`Access denied: Path '${rawPath}' resolves outside the working directory.`);
+      }
+    }
+  }
+  // Use resolved path for operations
+  const targetPath = resolvedPath;
   if (dryRun && step.op !== 'read') {
     const opVerb = step.op === 'write' ? 'write to' : 'append to';
-    _logger.log(`[DRY RUN] Would ${opVerb} file: ${path}`);
+    _logger.log(`[DRY RUN] Would ${opVerb} file: ${targetPath}`);
     return {
-      output: { path, bytes: 0 },
+      output: { path: targetPath, bytes: 0 },
       status: 'success',
     };
   }
   switch (step.op) {
     case 'read': {
-      const file = Bun.file(path);
+      const file = Bun.file(targetPath);
       if (!(await file.exists())) {
-        throw new Error(`File not found: ${path}`);
+        throw new Error(`File not found: ${targetPath}`);
       }
       const content = await file.text();
       return {
@@ -258,14 +276,14 @@ async function executeFileStep(
       const content = ExpressionEvaluator.evaluateString(step.content, context);
       // Ensure parent directory exists
-      const fs = await import('node:fs/promises');
-      const pathModule = await import('node:path');
-      const dir = pathModule.dirname(path);
-      await fs.mkdir(dir, { recursive: true });
+      const dir = path.dirname(targetPath);
+      if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+      }
-      const bytes = await Bun.write(path, content);
+      await Bun.write(targetPath, content);
       return {
-        output: { path, bytes },
+        output: { path: targetPath, bytes: content.length },
         status: 'success',
       };
     }
@@ -277,16 +295,15 @@ async function executeFileStep(
       const content = ExpressionEvaluator.evaluateString(step.content, context);
       // Ensure parent directory exists
-      const fs = await import('node:fs/promises');
-      const pathModule = await import('node:path');
-      const dir = pathModule.dirname(path);
-      await fs.mkdir(dir, { recursive: true });
+      const dir = path.dirname(targetPath);
+      if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+      }
-      // Use Node.js fs for efficient append operation
-      await fs.appendFile(path, content, 'utf-8');
+      fs.appendFileSync(targetPath, content);
       return {
-        output: { path, bytes: content.length },
+        output: { path: targetPath, bytes: content.length },
         status: 'success',
       };
     }
@@ -306,6 +323,9 @@ async function executeRequestStep(
 ): Promise<StepResult> {
   const url = ExpressionEvaluator.evaluateString(step.url, context);
+  // Validate URL to prevent SSRF
+  await validateRemoteUrl(url);
   // Evaluate headers
   const headers: Record<string, string> = {};
   if (step.headers) {
@@ -486,6 +506,13 @@ async function executeScriptStep(
   _logger: Logger
 ): Promise<StepResult> {
   try {
+    if (!step.allowInsecure) {
+      throw new Error(
+        'Script execution is disabled by default because Bun uses an insecure VM sandbox. ' +
+          "Set 'allowInsecure: true' on the script step to run it anyway."
+      );
+    }
     const result = await SafeSandbox.execute(
       step.run,
       {
@@ -495,7 +522,7 @@ async function executeScriptStep(
         env: context.env,
       },
       {
-        allowInsecureFallback: step.allowInsecure,
+        timeout: step.timeout,
       }
     );
@@ -511,3 +538,70 @@ async function executeScriptStep(
     };
   }
 }
+/**
+ * Run a `memory` step: embed a piece of text and either persist it ("store")
+ * or use it to look up previously stored entries ("search").
+ *
+ * A missing memory database is reported by throwing. Every failure raised
+ * after that point (no embedding support, empty text/query, unknown op,
+ * adapter or database errors) is converted into a `failed` StepResult.
+ */
+async function executeMemoryStep(
+  step: MemoryStep,
+  context: ExpressionContext,
+  logger: Logger,
+  memoryDb?: MemoryDb
+): Promise<StepResult> {
+  if (!memoryDb) {
+    throw new Error('Memory database not initialized');
+  }
+  try {
+    const modelName = step.model || 'local';
+    const { adapter, resolvedModel } = getAdapter(modelName);
+    if (!adapter.embed) {
+      throw new Error(`Provider for model ${modelName} does not support embeddings`);
+    }
+    switch (step.op) {
+      case 'store': {
+        const text = step.text ? ExpressionEvaluator.evaluateString(step.text, context) : '';
+        if (!text) {
+          throw new Error('Text is required for memory store operation');
+        }
+        // Only the first 50 characters are shown in the log line.
+        const preview = text.length > 50 ? `${text.substring(0, 50)}...` : text;
+        logger.log(`  💾 Storing in memory: ${preview}`);
+        const vector = await adapter.embed(text, resolvedModel);
+        // biome-ignore lint/suspicious/noExplicitAny: metadata typing
+        let metadata: Record<string, any> = {};
+        if (step.metadata) {
+          // biome-ignore lint/suspicious/noExplicitAny: metadata typing
+          metadata = ExpressionEvaluator.evaluateObject(step.metadata, context) as Record<string, any>;
+        }
+        const id = await memoryDb.store(text, vector, metadata);
+        return { output: { id, status: 'stored' }, status: 'success' };
+      }
+      case 'search': {
+        const query = step.query ? ExpressionEvaluator.evaluateString(step.query, context) : '';
+        if (!query) {
+          throw new Error('Query is required for memory search operation');
+        }
+        logger.log(`  🔍 Recalling memory: "${query}"`);
+        const vector = await adapter.embed(query, resolvedModel);
+        const matches = await memoryDb.search(vector, step.limit);
+        return { output: matches, status: 'success' };
+      }
+      default:
+        throw new Error(`Unknown memory operation: ${step.op}`);
+    }
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error);
+    return { output: null, status: 'failed', error: reason };
+  }
+}

package/src/runner/stream-utils.test.ts ADDED Viewed

@@ -0,0 +1,65 @@
+import { describe, expect, it, mock } from 'bun:test';
+import { processOpenAIStream } from './stream-utils';
+const encoder = new TextEncoder();
+/**
+ * Build a Response whose body emits each given string as one UTF-8 encoded
+ * chunk, in order, and then closes.
+ */
+function responseFromChunks(chunks: string[]): Response {
+  const body = new ReadableStream({
+    start(controller) {
+      // Everything is queued up front; the stream is closed immediately after.
+      chunks.forEach((text) => {
+        controller.enqueue(encoder.encode(text));
+      });
+      controller.close();
+    },
+  });
+  return new Response(body);
+}
+describe('processOpenAIStream', () => {
+  it('accumulates content and tool calls across chunks', async () => {
+    const streamSpy = mock(() => {});
+    // Two content deltas plus a tool call whose arguments are split over two events.
+    const ssePayload = [
+      'data: {"choices":[{"delta":{"content":"hello "}}]}\n',
+      'data: {"choices":[{"delta":{"content":"world","tool_calls":[{"index":0,"id":"call_1","function":{"name":"my_tool","arguments":"{\\"arg\\":"}}]}}]}\n',
+      'data: {"choices":[{"delta":{"tool_calls":[{"index":0,"function":{"arguments":"1}"}}]}}]}\n',
+      'data: [DONE]\n',
+    ];
+    const { message } = await processOpenAIStream(responseFromChunks(ssePayload), {
+      onStream: streamSpy,
+    });
+    expect(message.content).toBe('hello world');
+    expect(streamSpy).toHaveBeenCalledTimes(2);
+    const firstToolCall = message.tool_calls?.[0];
+    expect(firstToolCall?.function?.name).toBe('my_tool');
+    expect(firstToolCall?.function?.arguments).toBe('{"arg":1}');
+  });
+  it('parses a final line without a newline', async () => {
+    const streamSpy = mock(() => {});
+    // The only event has no trailing "\n", so it is still pending when the stream ends.
+    const unterminated = responseFromChunks(['data: {"choices":[{"delta":{"content":"tail"}}]}']);
+    const { message } = await processOpenAIStream(unterminated, { onStream: streamSpy });
+    expect(message.content).toBe('tail');
+    expect(streamSpy).toHaveBeenCalledTimes(1);
+  });
+  it('logs malformed JSON and continues processing', async () => {
+    const spyLogger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+    };
+    const ssePayload = [
+      'data: {bad json}\n',
+      'data: {"choices":[{"delta":{"content":"ok"}}]}\n',
+      'data: [DONE]\n',
+    ];
+    const { message } = await processOpenAIStream(responseFromChunks(ssePayload), {
+      logger: spyLogger,
+    });
+    expect(message.content).toBe('ok');
+    expect(spyLogger.warn).toHaveBeenCalledTimes(1);
+  });
+});

package/src/runner/stream-utils.ts ADDED Viewed

@@ -0,0 +1,186 @@
+import { ConsoleLogger, type Logger } from '../utils/logger.ts';
+import type { LLMResponse, LLMToolCall } from './llm-adapter.ts';
+// Maximum response size to prevent memory exhaustion (1MB)
+// Note: processOpenAIStream compares this against string `.length` values
+// (UTF-16 code units), even though its error messages report "bytes".
+const MAX_RESPONSE_SIZE = 1024 * 1024;
+// Cap on the pending stream text held in processOpenAIStream's line buffer.
+const MAX_BUFFER_SIZE = MAX_RESPONSE_SIZE;
+// Shape that each entry of a streamed `delta.tool_calls` array is cast to.
+type ToolCallDelta = {
+  // Slot in the accumulated tool-call array that this fragment belongs to.
+  index: number;
+  // Copied into the accumulated tool call when its slot is first created.
+  id?: string;
+  function?: {
+    // Name fragment; appended to the name accumulated so far.
+    name?: string;
+    // Arguments fragment; appended to the arguments string accumulated so far.
+    arguments?: string;
+  };
+};
+/**
+ * Raised when accumulated stream data outgrows MAX_RESPONSE_SIZE.
+ * A dedicated type lets the line handler rethrow size violations via
+ * `instanceof` instead of matching on message text (the previous substring
+ * match missed the "exceeds maximum size" content error and swallowed it).
+ */
+class StreamSizeLimitError extends Error {}
+/** Mutable state built up while consuming a streamed completion. */
+type StreamAccumulator = {
+  content: string;
+  toolCalls: LLMToolCall[];
+};
+/** Optional hooks accepted by processOpenAIStream. */
+type StreamOptions = { onStream?: (chunk: string) => void; logger?: Logger };
+/**
+ * Merge one tool-call fragment into the accumulator, creating its slot on
+ * first sight and appending name/argument pieces afterwards.
+ *
+ * @throws StreamSizeLimitError when content plus all buffered arguments plus
+ *   the new piece would exceed MAX_RESPONSE_SIZE.
+ */
+function mergeToolCallDelta(acc: StreamAccumulator, fragment: ToolCallDelta): void {
+  if (!acc.toolCalls[fragment.index]) {
+    acc.toolCalls[fragment.index] = {
+      id: fragment.id,
+      type: 'function',
+      function: { name: '', arguments: '' },
+    };
+  }
+  const existing = acc.toolCalls[fragment.index];
+  if (fragment.function?.name) existing.function.name += fragment.function.name;
+  const argsPiece = fragment.function?.arguments;
+  if (!argsPiece) return;
+  // The array may be sparse (slots are addressed by index), hence the optional chaining.
+  const bufferedArgs = acc.toolCalls.reduce(
+    (sum, call) => sum + (call?.function?.arguments?.length || 0),
+    0
+  );
+  if (acc.content.length + bufferedArgs + argsPiece.length > MAX_RESPONSE_SIZE) {
+    throw new StreamSizeLimitError(
+      `LLM tool call arguments exceed maximum size of ${MAX_RESPONSE_SIZE} bytes`
+    );
+  }
+  existing.function.arguments += argsPiece;
+}
+/**
+ * Handle a single line of the event stream.
+ *
+ * Blank lines, `data: [DONE]` and lines without the `data: ` prefix are
+ * ignored. Malformed JSON and other per-line failures are logged as warnings
+ * and skipped; size-limit violations are rethrown.
+ *
+ * @param rawLine line as split from the buffer (untrimmed)
+ * @param isFinalLine true for the unterminated remainder left after the stream ended;
+ *   only affects the wording of warnings
+ */
+function consumeStreamLine(
+  rawLine: string,
+  acc: StreamAccumulator,
+  options: StreamOptions | undefined,
+  streamLabel: string,
+  isFinalLine: boolean
+): void {
+  const trimmedLine = rawLine.trim();
+  if (trimmedLine === '' || trimmedLine === 'data: [DONE]') return;
+  if (!trimmedLine.startsWith('data: ')) return;
+  try {
+    const data = JSON.parse(trimmedLine.slice(6));
+    // Some proxies might return a null delta or omit `choices`.
+    const delta = data.choices?.[0]?.delta;
+    if (!delta) return;
+    if (delta.content) {
+      if (acc.content.length + delta.content.length > MAX_RESPONSE_SIZE) {
+        throw new StreamSizeLimitError(
+          `LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`
+        );
+      }
+      acc.content += delta.content;
+      options?.onStream?.(delta.content);
+    }
+    if (delta.tool_calls) {
+      for (const tc of delta.tool_calls) {
+        mergeToolCallDelta(acc, tc as ToolCallDelta);
+      }
+    }
+  } catch (e) {
+    // Size limit errors must bubble up so the caller aborts the stream.
+    if (e instanceof StreamSizeLimitError) {
+      throw e;
+    }
+    const activeLogger = options?.logger || new ConsoleLogger();
+    if (e instanceof SyntaxError) {
+      const shown = isFinalLine ? trimmedLine : rawLine;
+      activeLogger.warn(`[${streamLabel} Stream] Malformed JSON line: ${shown.slice(0, 80)}...`);
+    } else {
+      const where = isFinalLine ? 'final line' : 'chunk';
+      activeLogger.warn(`[${streamLabel} Stream] Error processing ${where}: ${e}`);
+    }
+  }
+}
+/**
+ * Consume an OpenAI-style streamed chat completion (`data: {...}` lines) and
+ * assemble the assistant message from its deltas.
+ *
+ * @param response HTTP response whose body carries the event stream
+ * @param options `onStream` is invoked with every content delta as it arrives;
+ *   `logger` receives warnings about lines that could not be processed
+ *   (a ConsoleLogger is used when omitted)
+ * @param streamLabel prefix used in warning messages
+ * @returns the accumulated message; `content` is null when no text was received and
+ *   `tool_calls` is undefined when no tool call was received
+ * @throws Error when the response has no body, when the pending buffer exceeds
+ *   MAX_BUFFER_SIZE, or when content/tool-call arguments exceed MAX_RESPONSE_SIZE
+ */
+export async function processOpenAIStream(
+  response: Response,
+  options?: { onStream?: (chunk: string) => void; logger?: Logger },
+  streamLabel = 'OpenAI'
+): Promise<LLMResponse> {
+  if (!response.body) throw new Error('Response body is null');
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  const acc: StreamAccumulator = { content: '', toolCalls: [] };
+  let buffer = '';
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+      if (buffer.length > MAX_BUFFER_SIZE) {
+        throw new Error(`LLM stream line exceed maximum size of ${MAX_BUFFER_SIZE} bytes`);
+      }
+      const lines = buffer.split('\n');
+      // Keep the last partial line in the buffer
+      buffer = lines.pop() ?? '';
+      for (const line of lines) {
+        consumeStreamLine(line, acc, options, streamLabel, false);
+      }
+    }
+    // Flush any bytes the streaming decoder is still holding back.
+    buffer += decoder.decode();
+  } catch (error) {
+    try {
+      await reader.cancel();
+    } catch {
+      // Ignore cancel errors while bubbling up the original issue.
+    }
+    throw error;
+  }
+  // Final check for any remaining data in the buffer (in case of no final newline)
+  consumeStreamLine(buffer, acc, options, streamLabel, true);
+  return {
+    message: {
+      role: 'assistant',
+      content: acc.content || null,
+      // filter(Boolean) drops holes left by non-contiguous tool-call indexes.
+      tool_calls: acc.toolCalls.length > 0 ? acc.toolCalls.filter(Boolean) : undefined,
+    },
+  };
+}

package/src/runner/workflow-runner.test.ts CHANGED Viewed

@@ -457,10 +457,10 @@ describe('WorkflowRunner', () => {
     // Check DB status - parent should be 'paused' and step should be 'suspended'
     const db = new WorkflowDb(resumeDbPath);
-    const run = db.getRun(runId);
+    const run = await db.getRun(runId);
     expect(run?.status).toBe('paused');
-    const steps = db.getStepsByRun(runId);
+    const steps = await db.getStepsByRun(runId);
     const parentStep = steps.find(
       (s: { step_id: string; iteration_index: number | null }) =>
         s.step_id === 'process' && s.iteration_index === null
@@ -481,8 +481,8 @@ describe('WorkflowRunner', () => {
     expect(outputs.results).toEqual(['ok', 'ok']);
     const finalDb = new WorkflowDb(resumeDbPath);
-    const finalRun = finalDb.getRun(runId);
-    expect(finalRun?.status).toBe('completed');
+    const finalRun = await finalDb.getRun(runId);
+    expect(finalRun?.status).toBe('success');
     finalDb.close();
     if (existsSync(resumeDbPath)) rmSync(resumeDbPath);