npm - keystone-cli - Versions diffs - 0.5.0 → 0.6.0 - Mend

keystone-cli 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/README.md +55 -8
package/package.json +5 -3
package/src/cli.ts +33 -192
package/src/db/memory-db.test.ts +54 -0
package/src/db/memory-db.ts +122 -0
package/src/db/sqlite-setup.ts +49 -0
package/src/db/workflow-db.test.ts +41 -10
package/src/db/workflow-db.ts +84 -28
package/src/expression/evaluator.test.ts +19 -0
package/src/expression/evaluator.ts +134 -39
package/src/parser/schema.ts +41 -0
package/src/runner/audit-verification.test.ts +23 -0
package/src/runner/auto-heal.test.ts +64 -0
package/src/runner/debug-repl.test.ts +74 -0
package/src/runner/debug-repl.ts +225 -0
package/src/runner/foreach-executor.ts +327 -0
package/src/runner/llm-adapter.test.ts +27 -14
package/src/runner/llm-adapter.ts +90 -112
package/src/runner/llm-executor.test.ts +91 -6
package/src/runner/llm-executor.ts +26 -6
package/src/runner/mcp-client.audit.test.ts +69 -0
package/src/runner/mcp-client.test.ts +12 -3
package/src/runner/mcp-client.ts +199 -19
package/src/runner/mcp-manager.ts +19 -8
package/src/runner/mcp-server.test.ts +8 -5
package/src/runner/mcp-server.ts +31 -17
package/src/runner/optimization-runner.ts +305 -0
package/src/runner/reflexion.test.ts +87 -0
package/src/runner/shell-executor.test.ts +12 -0
package/src/runner/shell-executor.ts +9 -6
package/src/runner/step-executor.test.ts +46 -1
package/src/runner/step-executor.ts +154 -60
package/src/runner/stream-utils.test.ts +65 -0
package/src/runner/stream-utils.ts +186 -0
package/src/runner/workflow-runner.test.ts +4 -4
package/src/runner/workflow-runner.ts +436 -251
package/src/templates/agents/keystone-architect.md +6 -4
package/src/templates/full-feature-demo.yaml +4 -4
package/src/types/assets.d.ts +14 -0
package/src/types/status.ts +1 -1
package/src/ui/dashboard.tsx +38 -26
package/src/utils/auth-manager.ts +3 -1
package/src/utils/logger.test.ts +76 -0
package/src/utils/logger.ts +39 -0
package/src/utils/prompt.ts +75 -0
package/src/utils/redactor.test.ts +86 -4
package/src/utils/redactor.ts +48 -13

package/src/runner/optimization-runner.ts ADDED Viewed

@@ -0,0 +1,305 @@
+import { readFileSync, writeFileSync } from 'node:fs';
+import { dirname } from 'node:path';
+import { stringify } from 'yaml';
+import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
+import type { LlmStep, Step, Workflow } from '../parser/schema';
+import { extractJson } from '../utils/json-parser';
+import { getAdapter } from './llm-adapter';
+import { executeLlmStep } from './llm-executor';
+import { WorkflowRunner } from './workflow-runner';
+/**
+ * Options accepted by the OptimizationRunner constructor.
+ */
+export interface OptimizationOptions {
+  /** Path of the workflow file; its directory is passed to the runner as `workflowDir`. */
+  workflowPath: string;
+  /** Id of the step whose prompt (llm) or `run` command (shell) is optimized. */
+  targetStepId: string;
+  /** Workflow inputs forwarded to every run; the runner falls back to `{}` when omitted. */
+  inputs?: Record<string, unknown>;
+  /** Number of optimization rounds; the runner falls back to 5 when omitted (or 0). */
+  iterations?: number;
+}
+/**
+ * Iteratively tunes the prompt of an LLM step (or the `run` command of a shell
+ * step) in a workflow.
+ *
+ * Each iteration runs the workflow with a candidate prompt, scores the outputs
+ * using the workflow's `eval` configuration (sandboxed script or LLM judge) and
+ * asks a meta-optimizer LLM for the next candidate. The best-scoring candidate
+ * is reported at the end; nothing is written back to disk automatically.
+ */
+export class OptimizationRunner {
+  private workflow: Workflow;
+  private workflowPath: string;
+  private targetStepId: string;
+  private iterations: number;
+  private inputs: Record<string, unknown>;
+  /**
+   * @param workflow Parsed workflow; `optimize()` requires it to carry an `eval` section.
+   * @param options Target step id, workflow path, inputs and iteration budget.
+   */
+  constructor(workflow: Workflow, options: OptimizationOptions) {
+    this.workflow = workflow;
+    this.workflowPath = options.workflowPath;
+    this.targetStepId = options.targetStepId;
+    // `||` (not `??`) on purpose: a missing or zero iteration count falls back to 5.
+    this.iterations = options.iterations || 5;
+    this.inputs = options.inputs || {};
+  }
+  /**
+   * Runs the optimization loop.
+   *
+   * @returns The best candidate seen and its score (the score starts at -1, so
+   *   the first evaluated candidate always becomes the initial best).
+   * @throws Error when the workflow has no `eval` configuration, or when the
+   *   target step is missing or is neither an `llm` nor a `shell` step.
+   */
+  public async optimize(): Promise<{ bestPrompt: string; bestScore: number }> {
+    if (!this.workflow.eval) {
+      throw new Error('Workflow is missing "eval" configuration');
+    }
+    const targetStep = this.workflow.steps.find((s) => s.id === this.targetStepId);
+    if (!targetStep || (targetStep.type !== 'llm' && targetStep.type !== 'shell')) {
+      throw new Error(`Target step "${this.targetStepId}" not found or is not an LLM/Shell step`);
+    }
+    console.log(`\n🚀 Optimizing step: ${this.targetStepId} (${targetStep.type})`);
+    console.log(`📊 Iterations: ${this.iterations}`);
+    let bestPrompt =
+      targetStep.type === 'llm'
+        ? (targetStep as LlmStep).prompt
+        : // biome-ignore lint/suspicious/noExplicitAny: generic step access
+          (targetStep as any).run;
+    let bestScore = -1;
+    let currentPrompt = bestPrompt;
+    for (let i = 1; i <= this.iterations; i++) {
+      console.log(`\n--- Iteration ${i}/${this.iterations} ---`);
+      console.log(
+        `Current Prompt: ${currentPrompt.substring(0, 100)}${currentPrompt.length > 100 ? '...' : ''}`
+      );
+      // 1. Run the whole workflow with the candidate swapped in (MVP: dependencies
+      // are re-run every iteration). The JSON round-trip gives a deep copy so the
+      // original workflow object is never mutated.
+      const modifiedWorkflow = JSON.parse(JSON.stringify(this.workflow));
+      const modifiedTargetStep = modifiedWorkflow.steps.find(
+        (s: { id: string }) => s.id === this.targetStepId
+      );
+      if (modifiedTargetStep.type === 'llm') {
+        modifiedTargetStep.prompt = currentPrompt;
+      } else {
+        modifiedTargetStep.run = currentPrompt;
+      }
+      const runner = new WorkflowRunner(modifiedWorkflow, {
+        inputs: this.inputs,
+        workflowDir: dirname(this.workflowPath),
+      });
+      const outputs = await runner.run();
+      // 2. Evaluate the result
+      const score = await this.evaluate(outputs);
+      console.log(`Score: ${score}/100`);
+      if (score > bestScore) {
+        bestScore = score;
+        bestPrompt = currentPrompt;
+        console.log(`✨ New best score: ${bestScore}`);
+      }
+      // 3. Suggest next prompt (if not last iteration)
+      if (i < this.iterations) {
+        currentPrompt = await this.suggestNextPrompt(currentPrompt, score, outputs);
+      }
+    }
+    await this.saveBestPrompt(bestPrompt);
+    return { bestPrompt, bestScore };
+  }
+  /** Minimal expression context shared by the evaluator and meta-optimizer calls. */
+  private buildContext() {
+    return {
+      inputs: this.inputs,
+      steps: {},
+      // biome-ignore lint/suspicious/noExplicitAny: environment access
+      secrets: Bun.env as any,
+      env: this.workflow.env,
+    };
+  }
+  /** Returns the first run of digits in `text` as an integer, or undefined when there is none. */
+  private extractScore(text: unknown): number | undefined {
+    if (typeof text !== 'string') return undefined;
+    const match = text.match(/\d+/);
+    return match ? Number.parseInt(match[0], 10) : undefined;
+  }
+  /**
+   * Scores one set of workflow outputs using the workflow's `eval` configuration.
+   *
+   * With `scorer: 'script'` the `run` script is executed in SafeSandbox with the
+   * outputs exposed as `outputs` / `output`; otherwise an LLM judge is asked for
+   * a JSON `{ score }`. Any failure to obtain a score yields 0.
+   *
+   * @param outputs Outputs returned by the workflow run under evaluation.
+   * @returns The numeric score, or 0 when no score could be determined.
+   */
+  private async evaluate(outputs: Record<string, unknown>): Promise<number> {
+    const { eval: evalConfig } = this.workflow;
+    if (!evalConfig) return 0;
+    if (evalConfig.scorer === 'script') {
+      // Expose the outputs to the script under both names.
+      const context = {
+        ...this.buildContext(),
+        outputs, // Direct access
+        output: outputs, // For convenience
+      };
+      const scriptStep: Step = {
+        id: 'evaluator',
+        type: 'script',
+        run: evalConfig.run || 'echo 0',
+      };
+      const { SafeSandbox } = await import('../utils/sandbox');
+      try {
+        const result = await SafeSandbox.execute(scriptStep.run, context, { timeout: 5000 });
+        if (typeof result === 'number') return result;
+        // Accept either a `{ stdout }` shaped result or a raw string.
+        const text =
+          typeof result === 'object' && result !== null && 'stdout' in result
+            ? (result as { stdout: unknown }).stdout
+            : result;
+        return this.extractScore(text) ?? 0;
+      } catch (e) {
+        console.error('Eval script failed:', e);
+      }
+      return 0;
+    }
+    // LLM Scorer
+    if (!evalConfig.agent || !evalConfig.prompt) {
+      console.warn('Skipping LLM evaluation: agent or prompt missing');
+      return 0;
+    }
+    const evalStep: LlmStep = {
+      id: 'evaluator',
+      type: 'llm',
+      agent: evalConfig.agent,
+      prompt: `${evalConfig.prompt}\n\nOutputs to evaluate:\n${JSON.stringify(outputs, null, 2)}`,
+      needs: [],
+      maxIterations: 10,
+      schema: {
+        type: 'object',
+        properties: {
+          score: { type: 'number', minimum: 0, maximum: 100 },
+        },
+        required: ['score'],
+      },
+    };
+    const result = await executeLlmStep(
+      evalStep,
+      // biome-ignore lint/suspicious/noExplicitAny: context typing
+      this.buildContext() as any,
+      async () => {
+        throw new Error('Tools not supported in eval');
+      },
+      console,
+      undefined,
+      dirname(this.workflowPath) // Pass workflowDir to resolve agent, as the meta-optimizer call does
+    );
+    if (result.status === 'success' && result.output && typeof result.output === 'object') {
+      // Only trust a real finite number; anything else counts as "no score".
+      const score = (result.output as { score?: unknown }).score;
+      return typeof score === 'number' && Number.isFinite(score) ? score : 0;
+    }
+    // Try to extract number if JSON failed but text output exists
+    return this.extractScore(result.output) ?? 0;
+  }
+  /**
+   * Asks the 'general' agent for a refined version of the current prompt.
+   *
+   * @param currentPrompt Candidate that was just evaluated.
+   * @param lastScore Score the candidate received.
+   * @param lastOutputs Workflow outputs produced with the candidate.
+   * @returns The suggested prompt, or `currentPrompt` when the meta-optimizer
+   *   fails or returns no usable text. When the call throws and the
+   *   `TEST_OPTIMIZER` env var is set, `currentPrompt` with a trailing `!` is
+   *   returned instead (test hook).
+   */
+  private async suggestNextPrompt(
+    currentPrompt: string,
+    lastScore: number,
+    lastOutputs: Record<string, unknown>
+  ): Promise<string> {
+    const metaStep: LlmStep = {
+      id: 'optimizer',
+      type: 'llm',
+      agent: 'general', // Or a specialized "optimizer" agent if available
+      needs: [],
+      maxIterations: 10,
+      prompt: `You are an expert prompt engineer. Your task is to optimize a system prompt to get a higher score.
+Current Prompt:
+"""
+${currentPrompt}
+"""
+Last Score: ${lastScore}/100
+Last Outputs:
+${JSON.stringify(lastOutputs, null, 2)}
+Evaluation Criteria:
+${this.workflow.eval?.prompt || this.workflow.eval?.run}
+Suggest a slightly modified version of the prompt that might improve the score.
+Maintain the same core instructions but refine the phrasing, add constraints, or clarify expectations.
+Return ONLY the new prompt text.`,
+    };
+    try {
+      const result = await executeLlmStep(
+        metaStep,
+        // biome-ignore lint/suspicious/noExplicitAny: context typing
+        this.buildContext() as any,
+        async () => {
+          throw new Error('Tools not supported in meta-opt');
+        },
+        console,
+        undefined,
+        dirname(this.workflowPath) // Pass workflowDir to resolve agent
+      );
+      if (result.status === 'success' && typeof result.output === 'string') {
+        const suggestion = result.output.trim();
+        // An empty suggestion would wipe the prompt for the next run; keep the current one.
+        if (suggestion) return suggestion;
+      }
+    } catch (e) {
+      console.warn(`  ⚠️ Meta-optimizer failed: ${e instanceof Error ? e.message : String(e)}`);
+      // Adding a dummy mutation for testing purposes if env var is set
+      if (Bun.env.TEST_OPTIMIZER) {
+        return `${currentPrompt}!`;
+      }
+    }
+    return currentPrompt; // Fallback to current
+  }
+  /**
+   * Reports the winning prompt and tells the user where to apply it.
+   *
+   * Nothing is written to disk: automatic replacement in arbitrary files is
+   * risky without robust parsing, so the result is only logged.
+   *
+   * @param prompt Best prompt (or `run` command) found by `optimize()`.
+   */
+  private async saveBestPrompt(prompt: string): Promise<void> {
+    console.log(`\n💾 Saving best prompt to ${this.workflowPath}`);
+    const targetStep = this.workflow.steps.find((s) => s.id === this.targetStepId);
+    console.log(`--- BEST PROMPT/RUN ---\n${prompt}\n-----------------------`);
+    if (targetStep?.type === 'llm') {
+      try {
+        // Resolve inside the try so a missing agent file is reported as a warning
+        // instead of aborting after the optimization has already finished.
+        const agentPath = resolveAgentPath(
+          (targetStep as LlmStep).agent,
+          dirname(this.workflowPath)
+        );
+        console.log(
+          `To apply this optimization, update the 'systemPrompt' or instruction in: ${agentPath}`
+        );
+      } catch (e) {
+        console.warn(`Could not locate agent file: ${e}`);
+      }
+    } else {
+      console.log(
+        `To apply this optimization, update the 'run' command for step '${this.targetStepId}' in ${this.workflowPath}`
+      );
+    }
+  }
+}

package/src/runner/reflexion.test.ts ADDED Viewed

@@ -0,0 +1,87 @@
+import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
+import type { Step, Workflow } from '../parser/schema';
+import * as StepExecutor from './step-executor';
+import { WorkflowRunner } from './workflow-runner';
+// Reflexion: when a step fails, the runner asks the LLM adapter for a corrected
+// step definition and retries. The adapter is injected as a mock via `getAdapter`.
+describe('WorkflowRunner Reflexion', () => {
+  beforeEach(() => {
+    jest.restoreAllMocks();
+  });
+  test('should attempt to self-correct a failing step using reflexion', async () => {
+    const workflow: Workflow = {
+      name: 'reflexion-test',
+      steps: [
+        {
+          id: 'fail-step',
+          type: 'shell',
+          run: 'exit 1',
+          reflexion: {
+            limit: 2,
+            hint: 'fix it',
+          },
+        } as Step,
+      ],
+    };
+    // Mock adapter whose chat reply proposes the corrected command.
+    const mockGetAdapter = () => ({
+      adapter: {
+        chat: async () => ({
+          message: {
+            content: JSON.stringify({ run: 'echo "fixed"' }),
+          },
+        }),
+        // biome-ignore lint/suspicious/noExplicitAny: mock adapter
+      } as any,
+      resolvedModel: 'mock-model',
+    });
+    const runner = new WorkflowRunner(workflow, {
+      logger: { log: () => {}, error: () => {}, warn: () => {} },
+      dbPath: ':memory:',
+      getAdapter: mockGetAdapter,
+    });
+    // biome-ignore lint/suspicious/noExplicitAny: Accessing private property for testing
+    const db = (runner as any).db;
+    await db.createRun(runner.getRunId(), workflow.name, {});
+    const spy = jest.spyOn(StepExecutor, 'executeStep');
+    try {
+      // First call fails, Reflexion logic kicks in (calling mocked getAdapter),
+      // then it retries with corrected command.
+      spy.mockImplementation(async (step, _context) => {
+        // Original failing command
+        // biome-ignore lint/suspicious/noExplicitAny: Accessing run property dynamically
+        if ((step as any).run === 'exit 1') {
+          return { status: 'failed', output: null, error: 'Command failed' };
+        }
+        // Corrected command from mock
+        // biome-ignore lint/suspicious/noExplicitAny: Accessing run property dynamically
+        if ((step as any).run === 'echo "fixed"') {
+          return { status: 'success', output: 'fixed' };
+        }
+        return { status: 'failed', output: null, error: 'Unknown step' };
+      });
+      // biome-ignore lint/suspicious/noExplicitAny: Accessing private property for testing
+      await (runner as any).executeStepWithForeach(workflow.steps[0]);
+      // Expectations:
+      // 1. First execution (fails)
+      // 2. Reflexion happens (internal, not executeStep)
+      // 3. Second execution (retry with new command)
+      expect(spy).toHaveBeenCalledTimes(2);
+      // Verify the second call had the corrected command
+      // biome-ignore lint/suspicious/noExplicitAny: mock call args typing
+      const secondCallArg = spy.mock.calls[1][0] as any;
+      expect(secondCallArg.run).toBe('echo "fixed"');
+    } finally {
+      // Restore even when an assertion throws so the spy cannot leak into other tests.
+      spy.mockRestore();
+    }
+  });
+});

package/src/runner/shell-executor.test.ts CHANGED Viewed

@@ -137,5 +137,17 @@ describe('shell-executor', () => {
       await expect(executeShell(step, context)).rejects.toThrow(/Security Error/);
     });
+    it('should allow flow control with semicolons', async () => {
+      const step: ShellStep = {
+        id: 'test',
+        type: 'shell',
+        needs: [],
+        run: 'if [ "1" = "1" ]; then echo "match"; fi',
+      };
+      const result = await executeShell(step, context);
+      expect(result.exitCode).toBe(0);
+      expect(result.stdout.trim()).toBe('match');
+    });
   });
 });

package/src/runner/shell-executor.ts CHANGED Viewed

@@ -29,7 +29,7 @@ import { $ } from 'bun';
 import type { ExpressionContext } from '../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../expression/evaluator.ts';
 import type { ShellStep } from '../parser/schema.ts';
-import type { Logger } from './workflow-runner.ts';
+import { ConsoleLogger, type Logger } from '../utils/logger.ts';
 /**
  * Escape a shell argument for safe use in shell commands
@@ -41,7 +41,7 @@ import type { Logger } from './workflow-runner.ts';
  *   - id: safe_echo
  *     type: shell
  *     # Use this pattern to safely interpolate user inputs:
- *     run: echo ${{ inputs.message }}  # Safe: expression evaluation happens first
+ *     run: echo ${{ escape(inputs.message) }}  # Safe: explicitly escaped
  *     # Avoid patterns like: sh -c "echo $USER_INPUT" where USER_INPUT is raw
  * ```
  */
@@ -63,7 +63,7 @@ export interface ShellResult {
 // Pre-compiled dangerous patterns for performance
 // These patterns are designed to detect likely injection attempts while minimizing false positives
 const DANGEROUS_PATTERNS: RegExp[] = [
-  /;\s*\w/, // Command chaining with semicolon (e.g., `; rm -rf /`)
+  /;\s*(?:rm|chmod|chown|mkfs|dd)\b/, // Command chaining with destructive commands
   /\|\s*(?:sh|bash|zsh|ksh|dash|csh|python|python[23]?|node|ruby|perl|php|lua)\b/, // Piping to shell/interpreter (download-and-execute pattern)
   /\|\s*(?:sudo|su)\b/, // Piping to privilege escalation
   /&&\s*(?:rm|chmod|chown|mkfs|dd)\b/, // AND chaining with destructive commands
@@ -98,9 +98,12 @@ const DANGEROUS_PATTERNS: RegExp[] = [
   /\d*>&-\s*/, // Closing file descriptors
 ];
+// Combine all patterns into single regex for O(m) matching instead of O(n×m)
+const COMBINED_DANGEROUS_PATTERN = new RegExp(DANGEROUS_PATTERNS.map((r) => r.source).join('|'));
 export function detectShellInjectionRisk(command: string): boolean {
-  // Check against pre-compiled patterns
-  return DANGEROUS_PATTERNS.some((pattern) => pattern.test(command));
+  // Use combined pattern for single-pass matching
+  return COMBINED_DANGEROUS_PATTERN.test(command);
 }
 /**
@@ -109,7 +112,7 @@ export function detectShellInjectionRisk(command: string): boolean {
 export async function executeShell(
   step: ShellStep,
   context: ExpressionContext,
-  logger: Logger = console
+  logger: Logger = new ConsoleLogger()
 ): Promise<ShellResult> {
   // Evaluate the command string
   const command = ExpressionEvaluator.evaluateString(step.run, context);

package/src/runner/step-executor.test.ts CHANGED Viewed

@@ -1,5 +1,17 @@
-import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, mock } from 'bun:test';
+import {
+  afterAll,
+  afterEach,
+  beforeAll,
+  beforeEach,
+  describe,
+  expect,
+  it,
+  mock,
+  spyOn,
+} from 'bun:test';
+import * as dns from 'node:dns/promises';
 import { mkdirSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import type { ExpressionContext } from '../expression/evaluator';
 import type {
@@ -187,6 +199,34 @@ describe('step-executor', () => {
       expect(result.status).toBe('failed');
       expect(result.error).toContain('Unknown file operation');
     });
+    it('should allow file paths outside cwd when allowOutsideCwd is true', async () => {
+      const outsidePath = join(tmpdir(), `keystone-test-${Date.now()}.txt`);
+      const writeStep: FileStep = {
+        id: 'w-outside',
+        type: 'file',
+        needs: [],
+        op: 'write',
+        path: outsidePath,
+        content: 'outside',
+        allowOutsideCwd: true,
+      };
+      try {
+        const writeResult = await executeStep(writeStep, context);
+        expect(writeResult.status).toBe('success');
+        const content = await Bun.file(outsidePath).text();
+        expect(content).toBe('outside');
+      } finally {
+        try {
+          rmSync(outsidePath);
+        } catch (e) {
+          // Ignore cleanup errors
+        }
+      }
+    });
   });
   describe('sleep', () => {
@@ -207,14 +247,19 @@ describe('step-executor', () => {
   describe('request', () => {
     const originalFetch = global.fetch;
+    let lookupSpy: ReturnType<typeof spyOn>;
     beforeEach(() => {
       // @ts-ignore
       global.fetch = mock();
+      lookupSpy = spyOn(dns, 'lookup').mockResolvedValue([
+        { address: '93.184.216.34', family: 4 },
+      ] as unknown as Awaited<ReturnType<typeof dns.lookup>>);
     });
     afterEach(() => {
       global.fetch = originalFetch;
+      lookupSpy.mockRestore();
     });
     it('should perform an HTTP request', async () => {