npm - keystone-cli - Versions diffs - 0.3.1 → 0.4.0 - Mend

keystone-cli 0.3.1 → 0.4.0

This diff shows the differences in content between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +18 -1
package/package.json +1 -1
package/src/db/workflow-db.ts +26 -7
package/src/expression/evaluator.ts +1 -0
package/src/parser/agent-parser.test.ts +8 -5
package/src/parser/schema.ts +8 -2
package/src/runner/audit-verification.test.ts +106 -0
package/src/runner/llm-adapter.ts +196 -4
package/src/runner/llm-clarification.test.ts +182 -0
package/src/runner/llm-executor.ts +118 -26
package/src/runner/mcp-manager.ts +4 -1
package/src/runner/mcp-server.test.ts +115 -1
package/src/runner/mcp-server.ts +161 -4
package/src/runner/shell-executor.ts +1 -1
package/src/runner/step-executor.test.ts +33 -10
package/src/runner/step-executor.ts +110 -14
package/src/runner/workflow-runner.test.ts +132 -0
package/src/runner/workflow-runner.ts +118 -23
package/src/templates/agents/keystone-architect.md +21 -5
package/src/templates/full-feature-demo.yaml +5 -0
package/src/ui/dashboard.tsx +32 -4
package/src/utils/auth-manager.test.ts +31 -0
package/src/utils/auth-manager.ts +21 -5
package/src/utils/json-parser.test.ts +35 -0
package/src/utils/json-parser.ts +95 -0
package/src/utils/mermaid.ts +12 -0
package/src/utils/sandbox.test.ts +12 -4
package/src/utils/sandbox.ts +69 -49

package/src/runner/mcp-server.ts CHANGED Viewed

@@ -153,6 +153,40 @@ export class MCPServer {
                   required: ['run_id', 'input'],
                 },
               },
+              {
+                name: 'start_workflow',
+                description:
+                  'Start a workflow asynchronously. Returns immediately with a run_id. Use get_run_status to poll for completion.',
+                inputSchema: {
+                  type: 'object',
+                  properties: {
+                    workflow_name: {
+                      type: 'string',
+                      description: 'The name of the workflow to run (e.g., "deploy", "cleanup")',
+                    },
+                    inputs: {
+                      type: 'object',
+                      description: 'Key-value pairs for workflow inputs',
+                    },
+                  },
+                  required: ['workflow_name'],
+                },
+              },
+              {
+                name: 'get_run_status',
+                description:
+                  'Get the current status of a workflow run. Returns status and outputs if complete.',
+                inputSchema: {
+                  type: 'object',
+                  properties: {
+                    run_id: {
+                      type: 'string',
+                      description: 'The ID of the workflow run',
+                    },
+                  },
+                  required: ['run_id'],
+                },
+              },
             ],
           },
         };
@@ -335,17 +369,24 @@ export class MCPServer {
               throw new Error(`Run ${run_id} is not paused (status: ${run.status})`);
             }
-            // Find the pending human step
+            // Find the pending or suspended step
             const steps = this.db.getStepsByRun(run_id);
-            const pendingStep = steps.find((s) => s.status === 'pending');
+            const pendingStep = steps.find(
+              (s) => s.status === 'pending' || s.status === 'suspended'
+            );
             if (!pendingStep) {
-              throw new Error(`No pending step found for run ${run_id}`);
+              throw new Error(`No pending or suspended step found for run ${run_id}`);
             }
             // Fulfill the step in the DB
             let output: unknown = input;
             const lowerInput = input.trim().toLowerCase();
-            if (lowerInput === 'confirm' || lowerInput === 'y' || lowerInput === 'yes' || lowerInput === '') {
+            if (
+              lowerInput === 'confirm' ||
+              lowerInput === 'y' ||
+              lowerInput === 'yes' ||
+              lowerInput === ''
+            ) {
               output = true;
             } else if (lowerInput === 'n' || lowerInput === 'no') {
               output = false;
@@ -366,6 +407,7 @@ export class MCPServer {
             const runner = new WorkflowRunner(workflow, {
               resumeRunId: run_id,
+              resumeInputs: { [pendingStep.step_id]: { __answer: output } },
               logger,
               preventExit: true,
             });
@@ -440,6 +482,121 @@ export class MCPServer {
             };
           }
+          // --- Tool: start_workflow (async) ---
+          if (toolParams.name === 'start_workflow') {
+            const { workflow_name, inputs } = toolParams.arguments as {
+              workflow_name: string;
+              inputs?: Record<string, unknown>;
+            };
+            const path = WorkflowRegistry.resolvePath(workflow_name);
+            const workflow = WorkflowParser.loadWorkflow(path);
+            // Create a silent logger - we don't capture logs for async runs
+            const logger = {
+              log: () => {},
+              error: () => {},
+              warn: () => {},
+            };
+            const runner = new WorkflowRunner(workflow, {
+              inputs: inputs || {},
+              logger,
+              preventExit: true,
+            });
+            const runId = runner.getRunId();
+            // Start the workflow asynchronously - don't await
+            runner.run().then(
+              (outputs) => {
+                // Update DB with success on completion (RunStatus uses 'completed')
+                this.db.updateRunStatus(runId, 'completed', outputs);
+              },
+              (error) => {
+                // Update DB with failure
+                if (error instanceof WorkflowSuspendedError) {
+                  this.db.updateRunStatus(runId, 'paused');
+                } else {
+                  this.db.updateRunStatus(
+                    runId,
+                    'failed',
+                    undefined,
+                    error instanceof Error ? error.message : String(error)
+                  );
+                }
+              }
+            );
+            return {
+              jsonrpc: '2.0',
+              id,
+              result: {
+                content: [
+                  {
+                    type: 'text',
+                    text: JSON.stringify(
+                      {
+                        status: 'running',
+                        run_id: runId,
+                        workflow: workflow_name,
+                        hint: 'Use get_run_status to check for completion.',
+                      },
+                      null,
+                      2
+                    ),
+                  },
+                ],
+              },
+            };
+          }
+          // --- Tool: get_run_status ---
+          if (toolParams.name === 'get_run_status') {
+            const { run_id } = toolParams.arguments as { run_id: string };
+            const run = this.db.getRun(run_id);
+            if (!run) {
+              throw new Error(`Run ID ${run_id} not found`);
+            }
+            const response: Record<string, unknown> = {
+              run_id,
+              workflow: run.workflow_name,
+              status: run.status,
+            };
+            // Include outputs if completed successfully
+            if (run.status === 'completed' && run.outputs) {
+              response.outputs = JSON.parse(run.outputs);
+            }
+            // Include error if failed
+            if (run.status === 'failed' && run.error) {
+              response.error = run.error;
+            }
+            // Include hint for paused workflows
+            if (run.status === 'paused') {
+              response.hint =
+                'Workflow is paused waiting for human input. Use answer_human_input to resume.';
+            }
+            // Include hint for running workflows
+            if (run.status === 'running') {
+              response.hint =
+                'Workflow is still running. Call get_run_status again to check for completion.';
+            }
+            return {
+              jsonrpc: '2.0',
+              id,
+              result: {
+                content: [{ type: 'text', text: JSON.stringify(response, null, 2) }],
+              },
+            };
+          }
           throw new Error(`Unknown tool: ${toolParams.name}`);
         } catch (error) {
           return {

package/src/runner/shell-executor.ts CHANGED Viewed

@@ -60,7 +60,7 @@ export interface ShellResult {
  * Check if a command contains potentially dangerous shell metacharacters
  * Returns true if the command looks like it might contain unescaped user input
  */
-function detectShellInjectionRisk(command: string): boolean {
+export function detectShellInjectionRisk(command: string): boolean {
   // Common shell metacharacters that indicate potential injection
   const dangerousPatterns = [
     /;[\s]*\w/, // Command chaining with semicolon

package/src/runner/step-executor.test.ts CHANGED Viewed

@@ -34,7 +34,7 @@ interface RequestOutput {
 // Mock node:readline/promises
 const mockRl = {
   question: mock(() => Promise.resolve('')),
-  close: mock(() => { }),
+  close: mock(() => {}),
 };
 mock.module('node:readline/promises', () => ({
@@ -49,13 +49,13 @@ describe('step-executor', () => {
   beforeAll(() => {
     try {
       mkdirSync(tempDir, { recursive: true });
-    } catch (e) { }
+    } catch (e) {}
   });
   afterAll(() => {
     try {
       rmSync(tempDir, { recursive: true, force: true });
-    } catch (e) { }
+    } catch (e) {}
   });
   beforeEach(() => {
@@ -306,6 +306,29 @@ describe('step-executor', () => {
       // @ts-ignore
       expect(result.output.data).toBe('plain text');
     });
+    it('should include response body in error for failed requests', async () => {
+      // @ts-ignore
+      global.fetch.mockResolvedValue(
+        new Response('{"error": "bad request details"}', {
+          status: 400,
+          statusText: 'Bad Request',
+          headers: { 'Content-Type': 'application/json' },
+        })
+      );
+      const step: RequestStep = {
+        id: 'req1',
+        type: 'request',
+        url: 'https://api.example.com/fail',
+        method: 'POST',
+      };
+      const result = await executeStep(step, context);
+      expect(result.status).toBe('failed');
+      expect(result.error).toContain('HTTP 400: Bad Request');
+      expect(result.error).toContain('Response Body: {"error": "bad request details"}');
+    });
   });
   describe('human', () => {
@@ -330,7 +353,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => { } });
+      const result = await executeStep(step, context, { log: () => {} });
       expect(result.status).toBe('success');
       expect(result.output).toBe(true);
       expect(mockRl.question).toHaveBeenCalled();
@@ -347,7 +370,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => { } });
+      const result = await executeStep(step, context, { log: () => {} });
       expect(result.status).toBe('success');
       expect(result.output).toBe('user response');
     });
@@ -363,19 +386,19 @@ describe('step-executor', () => {
       // Test 'yes'
       mockRl.question.mockResolvedValue('yes');
       // @ts-ignore
-      let result = await executeStep(step, context, { log: () => { } });
+      let result = await executeStep(step, context, { log: () => {} });
       expect(result.output).toBe(true);
       // Test 'no'
       mockRl.question.mockResolvedValue('no');
       // @ts-ignore
-      result = await executeStep(step, context, { log: () => { } });
+      result = await executeStep(step, context, { log: () => {} });
       expect(result.output).toBe(false);
       // Test empty string (default to true)
       mockRl.question.mockResolvedValue('');
       // @ts-ignore
-      result = await executeStep(step, context, { log: () => { } });
+      result = await executeStep(step, context, { log: () => {} });
       expect(result.output).toBe(true);
     });
@@ -390,7 +413,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => { } });
+      const result = await executeStep(step, context, { log: () => {} });
       expect(result.status).toBe('success');
       expect(result.output).toBe('some custom response');
     });
@@ -406,7 +429,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => { } });
+      const result = await executeStep(step, context, { log: () => {} });
       expect(result.status).toBe('suspended');
       expect(result.error).toBe('Proceed?');
     });

package/src/runner/step-executor.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import type {
   Step,
   WorkflowStep,
 } from '../parser/schema.ts';
-import { executeShell } from './shell-executor.ts';
+import { detectShellInjectionRisk, executeShell } from './shell-executor.ts';
 import type { Logger } from './workflow-runner.ts';
 import * as readline from 'node:readline/promises';
@@ -34,6 +34,11 @@ export interface StepResult {
   output: unknown;
   status: 'success' | 'failed' | 'suspended';
   error?: string;
+  usage?: {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
 }
 /**
@@ -45,16 +50,17 @@ export async function executeStep(
   logger: Logger = console,
   executeWorkflowFn?: (step: WorkflowStep, context: ExpressionContext) => Promise<StepResult>,
   mcpManager?: MCPManager,
-  workflowDir?: string
+  workflowDir?: string,
+  dryRun?: boolean
 ): Promise<StepResult> {
   try {
     let result: StepResult;
     switch (step.type) {
       case 'shell':
-        result = await executeShellStep(step, context, logger);
+        result = await executeShellStep(step, context, logger, dryRun);
         break;
       case 'file':
-        result = await executeFileStep(step, context, logger);
+        result = await executeFileStep(step, context, logger, dryRun);
         break;
       case 'request':
         result = await executeRequestStep(step, context, logger);
@@ -69,7 +75,7 @@ export async function executeStep(
         result = await executeLlmStep(
           step,
           context,
-          (s, c) => executeStep(s, c, logger, executeWorkflowFn, mcpManager, workflowDir),
+          (s, c) => executeStep(s, c, logger, executeWorkflowFn, mcpManager, workflowDir, dryRun),
           logger,
           mcpManager,
           workflowDir
@@ -129,8 +135,61 @@ export async function executeStep(
 async function executeShellStep(
   step: ShellStep,
   context: ExpressionContext,
-  logger: Logger
+  logger: Logger,
+  dryRun?: boolean
 ): Promise<StepResult> {
+  if (dryRun) {
+    const command = ExpressionEvaluator.evaluateString(step.run, context);
+    logger.log(`[DRY RUN] Would execute shell command: ${command}`);
+    return {
+      output: { stdout: '[DRY RUN] Success', stderr: '', exitCode: 0 },
+      status: 'success',
+    };
+  }
+  // Check for risk and prompt if TTY
+  const command = ExpressionEvaluator.evaluateString(step.run, context);
+  const isRisky = detectShellInjectionRisk(command);
+  if (isRisky) {
+    // Check if we have a resume approval
+    const stepInputs = context.inputs
+      ? (context.inputs as Record<string, unknown>)[step.id]
+      : undefined;
+    if (
+      stepInputs &&
+      typeof stepInputs === 'object' &&
+      '__approved' in stepInputs &&
+      stepInputs.__approved === true
+    ) {
+      // Already approved, proceed
+    } else {
+      const message = `Potentially risky shell command detected: ${command}`;
+      if (!process.stdin.isTTY) {
+        return {
+          output: null,
+          status: 'suspended',
+          error: `APPROVAL_REQUIRED: ${message}`,
+        };
+      }
+      const rl = readline.createInterface({
+        input: process.stdin,
+        output: process.stdout,
+      });
+      try {
+        logger.warn(`\n⚠️  ${message}`);
+        const answer = (await rl.question('Do you want to execute this command? (y/N): ')).trim();
+        if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
+          throw new Error('Command execution denied by user');
+        }
+      } finally {
+        rl.close();
+      }
+    }
+  }
   const result = await executeShell(step, context, logger);
   if (result.stdout) {
@@ -165,10 +224,20 @@ async function executeShellStep(
 async function executeFileStep(
   step: FileStep,
   context: ExpressionContext,
-  _logger: Logger
+  _logger: Logger,
+  dryRun?: boolean
 ): Promise<StepResult> {
   const path = ExpressionEvaluator.evaluateString(step.path, context);
+  if (dryRun && step.op !== 'read') {
+    const opVerb = step.op === 'write' ? 'write to' : 'append to';
+    _logger.log(`[DRY RUN] Would ${opVerb} file: ${path}`);
+    return {
+      output: { path, bytes: 0 },
+      status: 'success',
+    };
+  }
   switch (step.op) {
     case 'read': {
       const file = Bun.file(path);
@@ -298,7 +367,13 @@ async function executeRequestStep(
       data: responseData,
     },
     status: response.ok ? 'success' : 'failed',
-    error: response.ok ? undefined : `HTTP ${response.status}: ${response.statusText}`,
+    error: response.ok
+      ? undefined
+      : `HTTP ${response.status}: ${response.statusText}${
+          responseText
+            ? `\nResponse Body: ${responseText.substring(0, 500)}${responseText.length > 500 ? '...' : ''}`
+            : ''
+        }`,
   };
 }
@@ -312,6 +387,21 @@ async function executeHumanStep(
 ): Promise<StepResult> {
   const message = ExpressionEvaluator.evaluateString(step.message, context);
+  // Check if we have a resume answer
+  const stepInputs = context.inputs
+    ? (context.inputs as Record<string, unknown>)[step.id]
+    : undefined;
+  if (stepInputs && typeof stepInputs === 'object' && '__answer' in stepInputs) {
+    const answer = (stepInputs as Record<string, unknown>).__answer;
+    return {
+      output:
+        step.inputType === 'confirm'
+          ? answer === true || answer === 'true' || answer === 'yes' || answer === 'y'
+          : answer,
+      status: 'success',
+    };
+  }
   // If not a TTY (e.g. MCP server), suspend execution
   if (!process.stdin.isTTY) {
     return {
@@ -396,12 +486,18 @@ async function executeScriptStep(
   _logger: Logger
 ): Promise<StepResult> {
   try {
-    const result = await SafeSandbox.execute(step.run, {
-      inputs: context.inputs,
-      secrets: context.secrets,
-      steps: context.steps,
-      env: context.env,
-    });
+    const result = await SafeSandbox.execute(
+      step.run,
+      {
+        inputs: context.inputs,
+        secrets: context.secrets,
+        steps: context.steps,
+        env: context.env,
+      },
+      {
+        allowInsecureFallback: step.allowInsecure,
+      }
+    );
     return {
       output: result,

package/src/runner/workflow-runner.test.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { afterAll, afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
 import { existsSync, rmSync } from 'node:fs';
+import { WorkflowDb } from '../db/workflow-db';
 import type { Workflow } from '../parser/schema';
 import { WorkflowParser } from '../parser/workflow-parser';
 import { WorkflowRegistry } from '../utils/workflow-registry';
@@ -12,6 +13,9 @@ describe('WorkflowRunner', () => {
     if (existsSync('test-resume.db')) {
       rmSync('test-resume.db');
     }
+    if (existsSync('test-foreach-resume.db')) {
+      rmSync('test-foreach-resume.db');
+    }
   });
   beforeEach(() => {
@@ -273,6 +277,51 @@ describe('WorkflowRunner', () => {
     expect(s1Executed).toBe(false); // Should have been skipped
   });
+  it('should merge resumeInputs with stored inputs on resume', async () => {
+    const resumeDbPath = 'test-merge-inputs.db';
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+    const workflow: Workflow = {
+      name: 'merge-wf',
+      inputs: {
+        initial: { type: 'string' },
+        resumed: { type: 'string' },
+      },
+      steps: [{ id: 's1', type: 'shell', run: 'exit 1', needs: [] }],
+      outputs: {
+        merged: '${{ inputs.initial }}-${{ inputs.resumed }}',
+      },
+    } as unknown as Workflow;
+    const runner1 = new WorkflowRunner(workflow, {
+      dbPath: resumeDbPath,
+      inputs: { initial: 'first', resumed: 'pending' },
+    });
+    let runId = '';
+    try {
+      await runner1.run();
+    } catch (e) {
+      runId = runner1.getRunId();
+    }
+    const fixedWorkflow: Workflow = {
+      ...workflow,
+      steps: [{ id: 's1', type: 'shell', run: 'echo ok', needs: [] }],
+    } as unknown as Workflow;
+    const runner2 = new WorkflowRunner(fixedWorkflow, {
+      dbPath: resumeDbPath,
+      resumeRunId: runId,
+      resumeInputs: { resumed: 'second' },
+    });
+    const outputs = await runner2.run();
+    expect(outputs.merged).toBe('first-second');
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+  });
   it('should redact secrets from outputs', async () => {
     const workflow: Workflow = {
       name: 'redaction-wf',
@@ -355,4 +404,87 @@ describe('WorkflowRunner', () => {
     }
     expect(retryLogged).toBe(true);
   });
+  it('should handle foreach suspension and resume correctly', async () => {
+    const resumeDbPath = 'test-foreach-resume.db';
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+    const workflow: Workflow = {
+      name: 'foreach-suspend-wf',
+      steps: [
+        {
+          id: 'gen',
+          type: 'shell',
+          run: 'echo "[1, 2]"',
+          transform: 'JSON.parse(output.stdout)',
+          needs: [],
+        },
+        {
+          id: 'process',
+          type: 'human',
+          message: 'Item ${{ item }}',
+          foreach: '${{ steps.gen.output }}',
+          needs: ['gen'],
+        },
+      ],
+      outputs: {
+        results: '${{ steps.process.output }}',
+      },
+    } as unknown as Workflow;
+    // First run - should suspend
+    const originalIsTTY = process.stdin.isTTY;
+    process.stdin.isTTY = false;
+    const runner1 = new WorkflowRunner(workflow, { dbPath: resumeDbPath });
+    let suspendedError: unknown;
+    try {
+      await runner1.run();
+    } catch (e) {
+      suspendedError = e;
+    } finally {
+      process.stdin.isTTY = originalIsTTY;
+    }
+    expect(suspendedError).toBeDefined();
+    expect(
+      typeof suspendedError === 'object' && suspendedError !== null && 'name' in suspendedError
+        ? (suspendedError as { name: string }).name
+        : undefined
+    ).toBe('WorkflowSuspendedError');
+    const runId = runner1.getRunId();
+    // Check DB status - parent should be 'paused' and step should be 'suspended'
+    const db = new WorkflowDb(resumeDbPath);
+    const run = db.getRun(runId);
+    expect(run?.status).toBe('paused');
+    const steps = db.getStepsByRun(runId);
+    const parentStep = steps.find(
+      (s: { step_id: string; iteration_index: number | null }) =>
+        s.step_id === 'process' && s.iteration_index === null
+    );
+    expect(parentStep?.status).toBe('suspended');
+    db.close();
+    // Second run - resume with answers
+    const runner2 = new WorkflowRunner(workflow, {
+      dbPath: resumeDbPath,
+      resumeRunId: runId,
+      resumeInputs: {
+        process: { __answer: 'ok' },
+      },
+    });
+    const outputs = await runner2.run();
+    expect(outputs.results).toEqual(['ok', 'ok']);
+    const finalDb = new WorkflowDb(resumeDbPath);
+    const finalRun = finalDb.getRun(runId);
+    expect(finalRun?.status).toBe('completed');
+    finalDb.close();
+    if (existsSync(resumeDbPath)) rmSync(resumeDbPath);
+  });
 });