npm - keystone-cli - Versions diffs - 0.3.0 → 0.3.2 - Mend

keystone-cli 0.3.0 → 0.3.2

This diff shows the differences between publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +6 -4
package/package.json +7 -2
package/src/cli.ts +64 -10
package/src/parser/schema.ts +19 -9
package/src/runner/mcp-server.test.ts +22 -15
package/src/runner/mcp-server.ts +21 -4
package/src/runner/step-executor.test.ts +49 -6
package/src/runner/step-executor.ts +51 -3
package/src/runner/workflow-runner.ts +56 -22
package/src/templates/agents/keystone-architect.md +12 -3
package/src/templates/full-feature-demo.yaml +5 -0
package/src/utils/mermaid.test.ts +18 -42
package/src/utils/mermaid.ts +154 -20
package/src/utils/redactor.test.ts +6 -0
package/src/utils/redactor.ts +10 -1
package/src/utils/sandbox.test.ts +29 -0
package/src/utils/sandbox.ts +61 -0

package/README.md CHANGED Viewed

@@ -136,8 +136,8 @@ mcp_servers:
   github:
     command: npx
     args: ["-y", "@modelcontextprotocol/server-github"]
-      env:
-        GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
+    env:
+      GITHUB_PERSONAL_ACCESS_TOKEN: "your-github-pat" # Or omit if GITHUB_TOKEN is in your .env
 storage:
@@ -265,6 +265,7 @@ Keystone supports several specialized step types:
   - `inputType: confirm`: Simple Enter-to-continue prompt.
   - `inputType: text`: Prompt for a string input, available via `${{ steps.id.output }}`.
 - `workflow`: Trigger another workflow as a sub-step.
+- `script`: Run arbitrary JavaScript in a secure sandbox (`isolated-vm` with fallback to `node:vm`).
 - `sleep`: Pause execution for a specified duration.
 All steps support common features like `needs` (dependencies), `if` (conditionals), `retry`, `timeout`, `foreach` (parallel iteration), and `transform` (post-process output using expressions).
@@ -327,7 +328,7 @@ You are a software developer. You can use tools to explore the codebase.
 Keystone can itself act as an MCP server, allowing other agents (like Claude Desktop or GitHub Copilot) to discover and run your workflows as tools.
 ```bash
-keystone mcp
+keystone mcp start
 ```
 > **Note:** Workflow execution via the Keystone MCP server is synchronous. This provides a better experience for agents as they receive the final results directly, though it means the connection remains open for the duration of the workflow run.
@@ -396,7 +397,8 @@ In these examples, the agent will have access to all tools provided by the MCP s
 | `auth login [provider]` | Login to an authentication provider (github, openai, anthropic) |
 | `auth logout [provider]` | Logout and clear authentication tokens |
 | `ui` | Open the interactive TUI dashboard |
-| `mcp` | Start the Keystone MCP server |
+| `mcp start` | Start the Keystone MCP server |
+| `mcp login <server>` | Login to a remote MCP server |
 | `completion [shell]` | Generate shell completion script (zsh, bash) |
 | `prune [--days N]` | Cleanup old run data from the database |

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "0.3.0",
+  "version": "0.3.2",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {
@@ -38,9 +38,11 @@
     "@jsep-plugin/object": "^1.2.2",
     "@types/react": "^19.2.7",
     "commander": "^12.1.0",
+    "dagre": "^0.8.5",
     "ink": "^6.5.1",
     "ink-select-input": "3.1.2",
     "ink-spinner": "^5.0.0",
+    "isolated-vm": "^6.0.2",
     "js-yaml": "^4.1.0",
     "jsep": "^1.4.0",
     "react": "^19.2.3",
@@ -48,7 +50,10 @@
   },
   "devDependencies": {
     "@biomejs/biome": "^1.9.4",
-    "@types/js-yaml": "^4.0.9"
+    "@types/bun": "^1.3.5",
+    "@types/dagre": "^0.7.53",
+    "@types/js-yaml": "^4.0.9",
+    "@types/node": "^25.0.3"
   },
   "engines": {
     "bun": ">=1.0.0"

package/src/cli.ts CHANGED Viewed

@@ -12,7 +12,7 @@ import scaffoldWorkflow from './templates/scaffold-feature.yaml' with { type: 't
 import { WorkflowDb } from './db/workflow-db.ts';
 import { WorkflowParser } from './parser/workflow-parser.ts';
 import { ConfigLoader } from './utils/config-loader.ts';
-import { generateMermaidGraph, renderMermaidAsAscii } from './utils/mermaid.ts';
+import { generateMermaidGraph, renderWorkflowAsAscii } from './utils/mermaid.ts';
 import { WorkflowRegistry } from './utils/workflow-registry.ts';
 import pkg from '../package.json' with { type: 'json' };
@@ -204,12 +204,11 @@ program
     try {
       const resolvedPath = WorkflowRegistry.resolvePath(workflowPath);
       const workflow = WorkflowParser.loadWorkflow(resolvedPath);
-      const mermaid = generateMermaidGraph(workflow);
-      const ascii = await renderMermaidAsAscii(mermaid);
+      const ascii = renderWorkflowAsAscii(workflow);
       if (ascii) {
         console.log(`\n${ascii}\n`);
       } else {
+        const mermaid = generateMermaidGraph(workflow);
         console.log('\n```mermaid');
         console.log(mermaid);
         console.log('```\n');
@@ -614,11 +613,15 @@ const auth = program.command('auth').description('Authentication management');
 auth
   .command('login')
   .description('Login to an authentication provider')
-  .option('-p, --provider <provider>', 'Authentication provider', 'github')
+  .argument('[provider]', 'Authentication provider', 'github')
+  .option(
+    '-p, --provider <provider>',
+    'Authentication provider (deprecated, use positional argument)'
+  )
   .option('-t, --token <token>', 'Personal Access Token (if not using interactive mode)')
-  .action(async (options) => {
+  .action(async (providerArg, options) => {
     const { AuthManager } = await import('./utils/auth-manager.ts');
-    const provider = options.provider.toLowerCase();
+    const provider = (options.provider || providerArg).toLowerCase();
     if (provider === 'github') {
       let token = options.token;
@@ -675,6 +678,31 @@ auth
         console.error('✗ No token provided.');
         process.exit(1);
       }
+    } else if (provider === 'openai' || provider === 'anthropic') {
+      let key = options.token; // Use --token if provided as the API key
+      if (!key) {
+        console.log(`\n🔑 Login to ${provider.toUpperCase()}`);
+        console.log(`   Please provide your ${provider.toUpperCase()} API key.\n`);
+        const prompt = 'API Key: ';
+        process.stdout.write(prompt);
+        for await (const line of console) {
+          key = line.trim();
+          break;
+        }
+      }
+      if (key) {
+        if (provider === 'openai') {
+          AuthManager.save({ openai_api_key: key });
+        } else {
+          AuthManager.save({ anthropic_api_key: key });
+        }
+        console.log(`\n✓ Successfully saved ${provider.toUpperCase()} API key.`);
+      } else {
+        console.error('✗ No API key provided.');
+        process.exit(1);
+      }
     } else {
       console.error(`✗ Unsupported provider: ${provider}`);
       process.exit(1);
@@ -702,13 +730,33 @@ auth
         }
       } else if (provider) {
         console.log(
-          `  ⊘ Not logged into GitHub. Run "keystone auth login --provider github" to authenticate.`
+          `  ⊘ Not logged into GitHub. Run "keystone auth login github" to authenticate.`
+        );
+      }
+    }
+    if (!provider || provider === 'openai') {
+      if (auth.openai_api_key) {
+        console.log('  ✓ OpenAI API key configured');
+      } else if (provider) {
+        console.log(
+          `  ⊘ OpenAI API key not configured. Run "keystone auth login openai" to authenticate.`
+        );
+      }
+    }
+    if (!provider || provider === 'anthropic') {
+      if (auth.anthropic_api_key) {
+        console.log('  ✓ Anthropic API key configured');
+      } else if (provider) {
+        console.log(
+          `  ⊘ Anthropic API key not configured. Run "keystone auth login anthropic" to authenticate.`
         );
       }
     }
-    if (!auth.github_token && !provider) {
-      console.log('  ⊘ Not logged in. Run "keystone auth login" to authenticate.');
+    if (!auth.github_token && !auth.openai_api_key && !auth.anthropic_api_key && !provider) {
+      console.log('  ⊘ No providers configured. Run "keystone auth login" to authenticate.');
     }
   });
@@ -731,6 +779,12 @@ auth
         copilot_expires_at: undefined,
       });
       console.log('✓ Successfully logged out of GitHub.');
+    } else if (provider === 'openai') {
+      AuthManager.save({ openai_api_key: undefined });
+      console.log('✓ Successfully cleared OpenAI API key.');
+    } else if (provider === 'anthropic') {
+      AuthManager.save({ anthropic_api_key: undefined });
+      console.log('✓ Successfully cleared Anthropic API key.');
     } else {
       console.error(`✗ Unknown provider: ${provider}`);
       process.exit(1);

package/src/parser/schema.ts CHANGED Viewed

@@ -105,17 +105,26 @@ const SleepStepSchema = BaseStepSchema.extend({
   duration: z.union([z.number().int().positive(), z.string()]),
 });
+const ScriptStepSchema = BaseStepSchema.extend({
+  type: z.literal('script'),
+  run: z.string(),
+});
 // ===== Discriminated Union for Steps =====
-export const StepSchema = z.discriminatedUnion('type', [
-  ShellStepSchema,
-  LlmStepSchema,
-  WorkflowStepSchema,
-  FileStepSchema,
-  RequestStepSchema,
-  HumanStepSchema,
-  SleepStepSchema,
-]);
+// biome-ignore lint/suspicious/noExplicitAny: Recursive Zod type
+export const StepSchema: z.ZodType<any> = z.lazy(() =>
+  z.discriminatedUnion('type', [
+    ShellStepSchema,
+    LlmStepSchema,
+    WorkflowStepSchema,
+    FileStepSchema,
+    RequestStepSchema,
+    HumanStepSchema,
+    SleepStepSchema,
+    ScriptStepSchema,
+  ])
+);
 // ===== Workflow Schema =====
@@ -152,6 +161,7 @@ export type FileStep = z.infer<typeof FileStepSchema>;
 export type RequestStep = z.infer<typeof RequestStepSchema>;
 export type HumanStep = z.infer<typeof HumanStepSchema>;
 export type SleepStep = z.infer<typeof SleepStepSchema>;
+export type ScriptStep = z.infer<typeof ScriptStepSchema>;
 export type Workflow = z.infer<typeof WorkflowSchema>;
 export type AgentTool = z.infer<typeof AgentToolSchema>;
 export type Agent = z.infer<typeof AgentSchema>;

package/src/runner/mcp-server.test.ts CHANGED Viewed

@@ -28,7 +28,7 @@ describe('MCPServer', () => {
       method: 'initialize',
     });
-    expect(response.result.serverInfo.name).toBe('keystone-mcp');
+    expect(response?.result?.serverInfo?.name).toBe('keystone-mcp');
   });
   it('should list tools', async () => {
@@ -38,9 +38,9 @@ describe('MCPServer', () => {
       method: 'tools/list',
     });
-    expect(response.result.tools).toHaveLength(5);
+    expect(response?.result?.tools).toHaveLength(5);
     // @ts-ignore
-    expect(response.result.tools.map((t) => t.name)).toContain('run_workflow');
+    expect(response?.result?.tools?.map((t) => t.name)).toContain('run_workflow');
   });
   it('should call list_workflows tool', async () => {
@@ -55,7 +55,7 @@ describe('MCPServer', () => {
       params: { name: 'list_workflows', arguments: {} },
     });
-    expect(response.result.content[0].text).toContain('test-wf');
+    expect(response?.result?.content?.[0]?.text).toContain('test-wf');
   });
   it('should call run_workflow tool successfully', async () => {
@@ -104,8 +104,8 @@ describe('MCPServer', () => {
       },
     });
-    expect(response.result.isError).toBe(true);
-    expect(response.result.content[0].text).toContain('Workflow failed');
+    expect(response?.result?.isError).toBe(true);
+    expect(response?.result?.content?.[0]?.text).toContain('Workflow failed');
   });
   it('should handle workflow suspension in run_workflow', async () => {
@@ -130,7 +130,7 @@ describe('MCPServer', () => {
       },
     });
-    const result = JSON.parse(response.result.content[0].text);
+    const result = JSON.parse(response?.result?.content?.[0]?.text);
     expect(result.status).toBe('paused');
     expect(result.run_id).toBe('run123');
     expect(result.message).toBe('Input needed');
@@ -187,7 +187,7 @@ describe('MCPServer', () => {
       params: { name: 'get_run_logs', arguments: { run_id: runId } },
     });
-    const summary = JSON.parse(response.result.content[0].text);
+    const summary = JSON.parse(response?.result?.content?.[0]?.text);
     expect(summary.workflow).toBe('test-wf');
     expect(summary.steps).toHaveLength(1);
     expect(summary.steps[0].step).toBe('s1');
@@ -202,7 +202,7 @@ describe('MCPServer', () => {
       params: { name: 'unknown_tool', arguments: {} },
     });
-    expect(response.error.message).toContain('Unknown tool');
+    expect(response?.error?.message).toContain('Unknown tool');
   });
   it('should handle unknown method', async () => {
@@ -212,14 +212,21 @@ describe('MCPServer', () => {
       method: 'unknown_method',
     });
-    expect(response.error.message).toContain('Method not found');
+    expect(response?.error?.message).toContain('Method not found');
   });
   it('should start and handle messages from stdin', async () => {
-    const writeSpy = spyOn(process.stdout, 'write').mockImplementation(() => true);
+    const { PassThrough } = await import('node:stream');
+    const input = new PassThrough();
+    const outputStream = new PassThrough();
+    // Create a new server for this test to use the streams
+    const testServer = new MCPServer(db, input, outputStream);
+    const writeSpy = spyOn(outputStream, 'write').mockImplementation(() => true);
     const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
-    const startPromise = server.start();
+    const startPromise = testServer.start();
     // Simulate stdin data
     const message = {
@@ -227,16 +234,16 @@ describe('MCPServer', () => {
       id: 9,
       method: 'initialize',
     };
-    process.stdin.emit('data', Buffer.from(`${JSON.stringify(message)}\n`));
+    input.write(`${JSON.stringify(message)}\n`);
     // Wait for async processing
-    await new Promise((resolve) => setTimeout(resolve, 50));
+    await new Promise((resolve) => setTimeout(resolve, 100));
     expect(writeSpy).toHaveBeenCalled();
     const output = JSON.parse(writeSpy.mock.calls[0][0] as string);
     expect(output.id).toBe(9);
-    process.stdin.emit('close');
+    input.end();
     await startPromise;
     writeSpy.mockRestore();

package/src/runner/mcp-server.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import * as readline from 'node:readline';
+import type { Readable, Writable } from 'node:stream';
 import pkg from '../../package.json' with { type: 'json' };
 import { WorkflowDb } from '../db/workflow-db';
 import { WorkflowParser } from '../parser/workflow-parser';
@@ -16,14 +17,18 @@ interface MCPMessage {
 export class MCPServer {
   private db: WorkflowDb;
+  private input: Readable;
+  private output: Writable;
-  constructor(db?: WorkflowDb) {
+  constructor(db?: WorkflowDb, input: Readable = process.stdin, output: Writable = process.stdout) {
     this.db = db || new WorkflowDb();
+    this.input = input;
+    this.output = output;
   }
   async start() {
     const rl = readline.createInterface({
-      input: process.stdin,
+      input: this.input,
       terminal: false,
     });
@@ -35,7 +40,7 @@ export class MCPServer {
           const message = JSON.parse(line) as MCPMessage;
           const response = await this.handleMessage(message);
           if (response) {
-            process.stdout.write(`${JSON.stringify(response)}\n`);
+            this.output.write(`${JSON.stringify(response)}\n`);
           }
         } catch (error) {
           console.error('Error handling MCP message:', error);
@@ -46,6 +51,11 @@ export class MCPServer {
         this.stop();
         resolve();
       });
+      // Handle stream errors
+      this.input.on('error', (err: Error) => {
+        console.error('stdin error:', err);
+      });
     });
   }
@@ -333,7 +343,14 @@ export class MCPServer {
             }
             // Fulfill the step in the DB
-            const output = input === 'confirm' ? true : input;
+            let output: unknown = input;
+            const lowerInput = input.trim().toLowerCase();
+            if (lowerInput === 'confirm' || lowerInput === 'y' || lowerInput === 'yes' || lowerInput === '') {
+              output = true;
+            } else if (lowerInput === 'n' || lowerInput === 'no') {
+              output = false;
+            }
             await this.db.completeStep(pendingStep.id, 'success', output);
             // Resume the workflow

package/src/runner/step-executor.test.ts CHANGED Viewed

@@ -34,7 +34,7 @@ interface RequestOutput {
 // Mock node:readline/promises
 const mockRl = {
   question: mock(() => Promise.resolve('')),
-  close: mock(() => {}),
+  close: mock(() => { }),
 };
 mock.module('node:readline/promises', () => ({
@@ -49,13 +49,13 @@ describe('step-executor', () => {
   beforeAll(() => {
     try {
       mkdirSync(tempDir, { recursive: true });
-    } catch (e) {}
+    } catch (e) { }
   });
   afterAll(() => {
     try {
       rmSync(tempDir, { recursive: true, force: true });
-    } catch (e) {}
+    } catch (e) { }
   });
   beforeEach(() => {
@@ -330,7 +330,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => {} });
+      const result = await executeStep(step, context, { log: () => { } });
       expect(result.status).toBe('success');
       expect(result.output).toBe(true);
       expect(mockRl.question).toHaveBeenCalled();
@@ -347,11 +347,54 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => {} });
+      const result = await executeStep(step, context, { log: () => { } });
       expect(result.status).toBe('success');
       expect(result.output).toBe('user response');
     });
+    it('should handle human confirmation (yes/no/empty)', async () => {
+      const step: HumanStep = {
+        id: 'h1',
+        type: 'human',
+        message: 'Proceed?',
+        inputType: 'confirm',
+      };
+      // Test 'yes'
+      mockRl.question.mockResolvedValue('yes');
+      // @ts-ignore
+      let result = await executeStep(step, context, { log: () => { } });
+      expect(result.output).toBe(true);
+      // Test 'no'
+      mockRl.question.mockResolvedValue('no');
+      // @ts-ignore
+      result = await executeStep(step, context, { log: () => { } });
+      expect(result.output).toBe(false);
+      // Test empty string (default to true)
+      mockRl.question.mockResolvedValue('');
+      // @ts-ignore
+      result = await executeStep(step, context, { log: () => { } });
+      expect(result.output).toBe(true);
+    });
+    it('should fallback to text in confirm mode', async () => {
+      mockRl.question.mockResolvedValue('some custom response');
+      const step: HumanStep = {
+        id: 'h1',
+        type: 'human',
+        message: 'Proceed?',
+        inputType: 'confirm',
+      };
+      // @ts-ignore
+      const result = await executeStep(step, context, { log: () => { } });
+      expect(result.status).toBe('success');
+      expect(result.output).toBe('some custom response');
+    });
     it('should suspend if not a TTY', async () => {
       process.stdin.isTTY = false;
@@ -363,7 +406,7 @@ describe('step-executor', () => {
       };
       // @ts-ignore
-      const result = await executeStep(step, context, { log: () => {} });
+      const result = await executeStep(step, context, { log: () => { } });
       expect(result.status).toBe('suspended');
       expect(result.error).toBe('Proceed?');
     });

package/src/runner/step-executor.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import type {
   FileStep,
   HumanStep,
   RequestStep,
+  ScriptStep,
   ShellStep,
   SleepStep,
   Step,
@@ -14,6 +15,7 @@ import { executeShell } from './shell-executor.ts';
 import type { Logger } from './workflow-runner.ts';
 import * as readline from 'node:readline/promises';
+import { SafeSandbox } from '../utils/sandbox.ts';
 import { executeLlmStep } from './llm-executor.ts';
 import type { MCPManager } from './mcp-manager.ts';
@@ -79,6 +81,9 @@ export async function executeStep(
         }
         result = await executeWorkflowFn(step, context);
         break;
+      case 'script':
+        result = await executeScriptStep(step, context, logger);
+        break;
       default:
         throw new Error(`Unknown step type: ${(step as Step).type}`);
     }
@@ -324,10 +329,25 @@ async function executeHumanStep(
   try {
     if (step.inputType === 'confirm') {
       logger.log(`\n❓ ${message}`);
-      const answer = await rl.question('Confirm? (Y/n): ');
-      const isConfirmed = answer.toLowerCase() !== 'n';
+      const answer = (await rl.question('Response (Y/n/text): ')).trim();
+      const lowerAnswer = answer.toLowerCase();
+      if (lowerAnswer === '' || lowerAnswer === 'y' || lowerAnswer === 'yes') {
+        return {
+          output: true,
+          status: 'success',
+        };
+      }
+      if (lowerAnswer === 'n' || lowerAnswer === 'no') {
+        return {
+          output: false,
+          status: 'success',
+        };
+      }
+      // Fallback to text if it's not a clear yes/no
       return {
-        output: isConfirmed,
+        output: answer,
         status: 'success',
       };
     }
@@ -367,3 +387,31 @@ async function executeSleepStep(
     status: 'success',
   };
 }
+/**
+ * Execute a script step in a safe sandbox
+ */
+async function executeScriptStep(
+  step: ScriptStep,
+  context: ExpressionContext,
+  _logger: Logger
+): Promise<StepResult> {
+  try {
+    const result = await SafeSandbox.execute(step.run, {
+      inputs: context.inputs,
+      secrets: context.secrets,
+      steps: context.steps,
+      env: context.env,
+    });
+    return {
+      output: result,
+      status: 'success',
+    };
+  } catch (error) {
+    return {
+      output: null,
+      status: 'failed',
+      error: error instanceof Error ? error.message : String(error),
+    };
+  }
+}

package/src/runner/workflow-runner.ts CHANGED Viewed

@@ -25,7 +25,7 @@ class RedactingLogger implements Logger {
   constructor(
     private inner: Logger,
     private redactor: Redactor
-  ) {}
+  ) { }
   log(msg: string): void {
     this.inner.log(this.redactor.redact(msg));
@@ -53,7 +53,7 @@ export interface RunOptions {
 export interface StepContext {
   output?: unknown;
   outputs?: Record<string, unknown>;
-  status: 'success' | 'failed' | 'skipped';
+  status: 'success' | 'failed' | 'skipped' | 'pending' | 'suspended';
 }
 // Type for foreach results - wraps array to ensure JSON serialization preserves all properties
@@ -196,7 +196,7 @@ export class WorkflowRunner {
             items[exec.iteration_index] = {
               output: null,
               outputs: {},
-              status: exec.status as 'failed' | 'running' | 'pending',
+              status: exec.status as 'failed' | 'pending' | 'success' | 'skipped' | 'suspended',
             };
           }
         }
@@ -305,9 +305,37 @@ export class WorkflowRunner {
   private loadSecrets(): Record<string, string> {
     const secrets: Record<string, string> = {};
+    // Common non-secret environment variables to exclude from redaction
+    const blocklist = new Set([
+      'USER',
+      'PATH',
+      'SHELL',
+      'HOME',
+      'PWD',
+      'LOGNAME',
+      'LANG',
+      'TERM',
+      'EDITOR',
+      'VISUAL',
+      '_',
+      'SHLVL',
+      'LC_ALL',
+      'OLDPWD',
+      'DISPLAY',
+      'TMPDIR',
+      'SSH_AUTH_SOCK',
+      'XPC_FLAGS',
+      'XPC_SERVICE_NAME',
+      'ITERM_SESSION_ID',
+      'ITERM_PROFILE',
+      'TERM_PROGRAM',
+      'TERM_PROGRAM_VERSION',
+      'COLORTERM',
+    ]);
     // Bun automatically loads .env file
     for (const [key, value] of Object.entries(Bun.env)) {
-      if (value) {
+      if (value && !blocklist.has(key)) {
         secrets[key] = value;
       }
     }
@@ -485,11 +513,7 @@ export class WorkflowRunner {
         return result;
       }
-      // Redact secrets from output and error before storing
-      const redactedOutput = this.redactor.redactValue(result.output);
-      const redactedError = result.error ? this.redactor.redact(result.error) : undefined;
-      await this.db.completeStep(stepExecId, result.status, redactedOutput, redactedError);
+      await this.db.completeStep(stepExecId, result.status, result.output, result.error);
       // Ensure outputs is always an object for consistent access
       let outputs: Record<string, unknown>;
@@ -621,6 +645,7 @@ export class WorkflowRunner {
               // Execute and store result at correct index
               try {
+                this.logger.log(`  ⤷ [${i + 1}/${items.length}] Executing iteration...`);
                 itemResults[i] = await this.executeStepInternal(step, itemContext, stepExecId);
                 if (itemResults[i].status === 'failed') {
                   aborted = true;
@@ -760,7 +785,7 @@ export class WorkflowRunner {
     this.logger.log(`Run ID: ${this.runId}`);
     this.logger.log(
       '\n⚠️  Security Warning: Only run workflows from trusted sources.\n' +
-        '   Workflows can execute arbitrary shell commands and access your environment.\n'
+      '   Workflows can execute arbitrary shell commands and access your environment.\n'
     );
     // Apply defaults and validate inputs
@@ -787,8 +812,7 @@ export class WorkflowRunner {
         this.logger.log('All steps already completed. Nothing to resume.\n');
         // Evaluate outputs from completed state
         const outputs = this.evaluateOutputs();
-        const redactedOutputs = this.redactor.redactValue(outputs) as Record<string, unknown>;
-        await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
+        await this.db.updateRunStatus(this.runId, 'completed', outputs);
         this.logger.log('✨ Workflow already completed!\n');
         return outputs;
       }
@@ -799,6 +823,9 @@ export class WorkflowRunner {
       this.logger.log(`Execution order: ${executionOrder.join(' → ')}\n`);
+      const totalSteps = executionOrder.length;
+      const stepIndices = new Map(executionOrder.map((id, index) => [id, index + 1]));
       // Execute steps in parallel where possible (respecting dependencies)
       const pendingSteps = new Set(remainingSteps);
       const runningPromises = new Map<string, Promise<void>>();
@@ -811,18 +838,21 @@ export class WorkflowRunner {
             if (!step) {
               throw new Error(`Step ${stepId} not found in workflow`);
             }
-            const dependenciesMet = step.needs.every((dep) => completedSteps.has(dep));
+            const dependenciesMet = step.needs.every((dep: string) => completedSteps.has(dep));
             if (dependenciesMet) {
               pendingSteps.delete(stepId);
               // Start execution
-              this.logger.log(`▶ Executing step: ${step.id} (${step.type})`);
+              const stepIndex = stepIndices.get(stepId);
+              this.logger.log(
+                `[${stepIndex}/${totalSteps}] ▶ Executing step: ${step.id} (${step.type})`
+              );
               const promise = this.executeStepWithForeach(step)
                 .then(() => {
                   completedSteps.add(stepId);
                   runningPromises.delete(stepId);
-                  this.logger.log(`  ✓ Step ${step.id} completed\n`);
+                  this.logger.log(`[${stepIndex}/${totalSteps}] ✓ Step ${step.id} completed\n`);
                 })
                 .catch((err) => {
                   runningPromises.delete(stepId);
@@ -857,11 +887,8 @@ export class WorkflowRunner {
       // Evaluate outputs
       const outputs = this.evaluateOutputs();
-      // Redact secrets from outputs before storing
-      const redactedOutputs = this.redactor.redactValue(outputs) as Record<string, unknown>;
       // Mark run as complete
-      await this.db.updateRunStatus(this.runId, 'completed', redactedOutputs);
+      await this.db.updateRunStatus(this.runId, 'completed', outputs);
       this.logger.log('✨ Workflow completed successfully!\n');
@@ -900,6 +927,8 @@ export class WorkflowRunner {
     const completedFinallySteps = new Set<string>();
     const pendingFinallySteps = new Set(this.workflow.finally.map((s) => s.id));
     const runningPromises = new Map<string, Promise<void>>();
+    const totalFinallySteps = this.workflow.finally.length;
+    const finallyStepIndices = new Map(this.workflow.finally.map((s, index) => [s.id, index + 1]));
     try {
       while (pendingFinallySteps.size > 0 || runningPromises.size > 0) {
@@ -909,18 +938,23 @@ export class WorkflowRunner {
           // Dependencies can be from main steps (already in this.stepContexts) or previous finally steps
           const dependenciesMet = step.needs.every(
-            (dep) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
+            (dep: string) => this.stepContexts.has(dep) || completedFinallySteps.has(dep)
           );
           if (dependenciesMet) {
             pendingFinallySteps.delete(stepId);
-            this.logger.log(`▶ Executing finally step: ${step.id} (${step.type})`);
+            const finallyStepIndex = finallyStepIndices.get(stepId);
+            this.logger.log(
+              `[${finallyStepIndex}/${totalFinallySteps}] ▶ Executing finally step: ${step.id} (${step.type})`
+            );
             const promise = this.executeStepWithForeach(step)
               .then(() => {
                 completedFinallySteps.add(stepId);
                 runningPromises.delete(stepId);
-                this.logger.log(`  ✓ Finally step ${step.id} completed\n`);
+                this.logger.log(
+                  `[${finallyStepIndex}/${totalFinallySteps}] ✓ Finally step ${step.id} completed\n`
+                );
               })
               .catch((err) => {
                 runningPromises.delete(stepId);

package/src/templates/agents/keystone-architect.md CHANGED Viewed

@@ -15,13 +15,15 @@ You are the Keystone Architect. Your goal is to design and generate high-quality
 - **outputs**: Map of expressions (e.g., `${{ steps.id.output }}`) under the `outputs` key.
 - **steps**: Array of step objects. Each step MUST have an `id` and a `type`:
   - **shell**: `{ id, type: 'shell', run, dir, env, transform }`
-  - **llm**: `{ id, type: 'llm', agent, prompt, schema }`
+  - **llm**: `{ id, type: 'llm', agent, prompt, schema, provider, model, tools, maxIterations, useGlobalMcp, mcpServers }`
   - **workflow**: `{ id, type: 'workflow', path, inputs }`
   - **file**: `{ id, type: 'file', path, op: 'read'|'write'|'append', content }`
   - **request**: `{ id, type: 'request', url, method, body, headers }`
-  - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }`
+  - **human**: `{ id, type: 'human', message, inputType: 'confirm'|'text' }` (Note: 'confirm' returns boolean but automatically fallbacks to text if input is not yes/no)
   - **sleep**: `{ id, type: 'sleep', duration }`
-- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`.
+  - **script**: `{ id, type: 'script', run }` (Executes JS in a secure sandbox)
+- **Common Step Fields**: `needs` (array of IDs), `if` (expression), `retry`, `foreach`, `concurrency`, `transform`.
+- **finally**: Optional array of steps to run at the end of the workflow, regardless of success or failure.
 - **IMPORTANT**: Steps run in **parallel** by default. To ensure sequential execution, a step must explicitly list the previous step's ID in its `needs` array.
 ## Agent Schema (.md)
@@ -38,6 +40,13 @@ Markdown files with YAML frontmatter:
 - `${{ args.paramName }}` (used inside agent tools)
 - Standard JS-like expressions: `${{ steps.count > 0 ? 'yes' : 'no' }}`
+# Guidelines
+- **User Interaction**: Use `human` steps when user input or approval is needed.
+- **Error Handling**: Use `retry` for flaky operations and `finally` for cleanup (e.g., removing temp files).
+- **Custom Logic**: Use `script` steps for data manipulation that is too complex for expressions.
+- **Agent Collaboration**: Create specialized agents for complex sub-tasks and coordinate them via `llm` steps.
+- **Discovery**: Use `mcpServers` in `llm` steps when the agent needs access to external tools or systems.
 # Output Instructions
 When asked to design a feature:
 1. Provide the necessary Keystone files (Workflows and Agents).

package/src/templates/full-feature-demo.yaml CHANGED Viewed

@@ -62,3 +62,8 @@ steps:
     type: sleep
     duration: 100
     needs: [api_test]
+finally:
+  - id: cleanup
+    type: shell
+    run: rm /tmp/keystone-test.txt

package/src/utils/mermaid.test.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import { describe, expect, it, mock, spyOn } from 'bun:test';
+import { describe, expect, it } from 'bun:test';
 import type { Workflow } from '../parser/schema';
-import { generateMermaidGraph, renderMermaidAsAscii } from './mermaid';
+import { generateMermaidGraph, renderWorkflowAsAscii } from './mermaid';
 describe('mermaid', () => {
   it('should generate a mermaid graph from a workflow', () => {
@@ -16,7 +16,7 @@ describe('mermaid', () => {
     const graph = generateMermaidGraph(workflow);
     expect(graph).toContain('graph TD');
     expect(graph).toContain('s1["s1\\n(shell)"]:::shell');
-    expect(graph).toContain('s2["s2\\n🤖 my-agent"]:::ai');
+    expect(graph).toContain('s2["s2\\n🤖 my-agent\\n(llm)"]:::ai');
     expect(graph).toContain('s3["s3\\n(human)\\n❓ Conditional"]:::human');
     expect(graph).toContain('s1 --> s2');
     expect(graph).toContain('s2 --> s3');
@@ -31,45 +31,21 @@ describe('mermaid', () => {
     expect(graph).toContain('(📚 Loop)');
   });
-  it('should render mermaid as ascii', async () => {
-    const originalFetch = global.fetch;
-    // @ts-ignore
-    global.fetch = mock(() =>
-      Promise.resolve(
-        new Response('ascii graph', {
-          status: 200,
-        })
-      )
-    );
-    const result = await renderMermaidAsAscii('graph TD\n  A --> B');
-    expect(result).toBe('ascii graph');
-    global.fetch = originalFetch;
-  });
-  it('should return null if API returns error', async () => {
-    const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
-      new Response('Error', { status: 500 })
-    );
-    const result = await renderMermaidAsAscii('graph TD; A-->B');
-    expect(result).toBeNull();
-    fetchSpy.mockRestore();
-  });
-  it('should return null if API returns failure message', async () => {
-    const fetchSpy = spyOn(global, 'fetch').mockResolvedValue(
-      new Response('Failed to render diagram', { status: 200 })
-    );
-    const result = await renderMermaidAsAscii('graph TD; A-->B');
-    expect(result).toBeNull();
-    fetchSpy.mockRestore();
-  });
+  it('should render workflow as ascii', () => {
+    const workflow: Workflow = {
+      name: 'test',
+      steps: [
+        { id: 's1', type: 'shell', run: 'echo 1', needs: [] },
+        { id: 's2', type: 'llm', agent: 'my-agent', prompt: 'hi', needs: ['s1'] },
+      ],
+    } as unknown as Workflow;
-  it('should return null if fetch throws', async () => {
-    const fetchSpy = spyOn(global, 'fetch').mockRejectedValue(new Error('Network error'));
-    const result = await renderMermaidAsAscii('graph TD; A-->B');
-    expect(result).toBeNull();
-    fetchSpy.mockRestore();
+    const ascii = renderWorkflowAsAscii(workflow);
+    expect(ascii).toBeDefined();
+    expect(ascii).toContain('s1');
+    expect(ascii).toContain('s2 (AI: my-agent)');
+    expect(ascii).toContain('|');
+    expect(ascii).toContain('-');
+    expect(ascii).toContain('>');
   });
 });

package/src/utils/mermaid.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import dagre from 'dagre';
 import type { Workflow } from '../parser/schema';
 export function generateMermaidGraph(workflow: Workflow): string {
@@ -12,7 +13,7 @@ export function generateMermaidGraph(workflow: Workflow): string {
     let label = `${step.id}\\n(${step.type})`;
     // Add specific details based on type
-    if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}`;
+    if (step.type === 'llm') label = `${step.id}\\n🤖 ${step.agent}\\n(${step.type})`;
     if (step.foreach) label += '\\n(📚 Loop)';
     if (step.if) label += '\\n❓ Conditional';
@@ -59,29 +60,162 @@ export function generateMermaidGraph(workflow: Workflow): string {
 }
 /**
- * Renders a Mermaid graph as ASCII using mermaid-ascii.art
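+ * Legacy stub that always resolves to null: ASCII rendering is now done locally by renderWorkflowAsAscii, which renders a workflow as an ASCII graph using dagre for layout.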
  */
-export async function renderMermaidAsAscii(mermaid: string): Promise<string | null> {
-  try {
-    const response = await fetch('https://mermaid-ascii.art', {
-      method: 'POST',
-      headers: {
-        'Content-Type': 'application/x-www-form-urlencoded',
-      },
-      body: `mermaid=${encodeURIComponent(mermaid)}`,
-    });
-    if (!response.ok) {
-      return null;
+export async function renderMermaidAsAscii(_mermaid: string): Promise<string | null> {
+  /* We no longer use the mermaid string for ASCII, we use the workflow object directly.
+   * This stub ignores its argument and always resolves to null; ASCII output is
+   * produced by renderWorkflowAsAscii, which takes the Workflow itself. */
+  return null;
+}
+export function renderWorkflowAsAscii(workflow: Workflow): string { // returns the drawing as newline-joined text rows
+  const g = new dagre.graphlib.Graph(); // one node per step, one edge per `needs` entry
+  g.setGraph({ rankdir: 'LR', nodesep: 2, edgesep: 1, ranksep: 4 }); // 'LR' is dagre's left-to-right rank direction
+  g.setDefaultEdgeLabel(() => ({}));
+  const nodeWidth = 24; // minimum box width; layout units are used 1:1 as canvas cells below
+  const nodeHeight = 3; // top border row, label row, bottom border row
+  for (const step of workflow.steps) {
+    let label = `${step.id} (${step.type})`;
+    if (step.type === 'llm') label = `${step.id} (AI: ${step.agent})`; // llm steps show the agent instead of the type
+    if (step.if) label = `IF ${label}`;
+    if (step.foreach) label = `LOOP ${label}`; // with both flags set the label reads "LOOP IF ..."
+    const width = Math.max(nodeWidth, label.length + 4); // grow the box when label + 4 spare cells exceeds the minimum
+    g.setNode(step.id, { label, width, height: nodeHeight });
+    if (step.needs) {
+      for (const need of step.needs) {
+        g.setEdge(need, step.id); // dependency -> dependent; NOTE(review): a `need` that is not a step id never gets a label/size here — confirm it is validated upstream
+      }
     }
+  }
+  dagre.layout(g); // the code below reads node.x / node.y and edge.points from the laid-out graph
+  /* Canvas dimensions: bounding box over every node rectangle (node.x / node.y
+   * are treated as the box centre) and every edge point. */
+  let minX = Number.POSITIVE_INFINITY;
+  let minY = Number.POSITIVE_INFINITY;
+  let maxX = Number.NEGATIVE_INFINITY;
+  let maxY = Number.NEGATIVE_INFINITY;
+  for (const v of g.nodes()) {
+    const node = g.node(v);
+    minX = Math.min(minX, node.x - node.width / 2);
+    minY = Math.min(minY, node.y - node.height / 2);
+    maxX = Math.max(maxX, node.x + node.width / 2);
+    maxY = Math.max(maxY, node.y + node.height / 2);
+  }
-    const ascii = await response.text();
-    if (ascii.includes('Failed to render diagram')) {
-      return null;
+  for (const e of g.edges()) {
+    const edge = g.edge(e);
+    for (const p of edge.points) {
+      minX = Math.min(minX, p.x);
+      minY = Math.min(minY, p.y);
+      maxX = Math.max(maxX, p.x);
+      maxY = Math.max(maxY, p.y);
     }
+  }
+  const canvasWidth = Math.ceil(maxX - minX) + 10; // bounding-box width plus 10 spare columns
+  const canvasHeight = Math.ceil(maxY - minY) + 4; // bounding-box height plus 4 spare rows
+  const canvas = Array.from({ length: canvasHeight }, () => Array(canvasWidth).fill(' ')); // rows of single-character cells, initially blank
-    return ascii;
-  } catch {
-    return null;
+  const offsetX = Math.floor(-minX) + 2; // shifts layout x so that minX lands about 2 columns in
+  const offsetY = Math.floor(-minY) + 1; // shifts layout y so that minY lands about 1 row down
+  /* Helper to draw at coordinates: takes layout coordinates, floors them,
+   * applies the canvas offsets and writes one character. */
+  const draw = (x: number, y: number, char: string) => {
+    const ix = Math.floor(x) + offsetX;
+    const iy = Math.floor(y) + offsetY;
+    if (iy >= 0 && iy < canvas.length && ix >= 0 && ix < canvas[0].length) { // writes outside the canvas are silently dropped
+      canvas[iy][ix] = char;
+    }
+  };
+  const drawText = (x: number, y: number, text: string) => { // writes `text` left to right on one row, starting at (x, y)
+    const startX = Math.floor(x);
+    const startY = Math.floor(y);
+    for (let i = 0; i < text.length; i++) {
+      draw(startX + i, startY, text[i]);
+    }
+  };
+  /* Draw Nodes: a '-' / '|' rectangle with '+' corners and the label centred
+   * on the middle row. */
+  for (const v of g.nodes()) {
+    const node = g.node(v);
+    const x = node.x - node.width / 2; // left edge of the box
+    const y = node.y - node.height / 2; // top edge of the box
+    const w = node.width;
+    const h = node.height;
+    const startX = Math.floor(x);
+    const startY = Math.floor(y);
+    const endX = startX + Math.floor(w) - 1;
+    const endY = startY + Math.floor(h) - 1;
+    for (let i = startX; i <= endX; i++) {
+      draw(i, startY, '-');
+      draw(i, endY, '-');
+    }
+    for (let i = startY; i <= endY; i++) {
+      draw(startX, i, '|');
+      draw(endX, i, '|');
+    }
+    draw(startX, startY, '+'); // corners are written last so they replace the border characters
+    draw(endX, startY, '+');
+    draw(startX, endY, '+');
+    draw(endX, endY, '+');
+    const labelX = x + Math.floor((w - (node.label?.length || 0)) / 2);
+    const labelY = y + Math.floor(h / 2);
+    drawText(labelX, labelY, node.label || '');
+  }
+  /* Draw Edges: done after the nodes, so edge characters overwrite box
+   * characters wherever the two overlap. Each pair of consecutive points is
+   * drawn as one segment. */
+  for (const e of g.edges()) {
+    const edge = g.edge(e);
+    const points = edge.points;
+    for (let i = 0; i < points.length - 1; i++) {
+      const p1 = points[i];
+      const p2 = points[i + 1];
+      const x1 = Math.floor(p1.x);
+      const y1 = Math.floor(p1.y);
+      const x2 = Math.floor(p2.x);
+      const y2 = Math.floor(p2.y);
+      if (x1 === x2) {
+        for (let y = Math.min(y1, y2); y <= Math.max(y1, y2); y++) draw(x1, y, '|');
+      } else if (y1 === y2) {
+        for (let x = Math.min(x1, x2); x <= Math.max(x1, x2); x++) draw(x, y1, '-');
+      } else { // diagonal segment, drawn as an elbow: run along row y1, '+' at (x2, y1), then run along column x2
+        const xStep = x2 > x1 ? 1 : -1;
+        const yStep = y2 > y1 ? 1 : -1;
+        if (x1 !== x2) { // always true in this branch
+          for (let x = x1; x !== x2; x += xStep) {
+            draw(x, y1, '-');
+          }
+          draw(x2, y1, '+');
+        }
+        if (y1 !== y2) { // always true in this branch
+          for (let y = y1 + yStep; y !== y2; y += yStep) { // stops before y2, so (x2, y2) itself is not drawn by this segment
+            draw(x2, y, '|');
+          }
+        }
+      }
+    }
+    const lastPoint = points[points.length - 1];
+    const prevPoint = points[points.length - 2];
+    if (lastPoint.x > prevPoint.x) draw(lastPoint.x, lastPoint.y, '>'); // arrowhead on the final point; horizontal direction is checked before vertical
+    else if (lastPoint.x < prevPoint.x) draw(lastPoint.x, lastPoint.y, '<');
+    else if (lastPoint.y > prevPoint.y) draw(lastPoint.x, lastPoint.y, 'v');
+    else if (lastPoint.y < prevPoint.y) draw(lastPoint.x, lastPoint.y, '^');
   }
+  return canvas.map((row) => row.join('').trimEnd()).join('\n'); // trailing spaces are stripped from every row
 }

package/src/utils/redactor.test.ts CHANGED Viewed

@@ -63,4 +63,10 @@ describe('Redactor', () => {
     const text = 'a and 12 are safe, but abc is a secret';
     expect(shortRedactor.redact(text)).toBe('a and 12 are safe, but ***REDACTED*** is a secret');
   });
+  it('should not redact substrings of larger words when using alphanumeric secrets', () => {
+    const wordRedactor = new Redactor({ USER: 'mark' });
+    const text = 'mark went to the marketplace';
+    expect(wordRedactor.redact(text)).toBe('***REDACTED*** went to the marketplace');
+  });
 });

package/src/utils/redactor.ts CHANGED Viewed

@@ -30,7 +30,16 @@ export class Redactor {
       // Use a global replace to handle multiple occurrences
       // Escape special regex characters in the secret
       const escaped = secret.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-      redacted = redacted.replace(new RegExp(escaped, 'g'), '***REDACTED***');
+      // Use word boundaries if the secret starts/ends with an alphanumeric character
+      // to avoid partial matches (e.g. redacting 'mark' in 'marketplace')
+      const startBoundary = /^\w/.test(secret) ? '\\b' : '';
+      const endBoundary = /\w$/.test(secret) ? '\\b' : '';
+      redacted = redacted.replace(
+        new RegExp(`${startBoundary}${escaped}${endBoundary}`, 'g'),
+        '***REDACTED***'
+      );
     }
     return redacted;
   }

package/src/utils/sandbox.test.ts ADDED Viewed

@@ -0,0 +1,29 @@
+import { describe, expect, it } from 'bun:test';
+import { SafeSandbox } from './sandbox';
+describe('SafeSandbox', () => {
+  it('should execute basic arithmetic', async () => {
+    const result = await SafeSandbox.execute('1 + 2'); // no context, default options
+    expect(result).toBe(3);
+  });
+  it('should have access to context variables', async () => {
+    const result = await SafeSandbox.execute('a + b', { a: 10, b: 20 }); // context keys are read as bare identifiers
+    expect(result).toBe(30);
+  });
+  it('should not have access to Node.js globals', async () => {
+    const result = await SafeSandbox.execute('typeof process'); // `typeof` avoids a ReferenceError when `process` is absent
+    expect(result).toBe('undefined');
+  });
+  it('should handle object results', async () => {
+    const result = await SafeSandbox.execute('({ x: 1, y: 2 })'); // parenthesised so the braces parse as an object literal
+    expect(result).toEqual({ x: 1, y: 2 });
+  });
+  it('should respect timeouts', async () => {
+    const promise = SafeSandbox.execute('while(true) {}', {}, { timeout: 100 }); // never-ending loop with timeout: 100
+    await expect(promise).rejects.toThrow(); // only asserts that execute() rejects, not which error
+  });
+});

package/src/utils/sandbox.ts ADDED Viewed

@@ -0,0 +1,61 @@
+import * as vm from 'node:vm';
+export interface SandboxOptions {
+    timeout?: number; // handed to the vm `timeout` option on both paths; 5000 is used when unset or 0 (presumably milliseconds — confirm)
+    memoryLimit?: number; // passed to `new ivm.Isolate`; 128 is used when unset or 0; not used by the node:vm fallback (presumably MB — confirm)
+}
+export class SafeSandbox {
+    /**
+     * Execute a script in a secure sandbox
+     *
+     * Tries `isolated-vm` first (loaded with a dynamic import). When that
+     * module cannot be loaded, or anything on that path throws, the same code
+     * is run with `vm.runInNewContext` instead.
+     *
+     * NOTE(review): Node's documentation states that `node:vm` is not a
+     * security mechanism, so "secure" may not hold on the fallback path —
+     * confirm before running untrusted code here.
+     *
+     * @param code JavaScript source to evaluate.
+     * @param context Values exposed to the script as globals. On the
+     *   isolated-vm path `undefined` values are skipped and the rest are
+     *   copied in; on the fallback path a shallow copy of the object is used.
+     * @param options `timeout` (5000 when unset or 0) and `memoryLimit`
+     *   (128 when unset or 0; only passed to isolated-vm).
+     * @returns The value produced by the script; on the isolated-vm path a
+     *   `Reference` result is copied out before being returned.
+     * @throws Errors raised on the isolated-vm path are caught; errors from
+     *   the `vm.runInNewContext` fallback reject the returned promise.
+     */
+    static async execute(
+        code: string,
+        context: Record<string, unknown> = {},
+        options: SandboxOptions = {}
+    ): Promise<unknown> {
+        try {
+            // Try to use isolated-vm if available (dynamic import)
+            // Note: This will likely fail on Bun as it expects V8 host symbols
+            const ivm = await import('isolated-vm').then((m) => m.default || m).catch(() => null); // a failed import yields null instead of throwing
+            if (ivm && typeof ivm.Isolate === 'function') {
+                const isolate = new ivm.Isolate({ memoryLimit: options.memoryLimit || 128 });
+                try {
+                    const contextInstance = await isolate.createContext();
+                    const jail = contextInstance.global;
+                    // Set up global context: expose the isolate's own global object under the name `global`
+                    await jail.set('global', jail.derefInto());
+                    // Inject context variables
+                    for (const [key, value] of Object.entries(context)) {
+                        // Only copy non-undefined values
+                        if (value !== undefined) {
+                            await jail.set(key, new ivm.ExternalCopy(value).copyInto()); // a value that cannot be copied throws here and ends up in the outer catch
+                        }
+                    }
+                    const script = await isolate.compileScript(code);
+                    const result = await script.run(contextInstance, { timeout: options.timeout || 5000 });
+                    if (result && typeof result === 'object' && result instanceof ivm.Reference) {
+                        return await result.copy();
+                    }
+                    return result;
+                } finally {
+                    isolate.dispose(); // runs on success and on failure, before the outer catch sees any error
+                }
+            }
+        } catch (e) {
+            /* Fallback to node:vm if isolated-vm fails to load or run
+             * NOTE(review): this catch also covers compile errors, runtime
+             * errors and timeouts raised while running `code` under
+             * isolated-vm, so such a script is executed a second time below —
+             * confirm that is intended. The caught error is discarded. */
+        }
+        // Fallback implementation using node:vm (built-in)
+        const sandbox = { ...context }; // shallow copy; the caller's `context` object itself is not handed to vm
+        return vm.runInNewContext(code, sandbox, {
+            timeout: options.timeout || 5000,
+            displayErrors: true,
+        });
+    }
+}