npm - keystone-cli - Versions diffs - 0.3.2 → 0.4.0 - Mend

keystone-cli 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +18 -1
package/package.json +1 -1
package/src/db/workflow-db.ts +26 -7
package/src/expression/evaluator.ts +1 -0
package/src/parser/agent-parser.test.ts +8 -5
package/src/parser/schema.ts +8 -2
package/src/runner/audit-verification.test.ts +106 -0
package/src/runner/llm-adapter.ts +196 -4
package/src/runner/llm-clarification.test.ts +182 -0
package/src/runner/llm-executor.ts +118 -26
package/src/runner/mcp-manager.ts +4 -1
package/src/runner/mcp-server.test.ts +115 -1
package/src/runner/mcp-server.ts +161 -4
package/src/runner/shell-executor.ts +1 -1
package/src/runner/step-executor.test.ts +33 -10
package/src/runner/step-executor.ts +110 -14
package/src/runner/workflow-runner.test.ts +132 -0
package/src/runner/workflow-runner.ts +118 -23
package/src/templates/agents/keystone-architect.md +13 -6
package/src/ui/dashboard.tsx +32 -4
package/src/utils/auth-manager.test.ts +31 -0
package/src/utils/auth-manager.ts +21 -5
package/src/utils/json-parser.test.ts +35 -0
package/src/utils/json-parser.ts +95 -0
package/src/utils/mermaid.ts +12 -0
package/src/utils/sandbox.test.ts +12 -4
package/src/utils/sandbox.ts +69 -49

package/README.md CHANGED

@@ -259,6 +259,7 @@ Keystone supports several specialized step types:
 - `shell`: Run arbitrary shell commands.
 - `llm`: Prompt an agent and get structured or unstructured responses. Supports `schema` (JSON Schema) for structured output.
+  - `allowClarification`: Boolean (default `false`). If `true`, allows the LLM to ask clarifying questions back to the user or suspend the workflow if no human is available.
 - `request`: Make HTTP requests (GET, POST, etc.).
 - `file`: Read, write, or append to files.
 - `human`: Pause execution for manual confirmation or text input.
@@ -331,7 +332,23 @@ Keystone can itself act as an MCP server, allowing other agents (like Claude Des
 keystone mcp start
 ```
-> **Note:** Workflow execution via the Keystone MCP server is synchronous. This provides a better experience for agents as they receive the final results directly, though it means the connection remains open for the duration of the workflow run.
+#### Sync vs Async Execution
+The MCP server provides two modes for running workflows:
+| Tool | Mode | Use Case |
+|------|------|----------|
+| `run_workflow` | **Sync** | Short workflows. Blocks until completion, returns outputs directly. |
+| `start_workflow` | **Async** | Long workflows. Returns immediately with a `run_id`, use `get_run_status` to poll. |
+**Example: Async execution pattern**
+```
+1. Agent calls start_workflow → { run_id: "abc", status: "running" }
+2. Agent polls get_run_status → { status: "running" }
+3. Agent polls get_run_status → { status: "completed", outputs: {...} }
+```
+The async pattern is ideal for LLM-heavy workflows that may take minutes to complete.
 #### Global MCP Servers
 Define shared MCP servers in `.keystone/config.yaml` to reuse them across different workflows. Keystone ensures that multiple steps using the same global server will share a single running process.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "keystone-cli",
-  "version": "0.3.2",
+  "version": "0.4.0",
   "description": "A local-first, declarative, agentic workflow orchestrator built on Bun",
   "type": "module",
   "bin": {

package/src/db/workflow-db.ts CHANGED

@@ -1,7 +1,7 @@
 import { Database } from 'bun:sqlite';
 export type RunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'paused';
-export type StepStatus = 'pending' | 'running' | 'success' | 'failed' | 'skipped';
+export type StepStatus = 'pending' | 'running' | 'success' | 'failed' | 'skipped' | 'suspended';
 export interface WorkflowRun {
   id: string;
@@ -25,18 +25,35 @@ export interface StepExecution {
   started_at: string | null;
   completed_at: string | null;
   retry_count: number;
+  usage: string | null; // JSON
 }
 export class WorkflowDb {
   private db: Database;
-  constructor(dbPath = '.keystone/state.db') {
+  constructor(public readonly dbPath = '.keystone/state.db') {
     this.db = new Database(dbPath, { create: true });
     this.db.exec('PRAGMA journal_mode = WAL;'); // Write-ahead logging
     this.db.exec('PRAGMA foreign_keys = ON;'); // Enable foreign key enforcement
     this.initSchema();
   }
+  /**
+   * Type guard to check if an error is a SQLite busy error
+   */
+  private isSQLiteBusyError(error: unknown): boolean {
+    if (typeof error === 'object' && error !== null) {
+      const err = error as { code?: string | number; message?: string };
+      return (
+        err.code === 'SQLITE_BUSY' ||
+        err.code === 5 ||
+        (typeof err.message === 'string' &&
+          (err.message.includes('SQLITE_BUSY') || err.message.includes('database is locked')))
+      );
+    }
+    return false;
+  }
   /**
    * Retry wrapper for SQLite operations that may encounter SQLITE_BUSY errors
    * during high concurrency scenarios (e.g., foreach loops)
@@ -49,9 +66,8 @@ export class WorkflowDb {
         return operation();
       } catch (error) {
         // Check if this is a SQLITE_BUSY error
-        const errorMsg = error instanceof Error ? error.message : String(error);
-        if (errorMsg.includes('SQLITE_BUSY') || errorMsg.includes('database is locked')) {
-          lastError = error instanceof Error ? error : new Error(errorMsg);
+        if (this.isSQLiteBusyError(error)) {
+          lastError = error instanceof Error ? error : new Error(String(error));
           // Exponential backoff: 10ms, 20ms, 40ms, 80ms, 160ms
           const delayMs = 10 * 2 ** attempt;
           await Bun.sleep(delayMs);
@@ -89,6 +105,7 @@ export class WorkflowDb {
         started_at TEXT,
         completed_at TEXT,
         retry_count INTEGER DEFAULT 0,
+        usage TEXT,
         FOREIGN KEY (run_id) REFERENCES workflow_runs(id) ON DELETE CASCADE
       );
@@ -204,12 +221,13 @@ export class WorkflowDb {
     id: string,
     status: StepStatus,
     output?: unknown,
-    error?: string
+    error?: string,
+    usage?: unknown
   ): Promise<void> {
     await this.withRetry(() => {
       const stmt = this.db.prepare(`
         UPDATE step_executions
-        SET status = ?, output = ?, error = ?, completed_at = ?
+        SET status = ?, output = ?, error = ?, completed_at = ?, usage = ?
         WHERE id = ?
       `);
       stmt.run(
@@ -217,6 +235,7 @@ export class WorkflowDb {
         output ? JSON.stringify(output) : null,
         error || null,
         new Date().toISOString(),
+        usage ? JSON.stringify(usage) : null,
         id
       );
     });

package/src/expression/evaluator.ts CHANGED

@@ -31,6 +31,7 @@ export interface ExpressionContext {
   item?: unknown;
   index?: number;
   env?: Record<string, string>;
+  output?: unknown;
 }
 type ASTNode = jsep.Expression;

package/src/parser/agent-parser.test.ts CHANGED

@@ -1,16 +1,19 @@
-import { afterAll, describe, expect, it, spyOn } from 'bun:test';
+import { afterAll, beforeAll, describe, expect, it, spyOn } from 'bun:test';
 import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
 import * as os from 'node:os';
 import { join } from 'node:path';
 import { parseAgent, resolveAgentPath } from './agent-parser';
 describe('agent-parser', () => {
-  const tempDir = join(process.cwd(), 'temp-test-agents');
+  // Use a unique temp directory with random suffix to prevent parallel test collisions
+  const tempDir = join(
+    process.cwd(),
+    `temp-test-agents-${Date.now()}-${Math.random().toString(36).slice(2)}`
+  );
-  // Setup temp directory
-  try {
+  beforeAll(() => {
     mkdirSync(tempDir, { recursive: true });
-  } catch (e) {}
+  });
   afterAll(() => {
     try {

package/src/parser/schema.ts CHANGED

@@ -21,7 +21,7 @@ const RetrySchema = z.object({
 const BaseStepSchema = z.object({
   id: z.string(),
   type: z.string(),
-  needs: z.array(z.string()).default([]),
+  needs: z.array(z.string()).optional().default([]),
   if: z.string().optional(),
   timeout: z.number().int().positive().optional(),
   retry: RetrySchema.optional(),
@@ -58,15 +58,19 @@ const LlmStepSchema = BaseStepSchema.extend({
   tools: z.array(AgentToolSchema).optional(),
   maxIterations: z.number().int().positive().default(10),
   useGlobalMcp: z.boolean().optional(),
+  allowClarification: z.boolean().optional(),
   mcpServers: z
     .array(
       z.union([
         z.string(),
         z.object({
           name: z.string(),
-          command: z.string(),
+          type: z.enum(['local', 'remote']).optional(),
+          command: z.string().optional(),
           args: z.array(z.string()).optional(),
           env: z.record(z.string()).optional(),
+          url: z.string().optional(),
+          headers: z.record(z.string()).optional(),
         }),
       ])
     )
@@ -108,6 +112,7 @@ const SleepStepSchema = BaseStepSchema.extend({
 const ScriptStepSchema = BaseStepSchema.extend({
   type: z.literal('script'),
   run: z.string(),
+  allowInsecure: z.boolean().optional().default(false),
 });
 // ===== Discriminated Union for Steps =====
@@ -134,6 +139,7 @@ export const WorkflowSchema = z.object({
   inputs: z.record(InputSchema).optional(),
   outputs: z.record(z.string()).optional(),
   env: z.record(z.string()).optional(),
+  concurrency: z.union([z.number().int().positive(), z.string()]).optional(),
   steps: z.array(StepSchema),
   finally: z.array(StepSchema).optional(),
 });

package/src/runner/audit-verification.test.ts ADDED

@@ -0,0 +1,106 @@
+import { describe, expect, it, mock, spyOn } from 'bun:test';
+import { Redactor } from '../utils/redactor';
+import { SafeSandbox } from '../utils/sandbox';
+import { MCPManager } from './mcp-manager';
+// Type for accessing private methods in tests
+type MCPManagerPrivate = {
+  getServerKey(config: {
+    name: string;
+    type?: 'local' | 'remote';
+    command?: string;
+    args?: string[];
+    url?: string;
+  }): string;
+};
+describe('Audit Fixes Verification', () => {
+  describe('Secret Redaction', () => {
+    it('should redact secrets in text', () => {
+      const secrets = { MY_SECRET: 'super-secret-value' };
+      const redactor = new Redactor(secrets);
+      const input = 'This contains super-secret-value in the text.';
+      const result = redactor.redact(input);
+      expect(result).toContain('***REDACTED***');
+      expect(result).not.toContain('super-secret-value');
+    });
+    it('should handle partial matches correctly', () => {
+      const secrets = { MY_SECRET: 'abc123' };
+      const redactor = new Redactor(secrets);
+      const input = 'The value abc123 should be redacted.';
+      const result = redactor.redact(input);
+      expect(result).toContain('***REDACTED***');
+      expect(result).not.toContain('abc123');
+    });
+  });
+  describe('Sandbox Security', () => {
+    it('should throw by default if isolated-vm is missing and insecure fallback is disabled', async () => {
+      const code = '1 + 1';
+      expect(SafeSandbox.execute(code, {}, { allowInsecureFallback: false })).rejects.toThrow(
+        /secure sandbox failed/
+      );
+    });
+    it('should allow execution if allowInsecureFallback is true', async () => {
+      const code = '1 + 1';
+      const result = await SafeSandbox.execute(code, {}, { allowInsecureFallback: true });
+      expect(result).toBe(2);
+    });
+  });
+  describe('MCP Client Uniqueness', () => {
+    it('should generate unique keys for different ad-hoc configs with same name', async () => {
+      const manager = new MCPManager();
+      const config1 = {
+        name: 'test-server',
+        type: 'local' as const,
+        command: 'echo',
+        args: ['hello'],
+      };
+      const config2 = {
+        name: 'test-server',
+        type: 'local' as const,
+        command: 'echo',
+        args: ['world'],
+      };
+      const key1 = (manager as unknown as MCPManagerPrivate).getServerKey(config1);
+      const key2 = (manager as unknown as MCPManagerPrivate).getServerKey(config2);
+      expect(key1).not.toBe(key2);
+      expect(key1).toContain('hello');
+      expect(key2).toContain('world');
+    });
+    it('should generate unique keys for remote servers', async () => {
+      const manager = new MCPManager();
+      const config1 = {
+        name: 'remote-server',
+        type: 'remote' as const,
+        url: 'https://api1.example.com',
+      };
+      const config2 = {
+        name: 'remote-server',
+        type: 'remote' as const,
+        url: 'https://api2.example.com',
+      };
+      const key1 = (manager as unknown as MCPManagerPrivate).getServerKey(config1);
+      const key2 = (manager as unknown as MCPManagerPrivate).getServerKey(config2);
+      expect(key1).not.toBe(key2);
+      expect(key1).toContain('api1');
+      expect(key2).toContain('api2');
+    });
+  });
+});

package/src/runner/llm-adapter.ts CHANGED

@@ -39,7 +39,11 @@ export interface LLMTool {
 export interface LLMAdapter {
   chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse>;
 }
@@ -58,8 +62,14 @@ export class OpenAIAdapter implements LLMAdapter {
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
     const response = await fetch(`${this.baseUrl}/chat/completions`, {
       method: 'POST',
       headers: {
@@ -70,6 +80,7 @@ export class OpenAIAdapter implements LLMAdapter {
         model: options?.model || 'gpt-4o',
         messages,
         tools: options?.tools,
+        stream: isStreaming,
       }),
     });
@@ -78,6 +89,62 @@ export class OpenAIAdapter implements LLMAdapter {
       throw new Error(`OpenAI API error: ${response.status} ${response.statusText} - ${error}`);
     }
+    if (isStreaming) {
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: LLMToolCall[] = [];
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+        for (const line of lines) {
+          if (line.includes('[DONE]')) continue;
+          if (!line.startsWith('data: ')) continue;
+          try {
+            const data = JSON.parse(line.slice(6));
+            const delta = data.choices[0].delta;
+            if (delta.content) {
+              fullContent += delta.content;
+              options.onStream?.(delta.content);
+            }
+            if (delta.tool_calls) {
+              for (const tc of delta.tool_calls) {
+                if (!toolCalls[tc.index]) {
+                  toolCalls[tc.index] = {
+                    id: tc.id,
+                    type: 'function',
+                    function: { name: '', arguments: '' },
+                  };
+                }
+                const existing = toolCalls[tc.index];
+                if (tc.function?.name) existing.function.name += tc.function.name;
+                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
+              }
+            }
+          } catch (e) {
+            // Ignore parse errors for incomplete chunks
+          }
+        }
+      }
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
+        },
+      };
+    }
     const data = (await response.json()) as {
       choices: { message: LLMMessage }[];
       usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
@@ -104,8 +171,13 @@ export class AnthropicAdapter implements LLMAdapter {
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
     const system = messages.find((m) => m.role === 'system')?.content || undefined;
     // Anthropic requires alternating user/assistant roles.
@@ -201,6 +273,7 @@ export class AnthropicAdapter implements LLMAdapter {
         messages: anthropicMessages,
         tools: anthropicTools,
         max_tokens: 4096,
+        stream: isStreaming,
       }),
     });
@@ -209,6 +282,61 @@ export class AnthropicAdapter implements LLMAdapter {
       throw new Error(`Anthropic API error: ${response.status} ${response.statusText} - ${error}`);
     }
+    if (isStreaming) {
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: { id: string; name: string; inputString: string }[] = [];
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+        for (const line of lines) {
+          if (!line.startsWith('data: ')) continue;
+          try {
+            const data = JSON.parse(line.slice(6));
+            if (data.type === 'content_block_delta' && data.delta?.text) {
+              fullContent += data.delta.text;
+              options.onStream?.(data.delta.text);
+            }
+            if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
+              toolCalls.push({
+                id: data.content_block.id,
+                name: data.content_block.name,
+                inputString: '',
+              });
+            }
+            if (data.type === 'tool_use_delta' && data.delta?.partial_json) {
+              const lastTool = toolCalls[toolCalls.length - 1];
+              if (lastTool) lastTool.inputString += data.delta.partial_json;
+            }
+          } catch (e) {
+            // Ignore parse errors
+          }
+        }
+      }
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.map((tc) => ({
+            id: tc.id,
+            type: 'function',
+            function: { name: tc.name, arguments: tc.inputString },
+          })),
+        },
+      };
+    }
     const data = (await response.json()) as {
       content: {
         type: 'text' | 'tool_use';
@@ -256,8 +384,13 @@ export class CopilotAdapter implements LLMAdapter {
   async chat(
     messages: LLMMessage[],
-    options?: { model?: string; tools?: LLMTool[] }
+    options?: {
+      model?: string;
+      tools?: LLMTool[];
+      onStream?: (chunk: string) => void;
+    }
   ): Promise<LLMResponse> {
+    const isStreaming = !!options?.onStream;
     const token = await AuthManager.getCopilotToken();
     if (!token) {
       throw new Error('GitHub Copilot token not found. Please run "keystone auth login" first.');
@@ -276,6 +409,7 @@ export class CopilotAdapter implements LLMAdapter {
         model: options?.model || 'gpt-4o',
         messages,
         tools: options?.tools,
+        stream: isStreaming,
       }),
     });
@@ -284,6 +418,64 @@ export class CopilotAdapter implements LLMAdapter {
       throw new Error(`Copilot API error: ${response.status} ${response.statusText} - ${error}`);
     }
+    if (isStreaming) {
+      // Use the same streaming logic as OpenAIAdapter since Copilot uses OpenAI API
+      if (!response.body) throw new Error('Response body is null');
+      const reader = response.body.getReader();
+      const decoder = new TextDecoder();
+      let fullContent = '';
+      const toolCalls: LLMToolCall[] = [];
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        const chunk = decoder.decode(value);
+        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
+        for (const line of lines) {
+          if (line.includes('[DONE]')) continue;
+          if (!line.startsWith('data: ')) continue;
+          try {
+            const data = JSON.parse(line.slice(6));
+            if (!data.choices?.[0]?.delta) continue;
+            const delta = data.choices[0].delta;
+            if (delta.content) {
+              fullContent += delta.content;
+              options.onStream?.(delta.content);
+            }
+            if (delta.tool_calls) {
+              for (const tc of delta.tool_calls) {
+                if (!toolCalls[tc.index]) {
+                  toolCalls[tc.index] = {
+                    id: tc.id,
+                    type: 'function',
+                    function: { name: '', arguments: '' },
+                  };
+                }
+                const existing = toolCalls[tc.index];
+                if (tc.function?.name) existing.function.name += tc.function.name;
+                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
+              }
+            }
+          } catch (e) {
+            // Ignore parse errors
+          }
+        }
+      }
+      return {
+        message: {
+          role: 'assistant',
+          content: fullContent || null,
+          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
+        },
+      };
+    }
     const data = (await response.json()) as {
       choices: { message: LLMMessage }[];
       usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };