npm - keystone-cli - Versions diffs - 0.3.1 → 0.4.0 - Mend

keystone-cli 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +18 -1
package/package.json +1 -1
package/src/db/workflow-db.ts +26 -7
package/src/expression/evaluator.ts +1 -0
package/src/parser/agent-parser.test.ts +8 -5
package/src/parser/schema.ts +8 -2
package/src/runner/audit-verification.test.ts +106 -0
package/src/runner/llm-adapter.ts +196 -4
package/src/runner/llm-clarification.test.ts +182 -0
package/src/runner/llm-executor.ts +118 -26
package/src/runner/mcp-manager.ts +4 -1
package/src/runner/mcp-server.test.ts +115 -1
package/src/runner/mcp-server.ts +161 -4
package/src/runner/shell-executor.ts +1 -1
package/src/runner/step-executor.test.ts +33 -10
package/src/runner/step-executor.ts +110 -14
package/src/runner/workflow-runner.test.ts +132 -0
package/src/runner/workflow-runner.ts +118 -23
package/src/templates/agents/keystone-architect.md +21 -5
package/src/templates/full-feature-demo.yaml +5 -0
package/src/ui/dashboard.tsx +32 -4
package/src/utils/auth-manager.test.ts +31 -0
package/src/utils/auth-manager.ts +21 -5
package/src/utils/json-parser.test.ts +35 -0
package/src/utils/json-parser.ts +95 -0
package/src/utils/mermaid.ts +12 -0
package/src/utils/sandbox.test.ts +12 -4
package/src/utils/sandbox.ts +69 -49

package/src/runner/llm-clarification.test.ts ADDED Viewed

@@ -0,0 +1,182 @@
+import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import type { ExpressionContext } from '../expression/evaluator';
+import * as agentParser from '../parser/agent-parser';
+import type { Config } from '../parser/config-schema';
+import type { Agent, LlmStep, Step } from '../parser/schema';
+import { ConfigLoader } from '../utils/config-loader';
+import { type LLMMessage, OpenAIAdapter } from './llm-adapter';
+import { executeLlmStep } from './llm-executor';
+describe('LLM Clarification', () => {
+  const originalChat = OpenAIAdapter.prototype.chat;
+  beforeEach(() => {
+    spyOn(agentParser, 'resolveAgentPath').mockReturnValue('test-agent.md');
+    spyOn(agentParser, 'parseAgent').mockReturnValue({
+      name: 'test-agent',
+      systemPrompt: 'test system prompt',
+      tools: [],
+      model: 'gpt-4o',
+    } as unknown as Agent);
+    ConfigLoader.setConfig({
+      providers: {
+        openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
+      },
+      default_provider: 'openai',
+      model_mappings: {},
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
+      mcp_servers: {},
+    } as unknown as Config);
+  });
+  afterEach(() => {
+    OpenAIAdapter.prototype.chat = originalChat;
+    mock.restore();
+  });
+  it('should inject ask tool when allowClarification is true', async () => {
+    const step: LlmStep = {
+      id: 'test-step',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'test prompt',
+      allowClarification: true,
+      needs: [],
+      maxIterations: 10,
+    };
+    const context: ExpressionContext = {
+      inputs: {},
+      output: {},
+    };
+    const chatMock = mock(async () => ({
+      message: { role: 'assistant' as const, content: 'Final response' },
+      usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
+    }));
+    OpenAIAdapter.prototype.chat = chatMock;
+    const executeStepFn = mock(async () => ({ output: 'ok', status: 'success' as const }));
+    await executeLlmStep(step, context, executeStepFn);
+    expect(chatMock).toHaveBeenCalled();
+    const calls = chatMock.mock.calls as unknown[][];
+    const options = calls[0][1] as { tools?: { function: { name: string } }[] };
+    expect(options.tools).toBeDefined();
+    expect(options.tools?.some((t) => t.function.name === 'ask')).toBe(true);
+  });
+  it('should suspend in non-TTY when ask is called', async () => {
+    const originalIsTTY = process.stdin.isTTY;
+    // Assign directly to match step-executor.test.ts pattern
+    // @ts-ignore
+    process.stdin.isTTY = false;
+    try {
+      const step: LlmStep = {
+        id: 'test-step',
+        type: 'llm',
+        agent: 'test-agent',
+        prompt: 'test prompt',
+        allowClarification: true,
+        needs: [],
+        maxIterations: 10,
+      };
+      const context: ExpressionContext = {
+        inputs: {},
+        output: {},
+      };
+      const chatMock = mock(async () => ({
+        message: {
+          role: 'assistant' as const,
+          content: null,
+          tool_calls: [
+            {
+              id: 'call-ask',
+              type: 'function',
+              function: { name: 'ask', arguments: '{"question": "What is your name?"}' },
+            },
+          ],
+        },
+        usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
+      }));
+      OpenAIAdapter.prototype.chat = chatMock;
+      const executeStepFn = mock(async () => ({ output: 'ok', status: 'success' as const }));
+      const result = await executeLlmStep(step, context, executeStepFn);
+      expect(result.status).toBe('suspended');
+      const output = result.output as { question: string; messages: unknown[] };
+      expect(output.question).toBe('What is your name?');
+      expect(output.messages).toBeDefined();
+    } finally {
+      // @ts-ignore
+      process.stdin.isTTY = originalIsTTY;
+    }
+  });
+  it('should resume correctly when answer is provided', async () => {
+    const step: LlmStep = {
+      id: 'test-step',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'test prompt',
+      allowClarification: true,
+      needs: [],
+      maxIterations: 10,
+    };
+    const context: ExpressionContext = {
+      inputs: {
+        'test-step': { __answer: 'My name is Keystone' },
+      },
+      output: {
+        messages: [
+          { role: 'system', content: 'system prompt' },
+          { role: 'user', content: 'test prompt' },
+          {
+            role: 'assistant',
+            content: null,
+            tool_calls: [
+              {
+                id: 'call-ask',
+                type: 'function',
+                function: { name: 'ask', arguments: '{"question": "What is your name?"}' },
+              },
+            ],
+          },
+        ] as LLMMessage[],
+      },
+    };
+    const chatMock = mock(async () => ({
+      message: { role: 'assistant' as const, content: 'Hello Keystone' },
+      usage: { prompt_tokens: 10, completion_tokens: 10, total_tokens: 20 },
+    }));
+    OpenAIAdapter.prototype.chat = chatMock;
+    const executeStepFn = mock(async () => ({ output: 'ok', status: 'success' as const }));
+    const result = await executeLlmStep(step, context, executeStepFn);
+    expect(result.output).toBe('Hello Keystone');
+    expect(chatMock).toHaveBeenCalled();
+    const calls = chatMock.mock.calls as unknown[][];
+    const messages = calls[0][0] as {
+      role: string;
+      content: string | null;
+      tool_call_id?: string;
+    }[];
+    const toolMsg = messages[messages.length - 2];
+    expect(toolMsg.role).toBe('tool');
+    expect(toolMsg.content).toBe('My name is Keystone');
+    expect(toolMsg.tool_call_id).toBe('call-ask');
+  });
+});

package/src/runner/llm-executor.ts CHANGED Viewed

@@ -3,9 +3,10 @@ import type { ExpressionContext } from '../expression/evaluator';
 import { ExpressionEvaluator } from '../expression/evaluator';
 import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
 import type { AgentTool, LlmStep, Step } from '../parser/schema';
+import { Redactor } from '../utils/redactor';
 import { type LLMMessage, getAdapter } from './llm-adapter';
 import { MCPClient } from './mcp-client';
-import type { MCPManager } from './mcp-manager';
+import type { MCPManager, MCPServerConfig } from './mcp-manager';
 import type { StepResult } from './step-executor';
 import type { Logger } from './workflow-runner';
@@ -42,10 +43,30 @@ export async function executeLlmStep(
     systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.schema, null, 2)}`;
   }
-  const messages: LLMMessage[] = [
-    { role: 'system', content: systemPrompt },
-    { role: 'user', content: prompt },
-  ];
+  const messages: LLMMessage[] = [];
+  // Resume from state if provided
+  if (context.output && typeof context.output === 'object' && 'messages' in context.output) {
+    messages.push(...(context.output.messages as LLMMessage[]));
+    // If we have an answer in inputs, add it as a tool result for the last tool call
+    const stepInputs = context.inputs?.[step.id] as Record<string, unknown> | undefined;
+    if (stepInputs && typeof stepInputs === 'object' && '__answer' in stepInputs) {
+      const answer = stepInputs.__answer;
+      const lastMessage = messages[messages.length - 1];
+      const askCall = lastMessage?.tool_calls?.find((tc) => tc.function.name === 'ask');
+      if (askCall) {
+        messages.push({
+          role: 'tool',
+          tool_call_id: askCall.id,
+          name: 'ask',
+          content: String(answer),
+        });
+      }
+    }
+  } else {
+    messages.push({ role: 'system', content: systemPrompt }, { role: 'user', content: prompt });
+  }
   const localMcpClients: MCPClient[] = [];
   const allTools: ToolDefinition[] = [];
@@ -84,14 +105,15 @@ export async function executeLlmStep(
     }
     // 3. Add MCP tools
-    const mcpServersToConnect = [...(step.mcpServers || [])];
+    const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
     if (step.useGlobalMcp && mcpManager) {
       const globalServers = mcpManager.getGlobalServers();
       for (const globalServer of globalServers) {
         // Only add if not already explicitly listed
-        const alreadyListed = mcpServersToConnect.some((s) =>
-          typeof s === 'string' ? s === globalServer.name : s.name === globalServer.name
-        );
+        const alreadyListed = mcpServersToConnect.some((s) => {
+          const name = typeof s === 'string' ? s : s.name;
+          return name === globalServer.name;
+        });
         if (!alreadyListed) {
           mcpServersToConnect.push(globalServer);
         }
@@ -103,7 +125,7 @@ export async function executeLlmStep(
         let client: MCPClient | undefined;
         if (mcpManager) {
-          client = await mcpManager.getClient(server, logger);
+          client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
         } else {
           // Fallback if no manager (should not happen in normal workflow run)
           if (typeof server === 'string') {
@@ -113,9 +135,9 @@ export async function executeLlmStep(
           logger.log(`  🔌 Connecting to MCP server: ${server.name}`);
           try {
             client = await MCPClient.createLocal(
-              server.command,
-              server.args || [],
-              server.env || {}
+              (server as MCPServerConfig).command || 'node',
+              (server as MCPServerConfig).args || [],
+              (server as MCPServerConfig).env || {}
             );
             await client.initialize();
             localMcpClients.push(client);
@@ -123,7 +145,9 @@ export async function executeLlmStep(
             logger.error(
               `  ✗ Failed to connect to MCP server ${server.name}: ${error instanceof Error ? error.message : String(error)}`
             );
-            client.stop();
+            if (client) {
+              client.stop();
+            }
             client = undefined;
           }
         }
@@ -144,44 +168,76 @@ export async function executeLlmStep(
     }
     const llmTools = allTools.map((t) => ({
-      type: 'function',
+      type: 'function' as const,
       function: {
         name: t.name,
         description: t.description,
-        parameters: t.parameters,
+        parameters: t.parameters as Record<string, unknown>,
       },
     }));
+    if (step.allowClarification) {
+      llmTools.push({
+        type: 'function' as const,
+        function: {
+          name: 'ask',
+          description:
+            'Ask the user a clarifying question if the initial request is ambiguous or missing information.',
+          parameters: {
+            type: 'object',
+            properties: {
+              question: {
+                type: 'string',
+                description: 'The question to ask the user',
+              },
+            },
+            required: ['question'],
+          } as Record<string, unknown>,
+        },
+      });
+    }
     // ReAct Loop
     let iterations = 0;
     const maxIterations = step.maxIterations || 10;
+    const totalUsage = {
+      prompt_tokens: 0,
+      completion_tokens: 0,
+      total_tokens: 0,
+    };
     while (iterations < maxIterations) {
       iterations++;
+      const redactor = new Redactor(context.secrets || {});
       const response = await adapter.chat(messages, {
         model: resolvedModel,
         tools: llmTools.length > 0 ? llmTools : undefined,
+        onStream: (chunk) => {
+          if (!step.schema) {
+            process.stdout.write(redactor.redact(chunk));
+          }
+        },
       });
+      if (response.usage) {
+        totalUsage.prompt_tokens += response.usage.prompt_tokens;
+        totalUsage.completion_tokens += response.usage.completion_tokens;
+        totalUsage.total_tokens += response.usage.total_tokens;
+      }
       const { message } = response;
       messages.push(message);
-      if (message.content && !step.schema) {
-        logger.log(`\n${message.content}`);
-      }
       if (!message.tool_calls || message.tool_calls.length === 0) {
         let output = message.content;
         // If schema is defined, attempt to parse JSON
         if (step.schema && typeof output === 'string') {
           try {
-            // Attempt to extract JSON if wrapped in markdown code blocks or just finding the first {
-            const jsonMatch =
-              output.match(/```(?:json)?\s*([\s\S]*?)\s*```/i) || output.match(/\{[\s\S]*\}/);
-            const jsonStr = jsonMatch ? jsonMatch[1] || jsonMatch[0] : output;
-            output = JSON.parse(jsonStr);
+            const { extractJson } = await import('../utils/json-parser');
+            output = extractJson(output);
           } catch (e) {
             throw new Error(
               `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}\nOutput: ${output}`
@@ -192,6 +248,7 @@ export async function executeLlmStep(
         return {
           output,
           status: 'success',
+          usage: totalUsage,
         };
       }
@@ -201,6 +258,41 @@ export async function executeLlmStep(
         const toolInfo = allTools.find((t) => t.name === toolCall.function.name);
         if (!toolInfo) {
+          if (toolCall.function.name === 'ask' && step.allowClarification) {
+            const args = JSON.parse(toolCall.function.arguments) as { question: string };
+            if (process.stdin.isTTY) {
+              // In TTY, we can use a human step to get the answer immediately
+              logger.log(`\n🤔 Question from ${agent.name}: ${args.question}`);
+              const result = await executeStepFn(
+                {
+                  id: `${step.id}-clarify`,
+                  type: 'human',
+                  message: args.question,
+                  inputType: 'text',
+                } as Step,
+                context
+              );
+              messages.push({
+                role: 'tool',
+                tool_call_id: toolCall.id,
+                name: 'ask',
+                content: String(result.output),
+              });
+              continue;
+            }
+            // In non-TTY, we suspend
+            return {
+              status: 'suspended',
+              output: {
+                messages,
+                question: args.question,
+              },
+              usage: totalUsage,
+            };
+          }
           messages.push({
             role: 'tool',
             tool_call_id: toolCall.id,
@@ -233,7 +325,7 @@ export async function executeLlmStep(
           // Execute the tool as a step
           const toolContext: ExpressionContext = {
             ...context,
-            args,
+            item: args, // Use item to pass args to tool execution
           };
           const result = await executeStepFn(toolInfo.execution, toolContext);

package/src/runner/mcp-manager.ts CHANGED Viewed

@@ -135,7 +135,10 @@ export class MCPManager {
   }
   private getServerKey(config: MCPServerConfig): string {
-    return config.name;
+    if (config.type === 'remote') {
+      return `remote:${config.name}:${config.url}`;
+    }
+    return `local:${config.name}:${config.command}:${(config.args || []).join(' ')}`;
   }
   getGlobalServers(): MCPServerConfig[] {

package/src/runner/mcp-server.test.ts CHANGED Viewed

@@ -38,7 +38,7 @@ describe('MCPServer', () => {
       method: 'tools/list',
     });
-    expect(response?.result?.tools).toHaveLength(5);
+    expect(response?.result?.tools).toHaveLength(7);
     // @ts-ignore
     expect(response?.result?.tools?.map((t) => t.name)).toContain('run_workflow');
   });
@@ -249,4 +249,118 @@ describe('MCPServer', () => {
     writeSpy.mockRestore();
     consoleSpy.mockRestore();
   });
+  it('should call start_workflow tool and return immediately', async () => {
+    spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
+    // @ts-ignore
+    spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
+      name: 'test-wf',
+      steps: [],
+    });
+    // Mock WorkflowRunner - simulate a slow workflow
+    const mockRun = mock(
+      () => new Promise((resolve) => setTimeout(() => resolve({ result: 'ok' }), 100))
+    );
+    // @ts-ignore
+    spyOn(WorkflowRunner.prototype, 'run').mockImplementation(mockRun);
+    spyOn(WorkflowRunner.prototype, 'getRunId').mockReturnValue('async-run-123');
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 10,
+      method: 'tools/call',
+      params: {
+        name: 'start_workflow',
+        arguments: { workflow_name: 'test-wf', inputs: {} },
+      },
+    });
+    const result = JSON.parse(response?.result?.content?.[0]?.text);
+    expect(result.status).toBe('running');
+    expect(result.run_id).toBe('async-run-123');
+    expect(result.hint).toContain('get_run_status');
+  });
+  it('should call get_run_status tool for running workflow', async () => {
+    const runId = 'status-test-run';
+    await db.createRun(runId, 'test-wf', { foo: 'bar' });
+    await db.updateRunStatus(runId, 'running');
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 11,
+      method: 'tools/call',
+      params: { name: 'get_run_status', arguments: { run_id: runId } },
+    });
+    const status = JSON.parse(response?.result?.content?.[0]?.text);
+    expect(status.run_id).toBe(runId);
+    expect(status.workflow).toBe('test-wf');
+    expect(status.status).toBe('running');
+    expect(status.hint).toContain('still running');
+  });
+  it('should call get_run_status tool for completed workflow', async () => {
+    const runId = 'completed-test-run';
+    await db.createRun(runId, 'test-wf', {});
+    await db.updateRunStatus(runId, 'completed', { output: 'done' });
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 12,
+      method: 'tools/call',
+      params: { name: 'get_run_status', arguments: { run_id: runId } },
+    });
+    const status = JSON.parse(response?.result?.content?.[0]?.text);
+    expect(status.status).toBe('completed');
+    expect(status.outputs).toEqual({ output: 'done' });
+    expect(status.hint).toBeUndefined();
+  });
+  it('should call get_run_status tool for failed workflow', async () => {
+    const runId = 'failed-test-run';
+    await db.createRun(runId, 'test-wf', {});
+    await db.updateRunStatus(runId, 'failed', undefined, 'Something went wrong');
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 13,
+      method: 'tools/call',
+      params: { name: 'get_run_status', arguments: { run_id: runId } },
+    });
+    const status = JSON.parse(response?.result?.content?.[0]?.text);
+    expect(status.status).toBe('failed');
+    expect(status.error).toBe('Something went wrong');
+  });
+  it('should call get_run_status tool for paused workflow', async () => {
+    const runId = 'paused-test-run';
+    await db.createRun(runId, 'test-wf', {});
+    await db.updateRunStatus(runId, 'paused');
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 14,
+      method: 'tools/call',
+      params: { name: 'get_run_status', arguments: { run_id: runId } },
+    });
+    const status = JSON.parse(response?.result?.content?.[0]?.text);
+    expect(status.status).toBe('paused');
+    expect(status.hint).toContain('answer_human_input');
+  });
+  it('should return error for non-existent run in get_run_status', async () => {
+    const response = await handleMessage({
+      jsonrpc: '2.0',
+      id: 15,
+      method: 'tools/call',
+      params: { name: 'get_run_status', arguments: { run_id: 'non-existent' } },
+    });
+    expect(response?.error?.message).toContain('not found');
+  });
 });