keystone-cli 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -8,12 +8,16 @@ describe('WorkflowParser', () => {
  const tempDir = join(process.cwd(), 'temp-test-workflows');
  try {
  mkdirSync(tempDir, { recursive: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore existing dir error
+ }

  afterAll(() => {
  try {
  rmSync(tempDir, { recursive: true, force: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore cleanup error
+ }
  });
  describe('topologicalSort', () => {
  test('should sort simple dependencies', () => {
@@ -53,18 +53,22 @@ export class WorkflowParser {
  const detected = new Set<string>();

  // Helper to scan any value for dependencies
- const scan = (value: unknown) => {
+ const scan = (value: unknown, depth = 0) => {
+ if (depth > 100) {
+ throw new Error('Maximum expression nesting depth exceeded (potential DOS attack)');
+ }
+
  if (typeof value === 'string') {
  for (const dep of ExpressionEvaluator.findStepDependencies(value)) {
  detected.add(dep);
  }
  } else if (Array.isArray(value)) {
  for (const item of value) {
- scan(item);
+ scan(item, depth + 1);
  }
  } else if (value && typeof value === 'object') {
  for (const val of Object.values(value)) {
- scan(val);
+ scan(val, depth + 1);
  }
  }
  };
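
The new `depth` parameter bounds recursion when scanning nested workflow values, so a maliciously deep document cannot blow the stack or stall the parser. A standalone sketch of the same guard (the limit of 100 matches the diff; the `visit` callback stands in for keystone's dependency extraction):

```ts
// Depth-limited recursive traversal: strings are visited, arrays and
// objects recurse with depth + 1, and depth > 100 aborts the walk.
function walk(value: unknown, visit: (s: string) => void, depth = 0): void {
  if (depth > 100) {
    throw new Error('Maximum expression nesting depth exceeded');
  }
  if (typeof value === 'string') {
    visit(value);
  } else if (Array.isArray(value)) {
    for (const item of value) walk(item, visit, depth + 1);
  } else if (value && typeof value === 'object') {
    for (const val of Object.values(value)) walk(val, visit, depth + 1);
  }
}
```
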
@@ -187,6 +191,15 @@ export class WorkflowParser {
  inDegree.set(step.id, step.needs.length);
  }

+ // Build reverse dependency map for O(1) lookups instead of O(n)
+ const dependents = new Map<string, string[]>();
+ for (const step of workflow.steps) {
+ for (const dep of step.needs) {
+ if (!dependents.has(dep)) dependents.set(dep, []);
+ dependents.get(dep)?.push(step.id);
+ }
+ }
+
  // Kahn's algorithm
  const queue: string[] = [];
  const result: string[] = [];
@@ -203,14 +216,12 @@
  if (!stepId) continue;
  result.push(stepId);

- // Find all steps that depend on this step
- for (const step of workflow.steps) {
- if (step.needs.includes(stepId)) {
- const newDegree = (inDegree.get(step.id) || 0) - 1;
- inDegree.set(step.id, newDegree);
- if (newDegree === 0) {
- queue.push(step.id);
- }
+ // Find all steps that depend on this step (O(1) lookup)
+ for (const dependentId of dependents.get(stepId) || []) {
+ const newDegree = (inDegree.get(dependentId) || 0) - 1;
+ inDegree.set(dependentId, newDegree);
+ if (newDegree === 0) {
+ queue.push(dependentId);
  }
  }
  }
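
Precomputing `dependents` (the reverse of `needs`) replaces the inner scan over every step with a direct map lookup, taking the sort from O(V·E) to O(V + E). A self-contained sketch of Kahn's algorithm in this shape (the `Node` type is illustrative, not keystone's schema):

```ts
type Node = { id: string; needs: string[] };

// Kahn's algorithm with a precomputed reverse-dependency map.
function topologicalSort(nodes: Node[]): string[] {
  const inDegree = new Map<string, number>();
  const dependents = new Map<string, string[]>();
  for (const node of nodes) {
    inDegree.set(node.id, node.needs.length);
    for (const dep of node.needs) {
      if (!dependents.has(dep)) dependents.set(dep, []);
      dependents.get(dep)?.push(node.id);
    }
  }
  // Start from nodes with no dependencies.
  const queue = nodes.filter((n) => n.needs.length === 0).map((n) => n.id);
  const result: string[] = [];
  while (queue.length > 0) {
    const id = queue.shift();
    if (!id) continue;
    result.push(id);
    for (const dependentId of dependents.get(id) || []) {
      const newDegree = (inDegree.get(dependentId) || 0) - 1;
      inDegree.set(dependentId, newDegree);
      if (newDegree === 0) queue.push(dependentId);
    }
  }
  if (result.length !== nodes.length) throw new Error('Dependency cycle detected');
  return result;
}
```
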
@@ -40,17 +40,21 @@ describe('Audit Fixes Verification', () => {
  });

  describe('Sandbox Security', () => {
- it('should throw by default if isolated-vm is missing and insecure fallback is disabled', async () => {
+ it('should execute code using node:vm sandbox on Bun', async () => {
+ // Since Bun uses JSC (not V8), isolated-vm cannot work.
+ // The sandbox now uses node:vm directly with security warnings.
+ SafeSandbox.resetWarning();
  const code = '1 + 1';
- expect(SafeSandbox.execute(code, {}, { allowInsecureFallback: false })).rejects.toThrow(
- /secure sandbox failed/
- );
+ const result = await SafeSandbox.execute(code, {});
+ expect(result).toBe(2);
  });

- it('should allow execution if allowInsecureFallback is true', async () => {
- const code = '1 + 1';
- const result = await SafeSandbox.execute(code, {}, { allowInsecureFallback: true });
- expect(result).toBe(2);
+ it('should show security warning on first execution', async () => {
+ SafeSandbox.resetWarning();
+ const code = '2 + 2';
+ const result = await SafeSandbox.execute(code, {});
+ expect(result).toBe(4);
+ // Warning is shown to stderr, we just verify execution works
  });
  });

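
The rewritten tests reflect a sandbox that now runs on `node:vm` under Bun: isolated-vm requires V8, while Bun embeds JavaScriptCore, so the V8-based isolate can never load there. A minimal sketch of expression evaluation with `node:vm`; note that vm isolates variable scope but is not a security boundary, which is why the sandbox emits a one-time warning:

```ts
import vm from 'node:vm';

// Evaluate an expression against a fresh context with a wall-clock timeout.
// node:vm sandboxes scope only; untrusted code can still escape it,
// hence the stderr warning the tests reference.
function evaluate(code: string, scope: Record<string, unknown>): unknown {
  const context = vm.createContext({ ...scope });
  return vm.runInContext(code, context, { timeout: 100 });
}

console.log(evaluate('1 + 1', {})); // 2
```
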
@@ -1,6 +1,9 @@
  import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
  import { ConfigLoader } from '../utils/config-loader';

+ // Maximum response size to prevent memory exhaustion (1MB)
+ const MAX_RESPONSE_SIZE = 1024 * 1024;
+
  export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
@@ -112,6 +115,9 @@ export class OpenAIAdapter implements LLMAdapter {
  const delta = data.choices[0].delta;

  if (delta.content) {
+ if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += delta.content;
  options.onStream?.(delta.content);
  }
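
The check runs before appending, so accumulated streaming output never exceeds the 1 MB cap even transiently. One nit the diff inherits: `String.length` counts UTF-16 code units, not bytes, so the "bytes" in the error message is approximate. A reduced sketch of the guard:

```ts
// 1 MB cap on accumulated streamed content (in UTF-16 code units).
const MAX_RESPONSE_SIZE = 1024 * 1024;

function appendChunk(fullContent: string, chunk: string): string {
  // Refuse growth past the cap *before* appending.
  if (fullContent.length + chunk.length > MAX_RESPONSE_SIZE) {
    throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE}`);
  }
  return fullContent + chunk;
}
```
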
@@ -287,7 +293,8 @@ export class AnthropicAdapter implements LLMAdapter {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let fullContent = '';
- const toolCalls: { id: string; name: string; inputString: string }[] = [];
+ // Track tool calls by content block index for robust correlation
+ const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();

  while (true) {
  const { done, value } = await reader.read();
@@ -302,21 +309,43 @@
  try {
  const data = JSON.parse(line.slice(6));
  if (data.type === 'content_block_delta' && data.delta?.text) {
+ if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += data.delta.text;
  options.onStream?.(data.delta.text);
  }

+ // Track tool calls by their index in the content blocks
  if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
- toolCalls.push({
- id: data.content_block.id,
- name: data.content_block.name,
+ const index = data.index ?? toolCallsMap.size;
+ toolCallsMap.set(index, {
+ id: data.content_block.id || '',
+ name: data.content_block.name || '',
  inputString: '',
  });
  }

- if (data.type === 'tool_use_delta' && data.delta?.partial_json) {
- const lastTool = toolCalls[toolCalls.length - 1];
- if (lastTool) lastTool.inputString += data.delta.partial_json;
+ // Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
+ if (
+ data.type === 'content_block_delta' &&
+ data.delta?.type === 'input_json_delta' &&
+ data.delta?.partial_json
+ ) {
+ const index = data.index;
+ const toolCall = toolCallsMap.get(index);
+ if (toolCall) {
+ toolCall.inputString += data.delta.partial_json;
+ }
+ }
+
+ // Update tool call ID if it arrives later (some edge cases)
+ if (data.type === 'content_block_delta' && data.content_block?.id) {
+ const index = data.index;
+ const toolCall = toolCallsMap.get(index);
+ if (toolCall && !toolCall.id) {
+ toolCall.id = data.content_block.id;
+ }
  }
  } catch (e) {
  // Ignore parse errors
@@ -324,15 +353,20 @@ export class AnthropicAdapter implements LLMAdapter {
  }
  }

+ // Convert map to array and filter out incomplete tool calls
+ const toolCalls = Array.from(toolCallsMap.values())
+ .filter((tc) => tc.id && tc.name) // Only include complete tool calls
+ .map((tc) => ({
+ id: tc.id,
+ type: 'function' as const,
+ function: { name: tc.name, arguments: tc.inputString },
+ }));
+
  return {
  message: {
  role: 'assistant',
  content: fullContent || null,
- tool_calls: toolCalls.map((tc) => ({
- id: tc.id,
- type: 'function',
- function: { name: tc.name, arguments: tc.inputString },
- })),
+ tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
  },
  };
  }
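
Keying tool calls by the event's `index` matches Anthropic's streaming protocol, where `content_block_start` opens a block and later `input_json_delta` fragments reference it by index; the old code appended to the last array element and listened for a `tool_use_delta` event type the API does not emit. A reduced sketch of the correlation logic (event shape trimmed to the fields used):

```ts
type ToolDraft = { id: string; name: string; inputString: string };
type StreamEvent = {
  type: string;
  index?: number;
  content_block?: { type?: string; id?: string; name?: string };
  delta?: { type?: string; partial_json?: string };
};

const drafts = new Map<number, ToolDraft>();

function onEvent(evt: StreamEvent): void {
  if (evt.type === 'content_block_start' && evt.content_block?.type === 'tool_use') {
    drafts.set(evt.index ?? drafts.size, {
      id: evt.content_block.id || '',
      name: evt.content_block.name || '',
      inputString: '',
    });
  } else if (evt.type === 'content_block_delta' && evt.delta?.type === 'input_json_delta') {
    // Append the JSON fragment to the block it belongs to, not to "the last one".
    const draft = drafts.get(evt.index ?? -1);
    if (draft) draft.inputString += evt.delta.partial_json ?? '';
  }
}
```
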
@@ -443,6 +477,9 @@ export class CopilotAdapter implements LLMAdapter {
  const delta = data.choices[0].delta;

  if (delta.content) {
+ if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += delta.content;
  options.onStream?.(delta.content);
  }
@@ -132,13 +132,13 @@ describe('llm-executor', () => {
  beforeAll(() => {
  // Mock spawn to avoid actual process creation
  const mockProcess = Object.assign(new EventEmitter(), {
- stdout: new Readable({ read() {} }),
+ stdout: new Readable({ read() { } }),
  stdin: new Writable({
  write(_chunk, _encoding, cb: (error?: Error | null) => void) {
  cb();
  },
  }),
- kill: mock(() => {}),
+ kill: mock(() => { }),
  });
  spawnSpy = spyOn(child_process, 'spawn').mockReturnValue(
  mockProcess as unknown as child_process.ChildProcess
@@ -146,7 +146,9 @@ describe('llm-executor', () => {

  try {
  mkdirSync(agentsDir, { recursive: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore error during cleanup
+ }
  const agentContent = `---
  name: test-agent
  model: gpt-4
@@ -196,6 +198,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -216,6 +219,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger tool',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };

@@ -242,6 +246,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'give me json',
  needs: [],
+ maxIterations: 10,
  schema: {
  type: 'object',
  properties: {
@@ -261,19 +266,64 @@ You are a test agent.`;
  expect(result.output).toEqual({ foo: 'bar' });
  });

- it('should throw error if JSON parsing fails for schema', async () => {
+ it('should retry if LLM output fails schema validation', async () => {
+ const step: LlmStep = {
+ id: 'l1',
+ type: 'llm',
+ agent: 'test-agent',
+ prompt: 'give me invalid json',
+ needs: [],
+ maxIterations: 10,
+ schema: { type: 'object' },
+ };
+ const context: ExpressionContext = { inputs: {}, steps: {} };
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+
+ const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
+ const originalCopilotChatInner = CopilotAdapter.prototype.chat;
+ const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
+
+ let attempt = 0;
+ const mockChat = mock(async () => {
+ attempt++;
+ if (attempt === 1) {
+ return { message: { role: 'assistant', content: 'Not JSON' } };
+ }
+ return { message: { role: 'assistant', content: '{"success": true}' } };
+ }) as unknown as typeof originalOpenAIChat;
+
+ OpenAIAdapter.prototype.chat = mockChat;
+ CopilotAdapter.prototype.chat = mockChat;
+ AnthropicAdapter.prototype.chat = mockChat;
+
+ const result = await executeLlmStep(
+ step,
+ context,
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
+ );
+
+ expect(result.status).toBe('success');
+ expect(result.output).toEqual({ success: true });
+ expect(attempt).toBe(2);
+
+ OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
+ CopilotAdapter.prototype.chat = originalCopilotChatInner;
+ AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
+ });
+
+ it('should fail after max iterations if JSON remains invalid', async () => {
  const step: LlmStep = {
  id: 'l1',
  type: 'llm',
  agent: 'test-agent',
  prompt: 'give me invalid json',
  needs: [],
+ maxIterations: 3,
  schema: { type: 'object' },
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));

- // Mock response with invalid JSON
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
  const originalCopilotChatInner = CopilotAdapter.prototype.chat;
  const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
@@ -292,7 +342,7 @@ You are a test agent.`;
  context,
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
  )
- ).rejects.toThrow(/Failed to parse LLM output as JSON/);
+ ).rejects.toThrow('Max ReAct iterations reached');

  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
  CopilotAdapter.prototype.chat = originalCopilotChatInner;
@@ -306,6 +356,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger unknown tool',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -359,6 +410,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  mcpServers: [{ name: 'fail-mcp', command: 'node', args: [] }],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -370,7 +422,7 @@ You are a test agent.`;
  spyOn(client, 'stop').mockReturnValue(undefined);
  return client;
  });
- const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });

  await executeLlmStep(
  step,
@@ -379,7 +431,7 @@ You are a test agent.`;
  );

  expect(consoleSpy).toHaveBeenCalledWith(
- expect.stringContaining('Failed to connect to MCP server fail-mcp')
+ expect.stringContaining('Failed to list tools from MCP server fail-mcp')
  );
  createLocalSpy.mockRestore();
  consoleSpy.mockRestore();
@@ -392,6 +444,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger mcp tool',
  needs: [],
+ maxIterations: 10,
  mcpServers: [{ name: 'test-mcp', command: 'node', args: [] }],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -446,13 +499,15 @@ You are a test agent.`;
  it('should use global MCP servers when useGlobalMcp is true', async () => {
  ConfigLoader.setConfig({
  mcp_servers: {
- 'global-mcp': { command: 'node', args: ['server.js'] },
+ 'global-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
  },
  providers: {
- openai: { apiKey: 'test' },
+ openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
  },
  model_mappings: {},
  default_provider: 'openai',
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
  });

  const manager = new MCPManager();
@@ -462,6 +517,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  useGlobalMcp: true,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -510,6 +566,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger adhoc tool',
  needs: [],
+ maxIterations: 10,
  tools: [
  {
  name: 'adhoc-tool',
@@ -547,11 +604,12 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  mcpServers: ['some-global-server'],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
- const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });

  await executeLlmStep(
  step,
@@ -571,11 +629,13 @@ You are a test agent.`;
  it('should not add global MCP server if already explicitly listed', async () => {
  ConfigLoader.setConfig({
  mcp_servers: {
- 'test-mcp': { command: 'node', args: ['server.js'] },
+ 'test-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
  },
- providers: { openai: { apiKey: 'test' } },
+ providers: { openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' } },
  model_mappings: {},
  default_provider: 'openai',
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
  });

  const manager = new MCPManager();
@@ -585,6 +645,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  useGlobalMcp: true,
  mcpServers: [{ name: 'test-mcp', command: 'node', args: ['local.js'] }],
  };
@@ -636,6 +697,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: '${{ steps.prev.output }}' as unknown as string,
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = {
  inputs: {},
@@ -3,7 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator';
  import { ExpressionEvaluator } from '../expression/evaluator';
  import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
  import type { AgentTool, LlmStep, Step } from '../parser/schema';
- import { Redactor } from '../utils/redactor';
+ import { extractJson } from '../utils/json-parser';
+ import { RedactionBuffer, Redactor } from '../utils/redactor';
  import { type LLMMessage, getAdapter } from './llm-adapter';
  import { MCPClient } from './mcp-client';
  import type { MCPManager, MCPServerConfig } from './mcp-manager';
@@ -121,50 +122,54 @@ export async function executeLlmStep(
  }

  if (mcpServersToConnect.length > 0) {
- for (const server of mcpServersToConnect) {
- let client: MCPClient | undefined;
-
- if (mcpManager) {
- client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
- } else {
- // Fallback if no manager (should not happen in normal workflow run)
- if (typeof server === 'string') {
- logger.error(` ✗ Cannot reference global MCP server '${server}' without MCPManager`);
- continue;
- }
- logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+ await Promise.all(
+ mcpServersToConnect.map(async (server) => {
+ let client: MCPClient | undefined;
+ const serverName = typeof server === 'string' ? server : server.name;
+
  try {
- client = await MCPClient.createLocal(
- (server as MCPServerConfig).command || 'node',
- (server as MCPServerConfig).args || [],
- (server as MCPServerConfig).env || {}
- );
- await client.initialize();
- localMcpClients.push(client);
+ if (mcpManager) {
+ client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
+ } else {
+ // Fallback if no manager (should not happen in normal workflow run)
+ if (typeof server === 'string') {
+ logger.error(
+ ` ✗ Cannot reference global MCP server '${server}' without MCPManager`
+ );
+ return;
+ }
+ logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+ client = await MCPClient.createLocal(
+ (server as MCPServerConfig).command || 'node',
+ (server as MCPServerConfig).args || [],
+ (server as MCPServerConfig).env || {}
+ );
+ await client.initialize();
+ localMcpClients.push(client);
+ }
+
+ if (client) {
+ const mcpTools = await client.listTools();
+ for (const tool of mcpTools) {
+ allTools.push({
+ name: tool.name,
+ description: tool.description,
+ parameters: tool.inputSchema,
+ source: 'mcp',
+ mcpClient: client,
+ });
+ }
+ }
  } catch (error) {
  logger.error(
- ` ✗ Failed to connect to MCP server ${server.name}: ${error instanceof Error ? error.message : String(error)}`
+ ` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
  );
- if (client) {
+ if (!mcpManager && client) {
  client.stop();
  }
- client = undefined;
  }
- }
-
- if (client) {
- const mcpTools = await client.listTools();
- for (const tool of mcpTools) {
- allTools.push({
- name: tool.name,
- description: tool.description,
- parameters: tool.inputSchema,
- source: 'mcp',
- mcpClient: client,
- });
- }
- }
- }
+ })
+ );
  }

  const llmTools = allTools.map((t) => ({
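
Moving the per-server body into `Promise.all(...map(...))` connects MCP servers concurrently, and widening the `try` to cover `listTools()` means one misbehaving server logs an error and drops out instead of aborting the whole step (hence the test's new "Failed to list tools" message). A reduced sketch of the pattern:

```ts
// Connect to many servers concurrently; each failure is isolated to its server.
async function connectAll(
  servers: string[],
  connect: (name: string) => Promise<void>
): Promise<void> {
  await Promise.all(
    servers.map(async (name) => {
      try {
        await connect(name);
      } catch (error) {
        console.error(`Failed to list tools from MCP server ${name}: ${String(error)}`);
        // Swallow, don't rethrow: the other servers' results survive.
      }
    })
  );
}
```
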
@@ -206,21 +211,27 @@ export async function executeLlmStep(
  total_tokens: 0,
  };

+ // Create redactor once outside the loop for performance (regex compilation)
+ const redactor = new Redactor(context.secrets || {});
+ const redactionBuffer = new RedactionBuffer(redactor);
+
  while (iterations < maxIterations) {
  iterations++;

- const redactor = new Redactor(context.secrets || {});
-
  const response = await adapter.chat(messages, {
  model: resolvedModel,
  tools: llmTools.length > 0 ? llmTools : undefined,
  onStream: (chunk) => {
  if (!step.schema) {
- process.stdout.write(redactor.redact(chunk));
+ process.stdout.write(redactionBuffer.process(chunk));
  }
  },
  });

+ if (!step.schema) {
+ process.stdout.write(redactionBuffer.flush());
+ }
+
  if (response.usage) {
  totalUsage.prompt_tokens += response.usage.prompt_tokens;
  totalUsage.completion_tokens += response.usage.completion_tokens;
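
Hoisting the `Redactor` out of the ReAct loop builds its patterns once per step rather than once per iteration. The `RedactionBuffer` addresses a second problem: a secret can be split across two stream chunks, where per-chunk `redact()` would miss it. Below is a sketch of one way to implement the `process()`/`flush()` interface the diff uses; the hold-back strategy is an assumption about the mechanism, not keystone's actual code:

```ts
// Assumption: hold back any suffix that is a prefix of a secret, so a secret
// split across chunk boundaries is still redacted once the rest arrives.
class BufferedRedactor {
  private pending = '';
  constructor(private secrets: string[]) {}

  private redact(text: string): string {
    let out = text;
    for (const s of this.secrets) out = out.split(s).join('***');
    return out;
  }

  // Length of the longest suffix of `text` that could still grow into a secret.
  private partialMatchLen(text: string): number {
    let longest = 0;
    for (const s of this.secrets) {
      for (let k = Math.min(s.length - 1, text.length); k > longest; k--) {
        if (text.endsWith(s.slice(0, k))) {
          longest = k;
          break;
        }
      }
    }
    return longest;
  }

  process(chunk: string): string {
    this.pending += chunk;
    const hold = this.partialMatchLen(this.pending);
    const emit = this.pending.slice(0, this.pending.length - hold);
    this.pending = this.pending.slice(this.pending.length - hold);
    return this.redact(emit);
  }

  flush(): string {
    const out = this.redact(this.pending);
    this.pending = '';
    return out;
  }
}
```

The `flush()` after `adapter.chat()` returns matters: whatever the buffer was holding back at stream end gets redacted and written, so no tail of output is dropped.
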
@@ -236,12 +247,16 @@ export async function executeLlmStep(
  // If schema is defined, attempt to parse JSON
  if (step.schema && typeof output === 'string') {
  try {
- const { extractJson } = await import('../utils/json-parser');
  output = extractJson(output) as typeof output;
  } catch (e) {
- throw new Error(
- `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}\nOutput: ${output}`
- );
+ const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
+ logger.error(` ⚠️ ${errorMessage}. Retrying...`);
+
+ messages.push({
+ role: 'user',
+ content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
+ });
+ continue;
  }
  }
@@ -259,7 +274,18 @@ export async function executeLlmStep(

  if (!toolInfo) {
  if (toolCall.function.name === 'ask' && step.allowClarification) {
- const args = JSON.parse(toolCall.function.arguments) as { question: string };
+ let args: { question: string };
+ try {
+ args = JSON.parse(toolCall.function.arguments);
+ } catch (e) {
+ messages.push({
+ role: 'tool',
+ tool_call_id: toolCall.id,
+ name: 'ask',
+ content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+ });
+ continue;
+ }

  if (process.stdin.isTTY) {
  // In TTY, we can use a human step to get the answer immediately
@@ -302,7 +328,18 @@ export async function executeLlmStep(
  continue;
  }

- const args = JSON.parse(toolCall.function.arguments);
+ let args: Record<string, unknown>;
+ try {
+ args = JSON.parse(toolCall.function.arguments);
+ } catch (e) {
+ messages.push({
+ role: 'tool',
+ tool_call_id: toolCall.id,
+ name: toolCall.function.name,
+ content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+ });
+ continue;
+ }

  if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
  try {