keystone-cli 0.4.4 → 0.5.1

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
@@ -8,12 +8,16 @@ describe('WorkflowParser', () => {
  const tempDir = join(process.cwd(), 'temp-test-workflows');
  try {
  mkdirSync(tempDir, { recursive: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore existing dir error
+ }

  afterAll(() => {
  try {
  rmSync(tempDir, { recursive: true, force: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore cleanup error
+ }
  });
  describe('topologicalSort', () => {
  test('should sort simple dependencies', () => {
@@ -53,18 +53,22 @@ export class WorkflowParser {
  const detected = new Set<string>();

  // Helper to scan any value for dependencies
- const scan = (value: unknown) => {
+ const scan = (value: unknown, depth = 0) => {
+ if (depth > 100) {
+ throw new Error('Maximum expression nesting depth exceeded (potential DOS attack)');
+ }
+
  if (typeof value === 'string') {
  for (const dep of ExpressionEvaluator.findStepDependencies(value)) {
  detected.add(dep);
  }
  } else if (Array.isArray(value)) {
  for (const item of value) {
- scan(item);
+ scan(item, depth + 1);
  }
  } else if (value && typeof value === 'object') {
  for (const val of Object.values(value)) {
- scan(val);
+ scan(val, depth + 1);
  }
  }
  };
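
The new `depth` parameter bounds recursion when scanning nested workflow values, so a maliciously deep document cannot blow the stack or stall the parser. A standalone sketch of the same guard (the limit of 100 matches the diff; the `visit` callback stands in for keystone's dependency extraction):

```ts
// Depth-limited recursive traversal: strings are visited, arrays and
// objects recurse with depth + 1, and depth > 100 aborts the walk.
function walk(value: unknown, visit: (s: string) => void, depth = 0): void {
  if (depth > 100) {
    throw new Error('Maximum expression nesting depth exceeded');
  }
  if (typeof value === 'string') {
    visit(value);
  } else if (Array.isArray(value)) {
    for (const item of value) walk(item, visit, depth + 1);
  } else if (value && typeof value === 'object') {
    for (const val of Object.values(value)) walk(val, visit, depth + 1);
  }
}
```
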
@@ -187,6 +191,15 @@ export class WorkflowParser {
  inDegree.set(step.id, step.needs.length);
  }

+ // Build reverse dependency map for O(1) lookups instead of O(n)
+ const dependents = new Map<string, string[]>();
+ for (const step of workflow.steps) {
+ for (const dep of step.needs) {
+ if (!dependents.has(dep)) dependents.set(dep, []);
+ dependents.get(dep)?.push(step.id);
+ }
+ }
+
  // Kahn's algorithm
  const queue: string[] = [];
  const result: string[] = [];
@@ -203,14 +216,12 @@
  if (!stepId) continue;
  result.push(stepId);

- // Find all steps that depend on this step
- for (const step of workflow.steps) {
- if (step.needs.includes(stepId)) {
- const newDegree = (inDegree.get(step.id) || 0) - 1;
- inDegree.set(step.id, newDegree);
- if (newDegree === 0) {
- queue.push(step.id);
- }
+ // Find all steps that depend on this step (O(1) lookup)
+ for (const dependentId of dependents.get(stepId) || []) {
+ const newDegree = (inDegree.get(dependentId) || 0) - 1;
+ inDegree.set(dependentId, newDegree);
+ if (newDegree === 0) {
+ queue.push(dependentId);
  }
  }
  }
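
Precomputing `dependents` (the reverse of `needs`) replaces the inner scan over every step with a direct map lookup, taking the sort from O(V·E) to O(V + E). A self-contained sketch of Kahn's algorithm in this shape (the `Node` type is illustrative, not keystone's schema):

```ts
type Node = { id: string; needs: string[] };

// Kahn's algorithm with a precomputed reverse-dependency map.
function topologicalSort(nodes: Node[]): string[] {
  const inDegree = new Map<string, number>();
  const dependents = new Map<string, string[]>();
  for (const node of nodes) {
    inDegree.set(node.id, node.needs.length);
    for (const dep of node.needs) {
      if (!dependents.has(dep)) dependents.set(dep, []);
      dependents.get(dep)?.push(node.id);
    }
  }
  // Start from nodes with no dependencies.
  const queue = nodes.filter((n) => n.needs.length === 0).map((n) => n.id);
  const result: string[] = [];
  while (queue.length > 0) {
    const id = queue.shift();
    if (!id) continue;
    result.push(id);
    for (const dependentId of dependents.get(id) || []) {
      const newDegree = (inDegree.get(dependentId) || 0) - 1;
      inDegree.set(dependentId, newDegree);
      if (newDegree === 0) queue.push(dependentId);
    }
  }
  if (result.length !== nodes.length) throw new Error('Dependency cycle detected');
  return result;
}
```
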
@@ -40,17 +40,21 @@ describe('Audit Fixes Verification', () => {
  });

  describe('Sandbox Security', () => {
- it('should throw by default if isolated-vm is missing and insecure fallback is disabled', async () => {
+ it('should execute code using node:vm sandbox on Bun', async () => {
+ // Since Bun uses JSC (not V8), isolated-vm cannot work.
+ // The sandbox now uses node:vm directly with security warnings.
+ SafeSandbox.resetWarning();
  const code = '1 + 1';
- expect(SafeSandbox.execute(code, {}, { allowInsecureFallback: false })).rejects.toThrow(
- /secure sandbox failed/
- );
+ const result = await SafeSandbox.execute(code, {});
+ expect(result).toBe(2);
  });

- it('should allow execution if allowInsecureFallback is true', async () => {
- const code = '1 + 1';
- const result = await SafeSandbox.execute(code, {}, { allowInsecureFallback: true });
- expect(result).toBe(2);
+ it('should show security warning on first execution', async () => {
+ SafeSandbox.resetWarning();
+ const code = '2 + 2';
+ const result = await SafeSandbox.execute(code, {});
+ expect(result).toBe(4);
+ // Warning is shown to stderr, we just verify execution works
  });
  });

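
The rewritten tests reflect a sandbox that now runs on `node:vm` under Bun: isolated-vm requires V8, while Bun embeds JavaScriptCore, so the V8-based isolate can never load there. A minimal sketch of expression evaluation with `node:vm`; note that vm isolates variable scope but is not a security boundary, which is why the sandbox emits a one-time warning:

```ts
import vm from 'node:vm';

// Evaluate an expression against a fresh context with a wall-clock timeout.
// node:vm sandboxes scope only; untrusted code can still escape it,
// hence the stderr warning the tests reference.
function evaluate(code: string, scope: Record<string, unknown>): unknown {
  const context = vm.createContext({ ...scope });
  return vm.runInContext(code, context, { timeout: 100 });
}

console.log(evaluate('1 + 1', {})); // 2
```
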
@@ -1,6 +1,9 @@
  import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
  import { ConfigLoader } from '../utils/config-loader';

+ // Maximum response size to prevent memory exhaustion (1MB)
+ const MAX_RESPONSE_SIZE = 1024 * 1024;
+
  export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
@@ -112,6 +115,9 @@ export class OpenAIAdapter implements LLMAdapter {
  const delta = data.choices[0].delta;

  if (delta.content) {
+ if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += delta.content;
  options.onStream?.(delta.content);
  }
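
The check runs before appending, so accumulated streaming output never exceeds the 1 MB cap even transiently. One nit the diff inherits: `String.length` counts UTF-16 code units, not bytes, so the "bytes" in the error message is approximate. A reduced sketch of the guard:

```ts
// 1 MB cap on accumulated streamed content (in UTF-16 code units).
const MAX_RESPONSE_SIZE = 1024 * 1024;

function appendChunk(fullContent: string, chunk: string): string {
  // Refuse growth past the cap *before* appending.
  if (fullContent.length + chunk.length > MAX_RESPONSE_SIZE) {
    throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE}`);
  }
  return fullContent + chunk;
}
```
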
@@ -287,7 +293,8 @@ export class AnthropicAdapter implements LLMAdapter {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let fullContent = '';
- const toolCalls: { id: string; name: string; inputString: string }[] = [];
+ // Track tool calls by content block index for robust correlation
+ const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();

  while (true) {
  const { done, value } = await reader.read();
@@ -302,21 +309,43 @@
  try {
  const data = JSON.parse(line.slice(6));
  if (data.type === 'content_block_delta' && data.delta?.text) {
+ if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += data.delta.text;
  options.onStream?.(data.delta.text);
  }

+ // Track tool calls by their index in the content blocks
  if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
- toolCalls.push({
- id: data.content_block.id,
- name: data.content_block.name,
+ const index = data.index ?? toolCallsMap.size;
+ toolCallsMap.set(index, {
+ id: data.content_block.id || '',
+ name: data.content_block.name || '',
  inputString: '',
  });
  }

- if (data.type === 'tool_use_delta' && data.delta?.partial_json) {
- const lastTool = toolCalls[toolCalls.length - 1];
- if (lastTool) lastTool.inputString += data.delta.partial_json;
+ // Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
+ if (
+ data.type === 'content_block_delta' &&
+ data.delta?.type === 'input_json_delta' &&
+ data.delta?.partial_json
+ ) {
+ const index = data.index;
+ const toolCall = toolCallsMap.get(index);
+ if (toolCall) {
+ toolCall.inputString += data.delta.partial_json;
+ }
+ }
+
+ // Update tool call ID if it arrives later (some edge cases)
+ if (data.type === 'content_block_delta' && data.content_block?.id) {
+ const index = data.index;
+ const toolCall = toolCallsMap.get(index);
+ if (toolCall && !toolCall.id) {
+ toolCall.id = data.content_block.id;
+ }
  }
  } catch (e) {
  // Ignore parse errors
@@ -324,15 +353,20 @@ export class AnthropicAdapter implements LLMAdapter {
  }
  }

+ // Convert map to array and filter out incomplete tool calls
+ const toolCalls = Array.from(toolCallsMap.values())
+ .filter((tc) => tc.id && tc.name) // Only include complete tool calls
+ .map((tc) => ({
+ id: tc.id,
+ type: 'function' as const,
+ function: { name: tc.name, arguments: tc.inputString },
+ }));
+
  return {
  message: {
  role: 'assistant',
  content: fullContent || null,
- tool_calls: toolCalls.map((tc) => ({
- id: tc.id,
- type: 'function',
- function: { name: tc.name, arguments: tc.inputString },
- })),
+ tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
  },
  };
  }
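
Keying tool calls by the event's `index` matches Anthropic's streaming protocol, where `content_block_start` opens a block and later `input_json_delta` fragments reference it by index; the old code appended to the last array element and listened for a `tool_use_delta` event type the API does not emit. A reduced sketch of the correlation logic (event shape trimmed to the fields used):

```ts
type ToolDraft = { id: string; name: string; inputString: string };
type StreamEvent = {
  type: string;
  index?: number;
  content_block?: { type?: string; id?: string; name?: string };
  delta?: { type?: string; partial_json?: string };
};

const drafts = new Map<number, ToolDraft>();

function onEvent(evt: StreamEvent): void {
  if (evt.type === 'content_block_start' && evt.content_block?.type === 'tool_use') {
    drafts.set(evt.index ?? drafts.size, {
      id: evt.content_block.id || '',
      name: evt.content_block.name || '',
      inputString: '',
    });
  } else if (evt.type === 'content_block_delta' && evt.delta?.type === 'input_json_delta') {
    // Append the JSON fragment to the block it belongs to, not to "the last one".
    const draft = drafts.get(evt.index ?? -1);
    if (draft) draft.inputString += evt.delta.partial_json ?? '';
  }
}
```
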
@@ -443,6 +477,9 @@ export class CopilotAdapter implements LLMAdapter {
  const delta = data.choices[0].delta;

  if (delta.content) {
+ if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
  fullContent += delta.content;
  options.onStream?.(delta.content);
  }
@@ -132,13 +132,13 @@ describe('llm-executor', () => {
  beforeAll(() => {
  // Mock spawn to avoid actual process creation
  const mockProcess = Object.assign(new EventEmitter(), {
- stdout: new Readable({ read() {} }),
+ stdout: new Readable({ read() { } }),
  stdin: new Writable({
  write(_chunk, _encoding, cb: (error?: Error | null) => void) {
  cb();
  },
  }),
- kill: mock(() => {}),
+ kill: mock(() => { }),
  });
  spawnSpy = spyOn(child_process, 'spawn').mockReturnValue(
  mockProcess as unknown as child_process.ChildProcess
@@ -146,7 +146,9 @@ describe('llm-executor', () => {

  try {
  mkdirSync(agentsDir, { recursive: true });
- } catch (e) {}
+ } catch (e) {
+ // Ignore error during cleanup
+ }
  const agentContent = `---
  name: test-agent
  model: gpt-4
@@ -196,6 +198,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -216,6 +219,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger tool',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };

@@ -242,6 +246,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'give me json',
  needs: [],
+ maxIterations: 10,
  schema: {
  type: 'object',
  properties: {
@@ -261,19 +266,64 @@ You are a test agent.`;
  expect(result.output).toEqual({ foo: 'bar' });
  });

- it('should throw error if JSON parsing fails for schema', async () => {
+ it('should retry if LLM output fails schema validation', async () => {
+ const step: LlmStep = {
+ id: 'l1',
+ type: 'llm',
+ agent: 'test-agent',
+ prompt: 'give me invalid json',
+ needs: [],
+ maxIterations: 10,
+ schema: { type: 'object' },
+ };
+ const context: ExpressionContext = { inputs: {}, steps: {} };
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+
+ const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
+ const originalCopilotChatInner = CopilotAdapter.prototype.chat;
+ const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
+
+ let attempt = 0;
+ const mockChat = mock(async () => {
+ attempt++;
+ if (attempt === 1) {
+ return { message: { role: 'assistant', content: 'Not JSON' } };
+ }
+ return { message: { role: 'assistant', content: '{"success": true}' } };
+ }) as unknown as typeof originalOpenAIChat;
+
+ OpenAIAdapter.prototype.chat = mockChat;
+ CopilotAdapter.prototype.chat = mockChat;
+ AnthropicAdapter.prototype.chat = mockChat;
+
+ const result = await executeLlmStep(
+ step,
+ context,
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
+ );
+
+ expect(result.status).toBe('success');
+ expect(result.output).toEqual({ success: true });
+ expect(attempt).toBe(2);
+
+ OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
+ CopilotAdapter.prototype.chat = originalCopilotChatInner;
+ AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
+ });
+
+ it('should fail after max iterations if JSON remains invalid', async () => {
  const step: LlmStep = {
  id: 'l1',
  type: 'llm',
  agent: 'test-agent',
  prompt: 'give me invalid json',
  needs: [],
+ maxIterations: 3,
  schema: { type: 'object' },
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));

- // Mock response with invalid JSON
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
  const originalCopilotChatInner = CopilotAdapter.prototype.chat;
  const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
@@ -292,7 +342,7 @@ You are a test agent.`;
  context,
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
  )
- ).rejects.toThrow(/Failed to parse LLM output as JSON/);
+ ).rejects.toThrow('Max ReAct iterations reached');

  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
  CopilotAdapter.prototype.chat = originalCopilotChatInner;
@@ -306,6 +356,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger unknown tool',
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -359,6 +410,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  mcpServers: [{ name: 'fail-mcp', command: 'node', args: [] }],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -370,7 +422,7 @@ You are a test agent.`;
  spyOn(client, 'stop').mockReturnValue(undefined);
  return client;
  });
- const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });

  await executeLlmStep(
  step,
@@ -379,7 +431,7 @@ You are a test agent.`;
  );

  expect(consoleSpy).toHaveBeenCalledWith(
- expect.stringContaining('Failed to connect to MCP server fail-mcp')
+ expect.stringContaining('Failed to list tools from MCP server fail-mcp')
  );
  createLocalSpy.mockRestore();
  consoleSpy.mockRestore();
@@ -392,6 +444,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger mcp tool',
  needs: [],
+ maxIterations: 10,
  mcpServers: [{ name: 'test-mcp', command: 'node', args: [] }],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -446,13 +499,15 @@ You are a test agent.`;
  it('should use global MCP servers when useGlobalMcp is true', async () => {
  ConfigLoader.setConfig({
  mcp_servers: {
- 'global-mcp': { command: 'node', args: ['server.js'] },
+ 'global-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
  },
  providers: {
- openai: { apiKey: 'test' },
+ openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
  },
  model_mappings: {},
  default_provider: 'openai',
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
  });

  const manager = new MCPManager();
@@ -462,6 +517,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  useGlobalMcp: true,
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -510,6 +566,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'trigger adhoc tool',
  needs: [],
+ maxIterations: 10,
  tools: [
  {
  name: 'adhoc-tool',
@@ -547,11 +604,12 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  mcpServers: ['some-global-server'],
  };
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
- const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });

  await executeLlmStep(
  step,
@@ -571,11 +629,13 @@ You are a test agent.`;
  it('should not add global MCP server if already explicitly listed', async () => {
  ConfigLoader.setConfig({
  mcp_servers: {
- 'test-mcp': { command: 'node', args: ['server.js'] },
+ 'test-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
  },
- providers: { openai: { apiKey: 'test' } },
+ providers: { openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' } },
  model_mappings: {},
  default_provider: 'openai',
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
  });

  const manager = new MCPManager();
@@ -585,6 +645,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: 'hello',
  needs: [],
+ maxIterations: 10,
  useGlobalMcp: true,
  mcpServers: [{ name: 'test-mcp', command: 'node', args: ['local.js'] }],
  };
@@ -636,6 +697,7 @@ You are a test agent.`;
  agent: 'test-agent',
  prompt: '${{ steps.prev.output }}' as unknown as string,
  needs: [],
+ maxIterations: 10,
  };
  const context: ExpressionContext = {
  inputs: {},
@@ -3,7 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator';
  import { ExpressionEvaluator } from '../expression/evaluator';
  import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
  import type { AgentTool, LlmStep, Step } from '../parser/schema';
- import { Redactor } from '../utils/redactor';
+ import { extractJson } from '../utils/json-parser';
+ import { RedactionBuffer, Redactor } from '../utils/redactor';
  import { type LLMMessage, getAdapter } from './llm-adapter';
  import { MCPClient } from './mcp-client';
  import type { MCPManager, MCPServerConfig } from './mcp-manager';
@@ -121,50 +122,54 @@ export async function executeLlmStep(
  }

  if (mcpServersToConnect.length > 0) {
- for (const server of mcpServersToConnect) {
- let client: MCPClient | undefined;
-
- if (mcpManager) {
- client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
- } else {
- // Fallback if no manager (should not happen in normal workflow run)
- if (typeof server === 'string') {
- logger.error(` ✗ Cannot reference global MCP server '${server}' without MCPManager`);
- continue;
- }
- logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+ await Promise.all(
+ mcpServersToConnect.map(async (server) => {
+ let client: MCPClient | undefined;
+ const serverName = typeof server === 'string' ? server : server.name;
+
  try {
- client = await MCPClient.createLocal(
- (server as MCPServerConfig).command || 'node',
- (server as MCPServerConfig).args || [],
- (server as MCPServerConfig).env || {}
- );
- await client.initialize();
- localMcpClients.push(client);
+ if (mcpManager) {
+ client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
+ } else {
+ // Fallback if no manager (should not happen in normal workflow run)
+ if (typeof server === 'string') {
+ logger.error(
+ ` ✗ Cannot reference global MCP server '${server}' without MCPManager`
+ );
+ return;
+ }
+ logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+ client = await MCPClient.createLocal(
+ (server as MCPServerConfig).command || 'node',
+ (server as MCPServerConfig).args || [],
+ (server as MCPServerConfig).env || {}
+ );
+ await client.initialize();
+ localMcpClients.push(client);
+ }
+
+ if (client) {
+ const mcpTools = await client.listTools();
+ for (const tool of mcpTools) {
+ allTools.push({
+ name: tool.name,
+ description: tool.description,
+ parameters: tool.inputSchema,
+ source: 'mcp',
+ mcpClient: client,
+ });
+ }
+ }
  } catch (error) {
  logger.error(
- ` ✗ Failed to connect to MCP server ${server.name}: ${error instanceof Error ? error.message : String(error)}`
+ ` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
  );
- if (client) {
+ if (!mcpManager && client) {
  client.stop();
  }
- client = undefined;
  }
- }
-
- if (client) {
- const mcpTools = await client.listTools();
- for (const tool of mcpTools) {
- allTools.push({
- name: tool.name,
- description: tool.description,
- parameters: tool.inputSchema,
- source: 'mcp',
- mcpClient: client,
- });
- }
- }
- }
+ })
+ );
  }

  const llmTools = allTools.map((t) => ({
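
Moving the per-server body into `Promise.all(...map(...))` connects MCP servers concurrently, and widening the `try` to cover `listTools()` means one misbehaving server logs an error and drops out instead of aborting the whole step (hence the test's new "Failed to list tools" message). A reduced sketch of the pattern:

```ts
// Connect to many servers concurrently; each failure is isolated to its server.
async function connectAll(
  servers: string[],
  connect: (name: string) => Promise<void>
): Promise<void> {
  await Promise.all(
    servers.map(async (name) => {
      try {
        await connect(name);
      } catch (error) {
        console.error(`Failed to list tools from MCP server ${name}: ${String(error)}`);
        // Swallow, don't rethrow: the other servers' results survive.
      }
    })
  );
}
```
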
@@ -206,21 +211,27 @@ export async function executeLlmStep(
  total_tokens: 0,
  };

+ // Create redactor once outside the loop for performance (regex compilation)
+ const redactor = new Redactor(context.secrets || {});
+ const redactionBuffer = new RedactionBuffer(redactor);
+
  while (iterations < maxIterations) {
  iterations++;

- const redactor = new Redactor(context.secrets || {});
-
  const response = await adapter.chat(messages, {
  model: resolvedModel,
  tools: llmTools.length > 0 ? llmTools : undefined,
  onStream: (chunk) => {
  if (!step.schema) {
- process.stdout.write(redactor.redact(chunk));
+ process.stdout.write(redactionBuffer.process(chunk));
  }
  },
  });

+ if (!step.schema) {
+ process.stdout.write(redactionBuffer.flush());
+ }
+
  if (response.usage) {
  totalUsage.prompt_tokens += response.usage.prompt_tokens;
  totalUsage.completion_tokens += response.usage.completion_tokens;
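
Hoisting the `Redactor` out of the ReAct loop builds its patterns once per step rather than once per iteration. The `RedactionBuffer` addresses a second problem: a secret can be split across two stream chunks, where per-chunk `redact()` would miss it. Below is a sketch of one way to implement the `process()`/`flush()` interface the diff uses; the hold-back strategy is an assumption about the mechanism, not keystone's actual code:

```ts
// Assumption: hold back any suffix that is a prefix of a secret, so a secret
// split across chunk boundaries is still redacted once the rest arrives.
class BufferedRedactor {
  private pending = '';
  constructor(private secrets: string[]) {}

  private redact(text: string): string {
    let out = text;
    for (const s of this.secrets) out = out.split(s).join('***');
    return out;
  }

  // Length of the longest suffix of `text` that could still grow into a secret.
  private partialMatchLen(text: string): number {
    let longest = 0;
    for (const s of this.secrets) {
      for (let k = Math.min(s.length - 1, text.length); k > longest; k--) {
        if (text.endsWith(s.slice(0, k))) {
          longest = k;
          break;
        }
      }
    }
    return longest;
  }

  process(chunk: string): string {
    this.pending += chunk;
    const hold = this.partialMatchLen(this.pending);
    const emit = this.pending.slice(0, this.pending.length - hold);
    this.pending = this.pending.slice(this.pending.length - hold);
    return this.redact(emit);
  }

  flush(): string {
    const out = this.redact(this.pending);
    this.pending = '';
    return out;
  }
}
```

The `flush()` after `adapter.chat()` returns matters: whatever the buffer was holding back at stream end gets redacted and written, so no tail of output is dropped.
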
@@ -236,12 +247,16 @@ export async function executeLlmStep(
  // If schema is defined, attempt to parse JSON
  if (step.schema && typeof output === 'string') {
  try {
- const { extractJson } = await import('../utils/json-parser');
  output = extractJson(output) as typeof output;
  } catch (e) {
- throw new Error(
- `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}\nOutput: ${output}`
- );
+ const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
+ logger.error(` ⚠️ ${errorMessage}. Retrying...`);
+
+ messages.push({
+ role: 'user',
+ content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
+ });
+ continue;
  }
  }
@@ -259,7 +274,18 @@ export async function executeLlmStep(

  if (!toolInfo) {
  if (toolCall.function.name === 'ask' && step.allowClarification) {
- const args = JSON.parse(toolCall.function.arguments) as { question: string };
+ let args: { question: string };
+ try {
+ args = JSON.parse(toolCall.function.arguments);
+ } catch (e) {
+ messages.push({
+ role: 'tool',
+ tool_call_id: toolCall.id,
+ name: 'ask',
+ content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+ });
+ continue;
+ }

  if (process.stdin.isTTY) {
  // In TTY, we can use a human step to get the answer immediately
@@ -302,7 +328,18 @@ export async function executeLlmStep(
  continue;
  }

- const args = JSON.parse(toolCall.function.arguments);
+ let args: Record<string, unknown>;
+ try {
+ args = JSON.parse(toolCall.function.arguments);
+ } catch (e) {
+ messages.push({
+ role: 'tool',
+ tool_call_id: toolCall.id,
+ name: toolCall.function.name,
+ content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+ });
+ continue;
+ }

  if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
  try {