keystone-cli 0.4.4 → 0.5.0

@@ -18,7 +18,9 @@ describe('agent-parser', () => {
   afterAll(() => {
     try {
       rmSync(tempDir, { recursive: true, force: true });
-    } catch (e) {}
+    } catch (e) {
+      // Ignore cleanup error
+    }
   });
 
   describe('parseAgent', () => {
@@ -100,7 +102,9 @@ Prompt`;
     const agentsDir = join(process.cwd(), '.keystone', 'workflows', 'agents');
     try {
      mkdirSync(agentsDir, { recursive: true });
-    } catch (e) {}
+    } catch (e) {
+      // Ignore cleanup error
+    }
 
     const filePath = join(agentsDir, 'my-agent.md');
     writeFileSync(filePath, '---name: my-agent---');
@@ -38,6 +38,7 @@ const ShellStepSchema = BaseStepSchema.extend({
   run: z.string(),
   dir: z.string().optional(),
   env: z.record(z.string()).optional(),
+  allowInsecure: z.boolean().optional(),
 });
 
 // Forward declaration for AgentToolSchema which depends on StepSchema
@@ -71,6 +72,7 @@ const LlmStepSchema = BaseStepSchema.extend({
         env: z.record(z.string()).optional(),
         url: z.string().optional(),
         headers: z.record(z.string()).optional(),
+        timeout: z.number().int().positive().optional(),
       }),
     ])
   )
@@ -8,12 +8,16 @@ describe('WorkflowParser', () => {
   const tempDir = join(process.cwd(), 'temp-test-workflows');
   try {
     mkdirSync(tempDir, { recursive: true });
-  } catch (e) {}
+  } catch (e) {
+    // Ignore existing dir error
+  }
 
   afterAll(() => {
     try {
       rmSync(tempDir, { recursive: true, force: true });
-    } catch (e) {}
+    } catch (e) {
+      // Ignore cleanup error
+    }
   });
   describe('topologicalSort', () => {
     test('should sort simple dependencies', () => {
@@ -53,18 +53,22 @@ export class WorkflowParser {
     const detected = new Set<string>();
 
     // Helper to scan any value for dependencies
-    const scan = (value: unknown) => {
+    const scan = (value: unknown, depth = 0) => {
+      if (depth > 100) {
+        throw new Error('Maximum expression nesting depth exceeded (potential DOS attack)');
+      }
+
       if (typeof value === 'string') {
         for (const dep of ExpressionEvaluator.findStepDependencies(value)) {
           detected.add(dep);
         }
       } else if (Array.isArray(value)) {
         for (const item of value) {
-          scan(item);
+          scan(item, depth + 1);
         }
       } else if (value && typeof value === 'object') {
         for (const val of Object.values(value)) {
-          scan(val);
+          scan(val, depth + 1);
         }
       }
     };
@@ -187,6 +191,15 @@ export class WorkflowParser {
       inDegree.set(step.id, step.needs.length);
     }
 
+    // Build reverse dependency map for O(1) lookups instead of O(n)
+    const dependents = new Map<string, string[]>();
+    for (const step of workflow.steps) {
+      for (const dep of step.needs) {
+        if (!dependents.has(dep)) dependents.set(dep, []);
+        dependents.get(dep)?.push(step.id);
+      }
+    }
+
     // Kahn's algorithm
     const queue: string[] = [];
     const result: string[] = [];
@@ -203,14 +216,12 @@ export class WorkflowParser {
       if (!stepId) continue;
       result.push(stepId);
 
-      // Find all steps that depend on this step
-      for (const step of workflow.steps) {
-        if (step.needs.includes(stepId)) {
-          const newDegree = (inDegree.get(step.id) || 0) - 1;
-          inDegree.set(step.id, newDegree);
-          if (newDegree === 0) {
-            queue.push(step.id);
-          }
+      // Find all steps that depend on this step (O(1) lookup)
+      for (const dependentId of dependents.get(stepId) || []) {
+        const newDegree = (inDegree.get(dependentId) || 0) - 1;
+        inDegree.set(dependentId, newDegree);
+        if (newDegree === 0) {
+          queue.push(dependentId);
        }
      }
    }
@@ -40,17 +40,21 @@ describe('Audit Fixes Verification', () => {
   });
 
   describe('Sandbox Security', () => {
-    it('should throw by default if isolated-vm is missing and insecure fallback is disabled', async () => {
+    it('should execute code using node:vm sandbox on Bun', async () => {
+      // Since Bun uses JSC (not V8), isolated-vm cannot work.
+      // The sandbox now uses node:vm directly with security warnings.
+      SafeSandbox.resetWarning();
       const code = '1 + 1';
-      expect(SafeSandbox.execute(code, {}, { allowInsecureFallback: false })).rejects.toThrow(
-        /secure sandbox failed/
-      );
+      const result = await SafeSandbox.execute(code, {});
+      expect(result).toBe(2);
     });
 
-    it('should allow execution if allowInsecureFallback is true', async () => {
-      const code = '1 + 1';
-      const result = await SafeSandbox.execute(code, {}, { allowInsecureFallback: true });
-      expect(result).toBe(2);
+    it('should show security warning on first execution', async () => {
+      SafeSandbox.resetWarning();
+      const code = '2 + 2';
+      const result = await SafeSandbox.execute(code, {});
+      expect(result).toBe(4);
+      // Warning is shown to stderr, we just verify execution works
     });
   });
 
@@ -1,6 +1,9 @@
 import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
 import { ConfigLoader } from '../utils/config-loader';
 
+// Maximum response size to prevent memory exhaustion (1MB)
+const MAX_RESPONSE_SIZE = 1024 * 1024;
+
 export interface LLMMessage {
   role: 'system' | 'user' | 'assistant' | 'tool';
   content: string | null;
@@ -112,6 +115,9 @@ export class OpenAIAdapter implements LLMAdapter {
           const delta = data.choices[0].delta;
 
           if (delta.content) {
+            if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+              throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+            }
             fullContent += delta.content;
             options.onStream?.(delta.content);
           }
@@ -287,7 +293,8 @@ export class AnthropicAdapter implements LLMAdapter {
     const reader = response.body.getReader();
     const decoder = new TextDecoder();
     let fullContent = '';
-    const toolCalls: { id: string; name: string; inputString: string }[] = [];
+    // Track tool calls by content block index for robust correlation
+    const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();
 
     while (true) {
       const { done, value } = await reader.read();
@@ -302,21 +309,43 @@ export class AnthropicAdapter implements LLMAdapter {
         try {
           const data = JSON.parse(line.slice(6));
           if (data.type === 'content_block_delta' && data.delta?.text) {
+            if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
+              throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+            }
             fullContent += data.delta.text;
             options.onStream?.(data.delta.text);
           }
 
+          // Track tool calls by their index in the content blocks
           if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
-            toolCalls.push({
-              id: data.content_block.id,
-              name: data.content_block.name,
+            const index = data.index ?? toolCallsMap.size;
+            toolCallsMap.set(index, {
+              id: data.content_block.id || '',
+              name: data.content_block.name || '',
               inputString: '',
             });
           }
 
-          if (data.type === 'tool_use_delta' && data.delta?.partial_json) {
-            const lastTool = toolCalls[toolCalls.length - 1];
-            if (lastTool) lastTool.inputString += data.delta.partial_json;
+          // Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
+          if (
+            data.type === 'content_block_delta' &&
+            data.delta?.type === 'input_json_delta' &&
+            data.delta?.partial_json
+          ) {
+            const index = data.index;
+            const toolCall = toolCallsMap.get(index);
+            if (toolCall) {
+              toolCall.inputString += data.delta.partial_json;
+            }
+          }
+
+          // Update tool call ID if it arrives later (some edge cases)
+          if (data.type === 'content_block_delta' && data.content_block?.id) {
+            const index = data.index;
+            const toolCall = toolCallsMap.get(index);
+            if (toolCall && !toolCall.id) {
+              toolCall.id = data.content_block.id;
+            }
           }
         } catch (e) {
           // Ignore parse errors
@@ -324,15 +353,20 @@ export class AnthropicAdapter implements LLMAdapter {
       }
     }
 
+    // Convert map to array and filter out incomplete tool calls
+    const toolCalls = Array.from(toolCallsMap.values())
+      .filter((tc) => tc.id && tc.name) // Only include complete tool calls
+      .map((tc) => ({
+        id: tc.id,
+        type: 'function' as const,
+        function: { name: tc.name, arguments: tc.inputString },
+      }));
+
     return {
       message: {
         role: 'assistant',
         content: fullContent || null,
-        tool_calls: toolCalls.map((tc) => ({
-          id: tc.id,
-          type: 'function',
-          function: { name: tc.name, arguments: tc.inputString },
-        })),
+        tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
       },
     };
   }
@@ -443,6 +477,9 @@ export class CopilotAdapter implements LLMAdapter {
           const delta = data.choices[0].delta;
 
           if (delta.content) {
+            if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
+              throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+            }
             fullContent += delta.content;
             options.onStream?.(delta.content);
           }
@@ -146,7 +146,9 @@ describe('llm-executor', () => {
 
     try {
       mkdirSync(agentsDir, { recursive: true });
-    } catch (e) {}
+    } catch (e) {
+      // Ignore error during cleanup
+    }
     const agentContent = `---
 name: test-agent
 model: gpt-4
@@ -196,6 +198,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'hello',
       needs: [],
+      maxIterations: 10,
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
     const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -216,6 +219,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'trigger tool',
       needs: [],
+      maxIterations: 10,
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
 
@@ -242,6 +246,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'give me json',
       needs: [],
+      maxIterations: 10,
       schema: {
         type: 'object',
         properties: {
@@ -268,6 +273,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'give me invalid json',
       needs: [],
+      maxIterations: 10,
       schema: { type: 'object' },
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -306,6 +312,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'trigger unknown tool',
       needs: [],
+      maxIterations: 10,
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
     const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
@@ -359,6 +366,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'hello',
       needs: [],
+      maxIterations: 10,
       mcpServers: [{ name: 'fail-mcp', command: 'node', args: [] }],
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -379,7 +387,7 @@ You are a test agent.`;
 
 
     expect(consoleSpy).toHaveBeenCalledWith(
-      expect.stringContaining('Failed to connect to MCP server fail-mcp')
+      expect.stringContaining('Failed to list tools from MCP server fail-mcp')
     );
     createLocalSpy.mockRestore();
     consoleSpy.mockRestore();
@@ -392,6 +400,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'trigger mcp tool',
       needs: [],
+      maxIterations: 10,
       mcpServers: [{ name: 'test-mcp', command: 'node', args: [] }],
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -446,13 +455,15 @@ You are a test agent.`;
   it('should use global MCP servers when useGlobalMcp is true', async () => {
     ConfigLoader.setConfig({
       mcp_servers: {
-        'global-mcp': { command: 'node', args: ['server.js'] },
+        'global-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
       },
       providers: {
-        openai: { apiKey: 'test' },
+        openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
       },
       model_mappings: {},
       default_provider: 'openai',
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
     });
 
     const manager = new MCPManager();
@@ -462,6 +473,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'hello',
       needs: [],
+      maxIterations: 10,
       useGlobalMcp: true,
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -510,6 +522,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'trigger adhoc tool',
       needs: [],
+      maxIterations: 10,
       tools: [
         {
           name: 'adhoc-tool',
@@ -547,6 +560,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'hello',
       needs: [],
+      maxIterations: 10,
       mcpServers: ['some-global-server'],
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
@@ -571,11 +585,13 @@ You are a test agent.`;
   it('should not add global MCP server if already explicitly listed', async () => {
     ConfigLoader.setConfig({
       mcp_servers: {
-        'test-mcp': { command: 'node', args: ['server.js'] },
+        'test-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
       },
-      providers: { openai: { apiKey: 'test' } },
+      providers: { openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' } },
       model_mappings: {},
       default_provider: 'openai',
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
     });
 
     const manager = new MCPManager();
@@ -585,6 +601,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: 'hello',
       needs: [],
+      maxIterations: 10,
       useGlobalMcp: true,
       mcpServers: [{ name: 'test-mcp', command: 'node', args: ['local.js'] }],
     };
@@ -636,6 +653,7 @@ You are a test agent.`;
       agent: 'test-agent',
       prompt: '${{ steps.prev.output }}' as unknown as string,
       needs: [],
+      maxIterations: 10,
     };
     const context: ExpressionContext = {
       inputs: {},
@@ -3,7 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator';
 import { ExpressionEvaluator } from '../expression/evaluator';
 import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
 import type { AgentTool, LlmStep, Step } from '../parser/schema';
-import { Redactor } from '../utils/redactor';
+import { extractJson } from '../utils/json-parser';
+import { RedactionBuffer, Redactor } from '../utils/redactor';
 import { type LLMMessage, getAdapter } from './llm-adapter';
 import { MCPClient } from './mcp-client';
 import type { MCPManager, MCPServerConfig } from './mcp-manager';
@@ -121,50 +122,54 @@ export async function executeLlmStep(
   }
 
   if (mcpServersToConnect.length > 0) {
-    for (const server of mcpServersToConnect) {
-      let client: MCPClient | undefined;
-
-      if (mcpManager) {
-        client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
-      } else {
-        // Fallback if no manager (should not happen in normal workflow run)
-        if (typeof server === 'string') {
-          logger.error(` ✗ Cannot reference global MCP server '${server}' without MCPManager`);
-          continue;
-        }
-        logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+    await Promise.all(
+      mcpServersToConnect.map(async (server) => {
+        let client: MCPClient | undefined;
+        const serverName = typeof server === 'string' ? server : server.name;
+
         try {
-          client = await MCPClient.createLocal(
-            (server as MCPServerConfig).command || 'node',
-            (server as MCPServerConfig).args || [],
-            (server as MCPServerConfig).env || {}
-          );
-          await client.initialize();
-          localMcpClients.push(client);
+          if (mcpManager) {
+            client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
+          } else {
+            // Fallback if no manager (should not happen in normal workflow run)
+            if (typeof server === 'string') {
+              logger.error(
+                ` ✗ Cannot reference global MCP server '${server}' without MCPManager`
+              );
+              return;
+            }
+            logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
+            client = await MCPClient.createLocal(
+              (server as MCPServerConfig).command || 'node',
+              (server as MCPServerConfig).args || [],
+              (server as MCPServerConfig).env || {}
+            );
+            await client.initialize();
+            localMcpClients.push(client);
+          }
+
+          if (client) {
+            const mcpTools = await client.listTools();
+            for (const tool of mcpTools) {
+              allTools.push({
+                name: tool.name,
+                description: tool.description,
+                parameters: tool.inputSchema,
+                source: 'mcp',
+                mcpClient: client,
+              });
+            }
+          }
         } catch (error) {
           logger.error(
-            ` ✗ Failed to connect to MCP server ${server.name}: ${error instanceof Error ? error.message : String(error)}`
+            ` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
          );
-          if (client) {
+          if (!mcpManager && client) {
            client.stop();
          }
-          client = undefined;
        }
-      }
-
-      if (client) {
-        const mcpTools = await client.listTools();
-        for (const tool of mcpTools) {
-          allTools.push({
-            name: tool.name,
-            description: tool.description,
-            parameters: tool.inputSchema,
-            source: 'mcp',
-            mcpClient: client,
-          });
-        }
-      }
-    }
+      })
+    );
   }
 
   const llmTools = allTools.map((t) => ({
@@ -206,21 +211,27 @@ export async function executeLlmStep(
     total_tokens: 0,
   };
 
+  // Create redactor once outside the loop for performance (regex compilation)
+  const redactor = new Redactor(context.secrets || {});
+  const redactionBuffer = new RedactionBuffer(redactor);
+
   while (iterations < maxIterations) {
     iterations++;
 
-    const redactor = new Redactor(context.secrets || {});
-
     const response = await adapter.chat(messages, {
       model: resolvedModel,
       tools: llmTools.length > 0 ? llmTools : undefined,
       onStream: (chunk) => {
         if (!step.schema) {
-          process.stdout.write(redactor.redact(chunk));
+          process.stdout.write(redactionBuffer.process(chunk));
        }
      },
    });
 
+    if (!step.schema) {
+      process.stdout.write(redactionBuffer.flush());
+    }
+
     if (response.usage) {
       totalUsage.prompt_tokens += response.usage.prompt_tokens;
       totalUsage.completion_tokens += response.usage.completion_tokens;
@@ -236,7 +247,6 @@ export async function executeLlmStep(
     // If schema is defined, attempt to parse JSON
     if (step.schema && typeof output === 'string') {
       try {
-        const { extractJson } = await import('../utils/json-parser');
         output = extractJson(output) as typeof output;
       } catch (e) {
         throw new Error(
@@ -259,7 +269,18 @@ export async function executeLlmStep(
 
       if (!toolInfo) {
         if (toolCall.function.name === 'ask' && step.allowClarification) {
-          const args = JSON.parse(toolCall.function.arguments) as { question: string };
+          let args: { question: string };
+          try {
+            args = JSON.parse(toolCall.function.arguments);
+          } catch (e) {
+            messages.push({
+              role: 'tool',
+              tool_call_id: toolCall.id,
+              name: 'ask',
+              content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+            });
+            continue;
+          }
 
           if (process.stdin.isTTY) {
             // In TTY, we can use a human step to get the answer immediately
@@ -302,7 +323,18 @@ export async function executeLlmStep(
         continue;
       }
 
-      const args = JSON.parse(toolCall.function.arguments);
+      let args: Record<string, unknown>;
+      try {
+        args = JSON.parse(toolCall.function.arguments);
+      } catch (e) {
+        messages.push({
+          role: 'tool',
+          tool_call_id: toolCall.id,
+          name: toolCall.function.name,
+          content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
+        });
+        continue;
+      }
 
       if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
         try {
@@ -0,0 +1,79 @@
+import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
+import * as child_process from 'node:child_process';
+import { MCPClient } from './mcp-client';
+
+import { Readable, Writable } from 'node:stream';
+
+describe('MCPClient Audit Fixes', () => {
+  let spawnSpy: ReturnType<typeof spyOn>;
+
+  beforeEach(() => {
+    spawnSpy = spyOn(child_process, 'spawn').mockReturnValue({
+      stdout: new Readable({ read() {} }),
+      stdin: new Writable({
+        write(c, e, cb) {
+          cb();
+        },
+      }),
+      kill: () => {},
+      on: () => {},
+      // biome-ignore lint/suspicious/noExplicitAny: Mocking complex object
+    } as any);
+  });
+
+  afterEach(() => {
+    spawnSpy.mockRestore();
+  });
+
+  it('should filter sensitive environment variables', async () => {
+    // Set temp environment variables
+    process.env.TEST_API_KEY_LEAK = 'secret_value';
+    process.env.TEST_SAFE_VAR = 'safe_value';
+    process.env.TEST_TOKEN_XYZ = 'secret_token';
+
+    try {
+      await MCPClient.createLocal('node', [], { EXPLICIT_SECRET: 'allowed' });
+
+      // Assert spawn arguments
+      // args: [0]=command, [1]=args, [2]=options
+      const call = spawnSpy.mock.lastCall;
+      if (!call) throw new Error('spawn not called');
+
+      const envArg = call[2].env;
+
+      // Safe vars should remain
+      expect(envArg.TEST_SAFE_VAR).toBe('safe_value');
+
+      // Explicitly passed vars should remain
+      expect(envArg.EXPLICIT_SECRET).toBe('allowed');
+
+      // Sensitive vars should be filtered
+      expect(envArg.TEST_API_KEY_LEAK).toBeUndefined();
+      expect(envArg.TEST_TOKEN_XYZ).toBeUndefined();
+    } finally {
+      // Cleanup
+      process.env.TEST_API_KEY_LEAK = undefined;
+      process.env.TEST_SAFE_VAR = undefined;
+      process.env.TEST_TOKEN_XYZ = undefined;
+    }
+  });
+
+  it('should allow whitelisted sensitive vars if explicitly provided', async () => {
+    process.env.TEST_API_KEY_LEAK = 'secret_value';
+
+    try {
+      // User explicitly asks to pass this env var
+      await MCPClient.createLocal('node', [], {
+        TEST_API_KEY_LEAK: process.env.TEST_API_KEY_LEAK as string,
+      });
+
+      const call = spawnSpy.mock.lastCall;
+      if (!call) throw new Error('spawn not called');
+      const envArg = call[2].env;
+
+      expect(envArg.TEST_API_KEY_LEAK).toBe('secret_value');
+    } finally {
+      process.env.TEST_API_KEY_LEAK = undefined;
+    }
+  });
+});