keystone-cli 0.5.0 → 0.6.0

This diff shows the changes between publicly available package versions as published to their respective public registries, and is provided for informational purposes only.
Files changed (47)
  1. package/README.md +55 -8
  2. package/package.json +5 -3
  3. package/src/cli.ts +33 -192
  4. package/src/db/memory-db.test.ts +54 -0
  5. package/src/db/memory-db.ts +122 -0
  6. package/src/db/sqlite-setup.ts +49 -0
  7. package/src/db/workflow-db.test.ts +41 -10
  8. package/src/db/workflow-db.ts +84 -28
  9. package/src/expression/evaluator.test.ts +19 -0
  10. package/src/expression/evaluator.ts +134 -39
  11. package/src/parser/schema.ts +41 -0
  12. package/src/runner/audit-verification.test.ts +23 -0
  13. package/src/runner/auto-heal.test.ts +64 -0
  14. package/src/runner/debug-repl.test.ts +74 -0
  15. package/src/runner/debug-repl.ts +225 -0
  16. package/src/runner/foreach-executor.ts +327 -0
  17. package/src/runner/llm-adapter.test.ts +27 -14
  18. package/src/runner/llm-adapter.ts +90 -112
  19. package/src/runner/llm-executor.test.ts +91 -6
  20. package/src/runner/llm-executor.ts +26 -6
  21. package/src/runner/mcp-client.audit.test.ts +69 -0
  22. package/src/runner/mcp-client.test.ts +12 -3
  23. package/src/runner/mcp-client.ts +199 -19
  24. package/src/runner/mcp-manager.ts +19 -8
  25. package/src/runner/mcp-server.test.ts +8 -5
  26. package/src/runner/mcp-server.ts +31 -17
  27. package/src/runner/optimization-runner.ts +305 -0
  28. package/src/runner/reflexion.test.ts +87 -0
  29. package/src/runner/shell-executor.test.ts +12 -0
  30. package/src/runner/shell-executor.ts +9 -6
  31. package/src/runner/step-executor.test.ts +46 -1
  32. package/src/runner/step-executor.ts +154 -60
  33. package/src/runner/stream-utils.test.ts +65 -0
  34. package/src/runner/stream-utils.ts +186 -0
  35. package/src/runner/workflow-runner.test.ts +4 -4
  36. package/src/runner/workflow-runner.ts +436 -251
  37. package/src/templates/agents/keystone-architect.md +6 -4
  38. package/src/templates/full-feature-demo.yaml +4 -4
  39. package/src/types/assets.d.ts +14 -0
  40. package/src/types/status.ts +1 -1
  41. package/src/ui/dashboard.tsx +38 -26
  42. package/src/utils/auth-manager.ts +3 -1
  43. package/src/utils/logger.test.ts +76 -0
  44. package/src/utils/logger.ts +39 -0
  45. package/src/utils/prompt.ts +75 -0
  46. package/src/utils/redactor.test.ts +86 -4
  47. package/src/utils/redactor.ts +48 -13

package/src/runner/llm-adapter.test.ts

@@ -272,33 +272,41 @@ describe('CopilotAdapter', () => {
 
  describe('getAdapter', () => {
  beforeEach(() => {
- spyOn(ConfigLoader, 'getProviderForModel').mockImplementation((model: string) => {
- if (model.startsWith('claude')) return 'anthropic';
- if (model.startsWith('gpt')) return 'openai';
- if (model.startsWith('copilot')) return 'copilot';
- return 'openai';
- });
- // @ts-ignore
- spyOn(ConfigLoader, 'load').mockReturnValue({
+ // Setup a clean config for each test
+ ConfigLoader.setConfig({
+ default_provider: 'openai',
  providers: {
  openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
  anthropic: { type: 'anthropic', api_key_env: 'ANTHROPIC_API_KEY' },
  copilot: { type: 'copilot' },
  },
+ model_mappings: {
+ 'claude-*': 'anthropic',
+ 'gpt-*': 'openai',
+ 'copilot:*': 'copilot',
+ },
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
+ mcp_servers: {},
  });
  });
 
  afterEach(() => {
- mock.restore();
+ ConfigLoader.clear();
  });
 
  it('should return OpenAIAdapter for gpt models', () => {
+ // ConfigLoader.getProviderForModel logic will handle this
  const { adapter, resolvedModel } = getAdapter('gpt-4');
  expect(adapter).toBeInstanceOf(OpenAIAdapter);
  expect(resolvedModel).toBe('gpt-4');
  });
 
  it('should return AnthropicAdapter for claude models', () => {
+ // Explicit mapping in our mock config above covers this if ConfigLoader logic works
+ // Or we rely on model name prefix if ConfigLoader has that default logic
+ // Let's ensure the mapping exists if we removed the spy
+ // ConfigLoader.getProviderForModel uses: explicit mapping OR default provider
  const { adapter, resolvedModel } = getAdapter('claude-3');
  expect(adapter).toBeInstanceOf(AnthropicAdapter);
  expect(resolvedModel).toBe('claude-3');
@@ -311,11 +319,16 @@ describe('getAdapter', () => {
  });
 
  it('should throw error for unknown provider', () => {
- // @ts-ignore
- ConfigLoader.getProviderForModel.mockReturnValue('unknown');
- // @ts-ignore
- ConfigLoader.load.mockReturnValue({ providers: {} });
+ // Set config with empty providers to force error
+ ConfigLoader.setConfig({
+ default_provider: 'unknown',
+ providers: {}, // No providers configured
+ model_mappings: {},
+ storage: { retention_days: 30 },
+ workflows_directory: 'workflows',
+ mcp_servers: {},
+ });
 
- expect(() => getAdapter('unknown-model')).toThrow(/Provider configuration not found/);
+ expect(() => getAdapter('unknown-model')).toThrow();
  });
  });
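
Note: the test setup above replaces per-test `spyOn` mocks with a real config seeded through `ConfigLoader.setConfig`, including glob-style `model_mappings`. As a rough illustration only (this is not keystone-cli source), a mapping-first, default-provider-fallback lookup of the kind the test comments describe could look like this:

```ts
// Hypothetical sketch: resolve a provider name from glob-style model_mappings,
// falling back to default_provider when no pattern matches.
type ModelMappings = Record<string, string>;

function matchesGlob(pattern: string, model: string): boolean {
  // Treat '*' as "match anything"; everything else is matched literally.
  const regex = new RegExp(
    `^${pattern
      .split('*')
      .map((p) => p.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
      .join('.*')}$`
  );
  return regex.test(model);
}

function resolveProvider(model: string, mappings: ModelMappings, defaultProvider: string): string {
  for (const [pattern, provider] of Object.entries(mappings)) {
    if (matchesGlob(pattern, model)) return provider;
  }
  return defaultProvider;
}

// resolveProvider('claude-3', { 'claude-*': 'anthropic', 'gpt-*': 'openai' }, 'openai')
//   -> 'anthropic'; an unmapped model falls back to 'openai'.
```

With a lookup like this, `claude-3` matches `'claude-*'` and resolves to `anthropic`, which is why the tests no longer need to spy on `getProviderForModel`.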

package/src/runner/llm-adapter.ts

@@ -1,5 +1,7 @@
+ import { pipeline } from '@xenova/transformers';
  import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
  import { ConfigLoader } from '../utils/config-loader';
+ import { processOpenAIStream } from './stream-utils';
 
  // Maximum response size to prevent memory exhaustion (1MB)
  const MAX_RESPONSE_SIZE = 1024 * 1024;
@@ -48,6 +50,7 @@ export interface LLMAdapter {
  onStream?: (chunk: string) => void;
  }
  ): Promise<LLMResponse>;
+ embed?(text: string, model?: string): Promise<number[]>;
  }
 
  export class OpenAIAdapter implements LLMAdapter {
@@ -94,72 +97,51 @@ export class OpenAIAdapter implements LLMAdapter {
 
  if (isStreaming) {
  if (!response.body) throw new Error('Response body is null');
- const reader = response.body.getReader();
- const decoder = new TextDecoder();
- let fullContent = '';
- const toolCalls: LLMToolCall[] = [];
-
- while (true) {
- const { done, value } = await reader.read();
- if (done) break;
-
- const chunk = decoder.decode(value);
- const lines = chunk.split('\n').filter((line) => line.trim() !== '');
-
- for (const line of lines) {
- if (line.includes('[DONE]')) continue;
- if (!line.startsWith('data: ')) continue;
-
- try {
- const data = JSON.parse(line.slice(6));
- const delta = data.choices[0].delta;
-
- if (delta.content) {
- if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
- }
- fullContent += delta.content;
- options.onStream?.(delta.content);
- }
-
- if (delta.tool_calls) {
- for (const tc of delta.tool_calls) {
- if (!toolCalls[tc.index]) {
- toolCalls[tc.index] = {
- id: tc.id,
- type: 'function',
- function: { name: '', arguments: '' },
- };
- }
- const existing = toolCalls[tc.index];
- if (tc.function?.name) existing.function.name += tc.function.name;
- if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
- }
- }
- } catch (e) {
- // Ignore parse errors for incomplete chunks
- }
- }
- }
-
- return {
- message: {
- role: 'assistant',
- content: fullContent || null,
- tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
- },
- };
+ return processOpenAIStream(response, options, 'OpenAI');
  }
 
  const data = (await response.json()) as {
  choices: { message: LLMMessage }[];
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  };
+
+ // Validate response size to prevent memory exhaustion
+ const contentLength = data.choices[0]?.message?.content?.length ?? 0;
+ if (contentLength > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
+
  return {
  message: data.choices[0].message,
  usage: data.usage,
  };
  }
+
+ async embed(text: string, model = 'text-embedding-3-small'): Promise<number[]> {
+ const response = await fetch(`${this.baseUrl}/embeddings`, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${this.apiKey}`,
+ },
+ body: JSON.stringify({
+ model,
+ input: text,
+ }),
+ });
+
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(
+ `OpenAI Embeddings API error: ${response.status} ${response.statusText} - ${error}`
+ );
+ }
+
+ const data = (await response.json()) as {
+ data: { embedding: number[] }[];
+ };
+ return data.data[0].embedding;
+ }
  }
 
  export class AnthropicAdapter implements LLMAdapter {
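
Note: `LLMAdapter` now exposes an optional `embed()` method, which `OpenAIAdapter` implements against the OpenAI `/embeddings` endpoint (defaulting to `text-embedding-3-small`). A minimal usage sketch, assuming an OpenAI provider is configured and `OPENAI_API_KEY` is set; the cosine-similarity helper is illustrative and not part of the package:

```ts
import { getAdapter } from './llm-adapter';

// Cosine similarity between two embedding vectors (illustrative helper).
function cosine(a: number[], b: number[]): number {
  const dot = a.reduce((sum, v, i) => sum + v * b[i], 0);
  const norm = (v: number[]) => Math.sqrt(v.reduce((s, x) => s + x * x, 0));
  return dot / (norm(a) * norm(b));
}

const { adapter } = getAdapter('gpt-4');
if (adapter.embed) {
  // Uses the 'text-embedding-3-small' default from the implementation above.
  const a = await adapter.embed('workflow orchestration');
  const b = await adapter.embed('pipeline automation');
  console.log('similarity:', cosine(a, b).toFixed(3));
}
```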
@@ -348,7 +330,15 @@ export class AnthropicAdapter implements LLMAdapter {
  }
  }
  } catch (e) {
- // Ignore parse errors
+ // Log non-SyntaxError exceptions at warning level (they indicate real issues)
+ if (!(e instanceof SyntaxError)) {
+ console.warn(`[Anthropic Stream] Error processing chunk: ${e}`);
+ } else if (process.env.DEBUG || process.env.LLM_DEBUG) {
+ // SyntaxErrors are normal for incomplete chunks - only log in debug mode
+ process.stderr.write(
+ `[Anthropic Stream] Incomplete chunk parse: ${line.slice(0, 50)}...\n`
+ );
+ }
  }
  }
  }
@@ -383,6 +373,12 @@
  };
 
  const content = data.content.find((c) => c.type === 'text')?.text || null;
+
+ // Validate response size to prevent memory exhaustion
+ if (content && content.length > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
+
  const toolCalls = data.content
  .filter((c) => c.type === 'tool_use')
  .map((c) => ({
@@ -455,68 +451,20 @@ export class CopilotAdapter implements LLMAdapter {
  if (isStreaming) {
  // Use the same streaming logic as OpenAIAdapter since Copilot uses OpenAI API
  if (!response.body) throw new Error('Response body is null');
- const reader = response.body.getReader();
- const decoder = new TextDecoder();
- let fullContent = '';
- const toolCalls: LLMToolCall[] = [];
-
- while (true) {
- const { done, value } = await reader.read();
- if (done) break;
-
- const chunk = decoder.decode(value);
- const lines = chunk.split('\n').filter((line) => line.trim() !== '');
-
- for (const line of lines) {
- if (line.includes('[DONE]')) continue;
- if (!line.startsWith('data: ')) continue;
-
- try {
- const data = JSON.parse(line.slice(6));
- if (!data.choices?.[0]?.delta) continue;
- const delta = data.choices[0].delta;
-
- if (delta.content) {
- if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
- throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
- }
- fullContent += delta.content;
- options.onStream?.(delta.content);
- }
-
- if (delta.tool_calls) {
- for (const tc of delta.tool_calls) {
- if (!toolCalls[tc.index]) {
- toolCalls[tc.index] = {
- id: tc.id,
- type: 'function',
- function: { name: '', arguments: '' },
- };
- }
- const existing = toolCalls[tc.index];
- if (tc.function?.name) existing.function.name += tc.function.name;
- if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
- }
- }
- } catch (e) {
- // Ignore parse errors
- }
- }
- }
-
- return {
- message: {
- role: 'assistant',
- content: fullContent || null,
- tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
- },
- };
+ return processOpenAIStream(response, options, 'Copilot');
  }
 
  const data = (await response.json()) as {
  choices: { message: LLMMessage }[];
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  };
+
+ // Validate response size to prevent memory exhaustion
+ const contentLength = data.choices[0]?.message?.content?.length ?? 0;
+ if (contentLength > MAX_RESPONSE_SIZE) {
+ throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+ }
+
  return {
  message: data.choices[0].message,
  usage: data.usage,
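
Note: both `OpenAIAdapter` and `CopilotAdapter` now delegate SSE handling to `processOpenAIStream` from the new `stream-utils.ts`, which is not shown in this diff. Judging from the inline code it replaces, the helper plausibly looks like the sketch below; the exact signature, types, and error handling are assumptions:

```ts
// Rough sketch of the shared SSE helper (the real stream-utils.ts is not in this diff).
type StreamedToolCall = { id: string; type: 'function'; function: { name: string; arguments: string } };

export async function processOpenAIStream(
  response: Response,
  options: { onStream?: (chunk: string) => void },
  providerLabel: string,
  maxSize = 1024 * 1024
): Promise<{ message: { role: 'assistant'; content: string | null; tool_calls?: StreamedToolCall[] } }> {
  // Callers check response.body before delegating, so the assertion is safe here.
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let fullContent = '';
  const toolCalls: StreamedToolCall[] = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done || !value) break;
    for (const line of decoder.decode(value).split('\n')) {
      if (!line.startsWith('data: ') || line.includes('[DONE]')) continue;
      try {
        const delta = JSON.parse(line.slice(6)).choices?.[0]?.delta;
        if (!delta) continue;
        if (delta.content) {
          if (fullContent.length + delta.content.length > maxSize) {
            throw new Error(`${providerLabel} response exceeds maximum size of ${maxSize} bytes`);
          }
          fullContent += delta.content;
          options.onStream?.(delta.content);
        }
        for (const tc of delta.tool_calls ?? []) {
          toolCalls[tc.index] ??= { id: tc.id, type: 'function', function: { name: '', arguments: '' } };
          if (tc.function?.name) toolCalls[tc.index].function.name += tc.function.name;
          if (tc.function?.arguments) toolCalls[tc.index].function.arguments += tc.function.arguments;
        }
      } catch (e) {
        // Incomplete SSE chunks fail JSON.parse (SyntaxError); anything else is a real error.
        if (!(e instanceof SyntaxError)) throw e;
      }
    }
  }

  return {
    message: {
      role: 'assistant',
      content: fullContent || null,
      tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
    },
  };
}
```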
@@ -524,7 +472,37 @@ export class CopilotAdapter implements LLMAdapter {
  }
  }
 
+ export class LocalEmbeddingAdapter implements LLMAdapter {
+ // biome-ignore lint/suspicious/noExplicitAny: transformers pipeline type
+ private static extractor: any = null;
+
+ async chat(): Promise<LLMResponse> {
+ throw new Error(
+ 'Local models in Keystone currently only support memory/embedding operations. ' +
+ 'To use a local LLM for chat/generation, please use an OpenAI-compatible local server ' +
+ '(like Ollama, LM Studio, or LocalAI) and configure it as an OpenAI provider in your config.'
+ );
+ }
+
+ async embed(text: string, model = 'Xenova/all-MiniLM-L6-v2'): Promise<number[]> {
+ const modelToUse = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model;
+ if (!LocalEmbeddingAdapter.extractor) {
+ LocalEmbeddingAdapter.extractor = await pipeline('feature-extraction', modelToUse);
+ }
+ const output = await LocalEmbeddingAdapter.extractor(text, {
+ pooling: 'mean',
+ normalize: true,
+ });
+ return Array.from(output.data);
+ }
+ }
+
  export function getAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
+ if (model === 'local' || model.startsWith('local:')) {
+ const resolvedModel = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model.substring(6);
+ return { adapter: new LocalEmbeddingAdapter(), resolvedModel };
+ }
+
  const providerName = ConfigLoader.getProviderForModel(model);
  const config = ConfigLoader.load();
  const providerConfig = config.providers[providerName];
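
Note: `getAdapter` now short-circuits for `local` and `local:<model>` and returns a `LocalEmbeddingAdapter` backed by `@xenova/transformers`, which only supports embeddings (its `chat()` throws and points users at an OpenAI-compatible local server instead). A small usage sketch; the example strings are illustrative:

```ts
import { getAdapter } from './llm-adapter';

// 'local' resolves to the default Xenova/all-MiniLM-L6-v2 model; the first call
// downloads and caches the model, so it can take a while.
const { adapter, resolvedModel } = getAdapter('local');
console.log(resolvedModel); // Xenova/all-MiniLM-L6-v2

const vector = await adapter.embed?.('remember that deploys happen on Fridays');
console.log(vector?.length); // embedding dimensionality (384 for all-MiniLM-L6-v2)
```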

package/src/runner/llm-executor.test.ts

@@ -28,7 +28,8 @@ import {
  import { executeLlmStep } from './llm-executor';
  import { MCPClient, type MCPResponse } from './mcp-client';
  import { MCPManager } from './mcp-manager';
- import type { StepResult } from './step-executor';
+ import { type StepResult, executeStep } from './step-executor';
+ import type { Logger } from './workflow-runner';
 
  // Mock adapters
  const originalOpenAIChat = OpenAIAdapter.prototype.chat;
@@ -129,10 +130,12 @@ describe('llm-executor', () => {
  };
  };
 
- beforeAll(() => {
+ beforeAll(async () => {
  // Mock spawn to avoid actual process creation
  const mockProcess = Object.assign(new EventEmitter(), {
- stdout: new Readable({ read() {} }),
+ stdout: new Readable({
+ read() {},
+ }),
  stdin: new Writable({
  write(_chunk, _encoding, cb: (error?: Error | null) => void) {
  cb();
@@ -239,6 +242,44 @@ You are a test agent.`;
  expect(result.output).toBe('LLM Response');
  });
 
+ it('should log tool call arguments', async () => {
+ const step: LlmStep = {
+ id: 'l1',
+ type: 'llm',
+ agent: 'test-agent',
+ prompt: 'trigger tool',
+ needs: [],
+ maxIterations: 10,
+ };
+ const context: ExpressionContext = { inputs: {}, steps: {} };
+
+ const executeStepFn = async (s: Step) => {
+ if (s.type === 'shell') {
+ return { status: 'success' as const, output: { stdout: 'tool result' } };
+ }
+ return { status: 'success' as const, output: 'ok' };
+ };
+
+ const logger: Logger = {
+ log: mock(() => {}),
+ error: mock(() => {}),
+ warn: mock(() => {}),
+ };
+
+ await executeLlmStep(
+ step,
+ context,
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+ logger
+ );
+
+ // Check if logger.log was called with arguments
+ // The tool call from mockChat is { name: 'test-tool', arguments: '{"val": 123}' }
+ expect(logger.log).toHaveBeenCalledWith(
+ expect.stringContaining('🛠️ Tool Call: test-tool {"val":123}')
+ );
+ });
+
  it('should support schema for JSON output', async () => {
  const step: LlmStep = {
  id: 'l1',
@@ -266,7 +307,7 @@ You are a test agent.`;
  expect(result.output).toEqual({ foo: 'bar' });
  });
 
- it('should throw error if JSON parsing fails for schema', async () => {
+ it('should retry if LLM output fails schema validation', async () => {
  const step: LlmStep = {
  id: 'l1',
  type: 'llm',
@@ -279,7 +320,51 @@ You are a test agent.`;
  const context: ExpressionContext = { inputs: {}, steps: {} };
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
 
- // Mock response with invalid JSON
+ const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
+ const originalCopilotChatInner = CopilotAdapter.prototype.chat;
+ const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
+
+ let attempt = 0;
+ const mockChat = mock(async () => {
+ attempt++;
+ if (attempt === 1) {
+ return { message: { role: 'assistant', content: 'Not JSON' } };
+ }
+ return { message: { role: 'assistant', content: '{"success": true}' } };
+ }) as unknown as typeof originalOpenAIChat;
+
+ OpenAIAdapter.prototype.chat = mockChat;
+ CopilotAdapter.prototype.chat = mockChat;
+ AnthropicAdapter.prototype.chat = mockChat;
+
+ const result = await executeLlmStep(
+ step,
+ context,
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
+ );
+
+ expect(result.status).toBe('success');
+ expect(result.output).toEqual({ success: true });
+ expect(attempt).toBe(2);
+
+ OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
+ CopilotAdapter.prototype.chat = originalCopilotChatInner;
+ AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
+ });
+
+ it('should fail after max iterations if JSON remains invalid', async () => {
+ const step: LlmStep = {
+ id: 'l1',
+ type: 'llm',
+ agent: 'test-agent',
+ prompt: 'give me invalid json',
+ needs: [],
+ maxIterations: 3,
+ schema: { type: 'object' },
+ };
+ const context: ExpressionContext = { inputs: {}, steps: {} };
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
  const originalCopilotChatInner = CopilotAdapter.prototype.chat;
  const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
@@ -298,7 +383,7 @@ You are a test agent.`;
  context,
  executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
  )
- ).rejects.toThrow(/Failed to parse LLM output as JSON/);
+ ).rejects.toThrow('Max ReAct iterations reached');
 
  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
  CopilotAdapter.prototype.chat = originalCopilotChatInner;

package/src/runner/llm-executor.ts

@@ -4,12 +4,12 @@ import { ExpressionEvaluator } from '../expression/evaluator';
  import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
  import type { AgentTool, LlmStep, Step } from '../parser/schema';
  import { extractJson } from '../utils/json-parser';
+ import { ConsoleLogger, type Logger } from '../utils/logger.ts';
  import { RedactionBuffer, Redactor } from '../utils/redactor';
  import { type LLMMessage, getAdapter } from './llm-adapter';
  import { MCPClient } from './mcp-client';
  import type { MCPManager, MCPServerConfig } from './mcp-manager';
  import type { StepResult } from './step-executor';
- import type { Logger } from './workflow-runner';
 
  interface ToolDefinition {
  name: string;
@@ -24,7 +24,7 @@ export async function executeLlmStep(
  step: LlmStep,
  context: ExpressionContext,
  executeStepFn: (step: Step, context: ExpressionContext) => Promise<StepResult>,
- logger: Logger = console,
+ logger: Logger = new ConsoleLogger(),
  mcpManager?: MCPManager,
  workflowDir?: string
  ): Promise<StepResult> {
@@ -249,9 +249,14 @@
  try {
  output = extractJson(output) as typeof output;
  } catch (e) {
- throw new Error(
- `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}\nOutput: ${output}`
- );
+ const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
+ logger.error(` ⚠️ ${errorMessage}. Retrying...`);
+
+ messages.push({
+ role: 'user',
+ content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
+ });
+ continue;
  }
  }
 
@@ -264,7 +269,22 @@
 
  // Execute tools
  for (const toolCall of message.tool_calls) {
- logger.log(` 🛠️ Tool Call: ${toolCall.function.name}`);
+ const argsStr = toolCall.function.arguments;
+ let displayArgs = '';
+ try {
+ const parsedArgs = JSON.parse(argsStr);
+ const keys = Object.keys(parsedArgs);
+ if (keys.length > 0) {
+ const formatted = JSON.stringify(parsedArgs);
+ displayArgs = formatted.length > 100 ? `${formatted.substring(0, 100)}...` : formatted;
+ }
+ } catch (e) {
+ displayArgs = argsStr.length > 100 ? `${argsStr.substring(0, 100)}...` : argsStr;
+ }
+
+ logger.log(
+ ` 🛠️ Tool Call: ${toolCall.function.name}${displayArgs ? ` ${displayArgs}` : ''}`
+ );
  const toolInfo = allTools.find((t) => t.name === toolCall.function.name);
 
  if (!toolInfo) {
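
Note: with this change, an LLM response that fails JSON extraction no longer aborts the step; the error is logged, a corrective user message is appended, and the ReAct loop continues until `maxIterations` is exhausted (hence the new 'Max ReAct iterations reached' expectation in the tests). A hedged skeleton of that control flow, heavily simplified from the real `executeLlmStep`:

```ts
// Simplified sketch of the retry-on-invalid-JSON behaviour; names other than the
// error strings (which appear in the diff) are hypothetical.
async function reactLoopSketch(
  maxIterations: number,
  callLlm: (messages: { role: string; content: string }[]) => Promise<string>,
  parse: (raw: string) => unknown
): Promise<unknown> {
  const messages: { role: string; content: string }[] = [{ role: 'user', content: 'prompt' }];
  for (let i = 0; i < maxIterations; i++) {
    const raw = await callLlm(messages);
    try {
      return parse(raw); // e.g. extractJson + schema validation
    } catch (e) {
      // Ask the model to correct itself instead of failing the step immediately.
      messages.push({
        role: 'user',
        content: `Error: ${e instanceof Error ? e.message : String(e)}\n\nPlease correct your output to be valid JSON matching the schema.`,
      });
    }
  }
  throw new Error('Max ReAct iterations reached');
}
```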

package/src/runner/mcp-client.audit.test.ts

@@ -77,3 +77,72 @@ describe('MCPClient Audit Fixes', () => {
  }
  });
  });
+
+ describe('MCPClient SSRF Protection', () => {
+ it('should reject localhost URLs without allowInsecure', async () => {
+ // HTTP localhost is rejected for not using HTTPS
+ await expect(MCPClient.createRemote('http://localhost:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*HTTPS/
+ );
+ // HTTPS localhost is rejected for being localhost
+ await expect(MCPClient.createRemote('https://localhost:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*localhost/
+ );
+ });
+
+ it('should reject 127.0.0.1', async () => {
+ await expect(MCPClient.createRemote('https://127.0.0.1:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*localhost/
+ );
+ });
+
+ it('should reject private IP ranges (10.x.x.x)', async () => {
+ await expect(MCPClient.createRemote('https://10.0.0.1:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*private/
+ );
+ });
+
+ it('should reject private IP ranges (192.168.x.x)', async () => {
+ await expect(MCPClient.createRemote('https://192.168.1.1:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*private/
+ );
+ });
+
+ it('should reject private IP ranges (172.16-31.x.x)', async () => {
+ await expect(MCPClient.createRemote('https://172.16.0.1:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*private/
+ );
+ await expect(MCPClient.createRemote('https://172.31.255.1:8080/sse')).rejects.toThrow(
+ /SSRF Protection.*private/
+ );
+ });
+
+ it('should reject cloud metadata endpoints', async () => {
+ // 169.254.169.254 is caught by link-local IP range check
+ await expect(
+ MCPClient.createRemote('https://169.254.169.254/latest/meta-data/')
+ ).rejects.toThrow(/SSRF Protection.*private/);
+ // Also test the hostname-based metadata detection
+ await expect(MCPClient.createRemote('https://metadata.google.internal/sse')).rejects.toThrow(
+ /SSRF Protection.*metadata/
+ );
+ });
+
+ it('should require HTTPS by default', async () => {
+ await expect(MCPClient.createRemote('http://api.example.com/sse')).rejects.toThrow(
+ /SSRF Protection.*HTTPS/
+ );
+ });
+
+ it('should allow HTTP with allowInsecure option', async () => {
+ // This will fail due to network issues, not SSRF
+ const promise = MCPClient.createRemote(
+ 'http://api.example.com/sse',
+ {},
+ 100, // short timeout
+ { allowInsecure: true }
+ );
+ // Should NOT throw SSRF error, but will throw timeout/connection error
+ await expect(promise).rejects.not.toThrow(/SSRF Protection/);
+ });
+ });
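
Note: the new suite pins down the SSRF rules `MCPClient.createRemote` now enforces: HTTPS unless `allowInsecure` is set, plus rejection of localhost, private, link-local, and cloud-metadata hosts. The standalone validator below reproduces that behaviour for illustration only; it is not the package's implementation:

```ts
// Illustrative SSRF guard matching the behaviour asserted above (not keystone-cli source).
function assertSafeRemoteUrl(rawUrl: string, opts: { allowInsecure?: boolean } = {}): void {
  const url = new URL(rawUrl);
  const host = url.hostname;

  if (url.protocol !== 'https:' && !opts.allowInsecure) {
    throw new Error('SSRF Protection: remote MCP servers must use HTTPS (set allowInsecure to override)');
  }
  if (host === 'localhost' || host === '127.0.0.1' || host === '::1') {
    throw new Error('SSRF Protection: localhost URLs are not allowed');
  }
  const octets = host.split('.').map(Number);
  const isIPv4 = octets.length === 4 && octets.every((n) => Number.isInteger(n) && n >= 0 && n <= 255);
  if (isIPv4) {
    const [a, b] = octets;
    const isPrivate =
      a === 10 ||
      (a === 172 && b >= 16 && b <= 31) ||
      (a === 192 && b === 168) ||
      (a === 169 && b === 254); // link-local, includes 169.254.169.254
    if (isPrivate) throw new Error('SSRF Protection: private/link-local IP ranges are not allowed');
  }
  if (host === 'metadata.google.internal' || host.endsWith('.internal')) {
    throw new Error('SSRF Protection: cloud metadata endpoints are not allowed');
  }
}

// assertSafeRemoteUrl('https://10.0.0.1:8080/sse')       -> throws (private range)
// assertSafeRemoteUrl('http://api.example.com/sse')      -> throws (HTTPS required)
// assertSafeRemoteUrl('http://api.example.com/sse', { allowInsecure: true }) -> ok
```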

package/src/runner/mcp-client.test.ts

@@ -139,7 +139,10 @@ describe('MCPClient', () => {
  return Promise.resolve(new Response(JSON.stringify({ ok: true })));
  });
 
- const clientPromise = MCPClient.createRemote('http://localhost:8080/sse');
+ // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+ const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+ allowInsecure: true,
+ });
 
  const client = await clientPromise;
  expect(client).toBeDefined();
@@ -185,7 +188,10 @@ describe('MCPClient', () => {
  return Promise.resolve(new Response(JSON.stringify({ ok: true })));
  });
 
- const client = await MCPClient.createRemote('http://localhost:8080/sse');
+ // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+ const client = await MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+ allowInsecure: true,
+ });
 
  // We can't easily hook into onMessage without reaching into internals
  // Instead, we'll test that initialize resolves correctly when the response arrives
@@ -228,7 +234,10 @@ describe('MCPClient', () => {
  )
  );
 
- const clientPromise = MCPClient.createRemote('http://localhost:8080/sse');
+ // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+ const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+ allowInsecure: true,
+ });
 
  await expect(clientPromise).rejects.toThrow(/SSE connection failed: 500/);