keystone-cli 0.5.0 → 0.6.0
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/README.md +55 -8
- package/package.json +5 -3
- package/src/cli.ts +33 -192
- package/src/db/memory-db.test.ts +54 -0
- package/src/db/memory-db.ts +122 -0
- package/src/db/sqlite-setup.ts +49 -0
- package/src/db/workflow-db.test.ts +41 -10
- package/src/db/workflow-db.ts +84 -28
- package/src/expression/evaluator.test.ts +19 -0
- package/src/expression/evaluator.ts +134 -39
- package/src/parser/schema.ts +41 -0
- package/src/runner/audit-verification.test.ts +23 -0
- package/src/runner/auto-heal.test.ts +64 -0
- package/src/runner/debug-repl.test.ts +74 -0
- package/src/runner/debug-repl.ts +225 -0
- package/src/runner/foreach-executor.ts +327 -0
- package/src/runner/llm-adapter.test.ts +27 -14
- package/src/runner/llm-adapter.ts +90 -112
- package/src/runner/llm-executor.test.ts +91 -6
- package/src/runner/llm-executor.ts +26 -6
- package/src/runner/mcp-client.audit.test.ts +69 -0
- package/src/runner/mcp-client.test.ts +12 -3
- package/src/runner/mcp-client.ts +199 -19
- package/src/runner/mcp-manager.ts +19 -8
- package/src/runner/mcp-server.test.ts +8 -5
- package/src/runner/mcp-server.ts +31 -17
- package/src/runner/optimization-runner.ts +305 -0
- package/src/runner/reflexion.test.ts +87 -0
- package/src/runner/shell-executor.test.ts +12 -0
- package/src/runner/shell-executor.ts +9 -6
- package/src/runner/step-executor.test.ts +46 -1
- package/src/runner/step-executor.ts +154 -60
- package/src/runner/stream-utils.test.ts +65 -0
- package/src/runner/stream-utils.ts +186 -0
- package/src/runner/workflow-runner.test.ts +4 -4
- package/src/runner/workflow-runner.ts +436 -251
- package/src/templates/agents/keystone-architect.md +6 -4
- package/src/templates/full-feature-demo.yaml +4 -4
- package/src/types/assets.d.ts +14 -0
- package/src/types/status.ts +1 -1
- package/src/ui/dashboard.tsx +38 -26
- package/src/utils/auth-manager.ts +3 -1
- package/src/utils/logger.test.ts +76 -0
- package/src/utils/logger.ts +39 -0
- package/src/utils/prompt.ts +75 -0
- package/src/utils/redactor.test.ts +86 -4
- package/src/utils/redactor.ts +48 -13
package/src/runner/llm-adapter.test.ts

```diff
@@ -272,33 +272,41 @@ describe('CopilotAdapter', () => {
 
 describe('getAdapter', () => {
   beforeEach(() => {
-
-
-
-      if (model.startsWith('copilot')) return 'copilot';
-      return 'openai';
-    });
-    // @ts-ignore
-    spyOn(ConfigLoader, 'load').mockReturnValue({
+    // Setup a clean config for each test
+    ConfigLoader.setConfig({
+      default_provider: 'openai',
       providers: {
         openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
         anthropic: { type: 'anthropic', api_key_env: 'ANTHROPIC_API_KEY' },
         copilot: { type: 'copilot' },
       },
+      model_mappings: {
+        'claude-*': 'anthropic',
+        'gpt-*': 'openai',
+        'copilot:*': 'copilot',
+      },
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
+      mcp_servers: {},
     });
   });
 
   afterEach(() => {
-
+    ConfigLoader.clear();
   });
 
   it('should return OpenAIAdapter for gpt models', () => {
+    // ConfigLoader.getProviderForModel logic will handle this
    const { adapter, resolvedModel } = getAdapter('gpt-4');
    expect(adapter).toBeInstanceOf(OpenAIAdapter);
    expect(resolvedModel).toBe('gpt-4');
  });
 
  it('should return AnthropicAdapter for claude models', () => {
+    // Explicit mapping in our mock config above covers this if ConfigLoader logic works
+    // Or we rely on model name prefix if ConfigLoader has that default logic
+    // Let's ensure the mapping exists if we removed the spy
+    // ConfigLoader.getProviderForModel uses: explicit mapping OR default provider
    const { adapter, resolvedModel } = getAdapter('claude-3');
    expect(adapter).toBeInstanceOf(AnthropicAdapter);
    expect(resolvedModel).toBe('claude-3');

@@ -311,11 +319,16 @@ describe('getAdapter', () => {
  });
 
  it('should throw error for unknown provider', () => {
-    //
-    ConfigLoader.
-
-
+    // Set config with empty providers to force error
+    ConfigLoader.setConfig({
+      default_provider: 'unknown',
+      providers: {}, // No providers configured
+      model_mappings: {},
+      storage: { retention_days: 30 },
+      workflows_directory: 'workflows',
+      mcp_servers: {},
+    });
 
-    expect(() => getAdapter('unknown-model')).toThrow(
+    expect(() => getAdapter('unknown-model')).toThrow();
  });
});
```
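The test setup above replaces `spyOn(ConfigLoader, 'load')` with the new `ConfigLoader.setConfig()` / `ConfigLoader.clear()` pair. The loader itself is not part of this diff, so the following is only a sketch of how a static test override plus glob-style `model_mappings` resolution could behave; the `KeystoneConfig` shape and the private field are assumptions, and only the method names appear in the changed tests.

```ts
// Sketch only: one way ConfigLoader.setConfig/clear could support test injection.
// The KeystoneConfig shape and the private field are assumptions; only the method
// names (load, setConfig, clear, getProviderForModel) are visible in the diff.
interface KeystoneConfig {
  default_provider: string;
  providers: Record<string, { type: string; api_key_env?: string }>;
  model_mappings: Record<string, string>;
  storage: { retention_days: number };
  workflows_directory: string;
  mcp_servers: Record<string, unknown>;
}

class ConfigLoaderSketch {
  private static override: KeystoneConfig | null = null;

  /** Inject a config directly (used by tests instead of spying on load()). */
  static setConfig(config: KeystoneConfig): void {
    ConfigLoaderSketch.override = config;
  }

  /** Drop the injected config so the next load() reads from disk again. */
  static clear(): void {
    ConfigLoaderSketch.override = null;
  }

  static load(): KeystoneConfig {
    if (ConfigLoaderSketch.override) return ConfigLoaderSketch.override;
    throw new Error('loading from the config file is not shown in this sketch');
  }

  /** Resolve a provider via explicit model_mappings, falling back to default_provider. */
  static getProviderForModel(model: string): string {
    const config = ConfigLoaderSketch.load();
    for (const [pattern, provider] of Object.entries(config.model_mappings)) {
      const matches = pattern.endsWith('*')
        ? model.startsWith(pattern.slice(0, -1))
        : model === pattern;
      if (matches) return provider;
    }
    return config.default_provider;
  }
}
```

With a config like the one set in `beforeEach` above, `getProviderForModel('claude-3')` matches the `'claude-*'` mapping, so `getAdapter('claude-3')` can return an `AnthropicAdapter` without any module spies.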
package/src/runner/llm-adapter.ts

```diff
@@ -1,5 +1,7 @@
+import { pipeline } from '@xenova/transformers';
 import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
 import { ConfigLoader } from '../utils/config-loader';
+import { processOpenAIStream } from './stream-utils';
 
 // Maximum response size to prevent memory exhaustion (1MB)
 const MAX_RESPONSE_SIZE = 1024 * 1024;

@@ -48,6 +50,7 @@ export interface LLMAdapter {
      onStream?: (chunk: string) => void;
    }
  ): Promise<LLMResponse>;
+  embed?(text: string, model?: string): Promise<number[]>;
 }
 
 export class OpenAIAdapter implements LLMAdapter {

@@ -94,72 +97,51 @@ export class OpenAIAdapter implements LLMAdapter {
 
    if (isStreaming) {
      if (!response.body) throw new Error('Response body is null');
-
-      const decoder = new TextDecoder();
-      let fullContent = '';
-      const toolCalls: LLMToolCall[] = [];
-
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-
-        const chunk = decoder.decode(value);
-        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
-
-        for (const line of lines) {
-          if (line.includes('[DONE]')) continue;
-          if (!line.startsWith('data: ')) continue;
-
-          try {
-            const data = JSON.parse(line.slice(6));
-            const delta = data.choices[0].delta;
-
-            if (delta.content) {
-              if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
-                throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
-              }
-              fullContent += delta.content;
-              options.onStream?.(delta.content);
-            }
-
-            if (delta.tool_calls) {
-              for (const tc of delta.tool_calls) {
-                if (!toolCalls[tc.index]) {
-                  toolCalls[tc.index] = {
-                    id: tc.id,
-                    type: 'function',
-                    function: { name: '', arguments: '' },
-                  };
-                }
-                const existing = toolCalls[tc.index];
-                if (tc.function?.name) existing.function.name += tc.function.name;
-                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
-              }
-            }
-          } catch (e) {
-            // Ignore parse errors for incomplete chunks
-          }
-        }
-      }
-
-      return {
-        message: {
-          role: 'assistant',
-          content: fullContent || null,
-          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
-        },
-      };
+      return processOpenAIStream(response, options, 'OpenAI');
    }
 
    const data = (await response.json()) as {
      choices: { message: LLMMessage }[];
      usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
    };
+
+    // Validate response size to prevent memory exhaustion
+    const contentLength = data.choices[0]?.message?.content?.length ?? 0;
+    if (contentLength > MAX_RESPONSE_SIZE) {
+      throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+    }
+
    return {
      message: data.choices[0].message,
      usage: data.usage,
    };
  }
+
+  async embed(text: string, model = 'text-embedding-3-small'): Promise<number[]> {
+    const response = await fetch(`${this.baseUrl}/embeddings`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: `Bearer ${this.apiKey}`,
+      },
+      body: JSON.stringify({
+        model,
+        input: text,
+      }),
+    });
+
+    if (!response.ok) {
+      const error = await response.text();
+      throw new Error(
+        `OpenAI Embeddings API error: ${response.status} ${response.statusText} - ${error}`
+      );
+    }
+
+    const data = (await response.json()) as {
+      data: { embedding: number[] }[];
+    };
+    return data.data[0].embedding;
+  }
 }
 
 export class AnthropicAdapter implements LLMAdapter {
```
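Both `OpenAIAdapter` and, further down, `CopilotAdapter` now hand their SSE handling to `processOpenAIStream` from the new `stream-utils.ts`. Only the call site appears in this diff, so the sketch below simply folds the removed inline loop into that shape; the parameter and return types are assumptions, and unlike the removed code it rethrows non-`SyntaxError` failures instead of swallowing them, mirroring the Anthropic change further down.

```ts
// Sketch: the removed inline SSE loop, refactored into the shape the diff calls
// (processOpenAIStream(response, options, providerLabel)). Types are trimmed to
// what the loop actually touches; the real stream-utils.ts may differ.
interface ToolCall {
  id: string;
  type: 'function';
  function: { name: string; arguments: string };
}
interface StreamOptions {
  onStream?: (chunk: string) => void;
}
const MAX_RESPONSE_SIZE = 1024 * 1024;

export async function processOpenAIStream(
  response: Response,
  options: StreamOptions,
  provider: string
) {
  if (!response.body) throw new Error(`${provider}: response body is null`);
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let fullContent = '';
  const toolCalls: ToolCall[] = [];

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;

    // OpenAI-style SSE: each non-empty line is "data: {json}" or "data: [DONE]"
    for (const line of decoder.decode(value).split('\n')) {
      if (!line.startsWith('data: ') || line.includes('[DONE]')) continue;
      try {
        const delta = JSON.parse(line.slice(6)).choices?.[0]?.delta;
        if (!delta) continue;
        if (delta.content) {
          if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
            throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
          }
          fullContent += delta.content;
          options.onStream?.(delta.content);
        }
        for (const tc of delta.tool_calls ?? []) {
          // Tool call fragments arrive incrementally; accumulate them by index
          toolCalls[tc.index] ??= { id: tc.id, type: 'function', function: { name: '', arguments: '' } };
          if (tc.function?.name) toolCalls[tc.index].function.name += tc.function.name;
          if (tc.function?.arguments) toolCalls[tc.index].function.arguments += tc.function.arguments;
        }
      } catch (e) {
        // Incomplete JSON chunks are expected mid-stream; rethrow real errors
        if (!(e instanceof SyntaxError)) throw e;
      }
    }
  }

  return {
    message: {
      role: 'assistant' as const,
      content: fullContent || null,
      tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
    },
  };
}
```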
package/src/runner/llm-adapter.ts

```diff
@@ -348,7 +330,15 @@ export class AnthropicAdapter implements LLMAdapter {
            }
          }
        } catch (e) {
-          //
+          // Log non-SyntaxError exceptions at warning level (they indicate real issues)
+          if (!(e instanceof SyntaxError)) {
+            console.warn(`[Anthropic Stream] Error processing chunk: ${e}`);
+          } else if (process.env.DEBUG || process.env.LLM_DEBUG) {
+            // SyntaxErrors are normal for incomplete chunks - only log in debug mode
+            process.stderr.write(
+              `[Anthropic Stream] Incomplete chunk parse: ${line.slice(0, 50)}...\n`
+            );
+          }
        }
      }
    }

@@ -383,6 +373,12 @@ export class AnthropicAdapter implements LLMAdapter {
    };
 
    const content = data.content.find((c) => c.type === 'text')?.text || null;
+
+    // Validate response size to prevent memory exhaustion
+    if (content && content.length > MAX_RESPONSE_SIZE) {
+      throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+    }
+
    const toolCalls = data.content
      .filter((c) => c.type === 'tool_use')
      .map((c) => ({

@@ -455,68 +451,20 @@ export class CopilotAdapter implements LLMAdapter {
    if (isStreaming) {
      // Use the same streaming logic as OpenAIAdapter since Copilot uses OpenAI API
      if (!response.body) throw new Error('Response body is null');
-
-      const decoder = new TextDecoder();
-      let fullContent = '';
-      const toolCalls: LLMToolCall[] = [];
-
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-
-        const chunk = decoder.decode(value);
-        const lines = chunk.split('\n').filter((line) => line.trim() !== '');
-
-        for (const line of lines) {
-          if (line.includes('[DONE]')) continue;
-          if (!line.startsWith('data: ')) continue;
-
-          try {
-            const data = JSON.parse(line.slice(6));
-            if (!data.choices?.[0]?.delta) continue;
-            const delta = data.choices[0].delta;
-
-            if (delta.content) {
-              if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
-                throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
-              }
-              fullContent += delta.content;
-              options.onStream?.(delta.content);
-            }
-
-            if (delta.tool_calls) {
-              for (const tc of delta.tool_calls) {
-                if (!toolCalls[tc.index]) {
-                  toolCalls[tc.index] = {
-                    id: tc.id,
-                    type: 'function',
-                    function: { name: '', arguments: '' },
-                  };
-                }
-                const existing = toolCalls[tc.index];
-                if (tc.function?.name) existing.function.name += tc.function.name;
-                if (tc.function?.arguments) existing.function.arguments += tc.function.arguments;
-              }
-            }
-          } catch (e) {
-            // Ignore parse errors
-          }
-        }
-      }
-
-      return {
-        message: {
-          role: 'assistant',
-          content: fullContent || null,
-          tool_calls: toolCalls.length > 0 ? toolCalls.filter(Boolean) : undefined,
-        },
-      };
+      return processOpenAIStream(response, options, 'Copilot');
    }
 
    const data = (await response.json()) as {
      choices: { message: LLMMessage }[];
      usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
    };
+
+    // Validate response size to prevent memory exhaustion
+    const contentLength = data.choices[0]?.message?.content?.length ?? 0;
+    if (contentLength > MAX_RESPONSE_SIZE) {
+      throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
+    }
+
    return {
      message: data.choices[0].message,
      usage: data.usage,

@@ -524,7 +472,37 @@ export class CopilotAdapter implements LLMAdapter {
  }
 }
 
+export class LocalEmbeddingAdapter implements LLMAdapter {
+  // biome-ignore lint/suspicious/noExplicitAny: transformers pipeline type
+  private static extractor: any = null;
+
+  async chat(): Promise<LLMResponse> {
+    throw new Error(
+      'Local models in Keystone currently only support memory/embedding operations. ' +
+        'To use a local LLM for chat/generation, please use an OpenAI-compatible local server ' +
+        '(like Ollama, LM Studio, or LocalAI) and configure it as an OpenAI provider in your config.'
+    );
+  }
+
+  async embed(text: string, model = 'Xenova/all-MiniLM-L6-v2'): Promise<number[]> {
+    const modelToUse = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model;
+    if (!LocalEmbeddingAdapter.extractor) {
+      LocalEmbeddingAdapter.extractor = await pipeline('feature-extraction', modelToUse);
+    }
+    const output = await LocalEmbeddingAdapter.extractor(text, {
+      pooling: 'mean',
+      normalize: true,
+    });
+    return Array.from(output.data);
+  }
+}
+
 export function getAdapter(model: string): { adapter: LLMAdapter; resolvedModel: string } {
+  if (model === 'local' || model.startsWith('local:')) {
+    const resolvedModel = model === 'local' ? 'Xenova/all-MiniLM-L6-v2' : model.substring(6);
+    return { adapter: new LocalEmbeddingAdapter(), resolvedModel };
+  }
+
  const providerName = ConfigLoader.getProviderForModel(model);
  const config = ConfigLoader.load();
  const providerConfig = config.providers[providerName];
```
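With the optional `embed?()` on the `LLMAdapter` interface and the `local` / `local:<model>` routing in `getAdapter`, embeddings can be computed without any remote provider. A hypothetical caller (not part of the package) might use it like this:

```ts
import { getAdapter } from './llm-adapter';

// Hypothetical caller: compare two memory entries with the local embedding model.
// 'local' resolves to Xenova/all-MiniLM-L6-v2 via LocalEmbeddingAdapter (see diff above);
// other adapters may or may not implement the optional embed() method, so check first.
async function similarity(a: string, b: string): Promise<number> {
  const { adapter, resolvedModel } = getAdapter('local');
  if (!adapter.embed) throw new Error('Selected adapter does not support embeddings');

  const [va, vb] = await Promise.all([
    adapter.embed(a, resolvedModel),
    adapter.embed(b, resolvedModel),
  ]);

  // The local pipeline normalizes its vectors, so the dot product is the cosine similarity.
  return va.reduce((sum, x, i) => sum + x * vb[i], 0);
}
```

Keeping `embed` optional means adapters that only implement `chat` still satisfy the interface, so callers have to check for it before use.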
package/src/runner/llm-executor.test.ts

```diff
@@ -28,7 +28,8 @@ import {
 import { executeLlmStep } from './llm-executor';
 import { MCPClient, type MCPResponse } from './mcp-client';
 import { MCPManager } from './mcp-manager';
-import type
+import { type StepResult, executeStep } from './step-executor';
+import type { Logger } from './workflow-runner';
 
 // Mock adapters
 const originalOpenAIChat = OpenAIAdapter.prototype.chat;

@@ -129,10 +130,12 @@ describe('llm-executor', () => {
    };
  };
 
-  beforeAll(() => {
+  beforeAll(async () => {
    // Mock spawn to avoid actual process creation
    const mockProcess = Object.assign(new EventEmitter(), {
-      stdout: new Readable({
+      stdout: new Readable({
+        read() {},
+      }),
      stdin: new Writable({
        write(_chunk, _encoding, cb: (error?: Error | null) => void) {
          cb();

@@ -239,6 +242,44 @@ You are a test agent.`;
    expect(result.output).toBe('LLM Response');
  });
 
+  it('should log tool call arguments', async () => {
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'trigger tool',
+      needs: [],
+      maxIterations: 10,
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+
+    const executeStepFn = async (s: Step) => {
+      if (s.type === 'shell') {
+        return { status: 'success' as const, output: { stdout: 'tool result' } };
+      }
+      return { status: 'success' as const, output: 'ok' };
+    };
+
+    const logger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+    };
+
+    await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      logger
+    );
+
+    // Check if logger.log was called with arguments
+    // The tool call from mockChat is { name: 'test-tool', arguments: '{"val": 123}' }
+    expect(logger.log).toHaveBeenCalledWith(
+      expect.stringContaining('🛠️ Tool Call: test-tool {"val":123}')
+    );
+  });
+
  it('should support schema for JSON output', async () => {
    const step: LlmStep = {
      id: 'l1',

@@ -266,7 +307,7 @@ You are a test agent.`;
    expect(result.output).toEqual({ foo: 'bar' });
  });
 
-  it('should
+  it('should retry if LLM output fails schema validation', async () => {
    const step: LlmStep = {
      id: 'l1',
      type: 'llm',

@@ -279,7 +320,51 @@ You are a test agent.`;
    const context: ExpressionContext = { inputs: {}, steps: {} };
    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
 
-
+    const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
+    const originalCopilotChatInner = CopilotAdapter.prototype.chat;
+    const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
+
+    let attempt = 0;
+    const mockChat = mock(async () => {
+      attempt++;
+      if (attempt === 1) {
+        return { message: { role: 'assistant', content: 'Not JSON' } };
+      }
+      return { message: { role: 'assistant', content: '{"success": true}' } };
+    }) as unknown as typeof originalOpenAIChat;
+
+    OpenAIAdapter.prototype.chat = mockChat;
+    CopilotAdapter.prototype.chat = mockChat;
+    AnthropicAdapter.prototype.chat = mockChat;
+
+    const result = await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
+    );
+
+    expect(result.status).toBe('success');
+    expect(result.output).toEqual({ success: true });
+    expect(attempt).toBe(2);
+
+    OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
+    CopilotAdapter.prototype.chat = originalCopilotChatInner;
+    AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
+  });
+
+  it('should fail after max iterations if JSON remains invalid', async () => {
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'give me invalid json',
+      needs: [],
+      maxIterations: 3,
+      schema: { type: 'object' },
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+
    const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
    const originalCopilotChatInner = CopilotAdapter.prototype.chat;
    const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;

@@ -298,7 +383,7 @@ You are a test agent.`;
      context,
      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
    )
-    ).rejects.toThrow(
+    ).rejects.toThrow('Max ReAct iterations reached');
 
    OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
    CopilotAdapter.prototype.chat = originalCopilotChatInner;
```
package/src/runner/llm-executor.ts

```diff
@@ -4,12 +4,12 @@ import { ExpressionEvaluator } from '../expression/evaluator';
 import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
 import type { AgentTool, LlmStep, Step } from '../parser/schema';
 import { extractJson } from '../utils/json-parser';
+import { ConsoleLogger, type Logger } from '../utils/logger.ts';
 import { RedactionBuffer, Redactor } from '../utils/redactor';
 import { type LLMMessage, getAdapter } from './llm-adapter';
 import { MCPClient } from './mcp-client';
 import type { MCPManager, MCPServerConfig } from './mcp-manager';
 import type { StepResult } from './step-executor';
-import type { Logger } from './workflow-runner';
 
 interface ToolDefinition {
   name: string;

@@ -24,7 +24,7 @@ export async function executeLlmStep(
  step: LlmStep,
  context: ExpressionContext,
  executeStepFn: (step: Step, context: ExpressionContext) => Promise<StepResult>,
-  logger: Logger =
+  logger: Logger = new ConsoleLogger(),
  mcpManager?: MCPManager,
  workflowDir?: string
): Promise<StepResult> {

@@ -249,9 +249,14 @@ export async function executeLlmStep(
      try {
        output = extractJson(output) as typeof output;
      } catch (e) {
-
-
-
+        const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
+        logger.error(`  ⚠️ ${errorMessage}. Retrying...`);
+
+        messages.push({
+          role: 'user',
+          content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
+        });
+        continue;
      }
    }
 

@@ -264,7 +269,22 @@ export async function executeLlmStep(
 
      // Execute tools
      for (const toolCall of message.tool_calls) {
-
+        const argsStr = toolCall.function.arguments;
+        let displayArgs = '';
+        try {
+          const parsedArgs = JSON.parse(argsStr);
+          const keys = Object.keys(parsedArgs);
+          if (keys.length > 0) {
+            const formatted = JSON.stringify(parsedArgs);
+            displayArgs = formatted.length > 100 ? `${formatted.substring(0, 100)}...` : formatted;
+          }
+        } catch (e) {
+          displayArgs = argsStr.length > 100 ? `${argsStr.substring(0, 100)}...` : argsStr;
+        }
+
+        logger.log(
+          `  🛠️ Tool Call: ${toolCall.function.name}${displayArgs ? ` ${displayArgs}` : ''}`
+        );
        const toolInfo = allTools.find((t) => t.name === toolCall.function.name);
 
        if (!toolInfo) {
```
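`Logger` now comes from the new `src/utils/logger.ts` (with a `ConsoleLogger` default) instead of `workflow-runner.ts`. That file is not shown in this section; judging from the call sites here and in the tests (`log`, `warn`, `error`), a minimal version could look like the sketch below, though the real module (+39 lines, with its own tests) likely does more.

```ts
// Sketch of src/utils/logger.ts, inferred from call sites (logger.log / warn / error);
// the real file may add prefixes, levels, or redaction hooks.
export interface Logger {
  log(message: string): void;
  warn(message: string): void;
  error(message: string): void;
}

export class ConsoleLogger implements Logger {
  log(message: string): void {
    console.log(message);
  }

  warn(message: string): void {
    console.warn(message);
  }

  error(message: string): void {
    console.error(message);
  }
}
```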
package/src/runner/mcp-client.audit.test.ts

```diff
@@ -77,3 +77,72 @@ describe('MCPClient Audit Fixes', () => {
    }
  });
});
+
+describe('MCPClient SSRF Protection', () => {
+  it('should reject localhost URLs without allowInsecure', async () => {
+    // HTTP localhost is rejected for not using HTTPS
+    await expect(MCPClient.createRemote('http://localhost:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*HTTPS/
+    );
+    // HTTPS localhost is rejected for being localhost
+    await expect(MCPClient.createRemote('https://localhost:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*localhost/
+    );
+  });
+
+  it('should reject 127.0.0.1', async () => {
+    await expect(MCPClient.createRemote('https://127.0.0.1:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*localhost/
+    );
+  });
+
+  it('should reject private IP ranges (10.x.x.x)', async () => {
+    await expect(MCPClient.createRemote('https://10.0.0.1:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*private/
+    );
+  });
+
+  it('should reject private IP ranges (192.168.x.x)', async () => {
+    await expect(MCPClient.createRemote('https://192.168.1.1:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*private/
+    );
+  });
+
+  it('should reject private IP ranges (172.16-31.x.x)', async () => {
+    await expect(MCPClient.createRemote('https://172.16.0.1:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*private/
+    );
+    await expect(MCPClient.createRemote('https://172.31.255.1:8080/sse')).rejects.toThrow(
+      /SSRF Protection.*private/
+    );
+  });
+
+  it('should reject cloud metadata endpoints', async () => {
+    // 169.254.169.254 is caught by link-local IP range check
+    await expect(
+      MCPClient.createRemote('https://169.254.169.254/latest/meta-data/')
+    ).rejects.toThrow(/SSRF Protection.*private/);
+    // Also test the hostname-based metadata detection
+    await expect(MCPClient.createRemote('https://metadata.google.internal/sse')).rejects.toThrow(
+      /SSRF Protection.*metadata/
+    );
+  });
+
+  it('should require HTTPS by default', async () => {
+    await expect(MCPClient.createRemote('http://api.example.com/sse')).rejects.toThrow(
+      /SSRF Protection.*HTTPS/
+    );
+  });
+
+  it('should allow HTTP with allowInsecure option', async () => {
+    // This will fail due to network issues, not SSRF
+    const promise = MCPClient.createRemote(
+      'http://api.example.com/sse',
+      {},
+      100, // short timeout
+      { allowInsecure: true }
+    );
+    // Should NOT throw SSRF error, but will throw timeout/connection error
+    await expect(promise).rejects.not.toThrow(/SSRF Protection/);
+  });
+});
```
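The new `MCPClient SSRF Protection` suite pins down what `createRemote` must reject before opening an SSE connection: non-HTTPS URLs (unless `allowInsecure` is set), localhost, RFC 1918 and link-local ranges, and metadata hostnames. The real checks live in `mcp-client.ts` (+199 lines) and are not shown here; the following standalone function is only an illustration that would satisfy these tests.

```ts
// Illustrative validation matching the test expectations above; not the package's
// actual code. IPv6 hosts and DNS names that resolve to private IPs are not handled.
export function assertSafeRemoteUrl(rawUrl: string, allowInsecure = false): void {
  const url = new URL(rawUrl);
  const host = url.hostname.toLowerCase();

  if (!allowInsecure && url.protocol !== 'https:') {
    throw new Error('SSRF Protection: remote MCP servers must use HTTPS');
  }
  if (host === 'localhost' || host === '127.0.0.1') {
    throw new Error('SSRF Protection: localhost addresses are not allowed');
  }
  if (host === 'metadata.google.internal' || host.endsWith('.internal')) {
    throw new Error('SSRF Protection: cloud metadata endpoints are not allowed');
  }

  const octets = host.split('.').map(Number);
  if (octets.length === 4 && octets.every((o) => Number.isInteger(o) && o >= 0 && o <= 255)) {
    const [a, b] = octets;
    const isPrivate =
      a === 10 || // 10.0.0.0/8
      (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
      (a === 192 && b === 168) || // 192.168.0.0/16
      (a === 169 && b === 254) || // link-local, incl. 169.254.169.254 metadata
      a === 127; // loopback
    if (isPrivate) {
      throw new Error('SSRF Protection: private or link-local IP ranges are not allowed');
    }
  }
}
```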
package/src/runner/mcp-client.test.ts

```diff
@@ -139,7 +139,10 @@ describe('MCPClient', () => {
      return Promise.resolve(new Response(JSON.stringify({ ok: true })));
    });
 
-
+    // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+    const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+      allowInsecure: true,
+    });
 
    const client = await clientPromise;
    expect(client).toBeDefined();

@@ -185,7 +188,10 @@ describe('MCPClient', () => {
      return Promise.resolve(new Response(JSON.stringify({ ok: true })));
    });
 
-
+    // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+    const client = await MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+      allowInsecure: true,
+    });
 
    // We can't easily hook into onMessage without reaching into internals
    // Instead, we'll test that initialize resolves correctly when the response arrives

@@ -228,7 +234,10 @@ describe('MCPClient', () => {
      )
    );
 
-
+    // Use allowInsecure for testing with localhost (fetch is mocked anyway)
+    const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
+      allowInsecure: true,
+    });
 
    await expect(clientPromise).rejects.toThrow(/SSE connection failed: 500/);
```