keystone-cli 0.4.4 → 0.5.1
This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/package.json +1 -2
- package/src/cli.ts +64 -4
- package/src/db/workflow-db.ts +16 -7
- package/src/expression/evaluator.audit.test.ts +67 -0
- package/src/expression/evaluator.test.ts +15 -2
- package/src/expression/evaluator.ts +102 -29
- package/src/parser/agent-parser.test.ts +6 -2
- package/src/parser/schema.ts +2 -0
- package/src/parser/workflow-parser.test.ts +6 -2
- package/src/parser/workflow-parser.ts +22 -11
- package/src/runner/audit-verification.test.ts +12 -8
- package/src/runner/llm-adapter.ts +49 -12
- package/src/runner/llm-executor.test.ts +75 -13
- package/src/runner/llm-executor.ts +84 -47
- package/src/runner/mcp-client.audit.test.ts +79 -0
- package/src/runner/mcp-client.ts +102 -19
- package/src/runner/shell-executor.test.ts +33 -15
- package/src/runner/shell-executor.ts +110 -39
- package/src/runner/step-executor.test.ts +30 -2
- package/src/runner/timeout.ts +2 -2
- package/src/runner/tool-integration.test.ts +8 -2
- package/src/runner/workflow-runner.ts +95 -29
- package/src/templates/agents/keystone-architect.md +5 -3
- package/src/types/status.ts +25 -0
- package/src/ui/dashboard.tsx +3 -1
- package/src/utils/auth-manager.test.ts +3 -1
- package/src/utils/auth-manager.ts +12 -2
- package/src/utils/config-loader.test.ts +2 -17
- package/src/utils/mermaid.ts +0 -8
- package/src/utils/redactor.ts +115 -22
- package/src/utils/sandbox.test.ts +9 -13
- package/src/utils/sandbox.ts +40 -53
- package/src/utils/workflow-registry.test.ts +6 -2
|
@@ -8,12 +8,16 @@ describe('WorkflowParser', () => {
|
|
|
8
8
|
const tempDir = join(process.cwd(), 'temp-test-workflows');
|
|
9
9
|
try {
|
|
10
10
|
mkdirSync(tempDir, { recursive: true });
|
|
11
|
-
} catch (e) {
|
|
11
|
+
} catch (e) {
|
|
12
|
+
// Ignore existing dir error
|
|
13
|
+
}
|
|
12
14
|
|
|
13
15
|
afterAll(() => {
|
|
14
16
|
try {
|
|
15
17
|
rmSync(tempDir, { recursive: true, force: true });
|
|
16
|
-
} catch (e) {
|
|
18
|
+
} catch (e) {
|
|
19
|
+
// Ignore cleanup error
|
|
20
|
+
}
|
|
17
21
|
});
|
|
18
22
|
describe('topologicalSort', () => {
|
|
19
23
|
test('should sort simple dependencies', () => {
|
|
@@ -53,18 +53,22 @@ export class WorkflowParser {
|
|
|
53
53
|
const detected = new Set<string>();
|
|
54
54
|
|
|
55
55
|
// Helper to scan any value for dependencies
|
|
56
|
-
const scan = (value: unknown) => {
|
|
56
|
+
const scan = (value: unknown, depth = 0) => {
|
|
57
|
+
if (depth > 100) {
|
|
58
|
+
throw new Error('Maximum expression nesting depth exceeded (potential DOS attack)');
|
|
59
|
+
}
|
|
60
|
+
|
|
57
61
|
if (typeof value === 'string') {
|
|
58
62
|
for (const dep of ExpressionEvaluator.findStepDependencies(value)) {
|
|
59
63
|
detected.add(dep);
|
|
60
64
|
}
|
|
61
65
|
} else if (Array.isArray(value)) {
|
|
62
66
|
for (const item of value) {
|
|
63
|
-
scan(item);
|
|
67
|
+
scan(item, depth + 1);
|
|
64
68
|
}
|
|
65
69
|
} else if (value && typeof value === 'object') {
|
|
66
70
|
for (const val of Object.values(value)) {
|
|
67
|
-
scan(val);
|
|
71
|
+
scan(val, depth + 1);
|
|
68
72
|
}
|
|
69
73
|
}
|
|
70
74
|
};
|
|
@@ -187,6 +191,15 @@ export class WorkflowParser {
|
|
|
187
191
|
inDegree.set(step.id, step.needs.length);
|
|
188
192
|
}
|
|
189
193
|
|
|
194
|
+
// Build reverse dependency map for O(1) lookups instead of O(n)
|
|
195
|
+
const dependents = new Map<string, string[]>();
|
|
196
|
+
for (const step of workflow.steps) {
|
|
197
|
+
for (const dep of step.needs) {
|
|
198
|
+
if (!dependents.has(dep)) dependents.set(dep, []);
|
|
199
|
+
dependents.get(dep)?.push(step.id);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
190
203
|
// Kahn's algorithm
|
|
191
204
|
const queue: string[] = [];
|
|
192
205
|
const result: string[] = [];
|
|
@@ -203,14 +216,12 @@ export class WorkflowParser {
|
|
|
203
216
|
if (!stepId) continue;
|
|
204
217
|
result.push(stepId);
|
|
205
218
|
|
|
206
|
-
// Find all steps that depend on this step
|
|
207
|
-
for (const
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
queue.push(step.id);
|
|
213
|
-
}
|
|
219
|
+
// Find all steps that depend on this step (O(1) lookup)
|
|
220
|
+
for (const dependentId of dependents.get(stepId) || []) {
|
|
221
|
+
const newDegree = (inDegree.get(dependentId) || 0) - 1;
|
|
222
|
+
inDegree.set(dependentId, newDegree);
|
|
223
|
+
if (newDegree === 0) {
|
|
224
|
+
queue.push(dependentId);
|
|
214
225
|
}
|
|
215
226
|
}
|
|
216
227
|
}
|
|
@@ -40,17 +40,21 @@ describe('Audit Fixes Verification', () => {
|
|
|
40
40
|
});
|
|
41
41
|
|
|
42
42
|
describe('Sandbox Security', () => {
|
|
43
|
-
it('should
|
|
43
|
+
it('should execute code using node:vm sandbox on Bun', async () => {
|
|
44
|
+
// Since Bun uses JSC (not V8), isolated-vm cannot work.
|
|
45
|
+
// The sandbox now uses node:vm directly with security warnings.
|
|
46
|
+
SafeSandbox.resetWarning();
|
|
44
47
|
const code = '1 + 1';
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
);
|
|
48
|
+
const result = await SafeSandbox.execute(code, {});
|
|
49
|
+
expect(result).toBe(2);
|
|
48
50
|
});
|
|
49
51
|
|
|
50
|
-
it('should
|
|
51
|
-
|
|
52
|
-
const
|
|
53
|
-
|
|
52
|
+
it('should show security warning on first execution', async () => {
|
|
53
|
+
SafeSandbox.resetWarning();
|
|
54
|
+
const code = '2 + 2';
|
|
55
|
+
const result = await SafeSandbox.execute(code, {});
|
|
56
|
+
expect(result).toBe(4);
|
|
57
|
+
// Warning is shown to stderr, we just verify execution works
|
|
54
58
|
});
|
|
55
59
|
});
|
|
56
60
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
|
|
2
2
|
import { ConfigLoader } from '../utils/config-loader';
|
|
3
3
|
|
|
4
|
+
// Maximum response size to prevent memory exhaustion (1MB)
|
|
5
|
+
const MAX_RESPONSE_SIZE = 1024 * 1024;
|
|
6
|
+
|
|
4
7
|
export interface LLMMessage {
|
|
5
8
|
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
6
9
|
content: string | null;
|
|
@@ -112,6 +115,9 @@ export class OpenAIAdapter implements LLMAdapter {
|
|
|
112
115
|
const delta = data.choices[0].delta;
|
|
113
116
|
|
|
114
117
|
if (delta.content) {
|
|
118
|
+
if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
|
|
119
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
120
|
+
}
|
|
115
121
|
fullContent += delta.content;
|
|
116
122
|
options.onStream?.(delta.content);
|
|
117
123
|
}
|
|
@@ -287,7 +293,8 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
287
293
|
const reader = response.body.getReader();
|
|
288
294
|
const decoder = new TextDecoder();
|
|
289
295
|
let fullContent = '';
|
|
290
|
-
|
|
296
|
+
// Track tool calls by content block index for robust correlation
|
|
297
|
+
const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();
|
|
291
298
|
|
|
292
299
|
while (true) {
|
|
293
300
|
const { done, value } = await reader.read();
|
|
@@ -302,21 +309,43 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
302
309
|
try {
|
|
303
310
|
const data = JSON.parse(line.slice(6));
|
|
304
311
|
if (data.type === 'content_block_delta' && data.delta?.text) {
|
|
312
|
+
if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
|
|
313
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
314
|
+
}
|
|
305
315
|
fullContent += data.delta.text;
|
|
306
316
|
options.onStream?.(data.delta.text);
|
|
307
317
|
}
|
|
308
318
|
|
|
319
|
+
// Track tool calls by their index in the content blocks
|
|
309
320
|
if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
321
|
+
const index = data.index ?? toolCallsMap.size;
|
|
322
|
+
toolCallsMap.set(index, {
|
|
323
|
+
id: data.content_block.id || '',
|
|
324
|
+
name: data.content_block.name || '',
|
|
313
325
|
inputString: '',
|
|
314
326
|
});
|
|
315
327
|
}
|
|
316
328
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
329
|
+
// Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
|
|
330
|
+
if (
|
|
331
|
+
data.type === 'content_block_delta' &&
|
|
332
|
+
data.delta?.type === 'input_json_delta' &&
|
|
333
|
+
data.delta?.partial_json
|
|
334
|
+
) {
|
|
335
|
+
const index = data.index;
|
|
336
|
+
const toolCall = toolCallsMap.get(index);
|
|
337
|
+
if (toolCall) {
|
|
338
|
+
toolCall.inputString += data.delta.partial_json;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
// Update tool call ID if it arrives later (some edge cases)
|
|
343
|
+
if (data.type === 'content_block_delta' && data.content_block?.id) {
|
|
344
|
+
const index = data.index;
|
|
345
|
+
const toolCall = toolCallsMap.get(index);
|
|
346
|
+
if (toolCall && !toolCall.id) {
|
|
347
|
+
toolCall.id = data.content_block.id;
|
|
348
|
+
}
|
|
320
349
|
}
|
|
321
350
|
} catch (e) {
|
|
322
351
|
// Ignore parse errors
|
|
@@ -324,15 +353,20 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
324
353
|
}
|
|
325
354
|
}
|
|
326
355
|
|
|
356
|
+
// Convert map to array and filter out incomplete tool calls
|
|
357
|
+
const toolCalls = Array.from(toolCallsMap.values())
|
|
358
|
+
.filter((tc) => tc.id && tc.name) // Only include complete tool calls
|
|
359
|
+
.map((tc) => ({
|
|
360
|
+
id: tc.id,
|
|
361
|
+
type: 'function' as const,
|
|
362
|
+
function: { name: tc.name, arguments: tc.inputString },
|
|
363
|
+
}));
|
|
364
|
+
|
|
327
365
|
return {
|
|
328
366
|
message: {
|
|
329
367
|
role: 'assistant',
|
|
330
368
|
content: fullContent || null,
|
|
331
|
-
tool_calls: toolCalls.
|
|
332
|
-
id: tc.id,
|
|
333
|
-
type: 'function',
|
|
334
|
-
function: { name: tc.name, arguments: tc.inputString },
|
|
335
|
-
})),
|
|
369
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
336
370
|
},
|
|
337
371
|
};
|
|
338
372
|
}
|
|
@@ -443,6 +477,9 @@ export class CopilotAdapter implements LLMAdapter {
|
|
|
443
477
|
const delta = data.choices[0].delta;
|
|
444
478
|
|
|
445
479
|
if (delta.content) {
|
|
480
|
+
if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
|
|
481
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
482
|
+
}
|
|
446
483
|
fullContent += delta.content;
|
|
447
484
|
options.onStream?.(delta.content);
|
|
448
485
|
}
|
|
@@ -132,13 +132,13 @@ describe('llm-executor', () => {
|
|
|
132
132
|
beforeAll(() => {
|
|
133
133
|
// Mock spawn to avoid actual process creation
|
|
134
134
|
const mockProcess = Object.assign(new EventEmitter(), {
|
|
135
|
-
stdout: new Readable({ read() {} }),
|
|
135
|
+
stdout: new Readable({ read() { } }),
|
|
136
136
|
stdin: new Writable({
|
|
137
137
|
write(_chunk, _encoding, cb: (error?: Error | null) => void) {
|
|
138
138
|
cb();
|
|
139
139
|
},
|
|
140
140
|
}),
|
|
141
|
-
kill: mock(() => {}),
|
|
141
|
+
kill: mock(() => { }),
|
|
142
142
|
});
|
|
143
143
|
spawnSpy = spyOn(child_process, 'spawn').mockReturnValue(
|
|
144
144
|
mockProcess as unknown as child_process.ChildProcess
|
|
@@ -146,7 +146,9 @@ describe('llm-executor', () => {
|
|
|
146
146
|
|
|
147
147
|
try {
|
|
148
148
|
mkdirSync(agentsDir, { recursive: true });
|
|
149
|
-
} catch (e) {
|
|
149
|
+
} catch (e) {
|
|
150
|
+
// Ignore error during cleanup
|
|
151
|
+
}
|
|
150
152
|
const agentContent = `---
|
|
151
153
|
name: test-agent
|
|
152
154
|
model: gpt-4
|
|
@@ -196,6 +198,7 @@ You are a test agent.`;
|
|
|
196
198
|
agent: 'test-agent',
|
|
197
199
|
prompt: 'hello',
|
|
198
200
|
needs: [],
|
|
201
|
+
maxIterations: 10,
|
|
199
202
|
};
|
|
200
203
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
201
204
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
@@ -216,6 +219,7 @@ You are a test agent.`;
|
|
|
216
219
|
agent: 'test-agent',
|
|
217
220
|
prompt: 'trigger tool',
|
|
218
221
|
needs: [],
|
|
222
|
+
maxIterations: 10,
|
|
219
223
|
};
|
|
220
224
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
221
225
|
|
|
@@ -242,6 +246,7 @@ You are a test agent.`;
|
|
|
242
246
|
agent: 'test-agent',
|
|
243
247
|
prompt: 'give me json',
|
|
244
248
|
needs: [],
|
|
249
|
+
maxIterations: 10,
|
|
245
250
|
schema: {
|
|
246
251
|
type: 'object',
|
|
247
252
|
properties: {
|
|
@@ -261,19 +266,64 @@ You are a test agent.`;
|
|
|
261
266
|
expect(result.output).toEqual({ foo: 'bar' });
|
|
262
267
|
});
|
|
263
268
|
|
|
264
|
-
it('should
|
|
269
|
+
it('should retry if LLM output fails schema validation', async () => {
|
|
270
|
+
const step: LlmStep = {
|
|
271
|
+
id: 'l1',
|
|
272
|
+
type: 'llm',
|
|
273
|
+
agent: 'test-agent',
|
|
274
|
+
prompt: 'give me invalid json',
|
|
275
|
+
needs: [],
|
|
276
|
+
maxIterations: 10,
|
|
277
|
+
schema: { type: 'object' },
|
|
278
|
+
};
|
|
279
|
+
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
280
|
+
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
281
|
+
|
|
282
|
+
const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
|
|
283
|
+
const originalCopilotChatInner = CopilotAdapter.prototype.chat;
|
|
284
|
+
const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
|
|
285
|
+
|
|
286
|
+
let attempt = 0;
|
|
287
|
+
const mockChat = mock(async () => {
|
|
288
|
+
attempt++;
|
|
289
|
+
if (attempt === 1) {
|
|
290
|
+
return { message: { role: 'assistant', content: 'Not JSON' } };
|
|
291
|
+
}
|
|
292
|
+
return { message: { role: 'assistant', content: '{"success": true}' } };
|
|
293
|
+
}) as unknown as typeof originalOpenAIChat;
|
|
294
|
+
|
|
295
|
+
OpenAIAdapter.prototype.chat = mockChat;
|
|
296
|
+
CopilotAdapter.prototype.chat = mockChat;
|
|
297
|
+
AnthropicAdapter.prototype.chat = mockChat;
|
|
298
|
+
|
|
299
|
+
const result = await executeLlmStep(
|
|
300
|
+
step,
|
|
301
|
+
context,
|
|
302
|
+
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
|
|
303
|
+
);
|
|
304
|
+
|
|
305
|
+
expect(result.status).toBe('success');
|
|
306
|
+
expect(result.output).toEqual({ success: true });
|
|
307
|
+
expect(attempt).toBe(2);
|
|
308
|
+
|
|
309
|
+
OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
|
|
310
|
+
CopilotAdapter.prototype.chat = originalCopilotChatInner;
|
|
311
|
+
AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
it('should fail after max iterations if JSON remains invalid', async () => {
|
|
265
315
|
const step: LlmStep = {
|
|
266
316
|
id: 'l1',
|
|
267
317
|
type: 'llm',
|
|
268
318
|
agent: 'test-agent',
|
|
269
319
|
prompt: 'give me invalid json',
|
|
270
320
|
needs: [],
|
|
321
|
+
maxIterations: 3,
|
|
271
322
|
schema: { type: 'object' },
|
|
272
323
|
};
|
|
273
324
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
274
325
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
275
326
|
|
|
276
|
-
// Mock response with invalid JSON
|
|
277
327
|
const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
|
|
278
328
|
const originalCopilotChatInner = CopilotAdapter.prototype.chat;
|
|
279
329
|
const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
|
|
@@ -292,7 +342,7 @@ You are a test agent.`;
|
|
|
292
342
|
context,
|
|
293
343
|
executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
|
|
294
344
|
)
|
|
295
|
-
).rejects.toThrow(
|
|
345
|
+
).rejects.toThrow('Max ReAct iterations reached');
|
|
296
346
|
|
|
297
347
|
OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
|
|
298
348
|
CopilotAdapter.prototype.chat = originalCopilotChatInner;
|
|
@@ -306,6 +356,7 @@ You are a test agent.`;
|
|
|
306
356
|
agent: 'test-agent',
|
|
307
357
|
prompt: 'trigger unknown tool',
|
|
308
358
|
needs: [],
|
|
359
|
+
maxIterations: 10,
|
|
309
360
|
};
|
|
310
361
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
311
362
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
@@ -359,6 +410,7 @@ You are a test agent.`;
|
|
|
359
410
|
agent: 'test-agent',
|
|
360
411
|
prompt: 'hello',
|
|
361
412
|
needs: [],
|
|
413
|
+
maxIterations: 10,
|
|
362
414
|
mcpServers: [{ name: 'fail-mcp', command: 'node', args: [] }],
|
|
363
415
|
};
|
|
364
416
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -370,7 +422,7 @@ You are a test agent.`;
|
|
|
370
422
|
spyOn(client, 'stop').mockReturnValue(undefined);
|
|
371
423
|
return client;
|
|
372
424
|
});
|
|
373
|
-
const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
|
|
425
|
+
const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });
|
|
374
426
|
|
|
375
427
|
await executeLlmStep(
|
|
376
428
|
step,
|
|
@@ -379,7 +431,7 @@ You are a test agent.`;
|
|
|
379
431
|
);
|
|
380
432
|
|
|
381
433
|
expect(consoleSpy).toHaveBeenCalledWith(
|
|
382
|
-
expect.stringContaining('Failed to
|
|
434
|
+
expect.stringContaining('Failed to list tools from MCP server fail-mcp')
|
|
383
435
|
);
|
|
384
436
|
createLocalSpy.mockRestore();
|
|
385
437
|
consoleSpy.mockRestore();
|
|
@@ -392,6 +444,7 @@ You are a test agent.`;
|
|
|
392
444
|
agent: 'test-agent',
|
|
393
445
|
prompt: 'trigger mcp tool',
|
|
394
446
|
needs: [],
|
|
447
|
+
maxIterations: 10,
|
|
395
448
|
mcpServers: [{ name: 'test-mcp', command: 'node', args: [] }],
|
|
396
449
|
};
|
|
397
450
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -446,13 +499,15 @@ You are a test agent.`;
|
|
|
446
499
|
it('should use global MCP servers when useGlobalMcp is true', async () => {
|
|
447
500
|
ConfigLoader.setConfig({
|
|
448
501
|
mcp_servers: {
|
|
449
|
-
'global-mcp': { command: 'node', args: ['server.js'] },
|
|
502
|
+
'global-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
|
|
450
503
|
},
|
|
451
504
|
providers: {
|
|
452
|
-
openai: {
|
|
505
|
+
openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
|
|
453
506
|
},
|
|
454
507
|
model_mappings: {},
|
|
455
508
|
default_provider: 'openai',
|
|
509
|
+
storage: { retention_days: 30 },
|
|
510
|
+
workflows_directory: 'workflows',
|
|
456
511
|
});
|
|
457
512
|
|
|
458
513
|
const manager = new MCPManager();
|
|
@@ -462,6 +517,7 @@ You are a test agent.`;
|
|
|
462
517
|
agent: 'test-agent',
|
|
463
518
|
prompt: 'hello',
|
|
464
519
|
needs: [],
|
|
520
|
+
maxIterations: 10,
|
|
465
521
|
useGlobalMcp: true,
|
|
466
522
|
};
|
|
467
523
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -510,6 +566,7 @@ You are a test agent.`;
|
|
|
510
566
|
agent: 'test-agent',
|
|
511
567
|
prompt: 'trigger adhoc tool',
|
|
512
568
|
needs: [],
|
|
569
|
+
maxIterations: 10,
|
|
513
570
|
tools: [
|
|
514
571
|
{
|
|
515
572
|
name: 'adhoc-tool',
|
|
@@ -547,11 +604,12 @@ You are a test agent.`;
|
|
|
547
604
|
agent: 'test-agent',
|
|
548
605
|
prompt: 'hello',
|
|
549
606
|
needs: [],
|
|
607
|
+
maxIterations: 10,
|
|
550
608
|
mcpServers: ['some-global-server'],
|
|
551
609
|
};
|
|
552
610
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
553
611
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
554
|
-
const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
|
|
612
|
+
const consoleSpy = spyOn(console, 'error').mockImplementation(() => { });
|
|
555
613
|
|
|
556
614
|
await executeLlmStep(
|
|
557
615
|
step,
|
|
@@ -571,11 +629,13 @@ You are a test agent.`;
|
|
|
571
629
|
it('should not add global MCP server if already explicitly listed', async () => {
|
|
572
630
|
ConfigLoader.setConfig({
|
|
573
631
|
mcp_servers: {
|
|
574
|
-
'test-mcp': { command: 'node', args: ['server.js'] },
|
|
632
|
+
'test-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
|
|
575
633
|
},
|
|
576
|
-
providers: { openai: {
|
|
634
|
+
providers: { openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' } },
|
|
577
635
|
model_mappings: {},
|
|
578
636
|
default_provider: 'openai',
|
|
637
|
+
storage: { retention_days: 30 },
|
|
638
|
+
workflows_directory: 'workflows',
|
|
579
639
|
});
|
|
580
640
|
|
|
581
641
|
const manager = new MCPManager();
|
|
@@ -585,6 +645,7 @@ You are a test agent.`;
|
|
|
585
645
|
agent: 'test-agent',
|
|
586
646
|
prompt: 'hello',
|
|
587
647
|
needs: [],
|
|
648
|
+
maxIterations: 10,
|
|
588
649
|
useGlobalMcp: true,
|
|
589
650
|
mcpServers: [{ name: 'test-mcp', command: 'node', args: ['local.js'] }],
|
|
590
651
|
};
|
|
@@ -636,6 +697,7 @@ You are a test agent.`;
|
|
|
636
697
|
agent: 'test-agent',
|
|
637
698
|
prompt: '${{ steps.prev.output }}' as unknown as string,
|
|
638
699
|
needs: [],
|
|
700
|
+
maxIterations: 10,
|
|
639
701
|
};
|
|
640
702
|
const context: ExpressionContext = {
|
|
641
703
|
inputs: {},
|
|
@@ -3,7 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator';
|
|
|
3
3
|
import { ExpressionEvaluator } from '../expression/evaluator';
|
|
4
4
|
import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
|
|
5
5
|
import type { AgentTool, LlmStep, Step } from '../parser/schema';
|
|
6
|
-
import {
|
|
6
|
+
import { extractJson } from '../utils/json-parser';
|
|
7
|
+
import { RedactionBuffer, Redactor } from '../utils/redactor';
|
|
7
8
|
import { type LLMMessage, getAdapter } from './llm-adapter';
|
|
8
9
|
import { MCPClient } from './mcp-client';
|
|
9
10
|
import type { MCPManager, MCPServerConfig } from './mcp-manager';
|
|
@@ -121,50 +122,54 @@ export async function executeLlmStep(
|
|
|
121
122
|
}
|
|
122
123
|
|
|
123
124
|
if (mcpServersToConnect.length > 0) {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
} else {
|
|
130
|
-
// Fallback if no manager (should not happen in normal workflow run)
|
|
131
|
-
if (typeof server === 'string') {
|
|
132
|
-
logger.error(` ✗ Cannot reference global MCP server '${server}' without MCPManager`);
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
|
-
logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
|
|
125
|
+
await Promise.all(
|
|
126
|
+
mcpServersToConnect.map(async (server) => {
|
|
127
|
+
let client: MCPClient | undefined;
|
|
128
|
+
const serverName = typeof server === 'string' ? server : server.name;
|
|
129
|
+
|
|
136
130
|
try {
|
|
137
|
-
|
|
138
|
-
(server as
|
|
139
|
-
|
|
140
|
-
(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
131
|
+
if (mcpManager) {
|
|
132
|
+
client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
|
|
133
|
+
} else {
|
|
134
|
+
// Fallback if no manager (should not happen in normal workflow run)
|
|
135
|
+
if (typeof server === 'string') {
|
|
136
|
+
logger.error(
|
|
137
|
+
` ✗ Cannot reference global MCP server '${server}' without MCPManager`
|
|
138
|
+
);
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
|
|
142
|
+
client = await MCPClient.createLocal(
|
|
143
|
+
(server as MCPServerConfig).command || 'node',
|
|
144
|
+
(server as MCPServerConfig).args || [],
|
|
145
|
+
(server as MCPServerConfig).env || {}
|
|
146
|
+
);
|
|
147
|
+
await client.initialize();
|
|
148
|
+
localMcpClients.push(client);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (client) {
|
|
152
|
+
const mcpTools = await client.listTools();
|
|
153
|
+
for (const tool of mcpTools) {
|
|
154
|
+
allTools.push({
|
|
155
|
+
name: tool.name,
|
|
156
|
+
description: tool.description,
|
|
157
|
+
parameters: tool.inputSchema,
|
|
158
|
+
source: 'mcp',
|
|
159
|
+
mcpClient: client,
|
|
160
|
+
});
|
|
161
|
+
}
|
|
162
|
+
}
|
|
144
163
|
} catch (error) {
|
|
145
164
|
logger.error(
|
|
146
|
-
` ✗ Failed to
|
|
165
|
+
` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
|
|
147
166
|
);
|
|
148
|
-
if (client) {
|
|
167
|
+
if (!mcpManager && client) {
|
|
149
168
|
client.stop();
|
|
150
169
|
}
|
|
151
|
-
client = undefined;
|
|
152
170
|
}
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
if (client) {
|
|
156
|
-
const mcpTools = await client.listTools();
|
|
157
|
-
for (const tool of mcpTools) {
|
|
158
|
-
allTools.push({
|
|
159
|
-
name: tool.name,
|
|
160
|
-
description: tool.description,
|
|
161
|
-
parameters: tool.inputSchema,
|
|
162
|
-
source: 'mcp',
|
|
163
|
-
mcpClient: client,
|
|
164
|
-
});
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
171
|
+
})
|
|
172
|
+
);
|
|
168
173
|
}
|
|
169
174
|
|
|
170
175
|
const llmTools = allTools.map((t) => ({
|
|
@@ -206,21 +211,27 @@ export async function executeLlmStep(
|
|
|
206
211
|
total_tokens: 0,
|
|
207
212
|
};
|
|
208
213
|
|
|
214
|
+
// Create redactor once outside the loop for performance (regex compilation)
|
|
215
|
+
const redactor = new Redactor(context.secrets || {});
|
|
216
|
+
const redactionBuffer = new RedactionBuffer(redactor);
|
|
217
|
+
|
|
209
218
|
while (iterations < maxIterations) {
|
|
210
219
|
iterations++;
|
|
211
220
|
|
|
212
|
-
const redactor = new Redactor(context.secrets || {});
|
|
213
|
-
|
|
214
221
|
const response = await adapter.chat(messages, {
|
|
215
222
|
model: resolvedModel,
|
|
216
223
|
tools: llmTools.length > 0 ? llmTools : undefined,
|
|
217
224
|
onStream: (chunk) => {
|
|
218
225
|
if (!step.schema) {
|
|
219
|
-
process.stdout.write(
|
|
226
|
+
process.stdout.write(redactionBuffer.process(chunk));
|
|
220
227
|
}
|
|
221
228
|
},
|
|
222
229
|
});
|
|
223
230
|
|
|
231
|
+
if (!step.schema) {
|
|
232
|
+
process.stdout.write(redactionBuffer.flush());
|
|
233
|
+
}
|
|
234
|
+
|
|
224
235
|
if (response.usage) {
|
|
225
236
|
totalUsage.prompt_tokens += response.usage.prompt_tokens;
|
|
226
237
|
totalUsage.completion_tokens += response.usage.completion_tokens;
|
|
@@ -236,12 +247,16 @@ export async function executeLlmStep(
|
|
|
236
247
|
// If schema is defined, attempt to parse JSON
|
|
237
248
|
if (step.schema && typeof output === 'string') {
|
|
238
249
|
try {
|
|
239
|
-
const { extractJson } = await import('../utils/json-parser');
|
|
240
250
|
output = extractJson(output) as typeof output;
|
|
241
251
|
} catch (e) {
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
252
|
+
const errorMessage = `Failed to parse LLM output as JSON matching schema: ${e instanceof Error ? e.message : String(e)}`;
|
|
253
|
+
logger.error(` ⚠️ ${errorMessage}. Retrying...`);
|
|
254
|
+
|
|
255
|
+
messages.push({
|
|
256
|
+
role: 'user',
|
|
257
|
+
content: `Error: ${errorMessage}\n\nPlease correct your output to be valid JSON matching the schema.`,
|
|
258
|
+
});
|
|
259
|
+
continue;
|
|
245
260
|
}
|
|
246
261
|
}
|
|
247
262
|
|
|
@@ -259,7 +274,18 @@ export async function executeLlmStep(
|
|
|
259
274
|
|
|
260
275
|
if (!toolInfo) {
|
|
261
276
|
if (toolCall.function.name === 'ask' && step.allowClarification) {
|
|
262
|
-
|
|
277
|
+
let args: { question: string };
|
|
278
|
+
try {
|
|
279
|
+
args = JSON.parse(toolCall.function.arguments);
|
|
280
|
+
} catch (e) {
|
|
281
|
+
messages.push({
|
|
282
|
+
role: 'tool',
|
|
283
|
+
tool_call_id: toolCall.id,
|
|
284
|
+
name: 'ask',
|
|
285
|
+
content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
|
|
286
|
+
});
|
|
287
|
+
continue;
|
|
288
|
+
}
|
|
263
289
|
|
|
264
290
|
if (process.stdin.isTTY) {
|
|
265
291
|
// In TTY, we can use a human step to get the answer immediately
|
|
@@ -302,7 +328,18 @@ export async function executeLlmStep(
|
|
|
302
328
|
continue;
|
|
303
329
|
}
|
|
304
330
|
|
|
305
|
-
|
|
331
|
+
let args: Record<string, unknown>;
|
|
332
|
+
try {
|
|
333
|
+
args = JSON.parse(toolCall.function.arguments);
|
|
334
|
+
} catch (e) {
|
|
335
|
+
messages.push({
|
|
336
|
+
role: 'tool',
|
|
337
|
+
tool_call_id: toolCall.id,
|
|
338
|
+
name: toolCall.function.name,
|
|
339
|
+
content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
|
|
340
|
+
});
|
|
341
|
+
continue;
|
|
342
|
+
}
|
|
306
343
|
|
|
307
344
|
if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
|
|
308
345
|
try {
|