keystone-cli 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -4
- package/package.json +4 -16
- package/src/cli.ts +64 -4
- package/src/db/workflow-db.ts +16 -7
- package/src/expression/evaluator.audit.test.ts +67 -0
- package/src/expression/evaluator.test.ts +15 -2
- package/src/expression/evaluator.ts +102 -29
- package/src/parser/agent-parser.test.ts +6 -2
- package/src/parser/schema.ts +2 -0
- package/src/parser/workflow-parser.test.ts +6 -2
- package/src/parser/workflow-parser.ts +22 -11
- package/src/runner/audit-verification.test.ts +12 -8
- package/src/runner/llm-adapter.ts +49 -12
- package/src/runner/llm-executor.test.ts +24 -6
- package/src/runner/llm-executor.ts +76 -44
- package/src/runner/mcp-client.audit.test.ts +79 -0
- package/src/runner/mcp-client.ts +102 -19
- package/src/runner/shell-executor.test.ts +33 -15
- package/src/runner/shell-executor.ts +110 -39
- package/src/runner/step-executor.test.ts +30 -2
- package/src/runner/timeout.ts +2 -2
- package/src/runner/tool-integration.test.ts +8 -2
- package/src/runner/workflow-runner.ts +95 -29
- package/src/templates/agents/keystone-architect.md +5 -3
- package/src/types/status.ts +25 -0
- package/src/ui/dashboard.tsx +3 -1
- package/src/utils/auth-manager.test.ts +3 -1
- package/src/utils/auth-manager.ts +12 -2
- package/src/utils/config-loader.test.ts +2 -17
- package/src/utils/mermaid.ts +0 -8
- package/src/utils/redactor.ts +115 -22
- package/src/utils/sandbox.test.ts +9 -13
- package/src/utils/sandbox.ts +40 -53
- package/src/utils/workflow-registry.test.ts +6 -2
|
@@ -18,7 +18,9 @@ describe('agent-parser', () => {
|
|
|
18
18
|
afterAll(() => {
|
|
19
19
|
try {
|
|
20
20
|
rmSync(tempDir, { recursive: true, force: true });
|
|
21
|
-
} catch (e) {
|
|
21
|
+
} catch (e) {
|
|
22
|
+
// Ignore cleanup error
|
|
23
|
+
}
|
|
22
24
|
});
|
|
23
25
|
|
|
24
26
|
describe('parseAgent', () => {
|
|
@@ -100,7 +102,9 @@ Prompt`;
|
|
|
100
102
|
const agentsDir = join(process.cwd(), '.keystone', 'workflows', 'agents');
|
|
101
103
|
try {
|
|
102
104
|
mkdirSync(agentsDir, { recursive: true });
|
|
103
|
-
} catch (e) {
|
|
105
|
+
} catch (e) {
|
|
106
|
+
// Ignore directory creation error (test setup, not cleanup)
|
|
107
|
+
}
|
|
104
108
|
|
|
105
109
|
const filePath = join(agentsDir, 'my-agent.md');
|
|
106
110
|
writeFileSync(filePath, '---name: my-agent---');
|
package/src/parser/schema.ts
CHANGED
|
@@ -38,6 +38,7 @@ const ShellStepSchema = BaseStepSchema.extend({
|
|
|
38
38
|
run: z.string(),
|
|
39
39
|
dir: z.string().optional(),
|
|
40
40
|
env: z.record(z.string()).optional(),
|
|
41
|
+
allowInsecure: z.boolean().optional(),
|
|
41
42
|
});
|
|
42
43
|
|
|
43
44
|
// Forward declaration for AgentToolSchema which depends on StepSchema
|
|
@@ -71,6 +72,7 @@ const LlmStepSchema = BaseStepSchema.extend({
|
|
|
71
72
|
env: z.record(z.string()).optional(),
|
|
72
73
|
url: z.string().optional(),
|
|
73
74
|
headers: z.record(z.string()).optional(),
|
|
75
|
+
timeout: z.number().int().positive().optional(),
|
|
74
76
|
}),
|
|
75
77
|
])
|
|
76
78
|
)
|
|
@@ -8,12 +8,16 @@ describe('WorkflowParser', () => {
|
|
|
8
8
|
const tempDir = join(process.cwd(), 'temp-test-workflows');
|
|
9
9
|
try {
|
|
10
10
|
mkdirSync(tempDir, { recursive: true });
|
|
11
|
-
} catch (e) {
|
|
11
|
+
} catch (e) {
|
|
12
|
+
// Ignore existing dir error
|
|
13
|
+
}
|
|
12
14
|
|
|
13
15
|
afterAll(() => {
|
|
14
16
|
try {
|
|
15
17
|
rmSync(tempDir, { recursive: true, force: true });
|
|
16
|
-
} catch (e) {
|
|
18
|
+
} catch (e) {
|
|
19
|
+
// Ignore cleanup error
|
|
20
|
+
}
|
|
17
21
|
});
|
|
18
22
|
describe('topologicalSort', () => {
|
|
19
23
|
test('should sort simple dependencies', () => {
|
|
@@ -53,18 +53,22 @@ export class WorkflowParser {
|
|
|
53
53
|
const detected = new Set<string>();
|
|
54
54
|
|
|
55
55
|
// Helper to scan any value for dependencies
|
|
56
|
-
const scan = (value: unknown) => {
|
|
56
|
+
const scan = (value: unknown, depth = 0) => {
|
|
57
|
+
if (depth > 100) {
|
|
58
|
+
throw new Error('Maximum expression nesting depth exceeded (potential DOS attack)');
|
|
59
|
+
}
|
|
60
|
+
|
|
57
61
|
if (typeof value === 'string') {
|
|
58
62
|
for (const dep of ExpressionEvaluator.findStepDependencies(value)) {
|
|
59
63
|
detected.add(dep);
|
|
60
64
|
}
|
|
61
65
|
} else if (Array.isArray(value)) {
|
|
62
66
|
for (const item of value) {
|
|
63
|
-
scan(item);
|
|
67
|
+
scan(item, depth + 1);
|
|
64
68
|
}
|
|
65
69
|
} else if (value && typeof value === 'object') {
|
|
66
70
|
for (const val of Object.values(value)) {
|
|
67
|
-
scan(val);
|
|
71
|
+
scan(val, depth + 1);
|
|
68
72
|
}
|
|
69
73
|
}
|
|
70
74
|
};
|
|
@@ -187,6 +191,15 @@ export class WorkflowParser {
|
|
|
187
191
|
inDegree.set(step.id, step.needs.length);
|
|
188
192
|
}
|
|
189
193
|
|
|
194
|
+
// Build reverse dependency map for O(1) lookups instead of O(n)
|
|
195
|
+
const dependents = new Map<string, string[]>();
|
|
196
|
+
for (const step of workflow.steps) {
|
|
197
|
+
for (const dep of step.needs) {
|
|
198
|
+
if (!dependents.has(dep)) dependents.set(dep, []);
|
|
199
|
+
dependents.get(dep)?.push(step.id);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
190
203
|
// Kahn's algorithm
|
|
191
204
|
const queue: string[] = [];
|
|
192
205
|
const result: string[] = [];
|
|
@@ -203,14 +216,12 @@ export class WorkflowParser {
|
|
|
203
216
|
if (!stepId) continue;
|
|
204
217
|
result.push(stepId);
|
|
205
218
|
|
|
206
|
-
// Find all steps that depend on this step
|
|
207
|
-
for (const
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
queue.push(step.id);
|
|
213
|
-
}
|
|
219
|
+
// Find all steps that depend on this step (O(1) lookup)
|
|
220
|
+
for (const dependentId of dependents.get(stepId) || []) {
|
|
221
|
+
const newDegree = (inDegree.get(dependentId) || 0) - 1;
|
|
222
|
+
inDegree.set(dependentId, newDegree);
|
|
223
|
+
if (newDegree === 0) {
|
|
224
|
+
queue.push(dependentId);
|
|
214
225
|
}
|
|
215
226
|
}
|
|
216
227
|
}
|
|
@@ -40,17 +40,21 @@ describe('Audit Fixes Verification', () => {
|
|
|
40
40
|
});
|
|
41
41
|
|
|
42
42
|
describe('Sandbox Security', () => {
|
|
43
|
-
it('should
|
|
43
|
+
it('should execute code using node:vm sandbox on Bun', async () => {
|
|
44
|
+
// Since Bun uses JSC (not V8), isolated-vm cannot work.
|
|
45
|
+
// The sandbox now uses node:vm directly with security warnings.
|
|
46
|
+
SafeSandbox.resetWarning();
|
|
44
47
|
const code = '1 + 1';
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
);
|
|
48
|
+
const result = await SafeSandbox.execute(code, {});
|
|
49
|
+
expect(result).toBe(2);
|
|
48
50
|
});
|
|
49
51
|
|
|
50
|
-
it('should
|
|
51
|
-
|
|
52
|
-
const
|
|
53
|
-
|
|
52
|
+
it('should show security warning on first execution', async () => {
|
|
53
|
+
SafeSandbox.resetWarning();
|
|
54
|
+
const code = '2 + 2';
|
|
55
|
+
const result = await SafeSandbox.execute(code, {});
|
|
56
|
+
expect(result).toBe(4);
|
|
57
|
+
// Warning is shown to stderr, we just verify execution works
|
|
54
58
|
});
|
|
55
59
|
});
|
|
56
60
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { AuthManager, COPILOT_HEADERS } from '../utils/auth-manager';
|
|
2
2
|
import { ConfigLoader } from '../utils/config-loader';
|
|
3
3
|
|
|
4
|
+
// Maximum response size to prevent memory exhaustion (1 MiB, compared against string length, i.e. UTF-16 code units)
|
|
5
|
+
const MAX_RESPONSE_SIZE = 1024 * 1024;
|
|
6
|
+
|
|
4
7
|
export interface LLMMessage {
|
|
5
8
|
role: 'system' | 'user' | 'assistant' | 'tool';
|
|
6
9
|
content: string | null;
|
|
@@ -112,6 +115,9 @@ export class OpenAIAdapter implements LLMAdapter {
|
|
|
112
115
|
const delta = data.choices[0].delta;
|
|
113
116
|
|
|
114
117
|
if (delta.content) {
|
|
118
|
+
if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
|
|
119
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
120
|
+
}
|
|
115
121
|
fullContent += delta.content;
|
|
116
122
|
options.onStream?.(delta.content);
|
|
117
123
|
}
|
|
@@ -287,7 +293,8 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
287
293
|
const reader = response.body.getReader();
|
|
288
294
|
const decoder = new TextDecoder();
|
|
289
295
|
let fullContent = '';
|
|
290
|
-
|
|
296
|
+
// Track tool calls by content block index for robust correlation
|
|
297
|
+
const toolCallsMap = new Map<number, { id: string; name: string; inputString: string }>();
|
|
291
298
|
|
|
292
299
|
while (true) {
|
|
293
300
|
const { done, value } = await reader.read();
|
|
@@ -302,21 +309,43 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
302
309
|
try {
|
|
303
310
|
const data = JSON.parse(line.slice(6));
|
|
304
311
|
if (data.type === 'content_block_delta' && data.delta?.text) {
|
|
312
|
+
if (fullContent.length + data.delta.text.length > MAX_RESPONSE_SIZE) {
|
|
313
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
314
|
+
}
|
|
305
315
|
fullContent += data.delta.text;
|
|
306
316
|
options.onStream?.(data.delta.text);
|
|
307
317
|
}
|
|
308
318
|
|
|
319
|
+
// Track tool calls by their index in the content blocks
|
|
309
320
|
if (data.type === 'content_block_start' && data.content_block?.type === 'tool_use') {
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
321
|
+
const index = data.index ?? toolCallsMap.size;
|
|
322
|
+
toolCallsMap.set(index, {
|
|
323
|
+
id: data.content_block.id || '',
|
|
324
|
+
name: data.content_block.name || '',
|
|
313
325
|
inputString: '',
|
|
314
326
|
});
|
|
315
327
|
}
|
|
316
328
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
329
|
+
// Handle tool input streaming - Anthropic uses content_block_delta with input_json_delta
|
|
330
|
+
if (
|
|
331
|
+
data.type === 'content_block_delta' &&
|
|
332
|
+
data.delta?.type === 'input_json_delta' &&
|
|
333
|
+
data.delta?.partial_json
|
|
334
|
+
) {
|
|
335
|
+
const index = data.index;
|
|
336
|
+
const toolCall = toolCallsMap.get(index);
|
|
337
|
+
if (toolCall) {
|
|
338
|
+
toolCall.inputString += data.delta.partial_json;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
// Update tool call ID if it arrives later (some edge cases)
|
|
343
|
+
if (data.type === 'content_block_delta' && data.content_block?.id) {
|
|
344
|
+
const index = data.index;
|
|
345
|
+
const toolCall = toolCallsMap.get(index);
|
|
346
|
+
if (toolCall && !toolCall.id) {
|
|
347
|
+
toolCall.id = data.content_block.id;
|
|
348
|
+
}
|
|
320
349
|
}
|
|
321
350
|
} catch (e) {
|
|
322
351
|
// Ignore parse errors
|
|
@@ -324,15 +353,20 @@ export class AnthropicAdapter implements LLMAdapter {
|
|
|
324
353
|
}
|
|
325
354
|
}
|
|
326
355
|
|
|
356
|
+
// Convert map to array and filter out incomplete tool calls
|
|
357
|
+
const toolCalls = Array.from(toolCallsMap.values())
|
|
358
|
+
.filter((tc) => tc.id && tc.name) // Only include complete tool calls
|
|
359
|
+
.map((tc) => ({
|
|
360
|
+
id: tc.id,
|
|
361
|
+
type: 'function' as const,
|
|
362
|
+
function: { name: tc.name, arguments: tc.inputString },
|
|
363
|
+
}));
|
|
364
|
+
|
|
327
365
|
return {
|
|
328
366
|
message: {
|
|
329
367
|
role: 'assistant',
|
|
330
368
|
content: fullContent || null,
|
|
331
|
-
tool_calls: toolCalls.
|
|
332
|
-
id: tc.id,
|
|
333
|
-
type: 'function',
|
|
334
|
-
function: { name: tc.name, arguments: tc.inputString },
|
|
335
|
-
})),
|
|
369
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
|
|
336
370
|
},
|
|
337
371
|
};
|
|
338
372
|
}
|
|
@@ -443,6 +477,9 @@ export class CopilotAdapter implements LLMAdapter {
|
|
|
443
477
|
const delta = data.choices[0].delta;
|
|
444
478
|
|
|
445
479
|
if (delta.content) {
|
|
480
|
+
if (fullContent.length + delta.content.length > MAX_RESPONSE_SIZE) {
|
|
481
|
+
throw new Error(`LLM response exceeds maximum size of ${MAX_RESPONSE_SIZE} bytes`);
|
|
482
|
+
}
|
|
446
483
|
fullContent += delta.content;
|
|
447
484
|
options.onStream?.(delta.content);
|
|
448
485
|
}
|
|
@@ -146,7 +146,9 @@ describe('llm-executor', () => {
|
|
|
146
146
|
|
|
147
147
|
try {
|
|
148
148
|
mkdirSync(agentsDir, { recursive: true });
|
|
149
|
-
} catch (e) {
|
|
149
|
+
} catch (e) {
|
|
150
|
+
// Ignore error while creating the agents directory (test setup, not cleanup)
|
|
151
|
+
}
|
|
150
152
|
const agentContent = `---
|
|
151
153
|
name: test-agent
|
|
152
154
|
model: gpt-4
|
|
@@ -196,6 +198,7 @@ You are a test agent.`;
|
|
|
196
198
|
agent: 'test-agent',
|
|
197
199
|
prompt: 'hello',
|
|
198
200
|
needs: [],
|
|
201
|
+
maxIterations: 10,
|
|
199
202
|
};
|
|
200
203
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
201
204
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
@@ -216,6 +219,7 @@ You are a test agent.`;
|
|
|
216
219
|
agent: 'test-agent',
|
|
217
220
|
prompt: 'trigger tool',
|
|
218
221
|
needs: [],
|
|
222
|
+
maxIterations: 10,
|
|
219
223
|
};
|
|
220
224
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
221
225
|
|
|
@@ -242,6 +246,7 @@ You are a test agent.`;
|
|
|
242
246
|
agent: 'test-agent',
|
|
243
247
|
prompt: 'give me json',
|
|
244
248
|
needs: [],
|
|
249
|
+
maxIterations: 10,
|
|
245
250
|
schema: {
|
|
246
251
|
type: 'object',
|
|
247
252
|
properties: {
|
|
@@ -268,6 +273,7 @@ You are a test agent.`;
|
|
|
268
273
|
agent: 'test-agent',
|
|
269
274
|
prompt: 'give me invalid json',
|
|
270
275
|
needs: [],
|
|
276
|
+
maxIterations: 10,
|
|
271
277
|
schema: { type: 'object' },
|
|
272
278
|
};
|
|
273
279
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -306,6 +312,7 @@ You are a test agent.`;
|
|
|
306
312
|
agent: 'test-agent',
|
|
307
313
|
prompt: 'trigger unknown tool',
|
|
308
314
|
needs: [],
|
|
315
|
+
maxIterations: 10,
|
|
309
316
|
};
|
|
310
317
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
311
318
|
const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
|
|
@@ -359,6 +366,7 @@ You are a test agent.`;
|
|
|
359
366
|
agent: 'test-agent',
|
|
360
367
|
prompt: 'hello',
|
|
361
368
|
needs: [],
|
|
369
|
+
maxIterations: 10,
|
|
362
370
|
mcpServers: [{ name: 'fail-mcp', command: 'node', args: [] }],
|
|
363
371
|
};
|
|
364
372
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -379,7 +387,7 @@ You are a test agent.`;
|
|
|
379
387
|
);
|
|
380
388
|
|
|
381
389
|
expect(consoleSpy).toHaveBeenCalledWith(
|
|
382
|
-
expect.stringContaining('Failed to
|
|
390
|
+
expect.stringContaining('Failed to list tools from MCP server fail-mcp')
|
|
383
391
|
);
|
|
384
392
|
createLocalSpy.mockRestore();
|
|
385
393
|
consoleSpy.mockRestore();
|
|
@@ -392,6 +400,7 @@ You are a test agent.`;
|
|
|
392
400
|
agent: 'test-agent',
|
|
393
401
|
prompt: 'trigger mcp tool',
|
|
394
402
|
needs: [],
|
|
403
|
+
maxIterations: 10,
|
|
395
404
|
mcpServers: [{ name: 'test-mcp', command: 'node', args: [] }],
|
|
396
405
|
};
|
|
397
406
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -446,13 +455,15 @@ You are a test agent.`;
|
|
|
446
455
|
it('should use global MCP servers when useGlobalMcp is true', async () => {
|
|
447
456
|
ConfigLoader.setConfig({
|
|
448
457
|
mcp_servers: {
|
|
449
|
-
'global-mcp': { command: 'node', args: ['server.js'] },
|
|
458
|
+
'global-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
|
|
450
459
|
},
|
|
451
460
|
providers: {
|
|
452
|
-
openai: {
|
|
461
|
+
openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' },
|
|
453
462
|
},
|
|
454
463
|
model_mappings: {},
|
|
455
464
|
default_provider: 'openai',
|
|
465
|
+
storage: { retention_days: 30 },
|
|
466
|
+
workflows_directory: 'workflows',
|
|
456
467
|
});
|
|
457
468
|
|
|
458
469
|
const manager = new MCPManager();
|
|
@@ -462,6 +473,7 @@ You are a test agent.`;
|
|
|
462
473
|
agent: 'test-agent',
|
|
463
474
|
prompt: 'hello',
|
|
464
475
|
needs: [],
|
|
476
|
+
maxIterations: 10,
|
|
465
477
|
useGlobalMcp: true,
|
|
466
478
|
};
|
|
467
479
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -510,6 +522,7 @@ You are a test agent.`;
|
|
|
510
522
|
agent: 'test-agent',
|
|
511
523
|
prompt: 'trigger adhoc tool',
|
|
512
524
|
needs: [],
|
|
525
|
+
maxIterations: 10,
|
|
513
526
|
tools: [
|
|
514
527
|
{
|
|
515
528
|
name: 'adhoc-tool',
|
|
@@ -547,6 +560,7 @@ You are a test agent.`;
|
|
|
547
560
|
agent: 'test-agent',
|
|
548
561
|
prompt: 'hello',
|
|
549
562
|
needs: [],
|
|
563
|
+
maxIterations: 10,
|
|
550
564
|
mcpServers: ['some-global-server'],
|
|
551
565
|
};
|
|
552
566
|
const context: ExpressionContext = { inputs: {}, steps: {} };
|
|
@@ -571,11 +585,13 @@ You are a test agent.`;
|
|
|
571
585
|
it('should not add global MCP server if already explicitly listed', async () => {
|
|
572
586
|
ConfigLoader.setConfig({
|
|
573
587
|
mcp_servers: {
|
|
574
|
-
'test-mcp': { command: 'node', args: ['server.js'] },
|
|
588
|
+
'test-mcp': { type: 'local', command: 'node', args: ['server.js'], timeout: 1000 },
|
|
575
589
|
},
|
|
576
|
-
providers: { openai: {
|
|
590
|
+
providers: { openai: { type: 'openai', api_key_env: 'OPENAI_API_KEY' } },
|
|
577
591
|
model_mappings: {},
|
|
578
592
|
default_provider: 'openai',
|
|
593
|
+
storage: { retention_days: 30 },
|
|
594
|
+
workflows_directory: 'workflows',
|
|
579
595
|
});
|
|
580
596
|
|
|
581
597
|
const manager = new MCPManager();
|
|
@@ -585,6 +601,7 @@ You are a test agent.`;
|
|
|
585
601
|
agent: 'test-agent',
|
|
586
602
|
prompt: 'hello',
|
|
587
603
|
needs: [],
|
|
604
|
+
maxIterations: 10,
|
|
588
605
|
useGlobalMcp: true,
|
|
589
606
|
mcpServers: [{ name: 'test-mcp', command: 'node', args: ['local.js'] }],
|
|
590
607
|
};
|
|
@@ -636,6 +653,7 @@ You are a test agent.`;
|
|
|
636
653
|
agent: 'test-agent',
|
|
637
654
|
prompt: '${{ steps.prev.output }}' as unknown as string,
|
|
638
655
|
needs: [],
|
|
656
|
+
maxIterations: 10,
|
|
639
657
|
};
|
|
640
658
|
const context: ExpressionContext = {
|
|
641
659
|
inputs: {},
|
|
@@ -3,7 +3,8 @@ import type { ExpressionContext } from '../expression/evaluator';
|
|
|
3
3
|
import { ExpressionEvaluator } from '../expression/evaluator';
|
|
4
4
|
import { parseAgent, resolveAgentPath } from '../parser/agent-parser';
|
|
5
5
|
import type { AgentTool, LlmStep, Step } from '../parser/schema';
|
|
6
|
-
import {
|
|
6
|
+
import { extractJson } from '../utils/json-parser';
|
|
7
|
+
import { RedactionBuffer, Redactor } from '../utils/redactor';
|
|
7
8
|
import { type LLMMessage, getAdapter } from './llm-adapter';
|
|
8
9
|
import { MCPClient } from './mcp-client';
|
|
9
10
|
import type { MCPManager, MCPServerConfig } from './mcp-manager';
|
|
@@ -121,50 +122,54 @@ export async function executeLlmStep(
|
|
|
121
122
|
}
|
|
122
123
|
|
|
123
124
|
if (mcpServersToConnect.length > 0) {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
} else {
|
|
130
|
-
// Fallback if no manager (should not happen in normal workflow run)
|
|
131
|
-
if (typeof server === 'string') {
|
|
132
|
-
logger.error(` ✗ Cannot reference global MCP server '${server}' without MCPManager`);
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
|
-
logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
|
|
125
|
+
await Promise.all(
|
|
126
|
+
mcpServersToConnect.map(async (server) => {
|
|
127
|
+
let client: MCPClient | undefined;
|
|
128
|
+
const serverName = typeof server === 'string' ? server : server.name;
|
|
129
|
+
|
|
136
130
|
try {
|
|
137
|
-
|
|
138
|
-
(server as
|
|
139
|
-
|
|
140
|
-
(
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
131
|
+
if (mcpManager) {
|
|
132
|
+
client = await mcpManager.getClient(server as string | MCPServerConfig, logger);
|
|
133
|
+
} else {
|
|
134
|
+
// Fallback if no manager (should not happen in normal workflow run)
|
|
135
|
+
if (typeof server === 'string') {
|
|
136
|
+
logger.error(
|
|
137
|
+
` ✗ Cannot reference global MCP server '${server}' without MCPManager`
|
|
138
|
+
);
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
|
|
142
|
+
client = await MCPClient.createLocal(
|
|
143
|
+
(server as MCPServerConfig).command || 'node',
|
|
144
|
+
(server as MCPServerConfig).args || [],
|
|
145
|
+
(server as MCPServerConfig).env || {}
|
|
146
|
+
);
|
|
147
|
+
await client.initialize();
|
|
148
|
+
localMcpClients.push(client);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
if (client) {
|
|
152
|
+
const mcpTools = await client.listTools();
|
|
153
|
+
for (const tool of mcpTools) {
|
|
154
|
+
allTools.push({
|
|
155
|
+
name: tool.name,
|
|
156
|
+
description: tool.description,
|
|
157
|
+
parameters: tool.inputSchema,
|
|
158
|
+
source: 'mcp',
|
|
159
|
+
mcpClient: client,
|
|
160
|
+
});
|
|
161
|
+
}
|
|
162
|
+
}
|
|
144
163
|
} catch (error) {
|
|
145
164
|
logger.error(
|
|
146
|
-
` ✗ Failed to
|
|
165
|
+
` ✗ Failed to list tools from MCP server ${serverName}: ${error instanceof Error ? error.message : String(error)}`
|
|
147
166
|
);
|
|
148
|
-
if (client) {
|
|
167
|
+
if (!mcpManager && client) {
|
|
149
168
|
client.stop();
|
|
150
169
|
}
|
|
151
|
-
client = undefined;
|
|
152
170
|
}
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
if (client) {
|
|
156
|
-
const mcpTools = await client.listTools();
|
|
157
|
-
for (const tool of mcpTools) {
|
|
158
|
-
allTools.push({
|
|
159
|
-
name: tool.name,
|
|
160
|
-
description: tool.description,
|
|
161
|
-
parameters: tool.inputSchema,
|
|
162
|
-
source: 'mcp',
|
|
163
|
-
mcpClient: client,
|
|
164
|
-
});
|
|
165
|
-
}
|
|
166
|
-
}
|
|
167
|
-
}
|
|
171
|
+
})
|
|
172
|
+
);
|
|
168
173
|
}
|
|
169
174
|
|
|
170
175
|
const llmTools = allTools.map((t) => ({
|
|
@@ -206,21 +211,27 @@ export async function executeLlmStep(
|
|
|
206
211
|
total_tokens: 0,
|
|
207
212
|
};
|
|
208
213
|
|
|
214
|
+
// Create redactor once outside the loop for performance (regex compilation)
|
|
215
|
+
const redactor = new Redactor(context.secrets || {});
|
|
216
|
+
const redactionBuffer = new RedactionBuffer(redactor);
|
|
217
|
+
|
|
209
218
|
while (iterations < maxIterations) {
|
|
210
219
|
iterations++;
|
|
211
220
|
|
|
212
|
-
const redactor = new Redactor(context.secrets || {});
|
|
213
|
-
|
|
214
221
|
const response = await adapter.chat(messages, {
|
|
215
222
|
model: resolvedModel,
|
|
216
223
|
tools: llmTools.length > 0 ? llmTools : undefined,
|
|
217
224
|
onStream: (chunk) => {
|
|
218
225
|
if (!step.schema) {
|
|
219
|
-
process.stdout.write(
|
|
226
|
+
process.stdout.write(redactionBuffer.process(chunk));
|
|
220
227
|
}
|
|
221
228
|
},
|
|
222
229
|
});
|
|
223
230
|
|
|
231
|
+
if (!step.schema) {
|
|
232
|
+
process.stdout.write(redactionBuffer.flush());
|
|
233
|
+
}
|
|
234
|
+
|
|
224
235
|
if (response.usage) {
|
|
225
236
|
totalUsage.prompt_tokens += response.usage.prompt_tokens;
|
|
226
237
|
totalUsage.completion_tokens += response.usage.completion_tokens;
|
|
@@ -236,7 +247,6 @@ export async function executeLlmStep(
|
|
|
236
247
|
// If schema is defined, attempt to parse JSON
|
|
237
248
|
if (step.schema && typeof output === 'string') {
|
|
238
249
|
try {
|
|
239
|
-
const { extractJson } = await import('../utils/json-parser');
|
|
240
250
|
output = extractJson(output) as typeof output;
|
|
241
251
|
} catch (e) {
|
|
242
252
|
throw new Error(
|
|
@@ -259,7 +269,18 @@ export async function executeLlmStep(
|
|
|
259
269
|
|
|
260
270
|
if (!toolInfo) {
|
|
261
271
|
if (toolCall.function.name === 'ask' && step.allowClarification) {
|
|
262
|
-
|
|
272
|
+
let args: { question: string };
|
|
273
|
+
try {
|
|
274
|
+
args = JSON.parse(toolCall.function.arguments);
|
|
275
|
+
} catch (e) {
|
|
276
|
+
messages.push({
|
|
277
|
+
role: 'tool',
|
|
278
|
+
tool_call_id: toolCall.id,
|
|
279
|
+
name: 'ask',
|
|
280
|
+
content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
|
|
281
|
+
});
|
|
282
|
+
continue;
|
|
283
|
+
}
|
|
263
284
|
|
|
264
285
|
if (process.stdin.isTTY) {
|
|
265
286
|
// In TTY, we can use a human step to get the answer immediately
|
|
@@ -302,7 +323,18 @@ export async function executeLlmStep(
|
|
|
302
323
|
continue;
|
|
303
324
|
}
|
|
304
325
|
|
|
305
|
-
|
|
326
|
+
let args: Record<string, unknown>;
|
|
327
|
+
try {
|
|
328
|
+
args = JSON.parse(toolCall.function.arguments);
|
|
329
|
+
} catch (e) {
|
|
330
|
+
messages.push({
|
|
331
|
+
role: 'tool',
|
|
332
|
+
tool_call_id: toolCall.id,
|
|
333
|
+
name: toolCall.function.name,
|
|
334
|
+
content: `Error: Invalid JSON in arguments: ${e instanceof Error ? e.message : String(e)}`,
|
|
335
|
+
});
|
|
336
|
+
continue;
|
|
337
|
+
}
|
|
306
338
|
|
|
307
339
|
if (toolInfo.source === 'mcp' && toolInfo.mcpClient) {
|
|
308
340
|
try {
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
|
|
2
|
+
import * as child_process from 'node:child_process';
|
|
3
|
+
import { MCPClient } from './mcp-client';
|
|
4
|
+
|
|
5
|
+
import { Readable, Writable } from 'node:stream';
|
|
6
|
+
|
|
7
|
+
describe('MCPClient Audit Fixes', () => {
|
|
8
|
+
let spawnSpy: ReturnType<typeof spyOn>;
|
|
9
|
+
|
|
10
|
+
beforeEach(() => {
|
|
11
|
+
spawnSpy = spyOn(child_process, 'spawn').mockReturnValue({
|
|
12
|
+
stdout: new Readable({ read() {} }),
|
|
13
|
+
stdin: new Writable({
|
|
14
|
+
write(c, e, cb) {
|
|
15
|
+
cb();
|
|
16
|
+
},
|
|
17
|
+
}),
|
|
18
|
+
kill: () => {},
|
|
19
|
+
on: () => {},
|
|
20
|
+
// biome-ignore lint/suspicious/noExplicitAny: Mocking complex object
|
|
21
|
+
} as any);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
afterEach(() => {
|
|
25
|
+
spawnSpy.mockRestore();
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it('should filter sensitive environment variables', async () => {
|
|
29
|
+
// Set temp environment variables
|
|
30
|
+
process.env.TEST_API_KEY_LEAK = 'secret_value';
|
|
31
|
+
process.env.TEST_SAFE_VAR = 'safe_value';
|
|
32
|
+
process.env.TEST_TOKEN_XYZ = 'secret_token';
|
|
33
|
+
|
|
34
|
+
try {
|
|
35
|
+
await MCPClient.createLocal('node', [], { EXPLICIT_SECRET: 'allowed' });
|
|
36
|
+
|
|
37
|
+
// Assert spawn arguments
|
|
38
|
+
// args: [0]=command, [1]=args, [2]=options
|
|
39
|
+
const call = spawnSpy.mock.lastCall;
|
|
40
|
+
if (!call) throw new Error('spawn not called');
|
|
41
|
+
|
|
42
|
+
const envArg = call[2].env;
|
|
43
|
+
|
|
44
|
+
// Safe vars should remain
|
|
45
|
+
expect(envArg.TEST_SAFE_VAR).toBe('safe_value');
|
|
46
|
+
|
|
47
|
+
// Explicitly passed vars should remain
|
|
48
|
+
expect(envArg.EXPLICIT_SECRET).toBe('allowed');
|
|
49
|
+
|
|
50
|
+
// Sensitive vars should be filtered
|
|
51
|
+
expect(envArg.TEST_API_KEY_LEAK).toBeUndefined();
|
|
52
|
+
expect(envArg.TEST_TOKEN_XYZ).toBeUndefined();
|
|
53
|
+
} finally {
|
|
54
|
+
// Cleanup
|
|
55
|
+
process.env.TEST_API_KEY_LEAK = undefined;
|
|
56
|
+
process.env.TEST_SAFE_VAR = undefined;
|
|
57
|
+
process.env.TEST_TOKEN_XYZ = undefined;
|
|
58
|
+
}
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it('should allow whitelisted sensitive vars if explicitly provided', async () => {
|
|
62
|
+
process.env.TEST_API_KEY_LEAK = 'secret_value';
|
|
63
|
+
|
|
64
|
+
try {
|
|
65
|
+
// User explicitly asks to pass this env var
|
|
66
|
+
await MCPClient.createLocal('node', [], {
|
|
67
|
+
TEST_API_KEY_LEAK: process.env.TEST_API_KEY_LEAK as string,
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
const call = spawnSpy.mock.lastCall;
|
|
71
|
+
if (!call) throw new Error('spawn not called');
|
|
72
|
+
const envArg = call[2].env;
|
|
73
|
+
|
|
74
|
+
expect(envArg.TEST_API_KEY_LEAK).toBe('secret_value');
|
|
75
|
+
} finally {
|
|
76
|
+
process.env.TEST_API_KEY_LEAK = undefined;
|
|
77
|
+
}
|
|
78
|
+
});
|
|
79
|
+
});
|