keystone-cli 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. package/README.md +69 -16
  2. package/package.json +14 -3
  3. package/src/cli.ts +183 -84
  4. package/src/db/workflow-db.ts +0 -7
  5. package/src/expression/evaluator.test.ts +46 -0
  6. package/src/expression/evaluator.ts +36 -0
  7. package/src/parser/agent-parser.test.ts +10 -0
  8. package/src/parser/agent-parser.ts +13 -5
  9. package/src/parser/config-schema.ts +24 -5
  10. package/src/parser/schema.ts +1 -1
  11. package/src/parser/workflow-parser.ts +5 -9
  12. package/src/runner/llm-adapter.test.ts +0 -8
  13. package/src/runner/llm-adapter.ts +33 -10
  14. package/src/runner/llm-executor.test.ts +230 -96
  15. package/src/runner/llm-executor.ts +9 -4
  16. package/src/runner/mcp-client.test.ts +204 -88
  17. package/src/runner/mcp-client.ts +349 -22
  18. package/src/runner/mcp-manager.test.ts +73 -15
  19. package/src/runner/mcp-manager.ts +84 -18
  20. package/src/runner/mcp-server.test.ts +4 -1
  21. package/src/runner/mcp-server.ts +25 -11
  22. package/src/runner/shell-executor.ts +3 -3
  23. package/src/runner/step-executor.test.ts +2 -2
  24. package/src/runner/step-executor.ts +31 -16
  25. package/src/runner/tool-integration.test.ts +21 -14
  26. package/src/runner/workflow-runner.ts +34 -7
  27. package/src/templates/agents/explore.md +54 -0
  28. package/src/templates/agents/general.md +8 -0
  29. package/src/templates/agents/keystone-architect.md +54 -0
  30. package/src/templates/agents/my-agent.md +3 -0
  31. package/src/templates/agents/summarizer.md +28 -0
  32. package/src/templates/agents/test-agent.md +10 -0
  33. package/src/templates/approval-process.yaml +36 -0
  34. package/src/templates/basic-inputs.yaml +19 -0
  35. package/src/templates/basic-shell.yaml +20 -0
  36. package/src/templates/batch-processor.yaml +43 -0
  37. package/src/templates/cleanup-finally.yaml +22 -0
  38. package/src/templates/composition-child.yaml +13 -0
  39. package/src/templates/composition-parent.yaml +14 -0
  40. package/src/templates/data-pipeline.yaml +38 -0
  41. package/src/templates/full-feature-demo.yaml +64 -0
  42. package/src/templates/human-interaction.yaml +12 -0
  43. package/src/templates/invalid.yaml +5 -0
  44. package/src/templates/llm-agent.yaml +8 -0
  45. package/src/templates/loop-parallel.yaml +37 -0
  46. package/src/templates/retry-policy.yaml +36 -0
  47. package/src/templates/scaffold-feature.yaml +48 -0
  48. package/src/templates/state.db +0 -0
  49. package/src/templates/state.db-shm +0 -0
  50. package/src/templates/state.db-wal +0 -0
  51. package/src/templates/stop-watch.yaml +17 -0
  52. package/src/templates/workflow.db +0 -0
  53. package/src/utils/auth-manager.test.ts +86 -0
  54. package/src/utils/auth-manager.ts +89 -0
  55. package/src/utils/config-loader.test.ts +32 -2
  56. package/src/utils/config-loader.ts +11 -1
  57. package/src/utils/mermaid.test.ts +27 -3
@@ -1,13 +1,33 @@
1
- import { afterAll, beforeAll, describe, expect, it, mock, spyOn } from 'bun:test';
1
+ import {
2
+ afterAll,
3
+ afterEach,
4
+ beforeAll,
5
+ beforeEach,
6
+ describe,
7
+ expect,
8
+ it,
9
+ mock,
10
+ spyOn,
11
+ } from 'bun:test';
12
+ import * as child_process from 'node:child_process';
13
+ import { EventEmitter } from 'node:events';
2
14
  import { mkdirSync, writeFileSync } from 'node:fs';
3
15
  import { join } from 'node:path';
16
+ import { Readable, Writable } from 'node:stream';
4
17
  import type { ExpressionContext } from '../expression/evaluator';
5
18
  import type { LlmStep, Step } from '../parser/schema';
6
- import { AnthropicAdapter, CopilotAdapter, OpenAIAdapter } from './llm-adapter';
7
- import { MCPClient } from './mcp-client';
19
+ import { ConfigLoader } from '../utils/config-loader';
20
+ import {
21
+ AnthropicAdapter,
22
+ CopilotAdapter,
23
+ type LLMMessage,
24
+ type LLMResponse,
25
+ type LLMTool,
26
+ OpenAIAdapter,
27
+ } from './llm-adapter';
8
28
  import { executeLlmStep } from './llm-executor';
29
+ import { MCPClient, type MCPResponse } from './mcp-client';
9
30
  import { MCPManager } from './mcp-manager';
10
- import { ConfigLoader } from '../utils/config-loader';
11
31
  import type { StepResult } from './step-executor';
12
32
 
13
33
  // Mock adapters
@@ -17,8 +37,113 @@ const originalAnthropicChat = AnthropicAdapter.prototype.chat;
17
37
 
18
38
  describe('llm-executor', () => {
19
39
  const agentsDir = join(process.cwd(), '.keystone', 'workflows', 'agents');
40
+ let spawnSpy: ReturnType<typeof spyOn>;
41
+ let initSpy: ReturnType<typeof spyOn>;
42
+ let listToolsSpy: ReturnType<typeof spyOn>;
43
+ let stopSpy: ReturnType<typeof spyOn>;
44
+
45
+ const mockChat = async (messages: unknown[], _options?: unknown) => {
46
+ const msgs = messages as LLMMessage[];
47
+ const lastMessage = msgs[msgs.length - 1];
48
+ const systemMessage = msgs.find((m) => m.role === 'system');
49
+
50
+ // If there's any tool message, just respond with final message
51
+ if (msgs.some((m) => m.role === 'tool')) {
52
+ return {
53
+ message: { role: 'assistant', content: 'LLM Response' },
54
+ };
55
+ }
56
+
57
+ if (systemMessage?.content?.includes('IMPORTANT: You must output valid JSON')) {
58
+ return {
59
+ message: { role: 'assistant', content: '```json\n{"foo": "bar"}\n```' },
60
+ };
61
+ }
62
+
63
+ if (lastMessage.role === 'user' && lastMessage.content?.includes('trigger tool')) {
64
+ return {
65
+ message: {
66
+ role: 'assistant',
67
+ content: null,
68
+ tool_calls: [
69
+ {
70
+ id: 'call-1',
71
+ type: 'function',
72
+ function: { name: 'test-tool', arguments: '{"val": 123}' },
73
+ },
74
+ ],
75
+ },
76
+ };
77
+ }
78
+
79
+ if (lastMessage.role === 'user' && lastMessage.content?.includes('trigger adhoc tool')) {
80
+ return {
81
+ message: {
82
+ role: 'assistant',
83
+ content: null,
84
+ tool_calls: [
85
+ {
86
+ id: 'call-adhoc',
87
+ type: 'function',
88
+ function: { name: 'adhoc-tool', arguments: '{}' },
89
+ },
90
+ ],
91
+ },
92
+ };
93
+ }
94
+
95
+ if (lastMessage.role === 'user' && lastMessage.content?.includes('trigger unknown tool')) {
96
+ return {
97
+ message: {
98
+ role: 'assistant',
99
+ content: null,
100
+ tool_calls: [
101
+ {
102
+ id: 'call-unknown',
103
+ type: 'function',
104
+ function: { name: 'unknown-tool', arguments: '{}' },
105
+ },
106
+ ],
107
+ },
108
+ };
109
+ }
110
+
111
+ if (lastMessage.role === 'user' && lastMessage.content?.includes('trigger mcp tool')) {
112
+ return {
113
+ message: {
114
+ role: 'assistant',
115
+ content: null,
116
+ tool_calls: [
117
+ {
118
+ id: 'call-mcp',
119
+ type: 'function',
120
+ function: { name: 'mcp-tool', arguments: '{}' },
121
+ },
122
+ ],
123
+ },
124
+ };
125
+ }
126
+
127
+ return {
128
+ message: { role: 'assistant', content: 'LLM Response' },
129
+ };
130
+ };
20
131
 
21
132
  beforeAll(() => {
133
+ // Mock spawn to avoid actual process creation
134
+ const mockProcess = Object.assign(new EventEmitter(), {
135
+ stdout: new Readable({ read() {} }),
136
+ stdin: new Writable({
137
+ write(_chunk, _encoding, cb: (error?: Error | null) => void) {
138
+ cb();
139
+ },
140
+ }),
141
+ kill: mock(() => {}),
142
+ });
143
+ spawnSpy = spyOn(child_process, 'spawn').mockReturnValue(
144
+ mockProcess as unknown as child_process.ChildProcess
145
+ );
146
+
22
147
  try {
23
148
  mkdirSync(agentsDir, { recursive: true });
24
149
  } catch (e) {}
@@ -33,68 +158,35 @@ tools:
33
158
  ---
34
159
  You are a test agent.`;
35
160
  writeFileSync(join(agentsDir, 'test-agent.md'), agentContent);
161
+ });
36
162
 
37
- const mockChat = async (messages: unknown[], _options?: unknown) => {
38
- const lastMessage = messages[messages.length - 1] as { content?: string };
39
- const systemMessage = messages.find(
40
- (m) =>
41
- typeof m === 'object' &&
42
- m !== null &&
43
- 'role' in m &&
44
- (m as { role: string }).role === 'system'
45
- ) as { content?: string } | undefined;
46
-
47
- if (systemMessage?.content?.includes('IMPORTANT: You must output valid JSON')) {
48
- return {
49
- message: { role: 'assistant', content: '```json\n{"foo": "bar"}\n```' },
50
- };
51
- }
52
-
53
- if (lastMessage?.content?.includes('trigger tool')) {
54
- return {
55
- message: {
56
- role: 'assistant',
57
- content: null,
58
- tool_calls: [
59
- {
60
- id: 'call-1',
61
- type: 'function',
62
- function: { name: 'test-tool', arguments: '{"val": 123}' },
63
- },
64
- ],
65
- },
66
- };
67
- }
68
-
69
- if (lastMessage?.content?.includes('trigger adhoc tool')) {
70
- return {
71
- message: {
72
- role: 'assistant',
73
- content: null,
74
- tool_calls: [
75
- {
76
- id: 'call-adhoc',
77
- type: 'function',
78
- function: { name: 'adhoc-tool', arguments: '{}' },
79
- },
80
- ],
81
- },
82
- };
83
- }
84
- return {
85
- message: { role: 'assistant', content: 'LLM Response' },
86
- };
87
- };
88
-
163
+ beforeEach(() => {
164
+ // Global MCP mocks to avoid hangs
165
+ initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue({
166
+ jsonrpc: '2.0',
167
+ id: 0,
168
+ result: { protocolVersion: '2024-11-05' },
169
+ } as MCPResponse);
170
+ listToolsSpy = spyOn(MCPClient.prototype, 'listTools').mockResolvedValue([]);
171
+ stopSpy = spyOn(MCPClient.prototype, 'stop').mockReturnValue(undefined);
172
+
173
+ // Set adapters to global mock
89
174
  OpenAIAdapter.prototype.chat = mock(mockChat) as unknown as typeof originalOpenAIChat;
90
175
  CopilotAdapter.prototype.chat = mock(mockChat) as unknown as typeof originalCopilotChat;
91
176
  AnthropicAdapter.prototype.chat = mock(mockChat) as unknown as typeof originalAnthropicChat;
92
177
  });
93
178
 
179
+ afterEach(() => {
180
+ initSpy.mockRestore();
181
+ listToolsSpy.mockRestore();
182
+ stopSpy.mockRestore();
183
+ });
184
+
94
185
  afterAll(() => {
95
186
  OpenAIAdapter.prototype.chat = originalOpenAIChat;
96
187
  CopilotAdapter.prototype.chat = originalCopilotChat;
97
188
  AnthropicAdapter.prototype.chat = originalAnthropicChat;
189
+ spawnSpy.mockRestore();
98
190
  });
99
191
 
100
192
  it('should execute a simple LLM step', async () => {
@@ -272,9 +364,12 @@ You are a test agent.`;
272
364
  const context: ExpressionContext = { inputs: {}, steps: {} };
273
365
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
274
366
 
275
- const spy = spyOn(MCPClient.prototype, 'initialize').mockRejectedValue(
276
- new Error('Connect failed')
277
- );
367
+ const createLocalSpy = spyOn(MCPClient, 'createLocal').mockImplementation(async () => {
368
+ const client = Object.create(MCPClient.prototype);
369
+ spyOn(client, 'initialize').mockRejectedValue(new Error('Connect failed'));
370
+ spyOn(client, 'stop').mockReturnValue(undefined);
371
+ return client;
372
+ });
278
373
  const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
279
374
 
280
375
  await executeLlmStep(
@@ -286,7 +381,7 @@ You are a test agent.`;
286
381
  expect(consoleSpy).toHaveBeenCalledWith(
287
382
  expect.stringContaining('Failed to connect to MCP server fail-mcp')
288
383
  );
289
- spy.mockRestore();
384
+ createLocalSpy.mockRestore();
290
385
  consoleSpy.mockRestore();
291
386
  });
292
387
 
@@ -302,22 +397,21 @@ You are a test agent.`;
302
397
  const context: ExpressionContext = { inputs: {}, steps: {} };
303
398
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
304
399
 
305
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue(
306
- {} as unknown as any
307
- );
308
- const listSpy = spyOn(MCPClient.prototype, 'listTools').mockResolvedValue([
309
- { name: 'mcp-tool', inputSchema: {} },
310
- ]);
311
- const callSpy = spyOn(MCPClient.prototype, 'callTool').mockRejectedValue(
312
- new Error('Tool failed')
313
- );
400
+ const createLocalSpy = spyOn(MCPClient, 'createLocal').mockImplementation(async () => {
401
+ const client = Object.create(MCPClient.prototype);
402
+ spyOn(client, 'initialize').mockResolvedValue({} as MCPResponse);
403
+ spyOn(client, 'listTools').mockResolvedValue([{ name: 'mcp-tool', inputSchema: {} }]);
404
+ spyOn(client, 'callTool').mockRejectedValue(new Error('Tool failed'));
405
+ spyOn(client, 'stop').mockReturnValue(undefined);
406
+ return client;
407
+ });
314
408
 
315
409
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
316
410
  const originalCopilotChatInner = CopilotAdapter.prototype.chat;
317
411
  const originalAnthropicChatInner = AnthropicAdapter.prototype.chat;
318
412
  let toolErrorCaptured = false;
319
413
 
320
- const mockChat = mock(async (messages: any[]) => {
414
+ const mockChat = mock(async (messages: LLMMessage[]) => {
321
415
  const toolResultMessage = messages.find((m) => m.role === 'tool');
322
416
  if (toolResultMessage?.content?.includes('Error: Tool failed')) {
323
417
  toolErrorCaptured = true;
@@ -331,7 +425,7 @@ You are a test agent.`;
331
425
  ],
332
426
  },
333
427
  };
334
- }) as any;
428
+ }) as unknown as typeof originalOpenAIChat;
335
429
 
336
430
  OpenAIAdapter.prototype.chat = mockChat;
337
431
  CopilotAdapter.prototype.chat = mockChat;
@@ -346,11 +440,7 @@ You are a test agent.`;
346
440
  expect(toolErrorCaptured).toBe(true);
347
441
 
348
442
  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
349
- CopilotAdapter.prototype.chat = originalCopilotChatInner;
350
- AnthropicAdapter.prototype.chat = originalAnthropicChatInner;
351
- initSpy.mockRestore();
352
- listSpy.mockRestore();
353
- callSpy.mockRestore();
443
+ createLocalSpy.mockRestore();
354
444
  });
355
445
 
356
446
  it('should use global MCP servers when useGlobalMcp is true', async () => {
@@ -377,21 +467,24 @@ You are a test agent.`;
377
467
  const context: ExpressionContext = { inputs: {}, steps: {} };
378
468
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
379
469
 
380
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue(
381
- {} as unknown as any
382
- );
383
- const listSpy = spyOn(MCPClient.prototype, 'listTools').mockResolvedValue([
384
- { name: 'global-tool', description: 'A global tool', inputSchema: {} },
385
- ]);
470
+ const createLocalSpy = spyOn(MCPClient, 'createLocal').mockImplementation(async () => {
471
+ const client = Object.create(MCPClient.prototype);
472
+ spyOn(client, 'initialize').mockResolvedValue({} as MCPResponse);
473
+ spyOn(client, 'listTools').mockResolvedValue([
474
+ { name: 'global-tool', description: 'A global tool', inputSchema: {} },
475
+ ]);
476
+ spyOn(client, 'stop').mockReturnValue(undefined);
477
+ return client;
478
+ });
386
479
 
387
480
  let toolFound = false;
388
481
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
389
- const mockChat = mock(async (_messages: any[], options: any) => {
390
- if (options.tools?.some((t: any) => t.function.name === 'global-tool')) {
482
+ const mockChat = mock(async (_messages: LLMMessage[], options: { tools?: LLMTool[] }) => {
483
+ if (options.tools?.some((t: LLMTool) => t.function.name === 'global-tool')) {
391
484
  toolFound = true;
392
485
  }
393
486
  return { message: { role: 'assistant', content: 'hello' } };
394
- }) as any;
487
+ }) as unknown as typeof originalOpenAIChat;
395
488
 
396
489
  OpenAIAdapter.prototype.chat = mockChat;
397
490
 
@@ -406,8 +499,7 @@ You are a test agent.`;
406
499
  expect(toolFound).toBe(true);
407
500
 
408
501
  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
409
- initSpy.mockRestore();
410
- listSpy.mockRestore();
502
+ createLocalSpy.mockRestore();
411
503
  ConfigLoader.clear();
412
504
  });
413
505
 
@@ -499,15 +591,18 @@ You are a test agent.`;
499
591
  const context: ExpressionContext = { inputs: {}, steps: {} };
500
592
  const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
501
593
 
502
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue(
503
- {} as unknown as any
504
- );
505
- const listSpy = spyOn(MCPClient.prototype, 'listTools').mockResolvedValue([]);
594
+ const createLocalSpy = spyOn(MCPClient, 'createLocal').mockImplementation(async () => {
595
+ const client = Object.create(MCPClient.prototype);
596
+ spyOn(client, 'initialize').mockResolvedValue({} as MCPResponse);
597
+ spyOn(client, 'listTools').mockResolvedValue([]);
598
+ spyOn(client, 'stop').mockReturnValue(undefined);
599
+ return client;
600
+ });
506
601
 
507
602
  const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
508
603
  const mockChat = mock(async () => ({
509
604
  message: { role: 'assistant', content: 'hello' },
510
- })) as any;
605
+ })) as unknown as typeof originalOpenAIChat;
511
606
  OpenAIAdapter.prototype.chat = mockChat;
512
607
 
513
608
  const managerSpy = spyOn(manager, 'getGlobalServers');
@@ -525,13 +620,52 @@ You are a test agent.`;
525
620
  // We can check this by seeing how many times initialize was called if they were different,
526
621
  // but here we just want to ensure it didn't push the global one again.
527
622
 
528
- // Actually, initialize will be called for 'test-mcp' (explicitly listed)
529
- expect(initSpy).toHaveBeenCalledTimes(1);
623
+ // Actually, createLocal will be called for 'test-mcp' (explicitly listed)
624
+ expect(createLocalSpy).toHaveBeenCalledTimes(1);
530
625
 
531
626
  OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
532
- initSpy.mockRestore();
533
- listSpy.mockRestore();
627
+ createLocalSpy.mockRestore();
534
628
  managerSpy.mockRestore();
535
629
  ConfigLoader.clear();
536
630
  });
631
+
632
+ it('should handle object prompts by stringifying them', async () => {
633
+ const step: LlmStep = {
634
+ id: 'l1',
635
+ type: 'llm',
636
+ agent: 'test-agent',
637
+ prompt: '${{ steps.prev.output }}' as unknown as string,
638
+ needs: [],
639
+ };
640
+ const context: ExpressionContext = {
641
+ inputs: {},
642
+ steps: {
643
+ prev: { output: { key: 'value' }, status: 'success' },
644
+ },
645
+ };
646
+
647
+ let capturedPrompt = '';
648
+ const originalOpenAIChatInner = OpenAIAdapter.prototype.chat;
649
+ const mockChat = mock(async (messages: LLMMessage[]) => {
650
+ // console.log('MESSAGES:', JSON.stringify(messages, null, 2));
651
+ capturedPrompt = messages.find((m) => m.role === 'user')?.content || '';
652
+ return { message: { role: 'assistant', content: 'Response' } };
653
+ }) as unknown as typeof originalOpenAIChat;
654
+ OpenAIAdapter.prototype.chat = mockChat;
655
+ CopilotAdapter.prototype.chat = mockChat;
656
+ AnthropicAdapter.prototype.chat = mockChat;
657
+
658
+ const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
659
+
660
+ await executeLlmStep(
661
+ step,
662
+ context,
663
+ executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>
664
+ );
665
+
666
+ expect(capturedPrompt).toContain('"key": "value"');
667
+ expect(capturedPrompt).not.toContain('[object Object]');
668
+
669
+ OpenAIAdapter.prototype.chat = originalOpenAIChatInner;
670
+ });
537
671
  });
@@ -23,14 +23,15 @@ export async function executeLlmStep(
23
23
  context: ExpressionContext,
24
24
  executeStepFn: (step: Step, context: ExpressionContext) => Promise<StepResult>,
25
25
  logger: Logger = console,
26
- mcpManager?: MCPManager
26
+ mcpManager?: MCPManager,
27
+ workflowDir?: string
27
28
  ): Promise<StepResult> {
28
- const agentPath = resolveAgentPath(step.agent);
29
+ const agentPath = resolveAgentPath(step.agent, workflowDir);
29
30
  const agent = parseAgent(agentPath);
30
31
 
31
32
  const provider = step.provider || agent.provider;
32
33
  const model = step.model || agent.model || 'gpt-4o';
33
- const prompt = ExpressionEvaluator.evaluate(step.prompt, context) as string;
34
+ const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
34
35
 
35
36
  const fullModelString = provider ? `${provider}:${model}` : model;
36
37
  const { adapter, resolvedModel } = getAdapter(fullModelString);
@@ -110,8 +111,12 @@ export async function executeLlmStep(
110
111
  continue;
111
112
  }
112
113
  logger.log(` 🔌 Connecting to MCP server: ${server.name}`);
113
- client = new MCPClient(server.command, server.args, server.env);
114
114
  try {
115
+ client = await MCPClient.createLocal(
116
+ server.command,
117
+ server.args || [],
118
+ server.env || {}
119
+ );
115
120
  await client.initialize();
116
121
  localMcpClients.push(client);
117
122
  } catch (error) {