keystone-cli 0.8.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/README.md +486 -54
  2. package/package.json +8 -2
  3. package/src/__fixtures__/index.ts +100 -0
  4. package/src/cli.ts +809 -90
  5. package/src/db/memory-db.ts +35 -1
  6. package/src/db/workflow-db.test.ts +24 -0
  7. package/src/db/workflow-db.ts +469 -14
  8. package/src/expression/evaluator.ts +68 -4
  9. package/src/parser/agent-parser.ts +6 -3
  10. package/src/parser/config-schema.ts +38 -2
  11. package/src/parser/schema.ts +192 -7
  12. package/src/parser/test-schema.ts +29 -0
  13. package/src/parser/workflow-parser.test.ts +54 -0
  14. package/src/parser/workflow-parser.ts +153 -7
  15. package/src/runner/aggregate-error.test.ts +57 -0
  16. package/src/runner/aggregate-error.ts +46 -0
  17. package/src/runner/audit-verification.test.ts +2 -2
  18. package/src/runner/auto-heal.test.ts +1 -1
  19. package/src/runner/blueprint-executor.test.ts +63 -0
  20. package/src/runner/blueprint-executor.ts +157 -0
  21. package/src/runner/concurrency-limit.test.ts +82 -0
  22. package/src/runner/debug-repl.ts +18 -3
  23. package/src/runner/durable-timers.test.ts +200 -0
  24. package/src/runner/engine-executor.test.ts +464 -0
  25. package/src/runner/engine-executor.ts +489 -0
  26. package/src/runner/foreach-executor.ts +30 -12
  27. package/src/runner/llm-adapter.test.ts +282 -5
  28. package/src/runner/llm-adapter.ts +581 -8
  29. package/src/runner/llm-clarification.test.ts +79 -21
  30. package/src/runner/llm-errors.ts +83 -0
  31. package/src/runner/llm-executor.test.ts +258 -219
  32. package/src/runner/llm-executor.ts +226 -29
  33. package/src/runner/mcp-client.ts +70 -3
  34. package/src/runner/mcp-manager.test.ts +52 -52
  35. package/src/runner/mcp-manager.ts +12 -5
  36. package/src/runner/mcp-server.test.ts +117 -78
  37. package/src/runner/mcp-server.ts +13 -4
  38. package/src/runner/optimization-runner.ts +48 -31
  39. package/src/runner/reflexion.test.ts +1 -1
  40. package/src/runner/resource-pool.test.ts +113 -0
  41. package/src/runner/resource-pool.ts +164 -0
  42. package/src/runner/shell-executor.ts +130 -32
  43. package/src/runner/standard-tools-integration.test.ts +36 -36
  44. package/src/runner/standard-tools.test.ts +18 -0
  45. package/src/runner/standard-tools.ts +110 -37
  46. package/src/runner/step-executor.test.ts +176 -16
  47. package/src/runner/step-executor.ts +530 -86
  48. package/src/runner/stream-utils.test.ts +14 -0
  49. package/src/runner/subflow-outputs.test.ts +103 -0
  50. package/src/runner/test-harness.ts +161 -0
  51. package/src/runner/tool-integration.test.ts +73 -79
  52. package/src/runner/workflow-runner.test.ts +492 -15
  53. package/src/runner/workflow-runner.ts +1438 -79
  54. package/src/runner/workflow-subflows.test.ts +255 -0
  55. package/src/templates/agents/keystone-architect.md +19 -14
  56. package/src/templates/agents/tester.md +21 -0
  57. package/src/templates/batch-processor.yaml +1 -1
  58. package/src/templates/child-rollback.yaml +11 -0
  59. package/src/templates/decompose-implement.yaml +53 -0
  60. package/src/templates/decompose-problem.yaml +159 -0
  61. package/src/templates/decompose-research.yaml +52 -0
  62. package/src/templates/decompose-review.yaml +51 -0
  63. package/src/templates/dev.yaml +134 -0
  64. package/src/templates/engine-example.yaml +33 -0
  65. package/src/templates/fan-out-fan-in.yaml +61 -0
  66. package/src/templates/loop-parallel.yaml +1 -1
  67. package/src/templates/memory-service.yaml +1 -1
  68. package/src/templates/parent-rollback.yaml +16 -0
  69. package/src/templates/robust-automation.yaml +1 -1
  70. package/src/templates/scaffold-feature.yaml +29 -27
  71. package/src/templates/scaffold-generate.yaml +41 -0
  72. package/src/templates/scaffold-plan.yaml +53 -0
  73. package/src/types/status.ts +3 -0
  74. package/src/ui/dashboard.tsx +4 -3
  75. package/src/utils/assets.macro.ts +36 -0
  76. package/src/utils/auth-manager.ts +585 -8
  77. package/src/utils/blueprint-utils.test.ts +49 -0
  78. package/src/utils/blueprint-utils.ts +80 -0
  79. package/src/utils/circuit-breaker.test.ts +177 -0
  80. package/src/utils/circuit-breaker.ts +160 -0
  81. package/src/utils/config-loader.test.ts +100 -13
  82. package/src/utils/config-loader.ts +44 -17
  83. package/src/utils/constants.ts +62 -0
  84. package/src/utils/error-renderer.test.ts +267 -0
  85. package/src/utils/error-renderer.ts +320 -0
  86. package/src/utils/json-parser.test.ts +4 -0
  87. package/src/utils/json-parser.ts +18 -1
  88. package/src/utils/mermaid.ts +4 -0
  89. package/src/utils/paths.test.ts +46 -0
  90. package/src/utils/paths.ts +70 -0
  91. package/src/utils/process-sandbox.test.ts +128 -0
  92. package/src/utils/process-sandbox.ts +293 -0
  93. package/src/utils/rate-limiter.test.ts +143 -0
  94. package/src/utils/rate-limiter.ts +221 -0
  95. package/src/utils/redactor.test.ts +23 -15
  96. package/src/utils/redactor.ts +65 -25
  97. package/src/utils/resource-loader.test.ts +54 -0
  98. package/src/utils/resource-loader.ts +158 -0
  99. package/src/utils/sandbox.test.ts +69 -4
  100. package/src/utils/sandbox.ts +69 -6
  101. package/src/utils/schema-validator.ts +65 -0
  102. package/src/utils/workflow-registry.test.ts +57 -0
  103. package/src/utils/workflow-registry.ts +45 -25
  104. /package/src/expression/{evaluator.audit.test.ts → evaluator-audit.test.ts} +0 -0
  105. /package/src/runner/{mcp-client.audit.test.ts → mcp-client-audit.test.ts} +0 -0
@@ -3,13 +3,31 @@ import * as child_process from 'node:child_process';
3
3
  import { EventEmitter } from 'node:events';
4
4
  import { Readable, Writable } from 'node:stream';
5
5
  import { ConfigLoader } from '../utils/config-loader';
6
- import { MCPClient, type MCPResponse } from './mcp-client';
7
- import { MCPManager } from './mcp-manager';
6
+ import type { MCPClient } from './mcp-client';
7
+ import { type MCPClientFactory, MCPManager } from './mcp-manager';
8
8
 
9
9
  import type { Config } from '../parser/config-schema';
10
10
 
11
11
  describe('MCPManager', () => {
12
12
  let spawnSpy: ReturnType<typeof spyOn>;
13
+ const createMockClient = (initializeImpl?: () => Promise<unknown>) => {
14
+ const initialize = mock(
15
+ initializeImpl ||
16
+ (async () => ({
17
+ result: { protocolVersion: '1.0' },
18
+ jsonrpc: '2.0',
19
+ id: 0,
20
+ }))
21
+ );
22
+ const stop = mock(() => undefined);
23
+ const client = { initialize, stop } as unknown as MCPClient;
24
+ return { client, initialize, stop };
25
+ };
26
+
27
+ const createMockFactory = (client: MCPClient): MCPClientFactory => ({
28
+ createLocal: mock(async () => client),
29
+ createRemote: mock(async () => client),
30
+ });
13
31
 
14
32
  beforeEach(() => {
15
33
  ConfigLoader.clear();
@@ -46,8 +64,9 @@ describe('MCPManager', () => {
46
64
  providers: {},
47
65
  model_mappings: {},
48
66
  default_provider: 'openai',
49
- storage: { retention_days: 30 },
50
- workflows_directory: 'workflows',
67
+ storage: { retention_days: 30, redact_secrets_at_rest: true },
68
+ engines: { allowlist: {}, denylist: [] },
69
+ concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
51
70
  } as unknown as Config);
52
71
 
53
72
  const manager = new MCPManager();
@@ -68,43 +87,32 @@ describe('MCPManager', () => {
68
87
  providers: {},
69
88
  model_mappings: {},
70
89
  default_provider: 'openai',
71
- storage: { retention_days: 30 },
72
- workflows_directory: 'workflows',
90
+ storage: { retention_days: 30, redact_secrets_at_rest: true },
91
+ engines: { allowlist: {}, denylist: [] },
92
+ concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
73
93
  } as unknown as Config);
74
94
 
75
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue({
76
- result: { protocolVersion: '1.0' },
77
- jsonrpc: '2.0',
78
- id: 0,
79
- });
80
- const stopSpy = spyOn(MCPClient.prototype, 'stop').mockReturnValue(undefined);
81
-
82
- const manager = new MCPManager();
95
+ const { client: mockClient, initialize, stop } = createMockClient();
96
+ const clientFactory = createMockFactory(mockClient);
97
+ const manager = new MCPManager(undefined, clientFactory);
83
98
  const client = await manager.getClient('test-server');
84
99
 
85
100
  expect(client).toBeDefined();
86
- expect(initSpy).toHaveBeenCalled();
101
+ expect(initialize).toHaveBeenCalled();
87
102
 
88
103
  // Should reuse client
89
104
  const client2 = await manager.getClient('test-server');
90
105
  expect(client2).toBe(client);
91
- expect(initSpy).toHaveBeenCalledTimes(1);
106
+ expect(initialize).toHaveBeenCalledTimes(1);
92
107
 
93
108
  await manager.stopAll();
94
- expect(stopSpy).toHaveBeenCalled();
95
-
96
- initSpy.mockRestore();
97
- stopSpy.mockRestore();
109
+ expect(stop).toHaveBeenCalled();
98
110
  });
99
111
 
100
112
  it('should get client for ad-hoc server config', async () => {
101
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockResolvedValue({
102
- result: { protocolVersion: '1.0' },
103
- jsonrpc: '2.0',
104
- id: 0,
105
- });
106
-
107
- const manager = new MCPManager();
113
+ const { client: mockClient, initialize } = createMockClient();
114
+ const clientFactory = createMockFactory(mockClient);
115
+ const manager = new MCPManager(undefined, clientFactory);
108
116
  const client = await manager.getClient({
109
117
  name: 'adhoc',
110
118
  type: 'local',
@@ -112,9 +120,7 @@ describe('MCPManager', () => {
112
120
  });
113
121
 
114
122
  expect(client).toBeDefined();
115
- expect(initSpy).toHaveBeenCalled();
116
-
117
- initSpy.mockRestore();
123
+ expect(initialize).toHaveBeenCalled();
118
124
  });
119
125
 
120
126
  it('should return undefined if global server not found', async () => {
@@ -134,23 +140,24 @@ describe('MCPManager', () => {
134
140
  providers: {},
135
141
  model_mappings: {},
136
142
  default_provider: 'openai',
137
- storage: { retention_days: 30 },
138
- workflows_directory: 'workflows',
143
+ storage: { retention_days: 30, redact_secrets_at_rest: true },
144
+ engines: { allowlist: {}, denylist: [] },
145
+ concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
139
146
  } as unknown as Config);
140
147
 
141
148
  // Mock initialize to take some time
142
149
  let initCalls = 0;
143
- const initSpy = spyOn(MCPClient.prototype, 'initialize').mockImplementation(async () => {
150
+ const { client: mockClient, initialize } = createMockClient(async () => {
144
151
  initCalls++;
145
152
  await new Promise((resolve) => setTimeout(resolve, 50));
146
153
  return {
147
154
  result: { protocolVersion: '1.0' },
148
155
  jsonrpc: '2.0',
149
156
  id: 0,
150
- } as MCPResponse;
157
+ };
151
158
  });
152
-
153
- const manager = new MCPManager();
159
+ const clientFactory = createMockFactory(mockClient);
160
+ const manager = new MCPManager(undefined, clientFactory);
154
161
 
155
162
  // Fire off multiple requests concurrently
156
163
  const p1 = manager.getClient('concurrent-server');
@@ -163,8 +170,7 @@ describe('MCPManager', () => {
163
170
  expect(c1).toBe(c2);
164
171
  expect(c1).toBe(c3);
165
172
  expect(initCalls).toBe(1); // Crucial: only one initialization
166
-
167
- initSpy.mockRestore();
173
+ expect(initialize).toHaveBeenCalledTimes(1);
168
174
  });
169
175
 
170
176
  it('should handle connection failure', async () => {
@@ -178,24 +184,18 @@ describe('MCPManager', () => {
178
184
  providers: {},
179
185
  model_mappings: {},
180
186
  default_provider: 'openai',
181
- storage: { retention_days: 30 },
182
- workflows_directory: 'workflows',
187
+ storage: { retention_days: 30, redact_secrets_at_rest: true },
188
+ engines: { allowlist: {}, denylist: [] },
189
+ concurrency: { default: 10, pools: { llm: 2, shell: 5, http: 10, engine: 2 } },
183
190
  } as unknown as Config);
184
191
 
185
- const createLocalSpy = spyOn(MCPClient, 'createLocal').mockImplementation(
186
- async (_cmd: string) => {
187
- const client = Object.create(MCPClient.prototype);
188
- spyOn(client, 'initialize').mockRejectedValue(new Error('Connection failed'));
189
- spyOn(client, 'stop').mockReturnValue(undefined);
190
- return client;
191
- }
192
- );
193
-
194
- const manager = new MCPManager();
192
+ const { client: mockClient } = createMockClient(async () => {
193
+ throw new Error('Connection failed');
194
+ });
195
+ const clientFactory = createMockFactory(mockClient);
196
+ const manager = new MCPManager(undefined, clientFactory);
195
197
  const client = await manager.getClient('fail-server');
196
198
 
197
199
  expect(client).toBeUndefined();
198
-
199
- createLocalSpy.mockRestore();
200
200
  });
201
201
  });
@@ -1,6 +1,8 @@
1
1
  import { ConfigLoader } from '../utils/config-loader';
2
2
  import { ConsoleLogger, type Logger } from '../utils/logger.ts';
3
- import { MCPClient } from './mcp-client';
3
+ import { MCPClient, validateRemoteUrl } from './mcp-client';
4
+
5
+ export type MCPClientFactory = Pick<typeof MCPClient, 'createLocal' | 'createRemote'>;
4
6
 
5
7
  export interface MCPServerConfig {
6
8
  name: string;
@@ -21,9 +23,11 @@ export class MCPManager {
21
23
  private connectionPromises: Map<string, Promise<MCPClient | undefined>> = new Map();
22
24
  private sharedServers: Map<string, MCPServerConfig> = new Map();
23
25
  private logger: Logger;
26
+ private clientFactory: MCPClientFactory;
24
27
 
25
- constructor(logger: Logger = new ConsoleLogger()) {
26
- this.logger = logger;
28
+ constructor(logger?: Logger, clientFactory: MCPClientFactory = MCPClient) {
29
+ this.logger = logger || new ConsoleLogger();
30
+ this.clientFactory = clientFactory;
27
31
  this.loadGlobalConfig();
28
32
 
29
33
  // Ensure cleanup on process exit
@@ -83,6 +87,9 @@ export class MCPManager {
83
87
  if (config.type === 'remote') {
84
88
  if (!config.url) throw new Error('Remote MCP server missing URL');
85
89
 
90
+ // SSRF Protection: Validate URL before connecting
91
+ await validateRemoteUrl(config.url);
92
+
86
93
  const headers = { ...(config.headers || {}) };
87
94
 
88
95
  if (config.oauth) {
@@ -99,7 +106,7 @@ export class MCPManager {
99
106
  headers.Authorization = `Bearer ${token}`;
100
107
  }
101
108
 
102
- client = await MCPClient.createRemote(config.url, headers, config.timeout, {
109
+ client = await this.clientFactory.createRemote(config.url, headers, config.timeout, {
103
110
  logger: activeLogger,
104
111
  });
105
112
  } else {
@@ -124,7 +131,7 @@ export class MCPManager {
124
131
  env.MCP_TOKEN = token;
125
132
  }
126
133
 
127
- client = await MCPClient.createLocal(
134
+ client = await this.clientFactory.createLocal(
128
135
  config.command,
129
136
  config.args || [],
130
137
  env,
@@ -1,24 +1,38 @@
1
- import { beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
1
+ import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from 'bun:test';
2
2
  import { WorkflowDb } from '../db/workflow-db';
3
3
  import { WorkflowParser } from '../parser/workflow-parser';
4
+ import { ConsoleLogger } from '../utils/logger';
4
5
  import { WorkflowRegistry } from '../utils/workflow-registry';
5
6
  import { MCPServer } from './mcp-server';
6
7
  import { WorkflowSuspendedError } from './step-executor';
7
- import { WorkflowRunner } from './workflow-runner';
8
+ import type { WorkflowRunner } from './workflow-runner';
8
9
 
9
10
  describe('MCPServer', () => {
10
11
  let db: WorkflowDb;
11
12
  let server: MCPServer;
13
+ const activeSpies: Array<{ mockRestore: () => void }> = [];
14
+ const trackSpy = <T extends { mockRestore: () => void }>(spy: T): T => {
15
+ activeSpies.push(spy);
16
+ return spy;
17
+ };
12
18
 
13
19
  beforeEach(() => {
14
20
  db = new WorkflowDb(':memory:');
15
21
  server = new MCPServer(db);
16
- mock.restore();
22
+ });
23
+ afterEach(() => {
24
+ for (const spy of activeSpies) {
25
+ spy.mockRestore();
26
+ }
27
+ activeSpies.length = 0;
17
28
  });
18
29
 
19
- const handleMessage = (msg: unknown) => {
30
+ const createServerWithRunner = (runner: WorkflowRunner) =>
31
+ new MCPServer(db, process.stdin, process.stdout, new ConsoleLogger(), () => runner);
32
+
33
+ const handleMessage = (msg: unknown, targetServer: MCPServer = server) => {
20
34
  // @ts-ignore
21
- return server.handleMessage(msg);
35
+ return targetServer.handleMessage(msg);
22
36
  };
23
37
 
24
38
  it('should handle initialize request', async () => {
@@ -44,7 +58,7 @@ describe('MCPServer', () => {
44
58
  });
45
59
 
46
60
  it('should call list_workflows tool', async () => {
47
- spyOn(WorkflowRegistry, 'listWorkflows').mockReturnValue([
61
+ trackSpy(spyOn(WorkflowRegistry, 'listWorkflows')).mockReturnValue([
48
62
  { name: 'test-wf', description: 'Test Workflow' },
49
63
  ]);
50
64
 
@@ -59,76 +73,90 @@ describe('MCPServer', () => {
59
73
  });
60
74
 
61
75
  it('should call run_workflow tool successfully', async () => {
62
- spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
76
+ trackSpy(spyOn(WorkflowRegistry, 'resolvePath')).mockReturnValue('test.yaml');
63
77
  // @ts-ignore
64
- spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
78
+ trackSpy(spyOn(WorkflowParser, 'loadWorkflow')).mockReturnValue({
65
79
  name: 'test-wf',
66
80
  steps: [],
67
81
  });
68
82
 
69
- // Mock WorkflowRunner
70
83
  const mockRun = mock(() => Promise.resolve({ result: 'ok' }));
71
- // @ts-ignore
72
- spyOn(WorkflowRunner.prototype, 'run').mockImplementation(mockRun);
73
-
74
- const response = await handleMessage({
75
- jsonrpc: '2.0',
76
- id: 4,
77
- method: 'tools/call',
78
- params: {
79
- name: 'run_workflow',
80
- arguments: { workflow_name: 'test-wf', inputs: {} },
84
+ const runner = { run: mockRun } as unknown as WorkflowRunner;
85
+ const testServer = createServerWithRunner(runner);
86
+
87
+ const response = await handleMessage(
88
+ {
89
+ jsonrpc: '2.0',
90
+ id: 4,
91
+ method: 'tools/call',
92
+ params: {
93
+ name: 'run_workflow',
94
+ arguments: { workflow_name: 'test-wf', inputs: {} },
95
+ },
81
96
  },
82
- });
97
+ testServer
98
+ );
83
99
 
84
100
  expect(JSON.parse(response?.result?.content?.[0]?.text || '{}').status).toBe('success');
85
101
  });
86
102
 
87
103
  it('should handle run_workflow failure', async () => {
88
- spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
104
+ trackSpy(spyOn(WorkflowRegistry, 'resolvePath')).mockReturnValue('test.yaml');
89
105
  // @ts-ignore
90
- spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
106
+ trackSpy(spyOn(WorkflowParser, 'loadWorkflow')).mockReturnValue({
91
107
  name: 'test-wf',
92
108
  steps: [],
93
109
  });
94
110
 
95
- spyOn(WorkflowRunner.prototype, 'run').mockRejectedValue(new Error('workflow failed'));
96
-
97
- const response = await handleMessage({
98
- jsonrpc: '2.0',
99
- id: 5,
100
- method: 'tools/call',
101
- params: {
102
- name: 'run_workflow',
103
- arguments: { workflow_name: 'test-wf' },
111
+ const runner = {
112
+ run: mock(() => Promise.reject(new Error('workflow failed'))),
113
+ } as unknown as WorkflowRunner;
114
+ const testServer = createServerWithRunner(runner);
115
+
116
+ const response = await handleMessage(
117
+ {
118
+ jsonrpc: '2.0',
119
+ id: 5,
120
+ method: 'tools/call',
121
+ params: {
122
+ name: 'run_workflow',
123
+ arguments: { workflow_name: 'test-wf' },
124
+ },
104
125
  },
105
- });
126
+ testServer
127
+ );
106
128
 
107
129
  expect(response?.result?.isError).toBe(true);
108
130
  expect(response?.result?.content?.[0]?.text).toContain('Workflow failed');
109
131
  });
110
132
 
111
133
  it('should handle workflow suspension in run_workflow', async () => {
112
- spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
134
+ trackSpy(spyOn(WorkflowRegistry, 'resolvePath')).mockReturnValue('test.yaml');
113
135
  // @ts-ignore
114
- spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
136
+ trackSpy(spyOn(WorkflowParser, 'loadWorkflow')).mockReturnValue({
115
137
  name: 'test-wf',
116
138
  steps: [],
117
139
  });
118
140
 
119
141
  const suspendedError = new WorkflowSuspendedError('Input needed', 'step1', 'text');
120
- spyOn(WorkflowRunner.prototype, 'run').mockRejectedValue(suspendedError);
121
- spyOn(WorkflowRunner.prototype, 'getRunId').mockReturnValue('run123');
122
-
123
- const response = await handleMessage({
124
- jsonrpc: '2.0',
125
- id: 6,
126
- method: 'tools/call',
127
- params: {
128
- name: 'run_workflow',
129
- arguments: { workflow_name: 'test-wf' },
142
+ const runner = {
143
+ run: mock(() => Promise.reject(suspendedError)),
144
+ getRunId: mock(() => 'run123'),
145
+ } as unknown as WorkflowRunner;
146
+ const testServer = createServerWithRunner(runner);
147
+
148
+ const response = await handleMessage(
149
+ {
150
+ jsonrpc: '2.0',
151
+ id: 6,
152
+ method: 'tools/call',
153
+ params: {
154
+ name: 'run_workflow',
155
+ arguments: { workflow_name: 'test-wf' },
156
+ },
130
157
  },
131
- });
158
+ testServer
159
+ );
132
160
 
133
161
  const result = JSON.parse(response?.result?.content?.[0]?.text || '{}');
134
162
  expect(result.status).toBe('paused');
@@ -142,26 +170,29 @@ describe('MCPServer', () => {
142
170
  await db.updateRunStatus(runId, 'paused');
143
171
  await db.createStep('step-exec-1', runId, 's1');
144
172
 
145
- spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
173
+ trackSpy(spyOn(WorkflowRegistry, 'resolvePath')).mockReturnValue('test.yaml');
146
174
  // @ts-ignore
147
- spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
175
+ trackSpy(spyOn(WorkflowParser, 'loadWorkflow')).mockReturnValue({
148
176
  name: 'test-wf',
149
177
  steps: [{ id: 's1', type: 'human' }],
150
178
  });
151
179
 
152
180
  const mockRun = mock(() => Promise.resolve({ result: 'resumed' }));
153
- // @ts-ignore
154
- spyOn(WorkflowRunner.prototype, 'run').mockImplementation(mockRun);
155
-
156
- const response = await handleMessage({
157
- jsonrpc: '2.0',
158
- id: 7,
159
- method: 'tools/call',
160
- params: {
161
- name: 'answer_human_input',
162
- arguments: { run_id: runId, input: 'my response' },
181
+ const runner = { run: mockRun } as unknown as WorkflowRunner;
182
+ const testServer = createServerWithRunner(runner);
183
+
184
+ const response = await handleMessage(
185
+ {
186
+ jsonrpc: '2.0',
187
+ id: 7,
188
+ method: 'tools/call',
189
+ params: {
190
+ name: 'answer_human_input',
191
+ arguments: { run_id: runId, input: 'my response' },
192
+ },
163
193
  },
164
- });
194
+ testServer
195
+ );
165
196
 
166
197
  expect(JSON.parse(response?.result?.content?.[0]?.text || '{}').status).toBe('success');
167
198
 
@@ -223,8 +254,8 @@ describe('MCPServer', () => {
223
254
  // Create a new server for this test to use the streams
224
255
  const testServer = new MCPServer(db, input, outputStream);
225
256
 
226
- const writeSpy = spyOn(outputStream, 'write').mockImplementation(() => true);
227
- const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
257
+ const writeSpy = trackSpy(spyOn(outputStream, 'write')).mockImplementation(() => true);
258
+ const consoleSpy = trackSpy(spyOn(console, 'error')).mockImplementation(() => {});
228
259
 
229
260
  const startPromise = testServer.start();
230
261
 
@@ -245,36 +276,37 @@ describe('MCPServer', () => {
245
276
 
246
277
  input.end();
247
278
  await startPromise;
248
-
249
- writeSpy.mockRestore();
250
- consoleSpy.mockRestore();
251
279
  });
252
280
 
253
281
  it('should call start_workflow tool and return immediately', async () => {
254
- spyOn(WorkflowRegistry, 'resolvePath').mockReturnValue('test.yaml');
282
+ trackSpy(spyOn(WorkflowRegistry, 'resolvePath')).mockReturnValue('test.yaml');
255
283
  // @ts-ignore
256
- spyOn(WorkflowParser, 'loadWorkflow').mockReturnValue({
284
+ trackSpy(spyOn(WorkflowParser, 'loadWorkflow')).mockReturnValue({
257
285
  name: 'test-wf',
258
286
  steps: [],
259
287
  });
260
288
 
261
- // Mock WorkflowRunner - simulate a slow workflow
262
289
  const mockRun = mock(
263
290
  () => new Promise((resolve) => setTimeout(() => resolve({ result: 'ok' }), 100))
264
291
  );
265
- // @ts-ignore
266
- spyOn(WorkflowRunner.prototype, 'run').mockImplementation(mockRun);
267
- spyOn(WorkflowRunner.prototype, 'getRunId').mockReturnValue('async-run-123');
268
-
269
- const response = await handleMessage({
270
- jsonrpc: '2.0',
271
- id: 10,
272
- method: 'tools/call',
273
- params: {
274
- name: 'start_workflow',
275
- arguments: { workflow_name: 'test-wf', inputs: {} },
292
+ const runner = {
293
+ run: mockRun,
294
+ getRunId: mock(() => 'async-run-123'),
295
+ } as unknown as WorkflowRunner;
296
+ const testServer = createServerWithRunner(runner);
297
+
298
+ const response = await handleMessage(
299
+ {
300
+ jsonrpc: '2.0',
301
+ id: 10,
302
+ method: 'tools/call',
303
+ params: {
304
+ name: 'start_workflow',
305
+ arguments: { workflow_name: 'test-wf', inputs: {} },
306
+ },
276
307
  },
277
- });
308
+ testServer
309
+ );
278
310
 
279
311
  const result = JSON.parse(response?.result?.content?.[0]?.text || '{}');
280
312
  expect(result.status).toBe('running');
@@ -366,4 +398,11 @@ describe('MCPServer', () => {
366
398
 
367
399
  expect(response?.error?.message).toContain('not found');
368
400
  });
401
+
402
+ it('should close database connection when stop is called', () => {
403
+ const dbCloseSpy = spyOn(db, 'close');
404
+ server.stop();
405
+ expect(dbCloseSpy).toHaveBeenCalled();
406
+ dbCloseSpy.mockRestore();
407
+ });
369
408
  });
@@ -16,22 +16,31 @@ interface MCPMessage {
16
16
  id?: string | number;
17
17
  }
18
18
 
19
+ type WorkflowRunnerFactory = (
20
+ workflow: ConstructorParameters<typeof WorkflowRunner>[0],
21
+ options: ConstructorParameters<typeof WorkflowRunner>[1]
22
+ ) => WorkflowRunner;
23
+
19
24
  export class MCPServer {
20
25
  private db: WorkflowDb;
21
26
  private input: Readable;
22
27
  private output: Writable;
23
28
  private logger: Logger;
29
+ private runnerFactory: WorkflowRunnerFactory;
24
30
 
25
31
  constructor(
26
32
  db?: WorkflowDb,
27
33
  input: Readable = process.stdin,
28
34
  output: Writable = process.stdout,
29
- logger: Logger = new ConsoleLogger()
35
+ logger: Logger = new ConsoleLogger(),
36
+ runnerFactory: WorkflowRunnerFactory = (workflow, options) =>
37
+ new WorkflowRunner(workflow, options)
30
38
  ) {
31
39
  this.db = db || new WorkflowDb();
32
40
  this.input = input;
33
41
  this.output = output;
34
42
  this.logger = logger;
43
+ this.runnerFactory = runnerFactory;
35
44
  }
36
45
 
37
46
  async start() {
@@ -235,7 +244,7 @@ export class MCPServer {
235
244
  debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
236
245
  };
237
246
 
238
- const runner = new WorkflowRunner(workflow, {
247
+ const runner = this.runnerFactory(workflow, {
239
248
  inputs,
240
249
  logger,
241
250
  preventExit: true,
@@ -417,7 +426,7 @@ export class MCPServer {
417
426
  debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
418
427
  };
419
428
 
420
- const runner = new WorkflowRunner(workflow, {
429
+ const runner = this.runnerFactory(workflow, {
421
430
  resumeRunId: run_id,
422
431
  resumeInputs: { [pendingStep.step_id]: { __answer: output } },
423
432
  logger,
@@ -513,7 +522,7 @@ export class MCPServer {
513
522
  debug: () => {},
514
523
  };
515
524
 
516
- const runner = new WorkflowRunner(workflow, {
525
+ const runner = this.runnerFactory(workflow, {
517
526
  inputs: inputs || {},
518
527
  logger,
519
528
  preventExit: true,