keystone-cli 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +43 -4
  2. package/package.json +4 -1
  3. package/src/cli.ts +1 -0
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/config-schema.ts +6 -0
  12. package/src/parser/schema.ts +1 -0
  13. package/src/runner/__test__/llm-test-setup.ts +43 -11
  14. package/src/runner/durable-timers.test.ts +1 -1
  15. package/src/runner/executors/dynamic-executor.ts +125 -88
  16. package/src/runner/executors/engine-executor.ts +10 -39
  17. package/src/runner/executors/file-executor.ts +67 -0
  18. package/src/runner/executors/foreach-executor.ts +170 -17
  19. package/src/runner/executors/human-executor.ts +18 -0
  20. package/src/runner/executors/llm/stream-handler.ts +103 -0
  21. package/src/runner/executors/llm/tool-manager.ts +360 -0
  22. package/src/runner/executors/llm-executor.ts +288 -555
  23. package/src/runner/executors/memory-executor.ts +41 -34
  24. package/src/runner/executors/shell-executor.ts +96 -52
  25. package/src/runner/executors/subworkflow-executor.ts +16 -0
  26. package/src/runner/executors/types.ts +3 -1
  27. package/src/runner/executors/verification_fixes.test.ts +46 -0
  28. package/src/runner/join-scheduling.test.ts +2 -1
  29. package/src/runner/llm-adapter.integration.test.ts +10 -5
  30. package/src/runner/llm-adapter.ts +57 -18
  31. package/src/runner/llm-clarification.test.ts +4 -1
  32. package/src/runner/llm-executor.test.ts +21 -7
  33. package/src/runner/mcp-client.ts +36 -2
  34. package/src/runner/mcp-server.ts +65 -36
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/standard-tools-ast.test.ts +4 -2
  40. package/src/runner/standard-tools-execution.test.ts +14 -1
  41. package/src/runner/standard-tools-integration.test.ts +6 -0
  42. package/src/runner/standard-tools.ts +13 -10
  43. package/src/runner/step-executor.ts +2 -2
  44. package/src/runner/tool-integration.test.ts +4 -1
  45. package/src/runner/workflow-runner.test.ts +23 -12
  46. package/src/runner/workflow-runner.ts +172 -79
  47. package/src/runner/workflow-state.ts +181 -111
  48. package/src/ui/dashboard.tsx +17 -3
  49. package/src/utils/config-loader.ts +4 -0
  50. package/src/utils/constants.ts +4 -0
  51. package/src/utils/context-injector.test.ts +27 -27
  52. package/src/utils/context-injector.ts +68 -26
  53. package/src/utils/process-sandbox.ts +138 -148
  54. package/src/utils/redactor.ts +39 -9
  55. package/src/utils/resource-loader.ts +24 -19
  56. package/src/utils/sandbox.ts +6 -0
  57. package/src/utils/stream-utils.ts +58 -0
@@ -29,6 +29,7 @@ import type { ExpressionContext } from '../expression/evaluator';
29
29
  import * as agentParser from '../parser/agent-parser';
30
30
  import type { Agent, LlmStep, Step } from '../parser/schema';
31
31
  import { ConfigLoader } from '../utils/config-loader';
32
+ import * as llmAdapter from './llm-adapter';
32
33
  import type { StepResult } from './step-executor';
33
34
 
34
35
  // Note: mock.module() for llm-adapter is now handled by the preload file
@@ -66,6 +67,7 @@ describe('llm-executor', () => {
66
67
  let spawnSpy: ReturnType<typeof spyOn>;
67
68
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
68
69
  let parseAgentSpy: ReturnType<typeof spyOn>;
70
+ let getModelSpy: ReturnType<typeof spyOn>;
69
71
 
70
72
  // Default Mock Chat Logic
71
73
  const defaultMockChat = async (messages: LLMMessage[], _options: any) => {
@@ -184,7 +186,9 @@ describe('llm-executor', () => {
184
186
  ConfigLoader.clear();
185
187
  setupLlmMocks();
186
188
  resetLlmMocks();
187
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
189
+
190
+ // Spy on getModel to return our mock model directly
191
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
188
192
 
189
193
  // Mock agent parser to avoid file dependencies
190
194
  resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue('test-agent.md');
@@ -215,6 +219,7 @@ describe('llm-executor', () => {
215
219
  afterEach(() => {
216
220
  resolveAgentPathSpy?.mockRestore();
217
221
  parseAgentSpy?.mockRestore();
222
+ getModelSpy?.mockRestore();
218
223
  });
219
224
 
220
225
  afterAll(() => {
@@ -254,21 +259,25 @@ describe('llm-executor', () => {
254
259
  needs: [],
255
260
  maxIterations: 10,
256
261
  };
257
- const logger = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
262
+ const loggerSpy = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
263
+
264
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
258
265
 
259
266
  await executeLlmStep(
260
267
  step,
261
268
  { inputs: {}, steps: {} },
262
269
  async () => ({ status: 'success', output: 'ok' }),
263
- logger
270
+ loggerSpy
264
271
  );
265
272
 
266
- expect(logger.log).toHaveBeenCalledWith(
273
+ consoleSpy.mockRestore();
274
+
275
+ expect(loggerSpy.log).toHaveBeenCalledWith(
267
276
  expect.stringContaining(' 🛠️ Tool Call: test-tool {"val":123}')
268
277
  );
269
278
  });
270
279
 
271
- it('should return raw output logic if schema schema validation fails (no retry implemented)', async () => {
280
+ it('should return failed status if schema validation fails and JSON cannot be extracted', async () => {
272
281
  setupMockModel(defaultMockChat as any);
273
282
  const step: LlmStep = {
274
283
  id: 'l1',
@@ -282,13 +291,14 @@ describe('llm-executor', () => {
282
291
 
283
292
  // Case 1: Model returns text that is NOT valid JSON
284
293
  setupMockModel(async () => ({ message: { role: 'assistant', content: 'Not JSON' } }));
294
+
285
295
  const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
286
296
  status: 'success',
287
297
  output: 'ok',
288
298
  }));
289
299
 
290
- // current simple refactor doesn't implement retry, just returns text or throws
291
- expect(result.output).toBe('Not JSON');
300
+ expect(result.status).toBe('failed');
301
+ expect(result.error).toContain('Failed to extract valid JSON');
292
302
  });
293
303
 
294
304
  it('should handle tool not found', async () => {
@@ -301,11 +311,15 @@ describe('llm-executor', () => {
301
311
  maxIterations: 10,
302
312
  };
303
313
 
314
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
315
+
304
316
  const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
305
317
  status: 'success',
306
318
  output: 'ok',
307
319
  }));
308
320
 
321
+ consoleSpy.mockRestore();
322
+
309
323
  expect(result.status).toBe('success');
310
324
  });
311
325
 
@@ -13,6 +13,9 @@ export const MCP_PROTOCOL_VERSION = MCP.PROTOCOL_VERSION;
13
13
  // Maximum buffer size for incoming messages (10MB) to prevent memory exhaustion
14
14
  const MAX_BUFFER_SIZE = 10 * 1024 * 1024;
15
15
 
16
+ // Track if we have already warned about SSRF limitations to avoid log spam
17
+ let hasWarnedSSRF = false;
18
+
16
19
  /**
17
20
  * Efficient line splitting without regex to prevent ReDoS attacks.
18
21
  * Handles \r\n, \r, and \n line endings.
@@ -99,7 +102,7 @@ function isPrivateIpAddress(address: string): boolean {
99
102
 
100
103
  export async function validateRemoteUrl(
101
104
  url: string,
102
- options: { allowInsecure?: boolean } = {}
105
+ options: { allowInsecure?: boolean; logger?: Logger } = {}
103
106
  ): Promise<void> {
104
107
  let parsed: URL;
105
108
  try {
@@ -114,6 +117,11 @@ export async function validateRemoteUrl(
114
117
  }
115
118
 
116
119
  // Require HTTPS in production
120
+ // SECURITY WARNING: The DNS resolution check later in this function is susceptible to TOCTOU (Time-of-Check to Time-of-Use)
121
+ // DNS rebinding attacks. A malicious domain could resolve to a public IP during this check
122
+ // and then switch to a private IP (e.g. 127.0.0.1) when the connection is actually made.
123
+ // Full protection requires resolving the IP once and using that IP for the connection,
124
+ // or using a proxy that enforces these rules.
117
125
  if (parsed.protocol !== 'https:') {
118
126
  throw new Error(
119
127
  `SSRF Protection: URL must use HTTPS. Got: ${parsed.protocol}. Set allowInsecure option to true if you trust this server.`
@@ -154,12 +162,28 @@ export async function validateRemoteUrl(
154
162
  // Resolve DNS to prevent hostnames that map to private IPs (DNS rebinding checks)
155
163
  // WARNING: This check is vulnerable to Time-of-Check Time-of-Use (TOCTOU) DNS Rebinding attacks.
156
164
  // A malicious DNS server could return a public IP here, then switch to a private IP for the actual fetch.
157
- // In a nodejs environment using standard fetch/native DNS, this is hard to fully prevent without
165
+ // In a nodejs/bun environment using standard fetch/native DNS, this is hard to fully prevent without
158
166
  // a custom agent that pins the IP or low-level socket inspection.
167
+ // Users requiring high security should run this in an isolated network environment (container/VM).
159
168
  // For now, this check provides "defense in depth" against accidental internal access.
169
+ // CRITICAL SECURITY NOTE: In high-security environments, do NOT rely solely on this check.
170
+ // Use network-level isolation (e.g. firewalls, service meshes, or egress proxies) to strictly block
171
+ // internal traffic from the Keystone process.
172
+ //
173
+ // Recommendation: Use 'allowInsecure: true' only in trusted environments.
160
174
  if (!isIP(hostname)) {
161
175
  try {
176
+ // WARNING: This check is vulnerable to DNS Rebinding (TOCTOU)
177
+ if (options.logger?.warn && !hasWarnedSSRF) {
178
+ options.logger.warn(
179
+ ' ⚠️ Security Note: Remote URL validation provides defense-in-depth but does not fully prevent DNS rebinding attacks.\n' +
180
+ ' For high-security environments, ensure network-level isolation (e.g. firewalls).'
181
+ );
182
+ hasWarnedSSRF = true;
183
+ }
184
+
162
185
  const resolved = await lookup(hostname, { all: true });
186
+
163
187
  for (const record of resolved) {
164
188
  if (isPrivateIpAddress(record.address)) {
165
189
  throw new Error(
@@ -168,6 +192,16 @@ export async function validateRemoteUrl(
168
192
  }
169
193
  }
170
194
  } catch (error) {
195
+ if (error instanceof Error && error.message.startsWith('SSRF Protection')) {
196
+ throw error;
197
+ }
198
+
199
+ if (options.logger?.warn) {
200
+ options.logger.warn(
201
+ `[Security Warning] validateRemoteUrl check for ${hostname} failed/bypassed: ${error}`
202
+ );
203
+ }
204
+
171
205
  throw new Error(
172
206
  `SSRF Protection: Failed to resolve hostname "${hostname}": ${
173
207
  error instanceof Error ? error.message : String(error)
@@ -248,14 +248,22 @@ export class MCPServer {
248
248
  const path = WorkflowRegistry.resolvePath(workflow_name);
249
249
  const workflow = WorkflowParser.loadWorkflow(path);
250
250
 
251
- // Use a custom logger that captures logs for the MCP response
251
+ // Cap captured logs at MAX_LOG_LINES (oldest entries are dropped first) so memory use stays bounded
252
+ const MAX_LOG_LINES = 1000;
252
253
  const logs: string[] = [];
254
+ const addLog = (msg: string) => {
255
+ if (logs.length >= MAX_LOG_LINES) {
256
+ logs.shift(); // Remove oldest
257
+ }
258
+ logs.push(msg);
259
+ };
260
+
253
261
  const logger = {
254
- log: (msg: string) => logs.push(msg),
255
- error: (msg: string) => logs.push(`ERROR: ${msg}`),
256
- warn: (msg: string) => logs.push(`WARN: ${msg}`),
257
- info: (msg: string) => logs.push(`INFO: ${msg}`),
258
- debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
262
+ log: (msg: string) => addLog(msg),
263
+ error: (msg: string) => addLog(`ERROR: ${msg}`),
264
+ warn: (msg: string) => addLog(`WARN: ${msg}`),
265
+ info: (msg: string) => addLog(`INFO: ${msg}`),
266
+ debug: (msg: string) => addLog(`DEBUG: ${msg}`),
259
267
  };
260
268
 
261
269
  const runner = this.runnerFactory(workflow, {
@@ -545,37 +553,58 @@ export class MCPServer {
545
553
  const runId = runner.getRunId();
546
554
 
547
555
  // Start the workflow asynchronously
548
- runner.run().then(
549
- async (outputs) => {
550
- // Update DB with success on completion
551
- await this.db.updateRunStatus(runId, 'success', outputs);
552
- },
553
- async (error) => {
554
- // Update DB with failure
555
- if (error instanceof WorkflowSuspendedError) {
556
- await this.db.updateRunStatus(runId, 'paused');
557
- this.sendNotification('notifications/keystone.human_input', {
558
- run_id: runId,
559
- workflow: workflow_name,
560
- status: 'paused',
561
- message: error.message,
562
- step_id: error.stepId,
563
- input_type: error.inputType,
564
- instructions:
565
- error.inputType === 'confirm'
566
- ? 'Use answer_human_input with input="confirm" to proceed.'
567
- : 'Use answer_human_input with the required text input.',
568
- });
569
- } else {
570
- await this.db.updateRunStatus(
571
- runId,
572
- 'failed',
573
- undefined,
574
- error instanceof Error ? error.message : String(error)
575
- );
556
+ // Fire-and-forget: the outcome is persisted by the handlers below, and their failures are logged rather than thrown
557
+ runner
558
+ .run()
559
+ .then(
560
+ async (outputs) => {
561
+ try {
562
+ // Update DB with success on completion
563
+ await this.db.updateRunStatus(runId, 'success', outputs);
564
+ } catch (e) {
565
+ this.logger.error(
566
+ `[McpServer] Failed to update success status for run ${runId}: ${e}`
567
+ );
568
+ }
569
+ },
570
+ async (error) => {
571
+ try {
572
+ // Update DB with failure
573
+ if (error instanceof WorkflowSuspendedError) {
574
+ await this.db.updateRunStatus(runId, 'paused');
575
+ this.sendNotification('notifications/keystone.human_input', {
576
+ run_id: runId,
577
+ workflow: workflow_name,
578
+ status: 'paused',
579
+ message: error.message,
580
+ step_id: error.stepId,
581
+ input_type: error.inputType,
582
+ instructions:
583
+ error.inputType === 'confirm'
584
+ ? 'Use answer_human_input with input="confirm" to proceed.'
585
+ : 'Use answer_human_input with the required text input.',
586
+ });
587
+ } else {
588
+ await this.db.updateRunStatus(
589
+ runId,
590
+ 'failed',
591
+ undefined,
592
+ error instanceof Error ? error.message : String(error)
593
+ );
594
+ }
595
+ } catch (e) {
596
+ this.logger.error(
597
+ `[McpServer] Failed to update failure status for run ${runId}: ${e}`
598
+ );
599
+ }
576
600
  }
577
- }
578
- );
601
+ )
602
+ .catch((e) => {
603
+ // Safety net: handles any rejection that escapes the two handlers above (e.g. if logging inside their catch blocks throws)
604
+ this.logger.error(
605
+ `[McpServer] Unexpected error in async workflow execution for run ${runId}: ${e}`
606
+ );
607
+ });
579
608
 
580
609
  return {
581
610
  jsonrpc: '2.0',
@@ -10,20 +10,23 @@ import {
10
10
 
11
11
  import { ConfigLoader } from '../utils/config-loader';
12
12
 
13
- import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
13
+ import { beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
14
14
  import type { Step, Workflow } from '../parser/schema';
15
+ import * as llmAdapter from './llm-adapter';
15
16
 
16
17
  // Note: mock.module() for llm-adapter is now handled by the preload file
17
18
  // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
18
19
  // Instead, we use a mock model that the real ai SDK calls.
19
20
 
20
21
  describe('WorkflowRunner Recovery Security', () => {
22
+ let getModelSpy: ReturnType<typeof spyOn>;
23
+
21
24
  beforeEach(() => {
22
25
  jest.restoreAllMocks();
23
26
  ConfigLoader.clear();
24
27
  setupLlmMocks();
25
28
  resetLlmMocks();
26
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
29
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
27
30
  });
28
31
 
29
32
  test('should NOT allow reflexion to overwrite critical step properties', async () => {
@@ -7,9 +7,10 @@ import {
7
7
  setupLlmMocks,
8
8
  } from './__test__/llm-test-setup';
9
9
 
10
- import { beforeAll, beforeEach, describe, expect, jest, mock, test } from 'bun:test';
10
+ import { beforeAll, beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
11
11
  import type { Step, Workflow } from '../parser/schema';
12
12
  import { ConfigLoader } from '../utils/config-loader';
13
+ import * as llmAdapter from './llm-adapter';
13
14
 
14
15
  // Note: mock.module() for llm-adapter is now handled by the preload file
15
16
  // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
@@ -19,6 +20,8 @@ import { ConfigLoader } from '../utils/config-loader';
19
20
  let WorkflowRunner: any;
20
21
 
21
22
  describe('WorkflowRunner Reflexion', () => {
23
+ let getModelSpy: ReturnType<typeof spyOn>;
24
+
22
25
  beforeAll(async () => {
23
26
  // Set up config
24
27
  ConfigLoader.setConfig({
@@ -34,7 +37,7 @@ describe('WorkflowRunner Reflexion', () => {
34
37
  expression: { strict: false },
35
38
  } as any);
36
39
 
37
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
40
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
38
41
  setupLlmMocks();
39
42
 
40
43
  setCurrentChatFn(async () => ({
@@ -50,8 +53,8 @@ describe('WorkflowRunner Reflexion', () => {
50
53
  ConfigLoader.clear();
51
54
  jest.restoreAllMocks();
52
55
  setupLlmMocks();
53
- setupLlmMocks();
54
56
  resetLlmMocks();
57
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
55
58
  setCurrentChatFn(async () => ({
56
59
  message: { role: 'assistant', content: JSON.stringify({ run: 'echo "fixed"' }) },
57
60
  }));
@@ -1,6 +1,6 @@
1
1
  import type { ExpressionContext } from '../../expression/evaluator.ts';
2
2
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
3
- import type { Workflow } from '../../parser/schema.ts';
3
+ import type { Step, Workflow } from '../../parser/schema.ts';
4
4
  import type { Logger } from '../../utils/logger.ts';
5
5
  import type { WorkflowState } from '../workflow-state.ts';
6
6
 
@@ -92,7 +92,7 @@ export class ContextBuilder {
92
92
  /**
93
93
  * Builds input object for a specific step.
94
94
  */
95
- public buildStepInputs(step: any, context: ExpressionContext): Record<string, unknown> {
95
+ public buildStepInputs(step: Step, context: ExpressionContext): Record<string, unknown> {
96
96
  const stripUndefined = (value: Record<string, unknown>) => {
97
97
  const result: Record<string, unknown> = {};
98
98
  for (const [key, val] of Object.entries(value)) {
@@ -165,8 +165,17 @@ export class ContextBuilder {
165
165
  inputType: step.inputType,
166
166
  });
167
167
  case 'sleep': {
168
- const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
169
- return { duration: Number(evaluated) };
168
+ return stripUndefined({
169
+ duration:
170
+ step.duration !== undefined
171
+ ? Number(ExpressionEvaluator.evaluate(step.duration.toString(), context))
172
+ : undefined,
173
+ until:
174
+ step.until !== undefined
175
+ ? ExpressionEvaluator.evaluateString(step.until, context)
176
+ : undefined,
177
+ durable: step.durable,
178
+ });
170
179
  }
171
180
  case 'llm':
172
181
  return stripUndefined({
@@ -1,4 +1,5 @@
1
1
  import type { Workflow, WorkflowInput } from '../../parser/schema.ts';
2
+ import { Redactor } from '../../utils/redactor.ts';
2
3
  import { validateJsonSchema } from '../../utils/schema-validator.ts';
3
4
  import { SecretManager } from './secret-manager.ts';
4
5
 
@@ -6,7 +7,7 @@ import { SecretManager } from './secret-manager.ts';
6
7
  * Service for validating workflow inputs and applying defaults.
7
8
  */
8
9
  export class WorkflowValidator {
9
- public static readonly REDACTED_PLACEHOLDER = '[REDACTED]';
10
+ public static readonly REDACTED_PLACEHOLDER = Redactor.REDACTED_PLACEHOLDER;
10
11
 
11
12
  constructor(
12
13
  private workflow: Workflow,
@@ -142,7 +142,8 @@ describe('AST-Grep Tools', () => {
142
142
  };
143
143
 
144
144
  expect(() => {
145
- vm.runInNewContext(script, sandbox);
145
+ // Wrap in async IIFE to support top-level return
146
+ vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
146
147
  }).not.toThrow();
147
148
  });
148
149
  });
@@ -189,7 +190,8 @@ describe('AST-Grep Tools', () => {
189
190
  };
190
191
 
191
192
  expect(() => {
192
- vm.runInNewContext(script, sandbox);
193
+ // Wrap in async IIFE to support top-level return
194
+ vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
193
195
  }).not.toThrow();
194
196
  });
195
197
  });
@@ -54,12 +54,25 @@ describe('Standard Tools Execution Verification', () => {
54
54
  }),
55
55
  };
56
56
  }
57
+ if (mod === 'node:worker_threads') {
58
+ return {
59
+ Worker: class MockWorker {
60
+ on() {}
61
+ terminate() {}
62
+ },
63
+ parentPort: null,
64
+ workerData: null,
65
+ };
66
+ }
57
67
  return {};
58
68
  },
59
69
  };
60
70
 
61
71
  expect(() => {
62
- vm.runInNewContext(script, sandbox);
72
+ // Wrap the script in an async IIFE to match ProcessSandbox behavior
73
+ // ProcessSandbox wraps scripts: const __result = await (async () => { ${code} })();
74
+ const wrappedScript = `(async () => { ${script} })()`;
75
+ vm.runInNewContext(wrappedScript, sandbox);
63
76
  }).not.toThrow();
64
77
  });
65
78
  }
@@ -25,6 +25,7 @@ import type { ExpressionContext } from '../expression/evaluator';
25
25
  import * as agentParser from '../parser/agent-parser';
26
26
  import type { Agent, LlmStep, Step } from '../parser/schema';
27
27
  import { ConfigLoader } from '../utils/config-loader';
28
+ import * as llmAdapter from './llm-adapter';
28
29
  import type { StepResult } from './step-executor';
29
30
 
30
31
  // Note: mock.module() is now handled by the preload file
@@ -40,6 +41,7 @@ describe('Standard Tools Integration', () => {
40
41
  const testDir = join(process.cwd(), '.e2e-tmp', 'standard-tools-test');
41
42
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
42
43
  let parseAgentSpy: ReturnType<typeof spyOn>;
44
+ let getModelSpy: ReturnType<typeof spyOn>;
43
45
 
44
46
  beforeAll(async () => {
45
47
  // Setup config before importing the executor
@@ -54,6 +56,9 @@ describe('Standard Tools Integration', () => {
54
56
  model_mappings: {},
55
57
  } as any);
56
58
 
59
+ // Spy on getModel to return mock model
60
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
61
+
57
62
  // Ensure the mock model is set up
58
63
  setupLlmMocks();
59
64
 
@@ -88,6 +93,7 @@ describe('Standard Tools Integration', () => {
88
93
  afterEach(() => {
89
94
  resolveAgentPathSpy?.mockRestore();
90
95
  parseAgentSpy?.mockRestore();
96
+ getModelSpy?.mockClear();
91
97
  resetLlmMocks();
92
98
  });
93
99
 
@@ -39,7 +39,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
39
39
  id: 'std_read_file_lines',
40
40
  type: 'script',
41
41
  run: `
42
- (function() {
42
+ return (function() {
43
43
  const fs = require('node:fs');
44
44
  const path = require('node:path');
45
45
  const filePath = args.path;
@@ -113,7 +113,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
113
113
  id: 'std_list_files',
114
114
  type: 'script',
115
115
  run: `
116
- (function() {
116
+ return (function() {
117
117
  const fs = require('node:fs');
118
118
  const path = require('node:path');
119
119
  const dir = args.path || '.';
@@ -166,7 +166,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
166
166
  id: 'std_search_content',
167
167
  type: 'script',
168
168
  run: `
169
- (async function() {
169
+ return (async function() {
170
170
  const fs = require('node:fs');
171
171
  const path = require('node:path');
172
172
  const { globSync } = require('glob');
@@ -358,7 +358,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
358
358
  id: 'std_ast_grep_search',
359
359
  type: 'script',
360
360
  run: `
361
- (function() {
361
+ return (function() {
362
362
  const fs = require('node:fs');
363
363
  const path = require('node:path');
364
364
  const { Lang, parse } = require('@ast-grep/napi');
@@ -442,7 +442,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
442
442
  id: 'std_ast_grep_replace',
443
443
  type: 'script',
444
444
  run: `
445
- (function() {
445
+ return (function() {
446
446
  const fs = require('node:fs');
447
447
  const path = require('node:path');
448
448
  const { Lang, parse } = require('@ast-grep/napi');
@@ -565,12 +565,15 @@ export function validateStandardToolSecurity(
565
565
  'ast_grep_replace',
566
566
  ].includes(toolName)
567
567
  ) {
568
- const rawPath = args.path || args.dir || '.';
568
+ const rawPath = (args as any).path || (args as any).dir || '.';
569
569
  assertWithinCwd(rawPath);
570
570
 
571
571
  // For AST tools, validate all paths in the array
572
- if (['ast_grep_search', 'ast_grep_replace'].includes(toolName) && Array.isArray(args.paths)) {
573
- for (const p of args.paths) {
572
+ if (
573
+ ['ast_grep_search', 'ast_grep_replace'].includes(toolName) &&
574
+ Array.isArray((args as any).paths)
575
+ ) {
576
+ for (const p of (args as any).paths) {
574
577
  assertWithinCwd(p);
575
578
  }
576
579
  }
@@ -578,8 +581,8 @@ export function validateStandardToolSecurity(
578
581
 
579
582
  // 2. Check shell risk for run_command and guard working directory
580
583
  if (toolName === 'run_command') {
581
- assertWithinCwd(args.dir, 'Directory');
582
- if (!options.allowInsecure && detectShellInjectionRisk(args.command)) {
584
+ assertWithinCwd((args as any).dir, 'Directory');
585
+ if (!options.allowInsecure && detectShellInjectionRisk((args as any).command)) {
583
586
  throw new Error(
584
587
  `Security Error: Command contains risky shell characters. Use 'allowInsecure: true' on the llm step to execute this.`
585
588
  );
@@ -130,7 +130,7 @@ export async function executeStep(
130
130
  if (!executeWorkflowFn) {
131
131
  throw new Error('Workflow executor not provided');
132
132
  }
133
- result = await executeWorkflowFn(step, context, abortSignal);
133
+ result = await executeWorkflowFn(step, context, abortSignal, options.stepExecutionId);
134
134
  break;
135
135
  case 'script':
136
136
  result = await executeScriptStep(step, context, logger, {
@@ -183,9 +183,9 @@ export async function executeStep(
183
183
  runId,
184
184
  artifactRoot,
185
185
  executeLlmStep: injectedExecuteLlmStep || executeLlmStep,
186
- emitEvent: options.emitEvent,
187
186
  workflowName: options.workflowName,
188
187
  db: options.db,
188
+ depth: options.depth,
189
189
  }
190
190
  );
191
191
  break;
@@ -24,6 +24,7 @@ import type { ExpressionContext } from '../expression/evaluator';
24
24
  import * as agentParser from '../parser/agent-parser';
25
25
  import type { Agent, LlmStep, Step } from '../parser/schema';
26
26
  import { ConfigLoader } from '../utils/config-loader';
27
+ import * as llmAdapter from './llm-adapter';
27
28
  import type { StepResult } from './step-executor';
28
29
 
29
30
  // Note: mock.module() for llm-adapter is now handled by the preload file
@@ -43,6 +44,7 @@ interface MockToolCall {
43
44
  describe('llm-executor with tools and MCP', () => {
44
45
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
45
46
  let parseAgentSpy: ReturnType<typeof spyOn>;
47
+ let getModelSpy: ReturnType<typeof spyOn>;
46
48
 
47
49
  const createMockMcpClient = (
48
50
  options: {
@@ -71,7 +73,7 @@ describe('llm-executor with tools and MCP', () => {
71
73
  };
72
74
 
73
75
  beforeAll(async () => {
74
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
76
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
75
77
 
76
78
  // Set up config
77
79
  ConfigLoader.setConfig({
@@ -122,6 +124,7 @@ describe('llm-executor with tools and MCP', () => {
122
124
  afterEach(() => {
123
125
  resolveAgentPathSpy?.mockRestore();
124
126
  parseAgentSpy?.mockRestore();
127
+ getModelSpy?.mockClear();
125
128
  });
126
129
 
127
130
  afterAll(() => {