keystone-cli 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +30 -4
  2. package/package.json +17 -3
  3. package/src/cli.ts +3 -2
  4. package/src/commands/event.ts +9 -0
  5. package/src/commands/run.ts +17 -0
  6. package/src/db/dynamic-state-manager.ts +12 -9
  7. package/src/db/memory-db.test.ts +19 -1
  8. package/src/db/memory-db.ts +101 -22
  9. package/src/db/workflow-db.ts +181 -9
  10. package/src/expression/evaluator.ts +4 -1
  11. package/src/parser/schema.ts +2 -1
  12. package/src/runner/__test__/llm-test-setup.ts +43 -11
  13. package/src/runner/durable-timers.test.ts +1 -1
  14. package/src/runner/executors/dynamic-executor.ts +125 -88
  15. package/src/runner/executors/engine-executor.ts +10 -39
  16. package/src/runner/executors/file-executor.ts +38 -0
  17. package/src/runner/executors/foreach-executor.ts +170 -17
  18. package/src/runner/executors/human-executor.ts +18 -0
  19. package/src/runner/executors/llm/stream-handler.ts +103 -0
  20. package/src/runner/executors/llm/tool-manager.ts +342 -0
  21. package/src/runner/executors/llm-executor.ts +313 -550
  22. package/src/runner/executors/memory-executor.ts +41 -34
  23. package/src/runner/executors/shell-executor.ts +141 -54
  24. package/src/runner/executors/subworkflow-executor.ts +16 -0
  25. package/src/runner/executors/types.ts +3 -1
  26. package/src/runner/executors/verification_fixes.test.ts +46 -0
  27. package/src/runner/join-scheduling.test.ts +2 -1
  28. package/src/runner/llm-adapter.integration.test.ts +10 -5
  29. package/src/runner/llm-adapter.ts +46 -17
  30. package/src/runner/llm-clarification.test.ts +4 -1
  31. package/src/runner/llm-executor.test.ts +21 -7
  32. package/src/runner/mcp-client.ts +36 -2
  33. package/src/runner/mcp-server.ts +65 -36
  34. package/src/runner/memoization.test.ts +2 -2
  35. package/src/runner/recovery-security.test.ts +5 -2
  36. package/src/runner/reflexion.test.ts +6 -3
  37. package/src/runner/services/context-builder.ts +13 -4
  38. package/src/runner/services/workflow-validator.ts +2 -1
  39. package/src/runner/shell-executor.test.ts +107 -1
  40. package/src/runner/standard-tools-ast.test.ts +4 -2
  41. package/src/runner/standard-tools-execution.test.ts +14 -1
  42. package/src/runner/standard-tools-integration.test.ts +6 -0
  43. package/src/runner/standard-tools.ts +13 -10
  44. package/src/runner/step-executor.ts +2 -2
  45. package/src/runner/tool-integration.test.ts +4 -1
  46. package/src/runner/workflow-runner.test.ts +23 -12
  47. package/src/runner/workflow-runner.ts +174 -85
  48. package/src/runner/workflow-state.ts +186 -111
  49. package/src/ui/dashboard.tsx +17 -3
  50. package/src/utils/config-loader.ts +4 -0
  51. package/src/utils/constants.ts +4 -0
  52. package/src/utils/context-injector.test.ts +27 -27
  53. package/src/utils/context-injector.ts +68 -26
  54. package/src/utils/process-sandbox.ts +138 -148
  55. package/src/utils/redactor.ts +39 -9
  56. package/src/utils/resource-loader.ts +24 -19
  57. package/src/utils/sandbox.ts +6 -0
  58. package/src/utils/stream-utils.ts +58 -0
@@ -29,6 +29,7 @@ import type { ExpressionContext } from '../expression/evaluator';
29
29
  import * as agentParser from '../parser/agent-parser';
30
30
  import type { Agent, LlmStep, Step } from '../parser/schema';
31
31
  import { ConfigLoader } from '../utils/config-loader';
32
+ import * as llmAdapter from './llm-adapter';
32
33
  import type { StepResult } from './step-executor';
33
34
 
34
35
  // Note: mock.module() for llm-adapter is now handled by the preload file
@@ -66,6 +67,7 @@ describe('llm-executor', () => {
66
67
  let spawnSpy: ReturnType<typeof spyOn>;
67
68
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
68
69
  let parseAgentSpy: ReturnType<typeof spyOn>;
70
+ let getModelSpy: ReturnType<typeof spyOn>;
69
71
 
70
72
  // Default Mock Chat Logic
71
73
  const defaultMockChat = async (messages: LLMMessage[], _options: any) => {
@@ -184,7 +186,9 @@ describe('llm-executor', () => {
184
186
  ConfigLoader.clear();
185
187
  setupLlmMocks();
186
188
  resetLlmMocks();
187
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
189
+
190
+ // Spy on getModel to return our mock model directly
191
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
188
192
 
189
193
  // Mock agent parser to avoid file dependencies
190
194
  resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue('test-agent.md');
@@ -215,6 +219,7 @@ describe('llm-executor', () => {
215
219
  afterEach(() => {
216
220
  resolveAgentPathSpy?.mockRestore();
217
221
  parseAgentSpy?.mockRestore();
222
+ getModelSpy?.mockRestore();
218
223
  });
219
224
 
220
225
  afterAll(() => {
@@ -254,21 +259,25 @@ describe('llm-executor', () => {
254
259
  needs: [],
255
260
  maxIterations: 10,
256
261
  };
257
- const logger = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
262
+ const loggerSpy = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
263
+
264
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
258
265
 
259
266
  await executeLlmStep(
260
267
  step,
261
268
  { inputs: {}, steps: {} },
262
269
  async () => ({ status: 'success', output: 'ok' }),
263
- logger
270
+ loggerSpy
264
271
  );
265
272
 
266
- expect(logger.log).toHaveBeenCalledWith(
273
+ consoleSpy.mockRestore();
274
+
275
+ expect(loggerSpy.log).toHaveBeenCalledWith(
267
276
  expect.stringContaining(' 🛠️ Tool Call: test-tool {"val":123}')
268
277
  );
269
278
  });
270
279
 
271
- it('should return raw output logic if schema schema validation fails (no retry implemented)', async () => {
280
+ it('should return failed status if schema validation fails and JSON cannot be extracted', async () => {
272
281
  setupMockModel(defaultMockChat as any);
273
282
  const step: LlmStep = {
274
283
  id: 'l1',
@@ -282,13 +291,14 @@ describe('llm-executor', () => {
282
291
 
283
292
  // Case 1: Model returns text that is NOT valid JSON
284
293
  setupMockModel(async () => ({ message: { role: 'assistant', content: 'Not JSON' } }));
294
+
285
295
  const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
286
296
  status: 'success',
287
297
  output: 'ok',
288
298
  }));
289
299
 
290
- // current simple refactor doesn't implement retry, just returns text or throws
291
- expect(result.output).toBe('Not JSON');
300
+ expect(result.status).toBe('failed');
301
+ expect(result.error).toContain('Failed to extract valid JSON');
292
302
  });
293
303
 
294
304
  it('should handle tool not found', async () => {
@@ -301,11 +311,15 @@ describe('llm-executor', () => {
301
311
  maxIterations: 10,
302
312
  };
303
313
 
314
+ const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
315
+
304
316
  const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
305
317
  status: 'success',
306
318
  output: 'ok',
307
319
  }));
308
320
 
321
+ consoleSpy.mockRestore();
322
+
309
323
  expect(result.status).toBe('success');
310
324
  });
311
325
 
@@ -13,6 +13,9 @@ export const MCP_PROTOCOL_VERSION = MCP.PROTOCOL_VERSION;
13
13
  // Maximum buffer size for incoming messages (10MB) to prevent memory exhaustion
14
14
  const MAX_BUFFER_SIZE = 10 * 1024 * 1024;
15
15
 
16
+ // Track if we have already warned about SSRF limitations to avoid log spam
17
+ let hasWarnedSSRF = false;
18
+
16
19
  /**
17
20
  * Efficient line splitting without regex to prevent ReDoS attacks.
18
21
  * Handles \r\n, \r, and \n line endings.
@@ -99,7 +102,7 @@ function isPrivateIpAddress(address: string): boolean {
99
102
 
100
103
  export async function validateRemoteUrl(
101
104
  url: string,
102
- options: { allowInsecure?: boolean } = {}
105
+ options: { allowInsecure?: boolean; logger?: Logger } = {}
103
106
  ): Promise<void> {
104
107
  let parsed: URL;
105
108
  try {
@@ -114,6 +117,11 @@ export async function validateRemoteUrl(
114
117
  }
115
118
 
116
119
  // Require HTTPS in production
120
+ // SECURITY WARNING: This check is susceptible to TOCTOU (Time-of-Check to Time-of-Use)
121
+ // DNS rebinding attacks. A malicious domain could resolve to a public IP during this check
122
+ // and then switch to a private IP (e.g. 127.0.0.1) when the connection is actually made.
123
+ // Full protection requires resolving the IP once and using that IP for the connection,
124
+ // or using a proxy that enforces these rules.
117
125
  if (parsed.protocol !== 'https:') {
118
126
  throw new Error(
119
127
  `SSRF Protection: URL must use HTTPS. Got: ${parsed.protocol}. Set allowInsecure option to true if you trust this server.`
@@ -154,12 +162,28 @@ export async function validateRemoteUrl(
154
162
  // Resolve DNS to prevent hostnames that map to private IPs (DNS rebinding checks)
155
163
  // WARNING: This check is vulnerable to Time-of-Check Time-of-Use (TOCTOU) DNS Rebinding attacks.
156
164
  // A malicious DNS server could return a public IP here, then switch to a private IP for the actual fetch.
157
- // In a nodejs environment using standard fetch/native DNS, this is hard to fully prevent without
165
+ // In a nodejs/bun environment using standard fetch/native DNS, this is hard to fully prevent without
158
166
  // a custom agent that pins the IP or low-level socket inspection.
167
+ // Users requiring high security should run this in an isolated network environment (container/VM).
159
168
  // For now, this check provides "defense in depth" against accidental internal access.
169
+ // CRITICAL SECURITY NOTE: In high-security environments, do NOT rely solely on this check.
170
+ // Use network-level isolation (e.g. firewalls, service meshes, or egress proxies) to strictly block
171
+ // internal traffic from the Keystone process.
172
+ //
173
+ // Recommendation: Use 'allowInsecure: true' only in trusted environments.
160
174
  if (!isIP(hostname)) {
161
175
  try {
176
+ // WARNING: This check is vulnerable to DNS Rebinding (TOCTOU)
177
+ if (options.logger?.warn && !hasWarnedSSRF) {
178
+ options.logger.warn(
179
+ ' ⚠️ Security Note: Remote URL validation provides defense-in-depth but does not fully prevent DNS rebinding attacks.\n' +
180
+ ' For high-security environments, ensure network-level isolation (e.g. firewalls).'
181
+ );
182
+ hasWarnedSSRF = true;
183
+ }
184
+
162
185
  const resolved = await lookup(hostname, { all: true });
186
+
163
187
  for (const record of resolved) {
164
188
  if (isPrivateIpAddress(record.address)) {
165
189
  throw new Error(
@@ -168,6 +192,16 @@ export async function validateRemoteUrl(
168
192
  }
169
193
  }
170
194
  } catch (error) {
195
+ if (error instanceof Error && error.message.startsWith('SSRF Protection')) {
196
+ throw error;
197
+ }
198
+
199
+ if (options.logger?.warn) {
200
+ options.logger.warn(
201
+ `[Security Warning] validateRemoteUrl check for ${hostname} failed/bypassed: ${error}`
202
+ );
203
+ }
204
+
171
205
  throw new Error(
172
206
  `SSRF Protection: Failed to resolve hostname "${hostname}": ${
173
207
  error instanceof Error ? error.message : String(error)
@@ -248,14 +248,22 @@ export class MCPServer {
248
248
  const path = WorkflowRegistry.resolvePath(workflow_name);
249
249
  const workflow = WorkflowParser.loadWorkflow(path);
250
250
 
251
- // Use a custom logger that captures logs for the MCP response
251
+ // Use a fixed-size ring buffer for logs to prevent memory leaks
252
+ const MAX_LOG_LINES = 1000;
252
253
  const logs: string[] = [];
254
+ const addLog = (msg: string) => {
255
+ if (logs.length >= MAX_LOG_LINES) {
256
+ logs.shift(); // Remove oldest
257
+ }
258
+ logs.push(msg);
259
+ };
260
+
253
261
  const logger = {
254
- log: (msg: string) => logs.push(msg),
255
- error: (msg: string) => logs.push(`ERROR: ${msg}`),
256
- warn: (msg: string) => logs.push(`WARN: ${msg}`),
257
- info: (msg: string) => logs.push(`INFO: ${msg}`),
258
- debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
262
+ log: (msg: string) => addLog(msg),
263
+ error: (msg: string) => addLog(`ERROR: ${msg}`),
264
+ warn: (msg: string) => addLog(`WARN: ${msg}`),
265
+ info: (msg: string) => addLog(`INFO: ${msg}`),
266
+ debug: (msg: string) => addLog(`DEBUG: ${msg}`),
259
267
  };
260
268
 
261
269
  const runner = this.runnerFactory(workflow, {
@@ -545,37 +553,58 @@ export class MCPServer {
545
553
  const runId = runner.getRunId();
546
554
 
547
555
  // Start the workflow asynchronously
548
- runner.run().then(
549
- async (outputs) => {
550
- // Update DB with success on completion
551
- await this.db.updateRunStatus(runId, 'success', outputs);
552
- },
553
- async (error) => {
554
- // Update DB with failure
555
- if (error instanceof WorkflowSuspendedError) {
556
- await this.db.updateRunStatus(runId, 'paused');
557
- this.sendNotification('notifications/keystone.human_input', {
558
- run_id: runId,
559
- workflow: workflow_name,
560
- status: 'paused',
561
- message: error.message,
562
- step_id: error.stepId,
563
- input_type: error.inputType,
564
- instructions:
565
- error.inputType === 'confirm'
566
- ? 'Use answer_human_input with input="confirm" to proceed.'
567
- : 'Use answer_human_input with the required text input.',
568
- });
569
- } else {
570
- await this.db.updateRunStatus(
571
- runId,
572
- 'failed',
573
- undefined,
574
- error instanceof Error ? error.message : String(error)
575
- );
556
+ // Start the workflow asynchronously
557
+ runner
558
+ .run()
559
+ .then(
560
+ async (outputs) => {
561
+ try {
562
+ // Update DB with success on completion
563
+ await this.db.updateRunStatus(runId, 'success', outputs);
564
+ } catch (e) {
565
+ this.logger.error(
566
+ `[McpServer] Failed to update success status for run ${runId}: ${e}`
567
+ );
568
+ }
569
+ },
570
+ async (error) => {
571
+ try {
572
+ // Update DB with failure
573
+ if (error instanceof WorkflowSuspendedError) {
574
+ await this.db.updateRunStatus(runId, 'paused');
575
+ this.sendNotification('notifications/keystone.human_input', {
576
+ run_id: runId,
577
+ workflow: workflow_name,
578
+ status: 'paused',
579
+ message: error.message,
580
+ step_id: error.stepId,
581
+ input_type: error.inputType,
582
+ instructions:
583
+ error.inputType === 'confirm'
584
+ ? 'Use answer_human_input with input="confirm" to proceed.'
585
+ : 'Use answer_human_input with the required text input.',
586
+ });
587
+ } else {
588
+ await this.db.updateRunStatus(
589
+ runId,
590
+ 'failed',
591
+ undefined,
592
+ error instanceof Error ? error.message : String(error)
593
+ );
594
+ }
595
+ } catch (e) {
596
+ this.logger.error(
597
+ `[McpServer] Failed to update failure status for run ${runId}: ${e}`
598
+ );
599
+ }
576
600
  }
577
- }
578
- );
601
+ )
602
+ .catch((e) => {
603
+ // Catch any other errors in the promise chain construction
604
+ this.logger.error(
605
+ `[McpServer] Unexpected error in async workflow execution for run ${runId}: ${e}`
606
+ );
607
+ });
579
608
 
580
609
  return {
581
610
  jsonrpc: '2.0',
@@ -86,13 +86,13 @@ describe('Workflow Memoization (Auto-Hashing)', () => {
86
86
 
87
87
  // We can check if `executeLlmStep` was called.
88
88
  let called = false;
89
- const trackingExecute = async (s: any, c: any) => {
89
+ // Match signature of executeLlmStep (at least the required args)
90
+ const trackingExecute = async (s: any, c: any, _execFn: any, ..._args: any[]) => {
90
91
  called = true;
91
92
  return mockExecuteLlmStep(s, c);
92
93
  };
93
94
 
94
95
  // Override the executor for runner2 to track calls
95
- // @ts-ignore - hacking private property or constructor option
96
96
  // Actually we passed it in constructor option.
97
97
  const runner2Tracked = new WorkflowRunner(workflow, {
98
98
  dbPath,
@@ -10,20 +10,23 @@ import {
10
10
 
11
11
  import { ConfigLoader } from '../utils/config-loader';
12
12
 
13
- import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
13
+ import { beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
14
14
  import type { Step, Workflow } from '../parser/schema';
15
+ import * as llmAdapter from './llm-adapter';
15
16
 
16
17
  // Note: mock.module() for llm-adapter is now handled by the preload file
17
18
  // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
18
19
  // Instead, we use a mock model that the real ai SDK calls.
19
20
 
20
21
  describe('WorkflowRunner Recovery Security', () => {
22
+ let getModelSpy: ReturnType<typeof spyOn>;
23
+
21
24
  beforeEach(() => {
22
25
  jest.restoreAllMocks();
23
26
  ConfigLoader.clear();
24
27
  setupLlmMocks();
25
28
  resetLlmMocks();
26
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
29
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
27
30
  });
28
31
 
29
32
  test('should NOT allow reflexion to overwrite critical step properties', async () => {
@@ -7,9 +7,10 @@ import {
7
7
  setupLlmMocks,
8
8
  } from './__test__/llm-test-setup';
9
9
 
10
- import { beforeAll, beforeEach, describe, expect, jest, mock, test } from 'bun:test';
10
+ import { beforeAll, beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
11
11
  import type { Step, Workflow } from '../parser/schema';
12
12
  import { ConfigLoader } from '../utils/config-loader';
13
+ import * as llmAdapter from './llm-adapter';
13
14
 
14
15
  // Note: mock.module() for llm-adapter is now handled by the preload file
15
16
  // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
@@ -19,6 +20,8 @@ import { ConfigLoader } from '../utils/config-loader';
19
20
  let WorkflowRunner: any;
20
21
 
21
22
  describe('WorkflowRunner Reflexion', () => {
23
+ let getModelSpy: ReturnType<typeof spyOn>;
24
+
22
25
  beforeAll(async () => {
23
26
  // Set up config
24
27
  ConfigLoader.setConfig({
@@ -34,7 +37,7 @@ describe('WorkflowRunner Reflexion', () => {
34
37
  expression: { strict: false },
35
38
  } as any);
36
39
 
37
- mockGetModel.mockResolvedValue(createUnifiedMockModel());
40
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
38
41
  setupLlmMocks();
39
42
 
40
43
  setCurrentChatFn(async () => ({
@@ -50,8 +53,8 @@ describe('WorkflowRunner Reflexion', () => {
50
53
  ConfigLoader.clear();
51
54
  jest.restoreAllMocks();
52
55
  setupLlmMocks();
53
- setupLlmMocks();
54
56
  resetLlmMocks();
57
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
55
58
  setCurrentChatFn(async () => ({
56
59
  message: { role: 'assistant', content: JSON.stringify({ run: 'echo "fixed"' }) },
57
60
  }));
@@ -1,6 +1,6 @@
1
1
  import type { ExpressionContext } from '../../expression/evaluator.ts';
2
2
  import { ExpressionEvaluator } from '../../expression/evaluator.ts';
3
- import type { Workflow } from '../../parser/schema.ts';
3
+ import type { Step, Workflow } from '../../parser/schema.ts';
4
4
  import type { Logger } from '../../utils/logger.ts';
5
5
  import type { WorkflowState } from '../workflow-state.ts';
6
6
 
@@ -92,7 +92,7 @@ export class ContextBuilder {
92
92
  /**
93
93
  * Builds input object for a specific step.
94
94
  */
95
- public buildStepInputs(step: any, context: ExpressionContext): Record<string, unknown> {
95
+ public buildStepInputs(step: Step, context: ExpressionContext): Record<string, unknown> {
96
96
  const stripUndefined = (value: Record<string, unknown>) => {
97
97
  const result: Record<string, unknown> = {};
98
98
  for (const [key, val] of Object.entries(value)) {
@@ -165,8 +165,17 @@ export class ContextBuilder {
165
165
  inputType: step.inputType,
166
166
  });
167
167
  case 'sleep': {
168
- const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
169
- return { duration: Number(evaluated) };
168
+ return stripUndefined({
169
+ duration:
170
+ step.duration !== undefined
171
+ ? Number(ExpressionEvaluator.evaluate(step.duration.toString(), context))
172
+ : undefined,
173
+ until:
174
+ step.until !== undefined
175
+ ? ExpressionEvaluator.evaluateString(step.until, context)
176
+ : undefined,
177
+ durable: step.durable,
178
+ });
170
179
  }
171
180
  case 'llm':
172
181
  return stripUndefined({
@@ -1,4 +1,5 @@
1
1
  import type { Workflow, WorkflowInput } from '../../parser/schema.ts';
2
+ import { Redactor } from '../../utils/redactor.ts';
2
3
  import { validateJsonSchema } from '../../utils/schema-validator.ts';
3
4
  import { SecretManager } from './secret-manager.ts';
4
5
 
@@ -6,7 +7,7 @@ import { SecretManager } from './secret-manager.ts';
6
7
  * Service for validating workflow inputs and applying defaults.
7
8
  */
8
9
  export class WorkflowValidator {
9
- public static readonly REDACTED_PLACEHOLDER = '[REDACTED]';
10
+ public static readonly REDACTED_PLACEHOLDER = Redactor.REDACTED_PLACEHOLDER;
10
11
 
11
12
  constructor(
12
13
  private workflow: Workflow,
@@ -1,9 +1,17 @@
1
1
  import { describe, expect, it } from 'bun:test';
2
+ import { realpathSync } from 'node:fs';
3
+ import { tmpdir } from 'node:os';
4
+ import { basename, resolve as resolvePath, sep } from 'node:path';
2
5
  import type { ExpressionContext } from '../expression/evaluator';
6
+ import { ConfigSchema } from '../parser/config-schema';
3
7
  import type { ShellStep } from '../parser/schema';
4
- import { escapeShellArg, executeShell } from './executors/shell-executor.ts';
8
+ import { ConfigLoader } from '../utils/config-loader';
9
+ import { ConsoleLogger } from '../utils/logger';
10
+ import { escapeShellArg, executeShell, executeShellStep } from './executors/shell-executor.ts';
5
11
 
6
12
  describe('shell-executor', () => {
13
+ const logger = new ConsoleLogger();
14
+
7
15
  describe('escapeShellArg', () => {
8
16
  it('should wrap in single quotes', () => {
9
17
  expect(escapeShellArg('hello')).toBe("'hello'");
@@ -174,4 +182,102 @@ describe('shell-executor', () => {
174
182
  expect(result.stdout.trim()).toBe('match');
175
183
  });
176
184
  });
185
+
186
+ describe('executeShellStep (args)', () => {
187
+ const context: ExpressionContext = {
188
+ inputs: {},
189
+ steps: {},
190
+ env: {},
191
+ };
192
+
193
+ it('should reject empty args', async () => {
194
+ const step: ShellStep = {
195
+ id: 'test',
196
+ type: 'shell',
197
+ needs: [],
198
+ args: [],
199
+ };
200
+
201
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(
202
+ /args must contain at least one element/
203
+ );
204
+ });
205
+
206
+ it('should apply step env for args execution', async () => {
207
+ const bunPath = process.execPath;
208
+ const step: ShellStep = {
209
+ id: 'test',
210
+ type: 'shell',
211
+ needs: [],
212
+ args: [bunPath, '-e', 'console.log(process.env.TEST_VAR ?? "")'],
213
+ env: { TEST_VAR: 'args-env' },
214
+ };
215
+
216
+ const result = await executeShellStep(step, context, logger);
217
+ expect(result.output?.stdout?.trim()).toBe('args-env');
218
+ });
219
+
220
+ it('should enforce denylist for args execution', async () => {
221
+ const bunPath = process.execPath;
222
+ const denied = basename(bunPath);
223
+
224
+ ConfigLoader.setConfig(
225
+ ConfigSchema.parse({
226
+ engines: { denylist: [denied] },
227
+ })
228
+ );
229
+
230
+ try {
231
+ const step: ShellStep = {
232
+ id: 'test',
233
+ type: 'shell',
234
+ needs: [],
235
+ args: [bunPath, '-e', 'console.log("nope")'],
236
+ };
237
+
238
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(/denylist/);
239
+ } finally {
240
+ ConfigLoader.clear();
241
+ }
242
+ });
243
+
244
+ it('should enforce allowOutsideCwd for args execution', async () => {
245
+ const bunPath = process.execPath;
246
+ const cwd = resolvePath(process.cwd());
247
+ let outsideDir = resolvePath(tmpdir());
248
+
249
+ if (outsideDir.startsWith(`${cwd}${sep}`)) {
250
+ const parent = resolvePath(cwd, '..');
251
+ if (parent !== cwd) {
252
+ outsideDir = parent;
253
+ }
254
+ }
255
+
256
+ if (outsideDir === cwd) {
257
+ return;
258
+ }
259
+
260
+ const step: ShellStep = {
261
+ id: 'test',
262
+ type: 'shell',
263
+ needs: [],
264
+ args: [bunPath, '-e', 'console.log(process.cwd())'],
265
+ dir: outsideDir,
266
+ };
267
+
268
+ await expect(executeShellStep(step, context, logger)).rejects.toThrow(
269
+ /outside the project directory/
270
+ );
271
+
272
+ const allowedStep: ShellStep = {
273
+ ...step,
274
+ allowOutsideCwd: true,
275
+ };
276
+
277
+ const result = await executeShellStep(allowedStep, context, logger);
278
+ const resolvedOutput = realpathSync(resolvePath(result.output?.stdout?.trim() || ''));
279
+ const resolvedOutside = realpathSync(outsideDir);
280
+ expect(resolvedOutput).toBe(resolvedOutside);
281
+ });
282
+ });
177
283
  });
@@ -142,7 +142,8 @@ describe('AST-Grep Tools', () => {
142
142
  };
143
143
 
144
144
  expect(() => {
145
- vm.runInNewContext(script, sandbox);
145
+ // Wrap in async IIFE to support top-level return
146
+ vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
146
147
  }).not.toThrow();
147
148
  });
148
149
  });
@@ -189,7 +190,8 @@ describe('AST-Grep Tools', () => {
189
190
  };
190
191
 
191
192
  expect(() => {
192
- vm.runInNewContext(script, sandbox);
193
+ // Wrap in async IIFE to support top-level return
194
+ vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
193
195
  }).not.toThrow();
194
196
  });
195
197
  });
@@ -54,12 +54,25 @@ describe('Standard Tools Execution Verification', () => {
54
54
  }),
55
55
  };
56
56
  }
57
+ if (mod === 'node:worker_threads') {
58
+ return {
59
+ Worker: class MockWorker {
60
+ on() {}
61
+ terminate() {}
62
+ },
63
+ parentPort: null,
64
+ workerData: null,
65
+ };
66
+ }
57
67
  return {};
58
68
  },
59
69
  };
60
70
 
61
71
  expect(() => {
62
- vm.runInNewContext(script, sandbox);
72
+ // Wrap the script in an async IIFE to match ProcessSandbox behavior
73
+ // ProcessSandbox wraps scripts: const __result = await (async () => { ${code} })();
74
+ const wrappedScript = `(async () => { ${script} })()`;
75
+ vm.runInNewContext(wrappedScript, sandbox);
63
76
  }).not.toThrow();
64
77
  });
65
78
  }
@@ -25,6 +25,7 @@ import type { ExpressionContext } from '../expression/evaluator';
25
25
  import * as agentParser from '../parser/agent-parser';
26
26
  import type { Agent, LlmStep, Step } from '../parser/schema';
27
27
  import { ConfigLoader } from '../utils/config-loader';
28
+ import * as llmAdapter from './llm-adapter';
28
29
  import type { StepResult } from './step-executor';
29
30
 
30
31
  // Note: mock.module() is now handled by the preload file
@@ -40,6 +41,7 @@ describe('Standard Tools Integration', () => {
40
41
  const testDir = join(process.cwd(), '.e2e-tmp', 'standard-tools-test');
41
42
  let resolveAgentPathSpy: ReturnType<typeof spyOn>;
42
43
  let parseAgentSpy: ReturnType<typeof spyOn>;
44
+ let getModelSpy: ReturnType<typeof spyOn>;
43
45
 
44
46
  beforeAll(async () => {
45
47
  // Setup config before importing the executor
@@ -54,6 +56,9 @@ describe('Standard Tools Integration', () => {
54
56
  model_mappings: {},
55
57
  } as any);
56
58
 
59
+ // Spy on getModel to return mock model
60
+ getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
61
+
57
62
  // Ensure the mock model is set up
58
63
  setupLlmMocks();
59
64
 
@@ -88,6 +93,7 @@ describe('Standard Tools Integration', () => {
88
93
  afterEach(() => {
89
94
  resolveAgentPathSpy?.mockRestore();
90
95
  parseAgentSpy?.mockRestore();
96
+ getModelSpy?.mockClear();
91
97
  resetLlmMocks();
92
98
  });
93
99