npm - keystone-cli - Versions diffs - 2.0.0 → 2.1.0 - Mend

keystone-cli 2.0.0 → 2.1.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (57)

package/README.md +43 -4
package/package.json +4 -1
package/src/cli.ts +1 -0
package/src/commands/event.ts +9 -0
package/src/commands/run.ts +17 -0
package/src/db/dynamic-state-manager.ts +12 -9
package/src/db/memory-db.test.ts +19 -1
package/src/db/memory-db.ts +101 -22
package/src/db/workflow-db.ts +181 -9
package/src/expression/evaluator.ts +4 -1
package/src/parser/config-schema.ts +6 -0
package/src/parser/schema.ts +1 -0
package/src/runner/__test__/llm-test-setup.ts +43 -11
package/src/runner/durable-timers.test.ts +1 -1
package/src/runner/executors/dynamic-executor.ts +125 -88
package/src/runner/executors/engine-executor.ts +10 -39
package/src/runner/executors/file-executor.ts +67 -0
package/src/runner/executors/foreach-executor.ts +170 -17
package/src/runner/executors/human-executor.ts +18 -0
package/src/runner/executors/llm/stream-handler.ts +103 -0
package/src/runner/executors/llm/tool-manager.ts +360 -0
package/src/runner/executors/llm-executor.ts +288 -555
package/src/runner/executors/memory-executor.ts +41 -34
package/src/runner/executors/shell-executor.ts +96 -52
package/src/runner/executors/subworkflow-executor.ts +16 -0
package/src/runner/executors/types.ts +3 -1
package/src/runner/executors/verification_fixes.test.ts +46 -0
package/src/runner/join-scheduling.test.ts +2 -1
package/src/runner/llm-adapter.integration.test.ts +10 -5
package/src/runner/llm-adapter.ts +57 -18
package/src/runner/llm-clarification.test.ts +4 -1
package/src/runner/llm-executor.test.ts +21 -7
package/src/runner/mcp-client.ts +36 -2
package/src/runner/mcp-server.ts +65 -36
package/src/runner/recovery-security.test.ts +5 -2
package/src/runner/reflexion.test.ts +6 -3
package/src/runner/services/context-builder.ts +13 -4
package/src/runner/services/workflow-validator.ts +2 -1
package/src/runner/standard-tools-ast.test.ts +4 -2
package/src/runner/standard-tools-execution.test.ts +14 -1
package/src/runner/standard-tools-integration.test.ts +6 -0
package/src/runner/standard-tools.ts +13 -10
package/src/runner/step-executor.ts +2 -2
package/src/runner/tool-integration.test.ts +4 -1
package/src/runner/workflow-runner.test.ts +23 -12
package/src/runner/workflow-runner.ts +172 -79
package/src/runner/workflow-state.ts +181 -111
package/src/ui/dashboard.tsx +17 -3
package/src/utils/config-loader.ts +4 -0
package/src/utils/constants.ts +4 -0
package/src/utils/context-injector.test.ts +27 -27
package/src/utils/context-injector.ts +68 -26
package/src/utils/process-sandbox.ts +138 -148
package/src/utils/redactor.ts +39 -9
package/src/utils/resource-loader.ts +24 -19
package/src/utils/sandbox.ts +6 -0
package/src/utils/stream-utils.ts +58 -0

package/src/runner/llm-executor.test.ts CHANGED Viewed

@@ -29,6 +29,7 @@ import type { ExpressionContext } from '../expression/evaluator';
 import * as agentParser from '../parser/agent-parser';
 import type { Agent, LlmStep, Step } from '../parser/schema';
 import { ConfigLoader } from '../utils/config-loader';
+import * as llmAdapter from './llm-adapter';
 import type { StepResult } from './step-executor';
 // Note: mock.module() for llm-adapter is now handled by the preload file
@@ -66,6 +67,7 @@ describe('llm-executor', () => {
   let spawnSpy: ReturnType<typeof spyOn>;
   let resolveAgentPathSpy: ReturnType<typeof spyOn>;
   let parseAgentSpy: ReturnType<typeof spyOn>;
+  let getModelSpy: ReturnType<typeof spyOn>;
   // Default Mock Chat Logic
   const defaultMockChat = async (messages: LLMMessage[], _options: any) => {
@@ -184,7 +186,9 @@ describe('llm-executor', () => {
     ConfigLoader.clear();
     setupLlmMocks();
     resetLlmMocks();
-    mockGetModel.mockResolvedValue(createUnifiedMockModel());
+    // Spy on getModel to return our mock model directly
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
     // Mock agent parser to avoid file dependencies
     resolveAgentPathSpy = spyOn(agentParser, 'resolveAgentPath').mockReturnValue('test-agent.md');
@@ -215,6 +219,7 @@ describe('llm-executor', () => {
   afterEach(() => {
     resolveAgentPathSpy?.mockRestore();
     parseAgentSpy?.mockRestore();
+    getModelSpy?.mockRestore();
   });
   afterAll(() => {
@@ -254,21 +259,25 @@ describe('llm-executor', () => {
       needs: [],
       maxIterations: 10,
     };
-    const logger = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
+    const loggerSpy = { log: mock(), error: mock(), warn: mock(), info: mock(), debug: mock() };
+    const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
     await executeLlmStep(
       step,
       { inputs: {}, steps: {} },
       async () => ({ status: 'success', output: 'ok' }),
-      logger
+      loggerSpy
     );
-    expect(logger.log).toHaveBeenCalledWith(
+    consoleSpy.mockRestore();
+    expect(loggerSpy.log).toHaveBeenCalledWith(
       expect.stringContaining('  🛠️  Tool Call: test-tool {"val":123}')
     );
   });
-  it('should return raw output logic if schema schema validation fails (no retry implemented)', async () => {
+  it('should return failed status if schema validation fails and JSON cannot be extracted', async () => {
     setupMockModel(defaultMockChat as any);
     const step: LlmStep = {
       id: 'l1',
@@ -282,13 +291,14 @@ describe('llm-executor', () => {
     // Case 1: Model returns text that is NOT valid JSON
     setupMockModel(async () => ({ message: { role: 'assistant', content: 'Not JSON' } }));
     const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
       status: 'success',
       output: 'ok',
     }));
-    // current simple refactor doesn't implement retry, just returns text or throws
-    expect(result.output).toBe('Not JSON');
+    expect(result.status).toBe('failed');
+    expect(result.error).toContain('Failed to extract valid JSON');
   });
   it('should handle tool not found', async () => {
@@ -301,11 +311,15 @@ describe('llm-executor', () => {
       maxIterations: 10,
     };
+    const consoleSpy = spyOn(console, 'error').mockImplementation(() => {});
     const result = await executeLlmStep(step, { inputs: {}, steps: {} }, async () => ({
       status: 'success',
       output: 'ok',
     }));
+    consoleSpy.mockRestore();
     expect(result.status).toBe('success');
   });

package/src/runner/mcp-client.ts CHANGED Viewed

@@ -13,6 +13,9 @@ export const MCP_PROTOCOL_VERSION = MCP.PROTOCOL_VERSION;
 // Maximum buffer size for incoming messages (10MB) to prevent memory exhaustion
 const MAX_BUFFER_SIZE = 10 * 1024 * 1024;
+// Track if we have already warned about SSRF limitations to avoid log spam
+let hasWarnedSSRF = false;
 /**
  * Efficient line splitting without regex to prevent ReDoS attacks.
  * Handles \r\n, \r, and \n line endings.
@@ -99,7 +102,7 @@ function isPrivateIpAddress(address: string): boolean {
 export async function validateRemoteUrl(
   url: string,
-  options: { allowInsecure?: boolean } = {}
+  options: { allowInsecure?: boolean; logger?: Logger } = {}
 ): Promise<void> {
   let parsed: URL;
   try {
@@ -114,6 +117,11 @@ export async function validateRemoteUrl(
   }
   // Require HTTPS in production
+  // SECURITY WARNING: This check is susceptible to TOCTOU (Time-of-Check to Time-of-Use)
+  // DNS rebinding attacks. A malicious domain could resolve to a public IP during this check
+  // and then switch to a private IP (e.g. 127.0.0.1) when the connection is actually made.
+  // Full protection requires resolving the IP once and using that IP for the connection,
+  // or using a proxy that enforces these rules.
   if (parsed.protocol !== 'https:') {
     throw new Error(
       `SSRF Protection: URL must use HTTPS. Got: ${parsed.protocol}. Set allowInsecure option to true if you trust this server.`
@@ -154,12 +162,28 @@ export async function validateRemoteUrl(
   // Resolve DNS to prevent hostnames that map to private IPs (DNS rebinding checks)
   // WARNING: This check is vulnerable to Time-of-Check Time-of-Use (TOCTOU) DNS Rebinding attacks.
   // A malicious DNS server could return a public IP here, then switch to a private IP for the actual fetch.
-  // In a nodejs environment using standard fetch/native DNS, this is hard to fully prevent without
+  // In a nodejs/bun environment using standard fetch/native DNS, this is hard to fully prevent without
   // a custom agent that pins the IP or low-level socket inspection.
+  // Users requiring high security should run this in an isolated network environment (container/VM).
   // For now, this check provides "defense in depth" against accidental internal access.
+  // CRITICAL SECURITY NOTE: In high-security environments, do NOT rely solely on this check.
+  // Use network-level isolation (e.g. firewalls, service meshes, or egress proxies) to strictly block
+  // internal traffic from the Keystone process.
+  //
+  // Recommendation: Use 'allowInsecure: true' only in trusted environments.
   if (!isIP(hostname)) {
     try {
+      // WARNING: This check is vulnerable to DNS Rebinding (TOCTOU)
+      if (options.logger?.warn && !hasWarnedSSRF) {
+        options.logger.warn(
+          '  ⚠️  Security Note: Remote URL validation provides defense-in-depth but does not fully prevent DNS rebinding attacks.\n' +
+            '      For high-security environments, ensure network-level isolation (e.g. firewalls).'
+        );
+        hasWarnedSSRF = true;
+      }
       const resolved = await lookup(hostname, { all: true });
       for (const record of resolved) {
         if (isPrivateIpAddress(record.address)) {
           throw new Error(
@@ -168,6 +192,16 @@ export async function validateRemoteUrl(
         }
       }
     } catch (error) {
+      if (error instanceof Error && error.message.startsWith('SSRF Protection')) {
+        throw error;
+      }
+      if (options.logger?.warn) {
+        options.logger.warn(
+          `[Security Warning] validateRemoteUrl check for ${hostname} failed/bypassed: ${error}`
+        );
+      }
       throw new Error(
         `SSRF Protection: Failed to resolve hostname "${hostname}": ${
           error instanceof Error ? error.message : String(error)

package/src/runner/mcp-server.ts CHANGED Viewed

@@ -248,14 +248,22 @@ export class MCPServer {
             const path = WorkflowRegistry.resolvePath(workflow_name);
             const workflow = WorkflowParser.loadWorkflow(path);
-            // Use a custom logger that captures logs for the MCP response
+            // Use a fixed-size ring buffer for logs to prevent memory leaks
+            const MAX_LOG_LINES = 1000;
             const logs: string[] = [];
+            const addLog = (msg: string) => {
+              if (logs.length >= MAX_LOG_LINES) {
+                logs.shift(); // Remove oldest
+              }
+              logs.push(msg);
+            };
             const logger = {
-              log: (msg: string) => logs.push(msg),
-              error: (msg: string) => logs.push(`ERROR: ${msg}`),
-              warn: (msg: string) => logs.push(`WARN: ${msg}`),
-              info: (msg: string) => logs.push(`INFO: ${msg}`),
-              debug: (msg: string) => logs.push(`DEBUG: ${msg}`),
+              log: (msg: string) => addLog(msg),
+              error: (msg: string) => addLog(`ERROR: ${msg}`),
+              warn: (msg: string) => addLog(`WARN: ${msg}`),
+              info: (msg: string) => addLog(`INFO: ${msg}`),
+              debug: (msg: string) => addLog(`DEBUG: ${msg}`),
             };
             const runner = this.runnerFactory(workflow, {
@@ -545,37 +553,58 @@ export class MCPServer {
             const runId = runner.getRunId();
             // Start the workflow asynchronously
-            runner.run().then(
-              async (outputs) => {
-                // Update DB with success on completion
-                await this.db.updateRunStatus(runId, 'success', outputs);
-              },
-              async (error) => {
-                // Update DB with failure
-                if (error instanceof WorkflowSuspendedError) {
-                  await this.db.updateRunStatus(runId, 'paused');
-                  this.sendNotification('notifications/keystone.human_input', {
-                    run_id: runId,
-                    workflow: workflow_name,
-                    status: 'paused',
-                    message: error.message,
-                    step_id: error.stepId,
-                    input_type: error.inputType,
-                    instructions:
-                      error.inputType === 'confirm'
-                        ? 'Use answer_human_input with input="confirm" to proceed.'
-                        : 'Use answer_human_input with the required text input.',
-                  });
-                } else {
-                  await this.db.updateRunStatus(
-                    runId,
-                    'failed',
-                    undefined,
-                    error instanceof Error ? error.message : String(error)
-                  );
+            // Start the workflow asynchronously
+            runner
+              .run()
+              .then(
+                async (outputs) => {
+                  try {
+                    // Update DB with success on completion
+                    await this.db.updateRunStatus(runId, 'success', outputs);
+                  } catch (e) {
+                    this.logger.error(
+                      `[McpServer] Failed to update success status for run ${runId}: ${e}`
+                    );
+                  }
+                },
+                async (error) => {
+                  try {
+                    // Update DB with failure
+                    if (error instanceof WorkflowSuspendedError) {
+                      await this.db.updateRunStatus(runId, 'paused');
+                      this.sendNotification('notifications/keystone.human_input', {
+                        run_id: runId,
+                        workflow: workflow_name,
+                        status: 'paused',
+                        message: error.message,
+                        step_id: error.stepId,
+                        input_type: error.inputType,
+                        instructions:
+                          error.inputType === 'confirm'
+                            ? 'Use answer_human_input with input="confirm" to proceed.'
+                            : 'Use answer_human_input with the required text input.',
+                      });
+                    } else {
+                      await this.db.updateRunStatus(
+                        runId,
+                        'failed',
+                        undefined,
+                        error instanceof Error ? error.message : String(error)
+                      );
+                    }
+                  } catch (e) {
+                    this.logger.error(
+                      `[McpServer] Failed to update failure status for run ${runId}: ${e}`
+                    );
+                  }
                 }
-              }
-            );
+              )
+              .catch((e) => {
+                // Catch any other errors in the promise chain construction
+                this.logger.error(
+                  `[McpServer] Unexpected error in async workflow execution for run ${runId}: ${e}`
+                );
+              });
             return {
               jsonrpc: '2.0',

package/src/runner/recovery-security.test.ts CHANGED Viewed

@@ -10,20 +10,23 @@ import {
 import { ConfigLoader } from '../utils/config-loader';
-import { beforeEach, describe, expect, jest, mock, test } from 'bun:test';
+import { beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
 import type { Step, Workflow } from '../parser/schema';
+import * as llmAdapter from './llm-adapter';
 // Note: mock.module() for llm-adapter is now handled by the preload file
 // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
 // Instead, we use a mock model that the real ai SDK calls.
 describe('WorkflowRunner Recovery Security', () => {
+  let getModelSpy: ReturnType<typeof spyOn>;
   beforeEach(() => {
     jest.restoreAllMocks();
     ConfigLoader.clear();
     setupLlmMocks();
     resetLlmMocks();
-    mockGetModel.mockResolvedValue(createUnifiedMockModel());
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
   });
   test('should NOT allow reflexion to overwrite critical step properties', async () => {

package/src/runner/reflexion.test.ts CHANGED Viewed

@@ -7,9 +7,10 @@ import {
   setupLlmMocks,
 } from './__test__/llm-test-setup';
-import { beforeAll, beforeEach, describe, expect, jest, mock, test } from 'bun:test';
+import { beforeAll, beforeEach, describe, expect, jest, mock, spyOn, test } from 'bun:test';
 import type { Step, Workflow } from '../parser/schema';
 import { ConfigLoader } from '../utils/config-loader';
+import * as llmAdapter from './llm-adapter';
 // Note: mock.module() for llm-adapter is now handled by the preload file
 // We should NOT mock 'ai' globally as it breaks other tests using the real ai SDK.
@@ -19,6 +20,8 @@ import { ConfigLoader } from '../utils/config-loader';
 let WorkflowRunner: any;
 describe('WorkflowRunner Reflexion', () => {
+  let getModelSpy: ReturnType<typeof spyOn>;
   beforeAll(async () => {
     // Set up config
     ConfigLoader.setConfig({
@@ -34,7 +37,7 @@ describe('WorkflowRunner Reflexion', () => {
       expression: { strict: false },
     } as any);
-    mockGetModel.mockResolvedValue(createUnifiedMockModel());
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
     setupLlmMocks();
     setCurrentChatFn(async () => ({
@@ -50,8 +53,8 @@ describe('WorkflowRunner Reflexion', () => {
     ConfigLoader.clear();
     jest.restoreAllMocks();
     setupLlmMocks();
-    setupLlmMocks();
     resetLlmMocks();
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
     setCurrentChatFn(async () => ({
       message: { role: 'assistant', content: JSON.stringify({ run: 'echo "fixed"' }) },
     }));

package/src/runner/services/context-builder.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { ExpressionContext } from '../../expression/evaluator.ts';
 import { ExpressionEvaluator } from '../../expression/evaluator.ts';
-import type { Workflow } from '../../parser/schema.ts';
+import type { Step, Workflow } from '../../parser/schema.ts';
 import type { Logger } from '../../utils/logger.ts';
 import type { WorkflowState } from '../workflow-state.ts';
@@ -92,7 +92,7 @@ export class ContextBuilder {
   /**
    * Builds input object for a specific step.
    */
-  public buildStepInputs(step: any, context: ExpressionContext): Record<string, unknown> {
+  public buildStepInputs(step: Step, context: ExpressionContext): Record<string, unknown> {
     const stripUndefined = (value: Record<string, unknown>) => {
       const result: Record<string, unknown> = {};
       for (const [key, val] of Object.entries(value)) {
@@ -165,8 +165,17 @@ export class ContextBuilder {
           inputType: step.inputType,
         });
       case 'sleep': {
-        const evaluated = ExpressionEvaluator.evaluate(step.duration.toString(), context);
-        return { duration: Number(evaluated) };
+        return stripUndefined({
+          duration:
+            step.duration !== undefined
+              ? Number(ExpressionEvaluator.evaluate(step.duration.toString(), context))
+              : undefined,
+          until:
+            step.until !== undefined
+              ? ExpressionEvaluator.evaluateString(step.until, context)
+              : undefined,
+          durable: step.durable,
+        });
       }
       case 'llm':
         return stripUndefined({

package/src/runner/services/workflow-validator.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { Workflow, WorkflowInput } from '../../parser/schema.ts';
+import { Redactor } from '../../utils/redactor.ts';
 import { validateJsonSchema } from '../../utils/schema-validator.ts';
 import { SecretManager } from './secret-manager.ts';
@@ -6,7 +7,7 @@ import { SecretManager } from './secret-manager.ts';
  * Service for validating workflow inputs and applying defaults.
  */
 export class WorkflowValidator {
-  public static readonly REDACTED_PLACEHOLDER = '[REDACTED]';
+  public static readonly REDACTED_PLACEHOLDER = Redactor.REDACTED_PLACEHOLDER;
   constructor(
     private workflow: Workflow,

package/src/runner/standard-tools-ast.test.ts CHANGED Viewed

@@ -142,7 +142,8 @@ describe('AST-Grep Tools', () => {
       };
       expect(() => {
-        vm.runInNewContext(script, sandbox);
+        // Wrap in async IIFE to support top-level return
+        vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
       }).not.toThrow();
     });
   });
@@ -189,7 +190,8 @@ describe('AST-Grep Tools', () => {
       };
       expect(() => {
-        vm.runInNewContext(script, sandbox);
+        // Wrap in async IIFE to support top-level return
+        vm.runInNewContext(`(async () => { ${script} })();`, sandbox);
       }).not.toThrow();
     });
   });

package/src/runner/standard-tools-execution.test.ts CHANGED Viewed

@@ -54,12 +54,25 @@ describe('Standard Tools Execution Verification', () => {
               }),
             };
           }
+          if (mod === 'node:worker_threads') {
+            return {
+              Worker: class MockWorker {
+                on() {}
+                terminate() {}
+              },
+              parentPort: null,
+              workerData: null,
+            };
+          }
           return {};
         },
       };
       expect(() => {
-        vm.runInNewContext(script, sandbox);
+        // Wrap the script in an async IIFE to match ProcessSandbox behavior
+        // ProcessSandbox wraps scripts: const __result = await (async () => { ${code} })();
+        const wrappedScript = `(async () => { ${script} })()`;
+        vm.runInNewContext(wrappedScript, sandbox);
       }).not.toThrow();
     });
   }

package/src/runner/standard-tools-integration.test.ts CHANGED Viewed

@@ -25,6 +25,7 @@ import type { ExpressionContext } from '../expression/evaluator';
 import * as agentParser from '../parser/agent-parser';
 import type { Agent, LlmStep, Step } from '../parser/schema';
 import { ConfigLoader } from '../utils/config-loader';
+import * as llmAdapter from './llm-adapter';
 import type { StepResult } from './step-executor';
 // Note: mock.module() is now handled by the preload file
@@ -40,6 +41,7 @@ describe('Standard Tools Integration', () => {
   const testDir = join(process.cwd(), '.e2e-tmp', 'standard-tools-test');
   let resolveAgentPathSpy: ReturnType<typeof spyOn>;
   let parseAgentSpy: ReturnType<typeof spyOn>;
+  let getModelSpy: ReturnType<typeof spyOn>;
   beforeAll(async () => {
     // Setup config before importing the executor
@@ -54,6 +56,9 @@ describe('Standard Tools Integration', () => {
       model_mappings: {},
     } as any);
+    // Spy on getModel to return mock model
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
     // Ensure the mock model is set up
     setupLlmMocks();
@@ -88,6 +93,7 @@ describe('Standard Tools Integration', () => {
   afterEach(() => {
     resolveAgentPathSpy?.mockRestore();
     parseAgentSpy?.mockRestore();
+    getModelSpy?.mockClear();
     resetLlmMocks();
   });

package/src/runner/standard-tools.ts CHANGED Viewed

@@ -39,7 +39,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
       id: 'std_read_file_lines',
       type: 'script',
       run: `
-        (function() {
+        return (function() {
           const fs = require('node:fs');
           const path = require('node:path');
           const filePath = args.path;
@@ -113,7 +113,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
       id: 'std_list_files',
       type: 'script',
       run: `
-        (function() {
+        return (function() {
           const fs = require('node:fs');
           const path = require('node:path');
           const dir = args.path || '.';
@@ -166,7 +166,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
       id: 'std_search_content',
       type: 'script',
       run: `
-        (async function() {
+        return (async function() {
           const fs = require('node:fs');
           const path = require('node:path');
           const { globSync } = require('glob');
@@ -358,7 +358,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
       id: 'std_ast_grep_search',
       type: 'script',
       run: `
-        (function() {
+        return (function() {
           const fs = require('node:fs');
           const path = require('node:path');
           const { Lang, parse } = require('@ast-grep/napi');
@@ -442,7 +442,7 @@ export const STANDARD_TOOLS: AgentTool[] = [
       id: 'std_ast_grep_replace',
       type: 'script',
       run: `
-        (function() {
+        return (function() {
           const fs = require('node:fs');
           const path = require('node:path');
           const { Lang, parse } = require('@ast-grep/napi');
@@ -565,12 +565,15 @@ export function validateStandardToolSecurity(
       'ast_grep_replace',
     ].includes(toolName)
   ) {
-    const rawPath = args.path || args.dir || '.';
+    const rawPath = (args as any).path || (args as any).dir || '.';
     assertWithinCwd(rawPath);
     // For AST tools, validate all paths in the array
-    if (['ast_grep_search', 'ast_grep_replace'].includes(toolName) && Array.isArray(args.paths)) {
-      for (const p of args.paths) {
+    if (
+      ['ast_grep_search', 'ast_grep_replace'].includes(toolName) &&
+      Array.isArray((args as any).paths)
+    ) {
+      for (const p of (args as any).paths) {
         assertWithinCwd(p);
       }
     }
@@ -578,8 +581,8 @@ export function validateStandardToolSecurity(
   // 2. Check shell risk for run_command and guard working directory
   if (toolName === 'run_command') {
-    assertWithinCwd(args.dir, 'Directory');
-    if (!options.allowInsecure && detectShellInjectionRisk(args.command)) {
+    assertWithinCwd((args as any).dir, 'Directory');
+    if (!options.allowInsecure && detectShellInjectionRisk((args as any).command)) {
       throw new Error(
         `Security Error: Command contains risky shell characters. Use 'allowInsecure: true' on the llm step to execute this.`
       );

package/src/runner/step-executor.ts CHANGED Viewed

@@ -130,7 +130,7 @@ export async function executeStep(
         if (!executeWorkflowFn) {
           throw new Error('Workflow executor not provided');
         }
-        result = await executeWorkflowFn(step, context, abortSignal);
+        result = await executeWorkflowFn(step, context, abortSignal, options.stepExecutionId);
         break;
       case 'script':
         result = await executeScriptStep(step, context, logger, {
@@ -183,9 +183,9 @@ export async function executeStep(
             runId,
             artifactRoot,
             executeLlmStep: injectedExecuteLlmStep || executeLlmStep,
-            emitEvent: options.emitEvent,
             workflowName: options.workflowName,
             db: options.db,
+            depth: options.depth,
           }
         );
         break;

package/src/runner/tool-integration.test.ts CHANGED Viewed

@@ -24,6 +24,7 @@ import type { ExpressionContext } from '../expression/evaluator';
 import * as agentParser from '../parser/agent-parser';
 import type { Agent, LlmStep, Step } from '../parser/schema';
 import { ConfigLoader } from '../utils/config-loader';
+import * as llmAdapter from './llm-adapter';
 import type { StepResult } from './step-executor';
 // Note: mock.module() for llm-adapter is now handled by the preload file
@@ -43,6 +44,7 @@ interface MockToolCall {
 describe('llm-executor with tools and MCP', () => {
   let resolveAgentPathSpy: ReturnType<typeof spyOn>;
   let parseAgentSpy: ReturnType<typeof spyOn>;
+  let getModelSpy: ReturnType<typeof spyOn>;
   const createMockMcpClient = (
     options: {
@@ -71,7 +73,7 @@ describe('llm-executor with tools and MCP', () => {
   };
   beforeAll(async () => {
-    mockGetModel.mockResolvedValue(createUnifiedMockModel());
+    getModelSpy = spyOn(llmAdapter, 'getModel').mockResolvedValue(createUnifiedMockModel() as any);
     // Set up config
     ConfigLoader.setConfig({
@@ -122,6 +124,7 @@ describe('llm-executor with tools and MCP', () => {
   afterEach(() => {
     resolveAgentPathSpy?.mockRestore();
     parseAgentSpy?.mockRestore();
+    getModelSpy?.mockClear();
   });
   afterAll(() => {