npm - keystone-cli - Versions diffs - 1.0.2 → 1.1.0 - Mend

keystone-cli 1.0.2 → 1.1.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (155)

package/README.md +288 -24
package/package.json +8 -4
package/src/cli.ts +538 -419
package/src/commands/doc.ts +31 -0
package/src/commands/event.ts +29 -0
package/src/commands/graph.ts +37 -0
package/src/commands/index.ts +14 -0
package/src/commands/init.ts +185 -0
package/src/commands/run.ts +124 -0
package/src/commands/schema.ts +40 -0
package/src/commands/utils.ts +78 -0
package/src/commands/validate.ts +111 -0
package/src/db/memory-db.ts +50 -2
package/src/db/workflow-db.test.ts +314 -0
package/src/db/workflow-db.ts +810 -210
package/src/expression/evaluator-audit.test.ts +4 -2
package/src/expression/evaluator.test.ts +14 -1
package/src/expression/evaluator.ts +166 -19
package/src/parser/config-schema.ts +18 -0
package/src/parser/schema.ts +153 -22
package/src/parser/test-schema.ts +6 -6
package/src/parser/workflow-parser.test.ts +24 -0
package/src/parser/workflow-parser.ts +65 -3
package/src/runner/auto-heal.test.ts +5 -6
package/src/runner/blueprint-executor.test.ts +2 -2
package/src/runner/debug-repl.test.ts +5 -8
package/src/runner/debug-repl.ts +59 -16
package/src/runner/durable-timers.test.ts +11 -2
package/src/runner/engine-executor.test.ts +1 -1
package/src/runner/events.ts +57 -0
package/src/runner/executors/artifact-executor.ts +166 -0
package/src/runner/{blueprint-executor.ts → executors/blueprint-executor.ts} +15 -7
package/src/runner/{engine-executor.ts → executors/engine-executor.ts} +55 -7
package/src/runner/executors/file-executor.test.ts +48 -0
package/src/runner/executors/file-executor.ts +324 -0
package/src/runner/{foreach-executor.ts → executors/foreach-executor.ts} +168 -80
package/src/runner/executors/human-executor.ts +144 -0
package/src/runner/executors/join-executor.ts +75 -0
package/src/runner/executors/llm-executor.ts +1266 -0
package/src/runner/executors/memory-executor.ts +71 -0
package/src/runner/executors/plan-executor.ts +104 -0
package/src/runner/executors/request-executor.ts +265 -0
package/src/runner/executors/script-executor.ts +43 -0
package/src/runner/executors/shell-executor.ts +403 -0
package/src/runner/executors/subworkflow-executor.ts +114 -0
package/src/runner/executors/types.ts +69 -0
package/src/runner/executors/wait-executor.ts +59 -0
package/src/runner/join-scheduling.test.ts +197 -0
package/src/runner/llm-adapter-runtime.test.ts +209 -0
package/src/runner/llm-adapter.test.ts +419 -24
package/src/runner/llm-adapter.ts +414 -17
package/src/runner/llm-clarification.test.ts +2 -1
package/src/runner/llm-executor.test.ts +532 -17
package/src/runner/mcp-client-audit.test.ts +1 -2
package/src/runner/mcp-client.ts +136 -46
package/src/runner/mcp-manager.test.ts +4 -0
package/src/runner/mcp-server.test.ts +58 -0
package/src/runner/mcp-server.ts +26 -0
package/src/runner/memoization.test.ts +190 -0
package/src/runner/optimization-runner.ts +4 -9
package/src/runner/quality-gate.test.ts +69 -0
package/src/runner/reflexion.test.ts +6 -17
package/src/runner/resource-pool.ts +102 -14
package/src/runner/services/context-builder.ts +144 -0
package/src/runner/services/secret-manager.ts +105 -0
package/src/runner/services/workflow-validator.ts +131 -0
package/src/runner/shell-executor.test.ts +28 -4
package/src/runner/standard-tools-ast.test.ts +196 -0
package/src/runner/standard-tools-execution.test.ts +27 -0
package/src/runner/standard-tools-integration.test.ts +6 -10
package/src/runner/standard-tools.ts +339 -102
package/src/runner/step-executor.test.ts +216 -4
package/src/runner/step-executor.ts +69 -941
package/src/runner/stream-utils.ts +7 -3
package/src/runner/test-harness.ts +20 -1
package/src/runner/timeout.test.ts +10 -0
package/src/runner/timeout.ts +11 -2
package/src/runner/tool-integration.test.ts +1 -1
package/src/runner/wait-step.test.ts +102 -0
package/src/runner/workflow-runner.test.ts +208 -15
package/src/runner/workflow-runner.ts +890 -818
package/src/runner/workflow-scheduler.ts +75 -0
package/src/runner/workflow-state.ts +269 -0
package/src/runner/workflow-subflows.test.ts +13 -12
package/src/scripts/generate-schemas.ts +16 -0
package/src/templates/agents/explore.md +1 -0
package/src/templates/agents/general.md +1 -0
package/src/templates/agents/handoff-router.md +14 -0
package/src/templates/agents/handoff-specialist.md +15 -0
package/src/templates/agents/keystone-architect.md +13 -44
package/src/templates/agents/my-agent.md +1 -0
package/src/templates/agents/software-engineer.md +1 -0
package/src/templates/agents/summarizer.md +1 -0
package/src/templates/agents/test-agent.md +1 -0
package/src/templates/agents/tester.md +1 -0
package/src/templates/{basic-inputs.yaml → basics/basic-inputs.yaml} +2 -0
package/src/templates/{basic-shell.yaml → basics/basic-shell.yaml} +2 -1
package/src/templates/{full-feature-demo.yaml → basics/full-feature-demo.yaml} +2 -0
package/src/templates/{stop-watch.yaml → basics/stop-watch.yaml} +1 -0
package/src/templates/{child-rollback.yaml → control-flow/child-rollback.yaml} +1 -0
package/src/templates/{cleanup-finally.yaml → control-flow/cleanup-finally.yaml} +1 -0
package/src/templates/{fan-out-fan-in.yaml → control-flow/fan-out-fan-in.yaml} +3 -0
package/src/templates/control-flow/idempotency-example.yaml +30 -0
package/src/templates/{loop-parallel.yaml → control-flow/loop-parallel.yaml} +3 -0
package/src/templates/{parent-rollback.yaml → control-flow/parent-rollback.yaml} +1 -0
package/src/templates/{retry-policy.yaml → control-flow/retry-policy.yaml} +3 -0
package/src/templates/features/artifact-example.yaml +39 -0
package/src/templates/{engine-example.yaml → features/engine-example.yaml} +1 -0
package/src/templates/{human-interaction.yaml → features/human-interaction.yaml} +1 -0
package/src/templates/{llm-agent.yaml → features/llm-agent.yaml} +1 -0
package/src/templates/{memory-service.yaml → features/memory-service.yaml} +2 -0
package/src/templates/{robust-automation.yaml → features/robust-automation.yaml} +3 -0
package/src/templates/features/script-example.yaml +27 -0
package/src/templates/patterns/agent-handoff.yaml +53 -0
package/src/templates/{approval-process.yaml → patterns/approval-process.yaml} +1 -0
package/src/templates/{batch-processor.yaml → patterns/batch-processor.yaml} +2 -0
package/src/templates/{composition-child.yaml → patterns/composition-child.yaml} +1 -0
package/src/templates/{composition-parent.yaml → patterns/composition-parent.yaml} +1 -0
package/src/templates/{data-pipeline.yaml → patterns/data-pipeline.yaml} +2 -0
package/src/templates/{decompose-implement.yaml → scaffolding/decompose-implement.yaml} +1 -0
package/src/templates/{decompose-problem.yaml → scaffolding/decompose-problem.yaml} +1 -0
package/src/templates/{decompose-research.yaml → scaffolding/decompose-research.yaml} +1 -0
package/src/templates/{decompose-review.yaml → scaffolding/decompose-review.yaml} +1 -0
package/src/templates/{dev.yaml → scaffolding/dev.yaml} +1 -0
package/src/templates/scaffolding/review-loop.yaml +97 -0
package/src/templates/{scaffold-feature.yaml → scaffolding/scaffold-feature.yaml} +2 -0
package/src/templates/{scaffold-generate.yaml → scaffolding/scaffold-generate.yaml} +1 -0
package/src/templates/{scaffold-plan.yaml → scaffolding/scaffold-plan.yaml} +1 -0
package/src/templates/testing/invalid.yaml +6 -0
package/src/ui/dashboard.tsx +191 -33
package/src/utils/auth-manager.test.ts +337 -0
package/src/utils/auth-manager.ts +157 -61
package/src/utils/blueprint-utils.ts +4 -6
package/src/utils/config-loader.test.ts +2 -0
package/src/utils/config-loader.ts +12 -3
package/src/utils/constants.ts +76 -0
package/src/utils/container.ts +63 -0
package/src/utils/context-injector.test.ts +200 -0
package/src/utils/context-injector.ts +244 -0
package/src/utils/doc-generator.ts +85 -0
package/src/utils/env-filter.ts +45 -0
package/src/utils/json-parser.test.ts +12 -0
package/src/utils/json-parser.ts +30 -5
package/src/utils/logger.ts +12 -1
package/src/utils/mermaid.ts +4 -0
package/src/utils/paths.ts +52 -1
package/src/utils/process-sandbox-worker.test.ts +46 -0
package/src/utils/process-sandbox.ts +227 -14
package/src/utils/redactor.test.ts +11 -6
package/src/utils/redactor.ts +25 -9
package/src/utils/sandbox.ts +3 -0
package/src/utils/workflow-registry.test.ts +2 -2
package/src/runner/llm-executor.ts +0 -638
package/src/runner/shell-executor.ts +0 -366
package/src/templates/invalid.yaml +0 -5

package/src/runner/llm-executor.test.ts CHANGED Viewed

@@ -5,12 +5,14 @@ import { mkdirSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
 import { Readable, Writable } from 'node:stream';
 import type { ExpressionContext } from '../expression/evaluator';
+import { ExpressionEvaluator } from '../expression/evaluator';
+import { parseAgent } from '../parser/agent-parser';
 import type { LlmStep, Step } from '../parser/schema';
+import { ConsoleLogger, type Logger } from '../utils/logger';
+import { executeLlmStep } from './executors/llm-executor.ts';
 import type { LLMAdapter, LLMMessage, LLMResponse, LLMTool } from './llm-adapter';
-import { executeLlmStep } from './llm-executor';
 import type { MCPServerConfig } from './mcp-manager';
 import type { StepResult } from './step-executor';
-import type { Logger } from './workflow-runner';
 // Mock adapters
 // Instead of mutating prototypes (which causes cross-test contamination),
@@ -187,6 +189,25 @@ tools:
 ---
 You are a test agent.`;
     writeFileSync(join(agentsDir, 'test-agent.md'), agentContent);
+    const handoffTargetContent = `---
+name: handoff-target
+model: gpt-4
+tools:
+  - name: specialist-tool
+    execution:
+      type: shell
+      run: echo "specialist"
+---
+You are the specialist for \${{ inputs.topic }}.`;
+    writeFileSync(join(agentsDir, 'handoff-target.md'), handoffTargetContent);
+    const contextAgentContent = `---
+name: context-agent
+model: gpt-4
+---
+You are a context-aware agent.`;
+    writeFileSync(join(agentsDir, 'context-agent.md'), contextAgentContent);
   });
   afterAll(() => {
@@ -230,7 +251,7 @@ You are a test agent.`;
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
-    const executeStepFn = async (s: Step) => {
+    const executeStepFn = async (s: any) => {
       if (s.type === 'shell') {
         return { status: 'success' as const, output: { stdout: 'tool result' } };
       }
@@ -262,7 +283,7 @@ You are a test agent.`;
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
-    const executeStepFn = async (s: Step) => {
+    const executeStepFn = async (s: any) => {
       if (s.type === 'shell') {
         return { status: 'success' as const, output: { stdout: 'tool result' } };
       }
@@ -273,6 +294,8 @@ You are a test agent.`;
       log: mock(() => {}),
       error: mock(() => {}),
       warn: mock(() => {}),
+      info: mock(() => {}),
+      debug: mock(() => {}),
     };
     await executeLlmStep(
@@ -325,6 +348,63 @@ You are a test agent.`;
     expect(result.output).toEqual({ foo: 'bar' });
   });
+  it('should accept native structured output tool calls when responseSchema is provided', async () => {
+    const outputSchema = {
+      type: 'object',
+      properties: {
+        foo: { type: 'string' },
+      },
+      required: ['foo'],
+    };
+    let receivedSchema: unknown;
+    const chatMock = mock(async (_messages, options) => {
+      receivedSchema = options?.responseSchema;
+      return {
+        message: {
+          role: 'assistant',
+          content: null,
+          tool_calls: [
+            {
+              id: 'call-1',
+              type: 'function',
+              function: { name: 'record_output', arguments: '{"foo":"bar"}' },
+            },
+          ],
+        },
+      };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = createMockGetAdapter(chatMock);
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'give me json',
+      needs: [],
+      maxIterations: 5,
+      outputSchema,
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+    const result = await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(receivedSchema).toEqual(outputSchema);
+    expect(result.status).toBe('success');
+    expect(result.output).toEqual({ foo: 'bar' });
+    expect(executeStepFn).not.toHaveBeenCalled();
+  });
   it('should retry if LLM output fails schema validation', async () => {
     const step: LlmStep = {
       id: 'l1',
@@ -468,7 +548,7 @@ You are a test agent.`;
       context,
       executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
       console,
-      mcpManager as unknown as { getClient: () => Promise<unknown> },
+      mcpManager as any,
       undefined,
       undefined,
       mockGetAdapter
@@ -527,7 +607,7 @@ You are a test agent.`;
       context,
       executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
       undefined,
-      mcpManager as unknown as { getClient: () => Promise<unknown> },
+      mcpManager as any,
       undefined,
       undefined,
       getAdapter
@@ -570,10 +650,7 @@ You are a test agent.`;
       context,
       executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
       console,
-      manager as unknown as {
-        getClient: () => Promise<unknown>;
-        getGlobalServers: () => unknown[];
-      },
+      manager as any,
       undefined,
       undefined,
       getAdapter
@@ -603,7 +680,8 @@ You are a test agent.`;
     };
     const context: ExpressionContext = { inputs: {}, steps: {} };
     let toolExecuted = false;
-    const executeStepFn = async (s: Step) => {
+    const executeStepFn = async (s: any) => {
       if (s.id === 'adhoc-step') {
         toolExecuted = true;
         return { status: 'success' as const, output: { stdout: 'adhoc result' } };
@@ -691,7 +769,7 @@ You are a test agent.`;
       getAdapter
     );
-    expect(capturedStep?.type).toBe('engine');
+    expect((capturedStep as any)?.type).toBe('engine');
     expect(chatCount).toBe(2);
   });
@@ -728,6 +806,194 @@ You are a test agent.`;
     consoleSpy.mockRestore();
   });
+  it('should summarize messages when history is too long', async () => {
+    let summaryAttempted = false;
+    const chatMock = mock(async (messages: LLMMessage[]) => {
+      if (messages.find((m) => m.name === 'context_summary')) {
+        summaryAttempted = true;
+      }
+      return { message: { role: 'assistant', content: 'Resuming' } };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = (modelString: string) => {
+      const mockAdapter: LLMAdapter = {
+        chat: async (messages, options) => {
+          if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
+            return { message: { role: 'assistant', content: 'Summary text' } };
+          }
+          return chatMock(messages, options);
+        },
+      };
+      return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
+    };
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'continue',
+      needs: [],
+      maxIterations: 1,
+      maxMessageHistory: 4, // Allow at least one non-system message before summarization
+      contextStrategy: 'summary',
+    };
+    const context: ExpressionContext = {
+      inputs: {},
+      steps: {
+        l1: {
+          output: {
+            messages: [
+              { role: 'user', content: 'm1' },
+              { role: 'assistant', content: 'm2' },
+              { role: 'user', content: 'm3' },
+            ],
+          },
+        },
+      },
+    };
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+    await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(summaryAttempted).toBe(true);
+  });
+  it('should fall back to truncation if summarization fails', async () => {
+    const logger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+      debug: mock(() => {}),
+    };
+    const getAdapter = (modelString: string) => {
+      const mockAdapter: LLMAdapter = {
+        chat: async (messages) => {
+          if (messages[0].role === 'system' && messages[0].content?.includes('Summarize')) {
+            throw new Error('Summary failed');
+          }
+          return { message: { role: 'assistant', content: 'Truncated response' } };
+        },
+      };
+      return { adapter: mockAdapter, resolvedModel: 'gpt-4' };
+    };
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'continue',
+      needs: [],
+      maxIterations: 1,
+      maxMessageHistory: 4,
+      contextStrategy: 'summary',
+    };
+    const context: ExpressionContext = {
+      inputs: {},
+      steps: {
+        l1: {
+          output: {
+            messages: [
+              { role: 'user', content: 'm1' },
+              { role: 'assistant', content: 'm2' },
+              { role: 'user', content: 'm3' },
+            ],
+          },
+        },
+      },
+    };
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+    await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      logger,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(logger.warn).toHaveBeenCalledWith(
+      expect.stringContaining('Context summarization failed')
+    );
+  });
+  it('should extract thought blocks and emit thought events', async () => {
+    const logger: Logger = {
+      log: mock(() => {}),
+      error: mock(() => {}),
+      warn: mock(() => {}),
+      info: mock(() => {}),
+      debug: mock(() => {}),
+    };
+    const emitEvent = mock(() => {});
+    const eventContext = { runId: 'run-1', workflow: 'wf-1' };
+    const chatMock = mock(async () => {
+      return {
+        message: {
+          role: 'assistant',
+          content: '<thinking>I should do X</thinking>Final answer',
+        },
+      };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = () => ({
+      adapter: { chat: chatMock },
+      resolvedModel: 'gpt-4',
+    });
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'hello',
+      needs: [],
+      maxIterations: 10,
+    };
+    await executeLlmStep(
+      step,
+      { inputs: {}, steps: {} },
+      mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
+      logger,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter as any,
+      emitEvent,
+      eventContext
+    );
+    expect(logger.info).toHaveBeenCalledWith(
+      expect.stringContaining('Thought (thinking): I should do X')
+    );
+    expect(emitEvent).toHaveBeenCalledWith(
+      expect.objectContaining({
+        type: 'llm.thought',
+        content: 'I should do X',
+        source: 'thinking',
+      })
+    );
+  });
   it('should not add global MCP server if already explicitly listed', async () => {
     const mockClient = createMockMcpClient();
     const manager = createMockMcpManager({
@@ -757,10 +1023,7 @@ You are a test agent.`;
       context,
       executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
       console,
-      manager as unknown as {
-        getClient: () => Promise<unknown>;
-        getGlobalServers: () => unknown[];
-      },
+      manager as any,
       undefined,
       undefined,
       getAdapter
@@ -788,7 +1051,6 @@ You are a test agent.`;
     let capturedPrompt = '';
     const chatMock = mock(async (messages: LLMMessage[]) => {
-      // console.log('MESSAGES:', JSON.stringify(messages, null, 2));
       capturedPrompt = messages.find((m) => m.role === 'user')?.content || '';
       return { message: { role: 'assistant', content: 'Response' } };
     }) as unknown as LLMAdapter['chat'];
@@ -810,4 +1072,257 @@ You are a test agent.`;
     expect(capturedPrompt).toContain('"key": "value"');
     expect(capturedPrompt).not.toContain('[object Object]');
   });
+  it('should evaluate expressions in agent system prompts', async () => {
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'handoff-target',
+      prompt: 'hello',
+      needs: [],
+      maxIterations: 3,
+    };
+    const context: ExpressionContext = { inputs: { topic: 'payments' }, steps: {} };
+    let capturedSystem = '';
+    const chatMock = mock(async (messages: LLMMessage[]) => {
+      const systemMessages = messages.filter((m) => m.role === 'system');
+      capturedSystem =
+        (systemMessages.find((m) => typeof m.content === 'string')?.content as string) || '';
+      return { message: { role: 'assistant', content: 'ok' } };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = createMockGetAdapter(chatMock);
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+    const result = await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(result.status).toBe('success');
+    expect(capturedSystem).toContain('payments');
+    expect(capturedSystem).not.toContain('${{');
+  });
+  it('should handle streaming chunks with thoughts', async () => {
+    const step = {
+      id: 'l-stream',
+      type: 'llm' as const,
+      agent: 'test-agent',
+      prompt: 'stream this',
+      needs: [],
+      maxIterations: 1,
+    };
+    // We can't easily add 'stream' to LlmStep without changing schema,
+    // but we can mock the adapter to stream if onStream is provided.
+    const chatMock = mock(async (messages: LLMMessage[], options: any) => {
+      if (options.onStream) {
+        options.onStream('<thinking>thought</thinking>done');
+      }
+      return { message: { role: 'assistant', content: '<thinking>thought</thinking>done' } };
+    }) as unknown as LLMAdapter['chat'];
+    const adapter = {
+      chat: chatMock,
+    } as any;
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    spyOn(process.stdout, 'write').mockImplementation(() => true);
+    const emitThought = mock(() => {});
+    await executeLlmStep(
+      step as any,
+      context,
+      mock(async () => ({ status: 'success' as const, output: 'ok' })) as any,
+      new ConsoleLogger(),
+      undefined,
+      undefined,
+      undefined,
+      () => ({ adapter, resolvedModel: 'test-model' }),
+      emitThought,
+      { runId: 'test-run', workflow: 'test-wf' }
+    );
+    expect(emitThought).toHaveBeenCalled();
+  });
+  it('should transfer to allowed agent and swap system prompt/tools', async () => {
+    let callCount = 0;
+    let sawTransferTool = false;
+    let sawOriginalTool = false;
+    let sawTargetToolAfter = false;
+    let sawOriginalToolAfter = false;
+    let sawTargetPrompt = false;
+    const chatMock = mock(async (messages: LLMMessage[], options: { tools?: LLMTool[] }) => {
+      callCount++;
+      const toolNames = options.tools?.map((t) => t.function.name) || [];
+      if (callCount === 1) {
+        sawTransferTool = toolNames.includes('transfer_to_agent');
+        sawOriginalTool = toolNames.includes('test-tool');
+        return {
+          message: {
+            role: 'assistant',
+            content: null,
+            tool_calls: [
+              {
+                id: 'call-transfer',
+                type: 'function',
+                function: {
+                  name: 'transfer_to_agent',
+                  arguments: '{"agent_name":"handoff-target"}',
+                },
+              },
+            ],
+          },
+        };
+      }
+      const systemMessages = messages.filter((m) => m.role === 'system');
+      sawTargetPrompt = systemMessages.some(
+        (m) => typeof m.content === 'string' && m.content.includes('specialist for billing')
+      );
+      sawTargetToolAfter = toolNames.includes('specialist-tool');
+      sawOriginalToolAfter = toolNames.includes('test-tool');
+      return {
+        message: { role: 'assistant', content: 'done' },
+      };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = createMockGetAdapter(chatMock);
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'test-agent',
+      prompt: 'handoff',
+      needs: [],
+      maxIterations: 4,
+      allowedHandoffs: ['handoff-target'],
+    };
+    const context: ExpressionContext = { inputs: { topic: 'billing' }, steps: {} };
+    const executeStepFn = mock(async () => ({ status: 'success' as const, output: 'ok' }));
+    const result = await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(result.status).toBe('success');
+    expect(sawTransferTool).toBe(true);
+    expect(sawOriginalTool).toBe(true);
+    expect(sawTargetToolAfter).toBe(true);
+    expect(sawOriginalToolAfter).toBe(false);
+    expect(sawTargetPrompt).toBe(true);
+  });
+  it('should apply context updates from tool output', async () => {
+    const step: LlmStep = {
+      id: 'l1',
+      type: 'llm',
+      agent: 'context-agent',
+      prompt: 'update context',
+      needs: [],
+      maxIterations: 4,
+      tools: [
+        {
+          name: 'update-context',
+          execution: {
+            id: 'update-step',
+            type: 'shell',
+            run: 'echo update',
+          },
+        },
+        {
+          name: 'read-context',
+          execution: {
+            id: 'read-step',
+            type: 'shell',
+            run: 'echo read',
+          },
+        },
+      ],
+    };
+    const context: ExpressionContext = { inputs: {}, steps: {} };
+    let sawEnvUpdate = false;
+    let sawMemoryUpdate = false;
+    const executeStepFn = async (_step: any, toolContext: ExpressionContext) => {
+      if (_step.id === 'update-step') {
+        return {
+          status: 'success' as const,
+          output: {
+            __keystone_context: {
+              env: { USER_ID: '123' },
+              memory: { user: 'Ada' },
+            },
+            ok: true,
+          },
+        };
+      }
+      if (_step.id === 'read-step') {
+        sawEnvUpdate = toolContext.env?.USER_ID === '123';
+        sawMemoryUpdate = toolContext.memory?.user === 'Ada';
+        return { status: 'success' as const, output: { seen: true } };
+      }
+      return { status: 'success' as const, output: 'ok' };
+    };
+    let callCount = 0;
+    const chatMock = mock(async () => {
+      callCount++;
+      if (callCount === 1) {
+        return {
+          message: {
+            role: 'assistant',
+            content: null,
+            tool_calls: [
+              {
+                id: 'call-update',
+                type: 'function',
+                function: { name: 'update-context', arguments: '{}' },
+              },
+              {
+                id: 'call-read',
+                type: 'function',
+                function: { name: 'read-context', arguments: '{}' },
+              },
+            ],
+          },
+        };
+      }
+      return { message: { role: 'assistant', content: 'done' } };
+    }) as unknown as LLMAdapter['chat'];
+    const getAdapter = createMockGetAdapter(chatMock);
+    await executeLlmStep(
+      step,
+      context,
+      executeStepFn as unknown as (step: Step, context: ExpressionContext) => Promise<StepResult>,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      getAdapter
+    );
+    expect(sawEnvUpdate).toBe(true);
+    expect(sawMemoryUpdate).toBe(true);
+  });
 });

package/src/runner/mcp-client-audit.test.ts CHANGED Viewed

@@ -17,8 +17,7 @@ describe('MCPClient Audit Fixes', () => {
       }),
       kill: () => {},
       on: () => {},
-      // biome-ignore lint/suspicious/noExplicitAny: Mocking complex object
-    } as any);
+    } as unknown as child_process.ChildProcess);
   });
   afterEach(() => {