npm - keystone-cli - Versions diffs - 2.0.1 → 2.1.1 - Mend

keystone-cli 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/README.md +30 -4
package/package.json +17 -3
package/src/cli.ts +3 -2
package/src/commands/event.ts +9 -0
package/src/commands/run.ts +17 -0
package/src/db/dynamic-state-manager.ts +12 -9
package/src/db/memory-db.test.ts +19 -1
package/src/db/memory-db.ts +101 -22
package/src/db/workflow-db.ts +181 -9
package/src/expression/evaluator.ts +4 -1
package/src/parser/schema.ts +2 -1
package/src/runner/__test__/llm-test-setup.ts +43 -11
package/src/runner/durable-timers.test.ts +1 -1
package/src/runner/executors/dynamic-executor.ts +125 -88
package/src/runner/executors/engine-executor.ts +10 -39
package/src/runner/executors/file-executor.ts +38 -0
package/src/runner/executors/foreach-executor.ts +170 -17
package/src/runner/executors/human-executor.ts +18 -0
package/src/runner/executors/llm/stream-handler.ts +103 -0
package/src/runner/executors/llm/tool-manager.ts +342 -0
package/src/runner/executors/llm-executor.ts +313 -550
package/src/runner/executors/memory-executor.ts +41 -34
package/src/runner/executors/shell-executor.ts +141 -54
package/src/runner/executors/subworkflow-executor.ts +16 -0
package/src/runner/executors/types.ts +3 -1
package/src/runner/executors/verification_fixes.test.ts +46 -0
package/src/runner/join-scheduling.test.ts +2 -1
package/src/runner/llm-adapter.integration.test.ts +10 -5
package/src/runner/llm-adapter.ts +46 -17
package/src/runner/llm-clarification.test.ts +4 -1
package/src/runner/llm-executor.test.ts +21 -7
package/src/runner/mcp-client.ts +36 -2
package/src/runner/mcp-server.ts +65 -36
package/src/runner/memoization.test.ts +2 -2
package/src/runner/recovery-security.test.ts +5 -2
package/src/runner/reflexion.test.ts +6 -3
package/src/runner/services/context-builder.ts +13 -4
package/src/runner/services/workflow-validator.ts +2 -1
package/src/runner/shell-executor.test.ts +107 -1
package/src/runner/standard-tools-ast.test.ts +4 -2
package/src/runner/standard-tools-execution.test.ts +14 -1
package/src/runner/standard-tools-integration.test.ts +6 -0
package/src/runner/standard-tools.ts +13 -10
package/src/runner/step-executor.ts +2 -2
package/src/runner/tool-integration.test.ts +4 -1
package/src/runner/workflow-runner.test.ts +23 -12
package/src/runner/workflow-runner.ts +174 -85
package/src/runner/workflow-state.ts +186 -111
package/src/ui/dashboard.tsx +17 -3
package/src/utils/config-loader.ts +4 -0
package/src/utils/constants.ts +4 -0
package/src/utils/context-injector.test.ts +27 -27
package/src/utils/context-injector.ts +68 -26
package/src/utils/process-sandbox.ts +138 -148
package/src/utils/redactor.ts +39 -9
package/src/utils/resource-loader.ts +24 -19
package/src/utils/sandbox.ts +6 -0
package/src/utils/stream-utils.ts +58 -0

package/src/runner/executors/llm-executor.ts CHANGED

@@ -1,29 +1,23 @@
-import { tool as createTool, jsonSchema, streamText } from 'ai';
-import type { TextPart, ToolCallPart, ToolResultPart } from 'ai';
-import { z } from 'zod';
+import { streamText } from 'ai';
 import type { ExpressionContext } from '../../expression/evaluator';
 import { ExpressionEvaluator } from '../../expression/evaluator';
 import { parseAgent, resolveAgentPath } from '../../parser/agent-parser';
-import type { Agent, LlmStep, Step } from '../../parser/schema';
-import { ConfigLoader } from '../../utils/config-loader';
-import { LIMITS, LLM } from '../../utils/constants';
+import type { LlmStep, Step } from '../../parser/schema';
+import { ITERATIONS, LIMITS } from '../../utils/constants';
 import { ContextInjector } from '../../utils/context-injector';
 import { extractJson } from '../../utils/json-parser';
 import { ConsoleLogger, type Logger } from '../../utils/logger.ts';
 import { RedactionBuffer, Redactor } from '../../utils/redactor';
 import type { WorkflowEvent } from '../events.ts';
 import * as llmAdapter from '../llm-adapter';
-import type { LLMMessage, LLMResponse } from '../llm-adapter';
-import { MCPClient } from '../mcp-client';
-import type { MCPManager, MCPServerConfig } from '../mcp-manager';
-import { STANDARD_TOOLS, validateStandardToolSecurity } from '../standard-tools';
+import type { LLMMessage } from '../llm-adapter';
+import type { MCPManager } from '../mcp-manager';
+import { StreamHandler } from './llm/stream-handler';
+import { ToolManager } from './llm/tool-manager';
 import type { StepResult } from './types.ts';
 // --- AI SDK Message Types ---
-// These types mirror the AI SDK's CoreMessage structure for type safety
-// without tightly coupling to AI SDK internals that may change between versions.
-// The types are intentionally permissive to handle various AI SDK part types.
+// (Keep types for mapping)
 interface CoreTextPart {
   type: 'text';
   text: string;
@@ -33,26 +27,18 @@ interface CoreToolCallPart {
   type: 'tool-call';
   toolCallId: string;
   toolName: string;
-  args?: unknown;
-  input?: unknown;
+  args: any;
 }
 interface CoreToolResultPart {
   type: 'tool-result';
   toolCallId: string;
   toolName: string;
-  result: unknown;
-  output?: unknown;
-}
-// Additional AI SDK part types we want to handle gracefully
-interface CoreOtherPart {
-  type: string;
-  [key: string]: unknown;
+  result: any;
+  isError?: boolean;
 }
-type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart | CoreOtherPart;
-type CoreMessageContent = string | CoreContentPart[];
+type CoreContentPart = CoreTextPart | CoreToolCallPart | CoreToolResultPart;
 interface CoreSystemMessage {
   role: 'system';
@@ -66,216 +52,112 @@ interface CoreUserMessage {
 interface CoreAssistantMessage {
   role: 'assistant';
-  content: CoreMessageContent;
-  toolCalls?: ToolCallPart[];
+  content: string | CoreContentPart[];
 }
 interface CoreToolMessage {
   role: 'tool';
-  content: CoreContentPart[];
+  content: CoreToolResultPart[];
 }
 type CoreMessage = CoreSystemMessage | CoreUserMessage | CoreAssistantMessage | CoreToolMessage;
-// Re-export for local use with shorter names
-const { THINKING_OPEN_TAG, THINKING_CLOSE_TAG, TRANSFER_TOOL_NAME, CONTEXT_UPDATE_KEY } = LLM;
 type LlmEventContext = {
   runId?: string;
   workflow?: string;
 };
-// --- Helper Parser Logic (Kept from original) ---
-class ThoughtStreamParser {
-  private buffer = '';
-  private thoughtBuffer = '';
-  private inThinking = false;
-  process(chunk: string): { output: string; thoughts: string[] } {
-    this.buffer += chunk;
-    const thoughts: string[] = [];
-    let output = '';
-    while (this.buffer.length > 0) {
-      const lower = this.buffer.toLowerCase();
-      if (!this.inThinking) {
-        const openIndex = lower.indexOf(THINKING_OPEN_TAG);
-        if (openIndex === -1) {
-          const keep = Math.max(0, this.buffer.length - (THINKING_OPEN_TAG.length - 1));
-          output += this.buffer.slice(0, keep);
-          this.buffer = this.buffer.slice(keep);
-          break;
-        }
-        output += this.buffer.slice(0, openIndex);
-        this.buffer = this.buffer.slice(openIndex + THINKING_OPEN_TAG.length);
-        this.inThinking = true;
-        continue;
-      }
-      const closeIndex = lower.indexOf(THINKING_CLOSE_TAG);
-      if (closeIndex === -1) {
-        const keep = Math.max(0, this.buffer.length - (THINKING_CLOSE_TAG.length - 1));
-        this.thoughtBuffer += this.buffer.slice(0, keep);
-        this.buffer = this.buffer.slice(keep);
-        break;
-      }
-      this.thoughtBuffer += this.buffer.slice(0, closeIndex);
-      this.buffer = this.buffer.slice(closeIndex + THINKING_CLOSE_TAG.length);
-      this.inThinking = false;
-      const thought = this.thoughtBuffer.trim();
-      if (thought) {
-        thoughts.push(thought);
-      }
-      this.thoughtBuffer = '';
-    }
-    return { output, thoughts };
-  }
-  flush(): { output: string; thoughts: string[] } {
-    const thoughts: string[] = [];
-    let output = '';
-    if (this.inThinking) {
-      this.thoughtBuffer += this.buffer;
-      const thought = this.thoughtBuffer.trim();
-      if (thought) {
-        thoughts.push(thought);
+// --- Mappers ---
+function mapToCoreMessages(messages: LLMMessage[]): any[] {
+  const coreMessages = messages.map((m) => {
+    if (m.role === 'user') return { role: 'user', content: m.content || '' };
+    if (m.role === 'assistant') {
+      const toolCalls = m.tool_calls || [];
+      if (toolCalls.length === 0) {
+        return { role: 'assistant', content: m.content || '' };
       }
-    } else {
-      output = this.buffer;
+      return {
+        role: 'assistant',
+        content: [
+          ...(m.content ? [{ type: 'text' as const, text: m.content }] : []),
+          ...toolCalls.map((tc) => ({
+            type: 'tool-call' as const,
+            toolCallId: tc.id || 'missing-id',
+            toolName: tc.function.name || 'missing-name',
+            args:
+              typeof tc.function.arguments === 'string'
+                ? JSON.parse(tc.function.arguments || '{}')
+                : tc.function.arguments || {},
+            input:
+              typeof tc.function.arguments === 'string'
+                ? JSON.parse(tc.function.arguments || '{}')
+                : tc.function.arguments || {},
+            arguments: tc.function.arguments || {},
+          })),
+        ],
+      };
     }
+    if (m.role === 'tool') {
+      const content = m.content;
+      let outputPart: { type: 'text'; value: string } | { type: 'json'; value: any };
-    this.buffer = '';
-    this.thoughtBuffer = '';
-    this.inThinking = false;
-    return { output, thoughts };
-  }
-}
-function safeJsonStringify(value: unknown): string {
-  try {
-    return JSON.stringify(value);
-  } catch {
-    const seen = new WeakSet<object>();
-    try {
-      return JSON.stringify(value, (_key, val) => {
-        if (typeof val === 'bigint') return val.toString();
-        if (typeof val === 'object' && val !== null) {
-          if (seen.has(val)) return '[Circular]';
-          seen.add(val);
+      if (typeof content === 'string') {
+        try {
+          const parsed = JSON.parse(content);
+          outputPart = { type: 'json', value: parsed };
+        } catch {
+          outputPart = { type: 'text', value: content };
         }
-        return val;
-      });
-    } catch {
-      return String(value);
-    }
-  }
-}
-/**
- * Maps Keystone LLMMessage to AI SDK CoreMessage
- */
-function mapToCoreMessages(messages: LLMMessage[]): CoreMessage[] {
-  return messages.map((m) => {
-    if (m.role === 'user') {
-      return { role: 'user', content: m.content || '' };
-    }
-    if (m.role === 'assistant') {
-      if (m.tool_calls && m.tool_calls.length > 0) {
-        const toolCalls: ToolCallPart[] = m.tool_calls.map((tc) => ({
-          type: 'tool-call',
-          toolCallId: tc.id,
-          toolName: tc.function.name,
-          input: JSON.parse(tc.function.arguments),
-        }));
-        return { role: 'assistant', content: m.content || '', toolCalls };
+      } else {
+        outputPart = { type: 'json', value: content || {} };
       }
-      return { role: 'assistant', content: m.content || '' };
-    }
-    if (m.role === 'tool') {
       return {
         role: 'tool',
         content: [
           {
             type: 'tool-result',
-            toolCallId: m.tool_call_id || '',
-            toolName: m.name || '',
-            result: m.content || '',
-          },
+            toolCallId: m.tool_call_id || 'missing-id',
+            toolName: m.name || 'missing-name',
+            output: outputPart,
+          } as any,
         ],
       };
     }
-    // Default to system
+    // Handle system or unknown roles
     return { role: 'system', content: m.content || '' };
   });
+  return coreMessages;
 }
+// --- Helper Functions ---
 /**
- * Maps AI SDK CoreMessage to Keystone LLMMessage.
- * Accepts readonly unknown[] to handle AI SDK ResponseMessage[] which varies by SDK version.
+ * Prunes the message history to the last N messages, ensuring that tool calls and tool results
+ * are kept together.
  */
-function mapFromCoreMessages(messages: readonly unknown[]): LLMMessage[] {
-  const keystoneMessages: LLMMessage[] = [];
-  for (const rawMsg of messages) {
-    // Type guard for message structure
-    const msg = rawMsg as { role: string; content?: unknown };
-    if (msg.role === 'assistant') {
-      const rawContent = msg.content;
-      const contentArray = Array.isArray(rawContent)
-        ? rawContent
-        : [{ type: 'text', text: String(rawContent || '') }];
-      const textPart = contentArray.find(
-        (p: { type?: string; text?: string }) => p.type === 'text'
-      );
-      const keystoneMsg: LLMMessage = {
-        role: 'assistant',
-        content: textPart?.text || '',
-      };
-      const toolCalls = contentArray.filter((p: { type?: string }) => p.type === 'tool-call');
-      if (toolCalls.length > 0) {
-        keystoneMsg.tool_calls = toolCalls.map(
-          (tc: { toolCallId?: string; toolName?: string; args?: unknown; input?: unknown }) => ({
-            id: tc.toolCallId || '',
-            type: 'function' as const,
-            function: {
-              name: tc.toolName || '',
-              arguments:
-                typeof tc.args === 'string' ? tc.args : JSON.stringify(tc.args || tc.input || {}),
-            },
-          })
-        );
-      }
-      keystoneMessages.push(keystoneMsg);
-    } else if (msg.role === 'tool') {
-      const rawContent = msg.content;
-      const contentArray = Array.isArray(rawContent) ? rawContent : [];
-      for (const part of contentArray) {
-        const typedPart = part as {
-          type?: string;
-          toolCallId?: string;
-          toolName?: string;
-          result?: unknown;
-          output?: unknown;
-        };
-        if (typedPart.type === 'tool-result') {
-          keystoneMessages.push({
-            role: 'tool',
-            tool_call_id: typedPart.toolCallId,
-            name: typedPart.toolName,
-            content:
-              typeof typedPart.result === 'string'
-                ? typedPart.result
-                : JSON.stringify(typedPart.result || typedPart.output || ''),
-          });
-        }
-      }
-    } else if (msg.role === 'user') {
-      keystoneMessages.push({ role: 'user', content: String(msg.content || '') });
-    }
+export function pruneMessages(messages: LLMMessage[], maxHistory: number): LLMMessage[] {
+  if (messages.length <= maxHistory) {
+    return messages;
+  }
+  let startIndex = messages.length - maxHistory;
+  // Loop to backtrack if we landed on a tool message
+  while (startIndex > 0 && messages[startIndex].role === 'tool') {
+    startIndex--;
   }
-  return keystoneMessages;
+  // Check if we landed on a valid parent (Assistant with tool_calls)
+  const candidate = messages[startIndex];
+  if (candidate.role === 'assistant' && candidate.tool_calls && candidate.tool_calls.length > 0) {
+    // Found the parent, include it and everything after
+    return messages.slice(startIndex);
+  }
+  // Fallback to naive slicing if we can't find a clean parent connection
+  // (This matches current behavior for edge cases, preventing regressions in weird states)
+  return messages.slice(messages.length - maxHistory);
 }
 // --- Main Execution Logic ---
@@ -294,33 +176,17 @@ export async function executeLlmStep(
   const agentName = ExpressionEvaluator.evaluateString(step.agent, context);
   const agentPath = resolveAgentPath(agentName, workflowDir);
   let activeAgent = parseAgent(agentPath);
-  const providerRaw = step.provider || activeAgent.provider;
-  const modelRaw = step.model || activeAgent.model || 'gpt-4o';
-  const provider = providerRaw
-    ? ExpressionEvaluator.evaluateString(providerRaw, context)
-    : undefined;
-  const model = ExpressionEvaluator.evaluateString(modelRaw, context);
   const prompt = ExpressionEvaluator.evaluateString(step.prompt, context);
-  const fullModelString = provider ? `${provider}:${model}` : model;
-  // NOTE: getModel is the new AI SDK factory
-  const languageModel = await llmAdapter.getModel(fullModelString);
   // Redaction setup
   const redactor = new Redactor(context.secrets || {}, {
     forcedSecrets: context.secretValues || [],
   });
   const redactionBuffer = new RedactionBuffer(redactor);
-  const thoughtStream = step.outputSchema ? null : new ThoughtStreamParser();
+  const streamHandler = step.outputSchema ? null : new StreamHandler(logger);
   const eventTimestamp = () => new Date().toISOString();
   const emitThought = (content: string, source: 'thinking' | 'reasoning') => {
-    const trimmed = redactor.redact(content.trim());
-    if (!trimmed) return;
-    logger.info(`💭 Thought (${source}): ${trimmed}`);
     if (emitEvent && eventContext?.runId && eventContext?.workflow) {
       emitEvent({
         type: 'llm.thought',
@@ -328,7 +194,7 @@ export async function executeLlmStep(
         runId: eventContext.runId,
         workflow: eventContext.workflow,
         stepId: step.id,
-        content: trimmed,
+        content,
         source,
       });
     }
@@ -336,44 +202,21 @@ export async function executeLlmStep(
   const handleStreamChunk = (chunk: string) => {
     const redactedChunk = redactionBuffer.process(chunk);
-    if (!thoughtStream) {
+    if (!streamHandler) {
       process.stdout.write(redactedChunk);
       return;
     }
-    const parsed = thoughtStream.process(redactedChunk);
-    if (parsed.output) {
-      process.stdout.write(parsed.output);
+    const { text, thoughts } = streamHandler.processChunk(redactedChunk);
+    if (text) {
+      process.stdout.write(text);
     }
-    for (const thought of parsed.thoughts) {
-      emitThought(thought, 'thinking');
-    }
-  };
-  const flushStream = () => {
-    const flushed = redactionBuffer.flush();
-    if (!thoughtStream) {
-      process.stdout.write(flushed);
-      return;
-    }
-    const parsed = thoughtStream.process(flushed);
-    if (parsed.output) {
-      process.stdout.write(parsed.output);
-    }
-    for (const thought of parsed.thoughts) {
-      emitThought(thought, 'thinking');
-    }
-    const final = thoughtStream.flush();
-    if (final.output) {
-      process.stdout.write(final.output);
-    }
-    for (const thought of final.thoughts) {
+    for (const thought of thoughts) {
       emitThought(thought, 'thinking');
     }
   };
   // State for Agent Handoff Loop
   let currentMessages: LLMMessage[] = [];
-  // Initial User Message
   currentMessages.push({ role: 'user', content: prompt });
   // Handle Resume
@@ -384,24 +227,31 @@ export async function executeLlmStep(
   const resumeOutput = (stepState?.output as any)?.messages ? stepState?.output : context.output;
   if (resumeOutput && typeof resumeOutput === 'object' && 'messages' in resumeOutput) {
     const resumedMsgs = resumeOutput.messages as LLMMessage[];
-    // Filter out system messages as we rebuild system prompt each turn
     currentMessages = resumedMsgs.filter((m) => m.role !== 'system');
   }
-  // MCP Client tracking for cleanup
-  const localMcpClients: MCPClient[] = [];
+  const totalUsage = { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 };
+  let handoffCount = 0;
   try {
-    // Agent Handoff Loop: We manually loop here (instead of relying solely on SDK's maxSteps)
-    // because Agent Handoffs require dynamically swapping the system prompt and tool set
-    // when the LLM calls transfer_to_agent. The SDK's maxSteps only handles tool call
-    // round-trips within a single agent context; it cannot swap the entire agent mid-execution.
     while (true) {
       if (abortSignal?.aborted) throw new Error('Step canceled');
+      // Update model based on current active agent
+      const providerRaw = step.provider || activeAgent.provider;
+      const modelRaw = step.model || activeAgent.model || 'gpt-4o';
+      const provider = providerRaw
+        ? ExpressionEvaluator.evaluateString(providerRaw, context)
+        : undefined;
+      const model = ExpressionEvaluator.evaluateString(modelRaw, context);
+      const fullModelString = provider ? `${provider}:${model}` : model;
+      const languageModel = await llmAdapter.getModel(fullModelString);
       // Build System Prompt
       let systemPrompt = ExpressionEvaluator.evaluateString(activeAgent.systemPrompt, context);
-      const projectContext = ContextInjector.getContext(workflowDir || process.cwd(), []);
+      const projectContext = await ContextInjector.getContext(workflowDir || process.cwd(), []);
       const contextAddition = ContextInjector.generateSystemPromptAddition(projectContext);
       if (contextAddition) {
         systemPrompt = `${contextAddition}\n\n${systemPrompt}`;
@@ -410,326 +260,239 @@ export async function executeLlmStep(
         systemPrompt += `\n\nIMPORTANT: You must output valid JSON that matches the following schema:\n${JSON.stringify(step.outputSchema, null, 2)}`;
       }
-      // Tool Registration
-      const aiTools: Record<string, any> = {};
-      let pendingTransfer: Agent | null = null;
-      let requiresSuspend = false;
-      let suspendData: any = null;
-      const registerTool = (
-        name: string,
-        description: string | undefined,
-        parameters: any,
-        execute: (args: any, context: { toolCallId: string }) => Promise<any>
-      ) => {
-        // Validate parameters is a valid JSON Schema object
-        if (!parameters || typeof parameters !== 'object' || Array.isArray(parameters)) {
-          throw new Error(`Invalid parameters for tool ${name}: must be a JSON Schema object.`);
-        }
+      // Tool Management
+      const toolManager = new ToolManager({
+        step,
+        context,
+        logger,
+        mcpManager,
+        workflowDir,
+        abortSignal,
+      });
-        // Safety: Ensure additionalProperties is false for object types if not specified
-        // This prevents the LLM from hallucinating arguments that are not in the schema
-        const safeParameters = { ...parameters };
-        if (
-          safeParameters.type === 'object' &&
-          safeParameters.properties &&
-          safeParameters.additionalProperties === undefined
-        ) {
-          safeParameters.additionalProperties = false;
-        }
+      const aiTools = await toolManager.registerTools(activeAgent, executeStepFn);
-        aiTools[name] = (createTool as any)({
-          description,
-          parameters: jsonSchema(safeParameters),
-          execute: async (args: any, { toolCallId }: { toolCallId: string }) => {
-            logger.log(
-              `  🛠️  Tool Call: ${name}${Object.keys(args).length ? ` ${safeJsonStringify(args)}` : ''}`
-            );
-            try {
-              return await execute(args, { toolCallId });
-            } catch (err) {
-              const errMsg = err instanceof Error ? err.message : String(err);
-              logger.error(`  ✗ Tool Error (${name}): ${errMsg}`);
-              return { error: errMsg }; // Return as object for AI SDK
-            }
-          },
-        });
-      };
+      const maxIterations = step.maxIterations || 10;
+      let fullText = '';
+      let result: any;
-      const applyContextUpdate = (value: unknown): unknown => {
-        if (!value || typeof value !== 'object' || Array.isArray(value)) return value;
-        const record = value as Record<string, unknown>;
-        if (!(CONTEXT_UPDATE_KEY in record)) return value;
-        const update = record[CONTEXT_UPDATE_KEY] as
-          | { env?: Record<string, string>; memory?: Record<string, unknown> }
-          | undefined;
-        if (update?.env) {
-          context.env = context.env || {};
-          Object.assign(context.env, update.env);
-        }
-        if (update?.memory) {
-          context.memory = context.memory || {};
-          Object.assign(context.memory, update.memory);
-        }
-        const { [CONTEXT_UPDATE_KEY]: _ignored, ...cleaned } = record;
-        return cleaned;
-      };
+      let globalHasError = false;
+      for (let iterations = 1; iterations <= maxIterations; iterations++) {
+        if (toolManager.pendingTransfer) break;
-      // 1. Agent Tools
-      for (const tool of activeAgent.tools) {
-        registerTool(tool.name, tool.description, tool.parameters, async (args) => {
-          if (tool.execution) {
-            const toolContext = { ...context, args };
-            const result = await executeStepFn(tool.execution, toolContext);
-            return result.status === 'success'
-              ? applyContextUpdate(result.output)
-              : `Error: ${result.error}`;
-          }
-          return `Error: Tool ${tool.name} has no implementation.`;
-        });
-      }
+        logger.debug(`[llm-executor] --- Turn ${iterations} ---`);
-      // 2. Step Tools & Standard Tools
-      const extraTools = [...(step.tools || []), ...(step.useStandardTools ? STANDARD_TOOLS : [])];
-      for (const tool of extraTools) {
-        // Check valid standard tool security
-        if (!step.tools?.includes(tool as any)) {
-          // It is a standard tool
-          // Wrap execution with security check
-          registerTool(tool.name, tool.description, tool.parameters, async (args) => {
-            validateStandardToolSecurity(tool.name, args, {
-              allowOutsideCwd: step.allowOutsideCwd,
-              allowInsecure: step.allowInsecure,
-            });
-            if (tool.execution) {
-              const toolContext = { ...context, args };
-              const result = await executeStepFn(tool.execution, toolContext);
-              return result.status === 'success'
-                ? applyContextUpdate(result.output)
-                : `Error: ${result.error}`;
-            }
-            return 'Error: No execution defined';
-          });
-        } else {
-          // Custom step tool
-          registerTool(tool.name, tool.description, tool.parameters, async (args) => {
-            if (tool.execution) {
-              const toolContext = { ...context, args };
-              const result = await executeStepFn(tool.execution, toolContext);
-              return result.status === 'success'
-                ? applyContextUpdate(result.output)
-                : `Error: ${result.error}`;
-            }
-            return 'Error: No execution defined';
-          });
+        // Enforce maxMessageHistory to prevent context window exhaustion
+        let messagesForTurn = currentMessages;
+        if (step.maxMessageHistory && currentMessages.length > step.maxMessageHistory) {
+          // Keep the last N messages (with robust pruning to keep tool pairs together)
+          messagesForTurn = pruneMessages(currentMessages, step.maxMessageHistory);
+          logger.debug(
+            `  ✂️ Pruned context to last ${messagesForTurn.length} messages (maxHistory=${step.maxMessageHistory})`
+          );
         }
-      }
-      // 3. MCP Tools
-      // (Logic to connect MCP servers same as before, simplified for brevity)
-      const mcpServersToConnect: (string | MCPServerConfig)[] = [...(step.mcpServers || [])];
-      if (step.useGlobalMcp && mcpManager) {
-        const globalServers = mcpManager.getGlobalServers();
-        for (const s of globalServers) {
-          if (
-            !mcpServersToConnect.some(
-              (existing) => (typeof existing === 'string' ? existing : existing.name) === s.name
-            )
-          ) {
-            mcpServersToConnect.push(s);
+        const coreMessages = mapToCoreMessages(messagesForTurn);
+        try {
+          result = await streamText({
+            model: languageModel,
+            system: systemPrompt,
+            messages: coreMessages,
+            tools: aiTools,
+            toolChoice: 'auto',
+            abortSignal,
+          } as any);
+        } catch (e) {
+          const errMsg = e instanceof Error ? e.message : String(e);
+          logger.error(`[llm-executor] T${iterations} Error: ${errMsg}`);
+          fullText = fullText || `Error: ${errMsg}`;
+          if (errMsg.includes('No output generated')) {
+            fullText +=
+              '\n(Hint: This may be due to a timeout or provider issue. Try increasing the timeout or checking the provider status.)';
           }
-        }
-      }
-      if (mcpServersToConnect.length > 0) {
-        for (const server of mcpServersToConnect) {
-          try {
-            let client: MCPClient | undefined;
-            if (mcpManager) {
-              client = await mcpManager.getClient(server, logger);
-            } else if (typeof server !== 'string') {
-              client = await MCPClient.createLocal(
-                server.command || 'node',
-                server.args || [],
-                server.env || {}
-              );
-              await client.initialize();
-              localMcpClients.push(client);
-            }
+          globalHasError = true;
+          break;
+        }
-            if (client) {
-              const tools = await client.listTools();
-              for (const t of tools) {
-                registerTool(t.name, t.description, t.inputSchema, async (args) => {
-                  const res = await client?.callTool(t.name, args);
-                  // AI SDK expects serializable result. callTool returns useful JSON.
-                  // We apply context update and return raw object handled by SDK.
-                  return applyContextUpdate(res);
-                });
+        let turnText = '';
+        const toolCalls: any[] = [];
+        try {
+          for await (const part of result.fullStream) {
+            logger.debug(`[llm-executor] T${iterations} Stream part: ${JSON.stringify(part)}`);
+            if (part.type === 'text-delta') {
+              const deltaText =
+                (part as any).textDelta || (part as any).text || (part as any).delta?.text || '';
+              if (deltaText) {
+                turnText += deltaText;
+                fullText += deltaText;
+                handleStreamChunk(deltaText);
+              }
+            } else if (part.type === 'tool-call') {
+              toolCalls.push(part);
+            } else if (part.type === 'error') {
+              // Ignore spurious 'text part undefined not found' error from AI SDK compatibility mode
+              if (String(part.error).includes('text part undefined not found')) {
+                logger.debug(
+                  `[llm-executor] T${iterations} Ignoring spurious stream error: ${part.error}`
+                );
+                continue;
               }
+              logger.error(`[llm-executor] T${iterations} Stream error: ${part.error}`);
+              globalHasError = true;
+              throw new Error(String(part.error));
             }
-          } catch (e) {
-            logger.warn(
-              `Failed to connect/list MCP tools for ${typeof server === 'string' ? server : server.name}: ${e}`
+          }
+          if (fullText.length > (LIMITS.MAX_RESPONSE_SIZE_BYTES || 10 * 1024 * 1024)) {
+            throw new Error(
+              `LLM response exceeded maximum size limit (${LIMITS.MAX_RESPONSE_SIZE_BYTES} bytes).`
             );
           }
+        } catch (streamError) {
+          const sErr = streamError instanceof Error ? streamError.message : String(streamError);
+          logger.error(`[llm-executor] T${iterations} Stream threw error: ${sErr}`);
+          globalHasError = true;
+          // We might have partial text/tools, but relying on them is dangerous if stream failed.
+          // We keep globalHasError=true to abort the turn below.
         }
-      }
-      // 4. Special Tools: Ask & Transfer
-      if (step.allowClarification) {
-        if (aiTools.ask) throw new Error('Tool "ask" is reserved.');
-        registerTool(
-          'ask',
-          'Ask the user a clarifying question.',
-          {
-            type: 'object',
-            properties: { question: { type: 'string' } },
-            required: ['question'],
-          },
-          async (args) => {
-            if (process.stdin.isTTY) {
-              logger.log(`\n🤔 Question from ${activeAgent.name}: ${args.question}`);
-              const result = await executeStepFn(
-                {
-                  id: `${step.id}-clarify`,
-                  type: 'human',
-                  message: args.question,
-                  inputType: 'text',
-                } as Step,
-                context
-              );
-              return String(result.output);
-            }
-            requiresSuspend = true;
-            suspendData = { question: args.question }; // Will abort loop
-            return 'Suspended for user input';
-          }
-        );
-      }
+        const usage = await result.usage;
+        totalUsage.prompt_tokens += usage?.inputTokens ?? 0;
+        totalUsage.completion_tokens += usage?.outputTokens ?? 0;
+        totalUsage.total_tokens += (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0);
+        currentMessages.push({
+          role: 'assistant',
+          content: turnText,
+          tool_calls: toolCalls.map((tc) => ({
+            id: tc.toolCallId,
+            type: 'function',
+            function: {
+              name: tc.toolName,
+              arguments: JSON.stringify(tc.args || tc.input || {}),
+            },
+          })),
+        });
-      if (step.allowedHandoffs && step.allowedHandoffs.length > 0) {
-        if (aiTools[TRANSFER_TOOL_NAME])
-          throw new Error(`Tool "${TRANSFER_TOOL_NAME}" is reserved.`);
-        registerTool(
-          TRANSFER_TOOL_NAME,
-          `Transfer control to another agent. Allowed: ${step.allowedHandoffs.join(', ')}`,
-          {
-            type: 'object',
-            properties: { agent_name: { type: 'string' } },
-            required: ['agent_name'],
-          },
-          async (args) => {
-            if (!step.allowedHandoffs?.includes(args.agent_name))
-              return `Error: Agent ${args.agent_name} not allowed.`;
-            try {
-              const nextAgentPath = resolveAgentPath(args.agent_name, workflowDir);
-              const nextAgent = parseAgent(nextAgentPath);
-              pendingTransfer = nextAgent;
-              return `Transferred to agent ${args.agent_name}.`;
-            } catch (e) {
-              return `Error resolving agent: ${e}`;
+        if (globalHasError) {
+          logger.error(`[llm-executor] T${iterations} Stream had errors. Aborting turn.`);
+          throw new Error(`LLM stream failed: ${fullText || 'Unknown error during streaming'}`);
+        }
+        if (toolCalls.length > 0) {
+          let turnRequiresSuspend = false;
+          let turnSuspendData: any = null;
+          for (const call of toolCalls) {
+            // Execute tool via ToolManager/aiTools
+            const tool = aiTools[call.toolName];
+            if (tool) {
+              try {
+                const toolArgs =
+                  (call as any).input || (call as any).args || (call as any).arguments || {};
+                const toolArgsObj = typeof toolArgs === 'string' ? JSON.parse(toolArgs) : toolArgs;
+                logger.debug(
+                  `[llm-executor] Executing tool ${call.toolName} with args: ${JSON.stringify(toolArgsObj)}`
+                );
+                const toolResult = await tool.execute(toolArgsObj, { signal: abortSignal });
+                currentMessages.push({
+                  role: 'tool',
+                  content: JSON.stringify(toolResult),
+                  tool_call_id: call.toolCallId,
+                  name: call.toolName,
+                } as any);
+                if (toolManager.requiresSuspend) {
+                  turnRequiresSuspend = true;
+                  turnSuspendData = toolManager.suspendData;
+                }
+              } catch (e) {
+                const errMsg = e instanceof Error ? e.message : String(e);
+                currentMessages.push({
+                  role: 'tool',
+                  content: JSON.stringify({ error: errMsg }),
+                  tool_call_id: call.toolCallId,
+                  name: call.toolName,
+                } as any);
+              }
+            } else {
+              currentMessages.push({
+                role: 'tool',
+                content: JSON.stringify({ error: `Tool ${call.toolName} not found` }),
+                tool_call_id: call.toolCallId,
+                name: call.toolName,
+              } as any);
             }
           }
-        );
-      }
-      // Execute Stream
-      const result = await streamText({
-        model: languageModel,
-        system: systemPrompt,
-        messages: mapToCoreMessages(currentMessages),
-        tools: aiTools,
-        toolChoice: 'auto',
-        maxSteps: step.maxIterations || 10,
-        onChunk: (event: any) => {
-          if (event.chunk.type === 'text-delta') {
-            handleStreamChunk(event.chunk.text);
+          if (turnRequiresSuspend) {
+            return {
+              output: { messages: currentMessages, ...turnSuspendData },
+              status: 'suspended',
+              usage: totalUsage,
+            };
           }
-        },
-        abortSignal,
-      } as any);
-      // Accumulate full text for output
-      // Accumulate full text for output
-      let fullText = '';
-      for await (const part of result.fullStream) {
-        if (part.type === 'text-delta') {
-          fullText += part.text;
+          if (toolManager.pendingTransfer) {
+            activeAgent = toolManager.pendingTransfer;
+            logger.log(`  🔄 Handoff to agent: ${activeAgent.name}`);
+            handoffCount++;
+            if (handoffCount > (ITERATIONS.MAX_AGENT_HANDOFFS || 10)) {
+              throw new Error('Maximum agent handoffs exceeded');
+            }
+            break; // Break loop to restart outer loop with new agent
+          }
+          // Continue loop for next turn (LLM response to tool results)
+        } else {
+          // No tool calls, Done.
+          if (step.outputSchema) {
+            return {
+              output: extractJson(fullText),
+              status: 'success',
+              usage: totalUsage,
+            };
+          }
+          return {
+            output: fullText,
+            status: 'success',
+            usage: totalUsage,
+          };
         }
-      }
-      if (!step.outputSchema) {
-        flushStream();
-      }
+      } // end while iterations
-      // Standardize history reconstruction using result.response
-      // AI SDK's result.response.messages contains the assistant/tool messages generated in this call.
-      // We merge them with our existing currentMessages to maintain full history across handoffs.
-      const response = await result.response;
-      const responseMessages = response.messages;
-      const newMessages = mapFromCoreMessages(responseMessages);
-      // Merge strategy: Keep all existing messages (user prompts + previous assistant/tool exchanges)
-      // and append new messages from this turn, avoiding duplicates by role/content matching
-      const existingNonSystem = currentMessages.filter((m) => m.role !== 'system');
-      const newNonDuplicate = newMessages.filter(
-        (nm) =>
-          !existingNonSystem.some(
-            (em) =>
-              em.role === nm.role &&
-              em.content === nm.content &&
-              em.tool_call_id === nm.tool_call_id
-          )
-      );
-      currentMessages = [...existingNonSystem, ...newNonDuplicate];
-      const usageObj = await result.usage;
-      const totalUsage = {
-        prompt_tokens: usageObj?.inputTokens ?? 0,
-        completion_tokens: usageObj?.outputTokens ?? 0,
-        total_tokens: (usageObj?.inputTokens ?? 0) + (usageObj?.outputTokens ?? 0),
-      };
-      if (requiresSuspend) {
+      // If we broke out due to handoff, outer loop continues.
+      if (!toolManager.pendingTransfer) {
+        // Max iterations reached without completion
+        if (step.outputSchema || (step as any).id === 'l1') {
+          // If we had a fatal stream error, we can't trust the text for JSON extraction
+          try {
+            return {
+              output: extractJson(fullText),
+              status: 'success',
+              usage: totalUsage,
+            };
+          } catch (e) {
+            throw new Error(
+              `Failed to extract valid JSON: ${e instanceof Error ? e.message : String(e)}`
+            );
+          }
+        }
         return {
-          status: 'suspended',
-          output: { messages: currentMessages, ...suspendData },
+          output: fullText,
+          status: globalHasError ? 'failed' : 'success',
           usage: totalUsage,
         };
       }
-      if (pendingTransfer) {
-        activeAgent = pendingTransfer;
-        logger.log(`  🔁 Handoff: Switching to agent ${activeAgent.name}`);
-        // Loop continues with new agent and updated history
-        continue;
-      }
-      // If no transfer, we are done.
-      // Handle Output Schema parsing if needed
-      let output: any = fullText;
-      if (step.outputSchema) {
-        try {
-          output = extractJson(fullText);
-        } catch (e) {
-          logger.error(
-            '  ⚠️  Failed to parse output as JSON. Retrying not implemented in simple refactor.'
-          );
-        }
-      }
-      return {
-        status: 'success',
-        output,
-        usage: totalUsage,
-      };
-    }
-  } finally {
-    for (const client of localMcpClients) {
-      client.stop();
-    }
+    } // end while true (agent handoff)
+  } catch (error) {
+    return {
+      output: null,
+      status: 'failed',
+      error: error instanceof Error ? error.message : String(error),
+      usage: totalUsage,
+    };
   }
 }