npm - @mcpmesh/sdk - Versions diffs - 2.3.0 → 2.4.0 - Mend

@mcpmesh/sdk 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/dist/__tests__/llm-agent-model-params.test.js +83 -0
package/dist/__tests__/llm-agent-model-params.test.js.map +1 -1
package/dist/__tests__/llm-max-iterations.test.d.ts +20 -0
package/dist/__tests__/llm-max-iterations.test.d.ts.map +1 -0
package/dist/__tests__/llm-max-iterations.test.js +250 -0
package/dist/__tests__/llm-max-iterations.test.js.map +1 -0
package/dist/__tests__/llm-mesh-error-mapping.test.d.ts +16 -0
package/dist/__tests__/llm-mesh-error-mapping.test.d.ts.map +1 -0
package/dist/__tests__/llm-mesh-error-mapping.test.js +135 -0
package/dist/__tests__/llm-mesh-error-mapping.test.js.map +1 -0
package/dist/__tests__/llm-provider-output-mode.test.d.ts +21 -0
package/dist/__tests__/llm-provider-output-mode.test.d.ts.map +1 -0
package/dist/__tests__/llm-provider-output-mode.test.js +115 -0
package/dist/__tests__/llm-provider-output-mode.test.js.map +1 -0
package/dist/__tests__/llm-provider-system-synthesis.test.d.ts +20 -0
package/dist/__tests__/llm-provider-system-synthesis.test.d.ts.map +1 -0
package/dist/__tests__/llm-provider-system-synthesis.test.js +167 -0
package/dist/__tests__/llm-provider-system-synthesis.test.js.map +1 -0
package/dist/__tests__/llm-response-model.test.d.ts +10 -0
package/dist/__tests__/llm-response-model.test.d.ts.map +1 -0
package/dist/__tests__/llm-response-model.test.js +92 -0
package/dist/__tests__/llm-response-model.test.js.map +1 -0
package/dist/__tests__/proxy-timeout-guard.test.d.ts +12 -0
package/dist/__tests__/proxy-timeout-guard.test.d.ts.map +1 -0
package/dist/__tests__/proxy-timeout-guard.test.js +85 -0
package/dist/__tests__/proxy-timeout-guard.test.js.map +1 -0
package/dist/__tests__/registry-disconnect-retains-deps.spec.d.ts +2 -0
package/dist/__tests__/registry-disconnect-retains-deps.spec.d.ts.map +1 -0
package/dist/__tests__/registry-disconnect-retains-deps.spec.js +101 -0
package/dist/__tests__/registry-disconnect-retains-deps.spec.js.map +1 -0
package/dist/__tests__/response-parser.test.js +29 -0
package/dist/__tests__/response-parser.test.js.map +1 -1
package/dist/agent.d.ts.map +1 -1
package/dist/agent.js +4 -0
package/dist/agent.js.map +1 -1
package/dist/api-runtime.d.ts.map +1 -1
package/dist/api-runtime.js +8 -1
package/dist/api-runtime.js.map +1 -1
package/dist/express.d.ts.map +1 -1
package/dist/express.js +8 -1
package/dist/express.js.map +1 -1
package/dist/llm-agent.d.ts +34 -0
package/dist/llm-agent.d.ts.map +1 -1
package/dist/llm-agent.js +239 -434
package/dist/llm-agent.js.map +1 -1
package/dist/llm-provider.d.ts +33 -4
package/dist/llm-provider.d.ts.map +1 -1
package/dist/llm-provider.js +91 -4
package/dist/llm-provider.js.map +1 -1
package/dist/llm.d.ts +1 -1
package/dist/llm.d.ts.map +1 -1
package/dist/llm.js +8 -5
package/dist/llm.js.map +1 -1
package/dist/provider-handlers/gemini-handler.d.ts.map +1 -1
package/dist/provider-handlers/gemini-handler.js +2 -14
package/dist/provider-handlers/gemini-handler.js.map +1 -1
package/dist/provider-handlers/openai-handler.d.ts.map +1 -1
package/dist/provider-handlers/openai-handler.js +2 -15
package/dist/provider-handlers/openai-handler.js.map +1 -1
package/dist/provider-handlers/provider-handler.d.ts +12 -0
package/dist/provider-handlers/provider-handler.d.ts.map +1 -1
package/dist/provider-handlers/provider-handler.js +24 -0
package/dist/provider-handlers/provider-handler.js.map +1 -1
package/dist/proxy.d.ts.map +1 -1
package/dist/proxy.js +189 -254
package/dist/proxy.js.map +1 -1
package/dist/response-parser.d.ts +10 -0
package/dist/response-parser.d.ts.map +1 -1
package/dist/response-parser.js +55 -0
package/dist/response-parser.js.map +1 -1
package/dist/tracing.d.ts +12 -0
package/dist/tracing.d.ts.map +1 -1
package/dist/tracing.js +37 -0
package/dist/tracing.js.map +1 -1
package/dist/types.d.ts +10 -2
package/dist/types.d.ts.map +1 -1
package/package.json +2 -2

package/dist/llm-agent.js (changed)

@@ -33,12 +33,10 @@ import { zodToJsonSchema } from "zod-to-json-schema";
 import { renderTemplate } from "./template.js";
 import { ResponseParser } from "./response-parser.js";
 import { MaxIterationsError, LLMAPIError, ToolExecutionError, } from "./errors.js";
-import { parseSSEResponse } from "./sse.js";
 import { resolveMediaInputs } from "./media/index.js";
-import { getCurrentTraceContext, getCurrentPropagatedHeaders, streamMcpTool, DEFAULT_CALL_OPTIONS, } from "./proxy.js";
-import { generateSpanId, publishTraceSpan, createTraceHeaders, injectTraceContext, } from "./tracing.js";
-import { fetchWithTimeout, isTimeoutError } from "./timeout-utils.js";
-import { getDispatcher } from "./http-pool.js";
+import { callMcpTool, streamMcpTool, DEFAULT_CALL_OPTIONS, } from "./proxy.js";
+import { isTimeoutError } from "./timeout-utils.js";
+import { envMaxIterations, sanitizeMaxIterations } from "./llm-provider.js";
 /**
  * Mesh provider that delegates to an LLM provider discovered via mesh.
  */
@@ -51,8 +49,16 @@ export class MeshDelegatedProvider {
         this.functionName = functionName;
         this.parallelToolCalls = parallelToolCalls;
     }
-    async complete(model, messages, tools, options) {
-        // Build MeshLlmRequest structure (matches Python claude_provider schema)
+    /**
+     * Build the MeshLlmRequest body shared by complete() and streamComplete().
+     *
+     * Assembles model_params (with the escape-hatch merge + typed overrides),
+     * wraps messages/tools into the MeshLlmRequest, and returns it pre-wrapped
+     * in the ``{ request }`` arguments object. Callers inject trace context /
+     * propagated headers into ``args`` afterward (per-caller — complete() uses
+     * injectTraceAndHeaders, streamComplete() lets streamMcpTool() handle it).
+     */
+    buildMeshLlmRequest(model, messages, tools, options) {
         const modelParams = {};
         // Escape-hatch merge: callers can pass vendor-specific kwargs
         // (e.g., thinking_config, output_config) via options.modelParams.
@@ -84,6 +90,18 @@ export class MeshDelegatedProvider {
         if (this.parallelToolCalls) {
             modelParams.parallel_tool_calls = true;
         }
+        // Issue #1116: forward the provider-managed loop cap. Typed field, so it
+        // takes precedence over any escape-hatch modelParams.max_iterations above.
+        if (options?.maxIterations !== undefined) {
+            modelParams.max_iterations = options.maxIterations;
+        }
+        // Issue #1112: forward the consumer-supplied output_mode override ONLY when
+        // the user explicitly set it (undefined = auto = provider's per-vendor
+        // selection; omitting keeps the provider byte-identical to today). Typed
+        // field, so it takes precedence over any escape-hatch modelParams.output_mode.
+        if (options?.outputMode !== undefined) {
+            modelParams.output_mode = options.outputMode;
+        }
         const request = {
             messages,
         };
@@ -96,152 +114,82 @@ export class MeshDelegatedProvider {
         }
         // Wrap in "request" parameter as expected by Python claude_provider
         const args = { request };
+        return { request, args };
+    }
+    async complete(model, messages, tools, options) {
+        // Build MeshLlmRequest structure (matches Python claude_provider schema).
+        // The {request} wrapper stays; callMcpTool injects trace context internally.
+        const { args } = this.buildMeshLlmRequest(model, messages, tools, options);
         // Set up timeout (default 300s to match Python SDK's stream_timeout)
         const timeoutMs = parseInt(process.env.MESH_PROVIDER_TIMEOUT_MS || "300000", 10);
-        // Tracing: propagate context to downstream provider
-        const traceCtx = getCurrentTraceContext();
-        const traceSpanId = traceCtx ? generateSpanId() : null;
-        const traceStartTime = Date.now() / 1000;
-        // Inject trace context and propagated headers into args via Rust core
-        const delegatedPropHeaders = getCurrentPropagatedHeaders();
-        if (traceCtx && traceSpanId) {
-            try {
-                const argsJson = JSON.stringify(args);
-                const headersJson = Object.keys(delegatedPropHeaders).length > 0 ? JSON.stringify(delegatedPropHeaders) : undefined;
-                const injectedJson = injectTraceContext(argsJson, traceCtx.traceId, traceSpanId, headersJson);
-                const injected = JSON.parse(injectedJson);
-                Object.assign(args, injected);
-            }
-            catch {
-                // Fallback to manual injection
-                args._trace_id = traceCtx.traceId;
-                args._parent_span = traceSpanId;
-                if (Object.keys(delegatedPropHeaders).length > 0) {
-                    args._mesh_headers = { ...delegatedPropHeaders };
-                }
-            }
-        }
-        else if (Object.keys(delegatedPropHeaders).length > 0) {
-            args._mesh_headers = { ...delegatedPropHeaders };
-        }
-        let traceSuccess = true;
-        let traceError = null;
+        // Route through the shared callMcpTool (trace injection, span publish,
+        // dispatcher pooling, job-cancel wiring all handled internally). LLM calls
+        // are expensive / non-idempotent so disable retries; LLM responses can
+        // exceed the default 10 MiB cap so raise it to effectively unbounded.
+        const callOptions = {
+            ...DEFAULT_CALL_OPTIONS,
+            timeout: timeoutMs,
+            maxAttempts: 1,
+            maxResponseSize: Number.MAX_SAFE_INTEGER,
+        };
+        // Call the mesh provider via MCP. callMcpTool throws a plain Error on
+        // failure — re-wrap into LLMAPIError to preserve the public error type.
+        let content;
         try {
-            // Call the mesh provider via MCP
-            let response;
-            try {
-                response = await fetchWithTimeout(`${this.endpoint}/mcp`, {
-                    method: "POST",
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Accept": "application/json, text/event-stream",
-                        ...(traceCtx && traceSpanId ? createTraceHeaders(traceCtx.traceId, traceSpanId) : {}),
-                        ...getCurrentPropagatedHeaders(),
-                    },
-                    body: JSON.stringify({
-                        jsonrpc: "2.0",
-                        id: Date.now(),
-                        method: "tools/call",
-                        params: {
-                            name: this.functionName,
-                            arguments: args,
-                        },
-                    }),
-                    timeout: timeoutMs,
-                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    dispatcher: getDispatcher(`${this.endpoint}/mcp`),
-                });
-            }
-            catch (error) {
-                if (isTimeoutError(error)) {
-                    throw new LLMAPIError(408, `Request timed out after ${timeoutMs}ms`, `mesh:${this.endpoint}`);
-                }
-                throw new LLMAPIError(0, `Fetch failed: ${error instanceof Error ? error.message : String(error)}`, `mesh:${this.endpoint}`);
-            }
-            if (!response.ok) {
-                const error = await response.text();
-                throw new LLMAPIError(response.status, error, `mesh:${this.endpoint}`);
-            }
-            // Handle SSE response from FastMCP stateless HTTP stream
-            const responseText = await response.text();
-            const result = parseSSEResponse(responseText);
-            if (result.error) {
-                throw new Error(`Mesh provider RPC error: ${result.error.message}`);
-            }
-            // Parse the MCP result content
-            const content = result.result?.content?.[0];
-            if (!content || content.type !== "text") {
-                throw new Error("Invalid response from mesh provider");
-            }
-            // Check for MCP tool execution error (isError flag in result)
-            // eslint-disable-next-line @typescript-eslint/no-explicit-any
-            if (result.result?.isError) {
-                throw new Error(`Mesh provider tool error: ${content.text}`);
-            }
-            // Parse the LLM provider response
-            // Format: { role, content, tool_calls?, _mesh_usage? }
-            const meshResponse = JSON.parse(content.text);
-            // Validate role - LLM responses should always be "assistant"
-            let validatedRole = "assistant";
-            if (meshResponse.role !== "assistant") {
-                console.warn(`[mesh.llm] Unexpected role "${meshResponse.role}" from mesh provider, defaulting to "assistant"`);
-            }
-            // Convert to OpenAI format expected by MeshLlmAgent
-            const openAiResponse = {
-                id: `mesh-${Date.now()}`,
-                object: "chat.completion",
-                created: Math.floor(Date.now() / 1000),
-                model: "mesh-delegated",
-                choices: [
-                    {
-                        index: 0,
-                        message: {
-                            role: validatedRole,
-                            content: meshResponse.content,
-                            tool_calls: meshResponse.tool_calls,
-                        },
-                        finish_reason: meshResponse.tool_calls ? "tool_calls" : "stop",
-                    },
-                ],
-                usage: meshResponse._mesh_usage
-                    ? {
-                        prompt_tokens: meshResponse._mesh_usage.prompt_tokens,
-                        completion_tokens: meshResponse._mesh_usage.completion_tokens,
-                        total_tokens: meshResponse._mesh_usage.prompt_tokens +
-                            meshResponse._mesh_usage.completion_tokens,
-                    }
-                    : undefined,
-            };
-            return openAiResponse;
+            content = await callMcpTool(this.endpoint, this.functionName, args, callOptions, "mesh-llm");
         }
         catch (err) {
-            traceSuccess = false;
-            traceError = err instanceof Error ? err.message : String(err);
-            throw err;
-        }
-        finally {
-            if (traceCtx && traceSpanId) {
-                const traceEndTime = Date.now() / 1000;
-                const traceDurationMs = (traceEndTime - traceStartTime) * 1000;
-                publishTraceSpan({
-                    traceId: traceCtx.traceId,
-                    spanId: traceSpanId,
-                    parentSpan: traceCtx.parentSpanId,
-                    functionName: "proxy_call_wrapper",
-                    startTime: traceStartTime,
-                    endTime: traceEndTime,
-                    durationMs: traceDurationMs,
-                    success: traceSuccess,
-                    error: traceError,
-                    resultType: traceSuccess ? "object" : "error",
-                    argsCount: 0,
-                    kwargsCount: 0,
-                    dependencies: [this.endpoint],
-                    injectedDependencies: 0,
-                    meshPositions: [],
-                }).catch(() => { });
-            }
-        }
+            const message = err instanceof Error ? err.message : String(err);
+            // callMcpTool catches the AbortError and re-throws a plain Error whose
+            // message is "MCP call timed out after <N>ms", so isTimeoutError (name ===
+            // "AbortError") no longer matches here — also detect the message.
+            const isTimeout = isTimeoutError(err) ||
+                (err instanceof Error && /timed out/i.test(err.message));
+            if (isTimeout) {
+                throw new LLMAPIError(408, message, `mesh:${this.endpoint}`);
+            }
+            throw new LLMAPIError(0, message, `mesh:${this.endpoint}`);
+        }
+        // The mesh provider returns a single text content item whose text is the
+        // LLM provider response JSON. callMcpTool joins/extracts it to a string.
+        if (typeof content !== "string") {
+            throw new Error("Invalid response from mesh provider");
+        }
+        // Parse the LLM provider response
+        // Format: { role, content, tool_calls?, _mesh_usage? }
+        const meshResponse = JSON.parse(content);
+        // Validate role - LLM responses should always be "assistant"
+        let validatedRole = "assistant";
+        if (meshResponse.role !== "assistant") {
+            console.warn(`[mesh.llm] Unexpected role "${meshResponse.role}" from mesh provider, defaulting to "assistant"`);
+        }
+        // Convert to OpenAI format expected by MeshLlmAgent
+        const openAiResponse = {
+            id: `mesh-${Date.now()}`,
+            object: "chat.completion",
+            created: Math.floor(Date.now() / 1000),
+            model: "mesh-delegated",
+            choices: [
+                {
+                    index: 0,
+                    message: {
+                        role: validatedRole,
+                        content: meshResponse.content,
+                        tool_calls: meshResponse.tool_calls,
+                    },
+                    finish_reason: meshResponse.tool_calls ? "tool_calls" : "stop",
+                },
+            ],
+            usage: meshResponse._mesh_usage
+                ? {
+                    prompt_tokens: meshResponse._mesh_usage.prompt_tokens,
+                    completion_tokens: meshResponse._mesh_usage.completion_tokens,
+                    total_tokens: meshResponse._mesh_usage.prompt_tokens +
+                        meshResponse._mesh_usage.completion_tokens,
+                }
+                : undefined,
+        };
+        return openAiResponse;
     }
     /**
      * Stream chunks from the mesh-delegated provider's streaming variant.
@@ -257,40 +205,8 @@ export class MeshDelegatedProvider {
      * ``ai.mcpmesh.stream`` tag opt-in (see ``MeshLlmAgent.stream()``).
      */
     async *streamComplete(model, messages, tools, options) {
-        // Build MeshLlmRequest body — same shape as complete()
-        const modelParams = {};
-        // Escape-hatch merge: callers can pass vendor-specific kwargs
-        // (e.g., thinking_config, output_config) via options.modelParams.
-        // Merged FIRST so typed fields below take precedence on collision.
-        if (options?.modelParams) {
-            Object.assign(modelParams, options.modelParams);
-        }
-        if (model && model !== "default") {
-            modelParams.model = model;
-        }
-        if (options?.maxOutputTokens)
-            modelParams.max_tokens = options.maxOutputTokens;
-        if (options?.temperature !== undefined)
-            modelParams.temperature = options.temperature;
-        if (options?.topP !== undefined)
-            modelParams.top_p = options.topP;
-        if (options?.stop)
-            modelParams.stop = options.stop;
-        if (options?.outputSchema) {
-            modelParams.output_schema = options.outputSchema.schema;
-            modelParams.output_type_name = options.outputSchema.name;
-        }
-        if (this.parallelToolCalls) {
-            modelParams.parallel_tool_calls = true;
-        }
-        const request = { messages };
-        if (Object.keys(modelParams).length > 0) {
-            request.model_params = modelParams;
-        }
-        if (tools && tools.length > 0) {
-            request.tools = tools;
-        }
-        const args = { request };
+        // Build MeshLlmRequest body — same shape as complete().
+        const { args } = this.buildMeshLlmRequest(model, messages, tools, options);
         // streamMcpTool() handles trace context injection / propagated headers /
         // dispatcher pooling internally — same path as createProxy().stream().
         // Match complete()'s env-backed timeout (MESH_PROVIDER_TIMEOUT_MS) so
@@ -314,10 +230,36 @@ export class MeshLlmAgent {
     _meta = null;
     _systemPromptOverride = null;
     _parallelLogEmitted = false;
+    // Cached output schema derived from the immutable returnSchema (Issue #459).
+    // Computed once: `null` means "not yet computed", an object holds the result
+    // (which may itself be `undefined` when conversion failed).
+    _outputSchema = null;
+    _outputSchemaSection = null;
     constructor(config) {
         this.config = config;
         this.responseParser = new ResponseParser(config.returnSchema);
     }
+    /**
+     * Build (once) the provider output schema from the immutable returnSchema.
+     * Returns `undefined` when there is no schema or conversion failed.
+     */
+    getOutputSchema() {
+        if (this._outputSchema !== null)
+            return this._outputSchema;
+        let result;
+        if (this.config.returnSchema) {
+            try {
+                const jsonSchema = zodToJsonSchema(this.config.returnSchema);
+                const schemaName = jsonSchema.title ?? "Response";
+                result = { schema: jsonSchema, name: schemaName };
+            }
+            catch {
+                // If schema conversion fails, skip
+            }
+        }
+        this._outputSchema = result;
+        return result;
+    }
     /**
      * Get metadata from the last run.
      */
@@ -337,28 +279,20 @@ export class MeshLlmAgent {
         return this._systemPromptOverride ?? this.config.systemPrompt;
     }
     /**
-     * Run the agentic loop.
+     * Build the initial LlmMessage[] shared by run() and stream():
+     * render the system prompt (+ tool schema injection), optionally append the
+     * output-schema hint, resolve media inputs, and unwind multi-turn history
+     * (attaching resolved media to the last user message).
      *
-     * @param messageInput - User message string or multi-turn message array
-     * @param context - Runtime context with tools and options
-     * @returns Parsed response (validated if schema provided)
+     * The ONLY behavioral knob is opts.includeOutputSchemaHint:
+     * - run() passes `!meshDelegated` (consumer-side schema hint when not delegated).
+     * - stream() passes `false` (always mesh-delegated; provider applies formatting).
      */
-    async run(messageInput, context) {
-        if (this.config.parallelToolCalls && !this._parallelLogEmitted) {
-            console.log("[mesh.llm] parallel tool calls enabled — tools will execute concurrently via Promise.all()");
-            this._parallelLogEmitted = true;
-        }
-        const startTime = Date.now();
-        const toolCalls = [];
-        let totalInputTokens = 0;
-        let totalOutputTokens = 0;
-        // Resolve provider
-        const provider = this.resolveProvider(context);
-        // Build initial messages
+    async buildAgentMessages(messageInput, context, opts) {
         const messages = [];
-        // Build tool definitions first (needed for schema injection)
+        // Build tool definitions first (needed for schema injection).
         // When using mesh delegation, enrich tools with endpoint URLs
-        // so the provider can execute tools directly via MCP proxies
+        // so the provider can execute tools directly via MCP proxies.
         const isMeshDelegated = !!context.meshProvider;
         const toolDefs = this.buildToolDefinitions(context.tools, isMeshDelegated);
         // Add system prompt if configured
@@ -375,7 +309,7 @@ export class MeshLlmAgent {
             // formatting via output_schema in model_params. Consumer doesn't know
             // the provider's vendor, so it must not add vendor-agnostic schema instructions.
             const outputMode = this.config.outputMode ?? "hint";
-            if (!context.meshProvider && outputMode !== "text" && this.config.returnSchema) {
+            if (opts.includeOutputSchemaHint && outputMode !== "text" && this.config.returnSchema) {
                 const outputSchemaSection = this.buildOutputSchemaSection();
                 systemContent += outputSchemaSection;
             }
@@ -426,11 +360,41 @@ export class MeshLlmAgent {
                 }
             }
         }
+        return messages;
+    }
+    /**
+     * Run the agentic loop.
+     *
+     * @param messageInput - User message string or multi-turn message array
+     * @param context - Runtime context with tools and options
+     * @returns Parsed response (validated if schema provided)
+     */
+    async run(messageInput, context) {
+        if (this.config.parallelToolCalls && !this._parallelLogEmitted) {
+            console.log("[mesh.llm] parallel tool calls enabled — tools will execute concurrently via Promise.all()");
+            this._parallelLogEmitted = true;
+        }
+        const startTime = Date.now();
+        const toolCalls = [];
+        let totalInputTokens = 0;
+        let totalOutputTokens = 0;
+        // Resolve provider
+        const provider = this.resolveProvider(context);
+        // Build tool definitions (needed for schema injection + the agentic loop).
+        // When using mesh delegation, enrich tools with endpoint URLs
+        // so the provider can execute tools directly via MCP proxies.
+        const isMeshDelegated = !!context.meshProvider;
+        const toolDefs = this.buildToolDefinitions(context.tools, isMeshDelegated);
+        // Build initial messages (system prompt + tool schema + output-schema hint
+        // + resolved media + multi-turn unwinding). run() includes the output-schema
+        // hint only when NOT mesh-delegated.
+        const messages = await this.buildAgentMessages(messageInput, context, {
+            includeOutputSchemaHint: !isMeshDelegated,
+        });
         // Get effective options (runtime options > MESH_LLM_* env > config)
-        const maxIterations = context.options?.maxIterations ??
-            (process.env.MESH_LLM_MAX_ITERATIONS
-                ? parseInt(process.env.MESH_LLM_MAX_ITERATIONS, 10)
-                : this.config.maxIterations);
+        const maxIterations = sanitizeMaxIterations(context.options?.maxIterations) ??
+            envMaxIterations() ??
+            this.config.maxIterations;
         const maxTokens = context.options?.maxOutputTokens ?? this.config.maxOutputTokens;
         const temperature = context.options?.temperature ?? this.config.temperature;
         // Determine model (mesh provider > MESH_LLM_MODEL env > config > default)
@@ -438,19 +402,8 @@ export class MeshLlmAgent {
             process.env.MESH_LLM_MODEL ??
             this.config.model ??
             this.getDefaultModel();
-        // Build output schema for provider (Issue #459) - computed once before loop
-        let outputSchema;
-        if (this.config.returnSchema) {
-            try {
-                const jsonSchema = zodToJsonSchema(this.config.returnSchema);
-                // Extract schema name from title or use generic name
-                const schemaName = jsonSchema.title ?? "Response";
-                outputSchema = { schema: jsonSchema, name: schemaName };
-            }
-            catch {
-                // If schema conversion fails, skip
-            }
-        }
+        // Build output schema for provider (Issue #459) - computed once, cached
+        const outputSchema = this.getOutputSchema();
         // Agentic loop
         let iteration = 0;
         let finalContent = "";
@@ -465,6 +418,11 @@ export class MeshLlmAgent {
                 outputSchema,
                 // Issue #1019: forward caller-supplied escape-hatch kwargs
                 modelParams: context.options?.modelParams,
+                // Issue #1116: forward the resolved provider-managed loop cap.
+                maxIterations,
+                // Issue #1112: forward the RAW (possibly-undefined) output_mode so the
+                // provider honors an explicit override; unset stays auto.
+                outputMode: this.config.outputMode,
             });
             // Track tokens
             if (response.usage) {
@@ -597,79 +555,25 @@ export class MeshLlmAgent {
         // loop. The mesh-delegated streaming provider runs its own loop on the
         // server side and emits text chunks via notifications/progress; the
         // consumer just yields each one.
-        const messages = [];
-        const isMeshDelegated = true; // by definition: we required meshProvider above
-        const toolDefs = this.buildToolDefinitions(context.tools, isMeshDelegated);
-        // System prompt with template rendering + tool schema injection.
-        // Mirrors run(): mesh-delegated path skips the output-schema hint
-        // because the provider applies vendor-specific output formatting.
-        const systemPromptTemplate = this.getSystemPrompt();
-        if (systemPromptTemplate) {
-            let systemContent = await renderTemplate(systemPromptTemplate, context.templateContext ?? {});
-            if (toolDefs.length > 0) {
-                systemContent += this.buildToolSchemaSection(toolDefs);
-            }
-            messages.push({ role: "system", content: systemContent });
-        }
-        // Resolve media items to OpenAI-compatible image_url parts
-        const mediaItems = context.options?.media;
-        let mediaParts = null;
-        if (mediaItems && mediaItems.length > 0) {
-            mediaParts = await resolveMediaInputs(mediaItems);
-        }
-        if (typeof messageInput === "string") {
-            if (mediaParts && mediaParts.length > 0) {
-                messages.push({
-                    role: "user",
-                    content: [
-                        { type: "text", text: messageInput },
-                        ...mediaParts,
-                    ],
-                });
-            }
-            else {
-                messages.push({ role: "user", content: messageInput });
-            }
-        }
-        else {
-            for (let i = 0; i < messageInput.length; i++) {
-                const msg = messageInput[i];
-                const isLastUser = mediaParts &&
-                    mediaParts.length > 0 &&
-                    msg.role === "user" &&
-                    i === messageInput.length - 1;
-                if (isLastUser) {
-                    messages.push({
-                        role: "user",
-                        content: [
-                            { type: "text", text: msg.content },
-                            ...mediaParts,
-                        ],
-                    });
-                }
-                else {
-                    messages.push({ role: msg.role, content: msg.content });
-                }
-            }
-        }
+        // Mesh-delegated by definition (we required meshProvider above).
+        const toolDefs = this.buildToolDefinitions(context.tools, true);
+        // Build initial messages (system prompt + tool schema + resolved media +
+        // multi-turn unwinding). stream() NEVER includes the output-schema hint —
+        // the provider applies vendor-specific output formatting.
+        const messages = await this.buildAgentMessages(messageInput, context, {
+            includeOutputSchemaHint: false,
+        });
         // Effective options (runtime > env > config)
+        const maxIterations = sanitizeMaxIterations(context.options?.maxIterations) ??
+            envMaxIterations() ??
+            this.config.maxIterations;
         const maxTokens = context.options?.maxOutputTokens ?? this.config.maxOutputTokens;
         const temperature = context.options?.temperature ?? this.config.temperature;
         const model = context.meshProvider?.model ??
             process.env.MESH_LLM_MODEL ??
             this.config.model ??
             this.getDefaultModel();
-        let outputSchema;
-        if (this.config.returnSchema) {
-            try {
-                const jsonSchema = zodToJsonSchema(this.config.returnSchema);
-                const schemaName = jsonSchema.title ?? "Response";
-                outputSchema = { schema: jsonSchema, name: schemaName };
-            }
-            catch {
-                // skip
-            }
-        }
+        const outputSchema = this.getOutputSchema();
         const provider = new MeshDelegatedProvider(context.meshProvider.endpoint, context.meshProvider.functionName, this.config.parallelToolCalls ?? false);
         yield* provider.streamComplete(model, messages, toolDefs.length > 0 ? toolDefs : undefined, {
             maxOutputTokens: maxTokens,
@@ -679,6 +583,11 @@ export class MeshLlmAgent {
             outputSchema,
             // Issue #1019: forward caller-supplied escape-hatch kwargs
             modelParams: context.options?.modelParams,
+            // Issue #1116: forward the resolved provider-managed loop cap.
+            maxIterations,
+            // Issue #1112: forward the RAW (possibly-undefined) output_mode so the
+            // provider honors an explicit override; unset stays auto.
+            outputMode: this.config.outputMode,
         });
     }
     /**
@@ -686,8 +595,9 @@ export class MeshLlmAgent {
      */
     createCallable(context) {
         const agent = this;
-        const callable = async (message, options) => {
-            // Handle context mode
+        // Shared "context merge vs replace" semantics used by both the buffered
+        // callable and the stream method below.
+        const mergeRunContext = (options) => {
             const contextMode = options?.contextMode ?? "merge";
             let mergedTemplateContext;
             if (contextMode === "replace" && options?.context) {
@@ -702,13 +612,14 @@ export class MeshLlmAgent {
                 // No runtime context - use base context
                 mergedTemplateContext = context.templateContext ?? {};
             }
-            // Merge options
-            const mergedContext = {
+            return {
                 ...context,
                 options: options ? { ...context.options, ...options } : context.options,
                 templateContext: mergedTemplateContext,
             };
-            return agent.run(message, mergedContext);
+        };
+        const callable = async (message, options) => {
+            return agent.run(message, mergeRunContext(options));
         };
         // Attach meta property
         Object.defineProperty(callable, "meta", {
@@ -727,23 +638,7 @@ export class MeshLlmAgent {
         // "context merge vs replace" behavior as the buffered call.
         Object.defineProperty(callable, "stream", {
             value: (message, options) => {
-                const contextMode = options?.contextMode ?? "merge";
-                let mergedTemplateContext;
-                if (contextMode === "replace" && options?.context) {
-                    mergedTemplateContext = options.context;
-                }
-                else if (options?.context) {
-                    mergedTemplateContext = { ...context.templateContext, ...options.context };
-                }
-                else {
-                    mergedTemplateContext = context.templateContext ?? {};
-                }
-                const mergedContext = {
-                    ...context,
-                    options: options ? { ...context.options, ...options } : context.options,
-                    templateContext: mergedTemplateContext,
-                };
-                return agent.stream(message, mergedContext);
+                return agent.stream(message, mergeRunContext(options));
             },
         });
         return callable;
@@ -819,17 +714,16 @@ export class MeshLlmAgent {
      * Guides the LLM to produce structured output matching the schema.
      */
     buildOutputSchemaSection() {
-        if (!this.config.returnSchema)
-            return "";
-        try {
-            const jsonSchema = zodToJsonSchema(this.config.returnSchema);
-            const schemaStr = JSON.stringify(jsonSchema, null, 2);
-            return `\n\n## Output Format\n\nYour response MUST be valid JSON matching this schema:\n\n\`\`\`json\n${schemaStr}\n\`\`\`\n\nRespond ONLY with the JSON object, no additional text.`;
-        }
-        catch {
-            // If schema conversion fails, skip injection
+        if (this._outputSchemaSection !== null)
+            return this._outputSchemaSection;
+        const cached = this.getOutputSchema();
+        if (!cached) {
+            this._outputSchemaSection = "";
             return "";
         }
+        const schemaStr = JSON.stringify(cached.schema, null, 2);
+        this._outputSchemaSection = `\n\n## Output Format\n\nYour response MUST be valid JSON matching this schema:\n\n\`\`\`json\n${schemaStr}\n\`\`\`\n\nRespond ONLY with the JSON object, no additional text.`;
+        return this._outputSchemaSection;
     }
     /**
      * Execute a tool call and record metadata.
@@ -876,129 +770,40 @@ export function createLlmToolProxy(toolInfo, description) {
     const proxy = async (args) => {
         // Set up timeout (default 30s for tool calls)
         const timeoutMs = parseInt(process.env.MESH_TOOL_TIMEOUT_MS || "30000", 10);
-        // Tracing: propagate context to downstream tool
-        const traceCtx = getCurrentTraceContext();
-        const traceSpanId = traceCtx ? generateSpanId() : null;
-        const traceStartTime = Date.now() / 1000;
-        let traceSuccess = true;
-        let traceError = null;
-        let resultType = "unknown";
-        // Build arguments with trace context injection via Rust core
-        const toolPropHeaders = getCurrentPropagatedHeaders();
-        let toolArgsWithTrace;
-        if (traceCtx && traceSpanId) {
-            try {
-                const argsJson = JSON.stringify(args);
-                const headersJson = Object.keys(toolPropHeaders).length > 0 ? JSON.stringify(toolPropHeaders) : undefined;
-                const injectedJson = injectTraceContext(argsJson, traceCtx.traceId, traceSpanId, headersJson);
-                toolArgsWithTrace = JSON.parse(injectedJson);
-            }
-            catch {
-                // Fallback to manual injection
-                toolArgsWithTrace = {
-                    ...args,
-                    _trace_id: traceCtx.traceId,
-                    _parent_span: traceSpanId,
-                    ...(Object.keys(toolPropHeaders).length > 0 ? { _mesh_headers: toolPropHeaders } : {}),
-                };
-            }
+        // Route through the shared callMcpTool (trace injection, span publish,
+        // dispatcher pooling, job-cancel wiring all handled internally). Tools may
+        // be non-idempotent so disable retries; LLM tool results can exceed the
+        // default 10 MiB cap so raise it to effectively unbounded.
+        const callOptions = {
+            ...DEFAULT_CALL_OPTIONS,
+            timeout: timeoutMs,
+            maxAttempts: 1,
+            maxResponseSize: Number.MAX_SAFE_INTEGER,
+        };
+        let result;
+        try {
+            result = await callMcpTool(toolInfo.endpoint, toolInfo.functionName, args, callOptions, "mesh-tool");
         }
-        else {
-            toolArgsWithTrace = {
-                ...args,
-                ...(Object.keys(toolPropHeaders).length > 0 ? { _mesh_headers: toolPropHeaders } : {}),
-            };
+        catch (error) {
+            // callMcpTool throws a plain Error on failure — re-wrap into the
+            // ToolExecutionError shape the agentic loop expects.
+            throw new ToolExecutionError(toolInfo.functionName, error instanceof Error ? error : new Error(String(error)));
         }
-        try {
-            // Make MCP call to the tool
-            let response;
-            try {
-                response = await fetchWithTimeout(`${toolInfo.endpoint}/mcp`, {
-                    method: "POST",
-                    headers: {
-                        "Content-Type": "application/json",
-                        "Accept": "application/json, text/event-stream",
-                        ...(traceCtx && traceSpanId ? createTraceHeaders(traceCtx.traceId, traceSpanId) : {}),
-                        ...toolPropHeaders,
-                    },
-                    body: JSON.stringify({
-                        jsonrpc: "2.0",
-                        id: Date.now(),
-                        method: "tools/call",
-                        params: {
-                            name: toolInfo.functionName,
-                            arguments: toolArgsWithTrace,
-                        },
-                    }),
-                    timeout: timeoutMs,
-                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    dispatcher: getDispatcher(`${toolInfo.endpoint}/mcp`),
-                });
-            }
-            catch (error) {
-                if (isTimeoutError(error)) {
-                    throw new ToolExecutionError(toolInfo.functionName, new Error(`Tool call timed out after ${timeoutMs}ms (endpoint: ${toolInfo.endpoint})`));
-                }
-                throw new ToolExecutionError(toolInfo.functionName, error instanceof Error ? error : new Error(String(error)));
-            }
-            if (!response.ok) {
-                const errorBody = await response.text();
-                throw new ToolExecutionError(toolInfo.functionName, new Error(`Tool call failed: ${response.status} ${errorBody}`));
-            }
-            // Handle SSE response from FastMCP stateless HTTP stream
-            const responseText = await response.text();
-            const result = parseSSEResponse(responseText);
-            if (result.error) {
-                throw new Error(`Tool error: ${result.error.message}`);
-            }
-            // Parse result content
-            const content = result.result?.content?.[0];
-            if (!content) {
-                resultType = "null";
-                return null;
-            }
-            if (content.type === "text" && content.text) {
-                // Try to parse as JSON
-                try {
-                    const parsed = JSON.parse(content.text);
-                    resultType = typeof parsed;
-                    return parsed;
-                }
-                catch {
-                    resultType = "string";
-                    return content.text;
-                }
-            }
-            resultType = typeof content;
-            return content;
+        // Multi-content results are returned as structured objects as-is.
+        if (typeof result === "object") {
+            return result;
         }
-        catch (err) {
-            traceSuccess = false;
-            traceError = err instanceof Error ? err.message : String(err);
-            throw err;
+        // Empty content signals "tool returned nothing" — preserve the null result
+        // the hand-rolled path produced rather than an empty string.
+        if (result === "") {
+            return null;
         }
-        finally {
-            if (traceCtx && traceSpanId) {
-                const traceEndTime = Date.now() / 1000;
-                const traceDurationMs = (traceEndTime - traceStartTime) * 1000;
-                publishTraceSpan({
-                    traceId: traceCtx.traceId,
-                    spanId: traceSpanId,
-                    parentSpan: traceCtx.parentSpanId,
-                    functionName: "proxy_call_wrapper",
-                    startTime: traceStartTime,
-                    endTime: traceEndTime,
-                    durationMs: traceDurationMs,
-                    success: traceSuccess,
-                    error: traceError,
-                    resultType: traceSuccess ? resultType : "error",
-                    argsCount: 0,
-                    kwargsCount: 0,
-                    dependencies: [toolInfo.endpoint],
-                    injectedDependencies: 0,
-                    meshPositions: [],
-                }).catch(() => { });
-            }
+        // Parse JSON if possible, otherwise return the raw string.
+        try {
+            return JSON.parse(result);
+        }
+        catch {
+            return result;
         }
     };
     // Safely parse inputSchema - don't let malformed JSON break proxy creation