npm - @juspay/neurolink - Versions diffs - 9.26.0 → 9.26.2 - Mend

@juspay/neurolink 9.26.0 → 9.26.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +12 -0
package/dist/adapters/providerImageAdapter.js +6 -0
package/dist/constants/contextWindows.js +2 -0
package/dist/constants/enums.d.ts +2 -0
package/dist/constants/enums.js +2 -0
package/dist/lib/adapters/providerImageAdapter.js +6 -0
package/dist/lib/constants/contextWindows.js +2 -0
package/dist/lib/constants/enums.d.ts +2 -0
package/dist/lib/constants/enums.js +2 -0
package/dist/lib/providers/googleAiStudio.js +135 -89
package/dist/lib/providers/googleNativeGemini3.d.ts +43 -0
package/dist/lib/providers/googleNativeGemini3.js +148 -18
package/dist/lib/providers/googleVertex.js +162 -140
package/dist/providers/googleAiStudio.js +135 -89
package/dist/providers/googleNativeGemini3.d.ts +43 -0
package/dist/providers/googleNativeGemini3.js +148 -18
package/dist/providers/googleVertex.js +162 -140
package/package.json +18 -17

package/dist/lib/providers/googleNativeGemini3.js CHANGED Viewed

@@ -158,7 +158,6 @@ export function sanitizeToolsForGemini(tools) {
             logger.warn(`[Gemini] Failed to sanitize tool "${name}", skipping: ${error instanceof Error ? error.message : String(error)}`);
             // Don't fall back to the original tool — an incompatible schema would fail the Gemini request
             dropped.push(name);
-            continue;
         }
     }
     return { tools: sanitized, dropped };
@@ -171,29 +170,45 @@ export function sanitizeToolsForGemini(tools) {
 export function buildNativeToolDeclarations(tools) {
     const functionDeclarations = [];
     const executeMap = new Map();
+    const skippedTools = [];
     for (const [name, tool] of Object.entries(tools)) {
-        const decl = {
-            name,
-            description: tool.description || `Tool: ${name}`,
-        };
-        if (tool.parameters) {
-            let rawSchema;
-            if (isZodSchema(tool.parameters)) {
-                rawSchema = convertZodToJsonSchema(tool.parameters);
-            }
-            else if (typeof tool.parameters === "object") {
-                rawSchema = tool.parameters;
+        try {
+            const decl = {
+                name,
+                description: tool.description || `Tool: ${name}`,
+            };
+            if (tool.parameters) {
+                let rawSchema;
+                if (isZodSchema(tool.parameters)) {
+                    rawSchema = convertZodToJsonSchema(tool.parameters);
+                }
+                else if (typeof tool.parameters === "object") {
+                    rawSchema = tool.parameters;
+                }
+                else {
+                    rawSchema = { type: "object", properties: {} };
+                }
+                // Unwrap Vercel AI SDK's jsonSchema() wrapper: { jsonSchema: { type: "object", ... } }
+                if (rawSchema.jsonSchema &&
+                    typeof rawSchema.jsonSchema === "object" &&
+                    !rawSchema.type) {
+                    rawSchema = rawSchema.jsonSchema;
+                }
+                decl.parametersJsonSchema = sanitizeSchemaForGemini(inlineJsonSchema(rawSchema));
             }
-            else {
-                rawSchema = { type: "object", properties: {} };
+            functionDeclarations.push(decl);
+            if (tool.execute) {
+                executeMap.set(name, tool.execute);
             }
-            decl.parametersJsonSchema = sanitizeSchemaForGemini(inlineJsonSchema(rawSchema));
         }
-        functionDeclarations.push(decl);
-        if (tool.execute) {
-            executeMap.set(name, tool.execute);
+        catch (err) {
+            skippedTools.push(name);
+            logger.error(`[buildNativeToolDeclarations] Failed to convert tool "${name}":`, err);
         }
     }
+    if (skippedTools.length > 0) {
+        logger.warn(`[buildNativeToolDeclarations] ${skippedTools.length} tool(s) skipped due to schema errors: ${skippedTools.join(", ")}`);
+    }
     return { toolsConfig: [{ functionDeclarations }], executeMap };
 }
 /**
@@ -265,6 +280,121 @@ export async function collectStreamChunks(stream) {
     }
     return { rawResponseParts, stepFunctionCalls, inputTokens, outputTokens };
 }
+/**
+ * Create a push-based text channel that bridges a background producer
+ * (the agentic tool-calling loop) with an async-iterable consumer.
+ *
+ * This enables truly incremental streaming: text parts are yielded to the
+ * caller as they arrive from the network, rather than being buffered until
+ * the model finishes generating.
+ */
+export function createTextChannel() {
+    const queue = [];
+    let done = false;
+    let fatalError = undefined;
+    // Resolve the current "wait for data" promise when new data arrives
+    let notify = null;
+    function wake() {
+        if (notify) {
+            const fn = notify;
+            notify = null;
+            fn();
+        }
+    }
+    function push(text) {
+        if (done) {
+            return;
+        }
+        queue.push({ content: text });
+        wake();
+    }
+    function close() {
+        done = true;
+        wake();
+    }
+    function error(err) {
+        done = true;
+        fatalError = err;
+        wake();
+    }
+    let readIndex = 0;
+    async function* iterable() {
+        try {
+            while (true) {
+                if (readIndex < queue.length) {
+                    yield queue[readIndex++];
+                    // Periodically compact consumed chunks to avoid unbounded retention
+                    if (readIndex > 1024 && readIndex * 2 >= queue.length) {
+                        queue.splice(0, readIndex);
+                        readIndex = 0;
+                    }
+                }
+                else if (done) {
+                    if (fatalError !== undefined) {
+                        throw fatalError instanceof Error
+                            ? fatalError
+                            : new Error(String(fatalError));
+                    }
+                    return;
+                }
+                else {
+                    // Wait until the producer pushes data or signals completion
+                    await new Promise((resolve) => {
+                        notify = resolve;
+                    });
+                }
+            }
+        }
+        finally {
+            // Consumer stopped reading (e.g. disconnect/cancel): stop buffering.
+            done = true;
+            queue.length = 0;
+            notify?.();
+        }
+    }
+    return { push, close, error, iterable: iterable() };
+}
+/**
+ * Iterate a single stream step incrementally, pushing text parts to `channel`
+ * as they arrive from the network while simultaneously accumulating the full
+ * `CollectedChunkResult` needed for history and token accounting.
+ *
+ * Used for all steps (both intermediate tool-calling steps and the final
+ * text-only step).  Text parts are pushed to the channel as they arrive,
+ * enabling truly incremental streaming.  The complete `rawResponseParts`
+ * (including thoughtSignature) are still returned at the end for use by
+ * `pushModelResponseToHistory`.
+ */
+export async function collectStreamChunksIncremental(stream, channel) {
+    const rawResponseParts = [];
+    const stepFunctionCalls = [];
+    let inputTokens = 0;
+    let outputTokens = 0;
+    for await (const chunk of stream) {
+        const chunkRecord = chunk;
+        const candidates = chunkRecord.candidates;
+        const firstCandidate = candidates?.[0];
+        const chunkContent = firstCandidate?.content;
+        if (chunkContent && Array.isArray(chunkContent.parts)) {
+            for (const part of chunkContent.parts) {
+                rawResponseParts.push(part);
+                // Forward text parts to the consumer immediately
+                if (typeof part.text === "string" && part.text.length > 0) {
+                    channel.push(part.text);
+                }
+            }
+        }
+        if (chunk.functionCalls) {
+            stepFunctionCalls.push(...chunk.functionCalls);
+        }
+        const usage = chunkRecord.usageMetadata;
+        if (usage) {
+            inputTokens = Math.max(inputTokens, usage.promptTokenCount || 0);
+            outputTokens = Math.max(outputTokens, usage.candidatesTokenCount || 0);
+        }
+    }
+    return { rawResponseParts, stepFunctionCalls, inputTokens, outputTokens };
+}
 /**
  * Extract text from raw response parts, filtering out non-text parts
  * (thoughtSignature, functionCall) to avoid SDK warnings.

package/dist/lib/providers/googleVertex.js CHANGED Viewed

@@ -1,8 +1,8 @@
+import dns from "node:dns";
 import { createVertex, } from "@ai-sdk/google-vertex";
 import { createVertexAnthropic, } from "@ai-sdk/google-vertex/anthropic";
+import { SpanKind, SpanStatusCode, trace } from "@opentelemetry/api";
 import { embed, embedMany, Output, streamText, } from "ai";
-import { trace, SpanKind, SpanStatusCode } from "@opentelemetry/api";
-import dns from "node:dns";
 import fs from "fs";
 import os from "os";
 import path from "path";
@@ -11,18 +11,18 @@ import { BaseProvider } from "../core/baseProvider.js";
 import { DEFAULT_MAX_STEPS, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
 import { ModelConfigurationManager } from "../core/modelConfiguration.js";
 import { createProxyFetch } from "../proxy/proxyFetch.js";
-import { AuthenticationError, NetworkError, ProviderError, RateLimitError, InvalidModelError, } from "../types/errors.js";
+import { ATTR, tracers, withClientSpan } from "../telemetry/index.js";
+import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/errors.js";
 import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js";
 import { FileDetector } from "../utils/fileDetector.js";
 import { logger } from "../utils/logger.js";
-import { estimateTokens } from "../utils/tokenEstimation.js";
 import { isGemini3Model } from "../utils/modelDetection.js";
 import { calculateCost } from "../utils/pricing.js";
-import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
 import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
 import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
 import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
-import { buildNativeToolDeclarations, buildNativeConfig, computeMaxSteps as computeMaxStepsShared, collectStreamChunks, extractTextFromParts, executeNativeToolCalls, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
+import { estimateTokens } from "../utils/tokenEstimation.js";
+import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
 // Import proper types for multimodal message handling
 // Keep-alive note: Node.js native fetch and undici (used by createProxyFetch)
 // handle HTTP keep-alive internally. The fetchWithRetry wrapper in proxyFetch.ts
@@ -1217,15 +1217,13 @@ export class GoogleVertexProvider extends BaseProvider {
                     toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
                 });
             }
-            // Build config
+            // Build config — systemInstruction stays in config for Gemini 3.x.
+            // The @google/genai SDK maps config.systemInstruction to the HTTP-level
+            // system_instruction field, which is the correct mechanism for all
+            // Gemini 3.x models (including global endpoint).  Older workaround
+            // that moved systemInstruction into user/model content messages caused
+            // "Please use a valid role: user, model" on Gemini 3.1+ preview models.
             const config = buildNativeConfig(options, toolsConfig);
-            // Global endpoint rejects systemInstruction for Gemini 3.x —
-            // move it into a prefixed user message (same fix as generate path)
-            let streamSystemPreamble;
-            if (effectiveLocation === "global" && config.systemInstruction) {
-                streamSystemPreamble = config.systemInstruction;
-                delete config.systemInstruction;
-            }
             // Add JSON output format support for native SDK stream
             if (streamOptions.output?.format === "json" || streamOptions.schema) {
                 config.responseMimeType = "application/json";
@@ -1247,110 +1245,146 @@ export class GoogleVertexProvider extends BaseProvider {
             const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
             const maxSteps = computeMaxStepsShared(options.maxSteps);
             // Inject conversation history so the native path has multi-turn context
-            let currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
-            // Prepend system prompt as a user message for the global endpoint
-            if (streamSystemPreamble) {
-                currentContents = [
-                    {
-                        role: "user",
-                        parts: [
-                            { text: `[System Instructions]\n${streamSystemPreamble}` },
-                        ],
-                    },
-                    {
-                        role: "model",
-                        parts: [{ text: "OK" }],
-                    },
-                    ...currentContents,
-                ];
-            }
-            let finalText = "";
-            let lastStepText = "";
-            let totalInputTokens = 0;
-            let totalOutputTokens = 0;
+            const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
+            // Create a push-based text channel so the caller receives tokens as
+            // they arrive from the network rather than after full buffering.
+            const channel = createTextChannel();
+            // Shared mutable state updated by the background agentic loop.
             const allToolCalls = [];
-            let step = 0;
-            const failedTools = new Map();
-            // Agentic loop for tool calling
-            try {
-                while (step < maxSteps) {
-                    if (timeoutController?.controller.signal.aborted) {
-                        break;
-                    }
-                    step++;
-                    logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
-                    try {
-                        const stream = await client.models.generateContentStream({
-                            model: modelName,
-                            contents: currentContents,
-                            config,
-                            ...(composedSignal
-                                ? { httpOptions: { signal: composedSignal } }
-                                : {}),
-                        });
-                        const chunkResult = await collectStreamChunks(stream);
-                        totalInputTokens += chunkResult.inputTokens;
-                        totalOutputTokens += chunkResult.outputTokens;
-                        const stepText = extractTextFromParts(chunkResult.rawResponseParts);
-                        if (chunkResult.stepFunctionCalls.length === 0) {
-                            finalText = stepText;
-                            break;
+            // Shared metadata object mutated by the background loop so that
+            // responseTime and totalToolExecutions reflect final values.
+            const metadata = {
+                streamId: `native-vertex-${Date.now()}`,
+                startTime,
+                responseTime: 0,
+                totalToolExecutions: 0,
+            };
+            // analyticsResolvers lets the background loop settle the analytics
+            // promise once token counts are known (after the loop completes).
+            let analyticsResolve;
+            let analyticsReject;
+            const analyticsPromise = new Promise((res, rej) => {
+                analyticsResolve = res;
+                analyticsReject = rej;
+            });
+            // Run the agentic loop in the background without awaiting it here,
+            // so we can return the StreamResult (with channel.iterable) immediately.
+            const loopPromise = (async () => {
+                let lastStepText = "";
+                let totalInputTokens = 0;
+                let totalOutputTokens = 0;
+                let step = 0;
+                let completedWithFinalAnswer = false;
+                const failedTools = new Map();
+                try {
+                    // Agentic loop for tool calling
+                    while (step < maxSteps) {
+                        if (composedSignal?.aborted) {
+                            throw composedSignal.reason instanceof Error
+                                ? composedSignal.reason
+                                : new Error("Request aborted");
                         }
-                        lastStepText = stepText;
-                        // Record tool call events on the span
-                        for (const fc of chunkResult.stepFunctionCalls) {
-                            span.addEvent("gen_ai.tool_call", {
-                                "tool.name": fc.name,
-                                "tool.step": step,
+                        step++;
+                        logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
+                        try {
+                            const rawStream = await client.models.generateContentStream({
+                                model: modelName,
+                                contents: currentContents,
+                                config,
+                                ...(composedSignal
+                                    ? { httpOptions: { signal: composedSignal } }
+                                    : {}),
+                            });
+                            // For every step, use incremental collection so text parts
+                            // are pushed to the channel as they arrive.  For intermediate
+                            // steps (those that produce function calls) we still need the
+                            // complete rawResponseParts for pushModelResponseToHistory,
+                            // which collectStreamChunksIncremental provides at stream end.
+                            const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
+                            totalInputTokens += chunkResult.inputTokens;
+                            totalOutputTokens += chunkResult.outputTokens;
+                            const stepText = extractTextFromParts(chunkResult.rawResponseParts);
+                            // If no function calls, this was the final step — channel
+                            // already received all text parts incrementally.
+                            if (chunkResult.stepFunctionCalls.length === 0) {
+                                completedWithFinalAnswer = true;
+                                break;
+                            }
+                            lastStepText = stepText;
+                            // Record tool call events on the span
+                            for (const fc of chunkResult.stepFunctionCalls) {
+                                span.addEvent("gen_ai.tool_call", {
+                                    "tool.name": fc.name,
+                                    "tool.step": step,
+                                });
+                            }
+                            logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
+                            pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
+                            const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
+                            // Function/tool responses must use role: "user" — the
+                            // @google/genai SDK's validateHistory() only accepts "user"
+                            // and "model" roles (matching automaticFunctionCalling).
+                            currentContents.push({
+                                role: "user",
+                                parts: functionResponses,
                             });
                         }
-                        logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
-                        pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
-                        const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
-                        // Add function responses to history
-                        currentContents.push({
-                            role: "function",
-                            parts: functionResponses,
-                        });
+                        catch (error) {
+                            logger.error("[GoogleVertex] Native SDK error", error);
+                            throw this.handleProviderError(error);
+                        }
                     }
-                    catch (error) {
-                        logger.error("[GoogleVertex] Native SDK error", error);
-                        throw this.handleProviderError(error);
+                    // Handle max-steps termination: if the model was still calling
+                    // tools when we hit the limit, push a synthetic final message.
+                    if (step >= maxSteps && !completedWithFinalAnswer) {
+                        const fallback = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, "", // finalText is empty — model didn't stop on its own
+                        lastStepText);
+                        if (fallback) {
+                            channel.push(fallback);
+                        }
                     }
+                    const responseTime = Date.now() - startTime;
+                    // Propagate final values to the shared metadata object so that
+                    // the already-returned StreamResult reflects accurate telemetry.
+                    metadata.responseTime = responseTime;
+                    metadata.totalToolExecutions = allToolCalls.length;
+                    // Set token usage and finish reason on the span
+                    span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
+                    span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
+                    span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
+                        ? "max_steps"
+                        : "stop");
+                    analyticsResolve({
+                        provider: this.providerName,
+                        model: modelName,
+                        tokenUsage: {
+                            input: totalInputTokens,
+                            output: totalOutputTokens,
+                            total: totalInputTokens + totalOutputTokens,
+                        },
+                        requestDuration: responseTime,
+                        timestamp: new Date().toISOString(),
+                    });
+                    channel.close();
                 }
-            }
-            finally {
-                timeoutController?.cleanup();
-            }
-            finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
-            const responseTime = Date.now() - startTime;
-            // Set token usage and finish reason on the span
-            span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
-            span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
-            span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop");
-            // Create async iterable for streaming result
-            async function* createTextStream() {
-                yield { content: finalText };
-            }
+                catch (err) {
+                    channel.error(err);
+                    analyticsReject(err);
+                }
+                finally {
+                    timeoutController?.cleanup();
+                }
+            })();
+            // Suppress unhandled-rejection warnings on loopPromise — errors are
+            // forwarded to the channel and will surface when the caller iterates.
+            loopPromise.catch(() => undefined);
             return {
-                stream: createTextStream(),
+                stream: channel.iterable,
                 provider: this.providerName,
                 model: modelName,
-                usage: {
-                    input: totalInputTokens,
-                    output: totalOutputTokens,
-                    total: totalInputTokens + totalOutputTokens,
-                },
-                toolCalls: allToolCalls.map((tc) => ({
-                    toolName: tc.toolName,
-                    args: tc.args,
-                })),
-                metadata: {
-                    streamId: `native-vertex-${Date.now()}`,
-                    startTime,
-                    responseTime,
-                    totalToolExecutions: allToolCalls.length,
-                },
+                toolCalls: allToolCalls,
+                analytics: analyticsPromise,
+                metadata,
             };
         });
     }
@@ -1378,7 +1412,9 @@ export class GoogleVertexProvider extends BaseProvider {
                 location: effectiveLocation,
             });
             // Build contents from input with multimodal support
-            const inputText = options.prompt || options.input?.text || "Please respond.";
+            // Prefer input.text over prompt — processCSVFilesForNativeSDK enriches
+            // input.text with inlined CSV data, so using prompt first would discard it.
+            const inputText = options.input?.text || options.prompt || "Please respond.";
             const multimodalInput = options.input;
             const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
             // Get tools from SDK and options
@@ -1404,16 +1440,9 @@ export class GoogleVertexProvider extends BaseProvider {
                     toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
                 });
             }
-            // Build config
+            // Build config — systemInstruction stays in config for Gemini 3.x.
+            // See stream path comment for rationale.
             const config = buildNativeConfig(options, toolsConfig);
-            // Global endpoint rejects systemInstruction for Gemini 3.x, returning
-            // "Please use a valid role: user, model." Move it into a prefixed
-            // user message so the model still receives the system context.
-            let systemPreamble;
-            if (effectiveLocation === "global" && config.systemInstruction) {
-                systemPreamble = config.systemInstruction;
-                delete config.systemInstruction;
-            }
             // Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
             // For now, schemas are handled via the AI SDK fallback path, not native SDK
             // TODO: Implement proper $ref resolution for complex nested schemas
@@ -1423,21 +1452,7 @@ export class GoogleVertexProvider extends BaseProvider {
             const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
             const maxSteps = computeMaxStepsShared(options.maxSteps);
             // Inject conversation history so the native path has multi-turn context
-            let currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
-            // Prepend system prompt as a user message for the global endpoint
-            if (systemPreamble) {
-                currentContents = [
-                    {
-                        role: "user",
-                        parts: [{ text: `[System Instructions]\n${systemPreamble}` }],
-                    },
-                    {
-                        role: "model",
-                        parts: [{ text: "OK" }],
-                    },
-                    ...currentContents,
-                ];
-            }
+            const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
             let finalText = "";
             let lastStepText = "";
             let totalInputTokens = 0;
@@ -1449,8 +1464,10 @@ export class GoogleVertexProvider extends BaseProvider {
             try {
                 // Agentic loop for tool calling
                 while (step < maxSteps) {
-                    if (timeoutController?.controller.signal.aborted) {
-                        break;
+                    if (composedSignal?.aborted) {
+                        throw composedSignal.reason instanceof Error
+                            ? composedSignal.reason
+                            : new Error("Request aborted");
                     }
                     step++;
                     logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
@@ -1483,9 +1500,11 @@ export class GoogleVertexProvider extends BaseProvider {
                         logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
                         pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
                         const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
-                        // Add function responses to history
+                        // Function/tool responses must use role: "user" — the
+                        // @google/genai SDK's validateHistory() only accepts "user"
+                        // and "model" roles (matching automaticFunctionCalling).
                         currentContents.push({
-                            role: "function",
+                            role: "user",
                             parts: functionResponses,
                         });
                     }
@@ -1598,13 +1617,16 @@ export class GoogleVertexProvider extends BaseProvider {
             ? { prompt: optionsOrPrompt }
             : optionsOrPrompt;
         const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
+        // Structured output (JSON format or schema) is incompatible with tools on Gemini.
+        // Mirror the stream path pattern to prevent silent downgrade on the generate path.
+        const wantsStructuredOutput = options.output?.format === "json" || !!options.schema;
         // Check if we should use native SDK for Gemini 3 with tools
-        const shouldUseTools = !options.disableTools && this.supportsTools();
+        const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
         const sdkTools = shouldUseTools ? await this.getAllTools() : {};
         const hasTools = shouldUseTools &&
             (Object.keys(sdkTools).length > 0 ||
                 (options.tools && Object.keys(options.tools).length > 0));
-        if (isGemini3Model(modelName) && hasTools) {
+        if (isGemini3Model(modelName) && hasTools && !wantsStructuredOutput) {
             // Process CSV files before routing to native SDK (bypasses normal message builder)
             const processedOptions = await this.processCSVFilesForNativeSDK(options);
             // Merge SDK tools into options for native SDK path
@@ -2434,7 +2456,7 @@ export class GoogleVertexProvider extends BaseProvider {
                 if (!isRetryable || attempt === maxRetries) {
                     throw error;
                 }
-                const delay = baseDelay * Math.pow(2, attempt - 1);
+                const delay = baseDelay * 2 ** (attempt - 1);
                 logger.warn(`[GoogleVertexProvider] Auth token transient error (${err?.code || err?.message}), retrying in ${delay}ms (attempt ${attempt}/${maxRetries})`);
                 await new Promise((r) => setTimeout(r, delay));
             }