@juspay/neurolink 9.42.0 → 9.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/auth/anthropicOAuth.js +12 -0
- package/dist/browser/neurolink.min.js +337 -336
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +188 -184
- package/dist/cli/commands/proxy.js +537 -518
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +208 -230
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +138 -188
- package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/auth/anthropicOAuth.js +12 -0
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +208 -230
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +138 -188
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +38 -0
- package/dist/lib/neurolink.js +1858 -1689
- package/dist/lib/providers/googleAiStudio.js +0 -5
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +436 -444
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +73 -64
- package/dist/lib/providers/ollama.js +17 -4
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +139 -140
- package/dist/lib/proxy/claudeFormat.js +12 -4
- package/dist/lib/proxy/oauthFetch.js +298 -318
- package/dist/lib/proxy/proxyConfig.js +3 -1
- package/dist/lib/proxy/proxyFetch.js +250 -222
- package/dist/lib/proxy/requestLogger.js +132 -45
- package/dist/lib/proxy/sseInterceptor.js +36 -11
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +10 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +2726 -2272
- package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
- package/dist/lib/tasks/store/redisTaskStore.js +23 -16
- package/dist/lib/tasks/taskManager.d.ts +2 -0
- package/dist/lib/tasks/taskManager.js +100 -5
- package/dist/lib/telemetry/telemetryService.js +9 -5
- package/dist/lib/types/proxyTypes.d.ts +124 -1
- package/dist/lib/utils/providerHealth.d.ts +1 -0
- package/dist/lib/utils/providerHealth.js +46 -31
- package/dist/lib/utils/providerUtils.js +11 -22
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +38 -0
- package/dist/neurolink.js +1858 -1689
- package/dist/providers/googleAiStudio.js +0 -5
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +436 -444
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +73 -64
- package/dist/providers/ollama.js +17 -4
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +139 -140
- package/dist/proxy/claudeFormat.js +12 -4
- package/dist/proxy/oauthFetch.js +298 -318
- package/dist/proxy/proxyConfig.js +3 -1
- package/dist/proxy/proxyFetch.js +250 -222
- package/dist/proxy/requestLogger.js +132 -45
- package/dist/proxy/sseInterceptor.js +36 -11
- package/dist/server/routes/claudeProxyRoutes.d.ts +10 -1
- package/dist/server/routes/claudeProxyRoutes.js +2726 -2272
- package/dist/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/tasks/backends/bullmqBackend.js +24 -18
- package/dist/tasks/store/redisTaskStore.js +23 -16
- package/dist/tasks/taskManager.d.ts +2 -0
- package/dist/tasks/taskManager.js +100 -5
- package/dist/telemetry/telemetryService.js +9 -5
- package/dist/types/proxyTypes.d.ts +124 -1
- package/dist/utils/providerHealth.d.ts +1 -0
- package/dist/utils/providerHealth.js +46 -31
- package/dist/utils/providerUtils.js +12 -22
- package/package.json +3 -2
- package/scripts/observability/check-proxy-telemetry.mjs +1 -1
- package/scripts/observability/manage-local-openobserve.sh +36 -5
```diff
@@ -777,272 +777,76 @@ export class GoogleVertexProvider extends BaseProvider {
         this.validateStreamOptions(options);
     }
     async executeStream(options, analysisSchema) {
-
-        const
-
-
+        const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
+        const nativeGemini3Result = await this.maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName);
+        if (nativeGemini3Result) {
+            return nativeGemini3Result;
+        }
+        return this.executeAISDKStream(options, analysisSchema, modelName);
+    }
+    async maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName) {
         const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema;
-
-        // Need to check early if we should route to native SDK
-        const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
+        const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
         const optionTools = options.tools || {};
-        const sdkTools =
+        const sdkTools = shouldUseTools ? await this.getAllTools() : {};
         const combinedToolCount = Object.keys(optionTools).length + Object.keys(sdkTools).length;
-        const hasTools =
-        if (isGemini3Model(
-
-            const processedOptions = await this.processCSVFilesForNativeSDK(options);
-            // Merge SDK tools into options for native SDK path
-            const mergedOptions = {
-                ...processedOptions,
-                tools: { ...sdkTools, ...optionTools },
-            };
-            logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
-                model: gemini3CheckModelName,
-                optionToolCount: Object.keys(optionTools).length,
-                sdkToolCount: Object.keys(sdkTools).length,
-                totalToolCount: combinedToolCount,
-            });
-            return this.executeNativeGemini3Stream(mergedOptions);
+        const hasTools = shouldUseTools && combinedToolCount > 0;
+        if (!isGemini3Model(modelName) || !hasTools) {
+            return null;
         }
-
+        const processedOptions = await this.processCSVFilesForNativeSDK(options);
+        const mergedOptions = {
+            ...processedOptions,
+            tools: { ...sdkTools, ...optionTools },
+        };
+        logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
+            model: modelName,
+            optionToolCount: Object.keys(optionTools).length,
+            sdkToolCount: Object.keys(sdkTools).length,
+            totalToolCount: combinedToolCount,
+        });
+        return this.executeNativeGemini3Stream(mergedOptions);
+    }
+    async executeAISDKStream(options, analysisSchema, modelName) {
         const functionTag = "GoogleVertexProvider.executeStream";
-
-
-
-
+        const tracking = {
+            chunkCount: 0,
+            collectedToolCalls: [],
+            collectedToolResults: [],
+        };
+        const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
         try {
-            // Validate stream options
             this.validateStreamOptionsOnly(options);
-            // Build message array from options with multimodal support
-            // Using protected helper from BaseProvider to eliminate code duplication
             const messages = await this.buildMessagesForStream(options);
-            const model = await this.getAISDKModelWithMiddleware(options);
-
-            const
-
-
-
-
-
-
-
-            if (Object.keys(rawTools).length > 0 && !isAnthropic) {
-                const sanitized = sanitizeToolsForGemini(rawTools);
-                if (sanitized.dropped.length > 0) {
-                    logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
-                }
-                tools =
-                    Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
-            }
-            else if (isAnthropic && Object.keys(rawTools).length > 0) {
-                // Anthropic models don't need Gemini sanitization — pass tools through
-                tools = rawTools;
-            }
-            else {
-                tools = undefined;
-            }
-            logger.debug(`${functionTag}: Tools for streaming`, {
+            const model = await this.getAISDKModelWithMiddleware(options);
+            const { shouldUseTools, tools, isAnthropic } = await this.resolveAISDKStreamTools(options, modelName, functionTag);
+            const streamOptions = this.buildAISDKStreamOptions({
+                options,
+                analysisSchema,
+                functionTag,
+                modelName,
+                model,
+                messages,
+                tools,
                 shouldUseTools,
-
-
-
-                toolNames: Object.keys(tools ?? {}),
-            });
-            // Model-specific maxTokens handling
-            const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
-            // Use cached model configuration to determine maxTokens handling for streaming performance
-            // This avoids hardcoded model-specific logic and repeated config lookups
-            const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
-            const maxTokens = shouldSetMaxTokens
-                ? options.maxTokens // No default limit
-                : undefined;
-            const collectedToolCalls = [];
-            const collectedToolResults = [];
-            // Build complete stream options with proper typing
-            let streamOptions = {
-                model: model,
-                messages: messages,
-                temperature: options.temperature,
-                ...(maxTokens && { maxTokens }),
-                maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
-                ...(shouldUseTools &&
-                    tools &&
-                    Object.keys(tools).length > 0 && {
-                    tools,
-                    toolChoice: resolveToolChoice(options, tools, shouldUseTools),
-                    stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
-                }),
-                abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
-                experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
-                // Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
-                // Gemini 2.5: use thinkingBudget via providerOptions
-                ...(options.thinkingConfig?.enabled && {
-                    providerOptions: {
-                        vertex: {
-                            thinkingConfig: {
-                                ...(options.thinkingConfig.thinkingLevel && {
-                                    thinkingLevel: options.thinkingConfig.thinkingLevel,
-                                }),
-                                ...(options.thinkingConfig.budgetTokens &&
-                                    !options.thinkingConfig.thinkingLevel && {
-                                    thinkingBudget: options.thinkingConfig.budgetTokens,
-                                }),
-                                includeThoughts: true,
-                            },
-                        },
-                    },
-                }),
-                onError: (event) => {
-                    const error = event.error;
-                    const errorMessage = error instanceof Error ? error.message : String(error);
-                    logger.error(`${functionTag}: Stream error`, {
-                        provider: this.providerName,
-                        modelName: this.modelName,
-                        error: errorMessage,
-                        chunkCount,
-                    });
-                },
-                onFinish: (event) => {
-                    logger.debug(`${functionTag}: Stream finished`, {
-                        finishReason: event.finishReason,
-                        totalChunks: chunkCount,
-                    });
-                },
-                onChunk: () => {
-                    chunkCount++;
-                },
-                onStepFinish: ({ toolCalls, toolResults }) => {
-                    logger.info("Tool execution completed", { toolResults, toolCalls });
-                    for (const toolCall of toolCalls) {
-                        collectedToolCalls.push({
-                            toolCallId: toolCall.toolCallId,
-                            toolName: toolCall.toolName,
-                            args: toolCall.args ??
-                                toolCall.input ??
-                                toolCall
-                                    .parameters ??
-                                {},
-                        });
-                    }
-                    for (const toolResult of toolResults) {
-                        const rawToolResult = toolResult;
-                        collectedToolResults.push({
-                            toolName: toolResult.toolName,
-                            status: rawToolResult.error ? "failure" : "success",
-                            output: (rawToolResult.output ??
-                                rawToolResult.result) ?? undefined,
-                            error: rawToolResult.error,
-                            id: rawToolResult.toolCallId ?? toolResult.toolName,
-                        });
-                    }
-                    // Handle tool execution storage
-                    this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
-                        logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
-                            provider: this.providerName,
-                            error: error instanceof Error ? error.message : String(error),
-                        });
-                    });
-                },
-            };
-            if (analysisSchema) {
-                try {
-                    // Gemini cannot use tools and JSON schema simultaneously
-                    if (!isAnthropic) {
-                        delete streamOptions.tools;
-                        delete streamOptions.toolChoice;
-                        delete streamOptions.stopWhen;
-                    }
-                    streamOptions = {
-                        ...streamOptions,
-                        experimental_output: Output.object({
-                            schema: analysisSchema,
-                        }),
-                    };
-                }
-                catch (error) {
-                    logger.warn("Schema application failed, continuing without schema", {
-                        error: String(error),
-                    });
-                }
-            }
-            // Wrap streamText in an OTel span to capture provider-level latency and token usage
-            const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
-                kind: SpanKind.CLIENT,
-                attributes: {
-                    "gen_ai.system": "vertex",
-                    "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
-                },
+                isAnthropic,
+                timeoutController,
+                tracking,
             });
-
-
-
-
-
-
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: err instanceof Error ? err.message : String(err),
-                });
-                streamSpan.end();
-                throw err;
-            }
-            // Collect token usage and finish reason asynchronously when the stream completes,
-            // then end the span. This avoids blocking the stream consumer.
-            Promise.resolve(result.usage)
-                .then((usage) => {
-                streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
-                streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
-                const effectiveModel = options.model ||
-                    getModelId(model, this.modelName || getDefaultVertexModel());
-                const cost = calculateCost(this.providerName, effectiveModel, {
-                    input: usage.inputTokens || 0,
-                    output: usage.outputTokens || 0,
-                    total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
-                });
-                if (cost && cost > 0) {
-                    streamSpan.setAttribute("neurolink.cost", cost);
-                }
-            })
-                .catch(() => {
-                // Usage may not be available if the stream is aborted
-            });
-            Promise.resolve(result.finishReason)
-                .then((reason) => {
-                streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
-            })
-                .catch(() => {
-                // Finish reason may not be available if the stream is aborted
-            });
-            Promise.resolve(result.text)
-                .then(() => {
-                streamSpan.end();
-            })
-                .catch((err) => {
-                streamSpan.setStatus({
-                    code: SpanStatusCode.ERROR,
-                    message: err instanceof Error ? err.message : String(err),
-                });
-                streamSpan.end();
+            const result = this.startObservedAISDKStream(streamOptions, model, modelName, options);
+            this.observeAISDKStreamResult(result, {
+                model,
+                modelName,
+                options,
+                timeoutController,
             });
-            // Defer timeout cleanup until the stream completes or errors.
-            // Guard against NoOutputGeneratedError becoming an unhandled rejection.
-            Promise.resolve(result.text)
-                .catch((err) => {
-                logger.debug("Stream text promise rejected (expected for empty streams)", {
-                    error: err instanceof Error ? err.message : String(err),
-                });
-            })
-                .finally(() => timeoutController?.cleanup());
-            // Transform string stream to content object stream using BaseProvider method
-            const transformedStream = this.createTextStream(result);
             return {
-                stream:
+                stream: this.createTextStream(result),
                 provider: this.providerName,
                 model: this.modelName,
                 ...(shouldUseTools && {
-                    toolCalls: collectedToolCalls,
-                    toolResults: collectedToolResults,
+                    toolCalls: tracking.collectedToolCalls,
+                    toolResults: tracking.collectedToolResults,
                 }),
             };
         }
```
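The first hunk above splits the old monolithic `executeStream` into a guard helper, `maybeExecuteNativeGemini3ToolStream`, which returns `null` whenever the native Gemini 3 tool path does not apply, and a fallback, `executeAISDKStream`. A minimal runnable sketch of that dispatch shape follows; the class and method names here are illustrative, not the package's API:

```js
// Hypothetical names throughout; this only illustrates the "return null to
// fall through" dispatch used by the refactored executeStream.
class StreamDispatcher {
  constructor(isNativeEligible) {
    // Predicate deciding whether the specialized path applies.
    this.isNativeEligible = isNativeEligible;
  }

  async executeStream(options) {
    const native = await this.maybeExecuteNativeStream(options);
    if (native) {
      return native; // the specialized path handled the request
    }
    return { path: "ai-sdk", options }; // generic fallback path
  }

  async maybeExecuteNativeStream(options) {
    if (!this.isNativeEligible(options)) {
      return null; // "not applicable" is a value, not an exception
    }
    return { path: "native", options };
  }
}

// Only tool-calling requests take the native path in this sketch.
const dispatcher = new StreamDispatcher((options) => Boolean(options.tools));
dispatcher.executeStream({ tools: { search: {} } }).then((result) => {
  console.log(result.path); // "native"
});
```

Returning `null` from the guard keeps the eligibility check (a Gemini 3 model plus usable tools) in one place, so the caller stays a three-branch dispatcher.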
```diff
@@ -1052,11 +856,222 @@ export class GoogleVertexProvider extends BaseProvider {
                 provider: this.providerName,
                 modelName: this.modelName,
                 error: String(error),
-                chunkCount,
+                chunkCount: tracking.chunkCount,
             });
             throw this.handleProviderError(error);
         }
     }
+    async resolveAISDKStreamTools(options, modelName, functionTag) {
+        const shouldUseTools = !options.disableTools && this.supportsTools();
+        const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
+        const rawTools = shouldUseTools
+            ? { ...baseStreamTools, ...(options.tools || {}) }
+            : {};
+        const isAnthropic = isAnthropicModel(modelName);
+        let tools;
+        if (Object.keys(rawTools).length > 0 && !isAnthropic) {
+            const sanitized = sanitizeToolsForGemini(rawTools);
+            if (sanitized.dropped.length > 0) {
+                logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
+            }
+            tools =
+                Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
+        }
+        else if (isAnthropic && Object.keys(rawTools).length > 0) {
+            tools = rawTools;
+        }
+        else {
+            tools = undefined;
+        }
+        logger.debug(`${functionTag}: Tools for streaming`, {
+            shouldUseTools,
+            baseToolCount: Object.keys(baseStreamTools).length,
+            externalToolCount: Object.keys(options.tools || {}).length,
+            toolCount: Object.keys(tools ?? {}).length,
+            toolNames: Object.keys(tools ?? {}),
+        });
+        return {
+            shouldUseTools,
+            tools,
+            isAnthropic,
+            baseToolCount: Object.keys(baseStreamTools).length,
+        };
+    }
+    buildAISDKStreamOptions(params) {
+        const { options, analysisSchema, functionTag, modelName, model, messages, tools, shouldUseTools, isAnthropic, timeoutController, tracking, } = params;
+        const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
+        const maxTokens = shouldSetMaxTokens ? options.maxTokens : undefined;
+        let streamOptions = {
+            model,
+            messages,
+            temperature: options.temperature,
+            ...(maxTokens && { maxTokens }),
+            maxRetries: 0,
+            ...(shouldUseTools &&
+                tools &&
+                Object.keys(tools).length > 0 && {
+                tools,
+                toolChoice: resolveToolChoice(options, tools, shouldUseTools),
+                stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
+            }),
+            abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+            experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
+            ...(options.thinkingConfig?.enabled && {
+                providerOptions: {
+                    vertex: {
+                        thinkingConfig: {
+                            ...(options.thinkingConfig.thinkingLevel && {
+                                thinkingLevel: options.thinkingConfig.thinkingLevel,
+                            }),
+                            ...(options.thinkingConfig.budgetTokens &&
+                                !options.thinkingConfig.thinkingLevel && {
+                                thinkingBudget: options.thinkingConfig.budgetTokens,
+                            }),
+                            includeThoughts: true,
+                        },
+                    },
+                },
+            }),
+            onError: (event) => {
+                const errorMessage = event.error instanceof Error
+                    ? event.error.message
+                    : String(event.error);
+                logger.error(`${functionTag}: Stream error`, {
+                    provider: this.providerName,
+                    modelName: this.modelName,
+                    error: errorMessage,
+                    chunkCount: tracking.chunkCount,
+                });
+            },
+            onFinish: (event) => {
+                logger.debug(`${functionTag}: Stream finished`, {
+                    finishReason: event.finishReason,
+                    totalChunks: tracking.chunkCount,
+                });
+            },
+            onChunk: () => {
+                tracking.chunkCount++;
+            },
+            onStepFinish: ({ toolCalls, toolResults }) => {
+                this.captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking);
+            },
+        };
+        if (!analysisSchema) {
+            return streamOptions;
+        }
+        try {
+            if (!isAnthropic) {
+                delete streamOptions.tools;
+                delete streamOptions.toolChoice;
+                delete streamOptions.stopWhen;
+            }
+            streamOptions = {
+                ...streamOptions,
+                experimental_output: Output.object({ schema: analysisSchema }),
+            };
+        }
+        catch (error) {
+            logger.warn("Schema application failed, continuing without schema", {
+                error: String(error),
+            });
+        }
+        return streamOptions;
+    }
+    captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking) {
+        logger.info("Tool execution completed", { toolResults, toolCalls });
+        for (const toolCall of toolCalls) {
+            tracking.collectedToolCalls.push({
+                toolCallId: toolCall.toolCallId,
+                toolName: toolCall.toolName,
+                args: toolCall.args ?? toolCall.input ?? toolCall.parameters ?? {},
+            });
+        }
+        for (const toolResult of toolResults) {
+            tracking.collectedToolResults.push({
+                toolName: toolResult.toolName,
+                status: toolResult.error ? "failure" : "success",
+                output: (toolResult.output ?? toolResult.result) ??
+                    undefined,
+                error: toolResult.error,
+                id: toolResult.toolCallId ?? toolResult.toolName,
+            });
+        }
+        this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
+            logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
+                provider: this.providerName,
+                error: error instanceof Error ? error.message : String(error),
+            });
+        });
+    }
+    startObservedAISDKStream(streamOptions, model, modelName, options) {
+        const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
+            kind: SpanKind.CLIENT,
+            attributes: {
+                "gen_ai.system": "vertex",
+                "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
+            },
+        });
+        try {
+            const result = streamText(streamOptions);
+            this.attachAISDKStreamObservers(result, streamSpan, model, modelName, options);
+            return result;
+        }
+        catch (error) {
+            streamSpan.recordException(error instanceof Error ? error : new Error(String(error)));
+            streamSpan.setStatus({
+                code: SpanStatusCode.ERROR,
+                message: error instanceof Error ? error.message : String(error),
+            });
+            streamSpan.end();
+            throw error;
+        }
+    }
+    attachAISDKStreamObservers(result, streamSpan, model, modelName, options) {
+        Promise.resolve(result.usage)
+            .then((usage) => {
+            streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
+            streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
+            const effectiveModel = options.model ||
+                getModelId(model, modelName || getDefaultVertexModel());
+            const cost = calculateCost(this.providerName, effectiveModel, {
+                input: usage.inputTokens || 0,
+                output: usage.outputTokens || 0,
+                total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
+            });
+            if (cost && cost > 0) {
+                streamSpan.setAttribute("neurolink.cost", cost);
+            }
+        })
+            .catch(() => undefined);
+        Promise.resolve(result.finishReason)
+            .then((reason) => {
+            streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
+        })
+            .catch(() => undefined);
+        Promise.resolve(result.text)
+            .then(() => {
+            streamSpan.end();
+        })
+            .catch((error) => {
+            streamSpan.setStatus({
+                code: SpanStatusCode.ERROR,
+                message: error instanceof Error ? error.message : String(error),
+            });
+            streamSpan.end();
+        });
+    }
+    observeAISDKStreamResult(result, params) {
+        void params.model;
+        void params.modelName;
+        void params.options;
+        Promise.resolve(result.text)
+            .catch((error) => {
+            logger.debug("Stream text promise rejected (expected for empty streams)", {
+                error: error instanceof Error ? error.message : String(error),
+            });
+        })
+            .finally(() => params.timeoutController?.cleanup());
+    }
     /**
      * Create @google/genai client configured for Vertex AI
      */
```
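The second hunk extracts the OTel instrumentation into `startObservedAISDKStream` and `attachAISDKStreamObservers`: the span opens before `streamText` is called, but attributes are recorded and the span ended only as the result's `usage`, `finishReason`, and `text` promises settle, so telemetry never blocks the consumer. A simplified sketch of that observer pattern; the span stub and names are assumptions, not the package's code:

```js
// Record attributes as the stream's promises settle; end the span only when
// the text promise settles (success or failure).
function observeStream(result, span) {
  Promise.resolve(result.usage)
    .then((usage) => {
      span.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
      span.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
    })
    .catch(() => undefined); // usage may never resolve for aborted streams
  Promise.resolve(result.text)
    .then(() => span.end())
    .catch((error) => {
      span.setStatus({ code: 2, message: String(error) }); // 2 = OTel ERROR
      span.end();
    });
}

// Stub with the same surface as an OpenTelemetry span, for demonstration.
const span = {
  setAttribute: (key, value) => console.log("attr", key, value),
  setStatus: (status) => console.log("status", status),
  end: () => console.log("span ended"),
};
observeStream(
  {
    usage: Promise.resolve({ inputTokens: 12, outputTokens: 34 }),
    text: Promise.resolve("done"),
  },
  span,
);
```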
```diff
@@ -1211,210 +1226,187 @@ export class GoogleVertexProvider extends BaseProvider {
                 [ATTR.GEN_AI_OPERATION]: "stream",
                 [ATTR.NL_PROVIDER]: this.providerName,
             },
-        },
-
-
-
-
-
-
-
+        }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
+    }
+    async executeNativeGemini3StreamWithSpan(options, modelName, span) {
+        const client = await this.createVertexGenAIClient(options.region);
+        const effectiveLocation = options.region || this.location || getVertexLocation();
+        logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
+            model: modelName,
+            hasTools: !!options.tools && Object.keys(options.tools).length > 0,
+            project: this.projectId,
+            location: effectiveLocation,
+        });
+        const multimodalInput = options.input;
+        const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
+        let hasToolsInput = !!options.tools &&
+            Object.keys(options.tools).length > 0 &&
+            !options.disableTools;
+        const streamOptions = options;
+        const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
+        if (wantsJsonOutput && hasToolsInput) {
+            logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
+            hasToolsInput = false;
+        }
+        let toolsConfig;
+        let executeMap = new Map();
+        if (hasToolsInput) {
+            const toolDeclarationResult = buildNativeToolDeclarations(options.tools);
+            toolsConfig = toolDeclarationResult.toolsConfig;
+            executeMap = toolDeclarationResult.executeMap;
+            logger.debug("[GoogleVertex] Converted tools for native SDK", {
+                toolCount: toolsConfig[0].functionDeclarations.length,
+                toolNames: toolsConfig[0].functionDeclarations.map((tool) => tool.name),
             });
-
-
-
-
-
-
-
-
-
-
-
-            logger.
-
-        }
-        let toolsConfig;
-        let executeMap = new Map();
-        if (hasToolsInput) {
-            const result = buildNativeToolDeclarations(options.tools);
-            toolsConfig = result.toolsConfig;
-            executeMap = result.executeMap;
-            logger.debug("[GoogleVertex] Converted tools for native SDK", {
-                toolCount: toolsConfig[0].functionDeclarations.length,
-                toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
+        }
+        const config = buildNativeConfig(options, toolsConfig);
+        if (wantsJsonOutput) {
+            config.responseMimeType = "application/json";
+            if (streamOptions.schema) {
+                const rawSchema = convertZodToJsonSchema(streamOptions.schema);
+                const inlinedSchema = inlineJsonSchema(rawSchema);
+                if (inlinedSchema.$schema) {
+                    delete inlinedSchema.$schema;
+                }
+                config.responseSchema = inlinedSchema;
+                logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
+                    schemaKeys: Object.keys(inlinedSchema),
                 });
             }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        }
+        const startTime = Date.now();
+        const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
+        const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
+        const maxSteps = computeMaxStepsShared(options.maxSteps);
+        const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
+        const channel = createTextChannel();
+        const allToolCalls = [];
+        const metadata = {
+            streamId: `native-vertex-${Date.now()}`,
+            startTime,
+            responseTime: 0,
+            totalToolExecutions: 0,
+        };
+        let analyticsResolve;
+        let analyticsReject;
+        const analyticsPromise = new Promise((resolve, reject) => {
+            analyticsResolve = resolve;
+            analyticsReject = reject;
+        });
+        const loopPromise = this.runNativeGemini3StreamLoop({
+            client,
+            modelName,
+            span,
+            config,
+            currentContents,
+            executeMap,
+            channel,
+            allToolCalls,
+            metadata,
+            analyticsResolve,
+            analyticsReject,
+            startTime,
+            timeoutController,
+            composedSignal,
+            maxSteps,
+        });
+        loopPromise.catch(() => undefined);
+        return {
+            stream: channel.iterable,
+            provider: this.providerName,
+            model: modelName,
+            toolCalls: allToolCalls,
+            analytics: analyticsPromise,
+            metadata,
+        };
+    }
+    async runNativeGemini3StreamLoop(params) {
+        let lastStepText = "";
+        let totalInputTokens = 0;
+        let totalOutputTokens = 0;
+        let step = 0;
+        let completedWithFinalAnswer = false;
+        const failedTools = new Map();
+        try {
+            while (step < params.maxSteps) {
+                if (params.composedSignal?.aborted) {
+                    throw params.composedSignal.reason instanceof Error
+                        ? params.composedSignal.reason
+                        : new Error("Request aborted");
                 }
-
-
-            const timeout = this.getTimeout(options);
-            const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
-            const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
-            const maxSteps = computeMaxStepsShared(options.maxSteps);
-            // Inject conversation history so the native path has multi-turn context
-            const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
-            // Create a push-based text channel so the caller receives tokens as
-            // they arrive from the network rather than after full buffering.
-            const channel = createTextChannel();
-            // Shared mutable state updated by the background agentic loop.
-            const allToolCalls = [];
-            // Shared metadata object mutated by the background loop so that
-            // responseTime and totalToolExecutions reflect final values.
-            const metadata = {
-                streamId: `native-vertex-${Date.now()}`,
-                startTime,
-                responseTime: 0,
-                totalToolExecutions: 0,
-            };
-            // analyticsResolvers lets the background loop settle the analytics
-            // promise once token counts are known (after the loop completes).
-            let analyticsResolve;
-            let analyticsReject;
-            const analyticsPromise = new Promise((res, rej) => {
-                analyticsResolve = res;
-                analyticsReject = rej;
-            });
-            // Run the agentic loop in the background without awaiting it here,
-            // so we can return the StreamResult (with channel.iterable) immediately.
-            const loopPromise = (async () => {
-                let lastStepText = "";
-                let totalInputTokens = 0;
-                let totalOutputTokens = 0;
-                let step = 0;
-                let completedWithFinalAnswer = false;
-                const failedTools = new Map();
+                step++;
+                logger.debug(`[GoogleVertex] Native SDK step ${step}/${params.maxSteps}`);
                 try {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        ? { httpOptions: { signal: composedSignal } }
-                        : {}),
-                    });
-                    // For every step, use incremental collection so text parts
-                    // are pushed to the channel as they arrive. For intermediate
-                    // steps (those that produce function calls) we still need the
-                    // complete rawResponseParts for pushModelResponseToHistory,
-                    // which collectStreamChunksIncremental provides at stream end.
-                    const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
-                    totalInputTokens += chunkResult.inputTokens;
-                    totalOutputTokens += chunkResult.outputTokens;
-                    const stepText = extractTextFromParts(chunkResult.rawResponseParts);
-                    // If no function calls, this was the final step — channel
-                    // already received all text parts incrementally.
-                    if (chunkResult.stepFunctionCalls.length === 0) {
-                        completedWithFinalAnswer = true;
-                        break;
-                    }
-                    lastStepText = stepText;
-                    // Record tool call events on the span
-                    for (const fc of chunkResult.stepFunctionCalls) {
-                        span.addEvent("gen_ai.tool_call", {
-                            "tool.name": fc.name,
-                            "tool.step": step,
-                        });
-                    }
-                    logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
-                    pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
-                    const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
-                    // Function/tool responses must use role: "user" — the
-                    // @google/genai SDK's validateHistory() only accepts "user"
-                    // and "model" roles (matching automaticFunctionCalling).
-                    currentContents.push({
-                        role: "user",
-                        parts: functionResponses,
-                    });
-                }
-                catch (error) {
-                    logger.error("[GoogleVertex] Native SDK error", error);
-                    throw this.handleProviderError(error);
-                }
+                    const rawStream = await params.client.models.generateContentStream({
+                        model: params.modelName,
+                        contents: params.currentContents,
+                        config: params.config,
+                        ...(params.composedSignal
+                            ? { httpOptions: { signal: params.composedSignal } }
+                            : {}),
+                    });
+                    const chunkResult = await collectStreamChunksIncremental(rawStream, params.channel);
+                    totalInputTokens += chunkResult.inputTokens;
+                    totalOutputTokens += chunkResult.outputTokens;
+                    const stepText = extractTextFromParts(chunkResult.rawResponseParts);
+                    if (chunkResult.stepFunctionCalls.length === 0) {
+                        completedWithFinalAnswer = true;
+                        break;
                    }
-
-
-
-
-
-
-                        channel.push(fallback);
-                    }
+                    lastStepText = stepText;
+                    for (const functionCall of chunkResult.stepFunctionCalls) {
+                        params.span.addEvent("gen_ai.tool_call", {
+                            "tool.name": functionCall.name,
+                            "tool.step": step,
+                        });
                    }
-
-
-
-
-
-
-            span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
-            span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
-            span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
-                ? "max_steps"
-                : "stop");
-            analyticsResolve({
-                provider: this.providerName,
-                model: modelName,
-                tokenUsage: {
-                    input: totalInputTokens,
-                    output: totalOutputTokens,
-                    total: totalInputTokens + totalOutputTokens,
-                },
-                requestDuration: responseTime,
-                timestamp: new Date().toISOString(),
+                    logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
+                    pushModelResponseToHistory(params.currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
+                    const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, params.executeMap, failedTools, params.allToolCalls, { abortSignal: params.composedSignal });
+                    params.currentContents.push({
+                        role: "user",
+                        parts: functionResponses,
                    });
-            channel.close();
                }
-            catch (
-
-
+                catch (error) {
+                    logger.error("[GoogleVertex] Native SDK error", error);
+                    throw this.handleProviderError(error);
                }
-
-
+            }
+            if (step >= params.maxSteps && !completedWithFinalAnswer) {
+                const fallback = handleMaxStepsTermination("[GoogleVertex]", step, params.maxSteps, "", lastStepText);
+                if (fallback) {
+                    params.channel.push(fallback);
                }
-            }
-
-
-
-
-
+            }
+            const responseTime = Date.now() - params.startTime;
+            params.metadata.responseTime = responseTime;
+            params.metadata.totalToolExecutions = params.allToolCalls.length;
+            params.span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
+            params.span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
+            params.span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= params.maxSteps && !completedWithFinalAnswer
+                ? "max_steps"
+                : "stop");
+            params.analyticsResolve({
                provider: this.providerName,
-                model: modelName,
-
-
-
-
-
+                model: params.modelName,
+                tokenUsage: {
+                    input: totalInputTokens,
+                    output: totalOutputTokens,
+                    total: totalInputTokens + totalOutputTokens,
+                },
+                requestDuration: responseTime,
+                timestamp: new Date().toISOString(),
+            });
+            params.channel.close();
+        }
+        catch (error) {
+            params.channel.error(error);
+            params.analyticsReject(error);
+        }
+        finally {
+            params.timeoutController?.cleanup();
+        }
    }
    /**
     * Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
```