@juspay/neurolink 9.41.0 → 9.42.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/CHANGELOG.md +8 -0
  2. package/README.md +7 -1
  3. package/dist/auth/anthropicOAuth.d.ts +18 -3
  4. package/dist/auth/anthropicOAuth.js +149 -4
  5. package/dist/auth/providers/firebase.js +5 -1
  6. package/dist/auth/providers/jwt.js +5 -1
  7. package/dist/auth/providers/workos.js +5 -1
  8. package/dist/auth/sessionManager.d.ts +1 -1
  9. package/dist/auth/sessionManager.js +58 -27
  10. package/dist/browser/neurolink.min.js +354 -334
  11. package/dist/cli/commands/mcp.d.ts +6 -0
  12. package/dist/cli/commands/mcp.js +188 -181
  13. package/dist/cli/commands/proxy.d.ts +2 -1
  14. package/dist/cli/commands/proxy.js +713 -431
  15. package/dist/cli/commands/task.js +3 -0
  16. package/dist/cli/factories/commandFactory.d.ts +2 -0
  17. package/dist/cli/factories/commandFactory.js +38 -0
  18. package/dist/cli/parser.js +4 -3
  19. package/dist/client/aiSdkAdapter.js +3 -0
  20. package/dist/client/streamingClient.js +30 -10
  21. package/dist/core/baseProvider.d.ts +6 -1
  22. package/dist/core/baseProvider.js +208 -230
  23. package/dist/core/factory.d.ts +3 -0
  24. package/dist/core/factory.js +138 -188
  25. package/dist/core/modules/GenerationHandler.js +3 -2
  26. package/dist/core/redisConversationMemoryManager.js +7 -3
  27. package/dist/evaluation/BatchEvaluator.js +4 -1
  28. package/dist/evaluation/hooks/observabilityHooks.js +5 -3
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +24 -9
  31. package/dist/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  32. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  33. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  34. package/dist/evaluation/scorers/scorerRegistry.js +353 -282
  35. package/dist/lib/auth/anthropicOAuth.d.ts +18 -3
  36. package/dist/lib/auth/anthropicOAuth.js +149 -4
  37. package/dist/lib/auth/providers/firebase.js +5 -1
  38. package/dist/lib/auth/providers/jwt.js +5 -1
  39. package/dist/lib/auth/providers/workos.js +5 -1
  40. package/dist/lib/auth/sessionManager.d.ts +1 -1
  41. package/dist/lib/auth/sessionManager.js +58 -27
  42. package/dist/lib/client/aiSdkAdapter.js +3 -0
  43. package/dist/lib/client/streamingClient.js +30 -10
  44. package/dist/lib/core/baseProvider.d.ts +6 -1
  45. package/dist/lib/core/baseProvider.js +208 -230
  46. package/dist/lib/core/factory.d.ts +3 -0
  47. package/dist/lib/core/factory.js +138 -188
  48. package/dist/lib/core/modules/GenerationHandler.js +3 -2
  49. package/dist/lib/core/redisConversationMemoryManager.js +7 -3
  50. package/dist/lib/evaluation/BatchEvaluator.js +4 -1
  51. package/dist/lib/evaluation/hooks/observabilityHooks.js +5 -3
  52. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +3 -2
  53. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +24 -9
  54. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +6 -3
  55. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +18 -10
  56. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  57. package/dist/lib/evaluation/scorers/scorerRegistry.js +353 -282
  58. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  59. package/dist/lib/mcp/toolRegistry.js +32 -31
  60. package/dist/lib/neurolink.d.ts +41 -2
  61. package/dist/lib/neurolink.js +1616 -1681
  62. package/dist/lib/observability/otelBridge.d.ts +2 -2
  63. package/dist/lib/observability/otelBridge.js +12 -3
  64. package/dist/lib/providers/amazonBedrock.js +2 -4
  65. package/dist/lib/providers/anthropic.d.ts +9 -5
  66. package/dist/lib/providers/anthropic.js +19 -14
  67. package/dist/lib/providers/anthropicBaseProvider.d.ts +3 -3
  68. package/dist/lib/providers/anthropicBaseProvider.js +5 -4
  69. package/dist/lib/providers/azureOpenai.d.ts +1 -1
  70. package/dist/lib/providers/azureOpenai.js +5 -4
  71. package/dist/lib/providers/googleAiStudio.js +30 -6
  72. package/dist/lib/providers/googleVertex.d.ts +10 -0
  73. package/dist/lib/providers/googleVertex.js +437 -423
  74. package/dist/lib/providers/huggingFace.d.ts +3 -3
  75. package/dist/lib/providers/huggingFace.js +6 -8
  76. package/dist/lib/providers/litellm.d.ts +1 -0
  77. package/dist/lib/providers/litellm.js +76 -55
  78. package/dist/lib/providers/mistral.js +2 -1
  79. package/dist/lib/providers/ollama.js +93 -23
  80. package/dist/lib/providers/openAI.d.ts +2 -0
  81. package/dist/lib/providers/openAI.js +141 -141
  82. package/dist/lib/providers/openRouter.js +2 -1
  83. package/dist/lib/providers/openaiCompatible.d.ts +4 -4
  84. package/dist/lib/providers/openaiCompatible.js +4 -4
  85. package/dist/lib/proxy/claudeFormat.d.ts +3 -2
  86. package/dist/lib/proxy/claudeFormat.js +27 -14
  87. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  88. package/dist/lib/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  89. package/dist/lib/proxy/modelRouter.js +3 -0
  90. package/dist/lib/proxy/oauthFetch.d.ts +1 -1
  91. package/dist/lib/proxy/oauthFetch.js +289 -316
  92. package/dist/lib/proxy/proxyConfig.js +46 -24
  93. package/dist/lib/proxy/proxyEnv.d.ts +19 -0
  94. package/dist/lib/proxy/proxyEnv.js +73 -0
  95. package/dist/lib/proxy/proxyFetch.js +291 -217
  96. package/dist/lib/proxy/proxyTracer.d.ts +133 -0
  97. package/dist/lib/proxy/proxyTracer.js +645 -0
  98. package/dist/lib/proxy/rawStreamCapture.d.ts +10 -0
  99. package/dist/lib/proxy/rawStreamCapture.js +83 -0
  100. package/dist/lib/proxy/requestLogger.d.ts +32 -5
  101. package/dist/lib/proxy/requestLogger.js +503 -47
  102. package/dist/lib/proxy/sseInterceptor.d.ts +97 -0
  103. package/dist/lib/proxy/sseInterceptor.js +427 -0
  104. package/dist/lib/proxy/usageStats.d.ts +4 -3
  105. package/dist/lib/proxy/usageStats.js +25 -12
  106. package/dist/lib/rag/chunkers/MarkdownChunker.js +13 -5
  107. package/dist/lib/rag/chunking/markdownChunker.js +15 -6
  108. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +17 -3
  109. package/dist/lib/server/routes/claudeProxyRoutes.js +3032 -1349
  110. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +7 -1
  111. package/dist/lib/services/server/ai/observability/instrumentation.js +337 -161
  112. package/dist/lib/tasks/backends/bullmqBackend.d.ts +1 -0
  113. package/dist/lib/tasks/backends/bullmqBackend.js +35 -22
  114. package/dist/lib/tasks/store/redisTaskStore.d.ts +1 -0
  115. package/dist/lib/tasks/store/redisTaskStore.js +54 -39
  116. package/dist/lib/tasks/taskManager.d.ts +5 -0
  117. package/dist/lib/tasks/taskManager.js +158 -30
  118. package/dist/lib/telemetry/index.d.ts +2 -1
  119. package/dist/lib/telemetry/index.js +2 -1
  120. package/dist/lib/telemetry/telemetryService.d.ts +3 -0
  121. package/dist/lib/telemetry/telemetryService.js +69 -5
  122. package/dist/lib/types/cli.d.ts +10 -0
  123. package/dist/lib/types/proxyTypes.d.ts +160 -5
  124. package/dist/lib/types/streamTypes.d.ts +25 -3
  125. package/dist/lib/utils/messageBuilder.js +3 -2
  126. package/dist/lib/utils/providerHealth.d.ts +19 -0
  127. package/dist/lib/utils/providerHealth.js +279 -33
  128. package/dist/lib/utils/providerUtils.js +17 -22
  129. package/dist/lib/utils/toolChoice.d.ts +4 -0
  130. package/dist/lib/utils/toolChoice.js +7 -0
  131. package/dist/mcp/toolRegistry.d.ts +2 -0
  132. package/dist/mcp/toolRegistry.js +32 -31
  133. package/dist/neurolink.d.ts +41 -2
  134. package/dist/neurolink.js +1616 -1681
  135. package/dist/observability/otelBridge.d.ts +2 -2
  136. package/dist/observability/otelBridge.js +12 -3
  137. package/dist/providers/amazonBedrock.js +2 -4
  138. package/dist/providers/anthropic.d.ts +9 -5
  139. package/dist/providers/anthropic.js +19 -14
  140. package/dist/providers/anthropicBaseProvider.d.ts +3 -3
  141. package/dist/providers/anthropicBaseProvider.js +5 -4
  142. package/dist/providers/azureOpenai.d.ts +1 -1
  143. package/dist/providers/azureOpenai.js +5 -4
  144. package/dist/providers/googleAiStudio.js +30 -6
  145. package/dist/providers/googleVertex.d.ts +10 -0
  146. package/dist/providers/googleVertex.js +437 -423
  147. package/dist/providers/huggingFace.d.ts +3 -3
  148. package/dist/providers/huggingFace.js +6 -7
  149. package/dist/providers/litellm.d.ts +1 -0
  150. package/dist/providers/litellm.js +76 -55
  151. package/dist/providers/mistral.js +2 -1
  152. package/dist/providers/ollama.js +93 -23
  153. package/dist/providers/openAI.d.ts +2 -0
  154. package/dist/providers/openAI.js +141 -141
  155. package/dist/providers/openRouter.js +2 -1
  156. package/dist/providers/openaiCompatible.d.ts +4 -4
  157. package/dist/providers/openaiCompatible.js +4 -3
  158. package/dist/proxy/claudeFormat.d.ts +3 -2
  159. package/dist/proxy/claudeFormat.js +27 -14
  160. package/dist/proxy/cloaking/plugins/sessionIdentity.d.ts +2 -6
  161. package/dist/proxy/cloaking/plugins/sessionIdentity.js +9 -33
  162. package/dist/proxy/modelRouter.js +3 -0
  163. package/dist/proxy/oauthFetch.d.ts +1 -1
  164. package/dist/proxy/oauthFetch.js +289 -316
  165. package/dist/proxy/proxyConfig.js +46 -24
  166. package/dist/proxy/proxyEnv.d.ts +19 -0
  167. package/dist/proxy/proxyEnv.js +72 -0
  168. package/dist/proxy/proxyFetch.js +291 -217
  169. package/dist/proxy/proxyTracer.d.ts +133 -0
  170. package/dist/proxy/proxyTracer.js +644 -0
  171. package/dist/proxy/rawStreamCapture.d.ts +10 -0
  172. package/dist/proxy/rawStreamCapture.js +82 -0
  173. package/dist/proxy/requestLogger.d.ts +32 -5
  174. package/dist/proxy/requestLogger.js +503 -47
  175. package/dist/proxy/sseInterceptor.d.ts +97 -0
  176. package/dist/proxy/sseInterceptor.js +426 -0
  177. package/dist/proxy/usageStats.d.ts +4 -3
  178. package/dist/proxy/usageStats.js +25 -12
  179. package/dist/rag/chunkers/MarkdownChunker.js +13 -5
  180. package/dist/rag/chunking/markdownChunker.js +15 -6
  181. package/dist/server/routes/claudeProxyRoutes.d.ts +17 -3
  182. package/dist/server/routes/claudeProxyRoutes.js +3032 -1349
  183. package/dist/services/server/ai/observability/instrumentation.d.ts +7 -1
  184. package/dist/services/server/ai/observability/instrumentation.js +337 -161
  185. package/dist/tasks/backends/bullmqBackend.d.ts +1 -0
  186. package/dist/tasks/backends/bullmqBackend.js +35 -22
  187. package/dist/tasks/store/redisTaskStore.d.ts +1 -0
  188. package/dist/tasks/store/redisTaskStore.js +54 -39
  189. package/dist/tasks/taskManager.d.ts +5 -0
  190. package/dist/tasks/taskManager.js +158 -30
  191. package/dist/telemetry/index.d.ts +2 -1
  192. package/dist/telemetry/index.js +2 -1
  193. package/dist/telemetry/telemetryService.d.ts +3 -0
  194. package/dist/telemetry/telemetryService.js +69 -5
  195. package/dist/types/cli.d.ts +10 -0
  196. package/dist/types/proxyTypes.d.ts +160 -5
  197. package/dist/types/streamTypes.d.ts +25 -3
  198. package/dist/utils/messageBuilder.js +3 -2
  199. package/dist/utils/providerHealth.d.ts +19 -0
  200. package/dist/utils/providerHealth.js +279 -33
  201. package/dist/utils/providerUtils.js +18 -22
  202. package/dist/utils/toolChoice.d.ts +4 -0
  203. package/dist/utils/toolChoice.js +6 -0
  204. package/docs/assets/dashboards/neurolink-proxy-observability-dashboard.json +6609 -0
  205. package/docs/changelog.md +252 -0
  206. package/package.json +19 -2
  207. package/scripts/observability/check-proxy-telemetry.mjs +235 -0
  208. package/scripts/observability/docker-compose.proxy-observability.yaml +55 -0
  209. package/scripts/observability/import-openobserve-dashboard.mjs +240 -0
  210. package/scripts/observability/manage-local-openobserve.sh +215 -0
  211. package/scripts/observability/otel-collector.proxy-observability.yaml +78 -0
  212. package/scripts/observability/proxy-observability.env.example +23 -0
@@ -22,6 +22,7 @@ import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } fr
22
22
  import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
23
23
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
24
24
  import { estimateTokens } from "../utils/tokenEstimation.js";
25
+ import { resolveToolChoice } from "../utils/toolChoice.js";
25
26
  import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
26
27
  import { getModelId } from "./providerTypeUtils.js";
27
28
  // Import proper types for multimodal message handling
@@ -776,251 +777,76 @@ export class GoogleVertexProvider extends BaseProvider {
776
777
  this.validateStreamOptions(options);
777
778
  }
778
779
  async executeStream(options, analysisSchema) {
779
- // Check if this is a Gemini 3 model with tools - use native SDK for thought_signature
780
- const gemini3CheckModelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
781
- // Structured output (analysisSchema, JSON format, or schema) is incompatible with tools on Gemini.
782
- // Compute once and reuse in both the native Gemini 3 gate and the streamText fallback path.
780
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
781
+ const nativeGemini3Result = await this.maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName);
782
+ if (nativeGemini3Result) {
783
+ return nativeGemini3Result;
784
+ }
785
+ return this.executeAISDKStream(options, analysisSchema, modelName);
786
+ }
787
+ async maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName) {
783
788
  const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema;
784
- // Check for tools from options AND from SDK (MCP tools)
785
- // Need to check early if we should route to native SDK
786
- const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
789
+ const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
787
790
  const optionTools = options.tools || {};
788
- const sdkTools = gemini3CheckShouldUseTools ? await this.getAllTools() : {};
791
+ const sdkTools = shouldUseTools ? await this.getAllTools() : {};
789
792
  const combinedToolCount = Object.keys(optionTools).length + Object.keys(sdkTools).length;
790
- const hasTools = gemini3CheckShouldUseTools && combinedToolCount > 0;
791
- if (isGemini3Model(gemini3CheckModelName) && hasTools) {
792
- // Process CSV files before routing to native SDK (bypasses normal message builder)
793
- const processedOptions = await this.processCSVFilesForNativeSDK(options);
794
- // Merge SDK tools into options for native SDK path
795
- const mergedOptions = {
796
- ...processedOptions,
797
- tools: { ...sdkTools, ...optionTools },
798
- };
799
- logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
800
- model: gemini3CheckModelName,
801
- optionToolCount: Object.keys(optionTools).length,
802
- sdkToolCount: Object.keys(sdkTools).length,
803
- totalToolCount: combinedToolCount,
804
- });
805
- return this.executeNativeGemini3Stream(mergedOptions);
793
+ const hasTools = shouldUseTools && combinedToolCount > 0;
794
+ if (!isGemini3Model(modelName) || !hasTools) {
795
+ return null;
806
796
  }
807
- // Initialize stream execution tracking
797
+ const processedOptions = await this.processCSVFilesForNativeSDK(options);
798
+ const mergedOptions = {
799
+ ...processedOptions,
800
+ tools: { ...sdkTools, ...optionTools },
801
+ };
802
+ logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
803
+ model: modelName,
804
+ optionToolCount: Object.keys(optionTools).length,
805
+ sdkToolCount: Object.keys(sdkTools).length,
806
+ totalToolCount: combinedToolCount,
807
+ });
808
+ return this.executeNativeGemini3Stream(mergedOptions);
809
+ }
810
+ async executeAISDKStream(options, analysisSchema, modelName) {
808
811
  const functionTag = "GoogleVertexProvider.executeStream";
809
- let chunkCount = 0;
810
- // Setup timeout controller
811
- const timeout = this.getTimeout(options);
812
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
812
+ const tracking = {
813
+ chunkCount: 0,
814
+ collectedToolCalls: [],
815
+ collectedToolResults: [],
816
+ };
817
+ const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
813
818
  try {
814
- // Validate stream options
815
819
  this.validateStreamOptionsOnly(options);
816
- // Build message array from options with multimodal support
817
- // Using protected helper from BaseProvider to eliminate code duplication
818
820
  const messages = await this.buildMessagesForStream(options);
819
- const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
820
- // Get all available tools (direct + MCP + external + user-provided RAG tools) for streaming
821
- const shouldUseTools = !options.disableTools && this.supportsTools();
822
- const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
823
- const rawTools = shouldUseTools
824
- ? { ...baseStreamTools, ...(options.tools || {}) }
825
- : {};
826
- // Only sanitize for Gemini models (not Anthropic/Claude models routed through Vertex)
827
- const isAnthropic = isAnthropicModel(gemini3CheckModelName);
828
- let tools;
829
- if (Object.keys(rawTools).length > 0 && !isAnthropic) {
830
- const sanitized = sanitizeToolsForGemini(rawTools);
831
- if (sanitized.dropped.length > 0) {
832
- logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
833
- }
834
- tools =
835
- Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
836
- }
837
- else if (isAnthropic && Object.keys(rawTools).length > 0) {
838
- // Anthropic models don't need Gemini sanitization — pass tools through
839
- tools = rawTools;
840
- }
841
- else {
842
- tools = undefined;
843
- }
844
- logger.debug(`${functionTag}: Tools for streaming`, {
821
+ const model = await this.getAISDKModelWithMiddleware(options);
822
+ const { shouldUseTools, tools, isAnthropic } = await this.resolveAISDKStreamTools(options, modelName, functionTag);
823
+ const streamOptions = this.buildAISDKStreamOptions({
824
+ options,
825
+ analysisSchema,
826
+ functionTag,
827
+ modelName,
828
+ model,
829
+ messages,
830
+ tools,
845
831
  shouldUseTools,
846
- baseToolCount: Object.keys(baseStreamTools).length,
847
- externalToolCount: Object.keys(options.tools || {}).length,
848
- toolCount: Object.keys(tools ?? {}).length,
849
- toolNames: Object.keys(tools ?? {}),
850
- });
851
- // Model-specific maxTokens handling
852
- const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
853
- // Use cached model configuration to determine maxTokens handling for streaming performance
854
- // This avoids hardcoded model-specific logic and repeated config lookups
855
- const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
856
- const maxTokens = shouldSetMaxTokens
857
- ? options.maxTokens // No default limit
858
- : undefined;
859
- // Build complete stream options with proper typing
860
- let streamOptions = {
861
- model: model,
862
- messages: messages,
863
- temperature: options.temperature,
864
- ...(maxTokens && { maxTokens }),
865
- maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
866
- ...(shouldUseTools &&
867
- tools &&
868
- Object.keys(tools).length > 0 && {
869
- tools,
870
- toolChoice: "auto",
871
- stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
872
- }),
873
- abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
874
- experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
875
- // Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
876
- // Gemini 2.5: use thinkingBudget via providerOptions
877
- ...(options.thinkingConfig?.enabled && {
878
- providerOptions: {
879
- vertex: {
880
- thinkingConfig: {
881
- ...(options.thinkingConfig.thinkingLevel && {
882
- thinkingLevel: options.thinkingConfig.thinkingLevel,
883
- }),
884
- ...(options.thinkingConfig.budgetTokens &&
885
- !options.thinkingConfig.thinkingLevel && {
886
- thinkingBudget: options.thinkingConfig.budgetTokens,
887
- }),
888
- includeThoughts: true,
889
- },
890
- },
891
- },
892
- }),
893
- onError: (event) => {
894
- const error = event.error;
895
- const errorMessage = error instanceof Error ? error.message : String(error);
896
- logger.error(`${functionTag}: Stream error`, {
897
- provider: this.providerName,
898
- modelName: this.modelName,
899
- error: errorMessage,
900
- chunkCount,
901
- });
902
- },
903
- onFinish: (event) => {
904
- logger.debug(`${functionTag}: Stream finished`, {
905
- finishReason: event.finishReason,
906
- totalChunks: chunkCount,
907
- });
908
- },
909
- onChunk: () => {
910
- chunkCount++;
911
- },
912
- onStepFinish: ({ toolCalls, toolResults }) => {
913
- logger.info("Tool execution completed", { toolResults, toolCalls });
914
- // Handle tool execution storage
915
- this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
916
- logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
917
- provider: this.providerName,
918
- error: error instanceof Error ? error.message : String(error),
919
- });
920
- });
921
- },
922
- };
923
- if (analysisSchema) {
924
- try {
925
- // Gemini cannot use tools and JSON schema simultaneously
926
- if (!isAnthropic) {
927
- delete streamOptions.tools;
928
- delete streamOptions.toolChoice;
929
- delete streamOptions.stopWhen;
930
- }
931
- streamOptions = {
932
- ...streamOptions,
933
- experimental_output: Output.object({
934
- schema: analysisSchema,
935
- }),
936
- };
937
- }
938
- catch (error) {
939
- logger.warn("Schema application failed, continuing without schema", {
940
- error: String(error),
941
- });
942
- }
943
- }
944
- // Wrap streamText in an OTel span to capture provider-level latency and token usage
945
- const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
946
- kind: SpanKind.CLIENT,
947
- attributes: {
948
- "gen_ai.system": "vertex",
949
- "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
950
- },
832
+ isAnthropic,
833
+ timeoutController,
834
+ tracking,
951
835
  });
952
- let result;
953
- try {
954
- result = streamText(streamOptions);
955
- }
956
- catch (err) {
957
- streamSpan.recordException(err instanceof Error ? err : new Error(String(err)));
958
- streamSpan.setStatus({
959
- code: SpanStatusCode.ERROR,
960
- message: err instanceof Error ? err.message : String(err),
961
- });
962
- streamSpan.end();
963
- throw err;
964
- }
965
- // Collect token usage and finish reason asynchronously when the stream completes,
966
- // then end the span. This avoids blocking the stream consumer.
967
- Promise.resolve(result.usage)
968
- .then((usage) => {
969
- streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
970
- streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
971
- const effectiveModel = options.model ||
972
- getModelId(model, this.modelName || getDefaultVertexModel());
973
- const cost = calculateCost(this.providerName, effectiveModel, {
974
- input: usage.inputTokens || 0,
975
- output: usage.outputTokens || 0,
976
- total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
977
- });
978
- if (cost && cost > 0) {
979
- streamSpan.setAttribute("neurolink.cost", cost);
980
- }
981
- })
982
- .catch(() => {
983
- // Usage may not be available if the stream is aborted
984
- });
985
- Promise.resolve(result.finishReason)
986
- .then((reason) => {
987
- streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
988
- })
989
- .catch(() => {
990
- // Finish reason may not be available if the stream is aborted
991
- });
992
- Promise.resolve(result.text)
993
- .then(() => {
994
- streamSpan.end();
995
- })
996
- .catch((err) => {
997
- streamSpan.setStatus({
998
- code: SpanStatusCode.ERROR,
999
- message: err instanceof Error ? err.message : String(err),
1000
- });
1001
- streamSpan.end();
836
+ const result = this.startObservedAISDKStream(streamOptions, model, modelName, options);
837
+ this.observeAISDKStreamResult(result, {
838
+ model,
839
+ modelName,
840
+ options,
841
+ timeoutController,
1002
842
  });
1003
- // Defer timeout cleanup until the stream completes or errors.
1004
- // Guard against NoOutputGeneratedError becoming an unhandled rejection.
1005
- Promise.resolve(result.text)
1006
- .catch((err) => {
1007
- logger.debug("Stream text promise rejected (expected for empty streams)", {
1008
- error: err instanceof Error ? err.message : String(err),
1009
- });
1010
- })
1011
- .finally(() => timeoutController?.cleanup());
1012
- // Transform string stream to content object stream using BaseProvider method
1013
- const transformedStream = this.createTextStream(result);
1014
- // Track tool calls and results for streaming
1015
- const toolCalls = [];
1016
- const toolResults = [];
1017
843
  return {
1018
- stream: transformedStream,
844
+ stream: this.createTextStream(result),
1019
845
  provider: this.providerName,
1020
846
  model: this.modelName,
1021
847
  ...(shouldUseTools && {
1022
- toolCalls,
1023
- toolResults,
848
+ toolCalls: tracking.collectedToolCalls,
849
+ toolResults: tracking.collectedToolResults,
1024
850
  }),
1025
851
  };
1026
852
  }
@@ -1030,11 +856,222 @@ export class GoogleVertexProvider extends BaseProvider {
1030
856
  provider: this.providerName,
1031
857
  modelName: this.modelName,
1032
858
  error: String(error),
1033
- chunkCount,
859
+ chunkCount: tracking.chunkCount,
1034
860
  });
1035
861
  throw this.handleProviderError(error);
1036
862
  }
1037
863
  }
864
+ async resolveAISDKStreamTools(options, modelName, functionTag) {
865
+ const shouldUseTools = !options.disableTools && this.supportsTools();
866
+ const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
867
+ const rawTools = shouldUseTools
868
+ ? { ...baseStreamTools, ...(options.tools || {}) }
869
+ : {};
870
+ const isAnthropic = isAnthropicModel(modelName);
871
+ let tools;
872
+ if (Object.keys(rawTools).length > 0 && !isAnthropic) {
873
+ const sanitized = sanitizeToolsForGemini(rawTools);
874
+ if (sanitized.dropped.length > 0) {
875
+ logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
876
+ }
877
+ tools =
878
+ Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
879
+ }
880
+ else if (isAnthropic && Object.keys(rawTools).length > 0) {
881
+ tools = rawTools;
882
+ }
883
+ else {
884
+ tools = undefined;
885
+ }
886
+ logger.debug(`${functionTag}: Tools for streaming`, {
887
+ shouldUseTools,
888
+ baseToolCount: Object.keys(baseStreamTools).length,
889
+ externalToolCount: Object.keys(options.tools || {}).length,
890
+ toolCount: Object.keys(tools ?? {}).length,
891
+ toolNames: Object.keys(tools ?? {}),
892
+ });
893
+ return {
894
+ shouldUseTools,
895
+ tools,
896
+ isAnthropic,
897
+ baseToolCount: Object.keys(baseStreamTools).length,
898
+ };
899
+ }
900
+ buildAISDKStreamOptions(params) {
901
+ const { options, analysisSchema, functionTag, modelName, model, messages, tools, shouldUseTools, isAnthropic, timeoutController, tracking, } = params;
902
+ const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
903
+ const maxTokens = shouldSetMaxTokens ? options.maxTokens : undefined;
904
+ let streamOptions = {
905
+ model,
906
+ messages,
907
+ temperature: options.temperature,
908
+ ...(maxTokens && { maxTokens }),
909
+ maxRetries: 0,
910
+ ...(shouldUseTools &&
911
+ tools &&
912
+ Object.keys(tools).length > 0 && {
913
+ tools,
914
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
915
+ stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
916
+ }),
917
+ abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
918
+ experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
919
+ ...(options.thinkingConfig?.enabled && {
920
+ providerOptions: {
921
+ vertex: {
922
+ thinkingConfig: {
923
+ ...(options.thinkingConfig.thinkingLevel && {
924
+ thinkingLevel: options.thinkingConfig.thinkingLevel,
925
+ }),
926
+ ...(options.thinkingConfig.budgetTokens &&
927
+ !options.thinkingConfig.thinkingLevel && {
928
+ thinkingBudget: options.thinkingConfig.budgetTokens,
929
+ }),
930
+ includeThoughts: true,
931
+ },
932
+ },
933
+ },
934
+ }),
935
+ onError: (event) => {
936
+ const errorMessage = event.error instanceof Error
937
+ ? event.error.message
938
+ : String(event.error);
939
+ logger.error(`${functionTag}: Stream error`, {
940
+ provider: this.providerName,
941
+ modelName: this.modelName,
942
+ error: errorMessage,
943
+ chunkCount: tracking.chunkCount,
944
+ });
945
+ },
946
+ onFinish: (event) => {
947
+ logger.debug(`${functionTag}: Stream finished`, {
948
+ finishReason: event.finishReason,
949
+ totalChunks: tracking.chunkCount,
950
+ });
951
+ },
952
+ onChunk: () => {
953
+ tracking.chunkCount++;
954
+ },
955
+ onStepFinish: ({ toolCalls, toolResults }) => {
956
+ this.captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking);
957
+ },
958
+ };
959
+ if (!analysisSchema) {
960
+ return streamOptions;
961
+ }
962
+ try {
963
+ if (!isAnthropic) {
964
+ delete streamOptions.tools;
965
+ delete streamOptions.toolChoice;
966
+ delete streamOptions.stopWhen;
967
+ }
968
+ streamOptions = {
969
+ ...streamOptions,
970
+ experimental_output: Output.object({ schema: analysisSchema }),
971
+ };
972
+ }
973
+ catch (error) {
974
+ logger.warn("Schema application failed, continuing without schema", {
975
+ error: String(error),
976
+ });
977
+ }
978
+ return streamOptions;
979
+ }
980
+ captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking) {
981
+ logger.info("Tool execution completed", { toolResults, toolCalls });
982
+ for (const toolCall of toolCalls) {
983
+ tracking.collectedToolCalls.push({
984
+ toolCallId: toolCall.toolCallId,
985
+ toolName: toolCall.toolName,
986
+ args: toolCall.args ?? toolCall.input ?? toolCall.parameters ?? {},
987
+ });
988
+ }
989
+ for (const toolResult of toolResults) {
990
+ tracking.collectedToolResults.push({
991
+ toolName: toolResult.toolName,
992
+ status: toolResult.error ? "failure" : "success",
993
+ output: (toolResult.output ?? toolResult.result) ??
994
+ undefined,
995
+ error: toolResult.error,
996
+ id: toolResult.toolCallId ?? toolResult.toolName,
997
+ });
998
+ }
999
+ this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
1000
+ logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
1001
+ provider: this.providerName,
1002
+ error: error instanceof Error ? error.message : String(error),
1003
+ });
1004
+ });
1005
+ }
1006
+ startObservedAISDKStream(streamOptions, model, modelName, options) {
1007
+ const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
1008
+ kind: SpanKind.CLIENT,
1009
+ attributes: {
1010
+ "gen_ai.system": "vertex",
1011
+ "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
1012
+ },
1013
+ });
1014
+ try {
1015
+ const result = streamText(streamOptions);
1016
+ this.attachAISDKStreamObservers(result, streamSpan, model, modelName, options);
1017
+ return result;
1018
+ }
1019
+ catch (error) {
1020
+ streamSpan.recordException(error instanceof Error ? error : new Error(String(error)));
1021
+ streamSpan.setStatus({
1022
+ code: SpanStatusCode.ERROR,
1023
+ message: error instanceof Error ? error.message : String(error),
1024
+ });
1025
+ streamSpan.end();
1026
+ throw error;
1027
+ }
1028
+ }
1029
+ attachAISDKStreamObservers(result, streamSpan, model, modelName, options) {
1030
+ Promise.resolve(result.usage)
1031
+ .then((usage) => {
1032
+ streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
1033
+ streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
1034
+ const effectiveModel = options.model ||
1035
+ getModelId(model, modelName || getDefaultVertexModel());
1036
+ const cost = calculateCost(this.providerName, effectiveModel, {
1037
+ input: usage.inputTokens || 0,
1038
+ output: usage.outputTokens || 0,
1039
+ total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
1040
+ });
1041
+ if (cost && cost > 0) {
1042
+ streamSpan.setAttribute("neurolink.cost", cost);
1043
+ }
1044
+ })
1045
+ .catch(() => undefined);
1046
+ Promise.resolve(result.finishReason)
1047
+ .then((reason) => {
1048
+ streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
1049
+ })
1050
+ .catch(() => undefined);
1051
+ Promise.resolve(result.text)
1052
+ .then(() => {
1053
+ streamSpan.end();
1054
+ })
1055
+ .catch((error) => {
1056
+ streamSpan.setStatus({
1057
+ code: SpanStatusCode.ERROR,
1058
+ message: error instanceof Error ? error.message : String(error),
1059
+ });
1060
+ streamSpan.end();
1061
+ });
1062
+ }
1063
+ observeAISDKStreamResult(result, params) {
1064
+ void params.model;
1065
+ void params.modelName;
1066
+ void params.options;
1067
+ Promise.resolve(result.text)
1068
+ .catch((error) => {
1069
+ logger.debug("Stream text promise rejected (expected for empty streams)", {
1070
+ error: error instanceof Error ? error.message : String(error),
1071
+ });
1072
+ })
1073
+ .finally(() => params.timeoutController?.cleanup());
1074
+ }
1038
1075
  /**
1039
1076
  * Create @google/genai client configured for Vertex AI
1040
1077
  */
@@ -1189,210 +1226,187 @@ export class GoogleVertexProvider extends BaseProvider {
1189
1226
  [ATTR.GEN_AI_OPERATION]: "stream",
1190
1227
  [ATTR.NL_PROVIDER]: this.providerName,
1191
1228
  },
1192
- }, async (span) => {
1193
- const client = await this.createVertexGenAIClient(options.region);
1194
- const effectiveLocation = options.region || this.location || getVertexLocation();
1195
- logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1196
- model: modelName,
1197
- hasTools: !!options.tools && Object.keys(options.tools).length > 0,
1198
- project: this.projectId,
1199
- location: effectiveLocation,
1229
+ }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
1230
+ }
1231
+ async executeNativeGemini3StreamWithSpan(options, modelName, span) {
1232
+ const client = await this.createVertexGenAIClient(options.region);
1233
+ const effectiveLocation = options.region || this.location || getVertexLocation();
1234
+ logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1235
+ model: modelName,
1236
+ hasTools: !!options.tools && Object.keys(options.tools).length > 0,
1237
+ project: this.projectId,
1238
+ location: effectiveLocation,
1239
+ });
1240
+ const multimodalInput = options.input;
1241
+ const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
1242
+ let hasToolsInput = !!options.tools &&
1243
+ Object.keys(options.tools).length > 0 &&
1244
+ !options.disableTools;
1245
+ const streamOptions = options;
1246
+ const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
1247
+ if (wantsJsonOutput && hasToolsInput) {
1248
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1249
+ hasToolsInput = false;
1250
+ }
1251
+ let toolsConfig;
1252
+ let executeMap = new Map();
1253
+ if (hasToolsInput) {
1254
+ const toolDeclarationResult = buildNativeToolDeclarations(options.tools);
1255
+ toolsConfig = toolDeclarationResult.toolsConfig;
1256
+ executeMap = toolDeclarationResult.executeMap;
1257
+ logger.debug("[GoogleVertex] Converted tools for native SDK", {
1258
+ toolCount: toolsConfig[0].functionDeclarations.length,
1259
+ toolNames: toolsConfig[0].functionDeclarations.map((tool) => tool.name),
1200
1260
  });
1201
- // Build contents from input with multimodal support
1202
- const multimodalInput = options.input;
1203
- const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
1204
- // Convert tools to native format
1205
- let hasToolsInput = options.tools &&
1206
- Object.keys(options.tools).length > 0 &&
1207
- !options.disableTools;
1208
- // Guard: Gemini cannot use tools + JSON schema simultaneously
1209
- const streamOptions = options;
1210
- const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
1211
- if (wantsJsonOutput && hasToolsInput) {
1212
- logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1213
- hasToolsInput = false;
1214
- }
1215
- let toolsConfig;
1216
- let executeMap = new Map();
1217
- if (hasToolsInput) {
1218
- const result = buildNativeToolDeclarations(options.tools);
1219
- toolsConfig = result.toolsConfig;
1220
- executeMap = result.executeMap;
1221
- logger.debug("[GoogleVertex] Converted tools for native SDK", {
1222
- toolCount: toolsConfig[0].functionDeclarations.length,
1223
- toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1261
+ }
1262
+ const config = buildNativeConfig(options, toolsConfig);
1263
+ if (wantsJsonOutput) {
1264
+ config.responseMimeType = "application/json";
1265
+ if (streamOptions.schema) {
1266
+ const rawSchema = convertZodToJsonSchema(streamOptions.schema);
1267
+ const inlinedSchema = inlineJsonSchema(rawSchema);
1268
+ if (inlinedSchema.$schema) {
1269
+ delete inlinedSchema.$schema;
1270
+ }
1271
+ config.responseSchema = inlinedSchema;
1272
+ logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
1273
+ schemaKeys: Object.keys(inlinedSchema),
1224
1274
  });
1225
1275
  }
1226
- // Build config — systemInstruction stays in config for Gemini 3.x.
1227
- // The @google/genai SDK maps config.systemInstruction to the HTTP-level
1228
- // system_instruction field, which is the correct mechanism for all
1229
- // Gemini 3.x models (including global endpoint). Older workaround
1230
- // that moved systemInstruction into user/model content messages caused
1231
- // "Please use a valid role: user, model" on Gemini 3.1+ preview models.
1232
- const config = buildNativeConfig(options, toolsConfig);
1233
- // Add JSON output format support for native SDK stream
1234
- if (streamOptions.output?.format === "json" || streamOptions.schema) {
1235
- config.responseMimeType = "application/json";
1236
- if (streamOptions.schema) {
1237
- const rawSchema = convertZodToJsonSchema(streamOptions.schema);
1238
- const inlinedSchema = inlineJsonSchema(rawSchema);
1239
- if (inlinedSchema.$schema) {
1240
- delete inlinedSchema.$schema;
1241
- }
1242
- config.responseSchema = inlinedSchema;
1243
- logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
1244
- schemaKeys: Object.keys(inlinedSchema),
1245
- });
1276
+ }
1277
+ const startTime = Date.now();
1278
+ const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
1279
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1280
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
1281
+ const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
1282
+ const channel = createTextChannel();
1283
+ const allToolCalls = [];
1284
+ const metadata = {
1285
+ streamId: `native-vertex-${Date.now()}`,
1286
+ startTime,
1287
+ responseTime: 0,
1288
+ totalToolExecutions: 0,
1289
+ };
1290
+ let analyticsResolve;
1291
+ let analyticsReject;
1292
+ const analyticsPromise = new Promise((resolve, reject) => {
1293
+ analyticsResolve = resolve;
1294
+ analyticsReject = reject;
1295
+ });
1296
+ const loopPromise = this.runNativeGemini3StreamLoop({
1297
+ client,
1298
+ modelName,
1299
+ span,
1300
+ config,
1301
+ currentContents,
1302
+ executeMap,
1303
+ channel,
1304
+ allToolCalls,
1305
+ metadata,
1306
+ analyticsResolve,
1307
+ analyticsReject,
1308
+ startTime,
1309
+ timeoutController,
1310
+ composedSignal,
1311
+ maxSteps,
1312
+ });
1313
+ loopPromise.catch(() => undefined);
1314
+ return {
1315
+ stream: channel.iterable,
1316
+ provider: this.providerName,
1317
+ model: modelName,
1318
+ toolCalls: allToolCalls,
1319
+ analytics: analyticsPromise,
1320
+ metadata,
1321
+ };
1322
+ }
1323
+ async runNativeGemini3StreamLoop(params) {
1324
+ let lastStepText = "";
1325
+ let totalInputTokens = 0;
1326
+ let totalOutputTokens = 0;
1327
+ let step = 0;
1328
+ let completedWithFinalAnswer = false;
1329
+ const failedTools = new Map();
1330
+ try {
1331
+ while (step < params.maxSteps) {
1332
+ if (params.composedSignal?.aborted) {
1333
+ throw params.composedSignal.reason instanceof Error
1334
+ ? params.composedSignal.reason
1335
+ : new Error("Request aborted");
1246
1336
  }
1247
- }
1248
- const startTime = Date.now();
1249
- const timeout = this.getTimeout(options);
1250
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
1251
- const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1252
- const maxSteps = computeMaxStepsShared(options.maxSteps);
1253
- // Inject conversation history so the native path has multi-turn context
1254
- const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
1255
- // Create a push-based text channel so the caller receives tokens as
1256
- // they arrive from the network rather than after full buffering.
1257
- const channel = createTextChannel();
1258
- // Shared mutable state updated by the background agentic loop.
1259
- const allToolCalls = [];
1260
- // Shared metadata object mutated by the background loop so that
1261
- // responseTime and totalToolExecutions reflect final values.
1262
- const metadata = {
1263
- streamId: `native-vertex-${Date.now()}`,
1264
- startTime,
1265
- responseTime: 0,
1266
- totalToolExecutions: 0,
1267
- };
1268
- // analyticsResolvers lets the background loop settle the analytics
1269
- // promise once token counts are known (after the loop completes).
1270
- let analyticsResolve;
1271
- let analyticsReject;
1272
- const analyticsPromise = new Promise((res, rej) => {
1273
- analyticsResolve = res;
1274
- analyticsReject = rej;
1275
- });
1276
- // Run the agentic loop in the background without awaiting it here,
1277
- // so we can return the StreamResult (with channel.iterable) immediately.
1278
- const loopPromise = (async () => {
1279
- let lastStepText = "";
1280
- let totalInputTokens = 0;
1281
- let totalOutputTokens = 0;
1282
- let step = 0;
1283
- let completedWithFinalAnswer = false;
1284
- const failedTools = new Map();
1337
+ step++;
1338
+ logger.debug(`[GoogleVertex] Native SDK step ${step}/${params.maxSteps}`);
1285
1339
  try {
1286
- // Agentic loop for tool calling
1287
- while (step < maxSteps) {
1288
- if (composedSignal?.aborted) {
1289
- throw composedSignal.reason instanceof Error
1290
- ? composedSignal.reason
1291
- : new Error("Request aborted");
1292
- }
1293
- step++;
1294
- logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
1295
- try {
1296
- const rawStream = await client.models.generateContentStream({
1297
- model: modelName,
1298
- contents: currentContents,
1299
- config,
1300
- ...(composedSignal
1301
- ? { httpOptions: { signal: composedSignal } }
1302
- : {}),
1303
- });
1304
- // For every step, use incremental collection so text parts
1305
- // are pushed to the channel as they arrive. For intermediate
1306
- // steps (those that produce function calls) we still need the
1307
- // complete rawResponseParts for pushModelResponseToHistory,
1308
- // which collectStreamChunksIncremental provides at stream end.
1309
- const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
1310
- totalInputTokens += chunkResult.inputTokens;
1311
- totalOutputTokens += chunkResult.outputTokens;
1312
- const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1313
- // If no function calls, this was the final step — channel
1314
- // already received all text parts incrementally.
1315
- if (chunkResult.stepFunctionCalls.length === 0) {
1316
- completedWithFinalAnswer = true;
1317
- break;
1318
- }
1319
- lastStepText = stepText;
1320
- // Record tool call events on the span
1321
- for (const fc of chunkResult.stepFunctionCalls) {
1322
- span.addEvent("gen_ai.tool_call", {
1323
- "tool.name": fc.name,
1324
- "tool.step": step,
1325
- });
1326
- }
1327
- logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
1328
- pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1329
- const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
1330
- // Function/tool responses must use role: "user" — the
1331
- // @google/genai SDK's validateHistory() only accepts "user"
1332
- // and "model" roles (matching automaticFunctionCalling).
1333
- currentContents.push({
1334
- role: "user",
1335
- parts: functionResponses,
1336
- });
1337
- }
1338
- catch (error) {
1339
- logger.error("[GoogleVertex] Native SDK error", error);
1340
- throw this.handleProviderError(error);
1341
- }
1340
+ const rawStream = await params.client.models.generateContentStream({
1341
+ model: params.modelName,
1342
+ contents: params.currentContents,
1343
+ config: params.config,
1344
+ ...(params.composedSignal
1345
+ ? { httpOptions: { signal: params.composedSignal } }
1346
+ : {}),
1347
+ });
1348
+ const chunkResult = await collectStreamChunksIncremental(rawStream, params.channel);
1349
+ totalInputTokens += chunkResult.inputTokens;
1350
+ totalOutputTokens += chunkResult.outputTokens;
1351
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1352
+ if (chunkResult.stepFunctionCalls.length === 0) {
1353
+ completedWithFinalAnswer = true;
1354
+ break;
1342
1355
  }
1343
- // Handle max-steps termination: if the model was still calling
1344
- // tools when we hit the limit, push a synthetic final message.
1345
- if (step >= maxSteps && !completedWithFinalAnswer) {
1346
- const fallback = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, "", // finalText is empty — model didn't stop on its own
1347
- lastStepText);
1348
- if (fallback) {
1349
- channel.push(fallback);
1350
- }
1356
+ lastStepText = stepText;
1357
+ for (const functionCall of chunkResult.stepFunctionCalls) {
1358
+ params.span.addEvent("gen_ai.tool_call", {
1359
+ "tool.name": functionCall.name,
1360
+ "tool.step": step,
1361
+ });
1351
1362
  }
1352
- const responseTime = Date.now() - startTime;
1353
- // Propagate final values to the shared metadata object so that
1354
- // the already-returned StreamResult reflects accurate telemetry.
1355
- metadata.responseTime = responseTime;
1356
- metadata.totalToolExecutions = allToolCalls.length;
1357
- // Set token usage and finish reason on the span
1358
- span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
1359
- span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
1360
- span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
1361
- ? "max_steps"
1362
- : "stop");
1363
- analyticsResolve({
1364
- provider: this.providerName,
1365
- model: modelName,
1366
- tokenUsage: {
1367
- input: totalInputTokens,
1368
- output: totalOutputTokens,
1369
- total: totalInputTokens + totalOutputTokens,
1370
- },
1371
- requestDuration: responseTime,
1372
- timestamp: new Date().toISOString(),
1363
+ logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
1364
+ pushModelResponseToHistory(params.currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1365
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, params.executeMap, failedTools, params.allToolCalls, { abortSignal: params.composedSignal });
1366
+ params.currentContents.push({
1367
+ role: "user",
1368
+ parts: functionResponses,
1373
1369
  });
1374
- channel.close();
1375
1370
  }
1376
- catch (err) {
1377
- channel.error(err);
1378
- analyticsReject(err);
1371
+ catch (error) {
1372
+ logger.error("[GoogleVertex] Native SDK error", error);
1373
+ throw this.handleProviderError(error);
1379
1374
  }
1380
- finally {
1381
- timeoutController?.cleanup();
1375
+ }
1376
+ if (step >= params.maxSteps && !completedWithFinalAnswer) {
1377
+ const fallback = handleMaxStepsTermination("[GoogleVertex]", step, params.maxSteps, "", lastStepText);
1378
+ if (fallback) {
1379
+ params.channel.push(fallback);
1382
1380
  }
1383
- })();
1384
- // Suppress unhandled-rejection warnings on loopPromise — errors are
1385
- // forwarded to the channel and will surface when the caller iterates.
1386
- loopPromise.catch(() => undefined);
1387
- return {
1388
- stream: channel.iterable,
1381
+ }
1382
+ const responseTime = Date.now() - params.startTime;
1383
+ params.metadata.responseTime = responseTime;
1384
+ params.metadata.totalToolExecutions = params.allToolCalls.length;
1385
+ params.span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
1386
+ params.span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
1387
+ params.span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= params.maxSteps && !completedWithFinalAnswer
1388
+ ? "max_steps"
1389
+ : "stop");
1390
+ params.analyticsResolve({
1389
1391
  provider: this.providerName,
1390
- model: modelName,
1391
- toolCalls: allToolCalls,
1392
- analytics: analyticsPromise,
1393
- metadata,
1394
- };
1395
- });
1392
+ model: params.modelName,
1393
+ tokenUsage: {
1394
+ input: totalInputTokens,
1395
+ output: totalOutputTokens,
1396
+ total: totalInputTokens + totalOutputTokens,
1397
+ },
1398
+ requestDuration: responseTime,
1399
+ timestamp: new Date().toISOString(),
1400
+ });
1401
+ params.channel.close();
1402
+ }
1403
+ catch (error) {
1404
+ params.channel.error(error);
1405
+ params.analyticsReject(error);
1406
+ }
1407
+ finally {
1408
+ params.timeoutController?.cleanup();
1409
+ }
1396
1410
  }
1397
1411
  /**
1398
1412
  * Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI