@juspay/neurolink 9.42.0 → 9.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. package/CHANGELOG.md +8 -0
  2. package/dist/auth/anthropicOAuth.js +12 -0
  3. package/dist/browser/neurolink.min.js +335 -334
  4. package/dist/cli/commands/mcp.d.ts +6 -0
  5. package/dist/cli/commands/mcp.js +200 -184
  6. package/dist/cli/commands/proxy.js +560 -518
  7. package/dist/core/baseProvider.d.ts +6 -1
  8. package/dist/core/baseProvider.js +219 -232
  9. package/dist/core/factory.d.ts +3 -0
  10. package/dist/core/factory.js +140 -190
  11. package/dist/core/modules/ToolsManager.d.ts +1 -0
  12. package/dist/core/modules/ToolsManager.js +40 -42
  13. package/dist/core/toolEvents.d.ts +3 -0
  14. package/dist/core/toolEvents.js +7 -0
  15. package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
  16. package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
  17. package/dist/evaluation/scorers/scorerRegistry.js +356 -284
  18. package/dist/lib/auth/anthropicOAuth.js +12 -0
  19. package/dist/lib/core/baseProvider.d.ts +6 -1
  20. package/dist/lib/core/baseProvider.js +219 -232
  21. package/dist/lib/core/factory.d.ts +3 -0
  22. package/dist/lib/core/factory.js +140 -190
  23. package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
  24. package/dist/lib/core/modules/ToolsManager.js +40 -42
  25. package/dist/lib/core/toolEvents.d.ts +3 -0
  26. package/dist/lib/core/toolEvents.js +8 -0
  27. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
  28. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
  29. package/dist/lib/evaluation/scorers/scorerRegistry.js +356 -284
  30. package/dist/lib/mcp/toolRegistry.d.ts +2 -0
  31. package/dist/lib/mcp/toolRegistry.js +32 -31
  32. package/dist/lib/neurolink.d.ts +38 -0
  33. package/dist/lib/neurolink.js +1890 -1707
  34. package/dist/lib/providers/googleAiStudio.js +0 -5
  35. package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
  36. package/dist/lib/providers/googleNativeGemini3.js +39 -1
  37. package/dist/lib/providers/googleVertex.d.ts +10 -0
  38. package/dist/lib/providers/googleVertex.js +445 -445
  39. package/dist/lib/providers/litellm.d.ts +1 -0
  40. package/dist/lib/providers/litellm.js +73 -64
  41. package/dist/lib/providers/ollama.js +17 -4
  42. package/dist/lib/providers/openAI.d.ts +2 -0
  43. package/dist/lib/providers/openAI.js +139 -140
  44. package/dist/lib/proxy/claudeFormat.js +14 -5
  45. package/dist/lib/proxy/oauthFetch.js +298 -318
  46. package/dist/lib/proxy/proxyConfig.js +3 -1
  47. package/dist/lib/proxy/proxyFetch.js +250 -222
  48. package/dist/lib/proxy/proxyHealth.d.ts +17 -0
  49. package/dist/lib/proxy/proxyHealth.js +55 -0
  50. package/dist/lib/proxy/requestLogger.js +140 -48
  51. package/dist/lib/proxy/routingPolicy.d.ts +33 -0
  52. package/dist/lib/proxy/routingPolicy.js +255 -0
  53. package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
  54. package/dist/lib/proxy/snapshotPersistence.js +41 -0
  55. package/dist/lib/proxy/sseInterceptor.js +36 -11
  56. package/dist/lib/server/routes/claudeProxyRoutes.d.ts +2 -1
  57. package/dist/lib/server/routes/claudeProxyRoutes.js +2916 -2377
  58. package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
  59. package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
  60. package/dist/lib/tasks/store/redisTaskStore.js +42 -17
  61. package/dist/lib/tasks/taskManager.d.ts +2 -0
  62. package/dist/lib/tasks/taskManager.js +100 -5
  63. package/dist/lib/telemetry/telemetryService.js +9 -5
  64. package/dist/lib/types/cli.d.ts +4 -0
  65. package/dist/lib/types/proxyTypes.d.ts +211 -1
  66. package/dist/lib/types/tools.d.ts +18 -0
  67. package/dist/lib/utils/providerHealth.d.ts +1 -0
  68. package/dist/lib/utils/providerHealth.js +46 -31
  69. package/dist/lib/utils/providerUtils.js +11 -22
  70. package/dist/lib/utils/schemaConversion.d.ts +1 -0
  71. package/dist/lib/utils/schemaConversion.js +3 -0
  72. package/dist/mcp/toolRegistry.d.ts +2 -0
  73. package/dist/mcp/toolRegistry.js +32 -31
  74. package/dist/neurolink.d.ts +38 -0
  75. package/dist/neurolink.js +1890 -1707
  76. package/dist/providers/googleAiStudio.js +0 -5
  77. package/dist/providers/googleNativeGemini3.d.ts +4 -0
  78. package/dist/providers/googleNativeGemini3.js +39 -1
  79. package/dist/providers/googleVertex.d.ts +10 -0
  80. package/dist/providers/googleVertex.js +445 -445
  81. package/dist/providers/litellm.d.ts +1 -0
  82. package/dist/providers/litellm.js +73 -64
  83. package/dist/providers/ollama.js +17 -4
  84. package/dist/providers/openAI.d.ts +2 -0
  85. package/dist/providers/openAI.js +139 -140
  86. package/dist/proxy/claudeFormat.js +14 -5
  87. package/dist/proxy/oauthFetch.js +298 -318
  88. package/dist/proxy/proxyConfig.js +3 -1
  89. package/dist/proxy/proxyFetch.js +250 -222
  90. package/dist/proxy/proxyHealth.d.ts +17 -0
  91. package/dist/proxy/proxyHealth.js +54 -0
  92. package/dist/proxy/requestLogger.js +140 -48
  93. package/dist/proxy/routingPolicy.d.ts +33 -0
  94. package/dist/proxy/routingPolicy.js +254 -0
  95. package/dist/proxy/snapshotPersistence.d.ts +2 -0
  96. package/dist/proxy/snapshotPersistence.js +40 -0
  97. package/dist/proxy/sseInterceptor.js +36 -11
  98. package/dist/server/routes/claudeProxyRoutes.d.ts +2 -1
  99. package/dist/server/routes/claudeProxyRoutes.js +2916 -2377
  100. package/dist/services/server/ai/observability/instrumentation.js +194 -218
  101. package/dist/tasks/backends/bullmqBackend.js +24 -18
  102. package/dist/tasks/store/redisTaskStore.js +42 -17
  103. package/dist/tasks/taskManager.d.ts +2 -0
  104. package/dist/tasks/taskManager.js +100 -5
  105. package/dist/telemetry/telemetryService.js +9 -5
  106. package/dist/types/cli.d.ts +4 -0
  107. package/dist/types/proxyTypes.d.ts +211 -1
  108. package/dist/types/tools.d.ts +18 -0
  109. package/dist/utils/providerHealth.d.ts +1 -0
  110. package/dist/utils/providerHealth.js +46 -31
  111. package/dist/utils/providerUtils.js +12 -22
  112. package/dist/utils/schemaConversion.d.ts +1 -0
  113. package/dist/utils/schemaConversion.js +3 -0
  114. package/package.json +3 -2
  115. package/scripts/observability/check-proxy-telemetry.mjs +1 -1
  116. package/scripts/observability/manage-local-openobserve.sh +36 -5
@@ -23,7 +23,7 @@ import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConver
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
  import { estimateTokens } from "../utils/tokenEstimation.js";
  import { resolveToolChoice } from "../utils/toolChoice.js";
- import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
+ import { buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps as computeMaxStepsShared, createTextChannel, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, normalizeToolsForJsonSchemaProvider, pushModelResponseToHistory, sanitizeToolsForGemini, } from "./googleNativeGemini3.js";
  import { getModelId } from "./providerTypeUtils.js";
  // Import proper types for multimodal message handling
  // Keep-alive note: Node.js native fetch and undici (used by createProxyFetch)
@@ -777,272 +777,76 @@ export class GoogleVertexProvider extends BaseProvider {
  this.validateStreamOptions(options);
  }
  async executeStream(options, analysisSchema) {
- // Check if this is a Gemini 3 model with tools - use native SDK for thought_signature
- const gemini3CheckModelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
- // Structured output (analysisSchema, JSON format, or schema) is incompatible with tools on Gemini.
- // Compute once and reuse in both the native Gemini 3 gate and the streamText fallback path.
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
+ const nativeGemini3Result = await this.maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName);
+ if (nativeGemini3Result) {
+ return nativeGemini3Result;
+ }
+ return this.executeAISDKStream(options, analysisSchema, modelName);
+ }
+ async maybeExecuteNativeGemini3ToolStream(options, analysisSchema, modelName) {
  const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema;
- // Check for tools from options AND from SDK (MCP tools)
- // Need to check early if we should route to native SDK
- const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
+ const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput;
  const optionTools = options.tools || {};
- const sdkTools = gemini3CheckShouldUseTools ? await this.getAllTools() : {};
+ const sdkTools = shouldUseTools ? await this.getAllTools() : {};
  const combinedToolCount = Object.keys(optionTools).length + Object.keys(sdkTools).length;
- const hasTools = gemini3CheckShouldUseTools && combinedToolCount > 0;
- if (isGemini3Model(gemini3CheckModelName) && hasTools) {
- // Process CSV files before routing to native SDK (bypasses normal message builder)
- const processedOptions = await this.processCSVFilesForNativeSDK(options);
- // Merge SDK tools into options for native SDK path
- const mergedOptions = {
- ...processedOptions,
- tools: { ...sdkTools, ...optionTools },
- };
- logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
- model: gemini3CheckModelName,
- optionToolCount: Object.keys(optionTools).length,
- sdkToolCount: Object.keys(sdkTools).length,
- totalToolCount: combinedToolCount,
- });
- return this.executeNativeGemini3Stream(mergedOptions);
+ const hasTools = shouldUseTools && combinedToolCount > 0;
+ if (!isGemini3Model(modelName) || !hasTools) {
+ return null;
  }
- // Initialize stream execution tracking
+ const processedOptions = await this.processCSVFilesForNativeSDK(options);
+ const mergedOptions = {
+ ...processedOptions,
+ tools: { ...sdkTools, ...optionTools },
+ };
+ logger.info("[GoogleVertex] Routing Gemini 3 to native SDK for tool calling", {
+ model: modelName,
+ optionToolCount: Object.keys(optionTools).length,
+ sdkToolCount: Object.keys(sdkTools).length,
+ totalToolCount: combinedToolCount,
+ });
+ return this.executeNativeGemini3Stream(mergedOptions);
+ }
+ async executeAISDKStream(options, analysisSchema, modelName) {
  const functionTag = "GoogleVertexProvider.executeStream";
- let chunkCount = 0;
- // Setup timeout controller
- const timeout = this.getTimeout(options);
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
+ const tracking = {
+ chunkCount: 0,
+ collectedToolCalls: [],
+ collectedToolResults: [],
+ };
+ const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
  try {
- // Validate stream options
  this.validateStreamOptionsOnly(options);
- // Build message array from options with multimodal support
- // Using protected helper from BaseProvider to eliminate code duplication
  const messages = await this.buildMessagesForStream(options);
- const model = await this.getAISDKModelWithMiddleware(options); // This is where network connection happens!
- // Get all available tools (direct + MCP + external + user-provided RAG tools) for streaming
- const shouldUseTools = !options.disableTools && this.supportsTools();
- const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
- const rawTools = shouldUseTools
- ? { ...baseStreamTools, ...(options.tools || {}) }
- : {};
- // Only sanitize for Gemini models (not Anthropic/Claude models routed through Vertex)
- const isAnthropic = isAnthropicModel(gemini3CheckModelName);
- let tools;
- if (Object.keys(rawTools).length > 0 && !isAnthropic) {
- const sanitized = sanitizeToolsForGemini(rawTools);
- if (sanitized.dropped.length > 0) {
- logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
- }
- tools =
- Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
- }
- else if (isAnthropic && Object.keys(rawTools).length > 0) {
- // Anthropic models don't need Gemini sanitization — pass tools through
- tools = rawTools;
- }
- else {
- tools = undefined;
- }
- logger.debug(`${functionTag}: Tools for streaming`, {
+ const model = await this.getAISDKModelWithMiddleware(options);
+ const { shouldUseTools, tools, isAnthropic } = await this.resolveAISDKStreamTools(options, modelName, functionTag);
+ const streamOptions = this.buildAISDKStreamOptions({
+ options,
+ analysisSchema,
+ functionTag,
+ modelName,
+ model,
+ messages,
+ tools,
  shouldUseTools,
- baseToolCount: Object.keys(baseStreamTools).length,
- externalToolCount: Object.keys(options.tools || {}).length,
- toolCount: Object.keys(tools ?? {}).length,
- toolNames: Object.keys(tools ?? {}),
- });
- // Model-specific maxTokens handling
- const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
- // Use cached model configuration to determine maxTokens handling for streaming performance
- // This avoids hardcoded model-specific logic and repeated config lookups
- const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
- const maxTokens = shouldSetMaxTokens
- ? options.maxTokens // No default limit
- : undefined;
- const collectedToolCalls = [];
- const collectedToolResults = [];
- // Build complete stream options with proper typing
- let streamOptions = {
- model: model,
- messages: messages,
- temperature: options.temperature,
- ...(maxTokens && { maxTokens }),
- maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
- ...(shouldUseTools &&
- tools &&
- Object.keys(tools).length > 0 && {
- tools,
- toolChoice: resolveToolChoice(options, tools, shouldUseTools),
- stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
- }),
- abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
- experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
- // Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
- // Gemini 2.5: use thinkingBudget via providerOptions
- ...(options.thinkingConfig?.enabled && {
- providerOptions: {
- vertex: {
- thinkingConfig: {
- ...(options.thinkingConfig.thinkingLevel && {
- thinkingLevel: options.thinkingConfig.thinkingLevel,
- }),
- ...(options.thinkingConfig.budgetTokens &&
- !options.thinkingConfig.thinkingLevel && {
- thinkingBudget: options.thinkingConfig.budgetTokens,
- }),
- includeThoughts: true,
- },
- },
- },
- }),
- onError: (event) => {
- const error = event.error;
- const errorMessage = error instanceof Error ? error.message : String(error);
- logger.error(`${functionTag}: Stream error`, {
- provider: this.providerName,
- modelName: this.modelName,
- error: errorMessage,
- chunkCount,
- });
- },
- onFinish: (event) => {
- logger.debug(`${functionTag}: Stream finished`, {
- finishReason: event.finishReason,
- totalChunks: chunkCount,
- });
- },
- onChunk: () => {
- chunkCount++;
- },
- onStepFinish: ({ toolCalls, toolResults }) => {
- logger.info("Tool execution completed", { toolResults, toolCalls });
- for (const toolCall of toolCalls) {
- collectedToolCalls.push({
- toolCallId: toolCall.toolCallId,
- toolName: toolCall.toolName,
- args: toolCall.args ??
- toolCall.input ??
- toolCall
- .parameters ??
- {},
- });
- }
- for (const toolResult of toolResults) {
- const rawToolResult = toolResult;
- collectedToolResults.push({
- toolName: toolResult.toolName,
- status: rawToolResult.error ? "failure" : "success",
- output: (rawToolResult.output ??
- rawToolResult.result) ?? undefined,
- error: rawToolResult.error,
- id: rawToolResult.toolCallId ?? toolResult.toolName,
- });
- }
- // Handle tool execution storage
- this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
- logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
- provider: this.providerName,
- error: error instanceof Error ? error.message : String(error),
- });
- });
- },
- };
- if (analysisSchema) {
- try {
- // Gemini cannot use tools and JSON schema simultaneously
- if (!isAnthropic) {
- delete streamOptions.tools;
- delete streamOptions.toolChoice;
- delete streamOptions.stopWhen;
- }
- streamOptions = {
- ...streamOptions,
- experimental_output: Output.object({
- schema: analysisSchema,
- }),
- };
- }
- catch (error) {
- logger.warn("Schema application failed, continuing without schema", {
- error: String(error),
- });
- }
- }
- // Wrap streamText in an OTel span to capture provider-level latency and token usage
- const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
- kind: SpanKind.CLIENT,
- attributes: {
- "gen_ai.system": "vertex",
- "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
- },
+ isAnthropic,
+ timeoutController,
+ tracking,
  });
- let result;
- try {
- result = streamText(streamOptions);
- }
- catch (err) {
- streamSpan.recordException(err instanceof Error ? err : new Error(String(err)));
- streamSpan.setStatus({
- code: SpanStatusCode.ERROR,
- message: err instanceof Error ? err.message : String(err),
- });
- streamSpan.end();
- throw err;
- }
- // Collect token usage and finish reason asynchronously when the stream completes,
- // then end the span. This avoids blocking the stream consumer.
- Promise.resolve(result.usage)
- .then((usage) => {
- streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
- streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
- const effectiveModel = options.model ||
- getModelId(model, this.modelName || getDefaultVertexModel());
- const cost = calculateCost(this.providerName, effectiveModel, {
- input: usage.inputTokens || 0,
- output: usage.outputTokens || 0,
- total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
- });
- if (cost && cost > 0) {
- streamSpan.setAttribute("neurolink.cost", cost);
- }
- })
- .catch(() => {
- // Usage may not be available if the stream is aborted
- });
- Promise.resolve(result.finishReason)
- .then((reason) => {
- streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
- })
- .catch(() => {
- // Finish reason may not be available if the stream is aborted
- });
- Promise.resolve(result.text)
- .then(() => {
- streamSpan.end();
- })
- .catch((err) => {
- streamSpan.setStatus({
- code: SpanStatusCode.ERROR,
- message: err instanceof Error ? err.message : String(err),
- });
- streamSpan.end();
+ const result = this.startObservedAISDKStream(streamOptions, model, modelName, options);
+ this.observeAISDKStreamResult(result, {
+ model,
+ modelName,
+ options,
+ timeoutController,
  });
- // Defer timeout cleanup until the stream completes or errors.
- // Guard against NoOutputGeneratedError becoming an unhandled rejection.
- Promise.resolve(result.text)
- .catch((err) => {
- logger.debug("Stream text promise rejected (expected for empty streams)", {
- error: err instanceof Error ? err.message : String(err),
- });
- })
- .finally(() => timeoutController?.cleanup());
- // Transform string stream to content object stream using BaseProvider method
- const transformedStream = this.createTextStream(result);
  return {
- stream: transformedStream,
+ stream: this.createTextStream(result),
  provider: this.providerName,
  model: this.modelName,
  ...(shouldUseTools && {
- toolCalls: collectedToolCalls,
- toolResults: collectedToolResults,
+ toolCalls: tracking.collectedToolCalls,
+ toolResults: tracking.collectedToolResults,
  }),
  };
  }
@@ -1052,11 +856,230 @@ export class GoogleVertexProvider extends BaseProvider {
  provider: this.providerName,
  modelName: this.modelName,
  error: String(error),
- chunkCount,
+ chunkCount: tracking.chunkCount,
  });
  throw this.handleProviderError(error);
  }
  }
+ async resolveAISDKStreamTools(options, modelName, functionTag) {
+ const shouldUseTools = !options.disableTools && this.supportsTools();
+ const baseStreamTools = shouldUseTools ? await this.getAllTools() : {};
+ const rawTools = shouldUseTools
+ ? { ...baseStreamTools, ...(options.tools || {}) }
+ : {};
+ const isAnthropic = isAnthropicModel(modelName);
+ let tools;
+ if (Object.keys(rawTools).length > 0 && !isAnthropic) {
+ const sanitized = sanitizeToolsForGemini(rawTools);
+ if (sanitized.dropped.length > 0) {
+ logger.warn(`[GoogleVertex] Dropped ${sanitized.dropped.length} incompatible tool(s): ${sanitized.dropped.join(", ")}`);
+ }
+ tools =
+ Object.keys(sanitized.tools).length > 0 ? sanitized.tools : undefined;
+ }
+ else if (isAnthropic && Object.keys(rawTools).length > 0) {
+ const normalized = normalizeToolsForJsonSchemaProvider(rawTools);
+ if (normalized.normalized.length > 0) {
+ logger.debug("[GoogleVertex] Normalized Anthropic tool schema(s)", {
+ toolCount: normalized.normalized.length,
+ toolNames: normalized.normalized,
+ });
+ }
+ tools =
+ Object.keys(normalized.tools).length > 0 ? normalized.tools : undefined;
+ }
+ else {
+ tools = undefined;
+ }
+ logger.debug(`${functionTag}: Tools for streaming`, {
+ shouldUseTools,
+ baseToolCount: Object.keys(baseStreamTools).length,
+ externalToolCount: Object.keys(options.tools || {}).length,
+ toolCount: Object.keys(tools ?? {}).length,
+ toolNames: Object.keys(tools ?? {}),
+ });
+ return {
+ shouldUseTools,
+ tools,
+ isAnthropic,
+ baseToolCount: Object.keys(baseStreamTools).length,
+ };
+ }
+ buildAISDKStreamOptions(params) {
+ const { options, analysisSchema, functionTag, modelName, model, messages, tools, shouldUseTools, isAnthropic, timeoutController, tracking, } = params;
+ const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
+ const maxTokens = shouldSetMaxTokens ? options.maxTokens : undefined;
+ let streamOptions = {
+ model,
+ messages,
+ temperature: options.temperature,
+ ...(maxTokens && { maxTokens }),
+ maxRetries: 0,
+ ...(shouldUseTools &&
+ tools &&
+ Object.keys(tools).length > 0 && {
+ tools,
+ toolChoice: resolveToolChoice(options, tools, shouldUseTools),
+ stopWhen: stepCountIs(options.maxSteps || DEFAULT_MAX_STEPS),
+ }),
+ abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
+ experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
+ ...(options.thinkingConfig?.enabled && {
+ providerOptions: {
+ vertex: {
+ thinkingConfig: {
+ ...(options.thinkingConfig.thinkingLevel && {
+ thinkingLevel: options.thinkingConfig.thinkingLevel,
+ }),
+ ...(options.thinkingConfig.budgetTokens &&
+ !options.thinkingConfig.thinkingLevel && {
+ thinkingBudget: options.thinkingConfig.budgetTokens,
+ }),
+ includeThoughts: true,
+ },
+ },
+ },
+ }),
+ onError: (event) => {
+ const errorMessage = event.error instanceof Error
+ ? event.error.message
+ : String(event.error);
+ logger.error(`${functionTag}: Stream error`, {
+ provider: this.providerName,
+ modelName: this.modelName,
+ error: errorMessage,
+ chunkCount: tracking.chunkCount,
+ });
+ },
+ onFinish: (event) => {
+ logger.debug(`${functionTag}: Stream finished`, {
+ finishReason: event.finishReason,
+ totalChunks: tracking.chunkCount,
+ });
+ },
+ onChunk: () => {
+ tracking.chunkCount++;
+ },
+ onStepFinish: ({ toolCalls, toolResults }) => {
+ this.captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking);
+ },
+ };
+ if (!analysisSchema) {
+ return streamOptions;
+ }
+ try {
+ if (!isAnthropic) {
+ delete streamOptions.tools;
+ delete streamOptions.toolChoice;
+ delete streamOptions.stopWhen;
+ }
+ streamOptions = {
+ ...streamOptions,
+ experimental_output: Output.object({ schema: analysisSchema }),
+ };
+ }
+ catch (error) {
+ logger.warn("Schema application failed, continuing without schema", {
+ error: String(error),
+ });
+ }
+ return streamOptions;
+ }
+ captureAISDKStreamToolStep(options, toolCalls, toolResults, tracking) {
+ logger.info("Tool execution completed", { toolResults, toolCalls });
+ for (const toolCall of toolCalls) {
+ tracking.collectedToolCalls.push({
+ toolCallId: toolCall.toolCallId,
+ toolName: toolCall.toolName,
+ args: toolCall.args ?? toolCall.input ?? toolCall.parameters ?? {},
+ });
+ }
+ for (const toolResult of toolResults) {
+ tracking.collectedToolResults.push({
+ toolName: toolResult.toolName,
+ status: toolResult.error ? "failure" : "success",
+ output: (toolResult.output ?? toolResult.result) ??
+ undefined,
+ error: toolResult.error,
+ id: toolResult.toolCallId ?? toolResult.toolName,
+ });
+ }
+ this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
+ logger.warn("[GoogleVertexProvider] Failed to store tool executions", {
+ provider: this.providerName,
+ error: error instanceof Error ? error.message : String(error),
+ });
+ });
+ }
+ startObservedAISDKStream(streamOptions, model, modelName, options) {
+ const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
+ kind: SpanKind.CLIENT,
+ attributes: {
+ "gen_ai.system": "vertex",
+ "gen_ai.request.model": getModelId(model, this.modelName || "unknown"),
+ },
+ });
+ try {
+ const result = streamText(streamOptions);
+ this.attachAISDKStreamObservers(result, streamSpan, model, modelName, options);
+ return result;
+ }
+ catch (error) {
+ streamSpan.recordException(error instanceof Error ? error : new Error(String(error)));
+ streamSpan.setStatus({
+ code: SpanStatusCode.ERROR,
+ message: error instanceof Error ? error.message : String(error),
+ });
+ streamSpan.end();
+ throw error;
+ }
+ }
+ attachAISDKStreamObservers(result, streamSpan, model, modelName, options) {
+ Promise.resolve(result.usage)
+ .then((usage) => {
+ streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.inputTokens || 0);
+ streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.outputTokens || 0);
+ const effectiveModel = options.model ||
+ getModelId(model, modelName || getDefaultVertexModel());
+ const cost = calculateCost(this.providerName, effectiveModel, {
+ input: usage.inputTokens || 0,
+ output: usage.outputTokens || 0,
+ total: (usage.inputTokens || 0) + (usage.outputTokens || 0),
+ });
+ if (cost && cost > 0) {
+ streamSpan.setAttribute("neurolink.cost", cost);
+ }
+ })
+ .catch(() => undefined);
+ Promise.resolve(result.finishReason)
+ .then((reason) => {
+ streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
+ })
+ .catch(() => undefined);
+ Promise.resolve(result.text)
+ .then(() => {
+ streamSpan.end();
+ })
+ .catch((error) => {
+ streamSpan.setStatus({
+ code: SpanStatusCode.ERROR,
+ message: error instanceof Error ? error.message : String(error),
+ });
+ streamSpan.end();
+ });
+ }
+ observeAISDKStreamResult(result, params) {
+ void params.model;
+ void params.modelName;
+ void params.options;
+ Promise.resolve(result.text)
+ .catch((error) => {
+ logger.debug("Stream text promise rejected (expected for empty streams)", {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ })
+ .finally(() => params.timeoutController?.cleanup());
+ }
  /**
  * Create @google/genai client configured for Vertex AI
  */
@@ -1211,210 +1234,187 @@ export class GoogleVertexProvider extends BaseProvider {
  [ATTR.GEN_AI_OPERATION]: "stream",
  [ATTR.NL_PROVIDER]: this.providerName,
  },
- }, async (span) => {
- const client = await this.createVertexGenAIClient(options.region);
- const effectiveLocation = options.region || this.location || getVertexLocation();
- logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
- model: modelName,
- hasTools: !!options.tools && Object.keys(options.tools).length > 0,
- project: this.projectId,
- location: effectiveLocation,
+ }, (span) => this.executeNativeGemini3StreamWithSpan(options, modelName, span));
+ }
+ async executeNativeGemini3StreamWithSpan(options, modelName, span) {
+ const client = await this.createVertexGenAIClient(options.region);
+ const effectiveLocation = options.region || this.location || getVertexLocation();
+ logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
+ model: modelName,
+ hasTools: !!options.tools && Object.keys(options.tools).length > 0,
+ project: this.projectId,
+ location: effectiveLocation,
+ });
+ const multimodalInput = options.input;
+ const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
+ let hasToolsInput = !!options.tools &&
+ Object.keys(options.tools).length > 0 &&
+ !options.disableTools;
+ const streamOptions = options;
+ const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
+ if (wantsJsonOutput && hasToolsInput) {
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
+ hasToolsInput = false;
+ }
+ let toolsConfig;
+ let executeMap = new Map();
+ if (hasToolsInput) {
+ const toolDeclarationResult = buildNativeToolDeclarations(options.tools);
+ toolsConfig = toolDeclarationResult.toolsConfig;
+ executeMap = toolDeclarationResult.executeMap;
+ logger.debug("[GoogleVertex] Converted tools for native SDK", {
+ toolCount: toolsConfig[0].functionDeclarations.length,
+ toolNames: toolsConfig[0].functionDeclarations.map((tool) => tool.name),
  });
- // Build contents from input with multimodal support
- const multimodalInput = options.input;
- const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
- // Convert tools to native format
- let hasToolsInput = options.tools &&
- Object.keys(options.tools).length > 0 &&
- !options.disableTools;
- // Guard: Gemini cannot use tools + JSON schema simultaneously
- const streamOptions = options;
- const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
- if (wantsJsonOutput && hasToolsInput) {
- logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
- hasToolsInput = false;
- }
- let toolsConfig;
- let executeMap = new Map();
- if (hasToolsInput) {
- const result = buildNativeToolDeclarations(options.tools);
- toolsConfig = result.toolsConfig;
- executeMap = result.executeMap;
- logger.debug("[GoogleVertex] Converted tools for native SDK", {
- toolCount: toolsConfig[0].functionDeclarations.length,
- toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
+ }
+ const config = buildNativeConfig(options, toolsConfig);
+ if (wantsJsonOutput) {
+ config.responseMimeType = "application/json";
+ if (streamOptions.schema) {
+ const rawSchema = convertZodToJsonSchema(streamOptions.schema);
+ const inlinedSchema = inlineJsonSchema(rawSchema);
+ if (inlinedSchema.$schema) {
+ delete inlinedSchema.$schema;
+ }
+ config.responseSchema = inlinedSchema;
+ logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
+ schemaKeys: Object.keys(inlinedSchema),
  });
  }
- // Build config — systemInstruction stays in config for Gemini 3.x.
- // The @google/genai SDK maps config.systemInstruction to the HTTP-level
- // system_instruction field, which is the correct mechanism for all
- // Gemini 3.x models (including global endpoint). Older workaround
- // that moved systemInstruction into user/model content messages caused
- // "Please use a valid role: user, model" on Gemini 3.1+ preview models.
- const config = buildNativeConfig(options, toolsConfig);
- // Add JSON output format support for native SDK stream
- if (streamOptions.output?.format === "json" || streamOptions.schema) {
- config.responseMimeType = "application/json";
- if (streamOptions.schema) {
- const rawSchema = convertZodToJsonSchema(streamOptions.schema);
- const inlinedSchema = inlineJsonSchema(rawSchema);
- if (inlinedSchema.$schema) {
- delete inlinedSchema.$schema;
- }
- config.responseSchema = inlinedSchema;
- logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
- schemaKeys: Object.keys(inlinedSchema),
- });
+ }
+ const startTime = Date.now();
+ const timeoutController = createTimeoutController(this.getTimeout(options), this.providerName, "stream");
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
+ const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
+ const channel = createTextChannel();
+ const allToolCalls = [];
+ const metadata = {
+ streamId: `native-vertex-${Date.now()}`,
+ startTime,
+ responseTime: 0,
+ totalToolExecutions: 0,
+ };
+ let analyticsResolve;
+ let analyticsReject;
+ const analyticsPromise = new Promise((resolve, reject) => {
+ analyticsResolve = resolve;
+ analyticsReject = reject;
+ });
+ const loopPromise = this.runNativeGemini3StreamLoop({
+ client,
+ modelName,
+ span,
+ config,
+ currentContents,
+ executeMap,
+ channel,
+ allToolCalls,
+ metadata,
+ analyticsResolve,
+ analyticsReject,
+ startTime,
+ timeoutController,
+ composedSignal,
+ maxSteps,
+ });
+ loopPromise.catch(() => undefined);
+ return {
+ stream: channel.iterable,
+ provider: this.providerName,
+ model: modelName,
+ toolCalls: allToolCalls,
+ analytics: analyticsPromise,
+ metadata,
+ };
+ }
+ async runNativeGemini3StreamLoop(params) {
+ let lastStepText = "";
+ let totalInputTokens = 0;
+ let totalOutputTokens = 0;
+ let step = 0;
+ let completedWithFinalAnswer = false;
+ const failedTools = new Map();
+ try {
+ while (step < params.maxSteps) {
+ if (params.composedSignal?.aborted) {
+ throw params.composedSignal.reason instanceof Error
+ ? params.composedSignal.reason
+ : new Error("Request aborted");
  }
- }
- const startTime = Date.now();
- const timeout = this.getTimeout(options);
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
- const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
- const maxSteps = computeMaxStepsShared(options.maxSteps);
- // Inject conversation history so the native path has multi-turn context
- const currentContents = this.prependConversationHistory([...contents], options.conversationMessages);
- // Create a push-based text channel so the caller receives tokens as
- // they arrive from the network rather than after full buffering.
- const channel = createTextChannel();
- // Shared mutable state updated by the background agentic loop.
- const allToolCalls = [];
- // Shared metadata object mutated by the background loop so that
- // responseTime and totalToolExecutions reflect final values.
- const metadata = {
- streamId: `native-vertex-${Date.now()}`,
- startTime,
- responseTime: 0,
- totalToolExecutions: 0,
- };
- // analyticsResolvers lets the background loop settle the analytics
- // promise once token counts are known (after the loop completes).
- let analyticsResolve;
- let analyticsReject;
- const analyticsPromise = new Promise((res, rej) => {
- analyticsResolve = res;
- analyticsReject = rej;
- });
- // Run the agentic loop in the background without awaiting it here,
- // so we can return the StreamResult (with channel.iterable) immediately.
- const loopPromise = (async () => {
- let lastStepText = "";
- let totalInputTokens = 0;
- let totalOutputTokens = 0;
- let step = 0;
- let completedWithFinalAnswer = false;
- const failedTools = new Map();
+ step++;
+ logger.debug(`[GoogleVertex] Native SDK step ${step}/${params.maxSteps}`);
  try {
- // Agentic loop for tool calling
- while (step < maxSteps) {
- if (composedSignal?.aborted) {
- throw composedSignal.reason instanceof Error
- ? composedSignal.reason
- : new Error("Request aborted");
- }
- step++;
- logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
- try {
- const rawStream = await client.models.generateContentStream({
- model: modelName,
- contents: currentContents,
- config,
- ...(composedSignal
- ? { httpOptions: { signal: composedSignal } }
- : {}),
- });
- // For every step, use incremental collection so text parts
- // are pushed to the channel as they arrive. For intermediate
- // steps (those that produce function calls) we still need the
- // complete rawResponseParts for pushModelResponseToHistory,
- // which collectStreamChunksIncremental provides at stream end.
- const chunkResult = await collectStreamChunksIncremental(rawStream, channel);
- totalInputTokens += chunkResult.inputTokens;
- totalOutputTokens += chunkResult.outputTokens;
- const stepText = extractTextFromParts(chunkResult.rawResponseParts);
- // If no function calls, this was the final step — channel
- // already received all text parts incrementally.
- if (chunkResult.stepFunctionCalls.length === 0) {
- completedWithFinalAnswer = true;
- break;
- }
- lastStepText = stepText;
- // Record tool call events on the span
- for (const fc of chunkResult.stepFunctionCalls) {
- span.addEvent("gen_ai.tool_call", {
- "tool.name": fc.name,
- "tool.step": step,
- });
- }
- logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
- pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
- const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
- // Function/tool responses must use role: "user" — the
- // @google/genai SDK's validateHistory() only accepts "user"
- // and "model" roles (matching automaticFunctionCalling).
- currentContents.push({
- role: "user",
- parts: functionResponses,
- });
- }
- catch (error) {
- logger.error("[GoogleVertex] Native SDK error", error);
- throw this.handleProviderError(error);
- }
+ const rawStream = await params.client.models.generateContentStream({
+ model: params.modelName,
+ contents: params.currentContents,
+ config: params.config,
+ ...(params.composedSignal
+ ? { httpOptions: { signal: params.composedSignal } }
+ : {}),
+ });
+ const chunkResult = await collectStreamChunksIncremental(rawStream, params.channel);
+ totalInputTokens += chunkResult.inputTokens;
+ totalOutputTokens += chunkResult.outputTokens;
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
+ if (chunkResult.stepFunctionCalls.length === 0) {
+ completedWithFinalAnswer = true;
+ break;
  }
- // Handle max-steps termination: if the model was still calling
- // tools when we hit the limit, push a synthetic final message.
- if (step >= maxSteps && !completedWithFinalAnswer) {
- const fallback = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, "", // finalText is empty — model didn't stop on its own
- lastStepText);
- if (fallback) {
- channel.push(fallback);
- }
+ lastStepText = stepText;
+ for (const functionCall of chunkResult.stepFunctionCalls) {
+ params.span.addEvent("gen_ai.tool_call", {
+ "tool.name": functionCall.name,
+ "tool.step": step,
+ });
  }
- const responseTime = Date.now() - startTime;
- // Propagate final values to the shared metadata object so that
- // the already-returned StreamResult reflects accurate telemetry.
- metadata.responseTime = responseTime;
- metadata.totalToolExecutions = allToolCalls.length;
- // Set token usage and finish reason on the span
- span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
- span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
- span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps && !completedWithFinalAnswer
- ? "max_steps"
- : "stop");
- analyticsResolve({
- provider: this.providerName,
- model: modelName,
- tokenUsage: {
- input: totalInputTokens,
- output: totalOutputTokens,
- total: totalInputTokens + totalOutputTokens,
- },
- requestDuration: responseTime,
- timestamp: new Date().toISOString(),
+ logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
+ pushModelResponseToHistory(params.currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, params.executeMap, failedTools, params.allToolCalls, { abortSignal: params.composedSignal });
+ params.currentContents.push({
+ role: "user",
+ parts: functionResponses,
  });
- channel.close();
  }
- catch (err) {
- channel.error(err);
- analyticsReject(err);
+ catch (error) {
+ logger.error("[GoogleVertex] Native SDK error", error);
+ throw this.handleProviderError(error);
  }
- finally {
- timeoutController?.cleanup();
+ }
+ if (step >= params.maxSteps && !completedWithFinalAnswer) {
+ const fallback = handleMaxStepsTermination("[GoogleVertex]", step, params.maxSteps, "", lastStepText);
+ if (fallback) {
+ params.channel.push(fallback);
  }
- })();
- // Suppress unhandled-rejection warnings on loopPromise — errors are
- // forwarded to the channel and will surface when the caller iterates.
- loopPromise.catch(() => undefined);
- return {
- stream: channel.iterable,
+ }
+ const responseTime = Date.now() - params.startTime;
+ params.metadata.responseTime = responseTime;
+ params.metadata.totalToolExecutions = params.allToolCalls.length;
+ params.span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
+ params.span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
+ params.span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= params.maxSteps && !completedWithFinalAnswer
+ ? "max_steps"
+ : "stop");
+ params.analyticsResolve({
  provider: this.providerName,
- model: modelName,
- toolCalls: allToolCalls,
- analytics: analyticsPromise,
- metadata,
- };
- });
+ model: params.modelName,
+ tokenUsage: {
+ input: totalInputTokens,
+ output: totalOutputTokens,
+ total: totalInputTokens + totalOutputTokens,
+ },
+ requestDuration: responseTime,
+ timestamp: new Date().toISOString(),
+ });
+ params.channel.close();
+ }
+ catch (error) {
+ params.channel.error(error);
+ params.analyticsReject(error);
+ }
+ finally {
+ params.timeoutController?.cleanup();
+ }
  }
  /**
  * Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI