@juspay/neurolink 9.6.0 → 9.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/adapters/video/vertexVideoHandler.js +3 -3
  3. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  4. package/dist/cli/loop/optionsSchema.js +4 -0
  5. package/dist/core/analytics.js +11 -4
  6. package/dist/core/baseProvider.d.ts +6 -0
  7. package/dist/core/baseProvider.js +83 -14
  8. package/dist/core/conversationMemoryManager.d.ts +13 -0
  9. package/dist/core/conversationMemoryManager.js +28 -0
  10. package/dist/core/dynamicModels.js +3 -2
  11. package/dist/core/modules/GenerationHandler.js +2 -0
  12. package/dist/core/redisConversationMemoryManager.d.ts +11 -0
  13. package/dist/core/redisConversationMemoryManager.js +26 -9
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +5 -0
  16. package/dist/lib/adapters/video/vertexVideoHandler.js +3 -3
  17. package/dist/lib/core/analytics.js +11 -4
  18. package/dist/lib/core/baseProvider.d.ts +6 -0
  19. package/dist/lib/core/baseProvider.js +83 -14
  20. package/dist/lib/core/conversationMemoryManager.d.ts +13 -0
  21. package/dist/lib/core/conversationMemoryManager.js +28 -0
  22. package/dist/lib/core/dynamicModels.js +3 -2
  23. package/dist/lib/core/modules/GenerationHandler.js +2 -0
  24. package/dist/lib/core/redisConversationMemoryManager.d.ts +11 -0
  25. package/dist/lib/core/redisConversationMemoryManager.js +26 -9
  26. package/dist/lib/index.d.ts +4 -0
  27. package/dist/lib/index.js +5 -0
  28. package/dist/lib/mcp/httpRetryHandler.js +6 -2
  29. package/dist/lib/neurolink.d.ts +5 -0
  30. package/dist/lib/neurolink.js +160 -10
  31. package/dist/lib/processors/base/BaseFileProcessor.js +2 -1
  32. package/dist/lib/processors/errors/errorHelpers.js +12 -4
  33. package/dist/lib/providers/amazonBedrock.js +2 -1
  34. package/dist/lib/providers/anthropic.js +2 -2
  35. package/dist/lib/providers/anthropicBaseProvider.js +10 -4
  36. package/dist/lib/providers/azureOpenai.js +14 -25
  37. package/dist/lib/providers/googleAiStudio.d.ts +0 -34
  38. package/dist/lib/providers/googleAiStudio.js +124 -315
  39. package/dist/lib/providers/googleNativeGemini3.d.ts +119 -0
  40. package/dist/lib/providers/googleNativeGemini3.js +264 -0
  41. package/dist/lib/providers/googleVertex.d.ts +0 -40
  42. package/dist/lib/providers/googleVertex.js +150 -317
  43. package/dist/lib/providers/huggingFace.js +20 -5
  44. package/dist/lib/providers/litellm.js +6 -4
  45. package/dist/lib/providers/mistral.js +3 -2
  46. package/dist/lib/providers/openAI.js +2 -2
  47. package/dist/lib/providers/openRouter.js +8 -7
  48. package/dist/lib/providers/openaiCompatible.js +10 -4
  49. package/dist/lib/rag/resilience/RetryHandler.js +6 -2
  50. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +24 -2
  51. package/dist/lib/services/server/ai/observability/instrumentation.js +12 -1
  52. package/dist/lib/types/generateTypes.d.ts +28 -0
  53. package/dist/lib/types/ragTypes.d.ts +9 -1
  54. package/dist/lib/types/streamTypes.d.ts +13 -0
  55. package/dist/lib/utils/conversationMemory.js +15 -0
  56. package/dist/lib/utils/errorHandling.d.ts +5 -0
  57. package/dist/lib/utils/errorHandling.js +19 -0
  58. package/dist/lib/utils/pricing.d.ts +12 -0
  59. package/dist/lib/utils/pricing.js +134 -0
  60. package/dist/lib/utils/redis.d.ts +17 -0
  61. package/dist/lib/utils/redis.js +105 -0
  62. package/dist/lib/utils/timeout.d.ts +10 -0
  63. package/dist/lib/utils/timeout.js +15 -0
  64. package/dist/mcp/httpRetryHandler.js +6 -2
  65. package/dist/neurolink.d.ts +5 -0
  66. package/dist/neurolink.js +160 -10
  67. package/dist/processors/base/BaseFileProcessor.js +2 -1
  68. package/dist/processors/errors/errorHelpers.js +12 -4
  69. package/dist/providers/amazonBedrock.js +2 -1
  70. package/dist/providers/anthropic.js +2 -2
  71. package/dist/providers/anthropicBaseProvider.js +10 -4
  72. package/dist/providers/azureOpenai.js +14 -25
  73. package/dist/providers/googleAiStudio.d.ts +0 -34
  74. package/dist/providers/googleAiStudio.js +124 -315
  75. package/dist/providers/googleNativeGemini3.d.ts +119 -0
  76. package/dist/providers/googleNativeGemini3.js +263 -0
  77. package/dist/providers/googleVertex.d.ts +0 -40
  78. package/dist/providers/googleVertex.js +150 -317
  79. package/dist/providers/huggingFace.js +20 -5
  80. package/dist/providers/litellm.js +6 -4
  81. package/dist/providers/mistral.js +3 -2
  82. package/dist/providers/openAI.js +2 -2
  83. package/dist/providers/openRouter.js +8 -7
  84. package/dist/providers/openaiCompatible.js +10 -4
  85. package/dist/rag/resilience/RetryHandler.js +6 -2
  86. package/dist/services/server/ai/observability/instrumentation.d.ts +24 -2
  87. package/dist/services/server/ai/observability/instrumentation.js +12 -1
  88. package/dist/types/generateTypes.d.ts +28 -0
  89. package/dist/types/ragTypes.d.ts +9 -1
  90. package/dist/types/streamTypes.d.ts +13 -0
  91. package/dist/utils/conversationMemory.js +15 -0
  92. package/dist/utils/errorHandling.d.ts +5 -0
  93. package/dist/utils/errorHandling.js +19 -0
  94. package/dist/utils/pricing.d.ts +12 -0
  95. package/dist/utils/pricing.js +133 -0
  96. package/dist/utils/redis.d.ts +17 -0
  97. package/dist/utils/redis.js +105 -0
  98. package/dist/utils/timeout.d.ts +10 -0
  99. package/dist/utils/timeout.js +15 -0
  100. package/package.json +26 -25
@@ -7,7 +7,7 @@ import os from "os";
7
7
  import path from "path";
8
8
  import { ErrorCategory, ErrorSeverity, } from "../constants/enums.js";
9
9
  import { BaseProvider } from "../core/baseProvider.js";
10
- import { DEFAULT_MAX_STEPS, DEFAULT_TOOL_MAX_RETRIES, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
10
+ import { DEFAULT_MAX_STEPS, GLOBAL_LOCATION_MODELS, } from "../core/constants.js";
11
11
  import { ModelConfigurationManager } from "../core/modelConfiguration.js";
12
12
  import { createProxyFetch } from "../proxy/proxyFetch.js";
13
13
  import { AuthenticationError, ProviderError } from "../types/errors.js";
@@ -17,8 +17,8 @@ import { logger } from "../utils/logger.js";
17
17
  import { isGemini3Model } from "../utils/modelDetection.js";
18
18
  import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
19
19
  import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
20
- import { createNativeThinkingConfig, } from "../utils/thinkingConfig.js";
21
- import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
20
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
21
+ import { buildNativeToolDeclarations, buildNativeConfig, computeMaxSteps as computeMaxStepsShared, collectStreamChunks, extractTextFromParts, executeNativeToolCalls, handleMaxStepsTermination, pushModelResponseToHistory, } from "./googleNativeGemini3.js";
22
22
  // Import proper types for multimodal message handling
23
23
  // Enhanced Anthropic support with direct imports
24
24
  // Using the dual provider architecture from Vercel AI SDK
@@ -744,7 +744,7 @@ export class GoogleVertexProvider extends BaseProvider {
744
744
  toolChoice: "auto",
745
745
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
746
746
  }),
747
- abortSignal: timeoutController?.controller.signal,
747
+ abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
748
748
  experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
749
749
  // Gemini 3: use thinkingLevel via providerOptions (Vertex AI)
750
750
  // Gemini 2.5: use thinkingBudget via providerOptions
@@ -810,7 +810,8 @@ export class GoogleVertexProvider extends BaseProvider {
810
810
  }
811
811
  }
812
812
  const result = streamText(streamOptions);
813
- timeoutController?.cleanup();
813
+ // Defer timeout cleanup until the stream completes or errors
814
+ result.text.finally(() => timeoutController?.cleanup());
814
815
  // Transform string stream to content object stream using BaseProvider method
815
816
  const transformedStream = this.createTextStream(result);
816
817
  // Track tool calls and results for streaming
@@ -948,233 +949,7 @@ export class GoogleVertexProvider extends BaseProvider {
948
949
  },
949
950
  ];
950
951
  }
951
- /**
952
- * Convert Vercel AI SDK tools to @google/genai FunctionDeclarations and build an execute map.
953
- * Shared by both stream and generate native Gemini 3 paths.
954
- */
955
- convertToolsToNativeFunctionDeclarations(toolsMap, logLabel) {
956
- if (Object.keys(toolsMap).length === 0) {
957
- return { tools: undefined, executeMap: new Map() };
958
- }
959
- const functionDeclarations = [];
960
- const executeMap = new Map();
961
- for (const [name, tool] of Object.entries(toolsMap)) {
962
- const decl = {
963
- name,
964
- description: tool.description || `Tool: ${name}`,
965
- };
966
- if (tool.parameters) {
967
- const rawSchema = convertZodToJsonSchema(tool.parameters);
968
- decl.parametersJsonSchema = inlineJsonSchema(rawSchema);
969
- if (decl.parametersJsonSchema.$schema) {
970
- delete decl.parametersJsonSchema.$schema;
971
- }
972
- }
973
- functionDeclarations.push(decl);
974
- if (tool.execute) {
975
- executeMap.set(name, tool.execute);
976
- }
977
- }
978
- logger.debug(`[GoogleVertex] Converted tools for ${logLabel}`, {
979
- toolCount: functionDeclarations.length,
980
- toolNames: functionDeclarations.map((t) => t.name),
981
- });
982
- return {
983
- tools: [{ functionDeclarations }],
984
- executeMap,
985
- };
986
- }
987
- /**
988
- * Build the native @google/genai config object for generate/stream calls.
989
- * Shared by both stream and generate native Gemini 3 paths.
990
- */
991
- buildNativeGenerateConfig(options, tools) {
992
- const config = {
993
- temperature: options.temperature ?? 1.0,
994
- maxOutputTokens: options.maxTokens,
995
- };
996
- if (tools) {
997
- config.tools = tools;
998
- }
999
- if (options.systemPrompt) {
1000
- config.systemInstruction = options.systemPrompt;
1001
- }
1002
- const nativeThinkingConfig = createNativeThinkingConfig(options.thinkingConfig);
1003
- if (nativeThinkingConfig) {
1004
- config.thinkingConfig = nativeThinkingConfig;
1005
- }
1006
- return config;
1007
- }
1008
- /**
1009
- * Compute a safe maxSteps value from raw input.
1010
- */
1011
- computeMaxSteps(rawMaxSteps) {
1012
- const raw = rawMaxSteps || DEFAULT_MAX_STEPS;
1013
- return Number.isFinite(raw) && raw > 0
1014
- ? Math.min(Math.floor(raw), 100)
1015
- : Math.min(DEFAULT_MAX_STEPS, 100);
1016
- }
1017
- /**
1018
- * Extract text from raw native SDK response parts, filtering out non-text parts
1019
- * (thoughtSignature, functionCall) to avoid SDK warnings.
1020
- */
1021
- extractTextFromRawParts(rawParts) {
1022
- return rawParts
1023
- .filter((part) => typeof part.text === "string")
1024
- .map((part) => part.text)
1025
- .join("");
1026
- }
1027
- /**
1028
- * Execute a set of function calls from the model, tracking failures and retries.
1029
- * Returns function response parts to be added to conversation history.
1030
- * Shared by both stream and generate native Gemini 3 paths.
1031
- */
1032
- async executeNativeFunctionCalls(calls, executeMap, failedTools, allToolCalls, toolExecutions) {
1033
- const functionResponses = [];
1034
- for (const call of calls) {
1035
- allToolCalls.push({ toolName: call.name, args: call.args });
1036
- // Check if this tool has already exceeded retry limit
1037
- const failedInfo = failedTools.get(call.name);
1038
- if (failedInfo && failedInfo.count >= DEFAULT_TOOL_MAX_RETRIES) {
1039
- logger.warn(`[GoogleVertex] Tool "${call.name}" has exceeded retry limit (${DEFAULT_TOOL_MAX_RETRIES}), skipping execution`);
1040
- const errorOutput = {
1041
- error: `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${failedInfo.count} times and will not be retried. Last error: ${failedInfo.lastError}. Please proceed without using this tool or inform the user that this functionality is unavailable.`,
1042
- status: "permanently_failed",
1043
- do_not_retry: true,
1044
- };
1045
- toolExecutions?.push({
1046
- name: call.name,
1047
- input: call.args,
1048
- output: errorOutput,
1049
- });
1050
- functionResponses.push({
1051
- functionResponse: { name: call.name, response: errorOutput },
1052
- });
1053
- continue;
1054
- }
1055
- const execute = executeMap.get(call.name);
1056
- if (execute) {
1057
- try {
1058
- const toolOptions = {
1059
- toolCallId: `${call.name}-${Date.now()}`,
1060
- messages: [],
1061
- abortSignal: undefined,
1062
- };
1063
- const result = await execute(call.args, toolOptions);
1064
- toolExecutions?.push({
1065
- name: call.name,
1066
- input: call.args,
1067
- output: result,
1068
- });
1069
- functionResponses.push({
1070
- functionResponse: {
1071
- name: call.name,
1072
- response: { result },
1073
- },
1074
- });
1075
- }
1076
- catch (error) {
1077
- const errorMessage = error instanceof Error ? error.message : "Unknown error";
1078
- const currentFailInfo = failedTools.get(call.name) || {
1079
- count: 0,
1080
- lastError: "",
1081
- };
1082
- currentFailInfo.count++;
1083
- currentFailInfo.lastError = errorMessage;
1084
- failedTools.set(call.name, currentFailInfo);
1085
- logger.warn(`[GoogleVertex] Tool "${call.name}" failed (attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}): ${errorMessage}`);
1086
- const isPermanentFailure = currentFailInfo.count >= DEFAULT_TOOL_MAX_RETRIES;
1087
- const errorOutput = {
1088
- error: isPermanentFailure
1089
- ? `TOOL_PERMANENTLY_FAILED: The tool "${call.name}" has failed ${currentFailInfo.count} times with error: ${errorMessage}. This tool will not be retried. Please proceed without using this tool or inform the user that this functionality is unavailable.`
1090
- : `TOOL_EXECUTION_ERROR: ${errorMessage}. Retry attempt ${currentFailInfo.count}/${DEFAULT_TOOL_MAX_RETRIES}.`,
1091
- status: isPermanentFailure ? "permanently_failed" : "failed",
1092
- do_not_retry: isPermanentFailure,
1093
- retry_count: currentFailInfo.count,
1094
- max_retries: DEFAULT_TOOL_MAX_RETRIES,
1095
- };
1096
- toolExecutions?.push({
1097
- name: call.name,
1098
- input: call.args,
1099
- output: errorOutput,
1100
- });
1101
- functionResponses.push({
1102
- functionResponse: { name: call.name, response: errorOutput },
1103
- });
1104
- }
1105
- }
1106
- else {
1107
- // Tool not found is a permanent error
1108
- const errorOutput = {
1109
- error: `TOOL_NOT_FOUND: The tool "${call.name}" does not exist. Do not attempt to call this tool again.`,
1110
- status: "permanently_failed",
1111
- do_not_retry: true,
1112
- };
1113
- toolExecutions?.push({
1114
- name: call.name,
1115
- input: call.args,
1116
- output: errorOutput,
1117
- });
1118
- functionResponses.push({
1119
- functionResponse: { name: call.name, response: errorOutput },
1120
- });
1121
- }
1122
- }
1123
- return functionResponses;
1124
- }
1125
- /**
1126
- * Collect raw response parts and function calls from a native SDK content stream chunk.
1127
- * Also accumulates token usage metadata.
1128
- * Returns updated token counts.
1129
- */
1130
- processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage) {
1131
- const chunkRecord = chunk;
1132
- const candidates = chunkRecord.candidates;
1133
- const firstCandidate = candidates?.[0];
1134
- const chunkContent = firstCandidate?.content;
1135
- if (chunkContent && Array.isArray(chunkContent.parts)) {
1136
- rawResponseParts.push(...chunkContent.parts);
1137
- }
1138
- if (chunk.functionCalls) {
1139
- stepFunctionCalls.push(...chunk.functionCalls);
1140
- }
1141
- const usageMetadata = chunkRecord.usageMetadata;
1142
- if (usageMetadata) {
1143
- if (usageMetadata.promptTokenCount !== undefined &&
1144
- usageMetadata.promptTokenCount > 0) {
1145
- tokenUsage.input = usageMetadata.promptTokenCount;
1146
- }
1147
- if (usageMetadata.candidatesTokenCount !== undefined &&
1148
- usageMetadata.candidatesTokenCount > 0) {
1149
- tokenUsage.output = usageMetadata.candidatesTokenCount;
1150
- }
1151
- }
1152
- }
1153
- /**
1154
- * Push model response parts to conversation history, preserving thoughtSignature
1155
- * for Gemini 3 multi-turn tool calling.
1156
- */
1157
- pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls) {
1158
- currentContents.push({
1159
- role: "model",
1160
- parts: rawResponseParts.length > 0
1161
- ? rawResponseParts
1162
- : stepFunctionCalls.map((fc) => ({ functionCall: fc })),
1163
- });
1164
- }
1165
- /**
1166
- * Compute final text for maxSteps termination when the model was still calling tools.
1167
- */
1168
- computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText) {
1169
- if (step >= maxSteps && !finalText) {
1170
- logger.warn(`[GoogleVertex] Tool call loop terminated after reaching maxSteps (${maxSteps}). ` +
1171
- `Model was still calling tools. Using accumulated text from last step.`);
1172
- return (lastStepText ||
1173
- `[Tool execution limit reached after ${maxSteps} steps. The model continued requesting tool calls beyond the limit.]`);
1174
- }
1175
- return finalText;
1176
- }
1177
- // ── End shared helpers ──
952
+ // ── Shared Gemini 3 helpers are now in ./googleNativeGemini3.ts ──
1178
953
  /**
1179
954
  * Execute stream using native @google/genai SDK for Gemini 3 models on Vertex AI
1180
955
  * This bypasses @ai-sdk/google-vertex to properly handle thought_signature
@@ -1193,16 +968,30 @@ export class GoogleVertexProvider extends BaseProvider {
1193
968
  const multimodalInput = options.input;
1194
969
  const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
1195
970
  // Convert tools to native format
1196
- const toolsInput = options.tools &&
971
+ let hasToolsInput = options.tools &&
1197
972
  Object.keys(options.tools).length > 0 &&
1198
- !options.disableTools
1199
- ? options.tools
1200
- : {};
1201
- const { tools, executeMap } = this.convertToolsToNativeFunctionDeclarations(toolsInput, "native SDK");
973
+ !options.disableTools;
974
+ // Guard: Gemini cannot use tools + JSON schema simultaneously
975
+ const streamOptions = options;
976
+ const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
977
+ if (wantsJsonOutput && hasToolsInput) {
978
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
979
+ hasToolsInput = false;
980
+ }
981
+ let toolsConfig;
982
+ let executeMap = new Map();
983
+ if (hasToolsInput) {
984
+ const result = buildNativeToolDeclarations(options.tools);
985
+ toolsConfig = result.toolsConfig;
986
+ executeMap = result.executeMap;
987
+ logger.debug("[GoogleVertex] Converted tools for native SDK", {
988
+ toolCount: toolsConfig[0].functionDeclarations.length,
989
+ toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
990
+ });
991
+ }
1202
992
  // Build config
1203
- const config = this.buildNativeGenerateConfig(options, tools);
993
+ const config = buildNativeConfig(options, toolsConfig);
1204
994
  // Add JSON output format support for native SDK stream
1205
- const streamOptions = options;
1206
995
  if (streamOptions.output?.format === "json" || streamOptions.schema) {
1207
996
  config.responseMimeType = "application/json";
1208
997
  if (streamOptions.schema) {
@@ -1218,50 +1007,63 @@ export class GoogleVertexProvider extends BaseProvider {
1218
1007
  }
1219
1008
  }
1220
1009
  const startTime = Date.now();
1221
- const maxSteps = this.computeMaxSteps(options.maxSteps);
1010
+ const timeout = this.getTimeout(options);
1011
+ const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
1012
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1013
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
1222
1014
  const currentContents = [...contents];
1223
1015
  let finalText = "";
1224
1016
  let lastStepText = "";
1017
+ let totalInputTokens = 0;
1018
+ let totalOutputTokens = 0;
1225
1019
  const allToolCalls = [];
1226
1020
  let step = 0;
1227
1021
  const failedTools = new Map();
1228
- const tokenUsage = { input: 0, output: 0 };
1229
1022
  // Agentic loop for tool calling
1230
- while (step < maxSteps) {
1231
- step++;
1232
- logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
1233
- try {
1234
- const stream = await client.models.generateContentStream({
1235
- model: modelName,
1236
- contents: currentContents,
1237
- config,
1238
- });
1239
- const stepFunctionCalls = [];
1240
- const rawResponseParts = [];
1241
- for await (const chunk of stream) {
1242
- this.processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage);
1243
- }
1244
- const stepText = this.extractTextFromRawParts(rawResponseParts);
1245
- if (stepFunctionCalls.length === 0) {
1246
- finalText = stepText;
1023
+ try {
1024
+ while (step < maxSteps) {
1025
+ if (timeoutController?.controller.signal.aborted) {
1247
1026
  break;
1248
1027
  }
1249
- lastStepText = stepText;
1250
- logger.debug(`[GoogleVertex] Executing ${stepFunctionCalls.length} function calls`);
1251
- this.pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls);
1252
- const functionResponses = await this.executeNativeFunctionCalls(stepFunctionCalls, executeMap, failedTools, allToolCalls);
1253
- // Add function responses to history
1254
- currentContents.push({
1255
- role: "function",
1256
- parts: functionResponses,
1257
- });
1258
- }
1259
- catch (error) {
1260
- logger.error("[GoogleVertex] Native SDK error", error);
1261
- throw this.handleProviderError(error);
1028
+ step++;
1029
+ logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
1030
+ try {
1031
+ const stream = await client.models.generateContentStream({
1032
+ model: modelName,
1033
+ contents: currentContents,
1034
+ config,
1035
+ ...(composedSignal
1036
+ ? { httpOptions: { signal: composedSignal } }
1037
+ : {}),
1038
+ });
1039
+ const chunkResult = await collectStreamChunks(stream);
1040
+ totalInputTokens += chunkResult.inputTokens;
1041
+ totalOutputTokens += chunkResult.outputTokens;
1042
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1043
+ if (chunkResult.stepFunctionCalls.length === 0) {
1044
+ finalText = stepText;
1045
+ break;
1046
+ }
1047
+ lastStepText = stepText;
1048
+ logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
1049
+ pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1050
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
1051
+ // Add function responses to history
1052
+ currentContents.push({
1053
+ role: "function",
1054
+ parts: functionResponses,
1055
+ });
1056
+ }
1057
+ catch (error) {
1058
+ logger.error("[GoogleVertex] Native SDK error", error);
1059
+ throw this.handleProviderError(error);
1060
+ }
1262
1061
  }
1263
1062
  }
1264
- finalText = this.computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText);
1063
+ finally {
1064
+ timeoutController?.cleanup();
1065
+ }
1066
+ finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1265
1067
  const responseTime = Date.now() - startTime;
1266
1068
  // Create async iterable for streaming result
1267
1069
  async function* createTextStream() {
@@ -1272,9 +1074,9 @@ export class GoogleVertexProvider extends BaseProvider {
1272
1074
  provider: this.providerName,
1273
1075
  model: modelName,
1274
1076
  usage: {
1275
- input: tokenUsage.input,
1276
- output: tokenUsage.output,
1277
- total: tokenUsage.input + tokenUsage.output,
1077
+ input: totalInputTokens,
1078
+ output: totalOutputTokens,
1079
+ total: totalInputTokens + totalOutputTokens,
1278
1080
  },
1279
1081
  toolCalls: allToolCalls.map((tc) => ({
1280
1082
  toolName: tc.toolName,
@@ -1306,62 +1108,93 @@ export class GoogleVertexProvider extends BaseProvider {
1306
1108
  const multimodalInput = options.input;
1307
1109
  const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
1308
1110
  // Get tools from SDK and options
1309
- const shouldUseTools = !options.disableTools && this.supportsTools();
1111
+ let shouldUseTools = !options.disableTools && this.supportsTools();
1112
+ // Guard: Gemini cannot use tools + JSON schema simultaneously
1113
+ const wantsJsonOutputGen = options.output?.format === "json" || options.schema;
1114
+ if (wantsJsonOutputGen && shouldUseTools) {
1115
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1116
+ shouldUseTools = false;
1117
+ }
1310
1118
  const sdkTools = shouldUseTools ? await this.getAllTools() : {};
1311
- const combinedTools = { ...sdkTools, ...(options.tools || {}) };
1312
- const { tools, executeMap } = this.convertToolsToNativeFunctionDeclarations(combinedTools, "native SDK generate");
1119
+ const combinedTools = shouldUseTools
1120
+ ? { ...sdkTools, ...(options.tools || {}) }
1121
+ : {};
1122
+ let toolsConfig;
1123
+ let executeMap = new Map();
1124
+ if (Object.keys(combinedTools).length > 0) {
1125
+ const result = buildNativeToolDeclarations(combinedTools);
1126
+ toolsConfig = result.toolsConfig;
1127
+ executeMap = result.executeMap;
1128
+ logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
1129
+ toolCount: toolsConfig[0].functionDeclarations.length,
1130
+ toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1131
+ });
1132
+ }
1313
1133
  // Build config
1314
- const config = this.buildNativeGenerateConfig(options, tools);
1134
+ const config = buildNativeConfig(options, toolsConfig);
1315
1135
  // Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
1316
1136
  // For now, schemas are handled via the AI SDK fallback path, not native SDK
1317
1137
  // TODO: Implement proper $ref resolution for complex nested schemas
1318
1138
  const startTime = Date.now();
1319
- const maxSteps = this.computeMaxSteps(options.maxSteps);
1139
+ const timeout = this.getTimeout(options);
1140
+ const timeoutController = createTimeoutController(timeout, this.providerName, "generate");
1141
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1142
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
1320
1143
  const currentContents = [...contents];
1321
1144
  let finalText = "";
1322
1145
  let lastStepText = "";
1146
+ let totalInputTokens = 0;
1147
+ let totalOutputTokens = 0;
1323
1148
  const allToolCalls = [];
1324
1149
  const toolExecutions = [];
1325
1150
  let step = 0;
1326
1151
  const failedTools = new Map();
1327
- const tokenUsage = { input: 0, output: 0 };
1328
- // Agentic loop for tool calling
1329
- while (step < maxSteps) {
1330
- step++;
1331
- logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
1332
- try {
1333
- // Use generateContentStream and collect all chunks (same as GoogleAIStudio)
1334
- const stream = await client.models.generateContentStream({
1335
- model: modelName,
1336
- contents: currentContents,
1337
- config,
1338
- });
1339
- const stepFunctionCalls = [];
1340
- const rawResponseParts = [];
1341
- for await (const chunk of stream) {
1342
- this.processNativeStreamChunk(chunk, rawResponseParts, stepFunctionCalls, tokenUsage);
1343
- }
1344
- const stepText = this.extractTextFromRawParts(rawResponseParts);
1345
- if (stepFunctionCalls.length === 0) {
1346
- finalText = stepText;
1152
+ try {
1153
+ // Agentic loop for tool calling
1154
+ while (step < maxSteps) {
1155
+ if (timeoutController?.controller.signal.aborted) {
1347
1156
  break;
1348
1157
  }
1349
- lastStepText = stepText;
1350
- logger.debug(`[GoogleVertex] Generate executing ${stepFunctionCalls.length} function calls`);
1351
- this.pushModelResponseToHistory(currentContents, rawResponseParts, stepFunctionCalls);
1352
- const functionResponses = await this.executeNativeFunctionCalls(stepFunctionCalls, executeMap, failedTools, allToolCalls, toolExecutions);
1353
- // Add function responses to history
1354
- currentContents.push({
1355
- role: "function",
1356
- parts: functionResponses,
1357
- });
1358
- }
1359
- catch (error) {
1360
- logger.error("[GoogleVertex] Native SDK generate error", error);
1361
- throw this.handleProviderError(error);
1158
+ step++;
1159
+ logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
1160
+ try {
1161
+ // Use generateContentStream and collect all chunks (same as GoogleAIStudio)
1162
+ const stream = await client.models.generateContentStream({
1163
+ model: modelName,
1164
+ contents: currentContents,
1165
+ config,
1166
+ ...(composedSignal
1167
+ ? { httpOptions: { signal: composedSignal } }
1168
+ : {}),
1169
+ });
1170
+ const chunkResult = await collectStreamChunks(stream);
1171
+ totalInputTokens += chunkResult.inputTokens;
1172
+ totalOutputTokens += chunkResult.outputTokens;
1173
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1174
+ if (chunkResult.stepFunctionCalls.length === 0) {
1175
+ finalText = stepText;
1176
+ break;
1177
+ }
1178
+ lastStepText = stepText;
1179
+ logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
1180
+ pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1181
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
1182
+ // Add function responses to history
1183
+ currentContents.push({
1184
+ role: "function",
1185
+ parts: functionResponses,
1186
+ });
1187
+ }
1188
+ catch (error) {
1189
+ logger.error("[GoogleVertex] Native SDK generate error", error);
1190
+ throw this.handleProviderError(error);
1191
+ }
1362
1192
  }
1363
1193
  }
1364
- finalText = this.computeMaxStepsTerminationText(step, maxSteps, finalText, lastStepText);
1194
+ finally {
1195
+ timeoutController?.cleanup();
1196
+ }
1197
+ finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1365
1198
  const responseTime = Date.now() - startTime;
1366
1199
  // Build EnhancedGenerateResult
1367
1200
  return {
@@ -1369,9 +1202,9 @@ export class GoogleVertexProvider extends BaseProvider {
1369
1202
  provider: this.providerName,
1370
1203
  model: modelName,
1371
1204
  usage: {
1372
- input: tokenUsage.input,
1373
- output: tokenUsage.output,
1374
- total: tokenUsage.input + tokenUsage.output,
1205
+ input: totalInputTokens,
1206
+ output: totalOutputTokens,
1207
+ total: totalInputTokens + totalOutputTokens,
1375
1208
  },
1376
1209
  responseTime,
1377
1210
  toolsUsed: allToolCalls.map((tc) => tc.toolName),
@@ -3,7 +3,7 @@ import { streamText, } from "ai";
3
3
  import { AIProviderName } from "../constants/enums.js";
4
4
  import { BaseProvider } from "../core/baseProvider.js";
5
5
  import { logger } from "../utils/logger.js";
6
- import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
6
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
7
7
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
8
8
  import { validateApiKey, createHuggingFaceConfig, getProviderModel, } from "../utils/providerConfig.js";
9
9
  import { createProxyFetch } from "../proxy/proxyFetch.js";
@@ -112,20 +112,35 @@ export class HuggingFaceProvider extends BaseProvider {
112
112
  const timeout = this.getTimeout(options);
113
113
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
114
114
  try {
115
+ // Get tools - options.tools is pre-merged by BaseProvider.stream()
116
+ const shouldUseTools = !options.disableTools && this.supportsTools();
117
+ const allTools = shouldUseTools
118
+ ? options.tools || (await this.getAllTools())
119
+ : {};
115
120
  // Enhanced tool handling for HuggingFace models
116
121
  const streamOptions = this.prepareStreamOptions(options, analysisSchema);
117
122
  // Build message array from options with multimodal support
118
123
  // Using protected helper from BaseProvider to eliminate code duplication
119
- const messages = await this.buildMessagesForStream(options);
124
+ // Pass the enhanced system prompt (with tool-calling instructions) so it
125
+ // actually reaches the model instead of being silently discarded.
126
+ const messagesOptions = streamOptions.system
127
+ ? { ...options, systemPrompt: streamOptions.system }
128
+ : options;
129
+ const messages = await this.buildMessagesForStream(messagesOptions);
120
130
  const result = await streamText({
121
131
  model: this.model,
122
132
  messages: messages,
123
133
  temperature: options.temperature,
124
134
  maxTokens: options.maxTokens, // No default limit - unlimited unless specified
125
135
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
126
- tools: streamOptions.tools, // Tools format conversion handled by prepareStreamOptions
127
- toolChoice: streamOptions.toolChoice, // Tool choice handled by prepareStreamOptions
128
- abortSignal: timeoutController?.controller.signal,
136
+ tools: (shouldUseTools
137
+ ? streamOptions.tools || allTools
138
+ : {}),
139
+ toolChoice: (shouldUseTools
140
+ ? streamOptions.toolChoice || "auto"
141
+ : "none"),
142
+ abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
143
+ experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
129
144
  onStepFinish: ({ toolCalls, toolResults }) => {
130
145
  this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
131
146
  logger.warn("[HuggingFaceProvider] Failed to store tool executions", {
@@ -4,9 +4,10 @@ import { BaseProvider } from "../core/baseProvider.js";
4
4
  import { DEFAULT_MAX_STEPS } from "../core/constants.js";
5
5
  import { streamAnalyticsCollector } from "../core/streamAnalytics.js";
6
6
  import { createProxyFetch } from "../proxy/proxyFetch.js";
7
+ import { isAbortError } from "../utils/errorHandling.js";
7
8
  import { logger } from "../utils/logger.js";
8
9
  import { getProviderModel } from "../utils/providerConfig.js";
9
- import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
10
+ import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
10
11
  // Configuration helpers
11
12
  const getLiteLLMConfig = () => {
12
13
  return {
@@ -157,7 +158,8 @@ export class LiteLLMProvider extends BaseProvider {
157
158
  toolChoice: "auto",
158
159
  maxSteps: options.maxSteps || DEFAULT_MAX_STEPS,
159
160
  }),
160
- abortSignal: timeoutController?.controller.signal,
161
+ abortSignal: composeAbortSignals(options.abortSignal, timeoutController?.controller.signal),
162
+ experimental_telemetry: this.telemetryHandler.getTelemetryConfig(options),
161
163
  onError: (event) => {
162
164
  const error = event.error;
163
165
  const errorMessage = error instanceof Error ? error.message : String(error);
@@ -180,7 +182,7 @@ export class LiteLLMProvider extends BaseProvider {
180
182
  onStepFinish: ({ toolCalls, toolResults }) => {
181
183
  logger.info("Tool execution completed", { toolResults, toolCalls });
182
184
  this.handleToolExecutionStorage(toolCalls, toolResults, options, new Date()).catch((error) => {
183
- logger.warn("LiteLLMProvider] Failed to store tool executions", {
185
+ logger.warn("[LiteLLMProvider] Failed to store tool executions", {
184
186
  provider: this.providerName,
185
187
  error: error instanceof Error ? error.message : String(error),
186
188
  });
@@ -361,7 +363,7 @@ export class LiteLLMProvider extends BaseProvider {
361
363
  }
362
364
  catch (error) {
363
365
  clearTimeout(timeoutId);
364
- if (error instanceof Error && error.name === "AbortError") {
366
+ if (isAbortError(error)) {
365
367
  throw new Error("Request timed out after 5 seconds");
366
368
  }
367
369
  throw error;