@juspay/neurolink 9.15.0 → 9.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CHANGELOG.md +6 -0
  2. package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
  3. package/dist/adapters/video/videoAnalyzer.js +10 -8
  4. package/dist/cli/commands/setup-anthropic.js +1 -14
  5. package/dist/cli/commands/setup-azure.js +1 -12
  6. package/dist/cli/commands/setup-bedrock.js +1 -9
  7. package/dist/cli/commands/setup-google-ai.js +1 -12
  8. package/dist/cli/commands/setup-openai.js +1 -14
  9. package/dist/cli/commands/workflow.d.ts +27 -0
  10. package/dist/cli/commands/workflow.js +216 -0
  11. package/dist/cli/factories/commandFactory.js +79 -20
  12. package/dist/cli/index.js +0 -1
  13. package/dist/cli/parser.js +4 -1
  14. package/dist/cli/utils/maskCredential.d.ts +11 -0
  15. package/dist/cli/utils/maskCredential.js +23 -0
  16. package/dist/constants/contextWindows.js +107 -16
  17. package/dist/constants/enums.d.ts +99 -15
  18. package/dist/constants/enums.js +152 -22
  19. package/dist/context/budgetChecker.js +1 -1
  20. package/dist/context/contextCompactor.js +31 -4
  21. package/dist/context/emergencyTruncation.d.ts +21 -0
  22. package/dist/context/emergencyTruncation.js +88 -0
  23. package/dist/context/errorDetection.d.ts +16 -0
  24. package/dist/context/errorDetection.js +48 -1
  25. package/dist/context/errors.d.ts +19 -0
  26. package/dist/context/errors.js +21 -0
  27. package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
  28. package/dist/context/stages/slidingWindowTruncator.js +159 -24
  29. package/dist/core/baseProvider.js +306 -200
  30. package/dist/core/conversationMemoryManager.js +104 -61
  31. package/dist/core/evaluationProviders.js +16 -33
  32. package/dist/core/factory.js +237 -164
  33. package/dist/core/modules/GenerationHandler.js +175 -116
  34. package/dist/core/modules/MessageBuilder.js +222 -170
  35. package/dist/core/modules/StreamHandler.d.ts +1 -0
  36. package/dist/core/modules/StreamHandler.js +95 -27
  37. package/dist/core/modules/TelemetryHandler.d.ts +10 -1
  38. package/dist/core/modules/TelemetryHandler.js +25 -7
  39. package/dist/core/modules/ToolsManager.js +115 -191
  40. package/dist/core/redisConversationMemoryManager.js +418 -282
  41. package/dist/factories/providerRegistry.d.ts +5 -0
  42. package/dist/factories/providerRegistry.js +20 -2
  43. package/dist/index.d.ts +2 -2
  44. package/dist/index.js +4 -2
  45. package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
  46. package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
  47. package/dist/lib/constants/contextWindows.js +107 -16
  48. package/dist/lib/constants/enums.d.ts +99 -15
  49. package/dist/lib/constants/enums.js +152 -22
  50. package/dist/lib/context/budgetChecker.js +1 -1
  51. package/dist/lib/context/contextCompactor.js +31 -4
  52. package/dist/lib/context/emergencyTruncation.d.ts +21 -0
  53. package/dist/lib/context/emergencyTruncation.js +89 -0
  54. package/dist/lib/context/errorDetection.d.ts +16 -0
  55. package/dist/lib/context/errorDetection.js +48 -1
  56. package/dist/lib/context/errors.d.ts +19 -0
  57. package/dist/lib/context/errors.js +22 -0
  58. package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
  59. package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
  60. package/dist/lib/core/baseProvider.js +306 -200
  61. package/dist/lib/core/conversationMemoryManager.js +104 -61
  62. package/dist/lib/core/evaluationProviders.js +16 -33
  63. package/dist/lib/core/factory.js +237 -164
  64. package/dist/lib/core/modules/GenerationHandler.js +175 -116
  65. package/dist/lib/core/modules/MessageBuilder.js +222 -170
  66. package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
  67. package/dist/lib/core/modules/StreamHandler.js +95 -27
  68. package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
  69. package/dist/lib/core/modules/TelemetryHandler.js +25 -7
  70. package/dist/lib/core/modules/ToolsManager.js +115 -191
  71. package/dist/lib/core/redisConversationMemoryManager.js +418 -282
  72. package/dist/lib/factories/providerRegistry.d.ts +5 -0
  73. package/dist/lib/factories/providerRegistry.js +20 -2
  74. package/dist/lib/index.d.ts +2 -2
  75. package/dist/lib/index.js +4 -2
  76. package/dist/lib/mcp/externalServerManager.js +66 -0
  77. package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
  78. package/dist/lib/mcp/mcpClientFactory.js +16 -0
  79. package/dist/lib/mcp/toolDiscoveryService.js +32 -6
  80. package/dist/lib/mcp/toolRegistry.js +193 -123
  81. package/dist/lib/neurolink.d.ts +6 -0
  82. package/dist/lib/neurolink.js +1162 -646
  83. package/dist/lib/providers/amazonBedrock.d.ts +1 -1
  84. package/dist/lib/providers/amazonBedrock.js +521 -319
  85. package/dist/lib/providers/anthropic.js +73 -17
  86. package/dist/lib/providers/anthropicBaseProvider.js +77 -17
  87. package/dist/lib/providers/googleAiStudio.d.ts +1 -1
  88. package/dist/lib/providers/googleAiStudio.js +292 -227
  89. package/dist/lib/providers/googleVertex.d.ts +36 -1
  90. package/dist/lib/providers/googleVertex.js +553 -260
  91. package/dist/lib/providers/ollama.js +329 -278
  92. package/dist/lib/providers/openAI.js +77 -19
  93. package/dist/lib/providers/sagemaker/parsers.js +3 -3
  94. package/dist/lib/providers/sagemaker/streaming.js +3 -3
  95. package/dist/lib/proxy/proxyFetch.js +81 -48
  96. package/dist/lib/rag/ChunkerFactory.js +1 -1
  97. package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
  98. package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
  99. package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
  100. package/dist/lib/rag/chunking/markdownChunker.js +174 -2
  101. package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
  102. package/dist/lib/rag/ragIntegration.d.ts +18 -1
  103. package/dist/lib/rag/ragIntegration.js +94 -14
  104. package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
  105. package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
  106. package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
  107. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
  108. package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
  109. package/dist/lib/telemetry/attributes.d.ts +52 -0
  110. package/dist/lib/telemetry/attributes.js +61 -0
  111. package/dist/lib/telemetry/index.d.ts +3 -0
  112. package/dist/lib/telemetry/index.js +3 -0
  113. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  114. package/dist/lib/telemetry/telemetryService.js +6 -0
  115. package/dist/lib/telemetry/tracers.d.ts +15 -0
  116. package/dist/lib/telemetry/tracers.js +17 -0
  117. package/dist/lib/telemetry/withSpan.d.ts +9 -0
  118. package/dist/lib/telemetry/withSpan.js +35 -0
  119. package/dist/lib/types/contextTypes.d.ts +10 -0
  120. package/dist/lib/types/streamTypes.d.ts +14 -0
  121. package/dist/lib/utils/conversationMemory.js +121 -82
  122. package/dist/lib/utils/logger.d.ts +5 -0
  123. package/dist/lib/utils/logger.js +50 -2
  124. package/dist/lib/utils/messageBuilder.js +22 -42
  125. package/dist/lib/utils/modelDetection.js +3 -3
  126. package/dist/lib/utils/providerRetry.d.ts +41 -0
  127. package/dist/lib/utils/providerRetry.js +114 -0
  128. package/dist/lib/utils/retryability.d.ts +14 -0
  129. package/dist/lib/utils/retryability.js +23 -0
  130. package/dist/lib/utils/sanitizers/svg.js +4 -5
  131. package/dist/lib/utils/tokenEstimation.d.ts +11 -1
  132. package/dist/lib/utils/tokenEstimation.js +19 -4
  133. package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
  134. package/dist/mcp/externalServerManager.js +66 -0
  135. package/dist/mcp/mcpCircuitBreaker.js +24 -0
  136. package/dist/mcp/mcpClientFactory.js +16 -0
  137. package/dist/mcp/toolDiscoveryService.js +32 -6
  138. package/dist/mcp/toolRegistry.js +193 -123
  139. package/dist/neurolink.d.ts +6 -0
  140. package/dist/neurolink.js +1162 -646
  141. package/dist/providers/amazonBedrock.d.ts +1 -1
  142. package/dist/providers/amazonBedrock.js +521 -319
  143. package/dist/providers/anthropic.js +73 -17
  144. package/dist/providers/anthropicBaseProvider.js +77 -17
  145. package/dist/providers/googleAiStudio.d.ts +1 -1
  146. package/dist/providers/googleAiStudio.js +292 -227
  147. package/dist/providers/googleVertex.d.ts +36 -1
  148. package/dist/providers/googleVertex.js +553 -260
  149. package/dist/providers/ollama.js +329 -278
  150. package/dist/providers/openAI.js +77 -19
  151. package/dist/providers/sagemaker/parsers.js +3 -3
  152. package/dist/providers/sagemaker/streaming.js +3 -3
  153. package/dist/proxy/proxyFetch.js +81 -48
  154. package/dist/rag/ChunkerFactory.js +1 -1
  155. package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
  156. package/dist/rag/chunkers/MarkdownChunker.js +213 -9
  157. package/dist/rag/chunking/markdownChunker.d.ts +16 -0
  158. package/dist/rag/chunking/markdownChunker.js +174 -2
  159. package/dist/rag/pipeline/contextAssembly.js +2 -1
  160. package/dist/rag/ragIntegration.d.ts +18 -1
  161. package/dist/rag/ragIntegration.js +94 -14
  162. package/dist/rag/retrieval/vectorQueryTool.js +21 -4
  163. package/dist/server/abstract/baseServerAdapter.js +4 -1
  164. package/dist/server/adapters/fastifyAdapter.js +35 -30
  165. package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
  166. package/dist/services/server/ai/observability/instrumentation.js +39 -0
  167. package/dist/telemetry/attributes.d.ts +52 -0
  168. package/dist/telemetry/attributes.js +60 -0
  169. package/dist/telemetry/index.d.ts +3 -0
  170. package/dist/telemetry/index.js +3 -0
  171. package/dist/telemetry/telemetryService.d.ts +6 -0
  172. package/dist/telemetry/telemetryService.js +6 -0
  173. package/dist/telemetry/tracers.d.ts +15 -0
  174. package/dist/telemetry/tracers.js +16 -0
  175. package/dist/telemetry/withSpan.d.ts +9 -0
  176. package/dist/telemetry/withSpan.js +34 -0
  177. package/dist/types/contextTypes.d.ts +10 -0
  178. package/dist/types/streamTypes.d.ts +14 -0
  179. package/dist/utils/conversationMemory.js +121 -82
  180. package/dist/utils/logger.d.ts +5 -0
  181. package/dist/utils/logger.js +50 -2
  182. package/dist/utils/messageBuilder.js +22 -42
  183. package/dist/utils/modelDetection.js +3 -3
  184. package/dist/utils/providerRetry.d.ts +41 -0
  185. package/dist/utils/providerRetry.js +113 -0
  186. package/dist/utils/retryability.d.ts +14 -0
  187. package/dist/utils/retryability.js +22 -0
  188. package/dist/utils/sanitizers/svg.js +4 -5
  189. package/dist/utils/tokenEstimation.d.ts +11 -1
  190. package/dist/utils/tokenEstimation.js +19 -4
  191. package/dist/utils/videoAnalysisProcessor.js +7 -3
  192. package/dist/workflow/config.d.ts +26 -26
  193. package/package.json +1 -1
@@ -1,7 +1,8 @@
1
1
  import { createVertex, } from "@ai-sdk/google-vertex";
2
2
  import { createVertexAnthropic, } from "@ai-sdk/google-vertex/anthropic";
3
3
  import { Output, streamText, } from "ai";
4
- import dns from "dns";
4
+ import { trace, SpanKind, SpanStatusCode } from "@opentelemetry/api";
5
+ import dns from "node:dns";
5
6
  import fs from "fs";
6
7
  import os from "os";
7
8
  import path from "path";
@@ -14,7 +15,10 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, Inval
14
15
  import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js";
15
16
  import { FileDetector } from "../utils/fileDetector.js";
16
17
  import { logger } from "../utils/logger.js";
18
+ import { estimateTokens } from "../utils/tokenEstimation.js";
17
19
  import { isGemini3Model } from "../utils/modelDetection.js";
20
+ import { calculateCost } from "../utils/pricing.js";
21
+ import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
18
22
  import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
19
23
  import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
20
24
  import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
@@ -30,6 +34,30 @@ import { buildNativeToolDeclarations, buildNativeConfig, computeMaxSteps as comp
30
34
  // custom fetch, so it is inherently isolated from user cancellation signals.
31
35
  // The image generation path (getImageGenerationAccessToken) has an additional
32
36
  // explicit 15s timeout per attempt for direct REST API calls.
37
/**
 * Check whether an IP address literal belongs to a range that must not be
 * treated as a public destination: loopback, unspecified, private,
 * link-local, or unique-local.
 *
 * Ranges covered:
 *   IPv4  0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
 *         172.16.0.0/12, 192.168.0.0/16
 *   IPv6  :: (unspecified), ::1 (loopback), fe80::/10 (link-local),
 *         fc00::/7 (unique-local), and IPv4-mapped addresses
 *         (::ffff:a.b.c.d or ::ffff:hhhh:hhhh), which are classified by
 *         their embedded IPv4 address.
 *
 * @param {string} address - Textual IPv4 or IPv6 address (not a hostname).
 * @returns {boolean} true when the address falls in one of the ranges above.
 */
function isPrivateOrLoopbackAddress(address) {
    const lower = address.toLowerCase();
    // IPv4-mapped IPv6 in dotted form: judge the embedded IPv4 address so that
    // "::ffff:127.0.0.1" cannot be used to sidestep the IPv4 checks.
    const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(lower);
    if (mappedDotted) {
        return isPrivateOrLoopbackAddress(mappedDotted[1]);
    }
    // IPv4-mapped IPv6 in hex form (e.g. "::ffff:7f00:1" is 127.0.0.1).
    const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(lower);
    if (mappedHex) {
        const high = Number.parseInt(mappedHex[1], 16);
        const low = Number.parseInt(mappedHex[2], 16);
        return isPrivateOrLoopbackAddress(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
    }
    // IPv4 loopback (127/8) and "this network" (0/8, includes 0.0.0.0)
    if (lower.startsWith("127.") || lower.startsWith("0.")) {
        return true;
    }
    // IPv4 private ranges 10/8 and 192.168/16
    if (lower.startsWith("10.") || lower.startsWith("192.168.")) {
        return true;
    }
    // IPv4 link-local 169.254/16 (includes the 169.254.169.254 metadata address)
    if (lower.startsWith("169.254.")) {
        return true;
    }
    // IPv4 private range 172.16/12, i.e. second octet 16 through 31
    if (/^172\.(1[6-9]|2\d|3[01])\./.test(lower)) {
        return true;
    }
    // IPv6 unspecified and loopback
    if (lower === "::" || lower === "::1") {
        return true;
    }
    // IPv6 link-local fe80::/10 spans first hextets fe80 through febf
    if (/^fe[89ab][0-9a-f]:/.test(lower)) {
        return true;
    }
    // IPv6 unique-local fc00::/7 spans first hextets fc00 through fdff
    if (/^f[cd][0-9a-f]{2}:/.test(lower)) {
        return true;
    }
    return false;
}
59
+ const MAX_IMAGE_DOWNLOAD_BYTES = 10 * 1024 * 1024; // 10 MB
60
+ const streamTracer = trace.getTracer("neurolink.provider.vertex");
33
61
  // Enhanced Anthropic support with direct imports
34
62
  // Using the dual provider architecture from Vercel AI SDK
35
63
  const hasAnthropicSupport = () => {
@@ -63,6 +91,8 @@ const hasGoogleCredentials = () => {
63
91
  (process.env.GOOGLE_AUTH_CLIENT_EMAIL &&
64
92
  process.env.GOOGLE_AUTH_PRIVATE_KEY));
65
93
  };
94
+ // Module-level cache for runtime-created credentials file to avoid per-request writes
95
+ let cachedCredentialsPath = null;
66
96
  // Enhanced Vertex settings creation with authentication fallback and proxy support
67
97
  const createVertexSettings = async (region) => {
68
98
  const location = region || getVertexLocation();
@@ -99,9 +129,14 @@ const createVertexSettings = async (region) => {
99
129
  client_x509_cert_url: process.env.GOOGLE_AUTH_CLIENT_CERT_URL,
100
130
  universe_domain: process.env.GOOGLE_AUTH_UNIVERSE_DOMAIN,
101
131
  };
102
- // If we have the essential fields, create a runtime credentials file
132
+ // If we have the essential fields, create a runtime credentials file (cached)
103
133
  if (requiredEnvVarsForFile.client_email &&
104
134
  requiredEnvVarsForFile.private_key) {
135
+ // Return cached path if already written and still exists on disk
136
+ if (cachedCredentialsPath && fs.existsSync(cachedCredentialsPath)) {
137
+ process.env.GOOGLE_APPLICATION_CREDENTIALS = cachedCredentialsPath;
138
+ return baseSettings;
139
+ }
105
140
  try {
106
141
  // Build complete service account credentials object
107
142
  const serviceAccountCredentials = {
@@ -119,18 +154,26 @@ const createVertexSettings = async (region) => {
119
154
  client_x509_cert_url: requiredEnvVarsForFile.client_x509_cert_url || "",
120
155
  universe_domain: requiredEnvVarsForFile.universe_domain || "googleapis.com",
121
156
  };
122
- // Create temporary credentials file
157
+ // Create temporary credentials file with restricted permissions
123
158
  const tmpDir = os.tmpdir();
124
159
  const credentialsFileName = `google-credentials-${Date.now()}-${Math.random().toString(36).substring(2, 11)}.json`;
125
160
  const credentialsFilePath = path.join(tmpDir, credentialsFileName);
126
- fs.writeFileSync(credentialsFilePath, JSON.stringify(serviceAccountCredentials, null, 2));
161
+ fs.writeFileSync(credentialsFilePath, JSON.stringify(serviceAccountCredentials, null, 2), { mode: 0o600 });
162
+ cachedCredentialsPath = credentialsFilePath;
163
+ // Register cleanup on process exit to remove the credentials file
164
+ process.once("exit", () => {
165
+ try {
166
+ if (cachedCredentialsPath && fs.existsSync(cachedCredentialsPath)) {
167
+ fs.unlinkSync(cachedCredentialsPath);
168
+ }
169
+ }
170
+ catch {
171
+ /* ignore cleanup errors */
172
+ }
173
+ });
127
174
  // Set the environment variable to point to our runtime-created file
128
175
  process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsFilePath;
129
- // Now continue with the normal flow - check if the file exists
130
- const fileExists = fs.existsSync(credentialsFilePath);
131
- if (fileExists) {
132
- return baseSettings;
133
- }
176
+ return baseSettings;
134
177
  }
135
178
  catch {
136
179
  // Silent error handling for runtime credentials file creation
@@ -246,7 +289,13 @@ const createVertexSettings = async (region) => {
246
289
  };
247
290
  // Create Anthropic-specific Vertex settings with the same authentication and proxy support
248
291
  const createVertexAnthropicSettings = async (region) => {
249
- const baseVertexSettings = await createVertexSettings(region);
292
+ // The @ai-sdk/google-vertex SDK constructs Anthropic URLs as:
293
+ // https://{location}-aiplatform.googleapis.com/...
294
+ // When location is "global", this creates "https://global-aiplatform.googleapis.com"
295
+ // which is invalid. The correct global endpoint omits the region prefix entirely.
296
+ // Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
297
+ const anthropicRegion = !region || region === "global" ? "us-east5" : region;
298
+ const baseVertexSettings = await createVertexSettings(anthropicRegion);
250
299
  // GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
251
300
  // so we can use the same settings with proper typing
252
301
  return {
@@ -262,6 +311,39 @@ const createVertexAnthropicSettings = async (region) => {
262
311
  const isAnthropicModel = (modelName) => {
263
312
  return modelName.toLowerCase().includes("claude");
264
313
  };
314
/**
 * Vertex Model Aliases
 *
 * Maps shorthand model names to the full versioned IDs sent to the
 * Vertex AI API, so callers can pass convenient names like
 * "claude-sonnet-4-5" instead of "claude-sonnet-4-5@20250929".
 *
 * Alias resolution runs at the very start of getModel() so that all
 * downstream code (isAnthropicModel, validateAnthropicModelName, etc.)
 * sees the canonical versioned name.
 *
 * The map is built on a null prototype because it is indexed with
 * caller-supplied model names: on a plain object literal, a name such as
 * "constructor" or "toString" would resolve to an inherited
 * Object.prototype member instead of `undefined`.
 *
 * To add a new model: simply add an entry mapping the shorthand to the
 * full versioned string. No other changes are needed.
 *
 * NOTE(review): the Claude 4.x / 3.7 entries use an "@YYYYMMDD" suffix while
 * the remaining Claude 3.x entries use "-YYYYMMDD"; the version strings were
 * not verified here — confirm each against the Vertex AI model listing.
 */
export const VERTEX_MODEL_ALIASES = Object.assign(Object.create(null), {
    // Claude 4.x shorthand aliases → versioned names
    "claude-sonnet-4-5": "claude-sonnet-4-5@20250929",
    "claude-opus-4-5": "claude-opus-4-5@20251124",
    "claude-haiku-4-5": "claude-haiku-4-5@20251001",
    "claude-sonnet-4": "claude-sonnet-4@20250514",
    "claude-opus-4": "claude-opus-4@20250514",
    "claude-opus-4-1": "claude-opus-4-1@20250805",
    // Claude 3.x shorthand aliases → versioned names
    "claude-3-7-sonnet": "claude-3-7-sonnet@20250219",
    "claude-3-5-sonnet": "claude-3-5-sonnet-20241022",
    "claude-3-5-haiku": "claude-3-5-haiku-20241022",
    "claude-3-opus": "claude-3-opus-20240229",
    "claude-3-sonnet": "claude-3-sonnet-20240229",
    "claude-3-haiku": "claude-3-haiku-20240307",
    // Gemini shorthand aliases
    "gemini-3-pro": "gemini-3-pro-latest",
    "gemini-3-flash": "gemini-3-flash-latest",
});
265
347
  /**
266
348
  * Google Vertex AI Provider v2 - BaseProvider Implementation
267
349
  *
@@ -366,6 +448,13 @@ export class GoogleVertexProvider extends BaseProvider {
366
448
  const model = await this.getModel();
367
449
  return model;
368
450
  }
451
+ /**
452
+ * Resolve a raw model name through the alias map.
453
+ * Used internally to normalize model names before any API calls.
454
+ */
455
+ resolveAlias(modelName) {
456
+ return VERTEX_MODEL_ALIASES[modelName] ?? modelName;
457
+ }
369
458
  /**
370
459
  * Initialize model creation tracking
371
460
  */
@@ -373,7 +462,10 @@ export class GoogleVertexProvider extends BaseProvider {
373
462
  const modelCreationId = `vertex-model-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
374
463
  const modelCreationStartTime = Date.now();
375
464
  const modelCreationHrTimeStart = process.hrtime.bigint();
376
- const modelName = this.modelName || getDefaultVertexModel();
465
+ // Resolve shorthand model aliases (e.g. "claude-sonnet-4-5" → "claude-sonnet-4-5@20250929")
466
+ // before any downstream logic that depends on the versioned name.
467
+ const rawModelName = this.modelName || getDefaultVertexModel();
468
+ const modelName = VERTEX_MODEL_ALIASES[rawModelName] ?? rawModelName;
377
469
  return {
378
470
  modelCreationId,
379
471
  modelCreationStartTime,
@@ -665,7 +757,7 @@ export class GoogleVertexProvider extends BaseProvider {
665
757
  * Creates fresh instances for each request to ensure proper authentication
666
758
  */
667
759
  async getModel() {
668
- // Initialize logging and setup
760
+ // Initialize logging and setup (alias resolution happens inside)
669
761
  const { modelCreationId, modelCreationStartTime, modelCreationHrTimeStart, modelName, } = this.initializeModelCreationLogging();
670
762
  // Check if this is an Anthropic model and attempt creation
671
763
  const anthropicModel = await this.attemptAnthropicModelCreation(modelName, modelCreationId, modelCreationStartTime, modelCreationHrTimeStart);
@@ -684,7 +776,7 @@ export class GoogleVertexProvider extends BaseProvider {
684
776
  }
685
777
  async executeStream(options, analysisSchema) {
686
778
  // Check if this is a Gemini 3 model with tools - use native SDK for thought_signature
687
- const gemini3CheckModelName = options.model || this.modelName || getDefaultVertexModel();
779
+ const gemini3CheckModelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
688
780
  // Check for tools from options AND from SDK (MCP tools)
689
781
  // Need to check early if we should route to native SDK
690
782
  const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools();
@@ -735,7 +827,7 @@ export class GoogleVertexProvider extends BaseProvider {
735
827
  toolNames: Object.keys(tools),
736
828
  });
737
829
  // Model-specific maxTokens handling
738
- const modelName = this.modelName || getDefaultVertexModel();
830
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
739
831
  // Use cached model configuration to determine maxTokens handling for streaming performance
740
832
  // This avoids hardcoded model-specific logic and repeated config lookups
741
833
  const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
@@ -748,6 +840,7 @@ export class GoogleVertexProvider extends BaseProvider {
748
840
  messages: messages,
749
841
  temperature: options.temperature,
750
842
  ...(maxTokens && { maxTokens }),
843
+ maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
751
844
  ...(shouldUseTools &&
752
845
  Object.keys(tools).length > 0 && {
753
846
  tools,
@@ -819,7 +912,67 @@ export class GoogleVertexProvider extends BaseProvider {
819
912
  });
820
913
  }
821
914
  }
822
- const result = streamText(streamOptions);
915
+ // Wrap streamText in an OTel span to capture provider-level latency and token usage
916
+ const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
917
+ kind: SpanKind.CLIENT,
918
+ attributes: {
919
+ "gen_ai.system": "vertex",
920
+ "gen_ai.request.model": model.modelId || this.modelName || "unknown",
921
+ },
922
+ });
923
+ let result;
924
+ try {
925
+ result = streamText(streamOptions);
926
+ }
927
+ catch (err) {
928
+ streamSpan.recordException(err instanceof Error ? err : new Error(String(err)));
929
+ streamSpan.setStatus({
930
+ code: SpanStatusCode.ERROR,
931
+ message: err instanceof Error ? err.message : String(err),
932
+ });
933
+ streamSpan.end();
934
+ throw err;
935
+ }
936
+ // Collect token usage and finish reason asynchronously when the stream completes,
937
+ // then end the span. This avoids blocking the stream consumer.
938
+ result.usage
939
+ .then((usage) => {
940
+ streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens || 0);
941
+ streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens || 0);
942
+ const effectiveModel = options.model ||
943
+ model.modelId ||
944
+ this.modelName ||
945
+ getDefaultVertexModel();
946
+ const cost = calculateCost(this.providerName, effectiveModel, {
947
+ input: usage.promptTokens || 0,
948
+ output: usage.completionTokens || 0,
949
+ total: (usage.promptTokens || 0) + (usage.completionTokens || 0),
950
+ });
951
+ if (cost && cost > 0) {
952
+ streamSpan.setAttribute("neurolink.cost", cost);
953
+ }
954
+ })
955
+ .catch(() => {
956
+ // Usage may not be available if the stream is aborted
957
+ });
958
+ result.finishReason
959
+ .then((reason) => {
960
+ streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
961
+ })
962
+ .catch(() => {
963
+ // Finish reason may not be available if the stream is aborted
964
+ });
965
+ result.text
966
+ .then(() => {
967
+ streamSpan.end();
968
+ })
969
+ .catch((err) => {
970
+ streamSpan.setStatus({
971
+ code: SpanStatusCode.ERROR,
972
+ message: err instanceof Error ? err.message : String(err),
973
+ });
974
+ streamSpan.end();
975
+ });
823
976
  // Defer timeout cleanup until the stream completes or errors
824
977
  result.text.finally(() => timeoutController?.cleanup());
825
978
  // Transform string stream to content object stream using BaseProvider method
@@ -965,262 +1118,306 @@ export class GoogleVertexProvider extends BaseProvider {
965
1118
  * This bypasses @ai-sdk/google-vertex to properly handle thought_signature
966
1119
  */
967
1120
  async executeNativeGemini3Stream(options) {
968
- const client = await this.createVertexGenAIClient(options.region);
969
- const modelName = options.model || this.modelName || getDefaultVertexModel();
970
- const effectiveLocation = options.region || this.location || getVertexLocation();
971
- logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
972
- model: modelName,
973
- hasTools: !!options.tools && Object.keys(options.tools).length > 0,
974
- project: this.projectId,
975
- location: effectiveLocation,
976
- });
977
- // Build contents from input with multimodal support
978
- const multimodalInput = options.input;
979
- const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
980
- // Convert tools to native format
981
- let hasToolsInput = options.tools &&
982
- Object.keys(options.tools).length > 0 &&
983
- !options.disableTools;
984
- // Guard: Gemini cannot use tools + JSON schema simultaneously
985
- const streamOptions = options;
986
- const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
987
- if (wantsJsonOutput && hasToolsInput) {
988
- logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
989
- hasToolsInput = false;
990
- }
991
- let toolsConfig;
992
- let executeMap = new Map();
993
- if (hasToolsInput) {
994
- const result = buildNativeToolDeclarations(options.tools);
995
- toolsConfig = result.toolsConfig;
996
- executeMap = result.executeMap;
997
- logger.debug("[GoogleVertex] Converted tools for native SDK", {
998
- toolCount: toolsConfig[0].functionDeclarations.length,
999
- toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1121
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
1122
+ return withClientSpan({
1123
+ name: "neurolink.provider.stream",
1124
+ tracer: tracers.provider,
1125
+ attributes: {
1126
+ [ATTR.GEN_AI_SYSTEM]: "vertex",
1127
+ [ATTR.GEN_AI_MODEL]: modelName,
1128
+ [ATTR.GEN_AI_OPERATION]: "stream",
1129
+ [ATTR.NL_PROVIDER]: this.providerName,
1130
+ },
1131
+ }, async (span) => {
1132
+ const client = await this.createVertexGenAIClient(options.region);
1133
+ const effectiveLocation = options.region || this.location || getVertexLocation();
1134
+ logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
1135
+ model: modelName,
1136
+ hasTools: !!options.tools && Object.keys(options.tools).length > 0,
1137
+ project: this.projectId,
1138
+ location: effectiveLocation,
1000
1139
  });
1001
- }
1002
- // Build config
1003
- const config = buildNativeConfig(options, toolsConfig);
1004
- // Add JSON output format support for native SDK stream
1005
- if (streamOptions.output?.format === "json" || streamOptions.schema) {
1006
- config.responseMimeType = "application/json";
1007
- if (streamOptions.schema) {
1008
- const rawSchema = convertZodToJsonSchema(streamOptions.schema);
1009
- const inlinedSchema = inlineJsonSchema(rawSchema);
1010
- if (inlinedSchema.$schema) {
1011
- delete inlinedSchema.$schema;
1012
- }
1013
- config.responseSchema = inlinedSchema;
1014
- logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
1015
- schemaKeys: Object.keys(inlinedSchema),
1140
+ // Build contents from input with multimodal support
1141
+ const multimodalInput = options.input;
1142
+ const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
1143
+ // Convert tools to native format
1144
+ let hasToolsInput = options.tools &&
1145
+ Object.keys(options.tools).length > 0 &&
1146
+ !options.disableTools;
1147
+ // Guard: Gemini cannot use tools + JSON schema simultaneously
1148
+ const streamOptions = options;
1149
+ const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
1150
+ if (wantsJsonOutput && hasToolsInput) {
1151
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1152
+ hasToolsInput = false;
1153
+ }
1154
+ let toolsConfig;
1155
+ let executeMap = new Map();
1156
+ if (hasToolsInput) {
1157
+ const result = buildNativeToolDeclarations(options.tools);
1158
+ toolsConfig = result.toolsConfig;
1159
+ executeMap = result.executeMap;
1160
+ logger.debug("[GoogleVertex] Converted tools for native SDK", {
1161
+ toolCount: toolsConfig[0].functionDeclarations.length,
1162
+ toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1016
1163
  });
1017
1164
  }
1018
- }
1019
- const startTime = Date.now();
1020
- const timeout = this.getTimeout(options);
1021
- const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
1022
- const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1023
- const maxSteps = computeMaxStepsShared(options.maxSteps);
1024
- const currentContents = [...contents];
1025
- let finalText = "";
1026
- let lastStepText = "";
1027
- let totalInputTokens = 0;
1028
- let totalOutputTokens = 0;
1029
- const allToolCalls = [];
1030
- let step = 0;
1031
- const failedTools = new Map();
1032
- // Agentic loop for tool calling
1033
- try {
1034
- while (step < maxSteps) {
1035
- if (timeoutController?.controller.signal.aborted) {
1036
- break;
1037
- }
1038
- step++;
1039
- logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
1040
- try {
1041
- const stream = await client.models.generateContentStream({
1042
- model: modelName,
1043
- contents: currentContents,
1044
- config,
1045
- ...(composedSignal
1046
- ? { httpOptions: { signal: composedSignal } }
1047
- : {}),
1048
- });
1049
- const chunkResult = await collectStreamChunks(stream);
1050
- totalInputTokens += chunkResult.inputTokens;
1051
- totalOutputTokens += chunkResult.outputTokens;
1052
- const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1053
- if (chunkResult.stepFunctionCalls.length === 0) {
1054
- finalText = stepText;
1055
- break;
1165
+ // Build config
1166
+ const config = buildNativeConfig(options, toolsConfig);
1167
+ // Add JSON output format support for native SDK stream
1168
+ if (streamOptions.output?.format === "json" || streamOptions.schema) {
1169
+ config.responseMimeType = "application/json";
1170
+ if (streamOptions.schema) {
1171
+ const rawSchema = convertZodToJsonSchema(streamOptions.schema);
1172
+ const inlinedSchema = inlineJsonSchema(rawSchema);
1173
+ if (inlinedSchema.$schema) {
1174
+ delete inlinedSchema.$schema;
1056
1175
  }
1057
- lastStepText = stepText;
1058
- logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
1059
- pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1060
- const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
1061
- // Add function responses to history
1062
- currentContents.push({
1063
- role: "function",
1064
- parts: functionResponses,
1176
+ config.responseSchema = inlinedSchema;
1177
+ logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
1178
+ schemaKeys: Object.keys(inlinedSchema),
1065
1179
  });
1066
1180
  }
1067
- catch (error) {
1068
- logger.error("[GoogleVertex] Native SDK error", error);
1069
- throw this.handleProviderError(error);
1181
+ }
1182
+ const startTime = Date.now();
1183
+ const timeout = this.getTimeout(options);
1184
+ const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
1185
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1186
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
1187
+ const currentContents = [...contents];
1188
+ let finalText = "";
1189
+ let lastStepText = "";
1190
+ let totalInputTokens = 0;
1191
+ let totalOutputTokens = 0;
1192
+ const allToolCalls = [];
1193
+ let step = 0;
1194
+ const failedTools = new Map();
1195
+ // Agentic loop for tool calling
1196
+ try {
1197
+ while (step < maxSteps) {
1198
+ if (timeoutController?.controller.signal.aborted) {
1199
+ break;
1200
+ }
1201
+ step++;
1202
+ logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
1203
+ try {
1204
+ const stream = await client.models.generateContentStream({
1205
+ model: modelName,
1206
+ contents: currentContents,
1207
+ config,
1208
+ ...(composedSignal
1209
+ ? { httpOptions: { signal: composedSignal } }
1210
+ : {}),
1211
+ });
1212
+ const chunkResult = await collectStreamChunks(stream);
1213
+ totalInputTokens += chunkResult.inputTokens;
1214
+ totalOutputTokens += chunkResult.outputTokens;
1215
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1216
+ if (chunkResult.stepFunctionCalls.length === 0) {
1217
+ finalText = stepText;
1218
+ break;
1219
+ }
1220
+ lastStepText = stepText;
1221
+ // Record tool call events on the span
1222
+ for (const fc of chunkResult.stepFunctionCalls) {
1223
+ span.addEvent("gen_ai.tool_call", {
1224
+ "tool.name": fc.name,
1225
+ "tool.step": step,
1226
+ });
1227
+ }
1228
+ logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
1229
+ pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1230
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
1231
+ // Add function responses to history
1232
+ currentContents.push({
1233
+ role: "function",
1234
+ parts: functionResponses,
1235
+ });
1236
+ }
1237
+ catch (error) {
1238
+ logger.error("[GoogleVertex] Native SDK error", error);
1239
+ throw this.handleProviderError(error);
1240
+ }
1070
1241
  }
1071
1242
  }
1072
- }
1073
- finally {
1074
- timeoutController?.cleanup();
1075
- }
1076
- finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1077
- const responseTime = Date.now() - startTime;
1078
- // Create async iterable for streaming result
1079
- async function* createTextStream() {
1080
- yield { content: finalText };
1081
- }
1082
- return {
1083
- stream: createTextStream(),
1084
- provider: this.providerName,
1085
- model: modelName,
1086
- usage: {
1087
- input: totalInputTokens,
1088
- output: totalOutputTokens,
1089
- total: totalInputTokens + totalOutputTokens,
1090
- },
1091
- toolCalls: allToolCalls.map((tc) => ({
1092
- toolName: tc.toolName,
1093
- args: tc.args,
1094
- })),
1095
- metadata: {
1096
- streamId: `native-vertex-${Date.now()}`,
1097
- startTime,
1098
- responseTime,
1099
- totalToolExecutions: allToolCalls.length,
1100
- },
1101
- };
1243
+ finally {
1244
+ timeoutController?.cleanup();
1245
+ }
1246
+ finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1247
+ const responseTime = Date.now() - startTime;
1248
+ // Set token usage and finish reason on the span
1249
+ span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
1250
+ span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
1251
+ span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop");
1252
+ // Create async iterable for streaming result
1253
+ async function* createTextStream() {
1254
+ yield { content: finalText };
1255
+ }
1256
+ return {
1257
+ stream: createTextStream(),
1258
+ provider: this.providerName,
1259
+ model: modelName,
1260
+ usage: {
1261
+ input: totalInputTokens,
1262
+ output: totalOutputTokens,
1263
+ total: totalInputTokens + totalOutputTokens,
1264
+ },
1265
+ toolCalls: allToolCalls.map((tc) => ({
1266
+ toolName: tc.toolName,
1267
+ args: tc.args,
1268
+ })),
1269
+ metadata: {
1270
+ streamId: `native-vertex-${Date.now()}`,
1271
+ startTime,
1272
+ responseTime,
1273
+ totalToolExecutions: allToolCalls.length,
1274
+ },
1275
+ };
1276
+ });
1102
1277
  }
1103
1278
  /**
1104
1279
  * Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
1105
1280
  * This bypasses @ai-sdk/google-vertex to properly handle thought_signature
1106
1281
  */
1107
1282
  async executeNativeGemini3Generate(options) {
1108
- const client = await this.createVertexGenAIClient(options.region);
1109
- const modelName = options.model || this.modelName || getDefaultVertexModel();
1110
- const effectiveLocation = options.region || this.location || getVertexLocation();
1111
- logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
1112
- model: modelName,
1113
- project: this.projectId,
1114
- location: effectiveLocation,
1115
- });
1116
- // Build contents from input with multimodal support
1117
- const inputText = options.prompt || options.input?.text || "Please respond.";
1118
- const multimodalInput = options.input;
1119
- const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
1120
- // Get tools from SDK and options
1121
- let shouldUseTools = !options.disableTools && this.supportsTools();
1122
- // Guard: Gemini cannot use tools + JSON schema simultaneously
1123
- const wantsJsonOutputGen = options.output?.format === "json" || options.schema;
1124
- if (wantsJsonOutputGen && shouldUseTools) {
1125
- logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1126
- shouldUseTools = false;
1127
- }
1128
- const sdkTools = shouldUseTools ? await this.getAllTools() : {};
1129
- const combinedTools = shouldUseTools
1130
- ? { ...sdkTools, ...(options.tools || {}) }
1131
- : {};
1132
- let toolsConfig;
1133
- let executeMap = new Map();
1134
- if (Object.keys(combinedTools).length > 0) {
1135
- const result = buildNativeToolDeclarations(combinedTools);
1136
- toolsConfig = result.toolsConfig;
1137
- executeMap = result.executeMap;
1138
- logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
1139
- toolCount: toolsConfig[0].functionDeclarations.length,
1140
- toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1283
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
1284
+ return withClientSpan({
1285
+ name: "neurolink.provider.generate",
1286
+ tracer: tracers.provider,
1287
+ attributes: {
1288
+ [ATTR.GEN_AI_SYSTEM]: "vertex",
1289
+ [ATTR.GEN_AI_MODEL]: modelName,
1290
+ [ATTR.GEN_AI_OPERATION]: "generate",
1291
+ [ATTR.NL_PROVIDER]: this.providerName,
1292
+ },
1293
+ }, async (span) => {
1294
+ const client = await this.createVertexGenAIClient(options.region);
1295
+ const effectiveLocation = options.region || this.location || getVertexLocation();
1296
+ logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
1297
+ model: modelName,
1298
+ project: this.projectId,
1299
+ location: effectiveLocation,
1141
1300
  });
1142
- }
1143
- // Build config
1144
- const config = buildNativeConfig(options, toolsConfig);
1145
- // Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
1146
- // For now, schemas are handled via the AI SDK fallback path, not native SDK
1147
- // TODO: Implement proper $ref resolution for complex nested schemas
1148
- const startTime = Date.now();
1149
- const timeout = this.getTimeout(options);
1150
- const timeoutController = createTimeoutController(timeout, this.providerName, "generate");
1151
- const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1152
- const maxSteps = computeMaxStepsShared(options.maxSteps);
1153
- const currentContents = [...contents];
1154
- let finalText = "";
1155
- let lastStepText = "";
1156
- let totalInputTokens = 0;
1157
- let totalOutputTokens = 0;
1158
- const allToolCalls = [];
1159
- const toolExecutions = [];
1160
- let step = 0;
1161
- const failedTools = new Map();
1162
- try {
1163
- // Agentic loop for tool calling
1164
- while (step < maxSteps) {
1165
- if (timeoutController?.controller.signal.aborted) {
1166
- break;
1167
- }
1168
- step++;
1169
- logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
1170
- try {
1171
- // Use generateContentStream and collect all chunks (same as GoogleAIStudio)
1172
- const stream = await client.models.generateContentStream({
1173
- model: modelName,
1174
- contents: currentContents,
1175
- config,
1176
- ...(composedSignal
1177
- ? { httpOptions: { signal: composedSignal } }
1178
- : {}),
1179
- });
1180
- const chunkResult = await collectStreamChunks(stream);
1181
- totalInputTokens += chunkResult.inputTokens;
1182
- totalOutputTokens += chunkResult.outputTokens;
1183
- const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1184
- if (chunkResult.stepFunctionCalls.length === 0) {
1185
- finalText = stepText;
1301
+ // Build contents from input with multimodal support
1302
+ const inputText = options.prompt || options.input?.text || "Please respond.";
1303
+ const multimodalInput = options.input;
1304
+ const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
1305
+ // Get tools from SDK and options
1306
+ let shouldUseTools = !options.disableTools && this.supportsTools();
1307
+ // Guard: Gemini cannot use tools + JSON schema simultaneously
1308
+ const wantsJsonOutputGen = options.output?.format === "json" || options.schema;
1309
+ if (wantsJsonOutputGen && shouldUseTools) {
1310
+ logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
1311
+ shouldUseTools = false;
1312
+ }
1313
+ const sdkTools = shouldUseTools ? await this.getAllTools() : {};
1314
+ const combinedTools = shouldUseTools
1315
+ ? { ...sdkTools, ...(options.tools || {}) }
1316
+ : {};
1317
+ let toolsConfig;
1318
+ let executeMap = new Map();
1319
+ if (Object.keys(combinedTools).length > 0) {
1320
+ const result = buildNativeToolDeclarations(combinedTools);
1321
+ toolsConfig = result.toolsConfig;
1322
+ executeMap = result.executeMap;
1323
+ logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
1324
+ toolCount: toolsConfig[0].functionDeclarations.length,
1325
+ toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
1326
+ });
1327
+ }
1328
+ // Build config
1329
+ const config = buildNativeConfig(options, toolsConfig);
1330
+ // Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
1331
+ // For now, schemas are handled via the AI SDK fallback path, not native SDK
1332
+ // TODO: Implement proper $ref resolution for complex nested schemas
1333
+ const startTime = Date.now();
1334
+ const timeout = this.getTimeout(options);
1335
+ const timeoutController = createTimeoutController(timeout, this.providerName, "generate");
1336
+ const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
1337
+ const maxSteps = computeMaxStepsShared(options.maxSteps);
1338
+ const currentContents = [...contents];
1339
+ let finalText = "";
1340
+ let lastStepText = "";
1341
+ let totalInputTokens = 0;
1342
+ let totalOutputTokens = 0;
1343
+ const allToolCalls = [];
1344
+ const toolExecutions = [];
1345
+ let step = 0;
1346
+ const failedTools = new Map();
1347
+ try {
1348
+ // Agentic loop for tool calling
1349
+ while (step < maxSteps) {
1350
+ if (timeoutController?.controller.signal.aborted) {
1186
1351
  break;
1187
1352
  }
1188
- lastStepText = stepText;
1189
- logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
1190
- pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1191
- const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
1192
- // Add function responses to history
1193
- currentContents.push({
1194
- role: "function",
1195
- parts: functionResponses,
1196
- });
1197
- }
1198
- catch (error) {
1199
- logger.error("[GoogleVertex] Native SDK generate error", error);
1200
- throw this.handleProviderError(error);
1353
+ step++;
1354
+ logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
1355
+ try {
1356
+ // Use generateContentStream and collect all chunks (same as GoogleAIStudio)
1357
+ const stream = await client.models.generateContentStream({
1358
+ model: modelName,
1359
+ contents: currentContents,
1360
+ config,
1361
+ ...(composedSignal
1362
+ ? { httpOptions: { signal: composedSignal } }
1363
+ : {}),
1364
+ });
1365
+ const chunkResult = await collectStreamChunks(stream);
1366
+ totalInputTokens += chunkResult.inputTokens;
1367
+ totalOutputTokens += chunkResult.outputTokens;
1368
+ const stepText = extractTextFromParts(chunkResult.rawResponseParts);
1369
+ if (chunkResult.stepFunctionCalls.length === 0) {
1370
+ finalText = stepText;
1371
+ break;
1372
+ }
1373
+ lastStepText = stepText;
1374
+ // Record tool call events on the span
1375
+ for (const fc of chunkResult.stepFunctionCalls) {
1376
+ span.addEvent("gen_ai.tool_call", {
1377
+ "tool.name": fc.name,
1378
+ "tool.step": step,
1379
+ });
1380
+ }
1381
+ logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
1382
+ pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
1383
+ const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
1384
+ // Add function responses to history
1385
+ currentContents.push({
1386
+ role: "function",
1387
+ parts: functionResponses,
1388
+ });
1389
+ }
1390
+ catch (error) {
1391
+ logger.error("[GoogleVertex] Native SDK generate error", error);
1392
+ throw this.handleProviderError(error);
1393
+ }
1201
1394
  }
1202
1395
  }
1203
- }
1204
- finally {
1205
- timeoutController?.cleanup();
1206
- }
1207
- finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1208
- const responseTime = Date.now() - startTime;
1209
- // Build EnhancedGenerateResult
1210
- return {
1211
- content: finalText,
1212
- provider: this.providerName,
1213
- model: modelName,
1214
- usage: {
1215
- input: totalInputTokens,
1216
- output: totalOutputTokens,
1217
- total: totalInputTokens + totalOutputTokens,
1218
- },
1219
- responseTime,
1220
- toolsUsed: allToolCalls.map((tc) => tc.toolName),
1221
- toolExecutions: toolExecutions,
1222
- enhancedWithTools: allToolCalls.length > 0,
1223
- };
1396
+ finally {
1397
+ timeoutController?.cleanup();
1398
+ }
1399
+ finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
1400
+ const responseTime = Date.now() - startTime;
1401
+ // Set token usage and finish reason on the span
1402
+ span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
1403
+ span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
1404
+ span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop");
1405
+ // Build EnhancedGenerateResult
1406
+ return {
1407
+ content: finalText,
1408
+ provider: this.providerName,
1409
+ model: modelName,
1410
+ usage: {
1411
+ input: totalInputTokens,
1412
+ output: totalOutputTokens,
1413
+ total: totalInputTokens + totalOutputTokens,
1414
+ },
1415
+ responseTime,
1416
+ toolsUsed: allToolCalls.map((tc) => tc.toolName),
1417
+ toolExecutions: toolExecutions,
1418
+ enhancedWithTools: allToolCalls.length > 0,
1419
+ };
1420
+ });
1224
1421
  }
1225
1422
  /**
1226
1423
  * Process CSV files and append content to options.input.text
@@ -1298,7 +1495,7 @@ export class GoogleVertexProvider extends BaseProvider {
1298
1495
  const options = typeof optionsOrPrompt === "string"
1299
1496
  ? { prompt: optionsOrPrompt }
1300
1497
  : optionsOrPrompt;
1301
- const modelName = options.model || this.modelName || getDefaultVertexModel();
1498
+ const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
1302
1499
  // Check if we should use native SDK for Gemini 3 with tools
1303
1500
  const shouldUseTools = !options.disableTools && this.supportsTools();
1304
1501
  const sdkTools = shouldUseTools ? await this.getAllTools() : {};
@@ -1458,6 +1655,23 @@ export class GoogleVertexProvider extends BaseProvider {
1458
1655
  async hasAnthropicSupport() {
1459
1656
  return hasAnthropicSupport();
1460
1657
  }
1658
+ /**
1659
+ * Resolve a shorthand model name to its full versioned Vertex AI identifier.
1660
+ * Returns the original name unchanged if no alias exists.
1661
+ *
1662
+ * @param modelName - A model name, possibly a shorthand alias
1663
+ * @returns The resolved full versioned model name
1664
+ *
1665
+ * @example
1666
+ * ```typescript
1667
+ * provider.resolveModelAlias("claude-sonnet-4-5"); // "claude-sonnet-4-5@20250929"
1668
+ * provider.resolveModelAlias("gemini-3-pro"); // "gemini-3-pro-latest"
1669
+ * provider.resolveModelAlias("gemini-2.5-flash"); // "gemini-2.5-flash" (unchanged)
1670
+ * ```
1671
+ */
1672
+ resolveModelAlias(modelName) {
1673
+ return VERTEX_MODEL_ALIASES[modelName] ?? modelName;
1674
+ }
1461
1675
  /**
1462
1676
  * Create an Anthropic model instance using vertexAnthropic provider
1463
1677
  * Uses fresh vertex settings for each request with comprehensive validation
@@ -1544,11 +1758,11 @@ export class GoogleVertexProvider extends BaseProvider {
1544
1758
  modelName,
1545
1759
  issue: modelValidation.issue,
1546
1760
  recommendedModels: [
1761
+ "claude-sonnet-4-6",
1762
+ "claude-opus-4-6",
1547
1763
  "claude-sonnet-4-5@20250929",
1548
- "claude-sonnet-4@20250514",
1549
1764
  "claude-opus-4@20250514",
1550
1765
  "claude-3-5-sonnet-20241022",
1551
- "claude-3-5-haiku-20241022",
1552
1766
  ],
1553
1767
  });
1554
1768
  return null;
@@ -1749,6 +1963,8 @@ export class GoogleVertexProvider extends BaseProvider {
1749
1963
  async checkVertexRegionalSupport(region = "us-central1") {
1750
1964
  // Based on Google Cloud documentation, these regions support Anthropic models
1751
1965
  const supportedRegions = [
1966
+ // Global endpoint (routed automatically)
1967
+ "global",
1752
1968
  // North America
1753
1969
  "us-central1",
1754
1970
  "us-east1",
@@ -1810,10 +2026,17 @@ export class GoogleVertexProvider extends BaseProvider {
1810
2026
  }
1811
2027
  // Validate against known Claude model patterns
1812
2028
  const validPatterns = [
2029
+ // Claude 4.6 — versionless IDs (no @YYYYMMDD suffix)
2030
+ /^claude-opus-4-6$/,
2031
+ /^claude-sonnet-4-6$/,
2032
+ // Claude 4.x versioned
1813
2033
  /^claude-sonnet-4@\d{8}$/,
1814
2034
  /^claude-sonnet-4-5@\d{8}$/,
1815
2035
  /^claude-opus-4@\d{8}$/,
1816
2036
  /^claude-opus-4-1@\d{8}$/,
2037
+ /^claude-opus-4-5@\d{8}$/,
2038
+ /^claude-haiku-4-5@\d{8}$/,
2039
+ // Claude 3.x
1817
2040
  /^claude-3-7-sonnet@\d{8}$/,
1818
2041
  /^claude-3-5-sonnet-\d{8}$/,
1819
2042
  /^claude-3-5-haiku-\d{8}$/,
@@ -2063,11 +2286,10 @@ export class GoogleVertexProvider extends BaseProvider {
2063
2286
  return "image/png";
2064
2287
  }
2065
2288
  /**
2066
- * Estimate token count from text (simple character-based estimation)
2289
+ * Estimate token count from text using centralized estimation with provider multipliers
2067
2290
  */
2068
2291
  estimateTokenCount(text) {
2069
- // Rough estimation: ~4 characters per token
2070
- return Math.ceil(text.length / 4);
2292
+ return estimateTokens(text, "vertex");
2071
2293
  }
2072
2294
  /**
2073
2295
  * Obtain a Google Auth access token for Vertex AI REST API calls.
@@ -2123,7 +2345,7 @@ export class GoogleVertexProvider extends BaseProvider {
2123
2345
  /**
2124
2346
  * Build request parts for image generation from prompt, PDFs, and images.
2125
2347
  */
2126
- buildImageGenerationParts(prompt, pdfFiles, inputImages) {
2348
+ async buildImageGenerationParts(prompt, pdfFiles, inputImages) {
2127
2349
  const parts = [];
2128
2350
  if (prompt) {
2129
2351
  parts.push({ text: prompt });
@@ -2213,6 +2435,77 @@ export class GoogleVertexProvider extends BaseProvider {
2213
2435
  continue;
2214
2436
  }
2215
2437
  }
2438
+ else if (image.startsWith("http://") ||
2439
+ image.startsWith("https://")) {
2440
+ // Download URL image and convert to base64
2441
+ try {
2442
+ // Validate URL to prevent SSRF attacks
2443
+ const parsedUrl = new URL(image);
2444
+ const hostname = parsedUrl.hostname;
2445
+ const blockedHosts = ["localhost", "127.0.0.1", "0.0.0.0", "[::1]"];
2446
+ if (blockedHosts.some((h) => hostname === h) ||
2447
+ /^(10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.)/.test(hostname)) {
2448
+ logger.warn(`[GoogleVertexProvider] Blocked fetch to private/local URL: ${hostname}`, { index: i });
2449
+ continue;
2450
+ }
2451
+ // DNS resolution check — verify resolved IPs are not private/loopback
2452
+ try {
2453
+ const { resolve4, resolve6 } = dns.promises;
2454
+ const addresses = [];
2455
+ try {
2456
+ addresses.push(...(await resolve4(hostname)));
2457
+ }
2458
+ catch {
2459
+ /* hostname may not have A records */
2460
+ }
2461
+ try {
2462
+ addresses.push(...(await resolve6(hostname)));
2463
+ }
2464
+ catch {
2465
+ /* hostname may not have AAAA records */
2466
+ }
2467
+ if (addresses.length > 0 &&
2468
+ addresses.every((addr) => isPrivateOrLoopbackAddress(addr))) {
2469
+ logger.warn(`[GoogleVertexProvider] Blocked fetch: hostname ${hostname} resolves to private/loopback address`, { index: i, addresses });
2470
+ continue;
2471
+ }
2472
+ }
2473
+ catch (dnsError) {
2474
+ logger.warn(`[GoogleVertexProvider] DNS resolution failed for ${hostname}, blocking fetch`, {
2475
+ index: i,
2476
+ error: dnsError instanceof Error
2477
+ ? dnsError.message
2478
+ : String(dnsError),
2479
+ });
2480
+ continue;
2481
+ }
2482
+ const response = await fetch(image, {
2483
+ signal: AbortSignal.timeout(15_000),
2484
+ });
2485
+ if (!response.ok) {
2486
+ logger.warn(`Failed to fetch image URL (${response.status}), skipping`, { index: i, url: image });
2487
+ continue;
2488
+ }
2489
+ // Size guard — reject downloads exceeding 10 MB
2490
+ const contentLength = response.headers.get("content-length");
2491
+ if (contentLength &&
2492
+ Number(contentLength) > MAX_IMAGE_DOWNLOAD_BYTES) {
2493
+ logger.warn(`[GoogleVertexProvider] Image URL exceeds ${MAX_IMAGE_DOWNLOAD_BYTES} byte limit (Content-Length: ${contentLength}), skipping`, { index: i, url: image });
2494
+ continue;
2495
+ }
2496
+ const buffer = Buffer.from(await response.arrayBuffer());
2497
+ if (buffer.byteLength > MAX_IMAGE_DOWNLOAD_BYTES) {
2498
+ logger.warn(`[GoogleVertexProvider] Downloaded image exceeds ${MAX_IMAGE_DOWNLOAD_BYTES} byte limit (${buffer.byteLength} bytes), skipping`, { index: i, url: image });
2499
+ continue;
2500
+ }
2501
+ imageBase64 = buffer.toString("base64");
2502
+ mimeType = this.detectImageType(buffer);
2503
+ }
2504
+ catch (fetchError) {
2505
+ logger.warn(`Failed to download image from URL, skipping: ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`, { index: i, url: image });
2506
+ continue;
2507
+ }
2508
+ }
2216
2509
  else {
2217
2510
  imageBase64 = image;
2218
2511
  const decodedBuffer = Buffer.from(imageBase64, "base64");
@@ -2309,7 +2602,7 @@ export class GoogleVertexProvider extends BaseProvider {
2309
2602
  });
2310
2603
  try {
2311
2604
  const token = await this.getImageGenerationAccessToken();
2312
- const parts = this.buildImageGenerationParts(prompt, pdfFiles, inputImages);
2605
+ const parts = await this.buildImageGenerationParts(prompt, pdfFiles, inputImages);
2313
2606
  // Build request body with CRITICAL response_modalities setting
2314
2607
  const requestBody = {
2315
2608
  contents: [{ role: "user", parts }],