@juspay/neurolink 9.15.0 → 9.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/adapters/video/videoAnalyzer.js +10 -8
- package/dist/cli/commands/setup-anthropic.js +1 -14
- package/dist/cli/commands/setup-azure.js +1 -12
- package/dist/cli/commands/setup-bedrock.js +1 -9
- package/dist/cli/commands/setup-google-ai.js +1 -12
- package/dist/cli/commands/setup-openai.js +1 -14
- package/dist/cli/commands/workflow.d.ts +27 -0
- package/dist/cli/commands/workflow.js +216 -0
- package/dist/cli/factories/commandFactory.js +79 -20
- package/dist/cli/index.js +0 -1
- package/dist/cli/parser.js +4 -1
- package/dist/cli/utils/maskCredential.d.ts +11 -0
- package/dist/cli/utils/maskCredential.js +23 -0
- package/dist/constants/contextWindows.js +107 -16
- package/dist/constants/enums.d.ts +99 -15
- package/dist/constants/enums.js +152 -22
- package/dist/context/budgetChecker.js +1 -1
- package/dist/context/contextCompactor.js +31 -4
- package/dist/context/emergencyTruncation.d.ts +21 -0
- package/dist/context/emergencyTruncation.js +88 -0
- package/dist/context/errorDetection.d.ts +16 -0
- package/dist/context/errorDetection.js +48 -1
- package/dist/context/errors.d.ts +19 -0
- package/dist/context/errors.js +21 -0
- package/dist/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/core/baseProvider.js +306 -200
- package/dist/core/conversationMemoryManager.js +104 -61
- package/dist/core/evaluationProviders.js +16 -33
- package/dist/core/factory.js +237 -164
- package/dist/core/modules/GenerationHandler.js +175 -116
- package/dist/core/modules/MessageBuilder.js +222 -170
- package/dist/core/modules/StreamHandler.d.ts +1 -0
- package/dist/core/modules/StreamHandler.js +95 -27
- package/dist/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/core/modules/TelemetryHandler.js +25 -7
- package/dist/core/modules/ToolsManager.js +115 -191
- package/dist/core/redisConversationMemoryManager.js +418 -282
- package/dist/factories/providerRegistry.d.ts +5 -0
- package/dist/factories/providerRegistry.js +20 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +4 -2
- package/dist/lib/adapters/video/videoAnalyzer.d.ts +1 -1
- package/dist/lib/adapters/video/videoAnalyzer.js +10 -8
- package/dist/lib/constants/contextWindows.js +107 -16
- package/dist/lib/constants/enums.d.ts +99 -15
- package/dist/lib/constants/enums.js +152 -22
- package/dist/lib/context/budgetChecker.js +1 -1
- package/dist/lib/context/contextCompactor.js +31 -4
- package/dist/lib/context/emergencyTruncation.d.ts +21 -0
- package/dist/lib/context/emergencyTruncation.js +89 -0
- package/dist/lib/context/errorDetection.d.ts +16 -0
- package/dist/lib/context/errorDetection.js +48 -1
- package/dist/lib/context/errors.d.ts +19 -0
- package/dist/lib/context/errors.js +22 -0
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +6 -0
- package/dist/lib/context/stages/slidingWindowTruncator.js +159 -24
- package/dist/lib/core/baseProvider.js +306 -200
- package/dist/lib/core/conversationMemoryManager.js +104 -61
- package/dist/lib/core/evaluationProviders.js +16 -33
- package/dist/lib/core/factory.js +237 -164
- package/dist/lib/core/modules/GenerationHandler.js +175 -116
- package/dist/lib/core/modules/MessageBuilder.js +222 -170
- package/dist/lib/core/modules/StreamHandler.d.ts +1 -0
- package/dist/lib/core/modules/StreamHandler.js +95 -27
- package/dist/lib/core/modules/TelemetryHandler.d.ts +10 -1
- package/dist/lib/core/modules/TelemetryHandler.js +25 -7
- package/dist/lib/core/modules/ToolsManager.js +115 -191
- package/dist/lib/core/redisConversationMemoryManager.js +418 -282
- package/dist/lib/factories/providerRegistry.d.ts +5 -0
- package/dist/lib/factories/providerRegistry.js +20 -2
- package/dist/lib/index.d.ts +2 -2
- package/dist/lib/index.js +4 -2
- package/dist/lib/mcp/externalServerManager.js +66 -0
- package/dist/lib/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/lib/mcp/mcpClientFactory.js +16 -0
- package/dist/lib/mcp/toolDiscoveryService.js +32 -6
- package/dist/lib/mcp/toolRegistry.js +193 -123
- package/dist/lib/neurolink.d.ts +6 -0
- package/dist/lib/neurolink.js +1162 -646
- package/dist/lib/providers/amazonBedrock.d.ts +1 -1
- package/dist/lib/providers/amazonBedrock.js +521 -319
- package/dist/lib/providers/anthropic.js +73 -17
- package/dist/lib/providers/anthropicBaseProvider.js +77 -17
- package/dist/lib/providers/googleAiStudio.d.ts +1 -1
- package/dist/lib/providers/googleAiStudio.js +292 -227
- package/dist/lib/providers/googleVertex.d.ts +36 -1
- package/dist/lib/providers/googleVertex.js +553 -260
- package/dist/lib/providers/ollama.js +329 -278
- package/dist/lib/providers/openAI.js +77 -19
- package/dist/lib/providers/sagemaker/parsers.js +3 -3
- package/dist/lib/providers/sagemaker/streaming.js +3 -3
- package/dist/lib/proxy/proxyFetch.js +81 -48
- package/dist/lib/rag/ChunkerFactory.js +1 -1
- package/dist/lib/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/lib/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/lib/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/lib/rag/chunking/markdownChunker.js +174 -2
- package/dist/lib/rag/pipeline/contextAssembly.js +2 -1
- package/dist/lib/rag/ragIntegration.d.ts +18 -1
- package/dist/lib/rag/ragIntegration.js +94 -14
- package/dist/lib/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/lib/server/abstract/baseServerAdapter.js +4 -1
- package/dist/lib/server/adapters/fastifyAdapter.js +35 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/lib/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/lib/telemetry/attributes.d.ts +52 -0
- package/dist/lib/telemetry/attributes.js +61 -0
- package/dist/lib/telemetry/index.d.ts +3 -0
- package/dist/lib/telemetry/index.js +3 -0
- package/dist/lib/telemetry/telemetryService.d.ts +6 -0
- package/dist/lib/telemetry/telemetryService.js +6 -0
- package/dist/lib/telemetry/tracers.d.ts +15 -0
- package/dist/lib/telemetry/tracers.js +17 -0
- package/dist/lib/telemetry/withSpan.d.ts +9 -0
- package/dist/lib/telemetry/withSpan.js +35 -0
- package/dist/lib/types/contextTypes.d.ts +10 -0
- package/dist/lib/types/streamTypes.d.ts +14 -0
- package/dist/lib/utils/conversationMemory.js +121 -82
- package/dist/lib/utils/logger.d.ts +5 -0
- package/dist/lib/utils/logger.js +50 -2
- package/dist/lib/utils/messageBuilder.js +22 -42
- package/dist/lib/utils/modelDetection.js +3 -3
- package/dist/lib/utils/providerRetry.d.ts +41 -0
- package/dist/lib/utils/providerRetry.js +114 -0
- package/dist/lib/utils/retryability.d.ts +14 -0
- package/dist/lib/utils/retryability.js +23 -0
- package/dist/lib/utils/sanitizers/svg.js +4 -5
- package/dist/lib/utils/tokenEstimation.d.ts +11 -1
- package/dist/lib/utils/tokenEstimation.js +19 -4
- package/dist/lib/utils/videoAnalysisProcessor.js +7 -3
- package/dist/mcp/externalServerManager.js +66 -0
- package/dist/mcp/mcpCircuitBreaker.js +24 -0
- package/dist/mcp/mcpClientFactory.js +16 -0
- package/dist/mcp/toolDiscoveryService.js +32 -6
- package/dist/mcp/toolRegistry.js +193 -123
- package/dist/neurolink.d.ts +6 -0
- package/dist/neurolink.js +1162 -646
- package/dist/providers/amazonBedrock.d.ts +1 -1
- package/dist/providers/amazonBedrock.js +521 -319
- package/dist/providers/anthropic.js +73 -17
- package/dist/providers/anthropicBaseProvider.js +77 -17
- package/dist/providers/googleAiStudio.d.ts +1 -1
- package/dist/providers/googleAiStudio.js +292 -227
- package/dist/providers/googleVertex.d.ts +36 -1
- package/dist/providers/googleVertex.js +553 -260
- package/dist/providers/ollama.js +329 -278
- package/dist/providers/openAI.js +77 -19
- package/dist/providers/sagemaker/parsers.js +3 -3
- package/dist/providers/sagemaker/streaming.js +3 -3
- package/dist/proxy/proxyFetch.js +81 -48
- package/dist/rag/ChunkerFactory.js +1 -1
- package/dist/rag/chunkers/MarkdownChunker.d.ts +22 -0
- package/dist/rag/chunkers/MarkdownChunker.js +213 -9
- package/dist/rag/chunking/markdownChunker.d.ts +16 -0
- package/dist/rag/chunking/markdownChunker.js +174 -2
- package/dist/rag/pipeline/contextAssembly.js +2 -1
- package/dist/rag/ragIntegration.d.ts +18 -1
- package/dist/rag/ragIntegration.js +94 -14
- package/dist/rag/retrieval/vectorQueryTool.js +21 -4
- package/dist/server/abstract/baseServerAdapter.js +4 -1
- package/dist/server/adapters/fastifyAdapter.js +35 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +32 -0
- package/dist/services/server/ai/observability/instrumentation.js +39 -0
- package/dist/telemetry/attributes.d.ts +52 -0
- package/dist/telemetry/attributes.js +60 -0
- package/dist/telemetry/index.d.ts +3 -0
- package/dist/telemetry/index.js +3 -0
- package/dist/telemetry/telemetryService.d.ts +6 -0
- package/dist/telemetry/telemetryService.js +6 -0
- package/dist/telemetry/tracers.d.ts +15 -0
- package/dist/telemetry/tracers.js +16 -0
- package/dist/telemetry/withSpan.d.ts +9 -0
- package/dist/telemetry/withSpan.js +34 -0
- package/dist/types/contextTypes.d.ts +10 -0
- package/dist/types/streamTypes.d.ts +14 -0
- package/dist/utils/conversationMemory.js +121 -82
- package/dist/utils/logger.d.ts +5 -0
- package/dist/utils/logger.js +50 -2
- package/dist/utils/messageBuilder.js +22 -42
- package/dist/utils/modelDetection.js +3 -3
- package/dist/utils/providerRetry.d.ts +41 -0
- package/dist/utils/providerRetry.js +113 -0
- package/dist/utils/retryability.d.ts +14 -0
- package/dist/utils/retryability.js +22 -0
- package/dist/utils/sanitizers/svg.js +4 -5
- package/dist/utils/tokenEstimation.d.ts +11 -1
- package/dist/utils/tokenEstimation.js +19 -4
- package/dist/utils/videoAnalysisProcessor.js +7 -3
- package/dist/workflow/config.d.ts +26 -26
- package/package.json +1 -1
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { createVertex, } from "@ai-sdk/google-vertex";
|
|
2
2
|
import { createVertexAnthropic, } from "@ai-sdk/google-vertex/anthropic";
|
|
3
3
|
import { Output, streamText, } from "ai";
|
|
4
|
-
import
|
|
4
|
+
import { trace, SpanKind, SpanStatusCode } from "@opentelemetry/api";
|
|
5
|
+
import dns from "node:dns";
|
|
5
6
|
import fs from "fs";
|
|
6
7
|
import os from "os";
|
|
7
8
|
import path from "path";
|
|
@@ -14,7 +15,10 @@ import { AuthenticationError, NetworkError, ProviderError, RateLimitError, Inval
|
|
|
14
15
|
import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js";
|
|
15
16
|
import { FileDetector } from "../utils/fileDetector.js";
|
|
16
17
|
import { logger } from "../utils/logger.js";
|
|
18
|
+
import { estimateTokens } from "../utils/tokenEstimation.js";
|
|
17
19
|
import { isGemini3Model } from "../utils/modelDetection.js";
|
|
20
|
+
import { calculateCost } from "../utils/pricing.js";
|
|
21
|
+
import { tracers, ATTR, withClientSpan } from "../telemetry/index.js";
|
|
18
22
|
import { createGoogleAuthConfig, createVertexProjectConfig, validateApiKey, } from "../utils/providerConfig.js";
|
|
19
23
|
import { convertZodToJsonSchema, inlineJsonSchema, } from "../utils/schemaConversion.js";
|
|
20
24
|
import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js";
|
|
@@ -30,6 +34,30 @@ import { buildNativeToolDeclarations, buildNativeConfig, computeMaxSteps as comp
|
|
|
30
34
|
// custom fetch, so it is inherently isolated from user cancellation signals.
|
|
31
35
|
// The image generation path (getImageGenerationAccessToken) has an additional
|
|
32
36
|
// explicit 15s timeout per attempt for direct REST API calls.
|
|
37
|
+
/**
 * Check whether an IP address literal belongs to a loopback, private,
 * link-local, unique-local, or unspecified range.
 *
 * The check is purely lexical (no DNS resolution is performed here) and is
 * case-insensitive. Ranges covered:
 *   - IPv4: 127.0.0.0/8, 0.0.0.0, 10.0.0.0/8, 172.16.0.0/12,
 *           192.168.0.0/16, 169.254.0.0/16 (link-local)
 *   - IPv6: ::1, ::, fe80::/10 (link-local), fc00::/7 (unique-local)
 *   - IPv4-mapped IPv6 (::ffff:a.b.c.d or ::ffff:hhhh:hhhh), classified by
 *     the embedded IPv4 address
 *
 * @param {string} address - IPv4 or IPv6 address literal.
 * @returns {boolean} true when the address falls in one of the ranges above.
 */
function isPrivateOrLoopbackAddress(address) {
    const lower = address.toLowerCase();
    // IPv4-mapped IPv6 in dotted form: classify by the embedded IPv4 address,
    // otherwise "::ffff:127.0.0.1" would pass as a public address.
    const mappedDotted = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(lower);
    if (mappedDotted) {
        return isPrivateOrLoopbackAddress(mappedDotted[1]);
    }
    // IPv4-mapped IPv6 in hex form (e.g. "::ffff:7f00:1" === 127.0.0.1).
    const mappedHex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(lower);
    if (mappedHex) {
        const high = Number.parseInt(mappedHex[1], 16);
        const low = Number.parseInt(mappedHex[2], 16);
        return isPrivateOrLoopbackAddress(`${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`);
    }
    // IPv4 loopback and unspecified
    if (lower.startsWith("127.") || lower === "0.0.0.0") {
        return true;
    }
    // IPv4 private ranges and link-local (169.254.0.0/16)
    if (lower.startsWith("10.") ||
        lower.startsWith("192.168.") ||
        lower.startsWith("169.254.")) {
        return true;
    }
    if (/^172\.(1[6-9]|2\d|3[01])\./.test(lower)) {
        return true;
    }
    // IPv6 loopback and unspecified
    if (lower === "::1" || lower === "::") {
        return true;
    }
    // IPv6 link-local fe80::/10 spans first hextets fe80 through febf
    if (/^fe[89ab][0-9a-f]:/.test(lower)) {
        return true;
    }
    // IPv6 unique-local fc00::/7 spans first hextets fc00 through fdff
    if (/^f[cd][0-9a-f]{2}:/.test(lower)) {
        return true;
    }
    return false;
}
|
|
59
|
+
const MAX_IMAGE_DOWNLOAD_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
60
|
+
const streamTracer = trace.getTracer("neurolink.provider.vertex");
|
|
33
61
|
// Enhanced Anthropic support with direct imports
|
|
34
62
|
// Using the dual provider architecture from Vercel AI SDK
|
|
35
63
|
const hasAnthropicSupport = () => {
|
|
@@ -63,6 +91,8 @@ const hasGoogleCredentials = () => {
|
|
|
63
91
|
(process.env.GOOGLE_AUTH_CLIENT_EMAIL &&
|
|
64
92
|
process.env.GOOGLE_AUTH_PRIVATE_KEY));
|
|
65
93
|
};
|
|
94
|
+
// Module-level cache for runtime-created credentials file to avoid per-request writes
|
|
95
|
+
let cachedCredentialsPath = null;
|
|
66
96
|
// Enhanced Vertex settings creation with authentication fallback and proxy support
|
|
67
97
|
const createVertexSettings = async (region) => {
|
|
68
98
|
const location = region || getVertexLocation();
|
|
@@ -99,9 +129,14 @@ const createVertexSettings = async (region) => {
|
|
|
99
129
|
client_x509_cert_url: process.env.GOOGLE_AUTH_CLIENT_CERT_URL,
|
|
100
130
|
universe_domain: process.env.GOOGLE_AUTH_UNIVERSE_DOMAIN,
|
|
101
131
|
};
|
|
102
|
-
// If we have the essential fields, create a runtime credentials file
|
|
132
|
+
// If we have the essential fields, create a runtime credentials file (cached)
|
|
103
133
|
if (requiredEnvVarsForFile.client_email &&
|
|
104
134
|
requiredEnvVarsForFile.private_key) {
|
|
135
|
+
// Return cached path if already written and still exists on disk
|
|
136
|
+
if (cachedCredentialsPath && fs.existsSync(cachedCredentialsPath)) {
|
|
137
|
+
process.env.GOOGLE_APPLICATION_CREDENTIALS = cachedCredentialsPath;
|
|
138
|
+
return baseSettings;
|
|
139
|
+
}
|
|
105
140
|
try {
|
|
106
141
|
// Build complete service account credentials object
|
|
107
142
|
const serviceAccountCredentials = {
|
|
@@ -119,18 +154,26 @@ const createVertexSettings = async (region) => {
|
|
|
119
154
|
client_x509_cert_url: requiredEnvVarsForFile.client_x509_cert_url || "",
|
|
120
155
|
universe_domain: requiredEnvVarsForFile.universe_domain || "googleapis.com",
|
|
121
156
|
};
|
|
122
|
-
// Create temporary credentials file
|
|
157
|
+
// Create temporary credentials file with restricted permissions
|
|
123
158
|
const tmpDir = os.tmpdir();
|
|
124
159
|
const credentialsFileName = `google-credentials-${Date.now()}-${Math.random().toString(36).substring(2, 11)}.json`;
|
|
125
160
|
const credentialsFilePath = path.join(tmpDir, credentialsFileName);
|
|
126
|
-
fs.writeFileSync(credentialsFilePath, JSON.stringify(serviceAccountCredentials, null, 2));
|
|
161
|
+
fs.writeFileSync(credentialsFilePath, JSON.stringify(serviceAccountCredentials, null, 2), { mode: 0o600 });
|
|
162
|
+
cachedCredentialsPath = credentialsFilePath;
|
|
163
|
+
// Register cleanup on process exit to remove the credentials file
|
|
164
|
+
process.once("exit", () => {
|
|
165
|
+
try {
|
|
166
|
+
if (cachedCredentialsPath && fs.existsSync(cachedCredentialsPath)) {
|
|
167
|
+
fs.unlinkSync(cachedCredentialsPath);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
catch {
|
|
171
|
+
/* ignore cleanup errors */
|
|
172
|
+
}
|
|
173
|
+
});
|
|
127
174
|
// Set the environment variable to point to our runtime-created file
|
|
128
175
|
process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsFilePath;
|
|
129
|
-
|
|
130
|
-
const fileExists = fs.existsSync(credentialsFilePath);
|
|
131
|
-
if (fileExists) {
|
|
132
|
-
return baseSettings;
|
|
133
|
-
}
|
|
176
|
+
return baseSettings;
|
|
134
177
|
}
|
|
135
178
|
catch {
|
|
136
179
|
// Silent error handling for runtime credentials file creation
|
|
@@ -246,7 +289,13 @@ const createVertexSettings = async (region) => {
|
|
|
246
289
|
};
|
|
247
290
|
// Create Anthropic-specific Vertex settings with the same authentication and proxy support
|
|
248
291
|
const createVertexAnthropicSettings = async (region) => {
|
|
249
|
-
|
|
292
|
+
// The @ai-sdk/google-vertex SDK constructs Anthropic URLs as:
|
|
293
|
+
// https://{location}-aiplatform.googleapis.com/...
|
|
294
|
+
// When location is "global", this creates "https://global-aiplatform.googleapis.com"
|
|
295
|
+
// which is invalid. The correct global endpoint omits the region prefix entirely.
|
|
296
|
+
// Since the SDK doesn't handle this, redirect "global" to "us-east5" for Anthropic.
|
|
297
|
+
const anthropicRegion = !region || region === "global" ? "us-east5" : region;
|
|
298
|
+
const baseVertexSettings = await createVertexSettings(anthropicRegion);
|
|
250
299
|
// GoogleVertexAnthropicProviderSettings extends GoogleVertexProviderSettings
|
|
251
300
|
// so we can use the same settings with proper typing
|
|
252
301
|
return {
|
|
@@ -262,6 +311,39 @@ const createVertexAnthropicSettings = async (region) => {
|
|
|
262
311
|
const isAnthropicModel = (modelName) => {
|
|
263
312
|
return modelName.toLowerCase().includes("claude");
|
|
264
313
|
};
|
|
314
|
+
/**
 * Vertex Model Aliases
 *
 * Maps shorthand model names to their full versioned IDs required by the
 * Vertex AI API. This allows users to pass convenient names like
 * "claude-sonnet-4-5" instead of "claude-sonnet-4-5@20250929".
 *
 * Alias resolution runs at the very start of getModel() so that all
 * downstream code (isAnthropicModel, validateAnthropicModelName, etc.)
 * sees the canonical versioned name.
 *
 * To add a new model: simply add an entry mapping the shorthand to the
 * full versioned string. No other changes are needed.
 *
 * Keys are looked up verbatim (case-sensitive, exact match); names that are
 * not listed here are passed through unchanged by the lookup sites.
 */
export const VERTEX_MODEL_ALIASES = {
    // Claude 4.x shorthand aliases → versioned names
    "claude-sonnet-4-5": "claude-sonnet-4-5@20250929",
    // Vertex publishes Opus 4.5 under the 20251101 snapshot ID; 20251124 is
    // not a published version suffix for this model.
    "claude-opus-4-5": "claude-opus-4-5@20251101",
    "claude-haiku-4-5": "claude-haiku-4-5@20251001",
    "claude-sonnet-4": "claude-sonnet-4@20250514",
    "claude-opus-4": "claude-opus-4@20250514",
    "claude-opus-4-1": "claude-opus-4-1@20250805",
    // Claude 3.x shorthand aliases → versioned names
    "claude-3-7-sonnet": "claude-3-7-sonnet@20250219",
    // NOTE(review): the entries below use the hyphenated "-YYYYMMDD" form rather
    // than the "@YYYYMMDD" form used by the other Claude aliases — presumably
    // normalized elsewhere; confirm against the Vertex AI model list.
    "claude-3-5-sonnet": "claude-3-5-sonnet-20241022",
    "claude-3-5-haiku": "claude-3-5-haiku-20241022",
    "claude-3-opus": "claude-3-opus-20240229",
    "claude-3-sonnet": "claude-3-sonnet-20240229",
    "claude-3-haiku": "claude-3-haiku-20240307",
    // Gemini shorthand aliases
    "gemini-3-pro": "gemini-3-pro-latest",
    "gemini-3-flash": "gemini-3-flash-latest",
};
|
|
265
347
|
/**
|
|
266
348
|
* Google Vertex AI Provider v2 - BaseProvider Implementation
|
|
267
349
|
*
|
|
@@ -366,6 +448,13 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
366
448
|
const model = await this.getModel();
|
|
367
449
|
return model;
|
|
368
450
|
}
|
|
451
|
+
/**
|
|
452
|
+
* Resolve a raw model name through the alias map.
|
|
453
|
+
* Used internally to normalize model names before any API calls.
|
|
454
|
+
*/
|
|
455
|
+
resolveAlias(modelName) {
|
|
456
|
+
return VERTEX_MODEL_ALIASES[modelName] ?? modelName;
|
|
457
|
+
}
|
|
369
458
|
/**
|
|
370
459
|
* Initialize model creation tracking
|
|
371
460
|
*/
|
|
@@ -373,7 +462,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
373
462
|
const modelCreationId = `vertex-model-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`;
|
|
374
463
|
const modelCreationStartTime = Date.now();
|
|
375
464
|
const modelCreationHrTimeStart = process.hrtime.bigint();
|
|
376
|
-
|
|
465
|
+
// Resolve shorthand model aliases (e.g. "claude-sonnet-4-5" → "claude-sonnet-4-5@20250929")
|
|
466
|
+
// before any downstream logic that depends on the versioned name.
|
|
467
|
+
const rawModelName = this.modelName || getDefaultVertexModel();
|
|
468
|
+
const modelName = VERTEX_MODEL_ALIASES[rawModelName] ?? rawModelName;
|
|
377
469
|
return {
|
|
378
470
|
modelCreationId,
|
|
379
471
|
modelCreationStartTime,
|
|
@@ -665,7 +757,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
665
757
|
* Creates fresh instances for each request to ensure proper authentication
|
|
666
758
|
*/
|
|
667
759
|
async getModel() {
|
|
668
|
-
// Initialize logging and setup
|
|
760
|
+
// Initialize logging and setup (alias resolution happens inside)
|
|
669
761
|
const { modelCreationId, modelCreationStartTime, modelCreationHrTimeStart, modelName, } = this.initializeModelCreationLogging();
|
|
670
762
|
// Check if this is an Anthropic model and attempt creation
|
|
671
763
|
const anthropicModel = await this.attemptAnthropicModelCreation(modelName, modelCreationId, modelCreationStartTime, modelCreationHrTimeStart);
|
|
@@ -684,7 +776,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
684
776
|
}
|
|
685
777
|
async executeStream(options, analysisSchema) {
|
|
686
778
|
// Check if this is a Gemini 3 model with tools - use native SDK for thought_signature
|
|
687
|
-
const gemini3CheckModelName = options.model || this.modelName || getDefaultVertexModel();
|
|
779
|
+
const gemini3CheckModelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
688
780
|
// Check for tools from options AND from SDK (MCP tools)
|
|
689
781
|
// Need to check early if we should route to native SDK
|
|
690
782
|
const gemini3CheckShouldUseTools = !options.disableTools && this.supportsTools();
|
|
@@ -735,7 +827,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
735
827
|
toolNames: Object.keys(tools),
|
|
736
828
|
});
|
|
737
829
|
// Model-specific maxTokens handling
|
|
738
|
-
const modelName = this.modelName || getDefaultVertexModel();
|
|
830
|
+
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
739
831
|
// Use cached model configuration to determine maxTokens handling for streaming performance
|
|
740
832
|
// This avoids hardcoded model-specific logic and repeated config lookups
|
|
741
833
|
const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
|
|
@@ -748,6 +840,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
748
840
|
messages: messages,
|
|
749
841
|
temperature: options.temperature,
|
|
750
842
|
...(maxTokens && { maxTokens }),
|
|
843
|
+
maxRetries: 0, // NL11: Disable AI SDK's invisible internal retries; we handle retries with OTel instrumentation
|
|
751
844
|
...(shouldUseTools &&
|
|
752
845
|
Object.keys(tools).length > 0 && {
|
|
753
846
|
tools,
|
|
@@ -819,7 +912,67 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
819
912
|
});
|
|
820
913
|
}
|
|
821
914
|
}
|
|
822
|
-
|
|
915
|
+
// Wrap streamText in an OTel span to capture provider-level latency and token usage
|
|
916
|
+
const streamSpan = streamTracer.startSpan("neurolink.provider.streamText", {
|
|
917
|
+
kind: SpanKind.CLIENT,
|
|
918
|
+
attributes: {
|
|
919
|
+
"gen_ai.system": "vertex",
|
|
920
|
+
"gen_ai.request.model": model.modelId || this.modelName || "unknown",
|
|
921
|
+
},
|
|
922
|
+
});
|
|
923
|
+
let result;
|
|
924
|
+
try {
|
|
925
|
+
result = streamText(streamOptions);
|
|
926
|
+
}
|
|
927
|
+
catch (err) {
|
|
928
|
+
streamSpan.recordException(err instanceof Error ? err : new Error(String(err)));
|
|
929
|
+
streamSpan.setStatus({
|
|
930
|
+
code: SpanStatusCode.ERROR,
|
|
931
|
+
message: err instanceof Error ? err.message : String(err),
|
|
932
|
+
});
|
|
933
|
+
streamSpan.end();
|
|
934
|
+
throw err;
|
|
935
|
+
}
|
|
936
|
+
// Collect token usage and finish reason asynchronously when the stream completes,
|
|
937
|
+
// then end the span. This avoids blocking the stream consumer.
|
|
938
|
+
result.usage
|
|
939
|
+
.then((usage) => {
|
|
940
|
+
streamSpan.setAttribute("gen_ai.usage.input_tokens", usage.promptTokens || 0);
|
|
941
|
+
streamSpan.setAttribute("gen_ai.usage.output_tokens", usage.completionTokens || 0);
|
|
942
|
+
const effectiveModel = options.model ||
|
|
943
|
+
model.modelId ||
|
|
944
|
+
this.modelName ||
|
|
945
|
+
getDefaultVertexModel();
|
|
946
|
+
const cost = calculateCost(this.providerName, effectiveModel, {
|
|
947
|
+
input: usage.promptTokens || 0,
|
|
948
|
+
output: usage.completionTokens || 0,
|
|
949
|
+
total: (usage.promptTokens || 0) + (usage.completionTokens || 0),
|
|
950
|
+
});
|
|
951
|
+
if (cost && cost > 0) {
|
|
952
|
+
streamSpan.setAttribute("neurolink.cost", cost);
|
|
953
|
+
}
|
|
954
|
+
})
|
|
955
|
+
.catch(() => {
|
|
956
|
+
// Usage may not be available if the stream is aborted
|
|
957
|
+
});
|
|
958
|
+
result.finishReason
|
|
959
|
+
.then((reason) => {
|
|
960
|
+
streamSpan.setAttribute("gen_ai.response.finish_reason", reason || "unknown");
|
|
961
|
+
})
|
|
962
|
+
.catch(() => {
|
|
963
|
+
// Finish reason may not be available if the stream is aborted
|
|
964
|
+
});
|
|
965
|
+
result.text
|
|
966
|
+
.then(() => {
|
|
967
|
+
streamSpan.end();
|
|
968
|
+
})
|
|
969
|
+
.catch((err) => {
|
|
970
|
+
streamSpan.setStatus({
|
|
971
|
+
code: SpanStatusCode.ERROR,
|
|
972
|
+
message: err instanceof Error ? err.message : String(err),
|
|
973
|
+
});
|
|
974
|
+
streamSpan.end();
|
|
975
|
+
});
|
|
823
976
|
// Defer timeout cleanup until the stream completes or errors
|
|
824
977
|
result.text.finally(() => timeoutController?.cleanup());
|
|
825
978
|
// Transform string stream to content object stream using BaseProvider method
|
|
@@ -965,262 +1118,306 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
965
1118
|
* This bypasses @ai-sdk/google-vertex to properly handle thought_signature
|
|
966
1119
|
*/
|
|
967
1120
|
async executeNativeGemini3Stream(options) {
|
|
968
|
-
const
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
|
|
987
|
-
if (wantsJsonOutput && hasToolsInput) {
|
|
988
|
-
logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
|
|
989
|
-
hasToolsInput = false;
|
|
990
|
-
}
|
|
991
|
-
let toolsConfig;
|
|
992
|
-
let executeMap = new Map();
|
|
993
|
-
if (hasToolsInput) {
|
|
994
|
-
const result = buildNativeToolDeclarations(options.tools);
|
|
995
|
-
toolsConfig = result.toolsConfig;
|
|
996
|
-
executeMap = result.executeMap;
|
|
997
|
-
logger.debug("[GoogleVertex] Converted tools for native SDK", {
|
|
998
|
-
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
999
|
-
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1121
|
+
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
1122
|
+
return withClientSpan({
|
|
1123
|
+
name: "neurolink.provider.stream",
|
|
1124
|
+
tracer: tracers.provider,
|
|
1125
|
+
attributes: {
|
|
1126
|
+
[ATTR.GEN_AI_SYSTEM]: "vertex",
|
|
1127
|
+
[ATTR.GEN_AI_MODEL]: modelName,
|
|
1128
|
+
[ATTR.GEN_AI_OPERATION]: "stream",
|
|
1129
|
+
[ATTR.NL_PROVIDER]: this.providerName,
|
|
1130
|
+
},
|
|
1131
|
+
}, async (span) => {
|
|
1132
|
+
const client = await this.createVertexGenAIClient(options.region);
|
|
1133
|
+
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1134
|
+
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3", {
|
|
1135
|
+
model: modelName,
|
|
1136
|
+
hasTools: !!options.tools && Object.keys(options.tools).length > 0,
|
|
1137
|
+
project: this.projectId,
|
|
1138
|
+
location: effectiveLocation,
|
|
1000
1139
|
});
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1140
|
+
// Build contents from input with multimodal support
|
|
1141
|
+
const multimodalInput = options.input;
|
|
1142
|
+
const contents = this.buildNativeContentParts(options.input.text, multimodalInput, "native stream");
|
|
1143
|
+
// Convert tools to native format
|
|
1144
|
+
let hasToolsInput = options.tools &&
|
|
1145
|
+
Object.keys(options.tools).length > 0 &&
|
|
1146
|
+
!options.disableTools;
|
|
1147
|
+
// Guard: Gemini cannot use tools + JSON schema simultaneously
|
|
1148
|
+
const streamOptions = options;
|
|
1149
|
+
const wantsJsonOutput = streamOptions.output?.format === "json" || streamOptions.schema;
|
|
1150
|
+
if (wantsJsonOutput && hasToolsInput) {
|
|
1151
|
+
logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
|
|
1152
|
+
hasToolsInput = false;
|
|
1153
|
+
}
|
|
1154
|
+
let toolsConfig;
|
|
1155
|
+
let executeMap = new Map();
|
|
1156
|
+
if (hasToolsInput) {
|
|
1157
|
+
const result = buildNativeToolDeclarations(options.tools);
|
|
1158
|
+
toolsConfig = result.toolsConfig;
|
|
1159
|
+
executeMap = result.executeMap;
|
|
1160
|
+
logger.debug("[GoogleVertex] Converted tools for native SDK", {
|
|
1161
|
+
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
1162
|
+
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1016
1163
|
});
|
|
1017
1164
|
}
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
let totalOutputTokens = 0;
|
|
1029
|
-
const allToolCalls = [];
|
|
1030
|
-
let step = 0;
|
|
1031
|
-
const failedTools = new Map();
|
|
1032
|
-
// Agentic loop for tool calling
|
|
1033
|
-
try {
|
|
1034
|
-
while (step < maxSteps) {
|
|
1035
|
-
if (timeoutController?.controller.signal.aborted) {
|
|
1036
|
-
break;
|
|
1037
|
-
}
|
|
1038
|
-
step++;
|
|
1039
|
-
logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
|
|
1040
|
-
try {
|
|
1041
|
-
const stream = await client.models.generateContentStream({
|
|
1042
|
-
model: modelName,
|
|
1043
|
-
contents: currentContents,
|
|
1044
|
-
config,
|
|
1045
|
-
...(composedSignal
|
|
1046
|
-
? { httpOptions: { signal: composedSignal } }
|
|
1047
|
-
: {}),
|
|
1048
|
-
});
|
|
1049
|
-
const chunkResult = await collectStreamChunks(stream);
|
|
1050
|
-
totalInputTokens += chunkResult.inputTokens;
|
|
1051
|
-
totalOutputTokens += chunkResult.outputTokens;
|
|
1052
|
-
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1053
|
-
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1054
|
-
finalText = stepText;
|
|
1055
|
-
break;
|
|
1165
|
+
// Build config
|
|
1166
|
+
const config = buildNativeConfig(options, toolsConfig);
|
|
1167
|
+
// Add JSON output format support for native SDK stream
|
|
1168
|
+
if (streamOptions.output?.format === "json" || streamOptions.schema) {
|
|
1169
|
+
config.responseMimeType = "application/json";
|
|
1170
|
+
if (streamOptions.schema) {
|
|
1171
|
+
const rawSchema = convertZodToJsonSchema(streamOptions.schema);
|
|
1172
|
+
const inlinedSchema = inlineJsonSchema(rawSchema);
|
|
1173
|
+
if (inlinedSchema.$schema) {
|
|
1174
|
+
delete inlinedSchema.$schema;
|
|
1056
1175
|
}
|
|
1057
|
-
|
|
1058
|
-
logger.debug(
|
|
1059
|
-
|
|
1060
|
-
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
|
|
1061
|
-
// Add function responses to history
|
|
1062
|
-
currentContents.push({
|
|
1063
|
-
role: "function",
|
|
1064
|
-
parts: functionResponses,
|
|
1176
|
+
config.responseSchema = inlinedSchema;
|
|
1177
|
+
logger.debug("[GoogleVertex] Added responseSchema for JSON output (stream)", {
|
|
1178
|
+
schemaKeys: Object.keys(inlinedSchema),
|
|
1065
1179
|
});
|
|
1066
1180
|
}
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1181
|
+
}
|
|
1182
|
+
const startTime = Date.now();
|
|
1183
|
+
const timeout = this.getTimeout(options);
|
|
1184
|
+
const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
|
|
1185
|
+
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1186
|
+
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1187
|
+
const currentContents = [...contents];
|
|
1188
|
+
let finalText = "";
|
|
1189
|
+
let lastStepText = "";
|
|
1190
|
+
let totalInputTokens = 0;
|
|
1191
|
+
let totalOutputTokens = 0;
|
|
1192
|
+
const allToolCalls = [];
|
|
1193
|
+
let step = 0;
|
|
1194
|
+
const failedTools = new Map();
|
|
1195
|
+
// Agentic loop for tool calling
|
|
1196
|
+
try {
|
|
1197
|
+
while (step < maxSteps) {
|
|
1198
|
+
if (timeoutController?.controller.signal.aborted) {
|
|
1199
|
+
break;
|
|
1200
|
+
}
|
|
1201
|
+
step++;
|
|
1202
|
+
logger.debug(`[GoogleVertex] Native SDK step ${step}/${maxSteps}`);
|
|
1203
|
+
try {
|
|
1204
|
+
const stream = await client.models.generateContentStream({
|
|
1205
|
+
model: modelName,
|
|
1206
|
+
contents: currentContents,
|
|
1207
|
+
config,
|
|
1208
|
+
...(composedSignal
|
|
1209
|
+
? { httpOptions: { signal: composedSignal } }
|
|
1210
|
+
: {}),
|
|
1211
|
+
});
|
|
1212
|
+
const chunkResult = await collectStreamChunks(stream);
|
|
1213
|
+
totalInputTokens += chunkResult.inputTokens;
|
|
1214
|
+
totalOutputTokens += chunkResult.outputTokens;
|
|
1215
|
+
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1216
|
+
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1217
|
+
finalText = stepText;
|
|
1218
|
+
break;
|
|
1219
|
+
}
|
|
1220
|
+
lastStepText = stepText;
|
|
1221
|
+
// Record tool call events on the span
|
|
1222
|
+
for (const fc of chunkResult.stepFunctionCalls) {
|
|
1223
|
+
span.addEvent("gen_ai.tool_call", {
|
|
1224
|
+
"tool.name": fc.name,
|
|
1225
|
+
"tool.step": step,
|
|
1226
|
+
});
|
|
1227
|
+
}
|
|
1228
|
+
logger.debug(`[GoogleVertex] Executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1229
|
+
pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1230
|
+
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal });
|
|
1231
|
+
// Add function responses to history
|
|
1232
|
+
currentContents.push({
|
|
1233
|
+
role: "function",
|
|
1234
|
+
parts: functionResponses,
|
|
1235
|
+
});
|
|
1236
|
+
}
|
|
1237
|
+
catch (error) {
|
|
1238
|
+
logger.error("[GoogleVertex] Native SDK error", error);
|
|
1239
|
+
throw this.handleProviderError(error);
|
|
1240
|
+
}
|
|
1070
1241
|
}
|
|
1071
1242
|
}
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1243
|
+
finally {
|
|
1244
|
+
timeoutController?.cleanup();
|
|
1245
|
+
}
|
|
1246
|
+
finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
|
|
1247
|
+
const responseTime = Date.now() - startTime;
|
|
1248
|
+
// Set token usage and finish reason on the span
|
|
1249
|
+
span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
|
|
1250
|
+
span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
|
|
1251
|
+
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop");
|
|
1252
|
+
// Create async iterable for streaming result
|
|
1253
|
+
async function* createTextStream() {
|
|
1254
|
+
yield { content: finalText };
|
|
1255
|
+
}
|
|
1256
|
+
return {
|
|
1257
|
+
stream: createTextStream(),
|
|
1258
|
+
provider: this.providerName,
|
|
1259
|
+
model: modelName,
|
|
1260
|
+
usage: {
|
|
1261
|
+
input: totalInputTokens,
|
|
1262
|
+
output: totalOutputTokens,
|
|
1263
|
+
total: totalInputTokens + totalOutputTokens,
|
|
1264
|
+
},
|
|
1265
|
+
toolCalls: allToolCalls.map((tc) => ({
|
|
1266
|
+
toolName: tc.toolName,
|
|
1267
|
+
args: tc.args,
|
|
1268
|
+
})),
|
|
1269
|
+
metadata: {
|
|
1270
|
+
streamId: `native-vertex-${Date.now()}`,
|
|
1271
|
+
startTime,
|
|
1272
|
+
responseTime,
|
|
1273
|
+
totalToolExecutions: allToolCalls.length,
|
|
1274
|
+
},
|
|
1275
|
+
};
|
|
1276
|
+
});
|
|
1102
1277
|
}
|
|
1103
1278
|
/**
|
|
1104
1279
|
* Execute generate using native @google/genai SDK for Gemini 3 models on Vertex AI
|
|
1105
1280
|
* This bypasses @ai-sdk/google-vertex to properly handle thought_signature
|
|
1106
1281
|
*/
|
|
1107
1282
|
async executeNativeGemini3Generate(options) {
|
|
1108
|
-
const
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
|
|
1126
|
-
shouldUseTools = false;
|
|
1127
|
-
}
|
|
1128
|
-
const sdkTools = shouldUseTools ? await this.getAllTools() : {};
|
|
1129
|
-
const combinedTools = shouldUseTools
|
|
1130
|
-
? { ...sdkTools, ...(options.tools || {}) }
|
|
1131
|
-
: {};
|
|
1132
|
-
let toolsConfig;
|
|
1133
|
-
let executeMap = new Map();
|
|
1134
|
-
if (Object.keys(combinedTools).length > 0) {
|
|
1135
|
-
const result = buildNativeToolDeclarations(combinedTools);
|
|
1136
|
-
toolsConfig = result.toolsConfig;
|
|
1137
|
-
executeMap = result.executeMap;
|
|
1138
|
-
logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
|
|
1139
|
-
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
1140
|
-
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1283
|
+
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
1284
|
+
return withClientSpan({
|
|
1285
|
+
name: "neurolink.provider.generate",
|
|
1286
|
+
tracer: tracers.provider,
|
|
1287
|
+
attributes: {
|
|
1288
|
+
[ATTR.GEN_AI_SYSTEM]: "vertex",
|
|
1289
|
+
[ATTR.GEN_AI_MODEL]: modelName,
|
|
1290
|
+
[ATTR.GEN_AI_OPERATION]: "generate",
|
|
1291
|
+
[ATTR.NL_PROVIDER]: this.providerName,
|
|
1292
|
+
},
|
|
1293
|
+
}, async (span) => {
|
|
1294
|
+
const client = await this.createVertexGenAIClient(options.region);
|
|
1295
|
+
const effectiveLocation = options.region || this.location || getVertexLocation();
|
|
1296
|
+
logger.debug("[GoogleVertex] Using native @google/genai for Gemini 3 generate", {
|
|
1297
|
+
model: modelName,
|
|
1298
|
+
project: this.projectId,
|
|
1299
|
+
location: effectiveLocation,
|
|
1141
1300
|
});
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
}
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1301
|
+
// Build contents from input with multimodal support
|
|
1302
|
+
const inputText = options.prompt || options.input?.text || "Please respond.";
|
|
1303
|
+
const multimodalInput = options.input;
|
|
1304
|
+
const contents = this.buildNativeContentParts(inputText, multimodalInput, "native generate");
|
|
1305
|
+
// Get tools from SDK and options
|
|
1306
|
+
let shouldUseTools = !options.disableTools && this.supportsTools();
|
|
1307
|
+
// Guard: Gemini cannot use tools + JSON schema simultaneously
|
|
1308
|
+
const wantsJsonOutputGen = options.output?.format === "json" || options.schema;
|
|
1309
|
+
if (wantsJsonOutputGen && shouldUseTools) {
|
|
1310
|
+
logger.warn("[GoogleVertex] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request.");
|
|
1311
|
+
shouldUseTools = false;
|
|
1312
|
+
}
|
|
1313
|
+
const sdkTools = shouldUseTools ? await this.getAllTools() : {};
|
|
1314
|
+
const combinedTools = shouldUseTools
|
|
1315
|
+
? { ...sdkTools, ...(options.tools || {}) }
|
|
1316
|
+
: {};
|
|
1317
|
+
let toolsConfig;
|
|
1318
|
+
let executeMap = new Map();
|
|
1319
|
+
if (Object.keys(combinedTools).length > 0) {
|
|
1320
|
+
const result = buildNativeToolDeclarations(combinedTools);
|
|
1321
|
+
toolsConfig = result.toolsConfig;
|
|
1322
|
+
executeMap = result.executeMap;
|
|
1323
|
+
logger.debug("[GoogleVertex] Converted tools for native SDK generate", {
|
|
1324
|
+
toolCount: toolsConfig[0].functionDeclarations.length,
|
|
1325
|
+
toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name),
|
|
1326
|
+
});
|
|
1327
|
+
}
|
|
1328
|
+
// Build config
|
|
1329
|
+
const config = buildNativeConfig(options, toolsConfig);
|
|
1330
|
+
// Note: Schema/JSON output for Gemini 3 native SDK is complex due to $ref resolution issues
|
|
1331
|
+
// For now, schemas are handled via the AI SDK fallback path, not native SDK
|
|
1332
|
+
// TODO: Implement proper $ref resolution for complex nested schemas
|
|
1333
|
+
const startTime = Date.now();
|
|
1334
|
+
const timeout = this.getTimeout(options);
|
|
1335
|
+
const timeoutController = createTimeoutController(timeout, this.providerName, "generate");
|
|
1336
|
+
const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal);
|
|
1337
|
+
const maxSteps = computeMaxStepsShared(options.maxSteps);
|
|
1338
|
+
const currentContents = [...contents];
|
|
1339
|
+
let finalText = "";
|
|
1340
|
+
let lastStepText = "";
|
|
1341
|
+
let totalInputTokens = 0;
|
|
1342
|
+
let totalOutputTokens = 0;
|
|
1343
|
+
const allToolCalls = [];
|
|
1344
|
+
const toolExecutions = [];
|
|
1345
|
+
let step = 0;
|
|
1346
|
+
const failedTools = new Map();
|
|
1347
|
+
try {
|
|
1348
|
+
// Agentic loop for tool calling
|
|
1349
|
+
while (step < maxSteps) {
|
|
1350
|
+
if (timeoutController?.controller.signal.aborted) {
|
|
1186
1351
|
break;
|
|
1187
1352
|
}
|
|
1188
|
-
|
|
1189
|
-
logger.debug(`[GoogleVertex]
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1353
|
+
step++;
|
|
1354
|
+
logger.debug(`[GoogleVertex] Native SDK generate step ${step}/${maxSteps}`);
|
|
1355
|
+
try {
|
|
1356
|
+
// Use generateContentStream and collect all chunks (same as GoogleAIStudio)
|
|
1357
|
+
const stream = await client.models.generateContentStream({
|
|
1358
|
+
model: modelName,
|
|
1359
|
+
contents: currentContents,
|
|
1360
|
+
config,
|
|
1361
|
+
...(composedSignal
|
|
1362
|
+
? { httpOptions: { signal: composedSignal } }
|
|
1363
|
+
: {}),
|
|
1364
|
+
});
|
|
1365
|
+
const chunkResult = await collectStreamChunks(stream);
|
|
1366
|
+
totalInputTokens += chunkResult.inputTokens;
|
|
1367
|
+
totalOutputTokens += chunkResult.outputTokens;
|
|
1368
|
+
const stepText = extractTextFromParts(chunkResult.rawResponseParts);
|
|
1369
|
+
if (chunkResult.stepFunctionCalls.length === 0) {
|
|
1370
|
+
finalText = stepText;
|
|
1371
|
+
break;
|
|
1372
|
+
}
|
|
1373
|
+
lastStepText = stepText;
|
|
1374
|
+
// Record tool call events on the span
|
|
1375
|
+
for (const fc of chunkResult.stepFunctionCalls) {
|
|
1376
|
+
span.addEvent("gen_ai.tool_call", {
|
|
1377
|
+
"tool.name": fc.name,
|
|
1378
|
+
"tool.step": step,
|
|
1379
|
+
});
|
|
1380
|
+
}
|
|
1381
|
+
logger.debug(`[GoogleVertex] Generate executing ${chunkResult.stepFunctionCalls.length} function calls`);
|
|
1382
|
+
pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls);
|
|
1383
|
+
const functionResponses = await executeNativeToolCalls("[GoogleVertex]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal });
|
|
1384
|
+
// Add function responses to history
|
|
1385
|
+
currentContents.push({
|
|
1386
|
+
role: "function",
|
|
1387
|
+
parts: functionResponses,
|
|
1388
|
+
});
|
|
1389
|
+
}
|
|
1390
|
+
catch (error) {
|
|
1391
|
+
logger.error("[GoogleVertex] Native SDK generate error", error);
|
|
1392
|
+
throw this.handleProviderError(error);
|
|
1393
|
+
}
|
|
1201
1394
|
}
|
|
1202
1395
|
}
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1396
|
+
finally {
|
|
1397
|
+
timeoutController?.cleanup();
|
|
1398
|
+
}
|
|
1399
|
+
finalText = handleMaxStepsTermination("[GoogleVertex]", step, maxSteps, finalText, lastStepText);
|
|
1400
|
+
const responseTime = Date.now() - startTime;
|
|
1401
|
+
// Set token usage and finish reason on the span
|
|
1402
|
+
span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens);
|
|
1403
|
+
span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens);
|
|
1404
|
+
span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop");
|
|
1405
|
+
// Build EnhancedGenerateResult
|
|
1406
|
+
return {
|
|
1407
|
+
content: finalText,
|
|
1408
|
+
provider: this.providerName,
|
|
1409
|
+
model: modelName,
|
|
1410
|
+
usage: {
|
|
1411
|
+
input: totalInputTokens,
|
|
1412
|
+
output: totalOutputTokens,
|
|
1413
|
+
total: totalInputTokens + totalOutputTokens,
|
|
1414
|
+
},
|
|
1415
|
+
responseTime,
|
|
1416
|
+
toolsUsed: allToolCalls.map((tc) => tc.toolName),
|
|
1417
|
+
toolExecutions: toolExecutions,
|
|
1418
|
+
enhancedWithTools: allToolCalls.length > 0,
|
|
1419
|
+
};
|
|
1420
|
+
});
|
|
1224
1421
|
}
|
|
1225
1422
|
/**
|
|
1226
1423
|
* Process CSV files and append content to options.input.text
|
|
@@ -1298,7 +1495,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1298
1495
|
const options = typeof optionsOrPrompt === "string"
|
|
1299
1496
|
? { prompt: optionsOrPrompt }
|
|
1300
1497
|
: optionsOrPrompt;
|
|
1301
|
-
const modelName = options.model || this.modelName || getDefaultVertexModel();
|
|
1498
|
+
const modelName = this.resolveAlias(options.model || this.modelName || getDefaultVertexModel());
|
|
1302
1499
|
// Check if we should use native SDK for Gemini 3 with tools
|
|
1303
1500
|
const shouldUseTools = !options.disableTools && this.supportsTools();
|
|
1304
1501
|
const sdkTools = shouldUseTools ? await this.getAllTools() : {};
|
|
@@ -1458,6 +1655,23 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1458
1655
|
async hasAnthropicSupport() {
|
|
1459
1656
|
return hasAnthropicSupport();
|
|
1460
1657
|
}
|
|
1658
|
+
/**
|
|
1659
|
+
* Resolve a shorthand model name to its full versioned Vertex AI identifier.
|
|
1660
|
+
* Returns the original name unchanged if no alias exists.
|
|
1661
|
+
*
|
|
1662
|
+
* @param modelName - A model name, possibly a shorthand alias
|
|
1663
|
+
* @returns The resolved full versioned model name
|
|
1664
|
+
*
|
|
1665
|
+
* @example
|
|
1666
|
+
* ```typescript
|
|
1667
|
+
* provider.resolveModelAlias("claude-sonnet-4-5"); // "claude-sonnet-4-5@20250929"
|
|
1668
|
+
* provider.resolveModelAlias("gemini-3-pro"); // "gemini-3-pro-latest"
|
|
1669
|
+
* provider.resolveModelAlias("gemini-2.5-flash"); // "gemini-2.5-flash" (unchanged)
|
|
1670
|
+
* ```
|
|
1671
|
+
*/
|
|
1672
|
+
resolveModelAlias(modelName) {
|
|
1673
|
+
return VERTEX_MODEL_ALIASES[modelName] ?? modelName;
|
|
1674
|
+
}
|
|
1461
1675
|
/**
|
|
1462
1676
|
* Create an Anthropic model instance using vertexAnthropic provider
|
|
1463
1677
|
* Uses fresh vertex settings for each request with comprehensive validation
|
|
@@ -1544,11 +1758,11 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1544
1758
|
modelName,
|
|
1545
1759
|
issue: modelValidation.issue,
|
|
1546
1760
|
recommendedModels: [
|
|
1761
|
+
"claude-sonnet-4-6",
|
|
1762
|
+
"claude-opus-4-6",
|
|
1547
1763
|
"claude-sonnet-4-5@20250929",
|
|
1548
|
-
"claude-sonnet-4@20250514",
|
|
1549
1764
|
"claude-opus-4@20250514",
|
|
1550
1765
|
"claude-3-5-sonnet-20241022",
|
|
1551
|
-
"claude-3-5-haiku-20241022",
|
|
1552
1766
|
],
|
|
1553
1767
|
});
|
|
1554
1768
|
return null;
|
|
@@ -1749,6 +1963,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1749
1963
|
async checkVertexRegionalSupport(region = "us-central1") {
|
|
1750
1964
|
// Based on Google Cloud documentation, these regions support Anthropic models
|
|
1751
1965
|
const supportedRegions = [
|
|
1966
|
+
// Global endpoint (routed automatically)
|
|
1967
|
+
"global",
|
|
1752
1968
|
// North America
|
|
1753
1969
|
"us-central1",
|
|
1754
1970
|
"us-east1",
|
|
@@ -1810,10 +2026,17 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
1810
2026
|
}
|
|
1811
2027
|
// Validate against known Claude model patterns
|
|
1812
2028
|
const validPatterns = [
|
|
2029
|
+
// Claude 4.6 — versionless IDs (no @YYYYMMDD suffix)
|
|
2030
|
+
/^claude-opus-4-6$/,
|
|
2031
|
+
/^claude-sonnet-4-6$/,
|
|
2032
|
+
// Claude 4.x versioned
|
|
1813
2033
|
/^claude-sonnet-4@\d{8}$/,
|
|
1814
2034
|
/^claude-sonnet-4-5@\d{8}$/,
|
|
1815
2035
|
/^claude-opus-4@\d{8}$/,
|
|
1816
2036
|
/^claude-opus-4-1@\d{8}$/,
|
|
2037
|
+
/^claude-opus-4-5@\d{8}$/,
|
|
2038
|
+
/^claude-haiku-4-5@\d{8}$/,
|
|
2039
|
+
// Claude 3.x
|
|
1817
2040
|
/^claude-3-7-sonnet@\d{8}$/,
|
|
1818
2041
|
/^claude-3-5-sonnet-\d{8}$/,
|
|
1819
2042
|
/^claude-3-5-haiku-\d{8}$/,
|
|
@@ -2063,11 +2286,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2063
2286
|
return "image/png";
|
|
2064
2287
|
}
|
|
2065
2288
|
/**
|
|
2066
|
-
* Estimate token count from text
|
|
2289
|
+
* Estimate token count from text using centralized estimation with provider multipliers
|
|
2067
2290
|
*/
|
|
2068
2291
|
estimateTokenCount(text) {
|
|
2069
|
-
|
|
2070
|
-
return Math.ceil(text.length / 4);
|
|
2292
|
+
return estimateTokens(text, "vertex");
|
|
2071
2293
|
}
|
|
2072
2294
|
/**
|
|
2073
2295
|
* Obtain a Google Auth access token for Vertex AI REST API calls.
|
|
@@ -2123,7 +2345,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2123
2345
|
/**
|
|
2124
2346
|
* Build request parts for image generation from prompt, PDFs, and images.
|
|
2125
2347
|
*/
|
|
2126
|
-
buildImageGenerationParts(prompt, pdfFiles, inputImages) {
|
|
2348
|
+
async buildImageGenerationParts(prompt, pdfFiles, inputImages) {
|
|
2127
2349
|
const parts = [];
|
|
2128
2350
|
if (prompt) {
|
|
2129
2351
|
parts.push({ text: prompt });
|
|
@@ -2213,6 +2435,77 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2213
2435
|
continue;
|
|
2214
2436
|
}
|
|
2215
2437
|
}
|
|
2438
|
+
else if (image.startsWith("http://") ||
|
|
2439
|
+
image.startsWith("https://")) {
|
|
2440
|
+
// Download URL image and convert to base64
|
|
2441
|
+
try {
|
|
2442
|
+
// Validate URL to prevent SSRF attacks
|
|
2443
|
+
const parsedUrl = new URL(image);
|
|
2444
|
+
const hostname = parsedUrl.hostname;
|
|
2445
|
+
const blockedHosts = ["localhost", "127.0.0.1", "0.0.0.0", "[::1]"];
|
|
2446
|
+
if (blockedHosts.some((h) => hostname === h) ||
|
|
2447
|
+
/^(10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.)/.test(hostname)) {
|
|
2448
|
+
logger.warn(`[GoogleVertexProvider] Blocked fetch to private/local URL: ${hostname}`, { index: i });
|
|
2449
|
+
continue;
|
|
2450
|
+
}
|
|
2451
|
+
// DNS resolution check — verify resolved IPs are not private/loopback
|
|
2452
|
+
try {
|
|
2453
|
+
const { resolve4, resolve6 } = dns.promises;
|
|
2454
|
+
const addresses = [];
|
|
2455
|
+
try {
|
|
2456
|
+
addresses.push(...(await resolve4(hostname)));
|
|
2457
|
+
}
|
|
2458
|
+
catch {
|
|
2459
|
+
/* hostname may not have A records */
|
|
2460
|
+
}
|
|
2461
|
+
try {
|
|
2462
|
+
addresses.push(...(await resolve6(hostname)));
|
|
2463
|
+
}
|
|
2464
|
+
catch {
|
|
2465
|
+
/* hostname may not have AAAA records */
|
|
2466
|
+
}
|
|
2467
|
+
if (addresses.length > 0 &&
|
|
2468
|
+
addresses.every((addr) => isPrivateOrLoopbackAddress(addr))) {
|
|
2469
|
+
logger.warn(`[GoogleVertexProvider] Blocked fetch: hostname ${hostname} resolves to private/loopback address`, { index: i, addresses });
|
|
2470
|
+
continue;
|
|
2471
|
+
}
|
|
2472
|
+
}
|
|
2473
|
+
catch (dnsError) {
|
|
2474
|
+
logger.warn(`[GoogleVertexProvider] DNS resolution failed for ${hostname}, blocking fetch`, {
|
|
2475
|
+
index: i,
|
|
2476
|
+
error: dnsError instanceof Error
|
|
2477
|
+
? dnsError.message
|
|
2478
|
+
: String(dnsError),
|
|
2479
|
+
});
|
|
2480
|
+
continue;
|
|
2481
|
+
}
|
|
2482
|
+
const response = await fetch(image, {
|
|
2483
|
+
signal: AbortSignal.timeout(15_000),
|
|
2484
|
+
});
|
|
2485
|
+
if (!response.ok) {
|
|
2486
|
+
logger.warn(`Failed to fetch image URL (${response.status}), skipping`, { index: i, url: image });
|
|
2487
|
+
continue;
|
|
2488
|
+
}
|
|
2489
|
+
// Size guard — reject downloads exceeding 10 MB
|
|
2490
|
+
const contentLength = response.headers.get("content-length");
|
|
2491
|
+
if (contentLength &&
|
|
2492
|
+
Number(contentLength) > MAX_IMAGE_DOWNLOAD_BYTES) {
|
|
2493
|
+
logger.warn(`[GoogleVertexProvider] Image URL exceeds ${MAX_IMAGE_DOWNLOAD_BYTES} byte limit (Content-Length: ${contentLength}), skipping`, { index: i, url: image });
|
|
2494
|
+
continue;
|
|
2495
|
+
}
|
|
2496
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
2497
|
+
if (buffer.byteLength > MAX_IMAGE_DOWNLOAD_BYTES) {
|
|
2498
|
+
logger.warn(`[GoogleVertexProvider] Downloaded image exceeds ${MAX_IMAGE_DOWNLOAD_BYTES} byte limit (${buffer.byteLength} bytes), skipping`, { index: i, url: image });
|
|
2499
|
+
continue;
|
|
2500
|
+
}
|
|
2501
|
+
imageBase64 = buffer.toString("base64");
|
|
2502
|
+
mimeType = this.detectImageType(buffer);
|
|
2503
|
+
}
|
|
2504
|
+
catch (fetchError) {
|
|
2505
|
+
logger.warn(`Failed to download image from URL, skipping: ${fetchError instanceof Error ? fetchError.message : String(fetchError)}`, { index: i, url: image });
|
|
2506
|
+
continue;
|
|
2507
|
+
}
|
|
2508
|
+
}
|
|
2216
2509
|
else {
|
|
2217
2510
|
imageBase64 = image;
|
|
2218
2511
|
const decodedBuffer = Buffer.from(imageBase64, "base64");
|
|
@@ -2309,7 +2602,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2309
2602
|
});
|
|
2310
2603
|
try {
|
|
2311
2604
|
const token = await this.getImageGenerationAccessToken();
|
|
2312
|
-
const parts = this.buildImageGenerationParts(prompt, pdfFiles, inputImages);
|
|
2605
|
+
const parts = await this.buildImageGenerationParts(prompt, pdfFiles, inputImages);
|
|
2313
2606
|
// Build request body with CRITICAL response_modalities setting
|
|
2314
2607
|
const requestBody = {
|
|
2315
2608
|
contents: [{ role: "user", parts }],
|