@juspay/neurolink 7.6.1 → 7.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/CHANGELOG.md +15 -4
  2. package/README.md +78 -3
  3. package/dist/cli/commands/config.d.ts +275 -3
  4. package/dist/cli/commands/config.js +121 -0
  5. package/dist/cli/commands/mcp.js +77 -28
  6. package/dist/cli/factories/commandFactory.js +359 -6
  7. package/dist/core/analytics.js +7 -27
  8. package/dist/core/baseProvider.js +43 -4
  9. package/dist/core/constants.d.ts +46 -0
  10. package/dist/core/constants.js +47 -0
  11. package/dist/core/dynamicModels.d.ts +16 -4
  12. package/dist/core/dynamicModels.js +130 -26
  13. package/dist/core/evaluation.js +5 -1
  14. package/dist/core/evaluationProviders.d.ts +6 -2
  15. package/dist/core/evaluationProviders.js +41 -125
  16. package/dist/core/factory.d.ts +5 -0
  17. package/dist/core/factory.js +62 -50
  18. package/dist/core/modelConfiguration.d.ts +246 -0
  19. package/dist/core/modelConfiguration.js +775 -0
  20. package/dist/core/types.d.ts +22 -3
  21. package/dist/core/types.js +5 -1
  22. package/dist/factories/providerRegistry.js +3 -3
  23. package/dist/index.d.ts +1 -1
  24. package/dist/index.js +1 -1
  25. package/dist/lib/core/analytics.js +7 -27
  26. package/dist/lib/core/baseProvider.js +43 -4
  27. package/dist/lib/core/constants.d.ts +46 -0
  28. package/dist/lib/core/constants.js +47 -0
  29. package/dist/lib/core/dynamicModels.d.ts +16 -4
  30. package/dist/lib/core/dynamicModels.js +130 -26
  31. package/dist/lib/core/evaluation.js +5 -1
  32. package/dist/lib/core/evaluationProviders.d.ts +6 -2
  33. package/dist/lib/core/evaluationProviders.js +41 -125
  34. package/dist/lib/core/factory.d.ts +5 -0
  35. package/dist/lib/core/factory.js +63 -50
  36. package/dist/lib/core/modelConfiguration.d.ts +246 -0
  37. package/dist/lib/core/modelConfiguration.js +775 -0
  38. package/dist/lib/core/types.d.ts +22 -3
  39. package/dist/lib/core/types.js +5 -1
  40. package/dist/lib/factories/providerRegistry.js +3 -3
  41. package/dist/lib/index.d.ts +1 -1
  42. package/dist/lib/index.js +1 -1
  43. package/dist/lib/mcp/factory.d.ts +5 -5
  44. package/dist/lib/mcp/factory.js +2 -2
  45. package/dist/lib/mcp/servers/utilities/utilityServer.d.ts +1 -1
  46. package/dist/lib/mcp/servers/utilities/utilityServer.js +1 -1
  47. package/dist/lib/mcp/toolRegistry.js +2 -2
  48. package/dist/lib/neurolink.d.ts +168 -12
  49. package/dist/lib/neurolink.js +685 -123
  50. package/dist/lib/providers/anthropic.js +52 -2
  51. package/dist/lib/providers/googleAiStudio.js +4 -0
  52. package/dist/lib/providers/googleVertex.d.ts +75 -9
  53. package/dist/lib/providers/googleVertex.js +365 -46
  54. package/dist/lib/providers/huggingFace.d.ts +52 -11
  55. package/dist/lib/providers/huggingFace.js +180 -42
  56. package/dist/lib/providers/litellm.d.ts +9 -9
  57. package/dist/lib/providers/litellm.js +103 -16
  58. package/dist/lib/providers/ollama.d.ts +52 -17
  59. package/dist/lib/providers/ollama.js +276 -68
  60. package/dist/lib/sdk/toolRegistration.d.ts +42 -0
  61. package/dist/lib/sdk/toolRegistration.js +269 -27
  62. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  63. package/dist/lib/telemetry/telemetryService.js +38 -3
  64. package/dist/lib/types/contextTypes.d.ts +75 -11
  65. package/dist/lib/types/contextTypes.js +227 -1
  66. package/dist/lib/types/domainTypes.d.ts +62 -0
  67. package/dist/lib/types/domainTypes.js +5 -0
  68. package/dist/lib/types/generateTypes.d.ts +52 -0
  69. package/dist/lib/types/index.d.ts +1 -0
  70. package/dist/lib/types/mcpTypes.d.ts +1 -1
  71. package/dist/lib/types/mcpTypes.js +1 -1
  72. package/dist/lib/types/streamTypes.d.ts +14 -0
  73. package/dist/lib/types/universalProviderOptions.d.ts +1 -1
  74. package/dist/lib/utils/errorHandling.d.ts +142 -0
  75. package/dist/lib/utils/errorHandling.js +316 -0
  76. package/dist/lib/utils/factoryProcessing.d.ts +74 -0
  77. package/dist/lib/utils/factoryProcessing.js +588 -0
  78. package/dist/lib/utils/optionsConversion.d.ts +54 -0
  79. package/dist/lib/utils/optionsConversion.js +126 -0
  80. package/dist/lib/utils/optionsUtils.d.ts +246 -0
  81. package/dist/lib/utils/optionsUtils.js +960 -0
  82. package/dist/lib/utils/providerConfig.js +6 -2
  83. package/dist/lib/utils/providerHealth.d.ts +107 -0
  84. package/dist/lib/utils/providerHealth.js +543 -0
  85. package/dist/lib/utils/providerUtils.d.ts +17 -0
  86. package/dist/lib/utils/providerUtils.js +271 -16
  87. package/dist/lib/utils/timeout.js +1 -1
  88. package/dist/lib/utils/tokenLimits.d.ts +33 -0
  89. package/dist/lib/utils/tokenLimits.js +118 -0
  90. package/dist/mcp/factory.d.ts +5 -5
  91. package/dist/mcp/factory.js +2 -2
  92. package/dist/mcp/servers/utilities/utilityServer.d.ts +1 -1
  93. package/dist/mcp/servers/utilities/utilityServer.js +1 -1
  94. package/dist/mcp/toolRegistry.js +2 -2
  95. package/dist/neurolink.d.ts +168 -12
  96. package/dist/neurolink.js +685 -123
  97. package/dist/providers/anthropic.js +52 -2
  98. package/dist/providers/googleAiStudio.js +4 -0
  99. package/dist/providers/googleVertex.d.ts +75 -9
  100. package/dist/providers/googleVertex.js +365 -46
  101. package/dist/providers/huggingFace.d.ts +52 -11
  102. package/dist/providers/huggingFace.js +181 -43
  103. package/dist/providers/litellm.d.ts +9 -9
  104. package/dist/providers/litellm.js +103 -16
  105. package/dist/providers/ollama.d.ts +52 -17
  106. package/dist/providers/ollama.js +276 -68
  107. package/dist/sdk/toolRegistration.d.ts +42 -0
  108. package/dist/sdk/toolRegistration.js +269 -27
  109. package/dist/telemetry/telemetryService.d.ts +6 -0
  110. package/dist/telemetry/telemetryService.js +38 -3
  111. package/dist/types/contextTypes.d.ts +75 -11
  112. package/dist/types/contextTypes.js +227 -2
  113. package/dist/types/domainTypes.d.ts +62 -0
  114. package/dist/types/domainTypes.js +5 -0
  115. package/dist/types/generateTypes.d.ts +52 -0
  116. package/dist/types/index.d.ts +1 -0
  117. package/dist/types/mcpTypes.d.ts +1 -1
  118. package/dist/types/mcpTypes.js +1 -1
  119. package/dist/types/streamTypes.d.ts +14 -0
  120. package/dist/types/universalProviderOptions.d.ts +1 -1
  121. package/dist/types/universalProviderOptions.js +0 -1
  122. package/dist/utils/errorHandling.d.ts +142 -0
  123. package/dist/utils/errorHandling.js +316 -0
  124. package/dist/utils/factoryProcessing.d.ts +74 -0
  125. package/dist/utils/factoryProcessing.js +588 -0
  126. package/dist/utils/optionsConversion.d.ts +54 -0
  127. package/dist/utils/optionsConversion.js +126 -0
  128. package/dist/utils/optionsUtils.d.ts +246 -0
  129. package/dist/utils/optionsUtils.js +960 -0
  130. package/dist/utils/providerConfig.js +6 -2
  131. package/dist/utils/providerHealth.d.ts +107 -0
  132. package/dist/utils/providerHealth.js +543 -0
  133. package/dist/utils/providerUtils.d.ts +17 -0
  134. package/dist/utils/providerUtils.js +271 -16
  135. package/dist/utils/timeout.js +1 -1
  136. package/dist/utils/tokenLimits.d.ts +33 -0
  137. package/dist/utils/tokenLimits.js +118 -0
  138. package/package.json +2 -2
@@ -1,9 +1,10 @@
1
1
  import { createVertex, } from "@ai-sdk/google-vertex";
2
- import { streamText, Output } from "ai";
2
+ import { streamText, Output, } from "ai";
3
3
  import { BaseProvider } from "../core/baseProvider.js";
4
4
  import { logger } from "../utils/logger.js";
5
- import { createTimeoutController, TimeoutError, getDefaultTimeout, } from "../utils/timeout.js";
5
+ import { TimeoutError } from "../utils/timeout.js";
6
6
  import { DEFAULT_MAX_TOKENS } from "../core/constants.js";
7
+ import { ModelConfigurationManager } from "../core/modelConfiguration.js";
7
8
  import { validateApiKey, createVertexProjectConfig, createGoogleAuthConfig, } from "../utils/providerConfig.js";
8
9
  // Cache for anthropic module to avoid repeated imports
9
10
  let _createVertexAnthropic = null;
@@ -28,7 +29,6 @@ async function getCreateVertexAnthropic() {
28
29
  return null;
29
30
  }
30
31
  }
31
- // Configuration helpers
32
32
  // Configuration helpers - now using consolidated utility
33
33
  const getVertexProjectId = () => {
34
34
  return validateApiKey(createVertexProjectConfig());
@@ -40,6 +40,8 @@ const getVertexLocation = () => {
40
40
  "us-central1");
41
41
  };
42
42
  const getDefaultVertexModel = () => {
43
+ // Use gemini-1.5-pro as default - stable and widely supported model
44
+ // Override with VERTEX_MODEL environment variable if needed
43
45
  return process.env.VERTEX_MODEL || "gemini-1.5-pro";
44
46
  };
45
47
  const hasGoogleCredentials = () => {
@@ -48,24 +50,80 @@ const hasGoogleCredentials = () => {
48
50
  (process.env.GOOGLE_AUTH_CLIENT_EMAIL &&
49
51
  process.env.GOOGLE_AUTH_PRIVATE_KEY));
50
52
  };
53
+ // Enhanced Vertex settings creation with authentication fallback
54
+ const createVertexSettings = () => {
55
+ const baseSettings = {
56
+ project: getVertexProjectId(),
57
+ location: getVertexLocation(),
58
+ };
59
+ // Check for principal account authentication first (recommended for production)
60
+ if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
61
+ logger.debug("Using principal account authentication (recommended)", {
62
+ credentialsPath: process.env.GOOGLE_APPLICATION_CREDENTIALS
63
+ ? "[PROVIDED]"
64
+ : "[NOT_PROVIDED]",
65
+ authMethod: "principal_account",
66
+ });
67
+ // For principal account auth, we don't need to provide explicit credentials
68
+ // The google-auth-library will use GOOGLE_APPLICATION_CREDENTIALS automatically
69
+ return baseSettings;
70
+ }
71
+ // Fallback to explicit credentials for development
72
+ if (process.env.GOOGLE_AUTH_CLIENT_EMAIL &&
73
+ process.env.GOOGLE_AUTH_PRIVATE_KEY) {
74
+ logger.debug("Using explicit credentials authentication", {
75
+ authMethod: "explicit_credentials",
76
+ hasClientEmail: !!process.env.GOOGLE_AUTH_CLIENT_EMAIL,
77
+ hasPrivateKey: !!process.env.GOOGLE_AUTH_PRIVATE_KEY,
78
+ });
79
+ return {
80
+ ...baseSettings,
81
+ googleAuthOptions: {
82
+ credentials: {
83
+ client_email: process.env.GOOGLE_AUTH_CLIENT_EMAIL,
84
+ private_key: process.env.GOOGLE_AUTH_PRIVATE_KEY.replace(/\\n/g, "\n"),
85
+ },
86
+ },
87
+ };
88
+ }
89
+ // Log warning if no valid authentication is available
90
+ logger.warn("No valid authentication found for Google Vertex AI", {
91
+ authMethod: "none",
92
+ hasPrincipalAccount: !!process.env.GOOGLE_APPLICATION_CREDENTIALS,
93
+ hasExplicitCredentials: !!(process.env.GOOGLE_AUTH_CLIENT_EMAIL &&
94
+ process.env.GOOGLE_AUTH_PRIVATE_KEY),
95
+ });
96
+ return baseSettings;
97
+ };
98
+ // Helper function to determine if a model is an Anthropic model
99
+ const isAnthropicModel = (modelName) => {
100
+ return modelName.toLowerCase().includes("claude");
101
+ };
51
102
  /**
52
103
  * Google Vertex AI Provider v2 - BaseProvider Implementation
53
104
  *
54
- * PHASE 3.5: Simple BaseProvider wrap around existing @ai-sdk/google-vertex implementation
55
- *
56
105
  * Features:
57
106
  * - Extends BaseProvider for shared functionality
58
107
  * - Preserves existing Google Cloud authentication
59
108
  * - Maintains Anthropic model support via dynamic imports
60
- * - Uses pre-initialized Vertex instance for efficiency
109
+ * - Fresh model creation for each request
61
110
  * - Enhanced error handling with setup guidance
111
+ * - Tool registration and context management
62
112
  */
63
113
  export class GoogleVertexProvider extends BaseProvider {
64
- vertex;
65
- model;
66
114
  projectId;
67
115
  location;
68
- cachedAnthropicModel = null;
116
+ registeredTools = new Map();
117
+ toolContext = {};
118
+ // Memory-managed cache for model configuration lookups to avoid repeated calls
119
+ // Backed by a plain static Map (not a WeakMap): cleared once CACHE_DURATION elapses, with entries re-inserted on access to keep LRU order
120
+ static modelConfigCache = new Map();
121
+ static modelConfigCacheTime = 0;
122
+ static CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
123
+ static MAX_CACHE_SIZE = 50; // Prevent memory leaks by limiting cache size
124
+ // Memory-managed cache for maxTokens handling decisions to optimize streaming performance
125
+ static maxTokensCache = new Map();
126
+ static maxTokensCacheTime = 0;
69
127
  constructor(modelName, sdk) {
70
128
  super(modelName, "vertex", sdk);
71
129
  // Validate Google Cloud credentials - now using consolidated utility
@@ -75,14 +133,6 @@ export class GoogleVertexProvider extends BaseProvider {
75
133
  // Initialize Google Cloud configuration
76
134
  this.projectId = getVertexProjectId();
77
135
  this.location = getVertexLocation();
78
- const vertexConfig = {
79
- project: this.projectId,
80
- location: this.location,
81
- };
82
- // Create Vertex provider instance
83
- this.vertex = createVertex(vertexConfig);
84
- // Pre-initialize model for efficiency
85
- this.model = this.vertex(this.modelName || getDefaultVertexModel());
86
136
  logger.debug("Google Vertex AI BaseProvider v2 initialized", {
87
137
  modelName: this.modelName,
88
138
  projectId: this.projectId,
@@ -98,37 +148,101 @@ export class GoogleVertexProvider extends BaseProvider {
98
148
  }
99
149
  /**
100
150
  * Returns the Vercel AI SDK model instance for Google Vertex
101
- * Handles both Google and Anthropic models
151
+ * Creates fresh model instances for each request
102
152
  */
103
153
  async getAISDKModel() {
154
+ const model = await this.getModel();
155
+ return model;
156
+ }
157
+ /**
158
+ * Gets the appropriate model instance (Google or Anthropic)
159
+ * Creates fresh instances for each request to ensure proper authentication
160
+ */
161
+ async getModel() {
162
+ const modelName = this.modelName || getDefaultVertexModel();
104
163
  // Check if this is an Anthropic model
105
- if (this.modelName && this.modelName.includes("claude")) {
106
- // Return cached Anthropic model if available
107
- if (this.cachedAnthropicModel) {
108
- return this.cachedAnthropicModel;
109
- }
110
- // Create and cache new Anthropic model
111
- const anthropicModel = await this.createAnthropicModel(this.modelName);
164
+ if (isAnthropicModel(modelName)) {
165
+ logger.debug("Creating Anthropic model for Vertex AI", { modelName });
166
+ const anthropicModel = await this.createAnthropicModel(modelName);
112
167
  if (anthropicModel) {
113
- this.cachedAnthropicModel = anthropicModel;
114
168
  return anthropicModel;
115
169
  }
116
170
  // Fall back to regular model if Anthropic not available
117
- logger.warn(`Anthropic model ${this.modelName} requested but not available, falling back to Google model`);
171
+ logger.warn(`Anthropic model ${modelName} requested but not available, falling back to Google model`);
118
172
  }
119
- return this.model;
173
+ // Create fresh Google Vertex model with current settings
174
+ logger.debug("Creating Google Vertex model", {
175
+ modelName,
176
+ project: this.projectId,
177
+ location: this.location,
178
+ });
179
+ const vertex = createVertex(createVertexSettings());
180
+ const model = vertex(modelName);
181
+ return model;
120
182
  }
121
183
  // executeGenerate removed - BaseProvider handles all generation with tools
122
184
  async executeStream(options, analysisSchema) {
185
+ const functionTag = "GoogleVertexProvider.executeStream";
186
+ let chunkCount = 0;
123
187
  try {
124
188
  this.validateStreamOptions(options);
125
- const result = await streamText({
126
- model: this.model,
189
+ logger.debug(`${functionTag}: Starting stream request`, {
190
+ modelName: this.modelName,
191
+ promptLength: options.input.text.length,
192
+ hasSchema: !!analysisSchema,
193
+ });
194
+ const model = await this.getModel();
195
+ // Model-specific maxTokens handling
196
+ const modelName = this.modelName || getDefaultVertexModel();
197
+ // Use cached model configuration to determine maxTokens handling for streaming performance
198
+ // This avoids hardcoded model-specific logic and repeated config lookups
199
+ const shouldSetMaxTokens = this.shouldSetMaxTokensCached(modelName);
200
+ const maxTokens = shouldSetMaxTokens
201
+ ? options.maxTokens || DEFAULT_MAX_TOKENS
202
+ : undefined;
203
+ // Build complete stream options with proper typing
204
+ let streamOptions = {
205
+ model: model,
127
206
  prompt: options.input.text,
128
207
  system: options.systemPrompt,
129
- maxTokens: options.maxTokens || DEFAULT_MAX_TOKENS,
130
208
  temperature: options.temperature,
131
- });
209
+ ...(maxTokens && { maxTokens }),
210
+ onError: (event) => {
211
+ const error = event.error;
212
+ const errorMessage = error instanceof Error ? error.message : String(error);
213
+ logger.error(`${functionTag}: Stream error`, {
214
+ provider: this.providerName,
215
+ modelName: this.modelName,
216
+ error: errorMessage,
217
+ chunkCount,
218
+ });
219
+ },
220
+ onFinish: (event) => {
221
+ logger.debug(`${functionTag}: Stream finished`, {
222
+ finishReason: event.finishReason,
223
+ totalChunks: chunkCount,
224
+ });
225
+ },
226
+ onChunk: () => {
227
+ chunkCount++;
228
+ },
229
+ };
230
+ if (analysisSchema) {
231
+ try {
232
+ streamOptions = {
233
+ ...streamOptions,
234
+ experimental_output: Output.object({
235
+ schema: analysisSchema,
236
+ }),
237
+ };
238
+ }
239
+ catch (error) {
240
+ logger.warn("Schema application failed, continuing without schema", {
241
+ error: String(error),
242
+ });
243
+ }
244
+ }
245
+ const result = streamText(streamOptions);
132
246
  return {
133
247
  stream: (async function* () {
134
248
  for await (const chunk of result.textStream) {
@@ -140,6 +254,12 @@ export class GoogleVertexProvider extends BaseProvider {
140
254
  };
141
255
  }
142
256
  catch (error) {
257
+ logger.error(`${functionTag}: Exception`, {
258
+ provider: this.providerName,
259
+ modelName: this.modelName,
260
+ error: String(error),
261
+ chunkCount,
262
+ });
143
263
  throw this.handleProviderError(error);
144
264
  }
145
265
  }
@@ -153,24 +273,28 @@ export class GoogleVertexProvider extends BaseProvider {
153
273
  ? errorRecord.message
154
274
  : "Unknown error occurred";
155
275
  if (message.includes("PERMISSION_DENIED")) {
156
- return new Error(`❌ Google Vertex AI Permission Denied\n\nYour Google Cloud credentials don't have permission to access Vertex AI.\n\n🔧 Required Steps:\n1. Ensure your service account has Vertex AI User role\n2. Check if Vertex AI API is enabled in your project\n3. Verify your project ID is correct\n4. Confirm your location/region has Vertex AI available`);
276
+ return new Error(`❌ Google Vertex AI Permission Denied\n\nYour Google Cloud credentials don't have permission to access Vertex AI.\n\nRequired Steps:\n1. Ensure your service account has Vertex AI User role\n2. Check if Vertex AI API is enabled in your project\n3. Verify your project ID is correct\n4. Confirm your location/region has Vertex AI available`);
157
277
  }
158
278
  if (message.includes("NOT_FOUND")) {
159
- return new Error(`❌ Google Vertex AI Model Not Found\n\n${message}\n\n🔧 Check:\n1. Model name is correct (e.g., 'gemini-1.5-pro')\n2. Model is available in your region (${this.location})\n3. Your project has access to the model\n4. Model supports your request parameters`);
279
+ return new Error(`❌ Google Vertex AI Model Not Found\n\n${message}\n\nCheck:\n1. Model name is correct (e.g., 'gemini-1.5-pro')\n2. Model is available in your region (${this.location})\n3. Your project has access to the model\n4. Model supports your request parameters`);
160
280
  }
161
281
  if (message.includes("QUOTA_EXCEEDED")) {
162
- return new Error(`❌ Google Vertex AI Quota Exceeded\n\n${message}\n\n🔧 Solutions:\n1. Check your Vertex AI quotas in Google Cloud Console\n2. Request quota increase if needed\n3. Try a different model or reduce request frequency\n4. Consider using a different region`);
282
+ return new Error(`❌ Google Vertex AI Quota Exceeded\n\n${message}\n\nSolutions:\n1. Check your Vertex AI quotas in Google Cloud Console\n2. Request quota increase if needed\n3. Try a different model or reduce request frequency\n4. Consider using a different region`);
163
283
  }
164
284
  if (message.includes("INVALID_ARGUMENT")) {
165
- return new Error(`❌ Google Vertex AI Invalid Request\n\n${message}\n\n🔧 Check:\n1. Request parameters are within model limits\n2. Input text is properly formatted\n3. Temperature and other settings are valid\n4. Model supports your request type`);
285
+ return new Error(`❌ Google Vertex AI Invalid Request\n\n${message}\n\nCheck:\n1. Request parameters are within model limits\n2. Input text is properly formatted\n3. Temperature and other settings are valid\n4. Model supports your request type`);
166
286
  }
167
- return new Error(`❌ Google Vertex AI Provider Error\n\n${message}\n\n🔧 Troubleshooting:\n1. Check Google Cloud credentials and permissions\n2. Verify project ID and location settings\n3. Ensure Vertex AI API is enabled\n4. Check network connectivity`);
287
+ return new Error(`❌ Google Vertex AI Provider Error\n\n${message}\n\nTroubleshooting:\n1. Check Google Cloud credentials and permissions\n2. Verify project ID and location settings\n3. Ensure Vertex AI API is enabled\n4. Check network connectivity`);
168
288
  }
169
289
  validateStreamOptions(options) {
170
290
  if (!options.input?.text?.trim()) {
171
291
  throw new Error("Prompt is required for streaming");
172
292
  }
173
- if (options.maxTokens &&
293
+ // Use cached model configuration for validation performance
294
+ const modelName = this.modelName || getDefaultVertexModel();
295
+ const shouldValidateMaxTokens = this.shouldSetMaxTokensCached(modelName);
296
+ if (shouldValidateMaxTokens &&
297
+ options.maxTokens &&
174
298
  (options.maxTokens < 1 || options.maxTokens > 8192)) {
175
299
  throw new Error("maxTokens must be between 1 and 8192 for Google Vertex AI");
176
300
  }
@@ -179,6 +303,93 @@ export class GoogleVertexProvider extends BaseProvider {
179
303
  throw new Error("temperature must be between 0 and 2");
180
304
  }
181
305
  }
306
+ /**
307
+ * Memory-safe cache management for model configurations
308
+ * Implements LRU eviction to prevent memory leaks in long-running processes
309
+ */
310
+ static evictLRUCacheEntries(cache) {
311
+ if (cache.size <= GoogleVertexProvider.MAX_CACHE_SIZE) {
312
+ return;
313
+ }
314
+ // Evict oldest entries (first entries in Map are oldest in insertion order)
315
+ const entriesToRemove = cache.size - GoogleVertexProvider.MAX_CACHE_SIZE + 5; // Remove extra to avoid frequent evictions
316
+ let removed = 0;
317
+ for (const key of cache.keys()) {
318
+ if (removed >= entriesToRemove) {
319
+ break;
320
+ }
321
+ cache.delete(key);
322
+ removed++;
323
+ }
324
+ logger.debug("GoogleVertexProvider: Evicted LRU cache entries", {
325
+ entriesRemoved: removed,
326
+ currentCacheSize: cache.size,
327
+ });
328
+ }
329
+ /**
330
+ * Access and refresh cache entry (moves to end for LRU)
331
+ */
332
+ static accessCacheEntry(cache, key) {
333
+ const value = cache.get(key);
334
+ if (value !== undefined) {
335
+ // Move to end (most recently used)
336
+ cache.delete(key);
337
+ cache.set(key, value);
338
+ }
339
+ return value;
340
+ }
341
+ /**
342
+ * Memory-safe cached check for whether maxTokens should be set for the given model
343
+ * Optimized for streaming performance with LRU eviction to prevent memory leaks
344
+ */
345
+ shouldSetMaxTokensCached(modelName) {
346
+ const now = Date.now();
347
+ // Check if cache is valid (within 5 minutes)
348
+ if (now - GoogleVertexProvider.maxTokensCacheTime >
349
+ GoogleVertexProvider.CACHE_DURATION) {
350
+ // Cache expired, refresh all cached results
351
+ GoogleVertexProvider.maxTokensCache.clear();
352
+ GoogleVertexProvider.maxTokensCacheTime = now;
353
+ }
354
+ // Check if we have cached result for this model (with LRU access)
355
+ const cachedResult = GoogleVertexProvider.accessCacheEntry(GoogleVertexProvider.maxTokensCache, modelName);
356
+ if (cachedResult !== undefined) {
357
+ return cachedResult;
358
+ }
359
+ // Calculate and cache the result with memory management
360
+ const shouldSet = !this.modelHasMaxTokensIssues(modelName);
361
+ GoogleVertexProvider.maxTokensCache.set(modelName, shouldSet);
362
+ // Prevent memory leaks by evicting old entries if cache grows too large
363
+ GoogleVertexProvider.evictLRUCacheEntries(GoogleVertexProvider.maxTokensCache);
364
+ return shouldSet;
365
+ }
366
+ /**
367
+ * Memory-safe check if model has maxTokens issues using configuration-based approach
368
+ * This replaces hardcoded model-specific logic with configurable behavior
369
+ * Includes LRU caching to avoid repeated configuration lookups during streaming
370
+ */
371
+ modelHasMaxTokensIssues(modelName) {
372
+ const now = Date.now();
373
+ const cacheKey = "google-vertex-config";
374
+ // Check if cache is valid (within 5 minutes)
375
+ if (now - GoogleVertexProvider.modelConfigCacheTime >
376
+ GoogleVertexProvider.CACHE_DURATION) {
377
+ // Cache expired, refresh it with memory management
378
+ GoogleVertexProvider.modelConfigCache.clear();
379
+ const config = ModelConfigurationManager.getInstance();
380
+ const vertexConfig = config.getProviderConfig("google-vertex");
381
+ GoogleVertexProvider.modelConfigCache.set(cacheKey, vertexConfig);
382
+ GoogleVertexProvider.modelConfigCacheTime = now;
383
+ }
384
+ // Access cached config with LRU behavior
385
+ const vertexConfig = GoogleVertexProvider.accessCacheEntry(GoogleVertexProvider.modelConfigCache, cacheKey);
386
+ // Check if model is in the list of models with maxTokens issues
387
+ const modelsWithIssues = vertexConfig?.modelBehavior?.maxTokensIssues || [
388
+ "gemini-2.5-flash",
389
+ "gemini-2.5-pro",
390
+ ];
391
+ return modelsWithIssues.some((problematicModel) => modelName.includes(problematicModel));
392
+ }
182
393
  /**
183
394
  * Check if Anthropic models are available
184
395
  * @returns Promise<boolean> indicating if Anthropic support is available
@@ -189,19 +400,127 @@ export class GoogleVertexProvider extends BaseProvider {
189
400
  }
190
401
  /**
191
402
  * Create an Anthropic model instance if available
403
+ * Uses fresh vertex settings for each request
192
404
  * @param modelName Anthropic model name (e.g., 'claude-3-sonnet@20240229')
193
405
  * @returns LanguageModelV1 instance or null if not available
194
406
  */
195
- async createAnthropicModel(modelName) {
196
- const createVertexAnthropic = await getCreateVertexAnthropic();
197
- if (!createVertexAnthropic) {
198
- return null;
407
+ createAnthropicModel(modelName) {
408
+ return getCreateVertexAnthropic().then((createVertexAnthropic) => {
409
+ if (!createVertexAnthropic) {
410
+ return null;
411
+ }
412
+ // Use fresh vertex settings instead of cached config
413
+ // Type guard to ensure createVertexAnthropic is callable
414
+ if (typeof createVertexAnthropic !== "function") {
415
+ throw new Error("createVertexAnthropic is not a function");
416
+ }
417
+ const vertexSettings = createVertexSettings();
418
+ const vertexAnthropicInstance = createVertexAnthropic(vertexSettings);
419
+ // Type guard to ensure the returned instance has the expected model creation method
420
+ if (!vertexAnthropicInstance ||
421
+ typeof vertexAnthropicInstance !== "function") {
422
+ throw new Error("Failed to create valid Anthropic instance");
423
+ }
424
+ const model = vertexAnthropicInstance(modelName);
425
+ // Type guard to ensure the returned model implements LanguageModelV1
426
+ if (!model ||
427
+ typeof model !== "object" ||
428
+ !("specificationVersion" in model)) {
429
+ throw new Error("Failed to create valid LanguageModelV1 instance");
430
+ }
431
+ return model;
432
+ });
433
+ }
434
+ /**
435
+ * Register a tool with the AI provider
436
+ * @param name The name of the tool
437
+ * @param schema The Zod schema defining the tool's parameters
438
+ * @param description A description of what the tool does
439
+ * @param handler The function to execute when the tool is called
440
+ */
441
+ registerTool(name, schema, description, handler) {
442
+ const functionTag = "GoogleVertexProvider.registerTool";
443
+ try {
444
+ const tool = {
445
+ description,
446
+ parameters: schema,
447
+ execute: async (params) => {
448
+ try {
449
+ const contextEnrichedParams = {
450
+ ...params,
451
+ __context: this.toolContext,
452
+ };
453
+ return await handler(contextEnrichedParams);
454
+ }
455
+ catch (error) {
456
+ logger.error(`${functionTag}: Tool execution error`, {
457
+ toolName: name,
458
+ error: error instanceof Error ? error.message : String(error),
459
+ });
460
+ throw error;
461
+ }
462
+ },
463
+ };
464
+ this.registeredTools.set(name, tool);
465
+ logger.debug(`${functionTag}: Tool registered`, {
466
+ toolName: name,
467
+ modelName: this.modelName,
468
+ });
199
469
  }
200
- const vertexAnthropic = createVertexAnthropic({
201
- project: this.projectId,
202
- location: this.location,
470
+ catch (error) {
471
+ logger.error(`${functionTag}: Tool registration error`, {
472
+ toolName: name,
473
+ error: error instanceof Error ? error.message : String(error),
474
+ });
475
+ throw error;
476
+ }
477
+ }
478
+ /**
479
+ * Set the context for tool execution
480
+ * @param context The context to use for tool execution
481
+ */
482
+ setToolContext(context) {
483
+ this.toolContext = { ...this.toolContext, ...context };
484
+ logger.debug("GoogleVertexProvider.setToolContext: Tool context set", {
485
+ contextKeys: Object.keys(context),
203
486
  });
204
- return vertexAnthropic(modelName);
487
+ }
488
+ /**
489
+ * Get the current tool execution context
490
+ * @returns The current tool execution context
491
+ */
492
+ getToolContext() {
493
+ return { ...this.toolContext };
494
+ }
495
+ /**
496
+ * Clear all static caches - useful for testing and memory cleanup
497
+ * Public method to allow external cache management
498
+ */
499
+ static clearCaches() {
500
+ GoogleVertexProvider.modelConfigCache.clear();
501
+ GoogleVertexProvider.maxTokensCache.clear();
502
+ GoogleVertexProvider.modelConfigCacheTime = 0;
503
+ GoogleVertexProvider.maxTokensCacheTime = 0;
504
+ logger.debug("GoogleVertexProvider: All caches cleared", {
505
+ clearedAt: Date.now(),
506
+ });
507
+ }
508
+ /**
509
+ * Get cache statistics for monitoring and debugging
510
+ */
511
+ static getCacheStats() {
512
+ const now = Date.now();
513
+ return {
514
+ modelConfigCacheSize: GoogleVertexProvider.modelConfigCache.size,
515
+ maxTokensCacheSize: GoogleVertexProvider.maxTokensCache.size,
516
+ maxCacheSize: GoogleVertexProvider.MAX_CACHE_SIZE,
517
+ cacheAge: {
518
+ modelConfig: now - GoogleVertexProvider.modelConfigCacheTime,
519
+ maxTokens: now - GoogleVertexProvider.maxTokensCacheTime,
520
+ },
521
+ };
205
522
  }
206
523
  }
207
524
  export default GoogleVertexProvider;
525
+ // Re-export for compatibility
526
+ export { GoogleVertexProvider as GoogleVertexAI };
@@ -11,29 +11,70 @@ export declare class HuggingFaceProvider extends BaseProvider {
11
11
  private model;
12
12
  constructor(modelName?: string);
13
13
  /**
14
- * HuggingFace models currently don't properly support tool/function calling
14
+ * HuggingFace Tool Calling Support (Enhanced 2025)
15
15
  *
16
- * **Tested Models & Issues:**
17
- * - microsoft/DialoGPT-medium: Describes tools instead of executing them
18
- * - Most HF models via router endpoint: Function schema passed but not executed
19
- * - Issue: Models treat tool definitions as conversation context rather than executable functions
16
+ * **Supported Models (Tool Calling Enabled):**
17
+ * - meta-llama/Llama-3.1-8B-Instruct - Post-trained for tool calling
18
+ * - meta-llama/Llama-3.1-70B-Instruct - Advanced tool calling capabilities
19
+ * - meta-llama/Llama-3.1-405B-Instruct - Full tool calling support
20
+ * - nvidia/Llama-3.1-Nemotron-Ultra-253B-v1 - Optimized for tool calling
21
+ * - NousResearch/Hermes-3-Llama-3.2-3B - Function calling trained
22
+ * - codellama/CodeLlama-34b-Instruct-hf - Code-focused tool calling
23
+ * - mistralai/Mistral-7B-Instruct-v0.3 - Basic tool support
20
24
  *
21
- * **Known Limitations:**
22
- * - Tools are visible to model but treated as descriptive text
23
- * - No proper function call response format handling
24
- * - HuggingFace router endpoint doesn't enforce OpenAI-compatible tool execution
25
+ * **Unsupported Models (Tool Calling Disabled):**
26
+ * - microsoft/DialoGPT-* - Treats tools as conversation context
27
+ * - gpt2, bert, roberta variants - No tool calling training
28
+ * - Most pre-2024 models - Limited function calling capabilities
25
29
  *
26
- * @returns false to disable tools by default until proper implementation
30
+ * **Implementation Details:**
31
+ * - Intelligent model detection based on known capabilities
32
+ * - Custom tool schema formatting for HuggingFace models
33
+ * - Enhanced response parsing for function call extraction
34
+ * - Graceful fallback for unsupported models
35
+ *
36
+ * @returns true for supported models, false for unsupported models
27
37
  */
28
38
  supportsTools(): boolean;
29
39
  protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
40
+ /**
41
+ * Prepare stream options with HuggingFace-specific enhancements
42
+ * Handles tool calling optimizations and model-specific formatting
43
+ */
44
+ private prepareStreamOptions;
45
+ /**
46
+ * Enhance system prompt with tool calling instructions for HuggingFace models
47
+ * Many HF models benefit from explicit tool calling guidance
48
+ */
49
+ private enhanceSystemPromptForTools;
50
+ /**
51
+ * Format tools for HuggingFace model compatibility
52
+ * Some models require specific tool schema formatting
53
+ */
54
+ private formatToolsForHuggingFace;
55
+ /**
56
+ * Get recommendations for tool-calling capable HuggingFace models
57
+ * Provides guidance for users who want to use function calling
58
+ */
59
+ static getToolCallingRecommendations(): {
60
+ recommended: string[];
61
+ performance: Record<string, {
62
+ speed: number;
63
+ quality: number;
64
+ cost: number;
65
+ }>;
66
+ notes: Record<string, string>;
67
+ };
68
+ /**
69
+ * Enhanced error handling with HuggingFace-specific guidance
70
+ */
71
+ protected handleProviderError(error: unknown): Error;
30
72
  protected getProviderName(): AIProviderName;
31
73
  protected getDefaultModel(): string;
32
74
  /**
33
75
  * Returns the Vercel AI SDK model instance for HuggingFace
34
76
  */
35
77
  protected getAISDKModel(): LanguageModelV1;
36
- protected handleProviderError(error: unknown): Error;
37
78
  private validateStreamOptions;
38
79
  }
39
80
  export default HuggingFaceProvider;