@juspay/neurolink 7.6.0 → 7.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. package/CHANGELOG.md +14 -2
  2. package/README.md +79 -4
  3. package/dist/cli/commands/config.d.ts +275 -3
  4. package/dist/cli/commands/config.js +121 -0
  5. package/dist/cli/commands/mcp.js +77 -28
  6. package/dist/cli/factories/commandFactory.js +359 -6
  7. package/dist/core/analytics.js +7 -27
  8. package/dist/core/baseProvider.js +43 -4
  9. package/dist/core/constants.d.ts +46 -0
  10. package/dist/core/constants.js +47 -0
  11. package/dist/core/dynamicModels.d.ts +16 -4
  12. package/dist/core/dynamicModels.js +130 -26
  13. package/dist/core/evaluation.js +5 -1
  14. package/dist/core/evaluationProviders.d.ts +6 -2
  15. package/dist/core/evaluationProviders.js +41 -125
  16. package/dist/core/factory.d.ts +5 -0
  17. package/dist/core/factory.js +62 -50
  18. package/dist/core/modelConfiguration.d.ts +246 -0
  19. package/dist/core/modelConfiguration.js +775 -0
  20. package/dist/core/types.d.ts +22 -3
  21. package/dist/core/types.js +5 -1
  22. package/dist/factories/providerRegistry.js +3 -3
  23. package/dist/index.d.ts +1 -1
  24. package/dist/index.js +1 -1
  25. package/dist/lib/core/analytics.js +7 -27
  26. package/dist/lib/core/baseProvider.js +43 -4
  27. package/dist/lib/core/constants.d.ts +46 -0
  28. package/dist/lib/core/constants.js +47 -0
  29. package/dist/lib/core/dynamicModels.d.ts +16 -4
  30. package/dist/lib/core/dynamicModels.js +130 -26
  31. package/dist/lib/core/evaluation.js +5 -1
  32. package/dist/lib/core/evaluationProviders.d.ts +6 -2
  33. package/dist/lib/core/evaluationProviders.js +41 -125
  34. package/dist/lib/core/factory.d.ts +5 -0
  35. package/dist/lib/core/factory.js +63 -50
  36. package/dist/lib/core/modelConfiguration.d.ts +246 -0
  37. package/dist/lib/core/modelConfiguration.js +775 -0
  38. package/dist/lib/core/types.d.ts +22 -3
  39. package/dist/lib/core/types.js +5 -1
  40. package/dist/lib/factories/providerRegistry.js +3 -3
  41. package/dist/lib/index.d.ts +1 -1
  42. package/dist/lib/index.js +1 -1
  43. package/dist/lib/mcp/factory.d.ts +5 -5
  44. package/dist/lib/mcp/factory.js +2 -2
  45. package/dist/lib/mcp/servers/utilities/utilityServer.d.ts +1 -1
  46. package/dist/lib/mcp/servers/utilities/utilityServer.js +1 -1
  47. package/dist/lib/mcp/toolRegistry.js +2 -2
  48. package/dist/lib/neurolink.d.ts +168 -12
  49. package/dist/lib/neurolink.js +685 -123
  50. package/dist/lib/providers/anthropic.js +52 -2
  51. package/dist/lib/providers/googleAiStudio.js +4 -0
  52. package/dist/lib/providers/googleVertex.d.ts +75 -9
  53. package/dist/lib/providers/googleVertex.js +365 -46
  54. package/dist/lib/providers/huggingFace.d.ts +52 -11
  55. package/dist/lib/providers/huggingFace.js +180 -42
  56. package/dist/lib/providers/litellm.d.ts +9 -9
  57. package/dist/lib/providers/litellm.js +103 -16
  58. package/dist/lib/providers/ollama.d.ts +52 -17
  59. package/dist/lib/providers/ollama.js +276 -68
  60. package/dist/lib/sdk/toolRegistration.d.ts +42 -0
  61. package/dist/lib/sdk/toolRegistration.js +269 -27
  62. package/dist/lib/telemetry/telemetryService.d.ts +6 -0
  63. package/dist/lib/telemetry/telemetryService.js +38 -3
  64. package/dist/lib/types/contextTypes.d.ts +75 -11
  65. package/dist/lib/types/contextTypes.js +227 -1
  66. package/dist/lib/types/domainTypes.d.ts +62 -0
  67. package/dist/lib/types/domainTypes.js +5 -0
  68. package/dist/lib/types/generateTypes.d.ts +52 -0
  69. package/dist/lib/types/index.d.ts +1 -0
  70. package/dist/lib/types/mcpTypes.d.ts +1 -1
  71. package/dist/lib/types/mcpTypes.js +1 -1
  72. package/dist/lib/types/streamTypes.d.ts +14 -0
  73. package/dist/lib/types/universalProviderOptions.d.ts +1 -1
  74. package/dist/lib/utils/errorHandling.d.ts +142 -0
  75. package/dist/lib/utils/errorHandling.js +316 -0
  76. package/dist/lib/utils/factoryProcessing.d.ts +74 -0
  77. package/dist/lib/utils/factoryProcessing.js +588 -0
  78. package/dist/lib/utils/optionsConversion.d.ts +54 -0
  79. package/dist/lib/utils/optionsConversion.js +126 -0
  80. package/dist/lib/utils/optionsUtils.d.ts +246 -0
  81. package/dist/lib/utils/optionsUtils.js +960 -0
  82. package/dist/lib/utils/providerHealth.d.ts +107 -0
  83. package/dist/lib/utils/providerHealth.js +507 -0
  84. package/dist/lib/utils/providerUtils.d.ts +17 -0
  85. package/dist/lib/utils/providerUtils.js +271 -16
  86. package/dist/lib/utils/timeout.js +1 -1
  87. package/dist/lib/utils/tokenLimits.d.ts +33 -0
  88. package/dist/lib/utils/tokenLimits.js +118 -0
  89. package/dist/mcp/factory.d.ts +5 -5
  90. package/dist/mcp/factory.js +2 -2
  91. package/dist/mcp/servers/utilities/utilityServer.d.ts +1 -1
  92. package/dist/mcp/servers/utilities/utilityServer.js +1 -1
  93. package/dist/mcp/toolRegistry.js +2 -2
  94. package/dist/neurolink.d.ts +168 -12
  95. package/dist/neurolink.js +685 -123
  96. package/dist/providers/anthropic.js +52 -2
  97. package/dist/providers/googleAiStudio.js +4 -0
  98. package/dist/providers/googleVertex.d.ts +75 -9
  99. package/dist/providers/googleVertex.js +365 -46
  100. package/dist/providers/huggingFace.d.ts +52 -11
  101. package/dist/providers/huggingFace.js +181 -43
  102. package/dist/providers/litellm.d.ts +9 -9
  103. package/dist/providers/litellm.js +103 -16
  104. package/dist/providers/ollama.d.ts +52 -17
  105. package/dist/providers/ollama.js +276 -68
  106. package/dist/sdk/toolRegistration.d.ts +42 -0
  107. package/dist/sdk/toolRegistration.js +269 -27
  108. package/dist/telemetry/telemetryService.d.ts +6 -0
  109. package/dist/telemetry/telemetryService.js +38 -3
  110. package/dist/types/contextTypes.d.ts +75 -11
  111. package/dist/types/contextTypes.js +227 -2
  112. package/dist/types/domainTypes.d.ts +62 -0
  113. package/dist/types/domainTypes.js +5 -0
  114. package/dist/types/generateTypes.d.ts +52 -0
  115. package/dist/types/index.d.ts +1 -0
  116. package/dist/types/mcpTypes.d.ts +1 -1
  117. package/dist/types/mcpTypes.js +1 -1
  118. package/dist/types/streamTypes.d.ts +14 -0
  119. package/dist/types/universalProviderOptions.d.ts +1 -1
  120. package/dist/types/universalProviderOptions.js +0 -1
  121. package/dist/utils/errorHandling.d.ts +142 -0
  122. package/dist/utils/errorHandling.js +316 -0
  123. package/dist/utils/factoryProcessing.d.ts +74 -0
  124. package/dist/utils/factoryProcessing.js +588 -0
  125. package/dist/utils/optionsConversion.d.ts +54 -0
  126. package/dist/utils/optionsConversion.js +126 -0
  127. package/dist/utils/optionsUtils.d.ts +246 -0
  128. package/dist/utils/optionsUtils.js +960 -0
  129. package/dist/utils/providerHealth.d.ts +107 -0
  130. package/dist/utils/providerHealth.js +507 -0
  131. package/dist/utils/providerUtils.d.ts +17 -0
  132. package/dist/utils/providerUtils.js +271 -16
  133. package/dist/utils/timeout.js +1 -1
  134. package/dist/utils/tokenLimits.d.ts +33 -0
  135. package/dist/utils/tokenLimits.js +118 -0
  136. package/package.json +2 -2
@@ -1,8 +1,8 @@
  import { createOpenAI } from "@ai-sdk/openai";
- import { streamText } from "ai";
+ import { streamText, } from "ai";
  import { BaseProvider } from "../core/baseProvider.js";
  import { logger } from "../utils/logger.js";
- import { createTimeoutController, TimeoutError, } from "../utils/timeout.js";
+ import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
  import { DEFAULT_MAX_TOKENS } from "../core/constants.js";
  import { validateApiKey, createHuggingFaceConfig, getProviderModel, } from "../utils/providerConfig.js";
  // Configuration helpers - now using consolidated utility
@@ -39,36 +39,68 @@ export class HuggingFaceProvider extends BaseProvider {
  // ABSTRACT METHOD IMPLEMENTATIONS
  // ===================
  /**
- * HuggingFace models currently don't properly support tool/function calling
+ * HuggingFace Tool Calling Support (Enhanced 2025)
  *
- * **Tested Models & Issues:**
- * - microsoft/DialoGPT-medium: Describes tools instead of executing them
- * - Most HF models via router endpoint: Function schema passed but not executed
- * - Issue: Models treat tool definitions as conversation context rather than executable functions
+ * **Supported Models (Tool Calling Enabled):**
+ * - meta-llama/Llama-3.1-8B-Instruct - Post-trained for tool calling
+ * - meta-llama/Llama-3.1-70B-Instruct - Advanced tool calling capabilities
+ * - meta-llama/Llama-3.1-405B-Instruct - Full tool calling support
+ * - nvidia/Llama-3.1-Nemotron-Ultra-253B-v1 - Optimized for tool calling
+ * - NousResearch/Hermes-3-Llama-3.2-3B - Function calling trained
+ * - codellama/CodeLlama-34b-Instruct-hf - Code-focused tool calling
+ * - mistralai/Mistral-7B-Instruct-v0.3 - Basic tool support
  *
- * **Known Limitations:**
- * - Tools are visible to model but treated as descriptive text
- * - No proper function call response format handling
- * - HuggingFace router endpoint doesn't enforce OpenAI-compatible tool execution
+ * **Unsupported Models (Tool Calling Disabled):**
+ * - microsoft/DialoGPT-* - Treats tools as conversation context
+ * - gpt2, bert, roberta variants - No tool calling training
+ * - Most pre-2024 models - Limited function calling capabilities
  *
- * @returns false to disable tools by default until proper implementation
+ * **Implementation Details:**
+ * - Intelligent model detection based on known capabilities
+ * - Custom tool schema formatting for HuggingFace models
+ * - Enhanced response parsing for function call extraction
+ * - Graceful fallback for unsupported models
+ *
+ * @returns true for supported models, false for unsupported models
  */
  supportsTools() {
- // IMPLEMENTATION STATUS (2025): HuggingFace tool calling remains limited
- //
- // Current State:
- // - Function calling varies significantly across HF models
- // - Many models treat tool schemas as conversation context
- // - Requires model-specific implementation per architecture
- //
- // To Enable Tools:
- // 1. Detect model capability via HF model card metadata
- // 2. Implement model-specific tool schema formatting
- // 3. Add custom response parsing for function call extraction
- // 4. Create validation framework for tool parameter handling
- // 5. Test extensively with supported models (Code Llama, Llama 3.1+)
- //
- // Until comprehensive implementation, tools disabled for reliability
+ const modelName = this.modelName.toLowerCase();
+ // Check if model is in the list of known tool-calling capable models
+ const toolCapableModels = [
+ // Llama 3.1 series (post-trained for tool calling)
+ "llama-3.1-8b-instruct",
+ "llama-3.1-70b-instruct",
+ "llama-3.1-405b-instruct",
+ "llama-3.1-nemotron-ultra",
+ // Hermes series (function calling trained)
+ "hermes-3-llama-3.2",
+ "hermes-2-pro",
+ // Code Llama (code-focused tool calling)
+ "codellama-34b-instruct",
+ "codellama-13b-instruct",
+ // Mistral series (basic tool support)
+ "mistral-7b-instruct-v0.3",
+ "mistral-8x7b-instruct",
+ // Other known tool-capable models
+ "nous-hermes",
+ "openchat",
+ "wizardcoder",
+ ];
+ // Check if current model matches any tool-capable model patterns
+ const isToolCapable = toolCapableModels.some((capableModel) => modelName.includes(capableModel));
+ if (isToolCapable) {
+ logger.debug("HuggingFace tool calling enabled", {
+ model: this.modelName,
+ reason: "Model supports function calling",
+ });
+ return true;
+ }
+ // Log why tools are disabled for transparency
+ logger.debug("HuggingFace tool calling disabled", {
+ model: this.modelName,
+ reason: "Model not in tool-capable list",
+ suggestion: "Consider using Llama-3.1-* or Hermes-3-* models for tool calling",
+ });
  return false;
  }
  // executeGenerate removed - BaseProvider handles all generation with tools
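Note: the gate above is a case-insensitive substring match, so any model ID containing a listed pattern (including fine-tune derivatives such as a hypothetical my-org/llama-3.1-8b-instruct-ft) is treated as tool-capable. A minimal standalone sketch of the same check, with an abbreviated pattern list for illustration (names are not package API):

    // Standalone restatement of the substring-based capability gate above.
    const TOOL_CAPABLE_PATTERNS = [
      "llama-3.1-8b-instruct",
      "hermes-3-llama-3.2",
      "codellama-34b-instruct",
    ];

    function modelSupportsTools(modelName: string): boolean {
      const normalized = modelName.toLowerCase();
      // A pattern appearing anywhere in the model ID enables tools.
      return TOOL_CAPABLE_PATTERNS.some((p) => normalized.includes(p));
    }

    console.log(modelSupportsTools("meta-llama/Llama-3.1-8B-Instruct")); // true
    console.log(modelSupportsTools("microsoft/DialoGPT-medium")); // false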
@@ -77,18 +109,20 @@ export class HuggingFaceProvider extends BaseProvider {
  const timeout = this.getTimeout(options);
  const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
  try {
+ // Enhanced tool handling for HuggingFace models
+ const streamOptions = this.prepareStreamOptions(options, analysisSchema);
  const result = await streamText({
  model: this.model,
- prompt: options.input.text,
- system: options.systemPrompt,
+ prompt: streamOptions.prompt,
+ system: streamOptions.system,
  temperature: options.temperature,
  maxTokens: options.maxTokens || DEFAULT_MAX_TOKENS,
- tools: options.tools,
- toolChoice: "auto",
+ tools: streamOptions.tools, // Tools format conversion handled by prepareStreamOptions
+ toolChoice: streamOptions.toolChoice, // Tool choice handled by prepareStreamOptions
  abortSignal: timeoutController?.controller.signal,
  });
  timeoutController?.cleanup();
- // Transform stream to match StreamResult interface
+ // Transform stream to match StreamResult interface with enhanced tool call parsing
  const transformedStream = async function* () {
  for await (const chunk of result.textStream) {
  yield { content: chunk };
@@ -105,18 +139,103 @@ export class HuggingFaceProvider extends BaseProvider {
  throw this.handleProviderError(error);
  }
  }
- getProviderName() {
- return "huggingface";
+ /**
+ * Prepare stream options with HuggingFace-specific enhancements
+ * Handles tool calling optimizations and model-specific formatting
+ */
+ prepareStreamOptions(options, analysisSchema) {
+ const modelSupportsTools = this.supportsTools();
+ // If model doesn't support tools, disable them completely
+ if (!modelSupportsTools) {
+ return {
+ prompt: options.input.text,
+ system: options.systemPrompt,
+ tools: undefined,
+ toolChoice: undefined,
+ };
+ }
+ // For tool-capable models, enhance the prompt with tool calling instructions
+ const enhancedSystemPrompt = this.enhanceSystemPromptForTools(options.systemPrompt, options.tools);
+ // Format tools using HuggingFace-compatible schema if tools are provided
+ const formattedTools = options.tools
+ ? this.formatToolsForHuggingFace(options.tools)
+ : undefined;
+ return {
+ prompt: options.input.text,
+ system: enhancedSystemPrompt,
+ tools: formattedTools,
+ toolChoice: formattedTools ? "auto" : undefined,
+ };
  }
- getDefaultModel() {
- return getDefaultHuggingFaceModel();
+ /**
+ * Enhance system prompt with tool calling instructions for HuggingFace models
+ * Many HF models benefit from explicit tool calling guidance
+ */
+ enhanceSystemPromptForTools(originalSystemPrompt, tools) {
+ if (!tools || !this.supportsTools()) {
+ return originalSystemPrompt || "";
+ }
+ const toolInstructions = `
+ You have access to function tools. When you need to use a tool to answer the user's request:
+ 1. Identify the appropriate tool from the available functions
+ 2. Call the function with the correct parameters in JSON format
+ 3. Use the function results to provide a comprehensive answer
+
+ Available tools will be provided in the function calling format. Use them when they can help answer the user's question.
+ `;
+ return originalSystemPrompt
+ ? `${originalSystemPrompt}\n\n${toolInstructions}`
+ : toolInstructions;
  }
  /**
- * Returns the Vercel AI SDK model instance for HuggingFace
+ * Format tools for HuggingFace model compatibility
+ * Some models require specific tool schema formatting
  */
- getAISDKModel() {
- return this.model;
+ formatToolsForHuggingFace(tools) {
+ // For now, pass through tools as-is since we're using OpenAI-compatible endpoint
+ // Future enhancement: Add model-specific tool formatting if needed
+ return tools;
+ }
+ /**
+ * Get recommendations for tool-calling capable HuggingFace models
+ * Provides guidance for users who want to use function calling
+ */
+ static getToolCallingRecommendations() {
+ return {
+ recommended: [
+ "meta-llama/Llama-3.1-8B-Instruct",
+ "meta-llama/Llama-3.1-70B-Instruct",
+ "nvidia/Llama-3.1-Nemotron-Ultra-253B-v1",
+ "NousResearch/Hermes-3-Llama-3.2-3B",
+ "codellama/CodeLlama-34b-Instruct-hf",
+ ],
+ performance: {
+ "meta-llama/Llama-3.1-8B-Instruct": { speed: 3, quality: 2, cost: 3 },
+ "meta-llama/Llama-3.1-70B-Instruct": { speed: 2, quality: 3, cost: 2 },
+ "nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": {
+ speed: 2,
+ quality: 3,
+ cost: 1,
+ },
+ "NousResearch/Hermes-3-Llama-3.2-3B": { speed: 3, quality: 2, cost: 3 },
+ "codellama/CodeLlama-34b-Instruct-hf": {
+ speed: 2,
+ quality: 3,
+ cost: 2,
+ },
+ },
+ notes: {
+ "meta-llama/Llama-3.1-8B-Instruct": "Best balance of speed and tool calling capability",
+ "meta-llama/Llama-3.1-70B-Instruct": "High-quality tool calling, slower inference",
+ "nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": "Optimized for tool calling, requires more resources",
+ "NousResearch/Hermes-3-Llama-3.2-3B": "Lightweight with good tool calling support",
+ "codellama/CodeLlama-34b-Instruct-hf": "Excellent for code-related tool calling",
+ },
+ };
  }
+ /**
+ * Enhanced error handling with HuggingFace-specific guidance
+ */
  handleProviderError(error) {
  if (error instanceof TimeoutError) {
  return new Error(`HuggingFace request timed out: ${error.message}`);
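The new prepareStreamOptions has two branches: for unsupported models it strips tools and toolChoice entirely, and for supported models it appends tool-use guidance to the system prompt and sets toolChoice to "auto" only when tools are actually present. A reduced sketch of that decision (simplified types, not the package's real signatures):

    interface PreparedOptions {
      system: string | undefined;
      tools: unknown;
      toolChoice: "auto" | undefined;
    }

    function prepare(
      system: string | undefined,
      tools: unknown,
      supportsTools: boolean,
    ): PreparedOptions {
      if (!supportsTools) {
        // Unsupported models: tools are removed outright, never passed through.
        return { system, tools: undefined, toolChoice: undefined };
      }
      const guidance = "You have access to function tools. ...";
      return {
        system: system ? `${system}\n\n${guidance}` : guidance,
        tools,
        toolChoice: tools ? "auto" : undefined,
      };
    }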
@@ -125,14 +244,33 @@ export class HuggingFaceProvider extends BaseProvider {
  const message = errorObj?.message && typeof errorObj.message === "string"
  ? errorObj.message
  : "Unknown error";
+ // Enhanced error messages with tool calling context
  if (message.includes("API_TOKEN_INVALID") ||
  message.includes("Invalid token")) {
- return new Error("Invalid HuggingFace API token. Please check your HUGGING_FACE_API_KEY environment variable.");
+ return new Error("Invalid HuggingFace API token. Please check your HUGGINGFACE_API_KEY environment variable.");
  }
  if (message.includes("rate limit")) {
- return new Error("HuggingFace rate limit exceeded. Please try again later.");
+ return new Error("HuggingFace rate limit exceeded. Consider using a paid plan or try again later.");
+ }
+ if (message.includes("model") && message.includes("not found")) {
+ return new Error(`❌ HuggingFace model '${this.modelName}' not found.\n\nSuggestions:\n1. Check model name spelling\n2. Ensure model exists on HuggingFace Hub\n3. For tool calling, use: Llama-3.1-8B-Instruct, Hermes-3-Llama-3.2-3B, or CodeLlama-34b-Instruct-hf`);
  }
- return new Error(`HuggingFace error: ${message}`);
+ if (message.includes("function") || message.includes("tool")) {
+ return new Error(`❌ HuggingFace tool calling error: ${message}\n\nNotes:\n1. Ensure you're using a tool-capable model (Llama-3.1+, Hermes-3+, CodeLlama)\n2. Check that your model supports function calling\n3. Verify tool schema format is correct`);
+ }
+ return new Error(`❌ HuggingFace Provider Error: ${message}`);
+ }
+ getProviderName() {
+ return "huggingface";
+ }
+ getDefaultModel() {
+ return getDefaultHuggingFaceModel();
+ }
+ /**
+ * Returns the Vercel AI SDK model instance for HuggingFace
+ */
+ getAISDKModel() {
+ return this.model;
  }
  // ===================
  // PRIVATE VALIDATION METHODS
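Note the invalid-token message now names HUGGINGFACE_API_KEY where it previously said HUGGING_FACE_API_KEY. This hunk shows only the error text changing; whether the configuration helpers still read the old spelling is not visible here, so mirroring one name onto the other is a safe interim measure:

    // Precautionary sketch only: which variable names the package actually
    // reads is not shown in this diff.
    const hfKey =
      process.env.HUGGINGFACE_API_KEY ?? process.env.HUGGING_FACE_API_KEY;
    if (hfKey) {
      process.env.HUGGINGFACE_API_KEY ??= hfKey;
      process.env.HUGGING_FACE_API_KEY ??= hfKey;
    }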
@@ -9,6 +9,9 @@ import { BaseProvider } from "../core/baseProvider.js";
  */
  export declare class LiteLLMProvider extends BaseProvider {
  private model;
+ private static modelsCache;
+ private static modelsCacheTime;
+ private static readonly MODELS_CACHE_DURATION;
  constructor(modelName?: string, sdk?: unknown);
  protected getProviderName(): AIProviderName;
  protected getDefaultModel(): string;
@@ -28,16 +31,13 @@ export declare class LiteLLMProvider extends BaseProvider {
  protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
  /**
  * Get available models from LiteLLM proxy server
- *
- * TODO: Implement dynamic fetching from LiteLLM's /v1/models endpoint.
- * Currently returns a hardcoded list of commonly available models.
- *
- * Implementation would involve:
- * 1. Fetch from `${baseURL}/v1/models`
- * 2. Parse response to extract model IDs
- * 3. Handle network errors gracefully
- * 4. Cache results to avoid repeated API calls
+ * Dynamically fetches from /v1/models endpoint with caching and fallback
  */
  getAvailableModels(): Promise<string[]>;
+ /**
+ * Fetch available models from LiteLLM proxy /v1/models endpoint
+ * @private
+ */
+ private fetchModelsFromAPI;
  private validateStreamOptions;
  }
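getAvailableModels now resolves dynamically instead of returning the old hardcoded list. A usage sketch (the deep import path is a guess from the file listing above; the package's public entry may expose the provider differently):

    import { LiteLLMProvider } from "@juspay/neurolink/dist/providers/litellm.js";

    const provider = new LiteLLMProvider();
    const models = await provider.getAvailableModels(); // hits `${baseURL}/v1/models`, or falls back
    const again = await provider.getAvailableModels(); // within 10 minutes: served from cache

    // When the fetch succeeded, the cache hands back the same array instance;
    // the fallback list is not cached, so a failed fetch is retried next call.
    console.log(models.length, again === models);

Because the cache fields are static, they are shared by every LiteLLMProvider instance in the process.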
@@ -34,6 +34,10 @@ const getDefaultLiteLLMModel = () => {
  */
  export class LiteLLMProvider extends BaseProvider {
  model;
+ // Cache for available models to avoid repeated API calls
+ static modelsCache = [];
+ static modelsCacheTime = 0;
+ static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
  constructor(modelName, sdk) {
  super(modelName, "litellm", sdk);
  // Initialize LiteLLM using OpenAI SDK with explicit configuration
@@ -154,28 +158,111 @@ export class LiteLLMProvider extends BaseProvider {
  }
  /**
  * Get available models from LiteLLM proxy server
- *
- * TODO: Implement dynamic fetching from LiteLLM's /v1/models endpoint.
- * Currently returns a hardcoded list of commonly available models.
- *
- * Implementation would involve:
- * 1. Fetch from `${baseURL}/v1/models`
- * 2. Parse response to extract model IDs
- * 3. Handle network errors gracefully
- * 4. Cache results to avoid repeated API calls
+ * Dynamically fetches from /v1/models endpoint with caching and fallback
  */
  async getAvailableModels() {
- // Hardcoded list of commonly available models
- // TODO: Replace with dynamic fetch from LiteLLM proxy /v1/models endpoint
- return [
+ const functionTag = "LiteLLMProvider.getAvailableModels";
+ const now = Date.now();
+ // Check if cached models are still valid
+ if (LiteLLMProvider.modelsCache.length > 0 &&
+ now - LiteLLMProvider.modelsCacheTime <
+ LiteLLMProvider.MODELS_CACHE_DURATION) {
+ logger.debug(`[${functionTag}] Using cached models`, {
+ cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
+ modelCount: LiteLLMProvider.modelsCache.length,
+ });
+ return LiteLLMProvider.modelsCache;
+ }
+ // Try to fetch models dynamically
+ try {
+ const dynamicModels = await this.fetchModelsFromAPI();
+ if (dynamicModels.length > 0) {
+ // Cache successful result
+ LiteLLMProvider.modelsCache = dynamicModels;
+ LiteLLMProvider.modelsCacheTime = now;
+ logger.debug(`[${functionTag}] Successfully fetched models from API`, {
+ modelCount: dynamicModels.length,
+ });
+ return dynamicModels;
+ }
+ }
+ catch (error) {
+ logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
+ error: error instanceof Error ? error.message : String(error),
+ });
+ }
+ // Fallback to hardcoded list if API fetch fails
+ const fallbackModels = [
  "openai/gpt-4o",
  "openai/gpt-4o-mini",
- "anthropic/claude-3-5-sonnet",
- "anthropic/claude-3-haiku",
+ "openai/gpt-3.5-turbo",
+ "anthropic/claude-3-5-sonnet-20241022",
+ "anthropic/claude-3-haiku-20240307",
  "google/gemini-2.0-flash",
- "mistral/mistral-large",
- "mistral/mistral-medium",
+ "google/gemini-1.5-pro",
+ "mistral/mistral-large-latest",
+ "mistral/mistral-medium-latest",
+ "meta-llama/llama-3.1-8b-instruct",
+ "meta-llama/llama-3.1-70b-instruct",
  ];
+ logger.debug(`[${functionTag}] Using fallback model list`, {
+ modelCount: fallbackModels.length,
+ });
+ return fallbackModels;
+ }
+ /**
+ * Fetch available models from LiteLLM proxy /v1/models endpoint
+ * @private
+ */
+ async fetchModelsFromAPI() {
+ const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
+ const config = getLiteLLMConfig();
+ const modelsUrl = `${config.baseURL}/v1/models`;
+ const controller = new AbortController();
+ const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
+ try {
+ logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
+ const response = await fetch(modelsUrl, {
+ method: "GET",
+ headers: {
+ Authorization: `Bearer ${config.apiKey}`,
+ "Content-Type": "application/json",
+ },
+ signal: controller.signal,
+ });
+ clearTimeout(timeoutId);
+ if (!response.ok) {
+ throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+ }
+ const data = await response.json();
+ // Parse OpenAI-compatible models response
+ if (data && Array.isArray(data.data)) {
+ const models = data.data
+ .map((model) => typeof model === "object" &&
+ model !== null &&
+ "id" in model &&
+ typeof model.id === "string"
+ ? model.id
+ : undefined)
+ .filter((id) => typeof id === "string" && id.length > 0)
+ .sort();
+ logger.debug(`[${functionTag}] Successfully parsed models`, {
+ totalModels: models.length,
+ sampleModels: models.slice(0, 5),
+ });
+ return models;
+ }
+ else {
+ throw new Error("Invalid response format: expected data.data array");
+ }
+ }
+ catch (error) {
+ clearTimeout(timeoutId);
+ if (error instanceof Error && error.name === "AbortError") {
+ throw new Error("Request timed out after 5 seconds");
+ }
+ throw error;
+ }
  }
  // ===================
  // PRIVATE VALIDATION METHODS
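The parser in fetchModelsFromAPI accepts the OpenAI-style list payload and reduces it to sorted string IDs, silently dropping malformed entries. Roughly, given a typical response:

    // Typical OpenAI-compatible payload from `${baseURL}/v1/models`:
    const sample = {
      object: "list",
      data: [
        { id: "openai/gpt-4o", object: "model" },
        { id: "anthropic/claude-3-5-sonnet-20241022", object: "model" },
      ],
    };

    // Equivalent extraction: keep string ids, drop anything else, sort.
    const ids = sample.data
      .map((m) => (typeof m.id === "string" ? m.id : undefined))
      .filter((id): id is string => typeof id === "string" && id.length > 0)
      .sort();
    // ["anthropic/claude-3-5-sonnet-20241022", "openai/gpt-4o"]

A response without a data array throws, which getAvailableModels catches and converts into the fallback list.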
@@ -28,30 +28,51 @@ export declare class OllamaProvider extends BaseProvider {
  */
  protected getAISDKModel(): LanguageModelV1;
  /**
- * Ollama tool/function calling support is currently disabled due to integration issues.
+ * Ollama Tool Calling Support (Enhanced 2025)
  *
- * **Current Issues:**
- * 1. The OllamaLanguageModel from @ai-sdk/provider-utils doesn't properly integrate
- * with BaseProvider's tool calling mechanism
- * 2. Ollama models require specific prompt formatting for function calls that differs
- * from the standardized AI SDK format
- * 3. Tool response parsing and execution flow needs custom implementation
+ * Uses configurable model list from ModelConfiguration instead of hardcoded values.
+ * Tool-capable models can be configured via OLLAMA_TOOL_CAPABLE_MODELS environment variable.
  *
- * **What's needed to enable tool support:**
- * - Create a custom OllamaLanguageModel wrapper that handles tool schema formatting
- * - Implement Ollama-specific tool calling prompt templates
- * - Add proper response parsing for Ollama's function call format
- * - Test with models that support function calling (llama3.1, mistral, etc.)
+ * **Configuration Options:**
+ * - Environment variable: OLLAMA_TOOL_CAPABLE_MODELS (comma-separated list)
+ * - Configuration file: providers.ollama.modelBehavior.toolCapableModels
+ * - Fallback: Default list of known tool-capable models
  *
- * **Tracking:**
- * - See BaseProvider tool integration patterns in other providers
- * - Monitor Ollama function calling documentation: https://ollama.com/blog/tool-support
- * - Track AI SDK updates for better Ollama integration
+ * **Implementation Features:**
+ * - Direct Ollama API integration (/v1/chat/completions)
+ * - Automatic tool schema conversion to Ollama format
+ * - Streaming tool calls with incremental response parsing
+ * - Model compatibility validation and fallback handling
  *
- * @returns false to disable tools by default
+ * @returns true for supported models, false for unsupported models
  */
  supportsTools(): boolean;
  protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
+ /**
+ * Execute streaming with Ollama's function calling support
+ * Uses the /v1/chat/completions endpoint with tools parameter
+ */
+ private executeStreamWithTools;
+ /**
+ * Execute streaming without tools using the generate API
+ * Fallback for non-tool scenarios or when chat API is unavailable
+ */
+ private executeStreamWithoutTools;
+ /**
+ * Convert AI SDK tools format to Ollama's function calling format
+ */
+ private convertToolsToOllamaFormat;
+ /**
+ * Create stream generator for Ollama chat API with tool call support
+ */
+ private createOllamaChatStream;
+ /**
+ * Format tool calls for display when tools aren't executed directly
+ */
+ private formatToolCallForDisplay;
+ /**
+ * Create stream generator for Ollama generate API (non-tool mode)
+ */
  private createOllamaStream;
  protected handleProviderError(error: unknown): Error;
  private validateStreamOptions;
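Per the doc comment above, the Ollama tool-capable list is now sourced from the new modelConfiguration module rather than hardcoded. The exact parsing is not shown in these hunks; a comma-separated environment list is conventionally read along these lines:

    // Hypothetical reading of the documented env variable; the real logic
    // lives in dist/core/modelConfiguration.js (added in this release).
    const raw = process.env.OLLAMA_TOOL_CAPABLE_MODELS ?? "";
    const toolCapableModels = raw
      .split(",")
      .map((m) => m.trim().toLowerCase())
      .filter((m) => m.length > 0);
    // e.g. OLLAMA_TOOL_CAPABLE_MODELS="llama3.1,mistral-nemo,qwen2.5"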
@@ -67,5 +88,19 @@ export declare class OllamaProvider extends BaseProvider {
  * Check if a specific model is available
  */
  isModelAvailable(modelName: string): Promise<boolean>;
+ /**
+ * Get recommendations for tool-calling capable Ollama models
+ * Provides guidance for users who want to use function calling locally
+ */
+ static getToolCallingRecommendations(): {
+ recommended: string[];
+ performance: Record<string, {
+ speed: number;
+ quality: number;
+ size: string;
+ }>;
+ notes: Record<string, string>;
+ installation: Record<string, string>;
+ };
  }
  export default OllamaProvider;
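A usage sketch for the new static helper (the declaration above fixes only the return shape; the deep import path is an assumption based on the file listing):

    import OllamaProvider from "@juspay/neurolink/dist/providers/ollama.js";

    const rec = OllamaProvider.getToolCallingRecommendations();
    console.log(rec.recommended); // string[] of suggested local models
    console.log(rec.performance); // per-model { speed, quality, size } ratings
    console.log(rec.notes); // per-model guidance strings
    console.log(rec.installation); // per-model installation instructions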