@juspay/neurolink 7.6.1 → 7.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -4
- package/README.md +78 -3
- package/dist/cli/commands/config.d.ts +275 -3
- package/dist/cli/commands/config.js +121 -0
- package/dist/cli/commands/mcp.js +77 -28
- package/dist/cli/factories/commandFactory.js +359 -6
- package/dist/core/analytics.js +7 -27
- package/dist/core/baseProvider.js +43 -4
- package/dist/core/constants.d.ts +46 -0
- package/dist/core/constants.js +47 -0
- package/dist/core/dynamicModels.d.ts +16 -4
- package/dist/core/dynamicModels.js +130 -26
- package/dist/core/evaluation.js +5 -1
- package/dist/core/evaluationProviders.d.ts +6 -2
- package/dist/core/evaluationProviders.js +41 -125
- package/dist/core/factory.d.ts +5 -0
- package/dist/core/factory.js +62 -50
- package/dist/core/modelConfiguration.d.ts +246 -0
- package/dist/core/modelConfiguration.js +775 -0
- package/dist/core/types.d.ts +22 -3
- package/dist/core/types.js +5 -1
- package/dist/factories/providerRegistry.js +3 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/lib/core/analytics.js +7 -27
- package/dist/lib/core/baseProvider.js +43 -4
- package/dist/lib/core/constants.d.ts +46 -0
- package/dist/lib/core/constants.js +47 -0
- package/dist/lib/core/dynamicModels.d.ts +16 -4
- package/dist/lib/core/dynamicModels.js +130 -26
- package/dist/lib/core/evaluation.js +5 -1
- package/dist/lib/core/evaluationProviders.d.ts +6 -2
- package/dist/lib/core/evaluationProviders.js +41 -125
- package/dist/lib/core/factory.d.ts +5 -0
- package/dist/lib/core/factory.js +63 -50
- package/dist/lib/core/modelConfiguration.d.ts +246 -0
- package/dist/lib/core/modelConfiguration.js +775 -0
- package/dist/lib/core/types.d.ts +22 -3
- package/dist/lib/core/types.js +5 -1
- package/dist/lib/factories/providerRegistry.js +3 -3
- package/dist/lib/index.d.ts +1 -1
- package/dist/lib/index.js +1 -1
- package/dist/lib/mcp/factory.d.ts +5 -5
- package/dist/lib/mcp/factory.js +2 -2
- package/dist/lib/mcp/servers/utilities/utilityServer.d.ts +1 -1
- package/dist/lib/mcp/servers/utilities/utilityServer.js +1 -1
- package/dist/lib/mcp/toolRegistry.js +2 -2
- package/dist/lib/neurolink.d.ts +168 -12
- package/dist/lib/neurolink.js +685 -123
- package/dist/lib/providers/anthropic.js +52 -2
- package/dist/lib/providers/googleAiStudio.js +4 -0
- package/dist/lib/providers/googleVertex.d.ts +75 -9
- package/dist/lib/providers/googleVertex.js +365 -46
- package/dist/lib/providers/huggingFace.d.ts +52 -11
- package/dist/lib/providers/huggingFace.js +180 -42
- package/dist/lib/providers/litellm.d.ts +9 -9
- package/dist/lib/providers/litellm.js +103 -16
- package/dist/lib/providers/ollama.d.ts +52 -17
- package/dist/lib/providers/ollama.js +276 -68
- package/dist/lib/sdk/toolRegistration.d.ts +42 -0
- package/dist/lib/sdk/toolRegistration.js +269 -27
- package/dist/lib/telemetry/telemetryService.d.ts +6 -0
- package/dist/lib/telemetry/telemetryService.js +38 -3
- package/dist/lib/types/contextTypes.d.ts +75 -11
- package/dist/lib/types/contextTypes.js +227 -1
- package/dist/lib/types/domainTypes.d.ts +62 -0
- package/dist/lib/types/domainTypes.js +5 -0
- package/dist/lib/types/generateTypes.d.ts +52 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/mcpTypes.d.ts +1 -1
- package/dist/lib/types/mcpTypes.js +1 -1
- package/dist/lib/types/streamTypes.d.ts +14 -0
- package/dist/lib/types/universalProviderOptions.d.ts +1 -1
- package/dist/lib/utils/errorHandling.d.ts +142 -0
- package/dist/lib/utils/errorHandling.js +316 -0
- package/dist/lib/utils/factoryProcessing.d.ts +74 -0
- package/dist/lib/utils/factoryProcessing.js +588 -0
- package/dist/lib/utils/optionsConversion.d.ts +54 -0
- package/dist/lib/utils/optionsConversion.js +126 -0
- package/dist/lib/utils/optionsUtils.d.ts +246 -0
- package/dist/lib/utils/optionsUtils.js +960 -0
- package/dist/lib/utils/providerHealth.d.ts +107 -0
- package/dist/lib/utils/providerHealth.js +507 -0
- package/dist/lib/utils/providerUtils.d.ts +17 -0
- package/dist/lib/utils/providerUtils.js +271 -16
- package/dist/lib/utils/timeout.js +1 -1
- package/dist/lib/utils/tokenLimits.d.ts +33 -0
- package/dist/lib/utils/tokenLimits.js +118 -0
- package/dist/mcp/factory.d.ts +5 -5
- package/dist/mcp/factory.js +2 -2
- package/dist/mcp/servers/utilities/utilityServer.d.ts +1 -1
- package/dist/mcp/servers/utilities/utilityServer.js +1 -1
- package/dist/mcp/toolRegistry.js +2 -2
- package/dist/neurolink.d.ts +168 -12
- package/dist/neurolink.js +685 -123
- package/dist/providers/anthropic.js +52 -2
- package/dist/providers/googleAiStudio.js +4 -0
- package/dist/providers/googleVertex.d.ts +75 -9
- package/dist/providers/googleVertex.js +365 -46
- package/dist/providers/huggingFace.d.ts +52 -11
- package/dist/providers/huggingFace.js +181 -43
- package/dist/providers/litellm.d.ts +9 -9
- package/dist/providers/litellm.js +103 -16
- package/dist/providers/ollama.d.ts +52 -17
- package/dist/providers/ollama.js +276 -68
- package/dist/sdk/toolRegistration.d.ts +42 -0
- package/dist/sdk/toolRegistration.js +269 -27
- package/dist/telemetry/telemetryService.d.ts +6 -0
- package/dist/telemetry/telemetryService.js +38 -3
- package/dist/types/contextTypes.d.ts +75 -11
- package/dist/types/contextTypes.js +227 -2
- package/dist/types/domainTypes.d.ts +62 -0
- package/dist/types/domainTypes.js +5 -0
- package/dist/types/generateTypes.d.ts +52 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/mcpTypes.d.ts +1 -1
- package/dist/types/mcpTypes.js +1 -1
- package/dist/types/streamTypes.d.ts +14 -0
- package/dist/types/universalProviderOptions.d.ts +1 -1
- package/dist/types/universalProviderOptions.js +0 -1
- package/dist/utils/errorHandling.d.ts +142 -0
- package/dist/utils/errorHandling.js +316 -0
- package/dist/utils/factoryProcessing.d.ts +74 -0
- package/dist/utils/factoryProcessing.js +588 -0
- package/dist/utils/optionsConversion.d.ts +54 -0
- package/dist/utils/optionsConversion.js +126 -0
- package/dist/utils/optionsUtils.d.ts +246 -0
- package/dist/utils/optionsUtils.js +960 -0
- package/dist/utils/providerHealth.d.ts +107 -0
- package/dist/utils/providerHealth.js +507 -0
- package/dist/utils/providerUtils.d.ts +17 -0
- package/dist/utils/providerUtils.js +271 -16
- package/dist/utils/timeout.js +1 -1
- package/dist/utils/tokenLimits.d.ts +33 -0
- package/dist/utils/tokenLimits.js +118 -0
- package/package.json +2 -2
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
|
-
import { streamText } from "ai";
|
|
2
|
+
import { streamText, } from "ai";
|
|
3
3
|
import { BaseProvider } from "../core/baseProvider.js";
|
|
4
4
|
import { logger } from "../utils/logger.js";
|
|
5
|
-
import { createTimeoutController, TimeoutError
|
|
5
|
+
import { createTimeoutController, TimeoutError } from "../utils/timeout.js";
|
|
6
6
|
import { DEFAULT_MAX_TOKENS } from "../core/constants.js";
|
|
7
7
|
import { validateApiKey, createHuggingFaceConfig, getProviderModel, } from "../utils/providerConfig.js";
|
|
8
8
|
// Configuration helpers - now using consolidated utility
|
|
@@ -39,36 +39,68 @@ export class HuggingFaceProvider extends BaseProvider {
|
|
|
39
39
|
// ABSTRACT METHOD IMPLEMENTATIONS
|
|
40
40
|
// ===================
|
|
41
41
|
/**
|
|
42
|
-
* HuggingFace
|
|
42
|
+
* HuggingFace Tool Calling Support (Enhanced 2025)
|
|
43
43
|
*
|
|
44
|
-
* **
|
|
45
|
-
* -
|
|
46
|
-
* -
|
|
47
|
-
* -
|
|
44
|
+
* **Supported Models (Tool Calling Enabled):**
|
|
45
|
+
* - meta-llama/Llama-3.1-8B-Instruct - Post-trained for tool calling
|
|
46
|
+
* - meta-llama/Llama-3.1-70B-Instruct - Advanced tool calling capabilities
|
|
47
|
+
* - meta-llama/Llama-3.1-405B-Instruct - Full tool calling support
|
|
48
|
+
* - nvidia/Llama-3.1-Nemotron-Ultra-253B-v1 - Optimized for tool calling
|
|
49
|
+
* - NousResearch/Hermes-3-Llama-3.2-3B - Function calling trained
|
|
50
|
+
* - codellama/CodeLlama-34b-Instruct-hf - Code-focused tool calling
|
|
51
|
+
* - mistralai/Mistral-7B-Instruct-v0.3 - Basic tool support
|
|
48
52
|
*
|
|
49
|
-
* **
|
|
50
|
-
* -
|
|
51
|
-
* -
|
|
52
|
-
* -
|
|
53
|
+
* **Unsupported Models (Tool Calling Disabled):**
|
|
54
|
+
* - microsoft/DialoGPT-* - Treats tools as conversation context
|
|
55
|
+
* - gpt2, bert, roberta variants - No tool calling training
|
|
56
|
+
* - Most pre-2024 models - Limited function calling capabilities
|
|
53
57
|
*
|
|
54
|
-
*
|
|
58
|
+
* **Implementation Details:**
|
|
59
|
+
* - Intelligent model detection based on known capabilities
|
|
60
|
+
* - Custom tool schema formatting for HuggingFace models
|
|
61
|
+
* - Enhanced response parsing for function call extraction
|
|
62
|
+
* - Graceful fallback for unsupported models
|
|
63
|
+
*
|
|
64
|
+
* @returns true for supported models, false for unsupported models
|
|
55
65
|
*/
|
|
56
66
|
supportsTools() {
|
|
57
|
-
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
67
|
+
const modelName = this.modelName.toLowerCase();
|
|
68
|
+
// Check if model is in the list of known tool-calling capable models
|
|
69
|
+
const toolCapableModels = [
|
|
70
|
+
// Llama 3.1 series (post-trained for tool calling)
|
|
71
|
+
"llama-3.1-8b-instruct",
|
|
72
|
+
"llama-3.1-70b-instruct",
|
|
73
|
+
"llama-3.1-405b-instruct",
|
|
74
|
+
"llama-3.1-nemotron-ultra",
|
|
75
|
+
// Hermes series (function calling trained)
|
|
76
|
+
"hermes-3-llama-3.2",
|
|
77
|
+
"hermes-2-pro",
|
|
78
|
+
// Code Llama (code-focused tool calling)
|
|
79
|
+
"codellama-34b-instruct",
|
|
80
|
+
"codellama-13b-instruct",
|
|
81
|
+
// Mistral series (basic tool support)
|
|
82
|
+
"mistral-7b-instruct-v0.3",
|
|
83
|
+
"mistral-8x7b-instruct",
|
|
84
|
+
// Other known tool-capable models
|
|
85
|
+
"nous-hermes",
|
|
86
|
+
"openchat",
|
|
87
|
+
"wizardcoder",
|
|
88
|
+
];
|
|
89
|
+
// Check if current model matches any tool-capable model patterns
|
|
90
|
+
const isToolCapable = toolCapableModels.some((capableModel) => modelName.includes(capableModel));
|
|
91
|
+
if (isToolCapable) {
|
|
92
|
+
logger.debug("HuggingFace tool calling enabled", {
|
|
93
|
+
model: this.modelName,
|
|
94
|
+
reason: "Model supports function calling",
|
|
95
|
+
});
|
|
96
|
+
return true;
|
|
97
|
+
}
|
|
98
|
+
// Log why tools are disabled for transparency
|
|
99
|
+
logger.debug("HuggingFace tool calling disabled", {
|
|
100
|
+
model: this.modelName,
|
|
101
|
+
reason: "Model not in tool-capable list",
|
|
102
|
+
suggestion: "Consider using Llama-3.1-* or Hermes-3-* models for tool calling",
|
|
103
|
+
});
|
|
72
104
|
return false;
|
|
73
105
|
}
|
|
74
106
|
// executeGenerate removed - BaseProvider handles all generation with tools
|
|
@@ -77,18 +109,20 @@ export class HuggingFaceProvider extends BaseProvider {
|
|
|
77
109
|
const timeout = this.getTimeout(options);
|
|
78
110
|
const timeoutController = createTimeoutController(timeout, this.providerName, "stream");
|
|
79
111
|
try {
|
|
112
|
+
// Enhanced tool handling for HuggingFace models
|
|
113
|
+
const streamOptions = this.prepareStreamOptions(options, analysisSchema);
|
|
80
114
|
const result = await streamText({
|
|
81
115
|
model: this.model,
|
|
82
|
-
prompt:
|
|
83
|
-
system:
|
|
116
|
+
prompt: streamOptions.prompt,
|
|
117
|
+
system: streamOptions.system,
|
|
84
118
|
temperature: options.temperature,
|
|
85
119
|
maxTokens: options.maxTokens || DEFAULT_MAX_TOKENS,
|
|
86
|
-
tools:
|
|
87
|
-
toolChoice:
|
|
120
|
+
tools: streamOptions.tools, // Tools format conversion handled by prepareStreamOptions
|
|
121
|
+
toolChoice: streamOptions.toolChoice, // Tool choice handled by prepareStreamOptions
|
|
88
122
|
abortSignal: timeoutController?.controller.signal,
|
|
89
123
|
});
|
|
90
124
|
timeoutController?.cleanup();
|
|
91
|
-
// Transform stream to match StreamResult interface
|
|
125
|
+
// Transform stream to match StreamResult interface with enhanced tool call parsing
|
|
92
126
|
const transformedStream = async function* () {
|
|
93
127
|
for await (const chunk of result.textStream) {
|
|
94
128
|
yield { content: chunk };
|
|
@@ -105,18 +139,103 @@ export class HuggingFaceProvider extends BaseProvider {
|
|
|
105
139
|
throw this.handleProviderError(error);
|
|
106
140
|
}
|
|
107
141
|
}
|
|
108
|
-
|
|
109
|
-
|
|
142
|
+
/**
|
|
143
|
+
* Prepare stream options with HuggingFace-specific enhancements
|
|
144
|
+
* Handles tool calling optimizations and model-specific formatting
|
|
145
|
+
*/
|
|
146
|
+
prepareStreamOptions(options, analysisSchema) {
|
|
147
|
+
const modelSupportsTools = this.supportsTools();
|
|
148
|
+
// If model doesn't support tools, disable them completely
|
|
149
|
+
if (!modelSupportsTools) {
|
|
150
|
+
return {
|
|
151
|
+
prompt: options.input.text,
|
|
152
|
+
system: options.systemPrompt,
|
|
153
|
+
tools: undefined,
|
|
154
|
+
toolChoice: undefined,
|
|
155
|
+
};
|
|
156
|
+
}
|
|
157
|
+
// For tool-capable models, enhance the prompt with tool calling instructions
|
|
158
|
+
const enhancedSystemPrompt = this.enhanceSystemPromptForTools(options.systemPrompt, options.tools);
|
|
159
|
+
// Format tools using HuggingFace-compatible schema if tools are provided
|
|
160
|
+
const formattedTools = options.tools
|
|
161
|
+
? this.formatToolsForHuggingFace(options.tools)
|
|
162
|
+
: undefined;
|
|
163
|
+
return {
|
|
164
|
+
prompt: options.input.text,
|
|
165
|
+
system: enhancedSystemPrompt,
|
|
166
|
+
tools: formattedTools,
|
|
167
|
+
toolChoice: formattedTools ? "auto" : undefined,
|
|
168
|
+
};
|
|
110
169
|
}
|
|
111
|
-
|
|
112
|
-
|
|
170
|
+
/**
|
|
171
|
+
* Enhance system prompt with tool calling instructions for HuggingFace models
|
|
172
|
+
* Many HF models benefit from explicit tool calling guidance
|
|
173
|
+
*/
|
|
174
|
+
enhanceSystemPromptForTools(originalSystemPrompt, tools) {
|
|
175
|
+
if (!tools || !this.supportsTools()) {
|
|
176
|
+
return originalSystemPrompt || "";
|
|
177
|
+
}
|
|
178
|
+
const toolInstructions = `
|
|
179
|
+
You have access to function tools. When you need to use a tool to answer the user's request:
|
|
180
|
+
1. Identify the appropriate tool from the available functions
|
|
181
|
+
2. Call the function with the correct parameters in JSON format
|
|
182
|
+
3. Use the function results to provide a comprehensive answer
|
|
183
|
+
|
|
184
|
+
Available tools will be provided in the function calling format. Use them when they can help answer the user's question.
|
|
185
|
+
`;
|
|
186
|
+
return originalSystemPrompt
|
|
187
|
+
? `${originalSystemPrompt}\n\n${toolInstructions}`
|
|
188
|
+
: toolInstructions;
|
|
113
189
|
}
|
|
114
190
|
/**
|
|
115
|
-
*
|
|
191
|
+
* Format tools for HuggingFace model compatibility
|
|
192
|
+
* Some models require specific tool schema formatting
|
|
116
193
|
*/
|
|
117
|
-
|
|
118
|
-
|
|
194
|
+
formatToolsForHuggingFace(tools) {
|
|
195
|
+
// For now, pass through tools as-is since we're using OpenAI-compatible endpoint
|
|
196
|
+
// Future enhancement: Add model-specific tool formatting if needed
|
|
197
|
+
return tools;
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* Get recommendations for tool-calling capable HuggingFace models
|
|
201
|
+
* Provides guidance for users who want to use function calling
|
|
202
|
+
*/
|
|
203
|
+
static getToolCallingRecommendations() {
|
|
204
|
+
return {
|
|
205
|
+
recommended: [
|
|
206
|
+
"meta-llama/Llama-3.1-8B-Instruct",
|
|
207
|
+
"meta-llama/Llama-3.1-70B-Instruct",
|
|
208
|
+
"nvidia/Llama-3.1-Nemotron-Ultra-253B-v1",
|
|
209
|
+
"NousResearch/Hermes-3-Llama-3.2-3B",
|
|
210
|
+
"codellama/CodeLlama-34b-Instruct-hf",
|
|
211
|
+
],
|
|
212
|
+
performance: {
|
|
213
|
+
"meta-llama/Llama-3.1-8B-Instruct": { speed: 3, quality: 2, cost: 3 },
|
|
214
|
+
"meta-llama/Llama-3.1-70B-Instruct": { speed: 2, quality: 3, cost: 2 },
|
|
215
|
+
"nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": {
|
|
216
|
+
speed: 2,
|
|
217
|
+
quality: 3,
|
|
218
|
+
cost: 1,
|
|
219
|
+
},
|
|
220
|
+
"NousResearch/Hermes-3-Llama-3.2-3B": { speed: 3, quality: 2, cost: 3 },
|
|
221
|
+
"codellama/CodeLlama-34b-Instruct-hf": {
|
|
222
|
+
speed: 2,
|
|
223
|
+
quality: 3,
|
|
224
|
+
cost: 2,
|
|
225
|
+
},
|
|
226
|
+
},
|
|
227
|
+
notes: {
|
|
228
|
+
"meta-llama/Llama-3.1-8B-Instruct": "Best balance of speed and tool calling capability",
|
|
229
|
+
"meta-llama/Llama-3.1-70B-Instruct": "High-quality tool calling, slower inference",
|
|
230
|
+
"nvidia/Llama-3.1-Nemotron-Ultra-253B-v1": "Optimized for tool calling, requires more resources",
|
|
231
|
+
"NousResearch/Hermes-3-Llama-3.2-3B": "Lightweight with good tool calling support",
|
|
232
|
+
"codellama/CodeLlama-34b-Instruct-hf": "Excellent for code-related tool calling",
|
|
233
|
+
},
|
|
234
|
+
};
|
|
119
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Enhanced error handling with HuggingFace-specific guidance
|
|
238
|
+
*/
|
|
120
239
|
handleProviderError(error) {
|
|
121
240
|
if (error instanceof TimeoutError) {
|
|
122
241
|
return new Error(`HuggingFace request timed out: ${error.message}`);
|
|
@@ -125,14 +244,33 @@ export class HuggingFaceProvider extends BaseProvider {
|
|
|
125
244
|
const message = errorObj?.message && typeof errorObj.message === "string"
|
|
126
245
|
? errorObj.message
|
|
127
246
|
: "Unknown error";
|
|
247
|
+
// Enhanced error messages with tool calling context
|
|
128
248
|
if (message.includes("API_TOKEN_INVALID") ||
|
|
129
249
|
message.includes("Invalid token")) {
|
|
130
|
-
return new Error("Invalid HuggingFace API token. Please check your
|
|
250
|
+
return new Error("❌ Invalid HuggingFace API token. Please check your HUGGINGFACE_API_KEY environment variable.");
|
|
131
251
|
}
|
|
132
252
|
if (message.includes("rate limit")) {
|
|
133
|
-
return new Error("HuggingFace rate limit exceeded.
|
|
253
|
+
return new Error("❌ HuggingFace rate limit exceeded. Consider using a paid plan or try again later.");
|
|
254
|
+
}
|
|
255
|
+
if (message.includes("model") && message.includes("not found")) {
|
|
256
|
+
return new Error(`❌ HuggingFace model '${this.modelName}' not found.\n\nSuggestions:\n1. Check model name spelling\n2. Ensure model exists on HuggingFace Hub\n3. For tool calling, use: Llama-3.1-8B-Instruct, Hermes-3-Llama-3.2-3B, or CodeLlama-34b-Instruct-hf`);
|
|
134
257
|
}
|
|
135
|
-
|
|
258
|
+
if (message.includes("function") || message.includes("tool")) {
|
|
259
|
+
return new Error(`❌ HuggingFace tool calling error: ${message}\n\nNotes:\n1. Ensure you're using a tool-capable model (Llama-3.1+, Hermes-3+, CodeLlama)\n2. Check that your model supports function calling\n3. Verify tool schema format is correct`);
|
|
260
|
+
}
|
|
261
|
+
return new Error(`❌ HuggingFace Provider Error: ${message}`);
|
|
262
|
+
}
|
|
263
|
+
getProviderName() {
|
|
264
|
+
return "huggingface";
|
|
265
|
+
}
|
|
266
|
+
getDefaultModel() {
|
|
267
|
+
return getDefaultHuggingFaceModel();
|
|
268
|
+
}
|
|
269
|
+
/**
|
|
270
|
+
* Returns the Vercel AI SDK model instance for HuggingFace
|
|
271
|
+
*/
|
|
272
|
+
getAISDKModel() {
|
|
273
|
+
return this.model;
|
|
136
274
|
}
|
|
137
275
|
// ===================
|
|
138
276
|
// PRIVATE VALIDATION METHODS
|
|
@@ -9,6 +9,9 @@ import { BaseProvider } from "../core/baseProvider.js";
|
|
|
9
9
|
*/
|
|
10
10
|
export declare class LiteLLMProvider extends BaseProvider {
|
|
11
11
|
private model;
|
|
12
|
+
private static modelsCache;
|
|
13
|
+
private static modelsCacheTime;
|
|
14
|
+
private static readonly MODELS_CACHE_DURATION;
|
|
12
15
|
constructor(modelName?: string, sdk?: unknown);
|
|
13
16
|
protected getProviderName(): AIProviderName;
|
|
14
17
|
protected getDefaultModel(): string;
|
|
@@ -28,16 +31,13 @@ export declare class LiteLLMProvider extends BaseProvider {
|
|
|
28
31
|
protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
|
|
29
32
|
/**
|
|
30
33
|
* Get available models from LiteLLM proxy server
|
|
31
|
-
*
|
|
32
|
-
* TODO: Implement dynamic fetching from LiteLLM's /v1/models endpoint.
|
|
33
|
-
* Currently returns a hardcoded list of commonly available models.
|
|
34
|
-
*
|
|
35
|
-
* Implementation would involve:
|
|
36
|
-
* 1. Fetch from `${baseURL}/v1/models`
|
|
37
|
-
* 2. Parse response to extract model IDs
|
|
38
|
-
* 3. Handle network errors gracefully
|
|
39
|
-
* 4. Cache results to avoid repeated API calls
|
|
34
|
+
* Dynamically fetches from /v1/models endpoint with caching and fallback
|
|
40
35
|
*/
|
|
41
36
|
getAvailableModels(): Promise<string[]>;
|
|
37
|
+
/**
|
|
38
|
+
* Fetch available models from LiteLLM proxy /v1/models endpoint
|
|
39
|
+
* @private
|
|
40
|
+
*/
|
|
41
|
+
private fetchModelsFromAPI;
|
|
42
42
|
private validateStreamOptions;
|
|
43
43
|
}
|
|
@@ -34,6 +34,10 @@ const getDefaultLiteLLMModel = () => {
|
|
|
34
34
|
*/
|
|
35
35
|
export class LiteLLMProvider extends BaseProvider {
|
|
36
36
|
model;
|
|
37
|
+
// Cache for available models to avoid repeated API calls
|
|
38
|
+
static modelsCache = [];
|
|
39
|
+
static modelsCacheTime = 0;
|
|
40
|
+
static MODELS_CACHE_DURATION = 10 * 60 * 1000; // 10 minutes
|
|
37
41
|
constructor(modelName, sdk) {
|
|
38
42
|
super(modelName, "litellm", sdk);
|
|
39
43
|
// Initialize LiteLLM using OpenAI SDK with explicit configuration
|
|
@@ -154,28 +158,111 @@ export class LiteLLMProvider extends BaseProvider {
|
|
|
154
158
|
}
|
|
155
159
|
/**
|
|
156
160
|
* Get available models from LiteLLM proxy server
|
|
157
|
-
*
|
|
158
|
-
* TODO: Implement dynamic fetching from LiteLLM's /v1/models endpoint.
|
|
159
|
-
* Currently returns a hardcoded list of commonly available models.
|
|
160
|
-
*
|
|
161
|
-
* Implementation would involve:
|
|
162
|
-
* 1. Fetch from `${baseURL}/v1/models`
|
|
163
|
-
* 2. Parse response to extract model IDs
|
|
164
|
-
* 3. Handle network errors gracefully
|
|
165
|
-
* 4. Cache results to avoid repeated API calls
|
|
161
|
+
* Dynamically fetches from /v1/models endpoint with caching and fallback
|
|
166
162
|
*/
|
|
167
163
|
async getAvailableModels() {
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
164
|
+
const functionTag = "LiteLLMProvider.getAvailableModels";
|
|
165
|
+
const now = Date.now();
|
|
166
|
+
// Check if cached models are still valid
|
|
167
|
+
if (LiteLLMProvider.modelsCache.length > 0 &&
|
|
168
|
+
now - LiteLLMProvider.modelsCacheTime <
|
|
169
|
+
LiteLLMProvider.MODELS_CACHE_DURATION) {
|
|
170
|
+
logger.debug(`[${functionTag}] Using cached models`, {
|
|
171
|
+
cacheAge: Math.round((now - LiteLLMProvider.modelsCacheTime) / 1000),
|
|
172
|
+
modelCount: LiteLLMProvider.modelsCache.length,
|
|
173
|
+
});
|
|
174
|
+
return LiteLLMProvider.modelsCache;
|
|
175
|
+
}
|
|
176
|
+
// Try to fetch models dynamically
|
|
177
|
+
try {
|
|
178
|
+
const dynamicModels = await this.fetchModelsFromAPI();
|
|
179
|
+
if (dynamicModels.length > 0) {
|
|
180
|
+
// Cache successful result
|
|
181
|
+
LiteLLMProvider.modelsCache = dynamicModels;
|
|
182
|
+
LiteLLMProvider.modelsCacheTime = now;
|
|
183
|
+
logger.debug(`[${functionTag}] Successfully fetched models from API`, {
|
|
184
|
+
modelCount: dynamicModels.length,
|
|
185
|
+
});
|
|
186
|
+
return dynamicModels;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
catch (error) {
|
|
190
|
+
logger.warn(`[${functionTag}] Failed to fetch models from API, using fallback`, {
|
|
191
|
+
error: error instanceof Error ? error.message : String(error),
|
|
192
|
+
});
|
|
193
|
+
}
|
|
194
|
+
// Fallback to hardcoded list if API fetch fails
|
|
195
|
+
const fallbackModels = [
|
|
171
196
|
"openai/gpt-4o",
|
|
172
197
|
"openai/gpt-4o-mini",
|
|
173
|
-
"
|
|
174
|
-
"anthropic/claude-3-
|
|
198
|
+
"openai/gpt-3.5-turbo",
|
|
199
|
+
"anthropic/claude-3-5-sonnet-20241022",
|
|
200
|
+
"anthropic/claude-3-haiku-20240307",
|
|
175
201
|
"google/gemini-2.0-flash",
|
|
176
|
-
"
|
|
177
|
-
"mistral/mistral-
|
|
202
|
+
"google/gemini-1.5-pro",
|
|
203
|
+
"mistral/mistral-large-latest",
|
|
204
|
+
"mistral/mistral-medium-latest",
|
|
205
|
+
"meta-llama/llama-3.1-8b-instruct",
|
|
206
|
+
"meta-llama/llama-3.1-70b-instruct",
|
|
178
207
|
];
|
|
208
|
+
logger.debug(`[${functionTag}] Using fallback model list`, {
|
|
209
|
+
modelCount: fallbackModels.length,
|
|
210
|
+
});
|
|
211
|
+
return fallbackModels;
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Fetch available models from LiteLLM proxy /v1/models endpoint
|
|
215
|
+
* @private
|
|
216
|
+
*/
|
|
217
|
+
async fetchModelsFromAPI() {
|
|
218
|
+
const functionTag = "LiteLLMProvider.fetchModelsFromAPI";
|
|
219
|
+
const config = getLiteLLMConfig();
|
|
220
|
+
const modelsUrl = `${config.baseURL}/v1/models`;
|
|
221
|
+
const controller = new AbortController();
|
|
222
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000); // 5 second timeout
|
|
223
|
+
try {
|
|
224
|
+
logger.debug(`[${functionTag}] Fetching models from ${modelsUrl}`);
|
|
225
|
+
const response = await fetch(modelsUrl, {
|
|
226
|
+
method: "GET",
|
|
227
|
+
headers: {
|
|
228
|
+
Authorization: `Bearer ${config.apiKey}`,
|
|
229
|
+
"Content-Type": "application/json",
|
|
230
|
+
},
|
|
231
|
+
signal: controller.signal,
|
|
232
|
+
});
|
|
233
|
+
clearTimeout(timeoutId);
|
|
234
|
+
if (!response.ok) {
|
|
235
|
+
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
236
|
+
}
|
|
237
|
+
const data = await response.json();
|
|
238
|
+
// Parse OpenAI-compatible models response
|
|
239
|
+
if (data && Array.isArray(data.data)) {
|
|
240
|
+
const models = data.data
|
|
241
|
+
.map((model) => typeof model === "object" &&
|
|
242
|
+
model !== null &&
|
|
243
|
+
"id" in model &&
|
|
244
|
+
typeof model.id === "string"
|
|
245
|
+
? model.id
|
|
246
|
+
: undefined)
|
|
247
|
+
.filter((id) => typeof id === "string" && id.length > 0)
|
|
248
|
+
.sort();
|
|
249
|
+
logger.debug(`[${functionTag}] Successfully parsed models`, {
|
|
250
|
+
totalModels: models.length,
|
|
251
|
+
sampleModels: models.slice(0, 5),
|
|
252
|
+
});
|
|
253
|
+
return models;
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
throw new Error("Invalid response format: expected data.data array");
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
catch (error) {
|
|
260
|
+
clearTimeout(timeoutId);
|
|
261
|
+
if (error instanceof Error && error.name === "AbortError") {
|
|
262
|
+
throw new Error("Request timed out after 5 seconds");
|
|
263
|
+
}
|
|
264
|
+
throw error;
|
|
265
|
+
}
|
|
179
266
|
}
|
|
180
267
|
// ===================
|
|
181
268
|
// PRIVATE VALIDATION METHODS
|
|
@@ -28,30 +28,51 @@ export declare class OllamaProvider extends BaseProvider {
|
|
|
28
28
|
*/
|
|
29
29
|
protected getAISDKModel(): LanguageModelV1;
|
|
30
30
|
/**
|
|
31
|
-
* Ollama
|
|
31
|
+
* Ollama Tool Calling Support (Enhanced 2025)
|
|
32
32
|
*
|
|
33
|
-
*
|
|
34
|
-
*
|
|
35
|
-
* with BaseProvider's tool calling mechanism
|
|
36
|
-
* 2. Ollama models require specific prompt formatting for function calls that differs
|
|
37
|
-
* from the standardized AI SDK format
|
|
38
|
-
* 3. Tool response parsing and execution flow needs custom implementation
|
|
33
|
+
* Uses configurable model list from ModelConfiguration instead of hardcoded values.
|
|
34
|
+
* Tool-capable models can be configured via OLLAMA_TOOL_CAPABLE_MODELS environment variable.
|
|
39
35
|
*
|
|
40
|
-
* **
|
|
41
|
-
* -
|
|
42
|
-
* -
|
|
43
|
-
* -
|
|
44
|
-
* - Test with models that support function calling (llama3.1, mistral, etc.)
|
|
36
|
+
* **Configuration Options:**
|
|
37
|
+
* - Environment variable: OLLAMA_TOOL_CAPABLE_MODELS (comma-separated list)
|
|
38
|
+
* - Configuration file: providers.ollama.modelBehavior.toolCapableModels
|
|
39
|
+
* - Fallback: Default list of known tool-capable models
|
|
45
40
|
*
|
|
46
|
-
* **
|
|
47
|
-
* -
|
|
48
|
-
* -
|
|
49
|
-
* -
|
|
41
|
+
* **Implementation Features:**
|
|
42
|
+
* - Direct Ollama API integration (/v1/chat/completions)
|
|
43
|
+
* - Automatic tool schema conversion to Ollama format
|
|
44
|
+
* - Streaming tool calls with incremental response parsing
|
|
45
|
+
* - Model compatibility validation and fallback handling
|
|
50
46
|
*
|
|
51
|
-
* @returns
|
|
47
|
+
* @returns true for supported models, false for unsupported models
|
|
52
48
|
*/
|
|
53
49
|
supportsTools(): boolean;
|
|
54
50
|
protected executeStream(options: StreamOptions, analysisSchema?: ZodType<unknown, ZodTypeDef, unknown> | Schema<unknown>): Promise<StreamResult>;
|
|
51
|
+
/**
|
|
52
|
+
* Execute streaming with Ollama's function calling support
|
|
53
|
+
* Uses the /v1/chat/completions endpoint with tools parameter
|
|
54
|
+
*/
|
|
55
|
+
private executeStreamWithTools;
|
|
56
|
+
/**
|
|
57
|
+
* Execute streaming without tools using the generate API
|
|
58
|
+
* Fallback for non-tool scenarios or when chat API is unavailable
|
|
59
|
+
*/
|
|
60
|
+
private executeStreamWithoutTools;
|
|
61
|
+
/**
|
|
62
|
+
* Convert AI SDK tools format to Ollama's function calling format
|
|
63
|
+
*/
|
|
64
|
+
private convertToolsToOllamaFormat;
|
|
65
|
+
/**
|
|
66
|
+
* Create stream generator for Ollama chat API with tool call support
|
|
67
|
+
*/
|
|
68
|
+
private createOllamaChatStream;
|
|
69
|
+
/**
|
|
70
|
+
* Format tool calls for display when tools aren't executed directly
|
|
71
|
+
*/
|
|
72
|
+
private formatToolCallForDisplay;
|
|
73
|
+
/**
|
|
74
|
+
* Create stream generator for Ollama generate API (non-tool mode)
|
|
75
|
+
*/
|
|
55
76
|
private createOllamaStream;
|
|
56
77
|
protected handleProviderError(error: unknown): Error;
|
|
57
78
|
private validateStreamOptions;
|
|
@@ -67,5 +88,19 @@ export declare class OllamaProvider extends BaseProvider {
|
|
|
67
88
|
* Check if a specific model is available
|
|
68
89
|
*/
|
|
69
90
|
isModelAvailable(modelName: string): Promise<boolean>;
|
|
91
|
+
/**
|
|
92
|
+
* Get recommendations for tool-calling capable Ollama models
|
|
93
|
+
* Provides guidance for users who want to use function calling locally
|
|
94
|
+
*/
|
|
95
|
+
static getToolCallingRecommendations(): {
|
|
96
|
+
recommended: string[];
|
|
97
|
+
performance: Record<string, {
|
|
98
|
+
speed: number;
|
|
99
|
+
quality: number;
|
|
100
|
+
size: string;
|
|
101
|
+
}>;
|
|
102
|
+
notes: Record<string, string>;
|
|
103
|
+
installation: Record<string, string>;
|
|
104
|
+
};
|
|
70
105
|
}
|
|
71
106
|
export default OllamaProvider;
|