@sparkleideas/providers 3.5.2-patch.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +574 -0
- package/package.json +70 -0
- package/src/__tests__/provider-integration.test.ts +446 -0
- package/src/__tests__/quick-test.ts +356 -0
- package/src/anthropic-provider.ts +435 -0
- package/src/base-provider.ts +596 -0
- package/src/cohere-provider.ts +423 -0
- package/src/google-provider.ts +429 -0
- package/src/index.ts +40 -0
- package/src/ollama-provider.ts +408 -0
- package/src/openai-provider.ts +490 -0
- package/src/provider-manager.ts +538 -0
- package/src/ruvector-provider.ts +721 -0
- package/src/types.ts +435 -0
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* V3 Ollama Provider (Local Models)
|
|
3
|
+
*
|
|
4
|
+
* Supports Llama, Mistral, CodeLlama, Phi, and other local models.
|
|
5
|
+
* Zero cost - runs entirely locally.
|
|
6
|
+
*
|
|
7
|
+
* @module @sparkleideas/providers/ollama-provider
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { BaseProvider, BaseProviderOptions } from './base-provider.js';
|
|
11
|
+
import {
|
|
12
|
+
LLMProvider,
|
|
13
|
+
LLMModel,
|
|
14
|
+
LLMRequest,
|
|
15
|
+
LLMResponse,
|
|
16
|
+
LLMStreamEvent,
|
|
17
|
+
ModelInfo,
|
|
18
|
+
ProviderCapabilities,
|
|
19
|
+
HealthCheckResult,
|
|
20
|
+
ProviderUnavailableError,
|
|
21
|
+
LLMProviderError,
|
|
22
|
+
} from './types.js';
|
|
23
|
+
|
|
24
|
+
/**
 * Request body for Ollama's POST /api/chat endpoint.
 * Mirrors the wire format: snake_case sampling options, an optional
 * `stream` flag, and OpenAI-style function tool declarations.
 */
interface OllamaRequest {
  model: string;
  // Conversation history. Ollama has no 'tool' role; buildRequest() maps
  // 'tool' messages to 'assistant' before they reach this shape.
  messages: Array<{
    role: 'system' | 'user' | 'assistant';
    content: string;
  }>;
  // Sampling options — only attached when at least one value is set.
  options?: {
    temperature?: number;
    top_p?: number;
    top_k?: number;
    // Ollama's name for the max-output-token limit (set from maxTokens).
    num_predict?: number;
    stop?: string[];
  };
  // When true, the server responds with newline-delimited JSON chunks.
  stream?: boolean;
  // OpenAI-style function tool declarations, passed through unchanged.
  tools?: Array<{
    type: 'function';
    function: {
      name: string;
      description: string;
      parameters: unknown;
    };
  }>;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Response shape from Ollama's /api/chat endpoint.
 * For non-streaming calls this is the entire response; for streaming calls
 * each NDJSON line is one of these, with `done: true` on the final chunk.
 */
interface OllamaResponse {
  model: string;
  created_at: string;
  message: {
    role: string;
    content: string;
    // Present when the model invoked a declared tool. Note: Ollama does not
    // supply call ids; the provider synthesizes them.
    tool_calls?: Array<{
      function: { name: string; arguments: unknown };
    }>;
  };
  // True on the terminal chunk; the consumer reads token counts only then.
  done: boolean;
  // Durations are reported in nanoseconds (converted to ms downstream).
  total_duration?: number;
  load_duration?: number;
  // Prompt token count (used as promptTokens in usage reporting).
  prompt_eval_count?: number;
  prompt_eval_duration?: number;
  // Completion token count (used as completionTokens in usage reporting).
  eval_count?: number;
  eval_duration?: number;
}
|
|
66
|
+
|
|
67
|
+
export class OllamaProvider extends BaseProvider {
|
|
68
|
+
readonly name: LLMProvider = 'ollama';
|
|
69
|
+
readonly capabilities: ProviderCapabilities = {
|
|
70
|
+
supportedModels: [
|
|
71
|
+
'llama3.2',
|
|
72
|
+
'llama3.1',
|
|
73
|
+
'mistral',
|
|
74
|
+
'mixtral',
|
|
75
|
+
'codellama',
|
|
76
|
+
'phi-4',
|
|
77
|
+
'deepseek-coder',
|
|
78
|
+
],
|
|
79
|
+
maxContextLength: {
|
|
80
|
+
'llama3.2': 128000,
|
|
81
|
+
'llama3.1': 128000,
|
|
82
|
+
'mistral': 32000,
|
|
83
|
+
'mixtral': 32000,
|
|
84
|
+
'codellama': 16000,
|
|
85
|
+
'phi-4': 16000,
|
|
86
|
+
'deepseek-coder': 16000,
|
|
87
|
+
},
|
|
88
|
+
maxOutputTokens: {
|
|
89
|
+
'llama3.2': 8192,
|
|
90
|
+
'llama3.1': 8192,
|
|
91
|
+
'mistral': 8192,
|
|
92
|
+
'mixtral': 8192,
|
|
93
|
+
'codellama': 8192,
|
|
94
|
+
'phi-4': 8192,
|
|
95
|
+
'deepseek-coder': 8192,
|
|
96
|
+
},
|
|
97
|
+
supportsStreaming: true,
|
|
98
|
+
supportsToolCalling: true,
|
|
99
|
+
supportsSystemMessages: true,
|
|
100
|
+
supportsVision: true, // Some models
|
|
101
|
+
supportsAudio: false,
|
|
102
|
+
supportsFineTuning: false,
|
|
103
|
+
supportsEmbeddings: true,
|
|
104
|
+
supportsBatching: false,
|
|
105
|
+
rateLimit: {
|
|
106
|
+
requestsPerMinute: 10000, // Local - no rate limit
|
|
107
|
+
tokensPerMinute: 10000000,
|
|
108
|
+
concurrentRequests: 10,
|
|
109
|
+
},
|
|
110
|
+
// All free - local execution
|
|
111
|
+
pricing: {
|
|
112
|
+
'llama3.2': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
113
|
+
'llama3.1': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
114
|
+
'mistral': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
115
|
+
'mixtral': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
116
|
+
'codellama': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
117
|
+
'phi-4': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
118
|
+
'deepseek-coder': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
119
|
+
},
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
private baseUrl: string = 'http://localhost:11434';
|
|
123
|
+
|
|
124
|
+
constructor(options: BaseProviderOptions) {
|
|
125
|
+
super(options);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
protected async doInitialize(): Promise<void> {
|
|
129
|
+
this.baseUrl = this.config.apiUrl || 'http://localhost:11434';
|
|
130
|
+
|
|
131
|
+
// Check if Ollama is running
|
|
132
|
+
const health = await this.doHealthCheck();
|
|
133
|
+
if (!health.healthy) {
|
|
134
|
+
this.logger.warn('Ollama server not detected. Ensure Ollama is running locally.');
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
protected async doComplete(request: LLMRequest): Promise<LLMResponse> {
|
|
139
|
+
const ollamaRequest = this.buildRequest(request);
|
|
140
|
+
|
|
141
|
+
const controller = new AbortController();
|
|
142
|
+
const timeout = setTimeout(() => controller.abort(), this.config.timeout || 120000);
|
|
143
|
+
|
|
144
|
+
try {
|
|
145
|
+
const response = await fetch(`${this.baseUrl}/api/chat`, {
|
|
146
|
+
method: 'POST',
|
|
147
|
+
headers: { 'Content-Type': 'application/json' },
|
|
148
|
+
body: JSON.stringify(ollamaRequest),
|
|
149
|
+
signal: controller.signal,
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
clearTimeout(timeout);
|
|
153
|
+
|
|
154
|
+
if (!response.ok) {
|
|
155
|
+
await this.handleErrorResponse(response);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
const data = await response.json() as OllamaResponse;
|
|
159
|
+
return this.transformResponse(data, request);
|
|
160
|
+
} catch (error) {
|
|
161
|
+
clearTimeout(timeout);
|
|
162
|
+
throw this.transformError(error);
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
protected async *doStreamComplete(request: LLMRequest): AsyncIterable<LLMStreamEvent> {
|
|
167
|
+
const ollamaRequest = this.buildRequest(request, true);
|
|
168
|
+
|
|
169
|
+
const controller = new AbortController();
|
|
170
|
+
const timeout = setTimeout(() => controller.abort(), (this.config.timeout || 120000) * 2);
|
|
171
|
+
|
|
172
|
+
try {
|
|
173
|
+
const response = await fetch(`${this.baseUrl}/api/chat`, {
|
|
174
|
+
method: 'POST',
|
|
175
|
+
headers: { 'Content-Type': 'application/json' },
|
|
176
|
+
body: JSON.stringify(ollamaRequest),
|
|
177
|
+
signal: controller.signal,
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
if (!response.ok) {
|
|
181
|
+
await this.handleErrorResponse(response);
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
const reader = response.body!.getReader();
|
|
185
|
+
const decoder = new TextDecoder();
|
|
186
|
+
let buffer = '';
|
|
187
|
+
let promptTokens = 0;
|
|
188
|
+
let completionTokens = 0;
|
|
189
|
+
|
|
190
|
+
while (true) {
|
|
191
|
+
const { done, value } = await reader.read();
|
|
192
|
+
if (done) break;
|
|
193
|
+
|
|
194
|
+
buffer += decoder.decode(value, { stream: true });
|
|
195
|
+
const lines = buffer.split('\n');
|
|
196
|
+
buffer = lines.pop() || '';
|
|
197
|
+
|
|
198
|
+
for (const line of lines) {
|
|
199
|
+
if (!line.trim()) continue;
|
|
200
|
+
|
|
201
|
+
try {
|
|
202
|
+
const chunk: OllamaResponse = JSON.parse(line);
|
|
203
|
+
|
|
204
|
+
if (chunk.message?.content) {
|
|
205
|
+
yield {
|
|
206
|
+
type: 'content',
|
|
207
|
+
delta: { content: chunk.message.content },
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
if (chunk.done) {
|
|
212
|
+
promptTokens = chunk.prompt_eval_count || 0;
|
|
213
|
+
completionTokens = chunk.eval_count || 0;
|
|
214
|
+
|
|
215
|
+
yield {
|
|
216
|
+
type: 'done',
|
|
217
|
+
usage: {
|
|
218
|
+
promptTokens,
|
|
219
|
+
completionTokens,
|
|
220
|
+
totalTokens: promptTokens + completionTokens,
|
|
221
|
+
},
|
|
222
|
+
cost: {
|
|
223
|
+
promptCost: 0,
|
|
224
|
+
completionCost: 0,
|
|
225
|
+
totalCost: 0,
|
|
226
|
+
currency: 'USD',
|
|
227
|
+
},
|
|
228
|
+
};
|
|
229
|
+
}
|
|
230
|
+
} catch {
|
|
231
|
+
// Ignore parse errors
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
} catch (error) {
|
|
236
|
+
clearTimeout(timeout);
|
|
237
|
+
throw this.transformError(error);
|
|
238
|
+
} finally {
|
|
239
|
+
clearTimeout(timeout);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
async listModels(): Promise<LLMModel[]> {
|
|
244
|
+
try {
|
|
245
|
+
const response = await fetch(`${this.baseUrl}/api/tags`);
|
|
246
|
+
if (!response.ok) {
|
|
247
|
+
return this.capabilities.supportedModels;
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
const data = await response.json() as { models?: Array<{ name: string }> };
|
|
251
|
+
return data.models?.map((m) => m.name as LLMModel) || this.capabilities.supportedModels;
|
|
252
|
+
} catch {
|
|
253
|
+
return this.capabilities.supportedModels;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
async getModelInfo(model: LLMModel): Promise<ModelInfo> {
|
|
258
|
+
const descriptions: Record<string, string> = {
|
|
259
|
+
'llama3.2': 'Meta Llama 3.2 - Fast and capable',
|
|
260
|
+
'llama3.1': 'Meta Llama 3.1 - High performance',
|
|
261
|
+
'mistral': 'Mistral 7B - Efficient and fast',
|
|
262
|
+
'mixtral': 'Mixtral 8x7B - Mixture of experts',
|
|
263
|
+
'codellama': 'Code Llama - Optimized for code',
|
|
264
|
+
'phi-4': 'Microsoft Phi-4 - Small but powerful',
|
|
265
|
+
'deepseek-coder': 'DeepSeek Coder - Code specialist',
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
return {
|
|
269
|
+
model,
|
|
270
|
+
name: model,
|
|
271
|
+
description: descriptions[model] || 'Local Ollama model',
|
|
272
|
+
contextLength: this.capabilities.maxContextLength[model] || 8192,
|
|
273
|
+
maxOutputTokens: this.capabilities.maxOutputTokens[model] || 4096,
|
|
274
|
+
supportedFeatures: ['chat', 'completion', 'local'],
|
|
275
|
+
pricing: { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
protected async doHealthCheck(): Promise<HealthCheckResult> {
|
|
280
|
+
try {
|
|
281
|
+
const response = await fetch(`${this.baseUrl}/api/tags`);
|
|
282
|
+
|
|
283
|
+
return {
|
|
284
|
+
healthy: response.ok,
|
|
285
|
+
timestamp: new Date(),
|
|
286
|
+
details: {
|
|
287
|
+
server: 'ollama',
|
|
288
|
+
local: true,
|
|
289
|
+
},
|
|
290
|
+
...(response.ok ? {} : { error: `HTTP ${response.status}` }),
|
|
291
|
+
};
|
|
292
|
+
} catch (error) {
|
|
293
|
+
return {
|
|
294
|
+
healthy: false,
|
|
295
|
+
error: error instanceof Error ? error.message : 'Ollama server not reachable',
|
|
296
|
+
timestamp: new Date(),
|
|
297
|
+
details: {
|
|
298
|
+
hint: 'Ensure Ollama is running: ollama serve',
|
|
299
|
+
},
|
|
300
|
+
};
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
private buildRequest(request: LLMRequest, stream = false): OllamaRequest {
|
|
305
|
+
const ollamaRequest: OllamaRequest = {
|
|
306
|
+
model: request.model || this.config.model,
|
|
307
|
+
messages: request.messages.map((msg) => ({
|
|
308
|
+
role: msg.role === 'tool' ? 'assistant' : msg.role,
|
|
309
|
+
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
|
|
310
|
+
})),
|
|
311
|
+
stream,
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
const options: OllamaRequest['options'] = {};
|
|
315
|
+
|
|
316
|
+
if (request.temperature !== undefined || this.config.temperature !== undefined) {
|
|
317
|
+
options.temperature = request.temperature ?? this.config.temperature;
|
|
318
|
+
}
|
|
319
|
+
if (request.topP !== undefined || this.config.topP !== undefined) {
|
|
320
|
+
options.top_p = request.topP ?? this.config.topP;
|
|
321
|
+
}
|
|
322
|
+
if (request.topK !== undefined || this.config.topK !== undefined) {
|
|
323
|
+
options.top_k = request.topK ?? this.config.topK;
|
|
324
|
+
}
|
|
325
|
+
if (request.maxTokens || this.config.maxTokens) {
|
|
326
|
+
options.num_predict = request.maxTokens || this.config.maxTokens;
|
|
327
|
+
}
|
|
328
|
+
if (request.stopSequences || this.config.stopSequences) {
|
|
329
|
+
options.stop = request.stopSequences || this.config.stopSequences;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
if (Object.keys(options).length > 0) {
|
|
333
|
+
ollamaRequest.options = options;
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
if (request.tools) {
|
|
337
|
+
ollamaRequest.tools = request.tools;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
return ollamaRequest;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
private transformResponse(data: OllamaResponse, request: LLMRequest): LLMResponse {
|
|
344
|
+
const model = request.model || this.config.model;
|
|
345
|
+
|
|
346
|
+
const promptTokens = data.prompt_eval_count || 0;
|
|
347
|
+
const completionTokens = data.eval_count || 0;
|
|
348
|
+
|
|
349
|
+
const toolCalls = data.message.tool_calls?.map((tc) => ({
|
|
350
|
+
id: `tool_${Date.now()}`,
|
|
351
|
+
type: 'function' as const,
|
|
352
|
+
function: {
|
|
353
|
+
name: tc.function.name,
|
|
354
|
+
arguments: JSON.stringify(tc.function.arguments),
|
|
355
|
+
},
|
|
356
|
+
}));
|
|
357
|
+
|
|
358
|
+
return {
|
|
359
|
+
id: `ollama-${Date.now()}`,
|
|
360
|
+
model: model as LLMModel,
|
|
361
|
+
provider: 'ollama',
|
|
362
|
+
content: data.message.content,
|
|
363
|
+
toolCalls: toolCalls?.length ? toolCalls : undefined,
|
|
364
|
+
usage: {
|
|
365
|
+
promptTokens,
|
|
366
|
+
completionTokens,
|
|
367
|
+
totalTokens: promptTokens + completionTokens,
|
|
368
|
+
},
|
|
369
|
+
cost: {
|
|
370
|
+
promptCost: 0,
|
|
371
|
+
completionCost: 0,
|
|
372
|
+
totalCost: 0,
|
|
373
|
+
currency: 'USD',
|
|
374
|
+
},
|
|
375
|
+
finishReason: data.done ? 'stop' : 'length',
|
|
376
|
+
latency: data.total_duration ? data.total_duration / 1e6 : undefined, // Convert ns to ms
|
|
377
|
+
};
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
private async handleErrorResponse(response: Response): Promise<never> {
|
|
381
|
+
const errorText = await response.text();
|
|
382
|
+
let errorData: { error?: string };
|
|
383
|
+
|
|
384
|
+
try {
|
|
385
|
+
errorData = JSON.parse(errorText);
|
|
386
|
+
} catch {
|
|
387
|
+
errorData = { error: errorText };
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
const message = errorData.error || 'Unknown error';
|
|
391
|
+
|
|
392
|
+
if (response.status === 0 || message.includes('connection')) {
|
|
393
|
+
throw new ProviderUnavailableError('ollama', {
|
|
394
|
+
message,
|
|
395
|
+
hint: 'Ensure Ollama is running: ollama serve',
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
throw new LLMProviderError(
|
|
400
|
+
message,
|
|
401
|
+
`OLLAMA_${response.status}`,
|
|
402
|
+
'ollama',
|
|
403
|
+
response.status,
|
|
404
|
+
true,
|
|
405
|
+
errorData
|
|
406
|
+
);
|
|
407
|
+
}
|
|
408
|
+
}
|