@sparkleideas/providers 3.5.2-patch.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +574 -0
- package/package.json +70 -0
- package/src/__tests__/provider-integration.test.ts +446 -0
- package/src/__tests__/quick-test.ts +356 -0
- package/src/anthropic-provider.ts +435 -0
- package/src/base-provider.ts +596 -0
- package/src/cohere-provider.ts +423 -0
- package/src/google-provider.ts +429 -0
- package/src/index.ts +40 -0
- package/src/ollama-provider.ts +408 -0
- package/src/openai-provider.ts +490 -0
- package/src/provider-manager.ts +538 -0
- package/src/ruvector-provider.ts +721 -0
- package/src/types.ts +435 -0
|
@@ -0,0 +1,721 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* V3 RuVector Provider (via @ruvector/ruvllm)
|
|
3
|
+
*
|
|
4
|
+
* Self-learning LLM orchestration with:
|
|
5
|
+
* - SONA adaptive learning
|
|
6
|
+
* - HNSW vector memory
|
|
7
|
+
* - FastGRNN intelligent routing
|
|
8
|
+
* - SIMD inference optimization
|
|
9
|
+
* - Local model execution (free)
|
|
10
|
+
*
|
|
11
|
+
* @module @sparkleideas/providers/ruvector-provider
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { BaseProvider, BaseProviderOptions } from './base-provider.js';
|
|
15
|
+
import {
|
|
16
|
+
LLMProvider,
|
|
17
|
+
LLMModel,
|
|
18
|
+
LLMRequest,
|
|
19
|
+
LLMResponse,
|
|
20
|
+
LLMStreamEvent,
|
|
21
|
+
ModelInfo,
|
|
22
|
+
ProviderCapabilities,
|
|
23
|
+
HealthCheckResult,
|
|
24
|
+
ProviderUnavailableError,
|
|
25
|
+
LLMProviderError,
|
|
26
|
+
} from './types.js';
|
|
27
|
+
|
|
28
|
+
/**
 * RuVector LLM configuration
 *
 * Optional tuning knobs read from `config.providerOptions`. Fields with a
 * documented default are applied in `buildRequest` when left unset.
 */
interface RuVectorConfig {
  /** Enable SONA self-learning (default: true) */
  enableSona?: boolean;
  /** SONA learning rate (default: 0.01) */
  sonaLearningRate?: number;
  /** Enable HNSW vector memory (default: true) */
  enableHnsw?: boolean;
  /** HNSW M parameter for graph construction */
  hnswM?: number;
  /** HNSW ef_construction parameter */
  hnswEfConstruction?: number;
  /** Enable FastGRNN routing (default: true) */
  enableFastGrnn?: boolean;
  /** Inference mode: 'simd' | 'standard' */
  inferenceMode?: 'simd' | 'standard';
  /** Router strategy (default: 'balanced') */
  routerStrategy?: 'cost' | 'quality' | 'balanced' | 'speed';
}
|
49
|
+
|
|
50
|
+
/**
 * Wire format for a chat request sent to the RuVector server's
 * OpenAI-style streaming endpoint (built by `buildRequest`).
 */
interface RuVectorRequest {
  model: string;
  /** Conversation turns; 'tool' roles are coerced to 'assistant' upstream. */
  messages: Array<{
    role: 'system' | 'user' | 'assistant';
    content: string;
  }>;
  max_tokens?: number;
  temperature?: number;
  top_p?: number;
  stream?: boolean;
  /** SONA self-learning controls; attached when `enableSona !== false`. */
  sona_options?: {
    enabled: boolean;
    learning_rate: number;
    adapt_on_response: boolean;
  };
  /** FastGRNN router controls; attached when `enableFastGrnn !== false`. */
  router_options?: {
    strategy: string;
    fallback_models: string[];
  };
}
|
|
70
|
+
|
|
71
|
+
/**
 * Wire format of a RuVector server response (complete or streamed chunk).
 * Consumed by `transformResponse` and the stream parser in
 * `doStreamComplete`.
 */
interface RuVectorResponse {
  id: string;
  model: string;
  /** Full completion text, or the incremental delta when streaming. */
  content: string;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  /** Present when SONA adaptation ran for this request. */
  sona_metrics?: {
    adaptation_applied: boolean;
    quality_score: number;
    patterns_used: number;
  };
  /** Present when FastGRNN routing selected the model. */
  router_metrics?: {
    model_selected: string;
    routing_reason: string;
    latency_ms: number;
  };
  /** True on the final chunk of a stream / a complete response. */
  done: boolean;
}
|
|
92
|
+
|
|
93
|
+
export class RuVectorProvider extends BaseProvider {
|
|
94
|
+
readonly name: LLMProvider = 'ruvector';
|
|
95
|
+
readonly capabilities: ProviderCapabilities = {
|
|
96
|
+
supportedModels: [
|
|
97
|
+
// RuVector-managed models
|
|
98
|
+
'ruvector-auto', // Auto-selects best model
|
|
99
|
+
'ruvector-fast', // Optimized for speed
|
|
100
|
+
'ruvector-quality', // Optimized for quality
|
|
101
|
+
'ruvector-balanced', // Balanced speed/quality
|
|
102
|
+
// Local models via ruvLLM or Ollama fallback
|
|
103
|
+
'llama3.2',
|
|
104
|
+
'mistral',
|
|
105
|
+
'phi-4',
|
|
106
|
+
'deepseek-coder',
|
|
107
|
+
'codellama',
|
|
108
|
+
'qwen2.5',
|
|
109
|
+
'qwen2.5:0.5b', // CPU-friendly Qwen
|
|
110
|
+
'qwen2.5:1.5b',
|
|
111
|
+
'smollm:135m', // SmolLM models
|
|
112
|
+
'smollm:360m',
|
|
113
|
+
'tinyllama',
|
|
114
|
+
],
|
|
115
|
+
maxContextLength: {
|
|
116
|
+
'ruvector-auto': 128000,
|
|
117
|
+
'ruvector-fast': 32000,
|
|
118
|
+
'ruvector-quality': 128000,
|
|
119
|
+
'ruvector-balanced': 64000,
|
|
120
|
+
'llama3.2': 128000,
|
|
121
|
+
'mistral': 32000,
|
|
122
|
+
'phi-4': 16000,
|
|
123
|
+
'deepseek-coder': 16000,
|
|
124
|
+
'codellama': 16000,
|
|
125
|
+
'qwen2.5': 32000,
|
|
126
|
+
},
|
|
127
|
+
maxOutputTokens: {
|
|
128
|
+
'ruvector-auto': 8192,
|
|
129
|
+
'ruvector-fast': 4096,
|
|
130
|
+
'ruvector-quality': 8192,
|
|
131
|
+
'ruvector-balanced': 8192,
|
|
132
|
+
'llama3.2': 8192,
|
|
133
|
+
'mistral': 8192,
|
|
134
|
+
'phi-4': 4096,
|
|
135
|
+
'deepseek-coder': 8192,
|
|
136
|
+
'codellama': 8192,
|
|
137
|
+
'qwen2.5': 8192,
|
|
138
|
+
},
|
|
139
|
+
supportsStreaming: true,
|
|
140
|
+
supportsToolCalling: true,
|
|
141
|
+
supportsSystemMessages: true,
|
|
142
|
+
supportsVision: false,
|
|
143
|
+
supportsAudio: false,
|
|
144
|
+
supportsFineTuning: true, // SONA self-learning
|
|
145
|
+
supportsEmbeddings: true, // HNSW
|
|
146
|
+
supportsBatching: true,
|
|
147
|
+
rateLimit: {
|
|
148
|
+
requestsPerMinute: 10000, // Local - no rate limit
|
|
149
|
+
tokensPerMinute: 10000000,
|
|
150
|
+
concurrentRequests: 100,
|
|
151
|
+
},
|
|
152
|
+
// Free - local execution with SONA optimization
|
|
153
|
+
pricing: {
|
|
154
|
+
'ruvector-auto': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
155
|
+
'ruvector-fast': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
156
|
+
'ruvector-quality': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
157
|
+
'ruvector-balanced': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
158
|
+
'llama3.2': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
159
|
+
'mistral': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
160
|
+
'phi-4': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
161
|
+
'deepseek-coder': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
162
|
+
'codellama': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
163
|
+
'qwen2.5': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
164
|
+
},
|
|
165
|
+
};
|
|
166
|
+
|
|
167
|
+
private baseUrl: string = 'http://localhost:3000'; // ruvLLM default port
|
|
168
|
+
private ollamaUrl: string = 'http://localhost:11434';
|
|
169
|
+
private ruvectorConfig: RuVectorConfig = {};
|
|
170
|
+
private ruvllm: unknown; // Dynamic import of @ruvector/ruvllm
|
|
171
|
+
private useOllamaFallback: boolean = false;
|
|
172
|
+
private ruvllmAvailable: boolean = false;
|
|
173
|
+
|
|
174
|
+
constructor(options: BaseProviderOptions) {
|
|
175
|
+
super(options);
|
|
176
|
+
this.ruvectorConfig = (options.config.providerOptions as RuVectorConfig) || {};
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
protected async doInitialize(): Promise<void> {
|
|
180
|
+
// Configure URLs from options
|
|
181
|
+
this.baseUrl = this.config.apiUrl || 'http://localhost:3000';
|
|
182
|
+
this.ollamaUrl = (this.config.providerOptions as any)?.ollamaUrl || 'http://localhost:11434';
|
|
183
|
+
|
|
184
|
+
// Try to dynamically import @ruvector/ruvllm native module
|
|
185
|
+
try {
|
|
186
|
+
this.ruvllm = await import('@ruvector/ruvllm').catch(() => null);
|
|
187
|
+
if (this.ruvllm) {
|
|
188
|
+
this.logger.info('RuVector ruvLLM native module loaded');
|
|
189
|
+
this.ruvllmAvailable = true;
|
|
190
|
+
}
|
|
191
|
+
} catch {
|
|
192
|
+
this.logger.debug('RuVector ruvLLM native module not available');
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Check if RuVector HTTP server is running
|
|
196
|
+
const health = await this.doHealthCheck();
|
|
197
|
+
if (health.healthy) {
|
|
198
|
+
this.logger.info('RuVector server connected');
|
|
199
|
+
return;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
// Fallback: Check if Ollama is running for local model execution
|
|
203
|
+
try {
|
|
204
|
+
const ollamaHealth = await fetch(`${this.ollamaUrl}/api/tags`, {
|
|
205
|
+
signal: AbortSignal.timeout(3000),
|
|
206
|
+
});
|
|
207
|
+
if (ollamaHealth.ok) {
|
|
208
|
+
this.useOllamaFallback = true;
|
|
209
|
+
this.logger.info('Using Ollama as fallback for local model execution');
|
|
210
|
+
}
|
|
211
|
+
} catch {
|
|
212
|
+
this.logger.warn('Neither RuVector nor Ollama available. Provider may not work.');
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
protected async doComplete(request: LLMRequest): Promise<LLMResponse> {
|
|
217
|
+
// Use Ollama fallback if RuVector server isn't available
|
|
218
|
+
if (this.useOllamaFallback) {
|
|
219
|
+
return this.completeWithOllama(request);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
const ruvectorRequest = this.buildRuvectorQuery(request);
|
|
223
|
+
|
|
224
|
+
const controller = new AbortController();
|
|
225
|
+
const timeout = setTimeout(() => controller.abort(), this.config.timeout || 120000);
|
|
226
|
+
|
|
227
|
+
try {
|
|
228
|
+
// Use ruvLLM's /query endpoint (not OpenAI-compatible)
|
|
229
|
+
const response = await fetch(`${this.baseUrl}/query`, {
|
|
230
|
+
method: 'POST',
|
|
231
|
+
headers: {
|
|
232
|
+
'Content-Type': 'application/json',
|
|
233
|
+
...(this.config.apiKey && { Authorization: `Bearer ${this.config.apiKey}` }),
|
|
234
|
+
},
|
|
235
|
+
body: JSON.stringify(ruvectorRequest),
|
|
236
|
+
signal: controller.signal,
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
clearTimeout(timeout);
|
|
240
|
+
|
|
241
|
+
if (!response.ok) {
|
|
242
|
+
await this.handleErrorResponse(response);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
const data = await response.json() as RuVectorResponse;
|
|
246
|
+
return this.transformResponse(data, request);
|
|
247
|
+
} catch (error) {
|
|
248
|
+
clearTimeout(timeout);
|
|
249
|
+
|
|
250
|
+
// Auto-fallback to Ollama on connection error
|
|
251
|
+
if (error instanceof Error && (error.message.includes('ECONNREFUSED') || error.message.includes('fetch failed'))) {
|
|
252
|
+
this.useOllamaFallback = true;
|
|
253
|
+
this.logger.info('RuVector connection failed, falling back to Ollama');
|
|
254
|
+
return this.completeWithOllama(request);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
throw this.transformError(error);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
/**
|
|
262
|
+
* Fallback completion using Ollama API
|
|
263
|
+
*/
|
|
264
|
+
private async completeWithOllama(request: LLMRequest): Promise<LLMResponse> {
|
|
265
|
+
const model = request.model || this.config.model;
|
|
266
|
+
|
|
267
|
+
const ollamaRequest = {
|
|
268
|
+
model,
|
|
269
|
+
messages: request.messages.map((msg) => ({
|
|
270
|
+
role: msg.role === 'tool' ? 'assistant' : msg.role,
|
|
271
|
+
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
|
|
272
|
+
})),
|
|
273
|
+
stream: false,
|
|
274
|
+
options: {
|
|
275
|
+
temperature: request.temperature ?? this.config.temperature ?? 0.7,
|
|
276
|
+
num_predict: request.maxTokens || this.config.maxTokens || 2048,
|
|
277
|
+
},
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
const controller = new AbortController();
|
|
281
|
+
const timeout = setTimeout(() => controller.abort(), this.config.timeout || 120000);
|
|
282
|
+
|
|
283
|
+
try {
|
|
284
|
+
const response = await fetch(`${this.ollamaUrl}/api/chat`, {
|
|
285
|
+
method: 'POST',
|
|
286
|
+
headers: { 'Content-Type': 'application/json' },
|
|
287
|
+
body: JSON.stringify(ollamaRequest),
|
|
288
|
+
signal: controller.signal,
|
|
289
|
+
});
|
|
290
|
+
|
|
291
|
+
clearTimeout(timeout);
|
|
292
|
+
|
|
293
|
+
if (!response.ok) {
|
|
294
|
+
const errorText = await response.text();
|
|
295
|
+
throw new LLMProviderError(
|
|
296
|
+
`Ollama error: ${errorText}`,
|
|
297
|
+
`OLLAMA_${response.status}`,
|
|
298
|
+
'ruvector',
|
|
299
|
+
response.status,
|
|
300
|
+
true
|
|
301
|
+
);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
const data = await response.json() as {
|
|
305
|
+
message?: { content: string };
|
|
306
|
+
prompt_eval_count?: number;
|
|
307
|
+
eval_count?: number;
|
|
308
|
+
};
|
|
309
|
+
|
|
310
|
+
const promptTokens = data.prompt_eval_count || this.estimateTokens(JSON.stringify(request.messages));
|
|
311
|
+
const completionTokens = data.eval_count || this.estimateTokens(data.message?.content || '');
|
|
312
|
+
|
|
313
|
+
return {
|
|
314
|
+
id: `ruvector-ollama-${Date.now()}`,
|
|
315
|
+
model: model as LLMModel,
|
|
316
|
+
provider: 'ruvector',
|
|
317
|
+
content: data.message?.content || '',
|
|
318
|
+
usage: {
|
|
319
|
+
promptTokens,
|
|
320
|
+
completionTokens,
|
|
321
|
+
totalTokens: promptTokens + completionTokens,
|
|
322
|
+
},
|
|
323
|
+
cost: {
|
|
324
|
+
promptCost: 0,
|
|
325
|
+
completionCost: 0,
|
|
326
|
+
totalCost: 0,
|
|
327
|
+
currency: 'USD',
|
|
328
|
+
},
|
|
329
|
+
finishReason: 'stop',
|
|
330
|
+
metadata: {
|
|
331
|
+
backend: 'ollama',
|
|
332
|
+
sona: { enabled: false },
|
|
333
|
+
},
|
|
334
|
+
};
|
|
335
|
+
} catch (error) {
|
|
336
|
+
clearTimeout(timeout);
|
|
337
|
+
throw this.transformError(error);
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
protected async *doStreamComplete(request: LLMRequest): AsyncIterable<LLMStreamEvent> {
|
|
342
|
+
const ruvectorRequest = this.buildRequest(request, true);
|
|
343
|
+
|
|
344
|
+
const controller = new AbortController();
|
|
345
|
+
const timeout = setTimeout(() => controller.abort(), (this.config.timeout || 120000) * 2);
|
|
346
|
+
|
|
347
|
+
try {
|
|
348
|
+
const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
|
|
349
|
+
method: 'POST',
|
|
350
|
+
headers: {
|
|
351
|
+
'Content-Type': 'application/json',
|
|
352
|
+
...(this.config.apiKey && { Authorization: `Bearer ${this.config.apiKey}` }),
|
|
353
|
+
},
|
|
354
|
+
body: JSON.stringify(ruvectorRequest),
|
|
355
|
+
signal: controller.signal,
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
if (!response.ok) {
|
|
359
|
+
await this.handleErrorResponse(response);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
const reader = response.body!.getReader();
|
|
363
|
+
const decoder = new TextDecoder();
|
|
364
|
+
let buffer = '';
|
|
365
|
+
let promptTokens = 0;
|
|
366
|
+
let completionTokens = 0;
|
|
367
|
+
|
|
368
|
+
while (true) {
|
|
369
|
+
const { done, value } = await reader.read();
|
|
370
|
+
if (done) break;
|
|
371
|
+
|
|
372
|
+
buffer += decoder.decode(value, { stream: true });
|
|
373
|
+
const lines = buffer.split('\n');
|
|
374
|
+
buffer = lines.pop() || '';
|
|
375
|
+
|
|
376
|
+
for (const line of lines) {
|
|
377
|
+
if (line.startsWith('data: ')) {
|
|
378
|
+
const data = line.slice(6);
|
|
379
|
+
if (data === '[DONE]') continue;
|
|
380
|
+
|
|
381
|
+
try {
|
|
382
|
+
const chunk: RuVectorResponse = JSON.parse(data);
|
|
383
|
+
|
|
384
|
+
if (chunk.content) {
|
|
385
|
+
yield {
|
|
386
|
+
type: 'content',
|
|
387
|
+
delta: { content: chunk.content },
|
|
388
|
+
};
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
if (chunk.done && chunk.usage) {
|
|
392
|
+
promptTokens = chunk.usage.prompt_tokens;
|
|
393
|
+
completionTokens = chunk.usage.completion_tokens;
|
|
394
|
+
|
|
395
|
+
yield {
|
|
396
|
+
type: 'done',
|
|
397
|
+
usage: {
|
|
398
|
+
promptTokens,
|
|
399
|
+
completionTokens,
|
|
400
|
+
totalTokens: promptTokens + completionTokens,
|
|
401
|
+
},
|
|
402
|
+
cost: {
|
|
403
|
+
promptCost: 0,
|
|
404
|
+
completionCost: 0,
|
|
405
|
+
totalCost: 0,
|
|
406
|
+
currency: 'USD',
|
|
407
|
+
},
|
|
408
|
+
};
|
|
409
|
+
}
|
|
410
|
+
} catch {
|
|
411
|
+
// Ignore parse errors
|
|
412
|
+
}
|
|
413
|
+
} else if (line.trim() && !line.startsWith(':')) {
|
|
414
|
+
// Direct JSON response (non-SSE)
|
|
415
|
+
try {
|
|
416
|
+
const chunk: RuVectorResponse = JSON.parse(line);
|
|
417
|
+
if (chunk.content) {
|
|
418
|
+
yield {
|
|
419
|
+
type: 'content',
|
|
420
|
+
delta: { content: chunk.content },
|
|
421
|
+
};
|
|
422
|
+
}
|
|
423
|
+
} catch {
|
|
424
|
+
// Ignore
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
// Ensure done event is sent
|
|
431
|
+
if (completionTokens === 0) {
|
|
432
|
+
yield {
|
|
433
|
+
type: 'done',
|
|
434
|
+
usage: {
|
|
435
|
+
promptTokens: this.estimateTokens(JSON.stringify(request.messages)),
|
|
436
|
+
completionTokens: 100,
|
|
437
|
+
totalTokens: this.estimateTokens(JSON.stringify(request.messages)) + 100,
|
|
438
|
+
},
|
|
439
|
+
cost: { promptCost: 0, completionCost: 0, totalCost: 0, currency: 'USD' },
|
|
440
|
+
};
|
|
441
|
+
}
|
|
442
|
+
} catch (error) {
|
|
443
|
+
clearTimeout(timeout);
|
|
444
|
+
throw this.transformError(error);
|
|
445
|
+
} finally {
|
|
446
|
+
clearTimeout(timeout);
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
async listModels(): Promise<LLMModel[]> {
|
|
451
|
+
try {
|
|
452
|
+
const response = await fetch(`${this.baseUrl}/v1/models`);
|
|
453
|
+
if (!response.ok) {
|
|
454
|
+
return this.capabilities.supportedModels;
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
const data = await response.json() as { data?: Array<{ id: string }> };
|
|
458
|
+
return data.data?.map((m) => m.id as LLMModel) || this.capabilities.supportedModels;
|
|
459
|
+
} catch {
|
|
460
|
+
return this.capabilities.supportedModels;
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
async getModelInfo(model: LLMModel): Promise<ModelInfo> {
|
|
465
|
+
const descriptions: Record<string, string> = {
|
|
466
|
+
'ruvector-auto': 'Auto-selects optimal model with SONA learning',
|
|
467
|
+
'ruvector-fast': 'Optimized for speed with FastGRNN routing',
|
|
468
|
+
'ruvector-quality': 'Highest quality with full SONA adaptation',
|
|
469
|
+
'ruvector-balanced': 'Balanced speed and quality',
|
|
470
|
+
'llama3.2': 'Meta Llama 3.2 via RuVector',
|
|
471
|
+
'mistral': 'Mistral 7B via RuVector',
|
|
472
|
+
'phi-4': 'Microsoft Phi-4 via RuVector',
|
|
473
|
+
'deepseek-coder': 'DeepSeek Coder via RuVector',
|
|
474
|
+
'codellama': 'Code Llama via RuVector',
|
|
475
|
+
'qwen2.5': 'Qwen 2.5 via RuVector',
|
|
476
|
+
};
|
|
477
|
+
|
|
478
|
+
return {
|
|
479
|
+
model,
|
|
480
|
+
name: model,
|
|
481
|
+
description: descriptions[model] || 'RuVector-managed local model',
|
|
482
|
+
contextLength: this.capabilities.maxContextLength[model] || 32000,
|
|
483
|
+
maxOutputTokens: this.capabilities.maxOutputTokens[model] || 4096,
|
|
484
|
+
supportedFeatures: [
|
|
485
|
+
'chat',
|
|
486
|
+
'completion',
|
|
487
|
+
'local',
|
|
488
|
+
'self-learning',
|
|
489
|
+
'sona',
|
|
490
|
+
'hnsw-memory',
|
|
491
|
+
],
|
|
492
|
+
pricing: { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
|
|
493
|
+
};
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
protected async doHealthCheck(): Promise<HealthCheckResult> {
|
|
497
|
+
try {
|
|
498
|
+
const response = await fetch(`${this.baseUrl}/health`);
|
|
499
|
+
|
|
500
|
+
if (response.ok) {
|
|
501
|
+
const data = await response.json() as { sona?: boolean; hnsw?: boolean };
|
|
502
|
+
return {
|
|
503
|
+
healthy: true,
|
|
504
|
+
timestamp: new Date(),
|
|
505
|
+
details: {
|
|
506
|
+
server: 'ruvector',
|
|
507
|
+
sona: data.sona ?? false,
|
|
508
|
+
hnsw: data.hnsw ?? false,
|
|
509
|
+
local: true,
|
|
510
|
+
},
|
|
511
|
+
};
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
return {
|
|
515
|
+
healthy: false,
|
|
516
|
+
error: `HTTP ${response.status}`,
|
|
517
|
+
timestamp: new Date(),
|
|
518
|
+
};
|
|
519
|
+
} catch (error) {
|
|
520
|
+
return {
|
|
521
|
+
healthy: false,
|
|
522
|
+
error: error instanceof Error ? error.message : 'RuVector server not reachable',
|
|
523
|
+
timestamp: new Date(),
|
|
524
|
+
details: {
|
|
525
|
+
hint: 'Start RuVector server: npx @ruvector/ruvllm serve',
|
|
526
|
+
},
|
|
527
|
+
};
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
/**
|
|
532
|
+
* Build ruvLLM native API query format
|
|
533
|
+
* See: https://github.com/ruvnet/ruvector/tree/main/examples/ruvLLM
|
|
534
|
+
*/
|
|
535
|
+
private buildRuvectorQuery(request: LLMRequest): { query: string; session_id?: string } {
|
|
536
|
+
// ruvLLM uses simple query format, not OpenAI-compatible
|
|
537
|
+
const lastUserMessage = [...request.messages].reverse().find(m => m.role === 'user');
|
|
538
|
+
const systemPrompt = request.messages.find(m => m.role === 'system');
|
|
539
|
+
|
|
540
|
+
let query = '';
|
|
541
|
+
if (systemPrompt) {
|
|
542
|
+
query += `[System]: ${typeof systemPrompt.content === 'string' ? systemPrompt.content : JSON.stringify(systemPrompt.content)}\n\n`;
|
|
543
|
+
}
|
|
544
|
+
query += typeof lastUserMessage?.content === 'string'
|
|
545
|
+
? lastUserMessage.content
|
|
546
|
+
: JSON.stringify(lastUserMessage?.content || '');
|
|
547
|
+
|
|
548
|
+
return {
|
|
549
|
+
query,
|
|
550
|
+
session_id: request.requestId,
|
|
551
|
+
};
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
private buildRequest(request: LLMRequest, stream = false): RuVectorRequest {
|
|
555
|
+
const ruvectorRequest: RuVectorRequest = {
|
|
556
|
+
model: request.model || this.config.model,
|
|
557
|
+
messages: request.messages.map((msg) => ({
|
|
558
|
+
role: msg.role === 'tool' ? 'assistant' : msg.role,
|
|
559
|
+
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
|
|
560
|
+
})),
|
|
561
|
+
stream,
|
|
562
|
+
};
|
|
563
|
+
|
|
564
|
+
if (request.temperature !== undefined || this.config.temperature !== undefined) {
|
|
565
|
+
ruvectorRequest.temperature = request.temperature ?? this.config.temperature;
|
|
566
|
+
}
|
|
567
|
+
if (request.maxTokens || this.config.maxTokens) {
|
|
568
|
+
ruvectorRequest.max_tokens = request.maxTokens || this.config.maxTokens;
|
|
569
|
+
}
|
|
570
|
+
if (request.topP !== undefined || this.config.topP !== undefined) {
|
|
571
|
+
ruvectorRequest.top_p = request.topP ?? this.config.topP;
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// SONA options
|
|
575
|
+
if (this.ruvectorConfig.enableSona !== false) {
|
|
576
|
+
ruvectorRequest.sona_options = {
|
|
577
|
+
enabled: true,
|
|
578
|
+
learning_rate: this.ruvectorConfig.sonaLearningRate || 0.01,
|
|
579
|
+
adapt_on_response: true,
|
|
580
|
+
};
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
// Router options
|
|
584
|
+
if (this.ruvectorConfig.enableFastGrnn !== false) {
|
|
585
|
+
ruvectorRequest.router_options = {
|
|
586
|
+
strategy: this.ruvectorConfig.routerStrategy || 'balanced',
|
|
587
|
+
fallback_models: ['llama3.2', 'mistral', 'phi-4'],
|
|
588
|
+
};
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
return ruvectorRequest;
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
private transformResponse(data: RuVectorResponse, request: LLMRequest): LLMResponse {
|
|
595
|
+
const model = request.model || this.config.model;
|
|
596
|
+
|
|
597
|
+
return {
|
|
598
|
+
id: data.id || `ruvector-${Date.now()}`,
|
|
599
|
+
model: (data.model || model) as LLMModel,
|
|
600
|
+
provider: 'custom',
|
|
601
|
+
content: data.content,
|
|
602
|
+
usage: {
|
|
603
|
+
promptTokens: data.usage?.prompt_tokens || 0,
|
|
604
|
+
completionTokens: data.usage?.completion_tokens || 0,
|
|
605
|
+
totalTokens: data.usage?.total_tokens || 0,
|
|
606
|
+
},
|
|
607
|
+
cost: {
|
|
608
|
+
promptCost: 0,
|
|
609
|
+
completionCost: 0,
|
|
610
|
+
totalCost: 0,
|
|
611
|
+
currency: 'USD',
|
|
612
|
+
},
|
|
613
|
+
finishReason: data.done ? 'stop' : 'length',
|
|
614
|
+
metadata: {
|
|
615
|
+
sona: data.sona_metrics,
|
|
616
|
+
router: data.router_metrics,
|
|
617
|
+
},
|
|
618
|
+
};
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
private async handleErrorResponse(response: Response): Promise<never> {
|
|
622
|
+
const errorText = await response.text();
|
|
623
|
+
let errorData: { error?: string };
|
|
624
|
+
|
|
625
|
+
try {
|
|
626
|
+
errorData = JSON.parse(errorText);
|
|
627
|
+
} catch {
|
|
628
|
+
errorData = { error: errorText };
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
const message = errorData.error || 'Unknown error';
|
|
632
|
+
|
|
633
|
+
if (response.status === 0 || message.includes('connection')) {
|
|
634
|
+
throw new ProviderUnavailableError('custom', {
|
|
635
|
+
message,
|
|
636
|
+
hint: 'Start RuVector server: npx @ruvector/ruvllm serve',
|
|
637
|
+
});
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
throw new LLMProviderError(
|
|
641
|
+
message,
|
|
642
|
+
`RUVECTOR_${response.status}`,
|
|
643
|
+
'custom',
|
|
644
|
+
response.status,
|
|
645
|
+
true,
|
|
646
|
+
errorData
|
|
647
|
+
);
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
/**
|
|
651
|
+
* Get SONA learning metrics
|
|
652
|
+
*/
|
|
653
|
+
async getSonaMetrics(): Promise<{
|
|
654
|
+
enabled: boolean;
|
|
655
|
+
adaptationsApplied: number;
|
|
656
|
+
qualityScore: number;
|
|
657
|
+
patternsLearned: number;
|
|
658
|
+
}> {
|
|
659
|
+
try {
|
|
660
|
+
const response = await fetch(`${this.baseUrl}/v1/sona/metrics`);
|
|
661
|
+
if (response.ok) {
|
|
662
|
+
return await response.json() as {
|
|
663
|
+
enabled: boolean;
|
|
664
|
+
adaptationsApplied: number;
|
|
665
|
+
qualityScore: number;
|
|
666
|
+
patternsLearned: number;
|
|
667
|
+
};
|
|
668
|
+
}
|
|
669
|
+
} catch {
|
|
670
|
+
// Ignore
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
return {
|
|
674
|
+
enabled: false,
|
|
675
|
+
adaptationsApplied: 0,
|
|
676
|
+
qualityScore: 0,
|
|
677
|
+
patternsLearned: 0,
|
|
678
|
+
};
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
/**
|
|
682
|
+
* Trigger SONA learning from a conversation
|
|
683
|
+
*/
|
|
684
|
+
async triggerSonaLearning(conversationId: string): Promise<boolean> {
|
|
685
|
+
try {
|
|
686
|
+
const response = await fetch(`${this.baseUrl}/v1/sona/learn`, {
|
|
687
|
+
method: 'POST',
|
|
688
|
+
headers: { 'Content-Type': 'application/json' },
|
|
689
|
+
body: JSON.stringify({ conversation_id: conversationId }),
|
|
690
|
+
});
|
|
691
|
+
return response.ok;
|
|
692
|
+
} catch {
|
|
693
|
+
return false;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
/**
|
|
698
|
+
* Search HNSW memory for similar patterns
|
|
699
|
+
*/
|
|
700
|
+
async searchMemory(query: string, limit = 5): Promise<Array<{
|
|
701
|
+
id: string;
|
|
702
|
+
similarity: number;
|
|
703
|
+
content: string;
|
|
704
|
+
}>> {
|
|
705
|
+
try {
|
|
706
|
+
const response = await fetch(`${this.baseUrl}/v1/hnsw/search`, {
|
|
707
|
+
method: 'POST',
|
|
708
|
+
headers: { 'Content-Type': 'application/json' },
|
|
709
|
+
body: JSON.stringify({ query, limit }),
|
|
710
|
+
});
|
|
711
|
+
|
|
712
|
+
if (response.ok) {
|
|
713
|
+
return await response.json() as Array<{ id: string; similarity: number; content: string }>;
|
|
714
|
+
}
|
|
715
|
+
} catch {
|
|
716
|
+
// Ignore
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
return [];
|
|
720
|
+
}
|
|
721
|
+
}
|