@sparkleideas/providers 3.5.2-patch.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,721 @@
1
+ /**
2
+ * V3 RuVector Provider (via @ruvector/ruvllm)
3
+ *
4
+ * Self-learning LLM orchestration with:
5
+ * - SONA adaptive learning
6
+ * - HNSW vector memory
7
+ * - FastGRNN intelligent routing
8
+ * - SIMD inference optimization
9
+ * - Local model execution (free)
10
+ *
11
+ * @module @sparkleideas/providers/ruvector-provider
12
+ */
13
+
14
+ import { BaseProvider, BaseProviderOptions } from './base-provider.js';
15
+ import {
16
+ LLMProvider,
17
+ LLMModel,
18
+ LLMRequest,
19
+ LLMResponse,
20
+ LLMStreamEvent,
21
+ ModelInfo,
22
+ ProviderCapabilities,
23
+ HealthCheckResult,
24
+ ProviderUnavailableError,
25
+ LLMProviderError,
26
+ } from './types.js';
27
+
28
/**
 * RuVector LLM configuration
 *
 * Read from `options.config.providerOptions` in the provider constructor.
 * All flags are optional; SONA and FastGRNN default to enabled (only an
 * explicit `false` disables them — see `buildRequest`).
 */
interface RuVectorConfig {
  /** Enable SONA self-learning (default: true) */
  enableSona?: boolean;
  /** SONA learning rate (default: 0.01) */
  sonaLearningRate?: number;
  /** Enable HNSW vector memory (default: true) */
  enableHnsw?: boolean;
  /** HNSW M parameter for graph construction */
  hnswM?: number;
  /** HNSW ef_construction parameter */
  hnswEfConstruction?: number;
  /** Enable FastGRNN routing (default: true) */
  enableFastGrnn?: boolean;
  /** Inference mode: 'simd' | 'standard' */
  inferenceMode?: 'simd' | 'standard';
  /** Router strategy applied when FastGRNN routing is on (default: 'balanced') */
  routerStrategy?: 'cost' | 'quality' | 'balanced' | 'speed';
}
49
+
50
/**
 * Wire format of the OpenAI-style chat request body sent by `buildRequest`
 * (used by the streaming path). Field names are snake_case to match the
 * server API. NOTE(review): the non-streaming path sends a simpler
 * `{ query, session_id }` payload instead — see `buildRuvectorQuery`.
 */
interface RuVectorRequest {
  /** Target model identifier. */
  model: string;
  /** Chat messages; 'tool' roles are folded into 'assistant' upstream. */
  messages: Array<{
    role: 'system' | 'user' | 'assistant';
    content: string;
  }>;
  max_tokens?: number;
  temperature?: number;
  top_p?: number;
  stream?: boolean;
  /** SONA self-learning controls. */
  sona_options?: {
    enabled: boolean;
    learning_rate: number;
    adapt_on_response: boolean;
  };
  /** FastGRNN router controls. */
  router_options?: {
    strategy: string;
    fallback_models: string[];
  };
}
70
+
71
/**
 * Response/stream-chunk shape returned by the RuVector server.
 * Used both for full responses (`doComplete`) and parsed SSE chunks
 * (`doStreamComplete`), hence the `done` flag.
 */
interface RuVectorResponse {
  id: string;
  model: string;
  /** Generated text (full response or delta, depending on context). */
  content: string;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
  /** Present when SONA self-learning is active on the server. */
  sona_metrics?: {
    adaptation_applied: boolean;
    quality_score: number;
    patterns_used: number;
  };
  /** Present when FastGRNN routing selected the model. */
  router_metrics?: {
    model_selected: string;
    routing_reason: string;
    latency_ms: number;
  };
  /** True on the final chunk / completed response. */
  done: boolean;
}
92
+
93
/**
 * LLM provider backed by a local RuVector/ruvLLM server, with automatic
 * fallback to Ollama for local model execution. All models run locally,
 * so pricing is zero and rate limits are nominal.
 */
export class RuVectorProvider extends BaseProvider {
  readonly name: LLMProvider = 'ruvector';
  readonly capabilities: ProviderCapabilities = {
    supportedModels: [
      // RuVector-managed virtual models (server picks the backing model)
      'ruvector-auto', // Auto-selects best model
      'ruvector-fast', // Optimized for speed
      'ruvector-quality', // Optimized for quality
      'ruvector-balanced', // Balanced speed/quality
      // Local models via ruvLLM or Ollama fallback
      'llama3.2',
      'mistral',
      'phi-4',
      'deepseek-coder',
      'codellama',
      'qwen2.5',
      'qwen2.5:0.5b', // CPU-friendly Qwen
      'qwen2.5:1.5b',
      'smollm:135m', // SmolLM models
      'smollm:360m',
      'tinyllama',
    ],
    // NOTE(review): small models (qwen2.5:*b, smollm:*, tinyllama) have no
    // entries here; getModelInfo falls back to 32000 for them.
    maxContextLength: {
      'ruvector-auto': 128000,
      'ruvector-fast': 32000,
      'ruvector-quality': 128000,
      'ruvector-balanced': 64000,
      'llama3.2': 128000,
      'mistral': 32000,
      'phi-4': 16000,
      'deepseek-coder': 16000,
      'codellama': 16000,
      'qwen2.5': 32000,
    },
    maxOutputTokens: {
      'ruvector-auto': 8192,
      'ruvector-fast': 4096,
      'ruvector-quality': 8192,
      'ruvector-balanced': 8192,
      'llama3.2': 8192,
      'mistral': 8192,
      'phi-4': 4096,
      'deepseek-coder': 8192,
      'codellama': 8192,
      'qwen2.5': 8192,
    },
    supportsStreaming: true,
    supportsToolCalling: true,
    supportsSystemMessages: true,
    supportsVision: false,
    supportsAudio: false,
    supportsFineTuning: true, // SONA self-learning
    supportsEmbeddings: true, // HNSW
    supportsBatching: true,
    rateLimit: {
      requestsPerMinute: 10000, // Local - no rate limit
      tokensPerMinute: 10000000,
      concurrentRequests: 100,
    },
    // Free - local execution with SONA optimization
    pricing: {
      'ruvector-auto': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'ruvector-fast': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'ruvector-quality': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'ruvector-balanced': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'llama3.2': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'mistral': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'phi-4': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'deepseek-coder': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'codellama': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
      'qwen2.5': { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
    },
  };

  // RuVector/ruvLLM HTTP server endpoint; overridden by config.apiUrl.
  private baseUrl: string = 'http://localhost:3000'; // ruvLLM default port
  // Ollama endpoint used as local-execution fallback.
  private ollamaUrl: string = 'http://localhost:11434';
  private ruvectorConfig: RuVectorConfig = {};
  private ruvllm: unknown; // Dynamic import of @ruvector/ruvllm
  // Set true after a connection failure redirects traffic to Ollama.
  private useOllamaFallback: boolean = false;
  private ruvllmAvailable: boolean = false;

  /**
   * @param options Base provider options; `options.config.providerOptions`
   *   is read as a RuVectorConfig when present.
   */
  constructor(options: BaseProviderOptions) {
    super(options);
    this.ruvectorConfig = (options.config.providerOptions as RuVectorConfig) || {};
  }
178
+
179
+ protected async doInitialize(): Promise<void> {
180
+ // Configure URLs from options
181
+ this.baseUrl = this.config.apiUrl || 'http://localhost:3000';
182
+ this.ollamaUrl = (this.config.providerOptions as any)?.ollamaUrl || 'http://localhost:11434';
183
+
184
+ // Try to dynamically import @ruvector/ruvllm native module
185
+ try {
186
+ this.ruvllm = await import('@ruvector/ruvllm').catch(() => null);
187
+ if (this.ruvllm) {
188
+ this.logger.info('RuVector ruvLLM native module loaded');
189
+ this.ruvllmAvailable = true;
190
+ }
191
+ } catch {
192
+ this.logger.debug('RuVector ruvLLM native module not available');
193
+ }
194
+
195
+ // Check if RuVector HTTP server is running
196
+ const health = await this.doHealthCheck();
197
+ if (health.healthy) {
198
+ this.logger.info('RuVector server connected');
199
+ return;
200
+ }
201
+
202
+ // Fallback: Check if Ollama is running for local model execution
203
+ try {
204
+ const ollamaHealth = await fetch(`${this.ollamaUrl}/api/tags`, {
205
+ signal: AbortSignal.timeout(3000),
206
+ });
207
+ if (ollamaHealth.ok) {
208
+ this.useOllamaFallback = true;
209
+ this.logger.info('Using Ollama as fallback for local model execution');
210
+ }
211
+ } catch {
212
+ this.logger.warn('Neither RuVector nor Ollama available. Provider may not work.');
213
+ }
214
+ }
215
+
216
+ protected async doComplete(request: LLMRequest): Promise<LLMResponse> {
217
+ // Use Ollama fallback if RuVector server isn't available
218
+ if (this.useOllamaFallback) {
219
+ return this.completeWithOllama(request);
220
+ }
221
+
222
+ const ruvectorRequest = this.buildRuvectorQuery(request);
223
+
224
+ const controller = new AbortController();
225
+ const timeout = setTimeout(() => controller.abort(), this.config.timeout || 120000);
226
+
227
+ try {
228
+ // Use ruvLLM's /query endpoint (not OpenAI-compatible)
229
+ const response = await fetch(`${this.baseUrl}/query`, {
230
+ method: 'POST',
231
+ headers: {
232
+ 'Content-Type': 'application/json',
233
+ ...(this.config.apiKey && { Authorization: `Bearer ${this.config.apiKey}` }),
234
+ },
235
+ body: JSON.stringify(ruvectorRequest),
236
+ signal: controller.signal,
237
+ });
238
+
239
+ clearTimeout(timeout);
240
+
241
+ if (!response.ok) {
242
+ await this.handleErrorResponse(response);
243
+ }
244
+
245
+ const data = await response.json() as RuVectorResponse;
246
+ return this.transformResponse(data, request);
247
+ } catch (error) {
248
+ clearTimeout(timeout);
249
+
250
+ // Auto-fallback to Ollama on connection error
251
+ if (error instanceof Error && (error.message.includes('ECONNREFUSED') || error.message.includes('fetch failed'))) {
252
+ this.useOllamaFallback = true;
253
+ this.logger.info('RuVector connection failed, falling back to Ollama');
254
+ return this.completeWithOllama(request);
255
+ }
256
+
257
+ throw this.transformError(error);
258
+ }
259
+ }
260
+
261
+ /**
262
+ * Fallback completion using Ollama API
263
+ */
264
+ private async completeWithOllama(request: LLMRequest): Promise<LLMResponse> {
265
+ const model = request.model || this.config.model;
266
+
267
+ const ollamaRequest = {
268
+ model,
269
+ messages: request.messages.map((msg) => ({
270
+ role: msg.role === 'tool' ? 'assistant' : msg.role,
271
+ content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
272
+ })),
273
+ stream: false,
274
+ options: {
275
+ temperature: request.temperature ?? this.config.temperature ?? 0.7,
276
+ num_predict: request.maxTokens || this.config.maxTokens || 2048,
277
+ },
278
+ };
279
+
280
+ const controller = new AbortController();
281
+ const timeout = setTimeout(() => controller.abort(), this.config.timeout || 120000);
282
+
283
+ try {
284
+ const response = await fetch(`${this.ollamaUrl}/api/chat`, {
285
+ method: 'POST',
286
+ headers: { 'Content-Type': 'application/json' },
287
+ body: JSON.stringify(ollamaRequest),
288
+ signal: controller.signal,
289
+ });
290
+
291
+ clearTimeout(timeout);
292
+
293
+ if (!response.ok) {
294
+ const errorText = await response.text();
295
+ throw new LLMProviderError(
296
+ `Ollama error: ${errorText}`,
297
+ `OLLAMA_${response.status}`,
298
+ 'ruvector',
299
+ response.status,
300
+ true
301
+ );
302
+ }
303
+
304
+ const data = await response.json() as {
305
+ message?: { content: string };
306
+ prompt_eval_count?: number;
307
+ eval_count?: number;
308
+ };
309
+
310
+ const promptTokens = data.prompt_eval_count || this.estimateTokens(JSON.stringify(request.messages));
311
+ const completionTokens = data.eval_count || this.estimateTokens(data.message?.content || '');
312
+
313
+ return {
314
+ id: `ruvector-ollama-${Date.now()}`,
315
+ model: model as LLMModel,
316
+ provider: 'ruvector',
317
+ content: data.message?.content || '',
318
+ usage: {
319
+ promptTokens,
320
+ completionTokens,
321
+ totalTokens: promptTokens + completionTokens,
322
+ },
323
+ cost: {
324
+ promptCost: 0,
325
+ completionCost: 0,
326
+ totalCost: 0,
327
+ currency: 'USD',
328
+ },
329
+ finishReason: 'stop',
330
+ metadata: {
331
+ backend: 'ollama',
332
+ sona: { enabled: false },
333
+ },
334
+ };
335
+ } catch (error) {
336
+ clearTimeout(timeout);
337
+ throw this.transformError(error);
338
+ }
339
+ }
340
+
341
+ protected async *doStreamComplete(request: LLMRequest): AsyncIterable<LLMStreamEvent> {
342
+ const ruvectorRequest = this.buildRequest(request, true);
343
+
344
+ const controller = new AbortController();
345
+ const timeout = setTimeout(() => controller.abort(), (this.config.timeout || 120000) * 2);
346
+
347
+ try {
348
+ const response = await fetch(`${this.baseUrl}/v1/chat/completions`, {
349
+ method: 'POST',
350
+ headers: {
351
+ 'Content-Type': 'application/json',
352
+ ...(this.config.apiKey && { Authorization: `Bearer ${this.config.apiKey}` }),
353
+ },
354
+ body: JSON.stringify(ruvectorRequest),
355
+ signal: controller.signal,
356
+ });
357
+
358
+ if (!response.ok) {
359
+ await this.handleErrorResponse(response);
360
+ }
361
+
362
+ const reader = response.body!.getReader();
363
+ const decoder = new TextDecoder();
364
+ let buffer = '';
365
+ let promptTokens = 0;
366
+ let completionTokens = 0;
367
+
368
+ while (true) {
369
+ const { done, value } = await reader.read();
370
+ if (done) break;
371
+
372
+ buffer += decoder.decode(value, { stream: true });
373
+ const lines = buffer.split('\n');
374
+ buffer = lines.pop() || '';
375
+
376
+ for (const line of lines) {
377
+ if (line.startsWith('data: ')) {
378
+ const data = line.slice(6);
379
+ if (data === '[DONE]') continue;
380
+
381
+ try {
382
+ const chunk: RuVectorResponse = JSON.parse(data);
383
+
384
+ if (chunk.content) {
385
+ yield {
386
+ type: 'content',
387
+ delta: { content: chunk.content },
388
+ };
389
+ }
390
+
391
+ if (chunk.done && chunk.usage) {
392
+ promptTokens = chunk.usage.prompt_tokens;
393
+ completionTokens = chunk.usage.completion_tokens;
394
+
395
+ yield {
396
+ type: 'done',
397
+ usage: {
398
+ promptTokens,
399
+ completionTokens,
400
+ totalTokens: promptTokens + completionTokens,
401
+ },
402
+ cost: {
403
+ promptCost: 0,
404
+ completionCost: 0,
405
+ totalCost: 0,
406
+ currency: 'USD',
407
+ },
408
+ };
409
+ }
410
+ } catch {
411
+ // Ignore parse errors
412
+ }
413
+ } else if (line.trim() && !line.startsWith(':')) {
414
+ // Direct JSON response (non-SSE)
415
+ try {
416
+ const chunk: RuVectorResponse = JSON.parse(line);
417
+ if (chunk.content) {
418
+ yield {
419
+ type: 'content',
420
+ delta: { content: chunk.content },
421
+ };
422
+ }
423
+ } catch {
424
+ // Ignore
425
+ }
426
+ }
427
+ }
428
+ }
429
+
430
+ // Ensure done event is sent
431
+ if (completionTokens === 0) {
432
+ yield {
433
+ type: 'done',
434
+ usage: {
435
+ promptTokens: this.estimateTokens(JSON.stringify(request.messages)),
436
+ completionTokens: 100,
437
+ totalTokens: this.estimateTokens(JSON.stringify(request.messages)) + 100,
438
+ },
439
+ cost: { promptCost: 0, completionCost: 0, totalCost: 0, currency: 'USD' },
440
+ };
441
+ }
442
+ } catch (error) {
443
+ clearTimeout(timeout);
444
+ throw this.transformError(error);
445
+ } finally {
446
+ clearTimeout(timeout);
447
+ }
448
+ }
449
+
450
+ async listModels(): Promise<LLMModel[]> {
451
+ try {
452
+ const response = await fetch(`${this.baseUrl}/v1/models`);
453
+ if (!response.ok) {
454
+ return this.capabilities.supportedModels;
455
+ }
456
+
457
+ const data = await response.json() as { data?: Array<{ id: string }> };
458
+ return data.data?.map((m) => m.id as LLMModel) || this.capabilities.supportedModels;
459
+ } catch {
460
+ return this.capabilities.supportedModels;
461
+ }
462
+ }
463
+
464
+ async getModelInfo(model: LLMModel): Promise<ModelInfo> {
465
+ const descriptions: Record<string, string> = {
466
+ 'ruvector-auto': 'Auto-selects optimal model with SONA learning',
467
+ 'ruvector-fast': 'Optimized for speed with FastGRNN routing',
468
+ 'ruvector-quality': 'Highest quality with full SONA adaptation',
469
+ 'ruvector-balanced': 'Balanced speed and quality',
470
+ 'llama3.2': 'Meta Llama 3.2 via RuVector',
471
+ 'mistral': 'Mistral 7B via RuVector',
472
+ 'phi-4': 'Microsoft Phi-4 via RuVector',
473
+ 'deepseek-coder': 'DeepSeek Coder via RuVector',
474
+ 'codellama': 'Code Llama via RuVector',
475
+ 'qwen2.5': 'Qwen 2.5 via RuVector',
476
+ };
477
+
478
+ return {
479
+ model,
480
+ name: model,
481
+ description: descriptions[model] || 'RuVector-managed local model',
482
+ contextLength: this.capabilities.maxContextLength[model] || 32000,
483
+ maxOutputTokens: this.capabilities.maxOutputTokens[model] || 4096,
484
+ supportedFeatures: [
485
+ 'chat',
486
+ 'completion',
487
+ 'local',
488
+ 'self-learning',
489
+ 'sona',
490
+ 'hnsw-memory',
491
+ ],
492
+ pricing: { promptCostPer1k: 0, completionCostPer1k: 0, currency: 'USD' },
493
+ };
494
+ }
495
+
496
+ protected async doHealthCheck(): Promise<HealthCheckResult> {
497
+ try {
498
+ const response = await fetch(`${this.baseUrl}/health`);
499
+
500
+ if (response.ok) {
501
+ const data = await response.json() as { sona?: boolean; hnsw?: boolean };
502
+ return {
503
+ healthy: true,
504
+ timestamp: new Date(),
505
+ details: {
506
+ server: 'ruvector',
507
+ sona: data.sona ?? false,
508
+ hnsw: data.hnsw ?? false,
509
+ local: true,
510
+ },
511
+ };
512
+ }
513
+
514
+ return {
515
+ healthy: false,
516
+ error: `HTTP ${response.status}`,
517
+ timestamp: new Date(),
518
+ };
519
+ } catch (error) {
520
+ return {
521
+ healthy: false,
522
+ error: error instanceof Error ? error.message : 'RuVector server not reachable',
523
+ timestamp: new Date(),
524
+ details: {
525
+ hint: 'Start RuVector server: npx @ruvector/ruvllm serve',
526
+ },
527
+ };
528
+ }
529
+ }
530
+
531
+ /**
532
+ * Build ruvLLM native API query format
533
+ * See: https://github.com/ruvnet/ruvector/tree/main/examples/ruvLLM
534
+ */
535
+ private buildRuvectorQuery(request: LLMRequest): { query: string; session_id?: string } {
536
+ // ruvLLM uses simple query format, not OpenAI-compatible
537
+ const lastUserMessage = [...request.messages].reverse().find(m => m.role === 'user');
538
+ const systemPrompt = request.messages.find(m => m.role === 'system');
539
+
540
+ let query = '';
541
+ if (systemPrompt) {
542
+ query += `[System]: ${typeof systemPrompt.content === 'string' ? systemPrompt.content : JSON.stringify(systemPrompt.content)}\n\n`;
543
+ }
544
+ query += typeof lastUserMessage?.content === 'string'
545
+ ? lastUserMessage.content
546
+ : JSON.stringify(lastUserMessage?.content || '');
547
+
548
+ return {
549
+ query,
550
+ session_id: request.requestId,
551
+ };
552
+ }
553
+
554
+ private buildRequest(request: LLMRequest, stream = false): RuVectorRequest {
555
+ const ruvectorRequest: RuVectorRequest = {
556
+ model: request.model || this.config.model,
557
+ messages: request.messages.map((msg) => ({
558
+ role: msg.role === 'tool' ? 'assistant' : msg.role,
559
+ content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
560
+ })),
561
+ stream,
562
+ };
563
+
564
+ if (request.temperature !== undefined || this.config.temperature !== undefined) {
565
+ ruvectorRequest.temperature = request.temperature ?? this.config.temperature;
566
+ }
567
+ if (request.maxTokens || this.config.maxTokens) {
568
+ ruvectorRequest.max_tokens = request.maxTokens || this.config.maxTokens;
569
+ }
570
+ if (request.topP !== undefined || this.config.topP !== undefined) {
571
+ ruvectorRequest.top_p = request.topP ?? this.config.topP;
572
+ }
573
+
574
+ // SONA options
575
+ if (this.ruvectorConfig.enableSona !== false) {
576
+ ruvectorRequest.sona_options = {
577
+ enabled: true,
578
+ learning_rate: this.ruvectorConfig.sonaLearningRate || 0.01,
579
+ adapt_on_response: true,
580
+ };
581
+ }
582
+
583
+ // Router options
584
+ if (this.ruvectorConfig.enableFastGrnn !== false) {
585
+ ruvectorRequest.router_options = {
586
+ strategy: this.ruvectorConfig.routerStrategy || 'balanced',
587
+ fallback_models: ['llama3.2', 'mistral', 'phi-4'],
588
+ };
589
+ }
590
+
591
+ return ruvectorRequest;
592
+ }
593
+
594
+ private transformResponse(data: RuVectorResponse, request: LLMRequest): LLMResponse {
595
+ const model = request.model || this.config.model;
596
+
597
+ return {
598
+ id: data.id || `ruvector-${Date.now()}`,
599
+ model: (data.model || model) as LLMModel,
600
+ provider: 'custom',
601
+ content: data.content,
602
+ usage: {
603
+ promptTokens: data.usage?.prompt_tokens || 0,
604
+ completionTokens: data.usage?.completion_tokens || 0,
605
+ totalTokens: data.usage?.total_tokens || 0,
606
+ },
607
+ cost: {
608
+ promptCost: 0,
609
+ completionCost: 0,
610
+ totalCost: 0,
611
+ currency: 'USD',
612
+ },
613
+ finishReason: data.done ? 'stop' : 'length',
614
+ metadata: {
615
+ sona: data.sona_metrics,
616
+ router: data.router_metrics,
617
+ },
618
+ };
619
+ }
620
+
621
+ private async handleErrorResponse(response: Response): Promise<never> {
622
+ const errorText = await response.text();
623
+ let errorData: { error?: string };
624
+
625
+ try {
626
+ errorData = JSON.parse(errorText);
627
+ } catch {
628
+ errorData = { error: errorText };
629
+ }
630
+
631
+ const message = errorData.error || 'Unknown error';
632
+
633
+ if (response.status === 0 || message.includes('connection')) {
634
+ throw new ProviderUnavailableError('custom', {
635
+ message,
636
+ hint: 'Start RuVector server: npx @ruvector/ruvllm serve',
637
+ });
638
+ }
639
+
640
+ throw new LLMProviderError(
641
+ message,
642
+ `RUVECTOR_${response.status}`,
643
+ 'custom',
644
+ response.status,
645
+ true,
646
+ errorData
647
+ );
648
+ }
649
+
650
+ /**
651
+ * Get SONA learning metrics
652
+ */
653
+ async getSonaMetrics(): Promise<{
654
+ enabled: boolean;
655
+ adaptationsApplied: number;
656
+ qualityScore: number;
657
+ patternsLearned: number;
658
+ }> {
659
+ try {
660
+ const response = await fetch(`${this.baseUrl}/v1/sona/metrics`);
661
+ if (response.ok) {
662
+ return await response.json() as {
663
+ enabled: boolean;
664
+ adaptationsApplied: number;
665
+ qualityScore: number;
666
+ patternsLearned: number;
667
+ };
668
+ }
669
+ } catch {
670
+ // Ignore
671
+ }
672
+
673
+ return {
674
+ enabled: false,
675
+ adaptationsApplied: 0,
676
+ qualityScore: 0,
677
+ patternsLearned: 0,
678
+ };
679
+ }
680
+
681
+ /**
682
+ * Trigger SONA learning from a conversation
683
+ */
684
+ async triggerSonaLearning(conversationId: string): Promise<boolean> {
685
+ try {
686
+ const response = await fetch(`${this.baseUrl}/v1/sona/learn`, {
687
+ method: 'POST',
688
+ headers: { 'Content-Type': 'application/json' },
689
+ body: JSON.stringify({ conversation_id: conversationId }),
690
+ });
691
+ return response.ok;
692
+ } catch {
693
+ return false;
694
+ }
695
+ }
696
+
697
+ /**
698
+ * Search HNSW memory for similar patterns
699
+ */
700
+ async searchMemory(query: string, limit = 5): Promise<Array<{
701
+ id: string;
702
+ similarity: number;
703
+ content: string;
704
+ }>> {
705
+ try {
706
+ const response = await fetch(`${this.baseUrl}/v1/hnsw/search`, {
707
+ method: 'POST',
708
+ headers: { 'Content-Type': 'application/json' },
709
+ body: JSON.stringify({ query, limit }),
710
+ });
711
+
712
+ if (response.ok) {
713
+ return await response.json() as Array<{ id: string; similarity: number; content: string }>;
714
+ }
715
+ } catch {
716
+ // Ignore
717
+ }
718
+
719
+ return [];
720
+ }
721
+ }