@sparkleideas/embeddings 3.0.0-alpha.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1136 @@
1
+ /**
2
+ * V3 Embedding Service Implementation
3
+ *
4
+ * Production embedding service aligned with @sparkleideas/agentic-flow@alpha:
5
+ * - OpenAI provider (text-embedding-3-small/large)
6
+ * - Transformers.js provider (local ONNX models)
7
+ * - Mock provider (development/testing)
8
+ *
9
+ * Performance Targets:
10
+ * - Single embedding: <100ms (API), <50ms (local)
11
+ * - Batch embedding: <500ms for 10 items
12
+ * - Cache hit: <1ms
13
+ */
14
+
15
+ import { EventEmitter } from 'events';
16
+ import type {
17
+ EmbeddingProvider,
18
+ EmbeddingConfig,
19
+ OpenAIEmbeddingConfig,
20
+ TransformersEmbeddingConfig,
21
+ MockEmbeddingConfig,
22
+ AgenticFlowEmbeddingConfig,
23
+ EmbeddingResult,
24
+ BatchEmbeddingResult,
25
+ IEmbeddingService,
26
+ EmbeddingEvent,
27
+ EmbeddingEventListener,
28
+ SimilarityMetric,
29
+ SimilarityResult,
30
+ NormalizationType,
31
+ PersistentCacheConfig,
32
+ } from './types.js';
33
+ import { normalize } from './normalization.js';
34
+ import { PersistentEmbeddingCache } from './persistent-cache.js';
35
+
36
+ // ============================================================================
37
+ // LRU Cache Implementation
38
+ // ============================================================================
39
+
40
+ class LRUCache<K, V> {
41
+ private cache: Map<K, V> = new Map();
42
+ private hits = 0;
43
+ private misses = 0;
44
+
45
+ constructor(private readonly maxSize: number) {}
46
+
47
+ get(key: K): V | undefined {
48
+ const value = this.cache.get(key);
49
+ if (value !== undefined) {
50
+ // Move to end (most recently used)
51
+ this.cache.delete(key);
52
+ this.cache.set(key, value);
53
+ this.hits++;
54
+ return value;
55
+ }
56
+ this.misses++;
57
+ return undefined;
58
+ }
59
+
60
+ set(key: K, value: V): void {
61
+ if (this.cache.has(key)) {
62
+ this.cache.delete(key);
63
+ } else if (this.cache.size >= this.maxSize) {
64
+ // Remove oldest (first) entry
65
+ const firstKey = this.cache.keys().next().value;
66
+ if (firstKey !== undefined) {
67
+ this.cache.delete(firstKey);
68
+ }
69
+ }
70
+ this.cache.set(key, value);
71
+ }
72
+
73
+ clear(): void {
74
+ this.cache.clear();
75
+ this.hits = 0;
76
+ this.misses = 0;
77
+ }
78
+
79
+ get size(): number {
80
+ return this.cache.size;
81
+ }
82
+
83
+ get hitRate(): number {
84
+ const total = this.hits + this.misses;
85
+ return total > 0 ? this.hits / total : 0;
86
+ }
87
+
88
+ getStats() {
89
+ return {
90
+ size: this.cache.size,
91
+ maxSize: this.maxSize,
92
+ hits: this.hits,
93
+ misses: this.misses,
94
+ hitRate: this.hitRate,
95
+ };
96
+ }
97
+ }
98
+
99
+ // ============================================================================
100
+ // Base Embedding Service
101
+ // ============================================================================
102
+
103
+ abstract class BaseEmbeddingService extends EventEmitter implements IEmbeddingService {
104
+ abstract readonly provider: EmbeddingProvider;
105
+ protected cache: LRUCache<string, Float32Array>;
106
+ protected persistentCache: PersistentEmbeddingCache | null = null;
107
+ protected embeddingListeners: Set<EmbeddingEventListener> = new Set();
108
+ protected normalizationType: NormalizationType;
109
+
110
+ constructor(protected readonly config: EmbeddingConfig) {
111
+ super();
112
+ this.cache = new LRUCache(config.cacheSize ?? 1000);
113
+ this.normalizationType = config.normalization ?? 'none';
114
+
115
+ // Initialize persistent cache if configured
116
+ if (config.persistentCache?.enabled) {
117
+ const pcConfig: PersistentCacheConfig = config.persistentCache;
118
+ this.persistentCache = new PersistentEmbeddingCache({
119
+ dbPath: pcConfig.dbPath ?? '.cache/embeddings.db',
120
+ maxSize: pcConfig.maxSize ?? 10000,
121
+ ttlMs: pcConfig.ttlMs,
122
+ });
123
+ }
124
+ }
125
+
126
+ abstract embed(text: string): Promise<EmbeddingResult>;
127
+ abstract embedBatch(texts: string[]): Promise<BatchEmbeddingResult>;
128
+
129
+ /**
130
+ * Apply normalization to embedding if configured
131
+ */
132
+ protected applyNormalization(embedding: Float32Array): Float32Array {
133
+ if (this.normalizationType === 'none') {
134
+ return embedding;
135
+ }
136
+ return normalize(embedding, { type: this.normalizationType });
137
+ }
138
+
139
+ /**
140
+ * Check persistent cache for embedding
141
+ */
142
+ protected async checkPersistentCache(text: string): Promise<Float32Array | null> {
143
+ if (!this.persistentCache) return null;
144
+ return this.persistentCache.get(text);
145
+ }
146
+
147
+ /**
148
+ * Store embedding in persistent cache
149
+ */
150
+ protected async storePersistentCache(text: string, embedding: Float32Array): Promise<void> {
151
+ if (!this.persistentCache) return;
152
+ await this.persistentCache.set(text, embedding);
153
+ }
154
+
155
+ protected emitEvent(event: EmbeddingEvent): void {
156
+ for (const listener of this.embeddingListeners) {
157
+ try {
158
+ listener(event);
159
+ } catch (error) {
160
+ console.error('Error in embedding event listener:', error);
161
+ }
162
+ }
163
+ this.emit(event.type, event);
164
+ }
165
+
166
+ addEventListener(listener: EmbeddingEventListener): void {
167
+ this.embeddingListeners.add(listener);
168
+ }
169
+
170
+ removeEventListener(listener: EmbeddingEventListener): void {
171
+ this.embeddingListeners.delete(listener);
172
+ }
173
+
174
+ clearCache(): void {
175
+ const size = this.cache.size;
176
+ this.cache.clear();
177
+ this.emitEvent({ type: 'cache_eviction', size });
178
+ }
179
+
180
+ getCacheStats() {
181
+ const stats = this.cache.getStats();
182
+ return {
183
+ size: stats.size,
184
+ maxSize: stats.maxSize,
185
+ hitRate: stats.hitRate,
186
+ };
187
+ }
188
+
189
+ async shutdown(): Promise<void> {
190
+ this.clearCache();
191
+ this.embeddingListeners.clear();
192
+ }
193
+ }
194
+
195
+ // ============================================================================
196
+ // OpenAI Embedding Service
197
+ // ============================================================================
198
+
199
+ export class OpenAIEmbeddingService extends BaseEmbeddingService {
200
+ readonly provider: EmbeddingProvider = 'openai';
201
+ private readonly apiKey: string;
202
+ private readonly model: string;
203
+ private readonly baseURL: string;
204
+ private readonly timeout: number;
205
+ private readonly maxRetries: number;
206
+
207
+ constructor(config: OpenAIEmbeddingConfig) {
208
+ super(config);
209
+ this.apiKey = config.apiKey;
210
+ this.model = config.model ?? 'text-embedding-3-small';
211
+ this.baseURL = config.baseURL ?? 'https://api.openai.com/v1/embeddings';
212
+ this.timeout = config.timeout ?? 30000;
213
+ this.maxRetries = config.maxRetries ?? 3;
214
+ }
215
+
216
+ async embed(text: string): Promise<EmbeddingResult> {
217
+ // Check cache
218
+ const cached = this.cache.get(text);
219
+ if (cached) {
220
+ this.emitEvent({ type: 'cache_hit', text });
221
+ return {
222
+ embedding: cached,
223
+ latencyMs: 0,
224
+ cached: true,
225
+ };
226
+ }
227
+
228
+ this.emitEvent({ type: 'embed_start', text });
229
+ const startTime = performance.now();
230
+
231
+ try {
232
+ const response = await this.callOpenAI([text]);
233
+ const embedding = new Float32Array(response.data[0].embedding);
234
+
235
+ // Cache result
236
+ this.cache.set(text, embedding);
237
+
238
+ const latencyMs = performance.now() - startTime;
239
+ this.emitEvent({ type: 'embed_complete', text, latencyMs });
240
+
241
+ return {
242
+ embedding,
243
+ latencyMs,
244
+ usage: {
245
+ promptTokens: response.usage?.prompt_tokens ?? 0,
246
+ totalTokens: response.usage?.total_tokens ?? 0,
247
+ },
248
+ };
249
+ } catch (error) {
250
+ const message = error instanceof Error ? error.message : 'Unknown error';
251
+ this.emitEvent({ type: 'embed_error', text, error: message });
252
+ throw new Error(`OpenAI embedding failed: ${message}`);
253
+ }
254
+ }
255
+
256
+ async embedBatch(texts: string[]): Promise<BatchEmbeddingResult> {
257
+ this.emitEvent({ type: 'batch_start', count: texts.length });
258
+ const startTime = performance.now();
259
+
260
+ // Check cache for each text
261
+ const cached: Array<{ index: number; embedding: Float32Array }> = [];
262
+ const uncached: Array<{ index: number; text: string }> = [];
263
+
264
+ texts.forEach((text, index) => {
265
+ const cachedEmbedding = this.cache.get(text);
266
+ if (cachedEmbedding) {
267
+ cached.push({ index, embedding: cachedEmbedding });
268
+ this.emitEvent({ type: 'cache_hit', text });
269
+ } else {
270
+ uncached.push({ index, text });
271
+ }
272
+ });
273
+
274
+ // Fetch uncached embeddings
275
+ let apiEmbeddings: Float32Array[] = [];
276
+ let usage = { promptTokens: 0, totalTokens: 0 };
277
+
278
+ if (uncached.length > 0) {
279
+ const response = await this.callOpenAI(uncached.map(u => u.text));
280
+ apiEmbeddings = response.data.map(d => new Float32Array(d.embedding));
281
+
282
+ // Cache results
283
+ uncached.forEach((item, i) => {
284
+ this.cache.set(item.text, apiEmbeddings[i]);
285
+ });
286
+
287
+ usage = {
288
+ promptTokens: response.usage?.prompt_tokens ?? 0,
289
+ totalTokens: response.usage?.total_tokens ?? 0,
290
+ };
291
+ }
292
+
293
+ // Reconstruct result array in original order
294
+ const embeddings: Array<Float32Array> = new Array(texts.length);
295
+ cached.forEach(c => {
296
+ embeddings[c.index] = c.embedding;
297
+ });
298
+ uncached.forEach((u, i) => {
299
+ embeddings[u.index] = apiEmbeddings[i];
300
+ });
301
+
302
+ const totalLatencyMs = performance.now() - startTime;
303
+ this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
304
+
305
+ return {
306
+ embeddings,
307
+ totalLatencyMs,
308
+ avgLatencyMs: totalLatencyMs / texts.length,
309
+ usage,
310
+ cacheStats: {
311
+ hits: cached.length,
312
+ misses: uncached.length,
313
+ },
314
+ };
315
+ }
316
+
317
+ private async callOpenAI(texts: string[]): Promise<{
318
+ data: Array<{ embedding: number[] }>;
319
+ usage?: { prompt_tokens: number; total_tokens: number };
320
+ }> {
321
+ const config = this.config as OpenAIEmbeddingConfig;
322
+
323
+ for (let attempt = 0; attempt < this.maxRetries; attempt++) {
324
+ try {
325
+ const controller = new AbortController();
326
+ const timeoutId = setTimeout(() => controller.abort(), this.timeout);
327
+
328
+ const response = await fetch(this.baseURL, {
329
+ method: 'POST',
330
+ headers: {
331
+ 'Content-Type': 'application/json',
332
+ Authorization: `Bearer ${this.apiKey}`,
333
+ },
334
+ body: JSON.stringify({
335
+ model: this.model,
336
+ input: texts,
337
+ dimensions: config.dimensions,
338
+ }),
339
+ signal: controller.signal,
340
+ });
341
+
342
+ clearTimeout(timeoutId);
343
+
344
+ if (!response.ok) {
345
+ const error = await response.text();
346
+ throw new Error(`OpenAI API error: ${response.status} - ${error}`);
347
+ }
348
+
349
+ return await response.json() as {
350
+ data: Array<{ embedding: number[] }>;
351
+ usage?: { prompt_tokens: number; total_tokens: number };
352
+ };
353
+ } catch (error) {
354
+ if (attempt === this.maxRetries - 1) {
355
+ throw error;
356
+ }
357
+ // Exponential backoff
358
+ await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 100));
359
+ }
360
+ }
361
+
362
+ throw new Error('Max retries exceeded');
363
+ }
364
+ }
365
+
366
+ // ============================================================================
367
+ // Transformers.js Embedding Service
368
+ // ============================================================================
369
+
370
+ export class TransformersEmbeddingService extends BaseEmbeddingService {
371
+ readonly provider: EmbeddingProvider = 'transformers';
372
+ private pipeline: any = null;
373
+ private readonly modelName: string;
374
+ private initialized = false;
375
+
376
+ constructor(config: TransformersEmbeddingConfig) {
377
+ super(config);
378
+ this.modelName = config.model ?? 'Xenova/all-MiniLM-L6-v2';
379
+ }
380
+
381
+ private async initialize(): Promise<void> {
382
+ if (this.initialized) return;
383
+
384
+ try {
385
+ const { pipeline } = await import('@xenova/transformers');
386
+ this.pipeline = await pipeline('feature-extraction', this.modelName);
387
+ this.initialized = true;
388
+ } catch (error) {
389
+ throw new Error(`Failed to initialize transformers.js: ${error}`);
390
+ }
391
+ }
392
+
393
+ async embed(text: string): Promise<EmbeddingResult> {
394
+ await this.initialize();
395
+
396
+ // Check cache
397
+ const cached = this.cache.get(text);
398
+ if (cached) {
399
+ this.emitEvent({ type: 'cache_hit', text });
400
+ return {
401
+ embedding: cached,
402
+ latencyMs: 0,
403
+ cached: true,
404
+ };
405
+ }
406
+
407
+ this.emitEvent({ type: 'embed_start', text });
408
+ const startTime = performance.now();
409
+
410
+ try {
411
+ const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
412
+ const embedding = new Float32Array(output.data);
413
+
414
+ // Cache result
415
+ this.cache.set(text, embedding);
416
+
417
+ const latencyMs = performance.now() - startTime;
418
+ this.emitEvent({ type: 'embed_complete', text, latencyMs });
419
+
420
+ return {
421
+ embedding,
422
+ latencyMs,
423
+ };
424
+ } catch (error) {
425
+ const message = error instanceof Error ? error.message : 'Unknown error';
426
+ this.emitEvent({ type: 'embed_error', text, error: message });
427
+ throw new Error(`Transformers.js embedding failed: ${message}`);
428
+ }
429
+ }
430
+
431
+ async embedBatch(texts: string[]): Promise<BatchEmbeddingResult> {
432
+ await this.initialize();
433
+
434
+ this.emitEvent({ type: 'batch_start', count: texts.length });
435
+ const startTime = performance.now();
436
+
437
+ const embeddings: Float32Array[] = [];
438
+ let cacheHits = 0;
439
+
440
+ for (const text of texts) {
441
+ const cached = this.cache.get(text);
442
+ if (cached) {
443
+ embeddings.push(cached);
444
+ cacheHits++;
445
+ this.emitEvent({ type: 'cache_hit', text });
446
+ } else {
447
+ const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
448
+ const embedding = new Float32Array(output.data);
449
+ this.cache.set(text, embedding);
450
+ embeddings.push(embedding);
451
+ }
452
+ }
453
+
454
+ const totalLatencyMs = performance.now() - startTime;
455
+ this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
456
+
457
+ return {
458
+ embeddings,
459
+ totalLatencyMs,
460
+ avgLatencyMs: totalLatencyMs / texts.length,
461
+ cacheStats: {
462
+ hits: cacheHits,
463
+ misses: texts.length - cacheHits,
464
+ },
465
+ };
466
+ }
467
+ }
468
+
469
+ // ============================================================================
470
+ // Mock Embedding Service
471
+ // ============================================================================
472
+
473
+ export class MockEmbeddingService extends BaseEmbeddingService {
474
+ readonly provider: EmbeddingProvider = 'mock';
475
+ private readonly dimensions: number;
476
+ private readonly simulatedLatency: number;
477
+
478
+ constructor(config: Partial<MockEmbeddingConfig> = {}) {
479
+ const fullConfig: MockEmbeddingConfig = {
480
+ provider: 'mock',
481
+ dimensions: config.dimensions ?? 384,
482
+ cacheSize: config.cacheSize ?? 1000,
483
+ simulatedLatency: config.simulatedLatency ?? 0,
484
+ enableCache: config.enableCache ?? true,
485
+ };
486
+ super(fullConfig);
487
+ this.dimensions = fullConfig.dimensions!;
488
+ this.simulatedLatency = fullConfig.simulatedLatency!;
489
+ }
490
+
491
+ async embed(text: string): Promise<EmbeddingResult> {
492
+ // Check cache
493
+ const cached = this.cache.get(text);
494
+ if (cached) {
495
+ this.emitEvent({ type: 'cache_hit', text });
496
+ return {
497
+ embedding: cached,
498
+ latencyMs: 0,
499
+ cached: true,
500
+ };
501
+ }
502
+
503
+ this.emitEvent({ type: 'embed_start', text });
504
+ const startTime = performance.now();
505
+
506
+ // Simulate latency
507
+ if (this.simulatedLatency > 0) {
508
+ await new Promise(resolve => setTimeout(resolve, this.simulatedLatency));
509
+ }
510
+
511
+ const embedding = this.hashEmbedding(text);
512
+ this.cache.set(text, embedding);
513
+
514
+ const latencyMs = performance.now() - startTime;
515
+ this.emitEvent({ type: 'embed_complete', text, latencyMs });
516
+
517
+ return {
518
+ embedding,
519
+ latencyMs,
520
+ };
521
+ }
522
+
523
+ async embedBatch(texts: string[]): Promise<BatchEmbeddingResult> {
524
+ this.emitEvent({ type: 'batch_start', count: texts.length });
525
+ const startTime = performance.now();
526
+
527
+ const embeddings: Float32Array[] = [];
528
+ let cacheHits = 0;
529
+
530
+ for (const text of texts) {
531
+ const cached = this.cache.get(text);
532
+ if (cached) {
533
+ embeddings.push(cached);
534
+ cacheHits++;
535
+ } else {
536
+ const embedding = this.hashEmbedding(text);
537
+ this.cache.set(text, embedding);
538
+ embeddings.push(embedding);
539
+ }
540
+ }
541
+
542
+ const totalLatencyMs = performance.now() - startTime;
543
+ this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
544
+
545
+ return {
546
+ embeddings,
547
+ totalLatencyMs,
548
+ avgLatencyMs: totalLatencyMs / texts.length,
549
+ cacheStats: {
550
+ hits: cacheHits,
551
+ misses: texts.length - cacheHits,
552
+ },
553
+ };
554
+ }
555
+
556
+ /**
557
+ * Generate deterministic hash-based embedding
558
+ */
559
+ private hashEmbedding(text: string): Float32Array {
560
+ const embedding = new Float32Array(this.dimensions);
561
+
562
+ // Seed with text hash
563
+ let hash = 0;
564
+ for (let i = 0; i < text.length; i++) {
565
+ hash = (hash << 5) - hash + text.charCodeAt(i);
566
+ hash = hash & hash;
567
+ }
568
+
569
+ // Generate pseudo-random embedding
570
+ for (let i = 0; i < this.dimensions; i++) {
571
+ const seed = hash + i * 2654435761;
572
+ const x = Math.sin(seed) * 10000;
573
+ embedding[i] = x - Math.floor(x);
574
+ }
575
+
576
+ // Normalize to unit vector
577
+ const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
578
+ for (let i = 0; i < this.dimensions; i++) {
579
+ embedding[i] /= norm;
580
+ }
581
+
582
+ return embedding;
583
+ }
584
+ }
585
+
586
+ // ============================================================================
587
+ // Agentic-Flow Embedding Service
588
+ // ============================================================================
589
+
590
/**
 * Agentic-Flow embedding service using OptimizedEmbedder
 *
 * Features:
 * - ONNX-based embeddings with SIMD acceleration
 * - 256-entry LRU cache with FNV-1a hash
 * - 8x loop unrolling for cosine similarity
 * - Pre-allocated buffers (no GC pressure)
 * - 3-4x faster batch processing
 */
export class AgenticFlowEmbeddingService extends BaseEmbeddingService {
  readonly provider: EmbeddingProvider = '@sparkleideas/agentic-flow';
  // Lazily-created OptimizedEmbedder instance; untyped (`any`) because it is
  // resolved via dynamic import and its shape is not known at compile time.
  private embedder: any = null;
  private initialized = false;
  private readonly modelId: string;
  private readonly dimensions: number;
  private readonly embedderCacheSize: number;
  private readonly modelDir: string | undefined;
  private readonly autoDownload: boolean;

  constructor(config: AgenticFlowEmbeddingConfig) {
    super(config);
    this.modelId = config.modelId ?? 'all-MiniLM-L6-v2';
    this.dimensions = config.dimensions ?? 384;
    this.embedderCacheSize = config.embedderCacheSize ?? 256;
    this.modelDir = config.modelDir;
    this.autoDownload = config.autoDownload ?? false;
  }

  /**
   * Resolve and initialize the OptimizedEmbedder.
   *
   * Resolution order: the package export '@sparkleideas/agentic-flow/embeddings'
   * first, then known node_modules file paths loaded via file:// URLs. The
   * first candidate whose module exposes getOptimizedEmbedder and whose
   * init() succeeds wins; if all fail, the last error encountered is surfaced
   * in the thrown message.
   */
  private async initialize(): Promise<void> {
    if (this.initialized) return;

    let lastError: Error | undefined;

    // Attempt one candidate module path; records the failure and returns
    // false on any error so the caller can try the next candidate.
    const createEmbedder = async (modulePath: string): Promise<boolean> => {
      try {
        // Use file:// protocol for absolute paths
        const importPath = modulePath.startsWith('/') ? `file://${modulePath}` : modulePath;
        const module = await import(/* webpackIgnore: true */ importPath);
        const getOptimizedEmbedder = module.getOptimizedEmbedder || module.default?.getOptimizedEmbedder;
        if (!getOptimizedEmbedder) {
          lastError = new Error(`Module loaded but getOptimizedEmbedder not found`);
          return false;
        }

        // Only include defined values to not override defaults
        const embedderConfig: Record<string, unknown> = {
          modelId: this.modelId,
          dimension: this.dimensions,
          cacheSize: this.embedderCacheSize,
          autoDownload: this.autoDownload,
        };
        if (this.modelDir !== undefined) {
          embedderConfig.modelDir = this.modelDir;
        }
        this.embedder = getOptimizedEmbedder(embedderConfig);
        await this.embedder.init();
        this.initialized = true;
        return true;
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        return false;
      }
    };

    // Build list of possible module paths to try
    const possiblePaths: string[] = [];

    // Try proper package exports first (preferred)
    possiblePaths.push('@sparkleideas/agentic-flow/embeddings');

    // Try node_modules resolution from different locations (for file:// imports)
    try {
      const path = await import('path');
      const { existsSync } = await import('fs');
      const cwd = process.cwd();

      // Prioritize absolute paths that exist (for file:// import fallback).
      // NOTE(review): the third entry is a hard-coded dev-container path —
      // confirm it is still intended outside that environment.
      const absolutePaths = [
        path.join(cwd, 'node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
        path.join(cwd, '../node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
        '/workspaces/claude-flow/node_modules/agentic-flow/dist/embeddings/optimized-embedder.js',
      ];

      for (const p of absolutePaths) {
        if (existsSync(p)) {
          possiblePaths.push(p);
        }
      }
    } catch {
      // fs/path module not available (e.g. non-Node runtime); package-export
      // resolution above is still attempted.
    }

    // Try each path
    for (const modulePath of possiblePaths) {
      if (await createEmbedder(modulePath)) {
        return;
      }
    }

    const errorDetail = lastError?.message ? ` Last error: ${lastError.message}` : '';
    throw new Error(
      `Failed to initialize @sparkleideas/agentic-flow embeddings.${errorDetail} ` +
      `Ensure @sparkleideas/agentic-flow is installed and ONNX model is downloaded: ` +
      `npx @sparkleideas/agentic-flow@alpha embeddings init`
    );
  }

  /**
   * Embed a single text via the optimized embedder.
   *
   * Serves from this service's LRU cache first; the embedder keeps its own
   * internal cache as well, so misses here may still be cheap.
   * @throws Error wrapping the underlying embedder failure.
   */
  async embed(text: string): Promise<EmbeddingResult> {
    await this.initialize();

    // Check our LRU cache first
    const cached = this.cache.get(text);
    if (cached) {
      this.emitEvent({ type: 'cache_hit', text });
      return {
        embedding: cached,
        latencyMs: 0,
        cached: true,
      };
    }

    this.emitEvent({ type: 'embed_start', text });
    const startTime = performance.now();

    try {
      // Use @sparkleideas/agentic-flow's optimized embedder (has its own internal cache)
      // NOTE(review): assumes embedder.embed() resolves to a Float32Array —
      // confirm against the OptimizedEmbedder API.
      const embedding = await this.embedder.embed(text);

      // Store in our cache as well
      this.cache.set(text, embedding);

      const latencyMs = performance.now() - startTime;
      this.emitEvent({ type: 'embed_complete', text, latencyMs });

      return {
        embedding,
        latencyMs,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      this.emitEvent({ type: 'embed_error', text, error: message });
      throw new Error(`Agentic-flow embedding failed: ${message}`);
    }
  }

  /**
   * Embed many texts, using the embedder's batch path for cache misses and
   * reassembling results in the caller's original input order.
   */
  async embedBatch(texts: string[]): Promise<BatchEmbeddingResult> {
    await this.initialize();

    this.emitEvent({ type: 'batch_start', count: texts.length });
    const startTime = performance.now();

    // Check cache for each text
    const cached: Array<{ index: number; embedding: Float32Array }> = [];
    const uncached: Array<{ index: number; text: string }> = [];

    texts.forEach((text, index) => {
      const cachedEmbedding = this.cache.get(text);
      if (cachedEmbedding) {
        cached.push({ index, embedding: cachedEmbedding });
        this.emitEvent({ type: 'cache_hit', text });
      } else {
        uncached.push({ index, text });
      }
    });

    // Use optimized batch embedding for uncached texts
    let batchEmbeddings: Float32Array[] = [];
    if (uncached.length > 0) {
      const uncachedTexts = uncached.map(u => u.text);
      batchEmbeddings = await this.embedder.embedBatch(uncachedTexts);

      // Cache results
      uncached.forEach((item, i) => {
        this.cache.set(item.text, batchEmbeddings[i]);
      });
    }

    // Reconstruct result array in original order
    const embeddings: Float32Array[] = new Array(texts.length);
    cached.forEach(c => {
      embeddings[c.index] = c.embedding;
    });
    uncached.forEach((u, i) => {
      embeddings[u.index] = batchEmbeddings[i];
    });

    const totalLatencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });

    return {
      embeddings,
      totalLatencyMs,
      avgLatencyMs: totalLatencyMs / texts.length,
      cacheStats: {
        hits: cached.length,
        misses: uncached.length,
      },
    };
  }

  /**
   * Get combined cache statistics from both our LRU cache and embedder's internal cache
   */
  override getCacheStats() {
    const baseStats = super.getCacheStats();

    if (this.embedder && this.embedder.getCacheStats) {
      const embedderStats = this.embedder.getCacheStats();
      return {
        size: baseStats.size + embedderStats.size,
        maxSize: baseStats.maxSize + embedderStats.maxSize,
        // NOTE(review): hitRate reflects only this service's LRU cache, not
        // the embedder's internal cache.
        hitRate: baseStats.hitRate,
        embedderCache: embedderStats,
      };
    }

    return baseStats;
  }

  /** Clear the embedder's internal cache (when supported), then base teardown. */
  override async shutdown(): Promise<void> {
    if (this.embedder && this.embedder.clearCache) {
      this.embedder.clearCache();
    }
    await super.shutdown();
  }
}
817
+
818
+ // ============================================================================
819
+ // Factory Functions
820
+ // ============================================================================
821
+
822
+ /**
823
+ * Check if @sparkleideas/agentic-flow is available
824
+ */
825
+ async function isAgenticFlowAvailable(): Promise<boolean> {
826
+ try {
827
+ await import('@sparkleideas/agentic-flow/embeddings');
828
+ return true;
829
+ } catch {
830
+ return false;
831
+ }
832
+ }
833
+
834
+ /**
835
+ * Auto-install @sparkleideas/agentic-flow and initialize model
836
+ */
837
/**
 * Auto-install @sparkleideas/agentic-flow and initialize model.
 *
 * Best-effort: skips work if the package already resolves, otherwise runs
 * `npm install` (2-minute timeout) followed by the package's
 * `embeddings init` command (5-minute timeout), then re-probes availability.
 * Returns false instead of throwing on any failure.
 */
async function autoInstallAgenticFlow(): Promise<boolean> {
  // Imported lazily so this module stays loadable in non-Node runtimes.
  const { exec } = await import('child_process');
  const { promisify } = await import('util');
  const execAsync = promisify(exec);

  try {
    // Check if already available
    if (await isAgenticFlowAvailable()) {
      return true;
    }

    console.log('[embeddings] Installing @sparkleideas/agentic-flow@alpha...');
    await execAsync('npm install @sparkleideas/agentic-flow@alpha --save', { timeout: 120000 });

    // Initialize the model
    console.log('[embeddings] Downloading embedding model...');
    await execAsync('npx @sparkleideas/agentic-flow@alpha embeddings init', { timeout: 300000 });

    // Verify installation
    return await isAgenticFlowAvailable();
  } catch (error) {
    console.warn('[embeddings] Auto-install failed:', error instanceof Error ? error.message : error);
    return false;
  }
}
862
+
863
+ /**
864
+ * Create embedding service based on configuration (sync version)
865
+ * Note: For 'auto' provider or smart fallback, use createEmbeddingServiceAsync
866
+ */
867
+ export function createEmbeddingService(config: EmbeddingConfig): IEmbeddingService {
868
+ switch (config.provider) {
869
+ case 'openai':
870
+ return new OpenAIEmbeddingService(config as OpenAIEmbeddingConfig);
871
+ case 'transformers':
872
+ return new TransformersEmbeddingService(config as TransformersEmbeddingConfig);
873
+ case 'mock':
874
+ return new MockEmbeddingService(config as MockEmbeddingConfig);
875
+ case '@sparkleideas/agentic-flow':
876
+ return new AgenticFlowEmbeddingService(config as AgenticFlowEmbeddingConfig);
877
+ default:
878
+ console.warn(`Unknown provider, using mock`);
879
+ return new MockEmbeddingService({ provider: 'mock', dimensions: 384 });
880
+ }
881
+ }
882
+
883
+ /**
884
+ * Extended config with auto provider option
885
+ */
886
/**
 * Extended config with auto provider option.
 * Accepted by createEmbeddingServiceAsync for automatic provider detection
 * and fallback handling.
 */
export interface AutoEmbeddingConfig {
  /** Provider: 'auto' will pick best available (@sparkleideas/agentic-flow > transformers > mock) */
  provider: EmbeddingProvider | 'auto';
  /** Fallback provider if primary fails */
  fallback?: EmbeddingProvider;
  /** Auto-install @sparkleideas/agentic-flow if not available (default: true for 'auto' provider) */
  autoInstall?: boolean;
  /** Model ID for @sparkleideas/agentic-flow */
  modelId?: string;
  /** Model name for transformers */
  model?: string;
  /** Embedding vector dimensions */
  dimensions?: number;
  /** In-memory LRU cache size, in entries */
  cacheSize?: number;
  /** OpenAI API key (required for openai provider) */
  apiKey?: string;
}
904
+
905
+ /**
906
+ * Create embedding service with automatic provider detection and fallback
907
+ *
908
+ * Features:
909
+ * - 'auto' provider picks best available: @sparkleideas/agentic-flow > transformers > mock
910
+ * - Automatic fallback if primary provider fails to initialize
911
+ * - Pre-validates provider availability before returning
912
+ *
913
+ * @example
914
+ * // Auto-select best provider
915
+ * const service = await createEmbeddingServiceAsync({ provider: 'auto' });
916
+ *
917
+ * // Try @sparkleideas/agentic-flow, fallback to transformers
918
+ * const service = await createEmbeddingServiceAsync({
919
+ * provider: '@sparkleideas/agentic-flow',
920
+ * fallback: 'transformers'
921
+ * });
922
+ */
923
+ export async function createEmbeddingServiceAsync(
924
+ config: AutoEmbeddingConfig
925
+ ): Promise<IEmbeddingService> {
926
+ const { provider, fallback, autoInstall = true, ...rest } = config;
927
+
928
+ // Auto provider selection
929
+ if (provider === 'auto') {
930
+ // Try @sparkleideas/agentic-flow first (fastest, ONNX-based)
931
+ let agenticFlowAvailable = await isAgenticFlowAvailable();
932
+
933
+ // Auto-install if not available and autoInstall is enabled
934
+ if (!agenticFlowAvailable && autoInstall) {
935
+ agenticFlowAvailable = await autoInstallAgenticFlow();
936
+ }
937
+
938
+ if (agenticFlowAvailable) {
939
+ try {
940
+ const service = new AgenticFlowEmbeddingService({
941
+ provider: '@sparkleideas/agentic-flow',
942
+ modelId: rest.modelId ?? 'all-MiniLM-L6-v2',
943
+ dimensions: rest.dimensions ?? 384,
944
+ cacheSize: rest.cacheSize,
945
+ });
946
+ // Validate it can initialize
947
+ await service.embed('test');
948
+ return service;
949
+ } catch {
950
+ // Fall through to next option
951
+ }
952
+ }
953
+
954
+ // Try transformers (good quality, built-in)
955
+ try {
956
+ const service = new TransformersEmbeddingService({
957
+ provider: 'transformers',
958
+ model: rest.model ?? 'Xenova/all-MiniLM-L6-v2',
959
+ cacheSize: rest.cacheSize,
960
+ });
961
+ // Validate it can initialize
962
+ await service.embed('test');
963
+ return service;
964
+ } catch {
965
+ // Fall through to mock
966
+ }
967
+
968
+ // Fallback to mock (always works)
969
+ console.warn('[embeddings] Using mock provider - install @sparkleideas/agentic-flow or @xenova/transformers for real embeddings');
970
+ return new MockEmbeddingService({
971
+ dimensions: rest.dimensions ?? 384,
972
+ cacheSize: rest.cacheSize,
973
+ });
974
+ }
975
+
976
+ // Specific provider with optional fallback
977
+ const createPrimary = (): IEmbeddingService => {
978
+ switch (provider) {
979
+ case '@sparkleideas/agentic-flow':
980
+ return new AgenticFlowEmbeddingService({
981
+ provider: '@sparkleideas/agentic-flow',
982
+ modelId: rest.modelId ?? 'all-MiniLM-L6-v2',
983
+ dimensions: rest.dimensions ?? 384,
984
+ cacheSize: rest.cacheSize,
985
+ });
986
+ case 'transformers':
987
+ return new TransformersEmbeddingService({
988
+ provider: 'transformers',
989
+ model: rest.model ?? 'Xenova/all-MiniLM-L6-v2',
990
+ cacheSize: rest.cacheSize,
991
+ });
992
+ case 'openai':
993
+ if (!rest.apiKey) throw new Error('OpenAI provider requires apiKey');
994
+ return new OpenAIEmbeddingService({
995
+ provider: 'openai',
996
+ apiKey: rest.apiKey,
997
+ dimensions: rest.dimensions,
998
+ cacheSize: rest.cacheSize,
999
+ });
1000
+ case 'mock':
1001
+ return new MockEmbeddingService({
1002
+ dimensions: rest.dimensions ?? 384,
1003
+ cacheSize: rest.cacheSize,
1004
+ });
1005
+ default:
1006
+ throw new Error(`Unknown provider: ${provider}`);
1007
+ }
1008
+ };
1009
+
1010
+ const primary = createPrimary();
1011
+
1012
+ // Try to validate primary provider
1013
+ try {
1014
+ await primary.embed('test');
1015
+ return primary;
1016
+ } catch (error) {
1017
+ if (!fallback) {
1018
+ throw error;
1019
+ }
1020
+
1021
+ // Try fallback
1022
+ console.warn(`[embeddings] Primary provider '${provider}' failed, using fallback '${fallback}'`);
1023
+ const fallbackConfig: AutoEmbeddingConfig = { ...rest, provider: fallback };
1024
+ return createEmbeddingServiceAsync(fallbackConfig);
1025
+ }
1026
+ }
1027
+
1028
+ /**
1029
+ * Convenience function for quick embeddings
1030
+ */
1031
+ export async function getEmbedding(
1032
+ text: string,
1033
+ config?: Partial<EmbeddingConfig>
1034
+ ): Promise<Float32Array | number[]> {
1035
+ const service = createEmbeddingService({
1036
+ provider: 'mock',
1037
+ dimensions: 384,
1038
+ ...config,
1039
+ } as EmbeddingConfig);
1040
+
1041
+ try {
1042
+ const result = await service.embed(text);
1043
+ return result.embedding;
1044
+ } finally {
1045
+ await service.shutdown();
1046
+ }
1047
+ }
1048
+
1049
+ // ============================================================================
1050
+ // Similarity Functions
1051
+ // ============================================================================
1052
+
1053
+ /**
1054
+ * Compute cosine similarity between two embeddings
1055
+ */
1056
+ export function cosineSimilarity(
1057
+ a: Float32Array | number[],
1058
+ b: Float32Array | number[]
1059
+ ): number {
1060
+ if (a.length !== b.length) {
1061
+ throw new Error('Embedding dimensions must match');
1062
+ }
1063
+
1064
+ let dot = 0;
1065
+ let normA = 0;
1066
+ let normB = 0;
1067
+
1068
+ for (let i = 0; i < a.length; i++) {
1069
+ dot += a[i] * b[i];
1070
+ normA += a[i] * a[i];
1071
+ normB += b[i] * b[i];
1072
+ }
1073
+
1074
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
1075
+ return denom > 0 ? dot / denom : 0;
1076
+ }
1077
+
1078
+ /**
1079
+ * Compute Euclidean distance between two embeddings
1080
+ */
1081
+ export function euclideanDistance(
1082
+ a: Float32Array | number[],
1083
+ b: Float32Array | number[]
1084
+ ): number {
1085
+ if (a.length !== b.length) {
1086
+ throw new Error('Embedding dimensions must match');
1087
+ }
1088
+
1089
+ let sum = 0;
1090
+ for (let i = 0; i < a.length; i++) {
1091
+ const diff = a[i] - b[i];
1092
+ sum += diff * diff;
1093
+ }
1094
+
1095
+ return Math.sqrt(sum);
1096
+ }
1097
+
1098
+ /**
1099
+ * Compute dot product between two embeddings
1100
+ */
1101
+ export function dotProduct(
1102
+ a: Float32Array | number[],
1103
+ b: Float32Array | number[]
1104
+ ): number {
1105
+ if (a.length !== b.length) {
1106
+ throw new Error('Embedding dimensions must match');
1107
+ }
1108
+
1109
+ let dot = 0;
1110
+ for (let i = 0; i < a.length; i++) {
1111
+ dot += a[i] * b[i];
1112
+ }
1113
+
1114
+ return dot;
1115
+ }
1116
+
1117
+ /**
1118
+ * Compute similarity using specified metric
1119
+ */
1120
+ export function computeSimilarity(
1121
+ a: Float32Array | number[],
1122
+ b: Float32Array | number[],
1123
+ metric: SimilarityMetric = 'cosine'
1124
+ ): SimilarityResult {
1125
+ switch (metric) {
1126
+ case 'cosine':
1127
+ return { score: cosineSimilarity(a, b), metric };
1128
+ case 'euclidean':
1129
+ // Convert distance to similarity (closer = higher score)
1130
+ return { score: 1 / (1 + euclideanDistance(a, b)), metric };
1131
+ case 'dot':
1132
+ return { score: dotProduct(a, b), metric };
1133
+ default:
1134
+ return { score: cosineSimilarity(a, b), metric: 'cosine' };
1135
+ }
1136
+ }