agentic-flow 2.0.1-alpha.13 → 2.0.1-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,168 @@
1
+ /**
2
+ * EmbeddingService - Unified embedding interface for agentic-flow
3
+ *
4
+ * Uses ruvector@0.1.61+ for ONNX embeddings with:
5
+ * - SIMD128 acceleration (6x faster)
6
+ * - Parallel worker threads (7 workers)
7
+ * - all-MiniLM-L6-v2 model (384 dimensions)
8
+ *
9
+ * Configure via:
10
+ * - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
11
+ * - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
12
+ */
13
+ export type EmbeddingBackend = 'simple' | 'onnx' | 'auto';
14
+ export interface EmbeddingStats {
15
+ backend: EmbeddingBackend;
16
+ effectiveBackend: EmbeddingBackend;
17
+ dimension: number;
18
+ totalEmbeddings: number;
19
+ totalLatencyMs: number;
20
+ avgLatencyMs: number;
21
+ cacheHits: number;
22
+ modelLoaded: boolean;
23
+ modelName?: string;
24
+ simdAvailable?: boolean;
25
+ parallelWorkers?: number;
26
+ }
27
+ export interface SimilarityResult {
28
+ similarity: number;
29
+ timeMs: number;
30
+ }
31
+ export interface SearchResult {
32
+ text: string;
33
+ index: number;
34
+ similarity: number;
35
+ }
36
+ export interface DuplicateGroup {
37
+ indices: number[];
38
+ texts: string[];
39
+ similarity: number;
40
+ }
41
+ export declare class EmbeddingService {
42
+ private static instance;
43
+ private backend;
44
+ private effectiveBackend;
45
+ private dimension;
46
+ private modelName;
47
+ private modelLoaded;
48
+ private loadingPromise;
49
+ private totalEmbeddings;
50
+ private totalLatencyMs;
51
+ private cacheHits;
52
+ private cache;
53
+ private cacheEnabled;
54
+ private corpus;
55
+ private constructor();
56
+ static getInstance(): EmbeddingService;
57
+ /**
58
+ * Resolve the effective backend based on ONNX detection
59
+ */
60
+ private resolveBackend;
61
+ /**
62
+ * Get configured backend (may be 'auto')
63
+ */
64
+ getBackend(): EmbeddingBackend;
65
+ /**
66
+ * Get effective backend after detection
67
+ */
68
+ getEffectiveBackend(): EmbeddingBackend;
69
+ /**
70
+ * Get embedding dimension
71
+ */
72
+ getDimension(): number;
73
+ /**
74
+ * Check if ONNX model is loaded
75
+ */
76
+ isModelLoaded(): boolean;
77
+ /**
78
+ * Generate embedding for text
79
+ * Auto-detects ONNX and uses it if available (default behavior)
80
+ */
81
+ embed(text: string): Promise<Float32Array>;
82
+ /**
83
+ * Generate embeddings for multiple texts (batch processing with parallel workers)
84
+ * Batch processing provides significant speedup with parallel ONNX workers
85
+ */
86
+ embedBatch(texts: string[]): Promise<Float32Array[]>;
87
+ /**
88
+ * Compute similarity between two texts
89
+ */
90
+ similarity(text1: string, text2: string): Promise<number>;
91
+ /**
92
+ * Compute NxN similarity matrix for a list of texts
93
+ * Uses parallel workers for ONNX backend
94
+ */
95
+ similarityMatrix(texts: string[]): Promise<number[][]>;
96
+ /**
97
+ * Build a corpus for semantic search
98
+ */
99
+ buildCorpus(texts: string[]): Promise<void>;
100
+ /**
101
+ * Semantic search against the corpus
102
+ * Returns top-k most similar texts
103
+ */
104
+ semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
105
+ /**
106
+ * Find near-duplicate texts in a list
107
+ * Groups texts with similarity above threshold
108
+ */
109
+ findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
110
+ /**
111
+ * K-means clustering of texts
112
+ * Returns cluster assignments and centroids
113
+ */
114
+ clusterTexts(texts: string[], k?: number, maxIterations?: number): Promise<{
115
+ clusters: number[];
116
+ centroids: Float32Array[];
117
+ }>;
118
+ /**
119
+ * Stream embeddings for large batches (memory efficient)
120
+ * Yields embeddings one at a time
121
+ */
122
+ streamEmbed(texts: string[], batchSize?: number): AsyncGenerator<{
123
+ index: number;
124
+ text: string;
125
+ embedding: Float32Array;
126
+ }>;
127
+ /**
128
+ * Simple hash-based embedding (fast, not semantic)
129
+ */
130
+ simpleEmbed(text: string, dim?: number): Float32Array;
131
+ /**
132
+ * Compute cosine similarity between two embeddings
133
+ */
134
+ cosineSimilarity(a: Float32Array, b: Float32Array): number;
135
+ /**
136
+ * Get statistics
137
+ */
138
+ getStats(): EmbeddingStats;
139
+ /**
140
+ * Clear cache
141
+ */
142
+ clearCache(): void;
143
+ /**
144
+ * Clear corpus
145
+ */
146
+ clearCorpus(): void;
147
+ /**
148
+ * Shutdown (cleanup workers)
149
+ */
150
+ shutdown(): Promise<void>;
151
+ /**
152
+ * Reset instance (for testing)
153
+ */
154
+ static reset(): Promise<void>;
155
+ }
156
+ export declare function getEmbeddingService(): EmbeddingService;
157
+ export declare function embed(text: string): Promise<Float32Array>;
158
+ export declare function embedBatch(texts: string[]): Promise<Float32Array[]>;
159
+ export declare function textSimilarity(text1: string, text2: string): Promise<number>;
160
+ export declare function simpleEmbed(text: string, dim?: number): Float32Array;
161
+ export declare function similarityMatrix(texts: string[]): Promise<number[][]>;
162
+ export declare function semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
163
+ export declare function findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
164
+ export declare function clusterTexts(texts: string[], k?: number): Promise<{
165
+ clusters: number[];
166
+ centroids: Float32Array[];
167
+ }>;
168
+ //# sourceMappingURL=EmbeddingService.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EmbeddingService.d.ts","sourceRoot":"","sources":["../../src/intelligence/EmbeddingService.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,MAAM,MAAM,gBAAgB,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAE1D,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,gBAAgB,CAAC;IAC1B,gBAAgB,EAAE,gBAAgB,CAAC;IACnC,SAAS,EAAE,MAAM,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,OAAO,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAyFD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAiC;IAExD,OAAO,CAAC,OAAO,CAAmB;IAClC,OAAO,CAAC,gBAAgB,CAAiC;IACzD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAS;IAG1B,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,cAAc,CAA8B;IAGpD,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,SAAS,CAAa;IAG9B,OAAO,CAAC,KAAK,CAAiB;IAC9B,OAAO,CAAC,YAAY,CAAU;IAG9B,OAAO,CAAC,MAAM,CAAkF;IAEhG,OAAO;IASP,MAAM,CAAC,WAAW,IAAI,gBAAgB;IAOtC;;OAEG;YACW,cAAc;IAsB5B;;OAEG;IACH,UAAU,IAAI,gBAAgB;IAI9B;;OAEG;IACH,mBAAmB,IAAI,gBAAgB;IAIvC;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,aAAa,IAAI,OAAO;IAIxB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAwChD;;;OAGG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAyC1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAa/D;;;OAGG;IACG,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAiB5D;;OAEG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjD;;;OAGG;IACG,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsB9E;;;OAGG;IACG,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,cAAc,
EAAE,CAAC;IAoCzF;;;OAGG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EAAE,EACf,CAAC,GAAE,MAAU,EACb,aAAa,GAAE,MAAY,GAC1B,OAAO,CAAC;QAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;QAAC,SAAS,EAAE,YAAY,EAAE,CAAA;KAAE,CAAC;IAyE7D;;;OAGG;IACI,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAW,GAAG,cAAc,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,YAAY,CAAA;KAAE,CAAC;IAerI;;OAEG;IACH,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,GAAE,MAAY,GAAG,YAAY;IAwB1D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAiB1D;;OAEG;IACH,QAAQ,IAAI,cAAc;IAmB1B;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,WAAW,IAAI,IAAI;IAInB;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAM/B;;OAEG;WACU,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAQpC;AAGD,wBAAgB,mBAAmB,IAAI,gBAAgB,CAEtD;AAGD,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAE/D;AAED,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAEzE;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAElF;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,GAAE,MAAY,GAAG,YAAY,CAEzE;AAED,wBAAsB,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAE3E;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,GAAE,MAAU,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAE7F;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,MAAY,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAExG;AAED,wBAAsB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC,GAAE,MAAU,GAAG,OAAO,CAAC;IAAE,QAAQ,EAAE,MAAM,EAAE,CAAC;IAAC,SAAS,EAAE,YAAY,EAAE,CAAA;CAAE,CAAC,CAE7H"}
@@ -0,0 +1,526 @@
1
+ /**
2
+ * EmbeddingService - Unified embedding interface for agentic-flow
3
+ *
4
+ * Uses ruvector@0.1.61+ for ONNX embeddings with:
5
+ * - SIMD128 acceleration (6x faster)
6
+ * - Parallel worker threads (7 workers)
7
+ * - all-MiniLM-L6-v2 model (384 dimensions)
8
+ *
9
+ * Configure via:
10
+ * - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
11
+ * - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
12
+ */
13
// Memoized ONNX availability flag and the loaded ruvector module (if any).
let onnxAvailable = null;
let ruvectorModule = null;
/**
 * Probe for ONNX/SIMD support by dynamically importing ruvector.
 * The answer is cached in `onnxAvailable`; a failed import means the
 * service will fall back to simple hash embeddings.
 */
async function detectOnnx() {
    if (onnxAvailable === null) {
        try {
            ruvectorModule = await import('ruvector');
            onnxAvailable = ruvectorModule.isOnnxAvailable?.() ?? false;
        }
        catch {
            // ruvector could not be loaded — simple embeddings only.
            onnxAvailable = false;
        }
    }
    return onnxAvailable;
}
35
// Bounded LRU cache mapping text -> embedding, backed by Map insertion order.
class EmbeddingCache {
    store = new Map();
    maxSize;
    constructor(maxSize = 1000) {
        this.maxSize = maxSize;
    }
    /** Look up a key; a hit is refreshed to most-recently-used position. */
    get(key) {
        const hit = this.store.get(key);
        if (!hit) {
            return hit;
        }
        // Re-insert so the entry moves to the tail of the iteration order.
        this.store.delete(key);
        this.store.set(key, hit);
        return hit;
    }
    /** Insert a value, evicting the least-recently-used entry when full. */
    set(key, value) {
        if (this.store.size >= this.maxSize) {
            const oldest = this.store.keys().next().value;
            if (oldest) {
                this.store.delete(oldest);
            }
        }
        this.store.set(key, value);
    }
    /** Remove every entry. */
    clear() {
        this.store.clear();
    }
    /** Number of cached entries. */
    get size() {
        return this.store.size;
    }
}
68
+ export class EmbeddingService {
69
+ static instance = null;
70
+ backend;
71
+ effectiveBackend = null;
72
+ dimension;
73
+ modelName;
74
+ // ONNX state
75
+ modelLoaded = false;
76
+ loadingPromise = null;
77
+ // Stats
78
+ totalEmbeddings = 0;
79
+ totalLatencyMs = 0;
80
+ cacheHits = 0;
81
+ // Cache
82
+ cache;
83
+ cacheEnabled;
84
+ // Corpus for search operations
85
+ corpus = { texts: [], embeddings: [] };
86
+ constructor() {
87
+ // Default to 'auto' which will detect ONNX and use it if available
88
+ this.backend = process.env.AGENTIC_FLOW_EMBEDDINGS || 'auto';
89
+ this.modelName = process.env.AGENTIC_FLOW_EMBEDDING_MODEL || 'all-MiniLM-L6-v2';
90
+ this.dimension = 256; // Will be updated when ONNX loads (384)
91
+ this.cacheEnabled = process.env.AGENTIC_FLOW_EMBEDDING_CACHE !== 'false';
92
+ this.cache = new EmbeddingCache(1000);
93
+ }
94
+ static getInstance() {
95
+ if (!EmbeddingService.instance) {
96
+ EmbeddingService.instance = new EmbeddingService();
97
+ }
98
+ return EmbeddingService.instance;
99
+ }
100
+ /**
101
+ * Resolve the effective backend based on ONNX detection
102
+ */
103
+ async resolveBackend() {
104
+ if (this.effectiveBackend) {
105
+ return this.effectiveBackend;
106
+ }
107
+ if (this.backend === 'auto') {
108
+ const hasOnnx = await detectOnnx();
109
+ this.effectiveBackend = hasOnnx ? 'onnx' : 'simple';
110
+ if (hasOnnx) {
111
+ this.dimension = 384; // all-MiniLM-L6-v2 dimension
112
+ }
113
+ }
114
+ else {
115
+ this.effectiveBackend = this.backend;
116
+ if (this.backend === 'onnx') {
117
+ await detectOnnx(); // Ensure module is loaded
118
+ this.dimension = 384;
119
+ }
120
+ }
121
+ return this.effectiveBackend;
122
+ }
123
+ /**
124
+ * Get configured backend (may be 'auto')
125
+ */
126
+ getBackend() {
127
+ return this.backend;
128
+ }
129
+ /**
130
+ * Get effective backend after detection
131
+ */
132
+ getEffectiveBackend() {
133
+ return this.effectiveBackend || this.backend;
134
+ }
135
+ /**
136
+ * Get embedding dimension
137
+ */
138
+ getDimension() {
139
+ return this.dimension;
140
+ }
141
+ /**
142
+ * Check if ONNX model is loaded
143
+ */
144
+ isModelLoaded() {
145
+ return this.modelLoaded;
146
+ }
147
+ /**
148
+ * Generate embedding for text
149
+ * Auto-detects ONNX and uses it if available (default behavior)
150
+ */
151
+ async embed(text) {
152
+ const startTime = performance.now();
153
+ // Check cache
154
+ if (this.cacheEnabled) {
155
+ const cached = this.cache.get(text);
156
+ if (cached) {
157
+ this.cacheHits++;
158
+ return cached;
159
+ }
160
+ }
161
+ // Resolve backend (handles 'auto' mode)
162
+ const effectiveBackend = await this.resolveBackend();
163
+ let embedding;
164
+ if (effectiveBackend === 'onnx' && ruvectorModule) {
165
+ const result = await ruvectorModule.embed(text);
166
+ if (result?.embedding) {
167
+ embedding = result.embedding;
168
+ this.modelLoaded = true;
169
+ }
170
+ else {
171
+ embedding = this.simpleEmbed(text);
172
+ }
173
+ }
174
+ else {
175
+ embedding = this.simpleEmbed(text);
176
+ }
177
+ // Update stats
178
+ this.totalEmbeddings++;
179
+ this.totalLatencyMs += performance.now() - startTime;
180
+ // Cache result
181
+ if (this.cacheEnabled) {
182
+ this.cache.set(text, embedding);
183
+ }
184
+ return embedding;
185
+ }
186
+ /**
187
+ * Generate embeddings for multiple texts (batch processing with parallel workers)
188
+ * Batch processing provides significant speedup with parallel ONNX workers
189
+ */
190
+ async embedBatch(texts) {
191
+ const startTime = performance.now();
192
+ // Check cache for all texts first
193
+ if (this.cacheEnabled) {
194
+ const cachedResults = texts.map(t => this.cache.get(t) || null);
195
+ const allCached = cachedResults.every(r => r !== null);
196
+ if (allCached) {
197
+ this.cacheHits += texts.length;
198
+ return cachedResults;
199
+ }
200
+ }
201
+ // Resolve backend
202
+ const effectiveBackend = await this.resolveBackend();
203
+ if (effectiveBackend === 'onnx' && ruvectorModule) {
204
+ const result = await ruvectorModule.embedBatch(texts);
205
+ if (result?.embeddings && result.embeddings.length === texts.length) {
206
+ const embeddings = result.embeddings;
207
+ // Cache individual embeddings
208
+ if (this.cacheEnabled) {
209
+ for (let i = 0; i < texts.length; i++) {
210
+ this.cache.set(texts[i], embeddings[i]);
211
+ }
212
+ }
213
+ // Update stats
214
+ this.totalEmbeddings += texts.length;
215
+ this.totalLatencyMs += performance.now() - startTime;
216
+ this.modelLoaded = true;
217
+ return embeddings;
218
+ }
219
+ }
220
+ // Fall back to sequential for simple backend
221
+ return Promise.all(texts.map(t => this.embed(t)));
222
+ }
223
+ /**
224
+ * Compute similarity between two texts
225
+ */
226
+ async similarity(text1, text2) {
227
+ const effectiveBackend = await this.resolveBackend();
228
+ if (effectiveBackend === 'onnx' && ruvectorModule) {
229
+ const result = await ruvectorModule.similarity(text1, text2);
230
+ return result.similarity;
231
+ }
232
+ // Fall back to embedding + cosine
233
+ const [e1, e2] = await Promise.all([this.embed(text1), this.embed(text2)]);
234
+ return this.cosineSimilarity(e1, e2);
235
+ }
236
+ /**
237
+ * Compute NxN similarity matrix for a list of texts
238
+ * Uses parallel workers for ONNX backend
239
+ */
240
+ async similarityMatrix(texts) {
241
+ const embeddings = await this.embedBatch(texts);
242
+ const n = texts.length;
243
+ const matrix = Array(n).fill(null).map(() => Array(n).fill(0));
244
+ for (let i = 0; i < n; i++) {
245
+ matrix[i][i] = 1.0; // Self-similarity
246
+ for (let j = i + 1; j < n; j++) {
247
+ const sim = this.cosineSimilarity(embeddings[i], embeddings[j]);
248
+ matrix[i][j] = sim;
249
+ matrix[j][i] = sim; // Symmetric
250
+ }
251
+ }
252
+ return matrix;
253
+ }
254
+ /**
255
+ * Build a corpus for semantic search
256
+ */
257
+ async buildCorpus(texts) {
258
+ this.corpus.texts = texts;
259
+ this.corpus.embeddings = await this.embedBatch(texts);
260
+ }
261
+ /**
262
+ * Semantic search against the corpus
263
+ * Returns top-k most similar texts
264
+ */
265
+ async semanticSearch(query, topK = 5) {
266
+ if (this.corpus.texts.length === 0) {
267
+ throw new Error('Corpus not built. Call buildCorpus() first.');
268
+ }
269
+ const queryEmbedding = await this.embed(query);
270
+ const results = [];
271
+ for (let i = 0; i < this.corpus.texts.length; i++) {
272
+ const sim = this.cosineSimilarity(queryEmbedding, this.corpus.embeddings[i]);
273
+ results.push({
274
+ text: this.corpus.texts[i],
275
+ index: i,
276
+ similarity: sim,
277
+ });
278
+ }
279
+ // Sort by similarity (descending) and return top-k
280
+ results.sort((a, b) => b.similarity - a.similarity);
281
+ return results.slice(0, topK);
282
+ }
283
+ /**
284
+ * Find near-duplicate texts in a list
285
+ * Groups texts with similarity above threshold
286
+ */
287
+ async findDuplicates(texts, threshold = 0.9) {
288
+ const embeddings = await this.embedBatch(texts);
289
+ const n = texts.length;
290
+ const visited = new Set();
291
+ const groups = [];
292
+ for (let i = 0; i < n; i++) {
293
+ if (visited.has(i))
294
+ continue;
295
+ const group = {
296
+ indices: [i],
297
+ texts: [texts[i]],
298
+ similarity: 1.0,
299
+ };
300
+ for (let j = i + 1; j < n; j++) {
301
+ if (visited.has(j))
302
+ continue;
303
+ const sim = this.cosineSimilarity(embeddings[i], embeddings[j]);
304
+ if (sim >= threshold) {
305
+ group.indices.push(j);
306
+ group.texts.push(texts[j]);
307
+ group.similarity = Math.min(group.similarity, sim);
308
+ visited.add(j);
309
+ }
310
+ }
311
+ if (group.indices.length > 1) {
312
+ visited.add(i);
313
+ groups.push(group);
314
+ }
315
+ }
316
+ return groups;
317
+ }
318
+ /**
319
+ * K-means clustering of texts
320
+ * Returns cluster assignments and centroids
321
+ */
322
+ async clusterTexts(texts, k = 3, maxIterations = 100) {
323
+ const embeddings = await this.embedBatch(texts);
324
+ const n = texts.length;
325
+ const dim = this.dimension;
326
+ // Initialize centroids randomly (copy to new ArrayBuffer for consistent typing)
327
+ const centroidIndices = new Set();
328
+ while (centroidIndices.size < k && centroidIndices.size < n) {
329
+ centroidIndices.add(Math.floor(Math.random() * n));
330
+ }
331
+ let centroids = Array.from(centroidIndices).map(i => {
332
+ const copy = new Float32Array(dim);
333
+ copy.set(embeddings[i]);
334
+ return copy;
335
+ });
336
+ let clusters = new Array(n).fill(0);
337
+ for (let iter = 0; iter < maxIterations; iter++) {
338
+ // Assign points to nearest centroid
339
+ const newClusters = embeddings.map(emb => {
340
+ let bestCluster = 0;
341
+ let bestSim = -Infinity;
342
+ for (let c = 0; c < k; c++) {
343
+ const sim = this.cosineSimilarity(emb, centroids[c]);
344
+ if (sim > bestSim) {
345
+ bestSim = sim;
346
+ bestCluster = c;
347
+ }
348
+ }
349
+ return bestCluster;
350
+ });
351
+ // Check convergence
352
+ const changed = newClusters.some((c, i) => c !== clusters[i]);
353
+ clusters = newClusters;
354
+ if (!changed)
355
+ break;
356
+ // Update centroids
357
+ const newCentroids = [];
358
+ for (let c = 0; c < k; c++) {
359
+ newCentroids.push(new Float32Array(dim));
360
+ }
361
+ const counts = new Array(k).fill(0);
362
+ for (let i = 0; i < n; i++) {
363
+ const c = clusters[i];
364
+ counts[c]++;
365
+ for (let d = 0; d < dim; d++) {
366
+ newCentroids[c][d] += embeddings[i][d];
367
+ }
368
+ }
369
+ // Normalize centroids
370
+ for (let c = 0; c < k; c++) {
371
+ if (counts[c] > 0) {
372
+ let norm = 0;
373
+ for (let d = 0; d < dim; d++) {
374
+ newCentroids[c][d] /= counts[c];
375
+ norm += newCentroids[c][d] * newCentroids[c][d];
376
+ }
377
+ norm = Math.sqrt(norm) || 1;
378
+ for (let d = 0; d < dim; d++) {
379
+ newCentroids[c][d] /= norm;
380
+ }
381
+ }
382
+ }
383
+ centroids = newCentroids;
384
+ }
385
+ return { clusters, centroids };
386
+ }
387
+ /**
388
+ * Stream embeddings for large batches (memory efficient)
389
+ * Yields embeddings one at a time
390
+ */
391
+ async *streamEmbed(texts, batchSize = 32) {
392
+ for (let i = 0; i < texts.length; i += batchSize) {
393
+ const batch = texts.slice(i, i + batchSize);
394
+ const embeddings = await this.embedBatch(batch);
395
+ for (let j = 0; j < batch.length; j++) {
396
+ yield {
397
+ index: i + j,
398
+ text: batch[j],
399
+ embedding: embeddings[j],
400
+ };
401
+ }
402
+ }
403
+ }
404
+ /**
405
+ * Simple hash-based embedding (fast, not semantic)
406
+ */
407
+ simpleEmbed(text, dim = 256) {
408
+ const embedding = new Float32Array(dim);
409
+ // Multi-pass hash for better distribution
410
+ for (let i = 0; i < text.length; i++) {
411
+ const code = text.charCodeAt(i);
412
+ embedding[i % dim] += code / 255;
413
+ embedding[(i * 7) % dim] += (code * 0.3) / 255;
414
+ embedding[(i * 13) % dim] += (code * 0.2) / 255;
415
+ }
416
+ // Normalize
417
+ let norm = 0;
418
+ for (let i = 0; i < dim; i++) {
419
+ norm += embedding[i] * embedding[i];
420
+ }
421
+ norm = Math.sqrt(norm) || 1;
422
+ for (let i = 0; i < dim; i++) {
423
+ embedding[i] /= norm;
424
+ }
425
+ return embedding;
426
+ }
427
+ /**
428
+ * Compute cosine similarity between two embeddings
429
+ */
430
+ cosineSimilarity(a, b) {
431
+ if (ruvectorModule?.cosineSimilarity) {
432
+ return ruvectorModule.cosineSimilarity(a, b);
433
+ }
434
+ // JS fallback
435
+ let dot = 0;
436
+ let normA = 0;
437
+ let normB = 0;
438
+ for (let i = 0; i < a.length; i++) {
439
+ dot += a[i] * b[i];
440
+ normA += a[i] * a[i];
441
+ normB += b[i] * b[i];
442
+ }
443
+ return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
444
+ }
445
+ /**
446
+ * Get statistics
447
+ */
448
+ getStats() {
449
+ const effective = this.effectiveBackend || this.backend;
450
+ const ruvectorStats = ruvectorModule?.getStats?.() || {};
451
+ return {
452
+ backend: this.backend,
453
+ effectiveBackend: effective,
454
+ dimension: this.dimension,
455
+ totalEmbeddings: this.totalEmbeddings,
456
+ totalLatencyMs: this.totalLatencyMs,
457
+ avgLatencyMs: this.totalEmbeddings > 0 ? this.totalLatencyMs / this.totalEmbeddings : 0,
458
+ cacheHits: this.cacheHits,
459
+ modelLoaded: this.modelLoaded,
460
+ modelName: effective === 'onnx' ? this.modelName : undefined,
461
+ simdAvailable: ruvectorStats.simdAvailable ?? onnxAvailable,
462
+ parallelWorkers: ruvectorStats.workerCount ?? undefined,
463
+ };
464
+ }
465
+ /**
466
+ * Clear cache
467
+ */
468
+ clearCache() {
469
+ this.cache.clear();
470
+ }
471
+ /**
472
+ * Clear corpus
473
+ */
474
+ clearCorpus() {
475
+ this.corpus = { texts: [], embeddings: [] };
476
+ }
477
+ /**
478
+ * Shutdown (cleanup workers)
479
+ */
480
+ async shutdown() {
481
+ if (ruvectorModule?.shutdown) {
482
+ await ruvectorModule.shutdown();
483
+ }
484
+ }
485
+ /**
486
+ * Reset instance (for testing)
487
+ */
488
+ static async reset() {
489
+ if (EmbeddingService.instance) {
490
+ await EmbeddingService.instance.shutdown();
491
+ }
492
+ EmbeddingService.instance = null;
493
+ onnxAvailable = null;
494
+ ruvectorModule = null;
495
+ }
496
+ }
497
// Singleton accessor.
export function getEmbeddingService() {
    return EmbeddingService.getInstance();
}
// Convenience wrappers that delegate to the singleton service.
export async function embed(text) {
    return getEmbeddingService().embed(text);
}
export async function embedBatch(texts) {
    return getEmbeddingService().embedBatch(texts);
}
export async function textSimilarity(text1, text2) {
    return getEmbeddingService().similarity(text1, text2);
}
export function simpleEmbed(text, dim = 256) {
    return getEmbeddingService().simpleEmbed(text, dim);
}
export async function similarityMatrix(texts) {
    return getEmbeddingService().similarityMatrix(texts);
}
export async function semanticSearch(query, topK = 5) {
    return getEmbeddingService().semanticSearch(query, topK);
}
export async function findDuplicates(texts, threshold = 0.9) {
    return getEmbeddingService().findDuplicates(texts, threshold);
}
export async function clusterTexts(texts, k = 3) {
    return getEmbeddingService().clusterTexts(texts, k);
}
526
+ //# sourceMappingURL=EmbeddingService.js.map