@sparkleideas/embeddings 3.0.0-alpha.17 → 3.0.0-alpha.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,113 +0,0 @@
1
- /**
2
- * V3 Embedding Service Implementation
3
- *
4
- * Production embedding service aligned with agentic-flow@alpha:
5
- * - OpenAI provider (text-embedding-3-small/large)
6
- * - Transformers.js provider (local ONNX models)
7
- * - Mock provider (development/testing)
8
- *
9
- * Performance Targets:
10
- * - Single embedding: <100ms (API), <50ms (local)
11
- * - Batch embedding: <500ms for 10 items
12
- * - Cache hit: <1ms
13
- */
14
- import { EventEmitter } from 'events';
15
- import type { EmbeddingProvider, EmbeddingConfig, OpenAIEmbeddingConfig, TransformersEmbeddingConfig, MockEmbeddingConfig, EmbeddingResult, BatchEmbeddingResult, IEmbeddingService, EmbeddingEvent, EmbeddingEventListener, SimilarityMetric, SimilarityResult } from './types.js';
16
- declare class LRUCache<K, V> { // Size-bounded key/value cache that evicts its oldest entry when full and counts lookup hits/misses.
17
- private readonly maxSize; // Entry count at which set() starts evicting the oldest entry.
18
- private cache; // Backing store; the implementation initialises it with `new Map()`.
19
- private hits; // Number of get() calls that found a value.
20
- private misses; // Number of get() calls that found nothing.
21
- constructor(maxSize: number);
22
- get(key: K): V | undefined; // Returns the stored value and marks it most recently used; undefined on a miss.
23
- set(key: K, value: V): void; // Inserts or refreshes `key`; a new key at capacity evicts the oldest entry first.
24
- clear(): void; // Removes every entry and resets the hit/miss counters to 0.
25
- get size(): number; // Current number of stored entries.
26
- get hitRate(): number; // hits / (hits + misses), or 0 before any lookup has happened.
27
- getStats(): { // Snapshot of the current size, capacity and lookup counters.
28
- size: number;
29
- maxSize: number;
30
- hits: number;
31
- misses: number;
32
- hitRate: number;
33
- };
34
- }
35
- declare abstract class BaseEmbeddingService extends EventEmitter implements IEmbeddingService { // Shared config, cache and listener plumbing for the concrete embedding providers.
36
- protected readonly config: EmbeddingConfig; // Configuration object passed to the constructor.
37
- abstract readonly provider: EmbeddingProvider; // Provider identifier; each subclass assigns its own value.
38
- protected cache: LRUCache<string, Float32Array>; // Embeddings keyed by input text; capacity is config.cacheSize, defaulting to 1000.
39
- protected embeddingListeners: Set<EmbeddingEventListener>; // Callbacks registered through addEventListener().
40
- constructor(config: EmbeddingConfig);
41
- abstract embed(text: string): Promise<EmbeddingResult>; // Embed a single text.
42
- abstract embedBatch(texts: string[]): Promise<BatchEmbeddingResult>; // Embed several texts in one call.
43
- protected emitEvent(event: EmbeddingEvent): void; // Calls every registered listener (a throwing listener is logged, not rethrown), then emits on the EventEmitter under event.type.
44
- addEventListener(listener: EmbeddingEventListener): void; // Adds `listener` to the listener set.
45
- removeEventListener(listener: EmbeddingEventListener): void; // Removes `listener` from the listener set.
46
- clearCache(): void; // Empties the cache and emits a 'cache_eviction' event carrying the previous size.
47
- getCacheStats(): { // Size, capacity and hit rate of the embedding cache.
48
- size: number;
49
- maxSize: number;
50
- hitRate: number;
51
- };
52
- shutdown(): Promise<void>; // Clears the cache (emitting 'cache_eviction') and then drops all registered listeners.
53
- }
54
- export declare class OpenAIEmbeddingService extends BaseEmbeddingService { // Provider that POSTs texts to an OpenAI-style embeddings endpoint and caches the vectors.
55
- readonly provider: EmbeddingProvider; // Set to 'openai' by the implementation.
56
- private readonly apiKey; // Sent as `Authorization: Bearer <apiKey>`.
57
- private readonly model; // Defaults to 'text-embedding-3-small'.
58
- private readonly baseURL; // Full endpoint URL; defaults to 'https://api.openai.com/v1/embeddings'.
59
- private readonly timeout; // Per-request abort timeout in milliseconds; defaults to 30000.
60
- private readonly maxRetries; // Upper bound on request attempts (loop runs while attempt < maxRetries); defaults to 3.
61
- constructor(config: OpenAIEmbeddingConfig);
62
- embed(text: string): Promise<EmbeddingResult>; // Returns the cached vector if present, otherwise requests one; failures are rethrown as 'OpenAI embedding failed: ...'.
63
- embedBatch(texts: string[]): Promise<BatchEmbeddingResult>; // Sends only the uncached texts in one request and returns vectors in input order.
64
- private callOpenAI; // Performs the HTTP request, retrying with exponential backoff (100 ms * 2^attempt).
65
- }
66
- export declare class TransformersEmbeddingService extends BaseEmbeddingService { // Provider that computes embeddings locally through a '@xenova/transformers' feature-extraction pipeline.
67
- readonly provider: EmbeddingProvider; // Set to 'transformers' by the implementation.
68
- private pipeline; // The loaded pipeline function; null until initialize() succeeds.
69
- private readonly modelName; // Defaults to 'Xenova/all-MiniLM-L6-v2'.
70
- private initialized; // True once the pipeline has been loaded.
71
- constructor(config: TransformersEmbeddingConfig);
72
- private initialize; // Dynamically imports '@xenova/transformers' and builds the pipeline; returns immediately once initialized.
73
- embed(text: string): Promise<EmbeddingResult>; // Initializes on demand, then returns the cached or freshly computed (mean-pooled, normalized) vector.
74
- embedBatch(texts: string[]): Promise<BatchEmbeddingResult>; // Processes texts one at a time, reusing cached vectors where available.
75
- }
76
- export declare class MockEmbeddingService extends BaseEmbeddingService { // Provider that derives deterministic pseudo-embeddings from a hash of the text; no model or network involved.
77
- readonly provider: EmbeddingProvider; // Set to 'mock' by the implementation.
78
- private readonly dimensions; // Length of generated vectors; defaults to 384.
79
- private readonly simulatedLatency; // Delay in milliseconds applied by embed() on a cache miss; defaults to 0.
80
- constructor(config: MockEmbeddingConfig);
81
- embed(text: string): Promise<EmbeddingResult>; // Returns the cached vector or generates, caches and returns a new one.
82
- embedBatch(texts: string[]): Promise<BatchEmbeddingResult>; // Generates or reuses one vector per text, in input order.
83
- /**
84
- * Generate deterministic hash-based embedding
85
- */
86
- private hashEmbedding; // Same text and dimensions always yield the same unit-normalized vector.
87
- }
88
- /**
89
- * Create embedding service based on configuration.
- *
- * `config.provider` selects the implementation: 'openai', 'transformers' or
- * 'mock'. Any other value logs a warning and falls back to a mock service
- * with 384 dimensions.
- *
- * @param config Provider configuration; passed to the selected service's constructor.
- * @returns The constructed embedding service.
90
- */
91
- export declare function createEmbeddingService(config: EmbeddingConfig): IEmbeddingService;
92
- /**
93
- * Convenience function for quick embeddings.
- *
- * Creates a service (mock provider with 384 dimensions unless `config`
- * overrides those fields), embeds `text`, and shuts the service down again
- * whether or not embedding succeeded.
- *
- * @param text Text to embed.
- * @param config Optional overrides merged over the mock defaults.
- * @returns The embedding vector for `text`.
94
- */
95
- export declare function getEmbedding(text: string, config?: Partial<EmbeddingConfig>): Promise<Float32Array | number[]>;
96
- /**
97
- * Compute cosine similarity between two embeddings.
- *
- * @param a First vector.
- * @param b Second vector; must have the same length as `a`.
- * @returns dot(a, b) / (|a| * |b|), or 0 when that denominator is not positive
- *   (for example when either vector is all zeros).
- * @throws Error 'Embedding dimensions must match' when the lengths differ.
98
- */
99
- export declare function cosineSimilarity(a: Float32Array | number[], b: Float32Array | number[]): number;
100
- /**
101
- * Compute Euclidean distance between two embeddings.
- *
- * @param a First vector.
- * @param b Second vector; must have the same length as `a`.
- * @returns The square root of the summed squared element differences.
- * @throws Error 'Embedding dimensions must match' when the lengths differ.
102
- */
103
- export declare function euclideanDistance(a: Float32Array | number[], b: Float32Array | number[]): number;
104
- /**
105
- * Compute dot product between two embeddings.
- *
- * @param a First vector.
- * @param b Second vector; must have the same length as `a`.
- * @returns The sum of the element-wise products.
- * @throws Error 'Embedding dimensions must match' when the lengths differ.
106
- */
107
- export declare function dotProduct(a: Float32Array | number[], b: Float32Array | number[]): number;
108
- /**
109
- * Compute similarity using specified metric.
- *
- * 'cosine' returns the cosine similarity, 'dot' the raw dot product, and
- * 'euclidean' converts distance to a score via 1 / (1 + distance) so that
- * closer vectors score higher. An unrecognised metric falls back to cosine
- * and is reported as 'cosine' in the result.
- *
- * @param a First vector.
- * @param b Second vector; must have the same length as `a`.
- * @param metric Metric to apply; defaults to 'cosine'.
- * @returns The score together with the metric that produced it.
110
- */
111
- export declare function computeSimilarity(a: Float32Array | number[], b: Float32Array | number[], metric?: SimilarityMetric): SimilarityResult;
112
- export {};
113
- //# sourceMappingURL=embedding-service.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"embedding-service.d.ts","sourceRoot":"","sources":["../src/embedding-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,2BAA2B,EAC3B,mBAAmB,EACnB,eAAe,EACf,oBAAoB,EACpB,iBAAiB,EACjB,cAAc,EACd,sBAAsB,EACtB,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,YAAY,CAAC;AAMpB,cAAM,QAAQ,CAAC,CAAC,EAAE,CAAC;IAKL,OAAO,CAAC,QAAQ,CAAC,OAAO;IAJpC,OAAO,CAAC,KAAK,CAAwB;IACrC,OAAO,CAAC,IAAI,CAAK;IACjB,OAAO,CAAC,MAAM,CAAK;gBAEU,OAAO,EAAE,MAAM;IAE5C,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,SAAS;IAa1B,GAAG,CAAC,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI;IAa3B,KAAK,IAAI,IAAI;IAMb,IAAI,IAAI,IAAI,MAAM,CAEjB;IAED,IAAI,OAAO,IAAI,MAAM,CAGpB;IAED,QAAQ;;;;;;;CAST;AAMD,uBAAe,oBAAqB,SAAQ,YAAa,YAAW,iBAAiB;IAKvE,SAAS,CAAC,QAAQ,CAAC,MAAM,EAAE,eAAe;IAJtD,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IAC9C,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IAChD,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,sBAAsB,CAAC,CAAa;gBAEvC,MAAM,EAAE,eAAe;IAKtD,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IACtD,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAEnE,SAAS,CAAC,SAAS,CAAC,KAAK,EAAE,cAAc,GAAG,IAAI;IAWhD,gBAAgB,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI;IAIxD,mBAAmB,CAAC,QAAQ,EAAE,sBAAsB,GAAG,IAAI;IAI3D,UAAU,IAAI,IAAI;IAMlB,aAAa;;;;;IASP,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;CAIhC;AAMD,qBAAa,sBAAuB,SAAQ,oBAAoB;IAC9D,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAY;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;gBAExB,MAAM,EAAE,qBAAqB;IASnC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAwC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;YA6DlD,UAAU;CA+CzB;AAMD,qBAAa,4BAA6B,SAAQ,oBAAoB;IACpE,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAkB;IACtD,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,EAAE,2BAA2B;YAKjC,UAAU;IAYlB,KAAK,CAAC,IAA
I,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAsC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;CAoCjE;AAMD,qBAAa,oBAAqB,SAAQ,oBAAoB;IAC5D,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAU;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;gBAE9B,MAAM,EAAE,mBAAmB;IAMjC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAgC7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,oBAAoB,CAAC;IAiChE;;OAEG;IACH,OAAO,CAAC,aAAa;CAyBtB;AAMD;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,eAAe,GAAG,iBAAiB,CAYjF;AAED;;GAEG;AACH,wBAAsB,YAAY,CAChC,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,OAAO,CAAC,eAAe,CAAC,GAChC,OAAO,CAAC,YAAY,GAAG,MAAM,EAAE,CAAC,CAalC;AAMD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAiBR;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAYR;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,GACzB,MAAM,CAWR;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,CAAC,EAAE,YAAY,GAAG,MAAM,EAAE,EAC1B,MAAM,GAAE,gBAA2B,GAClC,gBAAgB,CAYlB"}
@@ -1,543 +0,0 @@
1
- /**
2
- * V3 Embedding Service Implementation
3
- *
4
- * Production embedding service aligned with agentic-flow@alpha:
5
- * - OpenAI provider (text-embedding-3-small/large)
6
- * - Transformers.js provider (local ONNX models)
7
- * - Mock provider (development/testing)
8
- *
9
- * Performance Targets:
10
- * - Single embedding: <100ms (API), <50ms (local)
11
- * - Batch embedding: <500ms for 10 items
12
- * - Cache hit: <1ms
13
- */
14
- import { EventEmitter } from 'events';
15
- // ============================================================================
16
- // LRU Cache Implementation
17
- // ============================================================================
18
/**
 * Least-recently-used cache built on a `Map`.
 *
 * A `Map` iterates in insertion order, so the first key is always the least
 * recently used one: reads re-insert the key at the end, and eviction removes
 * the first key. Lookups are counted so callers can report a hit rate.
 */
class LRUCache {
    maxSize;
    cache = new Map();
    hits = 0;
    misses = 0;
    /**
     * @param maxSize Maximum number of entries to keep. Any value below 1
     *   (including NaN) disables storage: `set` becomes a no-op, so every
     *   lookup is a miss.
     */
    constructor(maxSize) {
        this.maxSize = maxSize;
    }
    /**
     * Look up `key`, counting the access as a hit or a miss.
     *
     * @returns The stored value (now marked most recently used), or
     *   `undefined` when the key is absent.
     */
    get(key) {
        const value = this.cache.get(key);
        if (value !== undefined) {
            // Re-insert so the key moves to the most-recently-used end.
            this.cache.delete(key);
            this.cache.set(key, value);
            this.hits++;
            return value;
        }
        this.misses++;
        return undefined;
    }
    /**
     * Store `value` under `key` as the most recently used entry, evicting the
     * least recently used entry first when the cache is full.
     */
    set(key, value) {
        // A capacity below 1 means "caching disabled". Without this guard a
        // capacity of 0 would still retain one entry, and NaN would never evict.
        if (!(this.maxSize >= 1)) {
            return;
        }
        if (this.cache.has(key)) {
            // Delete first so the re-insert below refreshes the recency order.
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            // Remove oldest (first) entry
            const firstKey = this.cache.keys().next().value;
            if (firstKey !== undefined) {
                this.cache.delete(firstKey);
            }
        }
        this.cache.set(key, value);
    }
    /** Remove every entry and reset the hit/miss counters. */
    clear() {
        this.cache.clear();
        this.hits = 0;
        this.misses = 0;
    }
    /** Number of entries currently stored. */
    get size() {
        return this.cache.size;
    }
    /** Fraction of lookups that were hits; 0 before any lookup. */
    get hitRate() {
        const total = this.hits + this.misses;
        return total > 0 ? this.hits / total : 0;
    }
    /** Snapshot of the size, capacity and lookup counters. */
    getStats() {
        return {
            size: this.cache.size,
            maxSize: this.maxSize,
            hits: this.hits,
            misses: this.misses,
            hitRate: this.hitRate,
        };
    }
}
73
- // ============================================================================
74
- // Base Embedding Service
75
- // ============================================================================
76
/**
 * Common base for the embedding providers.
 *
 * Owns the provider configuration, an LRU cache of computed embeddings and a
 * set of event listeners. Events go to those listeners and are also emitted
 * through the inherited `EventEmitter` under `event.type`.
 */
class BaseEmbeddingService extends EventEmitter {
    config;
    cache;
    embeddingListeners = new Set();
    /**
     * @param config Provider configuration; `cacheSize` sets the cache
     *   capacity and defaults to 1000.
     */
    constructor(config) {
        super();
        this.config = config;
        const capacity = config.cacheSize ?? 1000;
        this.cache = new LRUCache(capacity);
    }
    /**
     * Deliver `event` to every registered listener, then emit it on the
     * `EventEmitter`. A listener that throws is logged and skipped so the
     * remaining listeners still run.
     */
    emitEvent(event) {
        this.embeddingListeners.forEach((notify) => {
            try {
                notify(event);
            }
            catch (failure) {
                console.error('Error in embedding event listener:', failure);
            }
        });
        this.emit(event.type, event);
    }
    /** Register a listener for embedding events. */
    addEventListener(listener) {
        this.embeddingListeners.add(listener);
    }
    /** Unregister a previously added listener. */
    removeEventListener(listener) {
        this.embeddingListeners.delete(listener);
    }
    /** Empty the cache and announce how many entries were dropped. */
    clearCache() {
        const dropped = this.cache.size;
        this.cache.clear();
        this.emitEvent({ type: 'cache_eviction', size: dropped });
    }
    /** Report the cache's size, capacity and hit rate. */
    getCacheStats() {
        const { size, maxSize, hitRate } = this.cache.getStats();
        return { size, maxSize, hitRate };
    }
    /** Clear the cache (listeners are notified first), then drop all listeners. */
    async shutdown() {
        this.clearCache();
        this.embeddingListeners.clear();
    }
}
120
- // ============================================================================
121
- // OpenAI Embedding Service
122
- // ============================================================================
123
/**
 * Embedding provider backed by an OpenAI-style embeddings HTTP endpoint.
 *
 * Vectors are cached per input text in the inherited LRU cache. Progress is
 * reported through `emitEvent` as `cache_hit`, `embed_start`,
 * `embed_complete`, `embed_error`, `batch_start` and `batch_complete`.
 */
export class OpenAIEmbeddingService extends BaseEmbeddingService {
    provider = 'openai';
    apiKey;
    model;
    baseURL;
    timeout;
    maxRetries;
    /**
     * @param config OpenAI settings. Defaults: model
     *   `text-embedding-3-small`, baseURL
     *   `https://api.openai.com/v1/embeddings`, timeout 30000 ms,
     *   maxRetries 3.
     */
    constructor(config) {
        super(config);
        this.apiKey = config.apiKey;
        this.model = config.model ?? 'text-embedding-3-small';
        this.baseURL = config.baseURL ?? 'https://api.openai.com/v1/embeddings';
        this.timeout = config.timeout ?? 30000;
        this.maxRetries = config.maxRetries ?? 3;
    }
    /**
     * Embed a single text, serving it from the cache when possible.
     *
     * @param text Text to embed; also used as the cache key.
     * @returns The embedding with its latency; cache hits report
     *   `latencyMs: 0` and `cached: true`, fresh results include token usage.
     * @throws Error `OpenAI embedding failed: <reason>` when the request fails.
     */
    async embed(text) {
        // Check cache
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        try {
            const response = await this.callOpenAI([text]);
            const embedding = new Float32Array(response.data[0].embedding);
            // Cache result
            this.cache.set(text, embedding);
            const latencyMs = performance.now() - startTime;
            this.emitEvent({ type: 'embed_complete', text, latencyMs });
            return {
                embedding,
                latencyMs,
                usage: {
                    promptTokens: response.usage?.prompt_tokens ?? 0,
                    totalTokens: response.usage?.total_tokens ?? 0,
                },
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            this.emitEvent({ type: 'embed_error', text, error: message });
            throw new Error(`OpenAI embedding failed: ${message}`);
        }
    }
    /**
     * Embed several texts, requesting only those not already cached.
     *
     * @param texts Texts to embed.
     * @returns Embeddings in the same order as `texts`, with timing, token
     *   usage for the texts actually requested, and cache hit/miss counts.
     */
    async embedBatch(texts) {
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        // Check cache for each text
        const cached = [];
        const uncached = [];
        texts.forEach((text, index) => {
            const cachedEmbedding = this.cache.get(text);
            if (cachedEmbedding) {
                cached.push({ index, embedding: cachedEmbedding });
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                uncached.push({ index, text });
            }
        });
        // Fetch uncached embeddings
        let apiEmbeddings = [];
        let usage = { promptTokens: 0, totalTokens: 0 };
        if (uncached.length > 0) {
            const response = await this.callOpenAI(uncached.map(u => u.text));
            apiEmbeddings = response.data.map(d => new Float32Array(d.embedding));
            // Cache results
            uncached.forEach((item, i) => {
                this.cache.set(item.text, apiEmbeddings[i]);
            });
            usage = {
                promptTokens: response.usage?.prompt_tokens ?? 0,
                totalTokens: response.usage?.total_tokens ?? 0,
            };
        }
        // Reconstruct result array in original order
        const embeddings = new Array(texts.length);
        cached.forEach(c => {
            embeddings[c.index] = c.embedding;
        });
        uncached.forEach((u, i) => {
            embeddings[u.index] = apiEmbeddings[i];
        });
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // An empty batch would otherwise divide by zero and report NaN.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            usage,
            cacheStats: {
                hits: cached.length,
                misses: uncached.length,
            },
        };
    }
    /**
     * POST `texts` to the embeddings endpoint, making at most `maxRetries`
     * attempts with exponential backoff (100 ms, 200 ms, 400 ms, ...).
     *
     * @param texts Inputs to embed in one request.
     * @returns The parsed JSON response, holding one `data` item per input.
     * @throws The last attempt's error, or `Max retries exceeded` when
     *   `maxRetries` allows no attempt at all.
     */
    async callOpenAI(texts) {
        const config = this.config;
        for (let attempt = 0; attempt < this.maxRetries; attempt++) {
            try {
                const controller = new AbortController();
                const timeoutId = setTimeout(() => controller.abort(), this.timeout);
                let response;
                try {
                    response = await fetch(this.baseURL, {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                            Authorization: `Bearer ${this.apiKey}`,
                        },
                        body: JSON.stringify({
                            model: this.model,
                            input: texts,
                            dimensions: config.dimensions,
                        }),
                        signal: controller.signal,
                    });
                }
                finally {
                    // Always release the abort timer. Clearing it only on the
                    // success path left a live timer behind for every failed
                    // attempt, keeping the process alive until it fired.
                    clearTimeout(timeoutId);
                }
                if (!response.ok) {
                    const error = await response.text();
                    throw new Error(`OpenAI API error: ${response.status} - ${error}`);
                }
                const payload = await response.json();
                // Reject short or malformed responses here; otherwise callers
                // would cache `undefined` for the inputs that are missing.
                const received = Array.isArray(payload?.data) ? payload.data.length : 0;
                if (received !== texts.length) {
                    throw new Error(`OpenAI API error: expected ${texts.length} embeddings, received ${received}`);
                }
                return payload;
            }
            catch (error) {
                if (attempt === this.maxRetries - 1) {
                    throw error;
                }
                // Exponential backoff
                await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 100));
            }
        }
        throw new Error('Max retries exceeded');
    }
}
262
- // ============================================================================
263
- // Transformers.js Embedding Service
264
- // ============================================================================
265
/**
 * Embedding provider that runs a `@xenova/transformers` feature-extraction
 * pipeline in-process.
 *
 * The library is imported and the model loaded lazily on first use. Vectors
 * are mean-pooled, normalized and cached per input text.
 */
export class TransformersEmbeddingService extends BaseEmbeddingService {
    provider = 'transformers';
    pipeline = null;
    modelName;
    initialized = false;
    // In-flight initialization shared by concurrent callers; null when idle.
    initPromise = null;
    /**
     * @param config Transformers settings; `model` defaults to
     *   `Xenova/all-MiniLM-L6-v2`.
     */
    constructor(config) {
        super(config);
        this.modelName = config.model ?? 'Xenova/all-MiniLM-L6-v2';
    }
    /**
     * Load the pipeline once.
     *
     * Concurrent callers await the same in-flight load instead of each
     * starting their own. A failed load is not remembered, so a later call
     * can try again.
     *
     * @throws Error `Failed to initialize transformers.js: <reason>`.
     */
    async initialize() {
        if (this.initialized)
            return;
        if (this.initPromise === null) {
            this.initPromise = (async () => {
                try {
                    const { pipeline } = await import('@xenova/transformers');
                    this.pipeline = await pipeline('feature-extraction', this.modelName);
                    this.initialized = true;
                }
                catch (error) {
                    throw new Error(`Failed to initialize transformers.js: ${error}`);
                }
                finally {
                    // Success is recorded in `initialized`; after a failure
                    // this reset lets the next call retry the load.
                    this.initPromise = null;
                }
            })();
        }
        await this.initPromise;
    }
    /**
     * Embed a single text, serving it from the cache when possible.
     *
     * @param text Text to embed; also used as the cache key.
     * @returns The embedding with its latency; cache hits report
     *   `latencyMs: 0` and `cached: true`.
     * @throws Error `Transformers.js embedding failed: <reason>` when the
     *   pipeline call fails.
     */
    async embed(text) {
        await this.initialize();
        // Check cache
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        try {
            const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
            const embedding = new Float32Array(output.data);
            // Cache result
            this.cache.set(text, embedding);
            const latencyMs = performance.now() - startTime;
            this.emitEvent({ type: 'embed_complete', text, latencyMs });
            return {
                embedding,
                latencyMs,
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            this.emitEvent({ type: 'embed_error', text, error: message });
            throw new Error(`Transformers.js embedding failed: ${message}`);
        }
    }
    /**
     * Embed several texts one after another, reusing cached vectors.
     *
     * @param texts Texts to embed.
     * @returns Embeddings in the same order as `texts`, with timing and
     *   cache hit/miss counts.
     */
    async embedBatch(texts) {
        await this.initialize();
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const embeddings = [];
        let cacheHits = 0;
        for (const text of texts) {
            const cached = this.cache.get(text);
            if (cached) {
                embeddings.push(cached);
                cacheHits++;
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
                const embedding = new Float32Array(output.data);
                this.cache.set(text, embedding);
                embeddings.push(embedding);
            }
        }
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // An empty batch would otherwise divide by zero and report NaN.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            cacheStats: {
                hits: cacheHits,
                misses: texts.length - cacheHits,
            },
        };
    }
}
351
- // ============================================================================
352
- // Mock Embedding Service
353
- // ============================================================================
354
/**
 * Embedding provider for development and tests.
 *
 * Vectors are derived from a hash of the input text, so the same text and
 * dimension count always produce the same unit-length vector. No model or
 * network access is involved.
 */
export class MockEmbeddingService extends BaseEmbeddingService {
    provider = 'mock';
    dimensions;
    simulatedLatency;
    /**
     * @param config Mock settings. Defaults: 384 dimensions and 0 ms of
     *   simulated latency.
     */
    constructor(config) {
        super(config);
        this.dimensions = config.dimensions ?? 384;
        this.simulatedLatency = config.simulatedLatency ?? 0;
    }
    /**
     * Embed a single text, serving it from the cache when possible.
     *
     * @param text Text to embed; also used as the cache key.
     * @returns The embedding with its latency; cache hits report
     *   `latencyMs: 0` and `cached: true`.
     */
    async embed(text) {
        // Check cache
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        // Simulate latency
        if (this.simulatedLatency > 0) {
            await new Promise(resolve => setTimeout(resolve, this.simulatedLatency));
        }
        const embedding = this.hashEmbedding(text);
        this.cache.set(text, embedding);
        const latencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'embed_complete', text, latencyMs });
        return {
            embedding,
            latencyMs,
        };
    }
    /**
     * Embed several texts, reusing cached vectors.
     *
     * @param texts Texts to embed.
     * @returns Embeddings in the same order as `texts`, with timing and
     *   cache hit/miss counts.
     */
    async embedBatch(texts) {
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const embeddings = [];
        let cacheHits = 0;
        for (const text of texts) {
            const cached = this.cache.get(text);
            if (cached) {
                embeddings.push(cached);
                cacheHits++;
                // Report hits like the OpenAI and Transformers providers do,
                // so code exercised against the mock sees the same events.
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                const embedding = this.hashEmbedding(text);
                this.cache.set(text, embedding);
                embeddings.push(embedding);
            }
        }
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // An empty batch would otherwise divide by zero and report NaN.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            cacheStats: {
                hits: cacheHits,
                misses: texts.length - cacheHits,
            },
        };
    }
    /**
     * Generate deterministic hash-based embedding
     *
     * @param text Text to derive the vector from.
     * @returns A `Float32Array` of `dimensions` values scaled to unit length.
     */
    hashEmbedding(text) {
        const embedding = new Float32Array(this.dimensions);
        // Seed with text hash
        let hash = 0;
        for (let i = 0; i < text.length; i++) {
            hash = (hash << 5) - hash + text.charCodeAt(i);
            // Wrap to a signed 32-bit integer.
            hash = hash & hash;
        }
        // Generate pseudo-random embedding
        for (let i = 0; i < this.dimensions; i++) {
            const seed = hash + i * 2654435761;
            const x = Math.sin(seed) * 10000;
            // Keep only the fractional part, giving a value in [0, 1).
            embedding[i] = x - Math.floor(x);
        }
        // Normalize to unit vector
        const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
        // Skip scaling for a zero norm so the vector never fills with NaN.
        if (norm > 0) {
            for (let i = 0; i < this.dimensions; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }
}
443
- // ============================================================================
444
- // Factory Functions
445
- // ============================================================================
446
/**
 * Build the embedding service selected by `config.provider`.
 *
 * Recognised providers are 'openai', 'transformers' and 'mock'. Anything
 * else logs a warning and yields a 384-dimension mock service.
 *
 * @param config Provider configuration, handed to the chosen constructor.
 * @returns The constructed service.
 */
export function createEmbeddingService(config) {
    const requested = config.provider;
    if (requested === 'openai') {
        return new OpenAIEmbeddingService(config);
    }
    if (requested === 'transformers') {
        return new TransformersEmbeddingService(config);
    }
    if (requested === 'mock') {
        return new MockEmbeddingService(config);
    }
    console.warn(`Unknown provider, using mock`);
    return new MockEmbeddingService({ provider: 'mock', dimensions: 384 });
}
462
/**
 * One-shot helper: create a service, embed `text`, shut the service down.
 *
 * @param text Text to embed.
 * @param config Optional overrides merged over the defaults (mock provider,
 *   384 dimensions).
 * @returns The embedding vector for `text`.
 */
export async function getEmbedding(text, config) {
    const settings = { provider: 'mock', dimensions: 384, ...config };
    const service = createEmbeddingService(settings);
    try {
        const { embedding } = await service.embed(text);
        return embedding;
    }
    finally {
        // Release the temporary service whether or not embedding succeeded.
        await service.shutdown();
    }
}
479
- // ============================================================================
480
- // Similarity Functions
481
- // ============================================================================
482
/**
 * Cosine of the angle between two embeddings.
 *
 * @param a First vector.
 * @param b Second vector; must be as long as `a`.
 * @returns dot(a, b) / (|a| * |b|), or 0 when that denominator is not
 *   positive (for example an all-zero or empty vector).
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let inner = 0;
    let squaredA = 0;
    let squaredB = 0;
    let k = 0;
    while (k < dims) {
        const x = a[k];
        const y = b[k];
        inner += x * y;
        squaredA += x * x;
        squaredB += y * y;
        k++;
    }
    const scale = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    if (scale > 0) {
        return inner / scale;
    }
    return 0;
}
500
/**
 * Straight-line distance between two embeddings.
 *
 * @param a First vector.
 * @param b Second vector; must be as long as `a`.
 * @returns The square root of the summed squared element differences.
 * @throws Error when the vectors differ in length.
 */
export function euclideanDistance(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let squaredTotal = 0;
    let k = 0;
    while (k < dims) {
        const delta = a[k] - b[k];
        squaredTotal += delta * delta;
        k++;
    }
    return Math.sqrt(squaredTotal);
}
514
/**
 * Inner product of two embeddings.
 *
 * @param a First vector.
 * @param b Second vector; must be as long as `a`.
 * @returns The sum of the element-wise products.
 * @throws Error when the vectors differ in length.
 */
export function dotProduct(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let total = 0;
    let k = 0;
    while (k < dims) {
        total += a[k] * b[k];
        k++;
    }
    return total;
}
527
/**
 * Score two embeddings with the requested metric.
 *
 * 'euclidean' is turned into a similarity via 1 / (1 + distance), so closer
 * vectors score higher. 'dot' is the raw dot product. 'cosine', and any
 * unrecognised metric, use cosine similarity and report 'cosine'.
 *
 * @param a First vector.
 * @param b Second vector; must be as long as `a`.
 * @param metric Metric to apply; defaults to 'cosine'.
 * @returns The score and the metric that produced it.
 */
export function computeSimilarity(a, b, metric = 'cosine') {
    if (metric === 'euclidean') {
        const distance = euclideanDistance(a, b);
        return { score: 1 / (1 + distance), metric };
    }
    if (metric === 'dot') {
        return { score: dotProduct(a, b), metric };
    }
    // 'cosine' itself and every unknown metric end up here.
    return { score: cosineSimilarity(a, b), metric: 'cosine' };
}
543
- //# sourceMappingURL=embedding-service.js.map