@sparkleideas/embeddings 3.0.0-alpha.17 → 3.0.0-alpha.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +308 -17
- package/package.json +19 -7
- package/src/chunking.ts +351 -0
- package/src/embedding-service.ts +477 -5
- package/src/hyperbolic.ts +458 -0
- package/src/index.ts +77 -0
- package/src/neural-integration.ts +295 -0
- package/src/normalization.ts +267 -0
- package/src/persistent-cache.ts +410 -0
- package/src/types.ts +61 -2
- package/dist/__tests__/embedding-service.test.d.ts +0 -2
- package/dist/__tests__/embedding-service.test.d.ts.map +0 -1
- package/dist/__tests__/embedding-service.test.js +0 -98
- package/dist/__tests__/embedding-service.test.js.map +0 -1
- package/dist/embedding-service.d.ts +0 -113
- package/dist/embedding-service.d.ts.map +0 -1
- package/dist/embedding-service.js +0 -543
- package/dist/embedding-service.js.map +0 -1
- package/dist/index.d.ts +0 -15
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js +0 -15
- package/dist/index.js.map +0 -1
- package/dist/types.d.ts +0 -178
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -15
- package/dist/types.js.map +0 -1
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite-backed Persistent Cache for Embeddings (sql.js)
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - Cross-platform support (pure JavaScript/WASM, no native compilation)
|
|
6
|
+
* - Disk persistence across sessions
|
|
7
|
+
* - LRU eviction with configurable max size
|
|
8
|
+
* - Automatic schema creation
|
|
9
|
+
* - TTL support for cache entries
|
|
10
|
+
* - Lazy initialization (no startup cost if not used)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'fs';
import { dirname } from 'path';
|
|
15
|
+
|
|
16
|
+
// sql.js is loaded through a dynamic import and its bundled typings do not
// always line up with the module that is actually resolved, so both handles
// are deliberately left untyped. Keep the `any` confined to these two aliases.
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped sql.js database handle
type SqlJsDatabase = any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped sql.js module object
type SqlJsStatic = any;
|
|
20
|
+
|
|
21
|
+
/**
 * Configuration accepted by the `PersistentEmbeddingCache` constructor.
 *
 * Only `dbPath` is required; every other field falls back to the default
 * noted on it.
 */
export interface PersistentCacheConfig {
  /**
   * Path to the SQLite database file. The parent directory is created on
   * first use if it does not exist.
   */
  dbPath: string;
  /**
   * Maximum number of entries (default: 10000). When a write pushes the entry
   * count above this value, the least recently accessed entries are deleted,
   * plus an extra 10% of `maxSize` as headroom.
   */
  maxSize?: number;
  /**
   * TTL in milliseconds, measured from the time an entry was written
   * (default: 7 days).
   */
  ttlMs?: number;
  /**
   * Enable compression for large embeddings.
   *
   * NOTE(review): `PersistentEmbeddingCache` does not read this flag in its
   * constructor, so it currently has no effect there — confirm whether it is
   * reserved for future use.
   */
  compress?: boolean;
  /** Auto-save interval in ms (default: 30000) */
  autoSaveInterval?: number;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Snapshot of cache statistics returned by `PersistentEmbeddingCache.getStats()`.
 */
export interface PersistentCacheStats {
  /** Number of rows currently stored; 0 when the database is unavailable. */
  size: number;
  /** Configured maximum number of entries. */
  maxSize: number;
  /** `hits / (hits + misses)`, in the range 0..1; 0 when no lookups were made. */
  hitRate: number;
  /** Lookups answered from the cache since construction or the last `clear()`. */
  hits: number;
  /** Lookups not answered from the cache (absent, expired, failed or disabled). */
  misses: number;
  /** Size of the database file in bytes; omitted when the file cannot be read. */
  dbSizeBytes?: number;
}
|
|
48
|
+
|
|
49
|
+
/**
 * SQLite-backed persistent embedding cache using sql.js (pure JS/WASM).
 *
 * The database is held in memory and exported to `dbPath` as a whole file:
 * once after initialization, periodically by the auto-save timer while there
 * are unsaved changes, and on `clear()`, `flush()` and `close()`.
 *
 * Initialization is lazy: nothing is imported or opened until the first
 * public method is called. If initialization fails for any reason the cache
 * disables itself (every `get` is a miss, every `set` is a no-op) instead of
 * throwing.
 *
 * Call `close()` when done: it stops the auto-save timer and writes pending
 * changes to disk.
 */
export class PersistentEmbeddingCache {
  private db: SqlJsDatabase | null = null;
  private SQL: SqlJsStatic | null = null;
  private initialized = false;
  /** In-flight initialization, shared by all callers that arrive while it runs. */
  private initPromise: Promise<void> | null = null;
  /** True when the in-memory database has changes not yet written to disk. */
  private dirty = false;
  private hits = 0;
  private misses = 0;
  private autoSaveTimer: ReturnType<typeof setInterval> | null = null;

  private readonly dbPath: string;
  private readonly maxSize: number;
  private readonly ttlMs: number;
  private readonly autoSaveInterval: number;

  /**
   * Stores the configuration only; no file or module is touched here.
   *
   * @param config Cache settings; unset optional fields use their defaults.
   */
  constructor(config: PersistentCacheConfig) {
    this.dbPath = config.dbPath;
    this.maxSize = config.maxSize ?? 10000;
    this.ttlMs = config.ttlMs ?? 7 * 24 * 60 * 60 * 1000; // 7 days
    this.autoSaveInterval = config.autoSaveInterval ?? 30000; // 30 seconds
  }

  /**
   * Lazily initialize the database connection.
   *
   * Concurrent callers share a single initialization run; without this, two
   * overlapping first calls would each create a database and the second
   * would silently replace (and leak) the first.
   */
  private async ensureInitialized(): Promise<void> {
    if (this.initialized) return;

    if (this.initPromise === null) {
      this.initPromise = this.initialize();
    }
    await this.initPromise;
  }

  /**
   * Load sql.js, open (or create) the database, create the schema, purge
   * expired rows, persist once and start the auto-save timer.
   *
   * Never rejects: on any failure the cache is left disabled (`db === null`)
   * and marked initialized so the failing work is not retried on every call.
   */
  private async initialize(): Promise<void> {
    try {
      // Dynamically import sql.js and initialize it (loads WASM)
      const initSqlJs = (await import('sql.js')).default;
      this.SQL = await initSqlJs();

      // Ensure directory exists
      const dir = dirname(this.dbPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }

      // Load existing database or create new
      this.db = existsSync(this.dbPath)
        ? new this.SQL.Database(readFileSync(this.dbPath))
        : new this.SQL.Database();

      // Create schema
      this.db.run(`
        CREATE TABLE IF NOT EXISTS embeddings (
          key TEXT PRIMARY KEY,
          embedding BLOB NOT NULL,
          dimensions INTEGER NOT NULL,
          created_at INTEGER NOT NULL,
          accessed_at INTEGER NOT NULL,
          access_count INTEGER DEFAULT 1
        )
      `);
      this.db.run('CREATE INDEX IF NOT EXISTS idx_accessed_at ON embeddings(accessed_at)');
      this.db.run('CREATE INDEX IF NOT EXISTS idx_created_at ON embeddings(created_at)');

      // Clean expired entries on startup
      this.cleanExpired();

      // Save after initialization to persist schema
      this.saveToFile();

      // Start auto-save timer
      this.startAutoSave();
    } catch (error) {
      // If sql.js not available, fall back gracefully
      console.warn('[persistent-cache] sql.js not available, cache disabled:',
        error instanceof Error ? error.message : error);
      // Drop any half-open handle so the rest of the class really sees a
      // disabled cache rather than a database that throws on every query.
      this.discardDatabase();
    } finally {
      this.initialized = true; // Also set on failure, to prevent retry
      this.initPromise = null;
    }
  }

  /**
   * Release the database handle after a failed initialization, ignoring any
   * error raised while closing it.
   */
  private discardDatabase(): void {
    this.stopAutoSave();
    if (this.db) {
      try {
        this.db.close();
      } catch {
        // The handle is already unusable; nothing more to do.
      }
    }
    this.db = null;
    this.SQL = null;
  }

  /**
   * Start the auto-save timer (no-op if it is already running).
   */
  private startAutoSave(): void {
    if (this.autoSaveTimer) return;

    this.autoSaveTimer = setInterval(() => {
      if (this.dirty && this.db) {
        this.saveToFile();
      }
    }, this.autoSaveInterval);
  }

  /**
   * Stop the auto-save timer (no-op if it is not running).
   */
  private stopAutoSave(): void {
    if (this.autoSaveTimer) {
      clearInterval(this.autoSaveTimer);
      this.autoSaveTimer = null;
    }
  }

  /**
   * Export the whole in-memory database and write it to `dbPath`.
   *
   * Failures are logged, not thrown; `dirty` is only cleared on success so a
   * later save can retry.
   */
  private saveToFile(): void {
    if (!this.db) return;

    try {
      const data = this.db.export();
      const buffer = Buffer.from(data);
      writeFileSync(this.dbPath, buffer);
      this.dirty = false;
    } catch (error) {
      console.error('[persistent-cache] Save error:', error);
    }
  }

  /**
   * Generate the cache key for a text: `emb_<hex hash>_<text length>`.
   *
   * The hash is a fast FNV-1a-style 32-bit fold over UTF-16 code units. The
   * arithmetic is kept exactly as shipped so keys already stored on disk
   * stay valid.
   *
   * NOTE(review): two different texts with the same hash and length would
   * share a key; there is no secondary check against the original text.
   */
  private hashKey(text: string): string {
    let hash = 0x811c9dc5;
    for (let i = 0; i < text.length; i++) {
      hash ^= text.charCodeAt(i);
      hash = (hash * 0x01000193) >>> 0;
    }
    return `emb_${hash.toString(16)}_${text.length}`;
  }

  /**
   * View a Float32Array's bytes as a Uint8Array for binding as a BLOB.
   * No copy is made; the view respects the array's offset and length.
   */
  private serializeEmbedding(embedding: Float32Array): Uint8Array {
    return new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  }

  /**
   * Rebuild a Float32Array from a stored BLOB.
   *
   * The bytes are copied into a fresh buffer so the result is correctly
   * aligned regardless of the offset of `data` within its backing buffer.
   *
   * @param data Raw bytes read from the `embedding` column.
   * @param dimensions Element count stored alongside the blob.
   * @throws RangeError if `data` does not hold exactly `dimensions` floats.
   */
  private deserializeEmbedding(data: Uint8Array, dimensions: number): Float32Array {
    const expectedBytes = dimensions * Float32Array.BYTES_PER_ELEMENT;
    if (!Number.isInteger(dimensions) || dimensions < 0 || data.byteLength !== expectedBytes) {
      throw new RangeError(
        `[persistent-cache] Stored embedding has ${data.byteLength} bytes, expected ${expectedBytes}`,
      );
    }

    const bytes = new Uint8Array(expectedBytes);
    bytes.set(data);
    return new Float32Array(bytes.buffer);
  }

  /**
   * Fetch the stored row for a key, or null when there is none.
   * The prepared statement is always freed, even if the query throws.
   */
  private readRow(
    key: string,
  ): { embedding: Uint8Array; dimensions: number; created_at: number } | null {
    const stmt = this.db.prepare(`
      SELECT embedding, dimensions, created_at
      FROM embeddings
      WHERE key = ?
    `);
    try {
      stmt.bind([key]);
      if (!stmt.step()) return null;
      return stmt.getAsObject() as {
        embedding: Uint8Array;
        dimensions: number;
        created_at: number;
      };
    } finally {
      stmt.free();
    }
  }

  /** Current number of rows in the embeddings table. */
  private countEntries(): number {
    const result = this.db.exec('SELECT COUNT(*) as count FROM embeddings');
    return (result[0]?.values[0]?.[0] as number | undefined) ?? 0;
  }

  /**
   * Get an embedding from the cache.
   *
   * A hit refreshes the entry's access time and count. Expired entries and
   * entries whose blob does not match the stored dimension count are deleted
   * and reported as a miss.
   *
   * @param text Text whose embedding is requested.
   * @returns The cached embedding, or null on a miss, on error, or when the
   *   cache is disabled. Every lookup counts as exactly one hit or one miss.
   */
  async get(text: string): Promise<Float32Array | null> {
    await this.ensureInitialized();
    if (!this.db) {
      this.misses++;
      return null;
    }

    const key = this.hashKey(text);
    const now = Date.now();

    try {
      const row = this.readRow(key);
      if (row === null) {
        this.misses++;
        return null;
      }

      const expired = now - row.created_at > this.ttlMs;
      const corrupt =
        row.embedding.byteLength !== row.dimensions * Float32Array.BYTES_PER_ELEMENT;
      if (expired || corrupt) {
        this.db.run('DELETE FROM embeddings WHERE key = ?', [key]);
        this.dirty = true;
        this.misses++;
        return null;
      }

      // Decode before touching the counters so a failure is a miss only.
      const embedding = this.deserializeEmbedding(row.embedding, row.dimensions);

      // Update access time and count
      this.db.run(`
        UPDATE embeddings
        SET accessed_at = ?, access_count = access_count + 1
        WHERE key = ?
      `, [now, key]);
      this.dirty = true;

      this.hits++;
      return embedding;
    } catch (error) {
      console.error('[persistent-cache] Get error:', error);
      this.misses++;
      return null;
    }
  }

  /**
   * Store an embedding in the cache, replacing any existing entry for the
   * same text, then evict old entries if the cache has grown past `maxSize`.
   * Errors are logged, not thrown. No-op when the cache is disabled.
   *
   * @param text Text the embedding was computed from.
   * @param embedding Vector to store.
   */
  async set(text: string, embedding: Float32Array): Promise<void> {
    await this.ensureInitialized();
    if (!this.db) return;

    const key = this.hashKey(text);
    const now = Date.now();
    const data = this.serializeEmbedding(embedding);

    try {
      // Upsert entry using INSERT OR REPLACE; the sub-select carries the
      // previous access_count forward, incremented by one.
      this.db.run(`
        INSERT OR REPLACE INTO embeddings
        (key, embedding, dimensions, created_at, accessed_at, access_count)
        VALUES (?, ?, ?, ?, ?,
          COALESCE((SELECT access_count + 1 FROM embeddings WHERE key = ?), 1)
        )
      `, [key, data, embedding.length, now, now, key]);
      this.dirty = true;

      // Check size and evict if needed
      await this.evictIfNeeded();
    } catch (error) {
      console.error('[persistent-cache] Set error:', error);
    }
  }

  /**
   * Evict the least recently accessed entries if the cache exceeds `maxSize`.
   * Deletes the overflow plus 10% of `maxSize`, so eviction does not run on
   * every subsequent write.
   */
  private async evictIfNeeded(): Promise<void> {
    if (!this.db) return;

    const count = this.countEntries();
    if (count <= this.maxSize) return;

    const toDelete = count - this.maxSize + Math.floor(this.maxSize * 0.1); // Delete 10% extra
    this.db.run(`
      DELETE FROM embeddings
      WHERE key IN (
        SELECT key FROM embeddings
        ORDER BY accessed_at ASC
        LIMIT ?
      )
    `, [toDelete]);
    this.dirty = true;
  }

  /**
   * Delete every entry written longer than `ttlMs` ago.
   */
  private cleanExpired(): void {
    if (!this.db) return;

    const cutoff = Date.now() - this.ttlMs;
    this.db.run('DELETE FROM embeddings WHERE created_at < ?', [cutoff]);
    this.dirty = true;
  }

  /**
   * Get cache statistics.
   *
   * @returns Counters plus, when the database is open, the row count and the
   *   on-disk file size. The size comes from file metadata rather than from
   *   reading the file contents.
   */
  async getStats(): Promise<PersistentCacheStats> {
    await this.ensureInitialized();

    const total = this.hits + this.misses;
    const stats: PersistentCacheStats = {
      size: 0,
      maxSize: this.maxSize,
      hitRate: total > 0 ? this.hits / total : 0,
      hits: this.hits,
      misses: this.misses,
    };

    if (this.db) {
      stats.size = this.countEntries();

      try {
        stats.dbSizeBytes = statSync(this.dbPath).size;
      } catch {
        // File missing or unreadable: leave dbSizeBytes unset.
      }
    }

    return stats;
  }

  /**
   * Clear all cached entries, reset the hit/miss counters and save
   * immediately. No-op when the cache is disabled.
   */
  async clear(): Promise<void> {
    await this.ensureInitialized();
    if (!this.db) return;

    this.db.run('DELETE FROM embeddings');
    this.dirty = true;
    this.hits = 0;
    this.misses = 0;
    this.saveToFile();
  }

  /**
   * Force a save to disk if there are unsaved changes.
   */
  async flush(): Promise<void> {
    await this.ensureInitialized();
    if (this.db && this.dirty) {
      this.saveToFile();
    }
  }

  /**
   * Close the database connection: stop the auto-save timer, write pending
   * changes and release the database. The cache re-initializes lazily if it
   * is used again afterwards.
   */
  async close(): Promise<void> {
    // Let an in-flight initialization finish first; otherwise it would open
    // the database and start the timer after this method had returned.
    if (this.initPromise !== null) {
      await this.initPromise;
    }

    this.stopAutoSave();

    if (this.db) {
      // Save before closing
      if (this.dirty) {
        this.saveToFile();
      }
      this.db.close();
      this.db = null;
      this.SQL = null;
      this.initialized = false;
    }
  }
}
|
|
398
|
+
|
|
399
|
+
/**
 * Report whether the persistent cache can be used, i.e. whether sql.js can
 * be imported and initialized in this environment.
 *
 * @returns true when both the import and the initialization succeed; false
 *   if either one throws.
 */
export async function isPersistentCacheAvailable(): Promise<boolean> {
  let available = false;

  try {
    const sqlModule = await import('sql.js');
    const initialize = sqlModule.default;
    await initialize();
    available = true;
  } catch {
    available = false;
  }

  return available;
}
|
package/src/types.ts
CHANGED
|
@@ -19,7 +19,26 @@
|
|
|
19
19
|
/**
|
|
20
20
|
* Supported embedding providers
|
|
21
21
|
*/
|
|
22
|
-
export type EmbeddingProvider = 'openai' | 'transformers' | 'mock';
|
|
22
|
+
export type EmbeddingProvider = 'openai' | 'transformers' | 'mock' | 'agentic-flow';
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Normalization type for embeddings
|
|
26
|
+
*/
|
|
27
|
+
export type NormalizationType = 'l2' | 'l1' | 'minmax' | 'zscore' | 'none';
|
|
28
|
+
|
|
29
|
+
/**
 * Persistent cache configuration
 */
export interface PersistentCacheConfig {
  /**
   * Enable persistent disk cache (requires sql.js, which the cache
   * implementation in persistent-cache.ts loads with a dynamic import)
   */
  enabled: boolean;
  /** Path to SQLite database file (default: .cache/embeddings.db) */
  dbPath?: string;
  /** Maximum entries in persistent cache (default: 10000) */
  maxSize?: number;
  /** TTL in milliseconds (default: 7 days) */
  ttlMs?: number;
}
|
|
23
42
|
|
|
24
43
|
/**
|
|
25
44
|
* Base configuration for all providers
|
|
@@ -36,6 +55,12 @@ export interface EmbeddingBaseConfig {
|
|
|
36
55
|
|
|
37
56
|
/** Enable caching */
|
|
38
57
|
enableCache?: boolean;
|
|
58
|
+
|
|
59
|
+
/** Normalization type (default: 'none' - most providers pre-normalize) */
|
|
60
|
+
normalization?: NormalizationType;
|
|
61
|
+
|
|
62
|
+
/** Persistent disk cache configuration */
|
|
63
|
+
persistentCache?: PersistentCacheConfig;
|
|
39
64
|
}
|
|
40
65
|
|
|
41
66
|
/**
|
|
@@ -92,13 +117,41 @@ export interface MockEmbeddingConfig extends EmbeddingBaseConfig {
|
|
|
92
117
|
simulatedLatency?: number;
|
|
93
118
|
}
|
|
94
119
|
|
|
120
|
+
/**
|
|
121
|
+
* Agentic-flow provider configuration
|
|
122
|
+
* Uses optimized ONNX embeddings with:
|
|
123
|
+
* - Float32Array with flattened matrices
|
|
124
|
+
* - 256-entry LRU cache with FNV-1a hash
|
|
125
|
+
* - SIMD-friendly loop unrolling (4x)
|
|
126
|
+
* - Pre-allocated buffers (no GC pressure)
|
|
127
|
+
*/
|
|
128
|
+
export interface AgenticFlowEmbeddingConfig extends EmbeddingBaseConfig {
|
|
129
|
+
provider: 'agentic-flow';
|
|
130
|
+
|
|
131
|
+
/** Model ID (default: all-MiniLM-L6-v2) */
|
|
132
|
+
modelId?: string;
|
|
133
|
+
|
|
134
|
+
/** Embedding dimensions (default: 384) */
|
|
135
|
+
dimensions?: number;
|
|
136
|
+
|
|
137
|
+
/** Internal cache size for embedder (default: 256) */
|
|
138
|
+
embedderCacheSize?: number;
|
|
139
|
+
|
|
140
|
+
/** Model directory path */
|
|
141
|
+
modelDir?: string;
|
|
142
|
+
|
|
143
|
+
/** Auto-download model if not present */
|
|
144
|
+
autoDownload?: boolean;
|
|
145
|
+
}
|
|
146
|
+
|
|
95
147
|
/**
|
|
96
148
|
* Union of all provider configs
|
|
97
149
|
*/
|
|
98
150
|
export type EmbeddingConfig =
|
|
99
151
|
| OpenAIEmbeddingConfig
|
|
100
152
|
| TransformersEmbeddingConfig
|
|
101
|
-
| MockEmbeddingConfig
|
|
153
|
+
| MockEmbeddingConfig
|
|
154
|
+
| AgenticFlowEmbeddingConfig;
|
|
102
155
|
|
|
103
156
|
// ============================================================================
|
|
104
157
|
// Result Types
|
|
@@ -122,6 +175,12 @@ export interface EmbeddingResult {
|
|
|
122
175
|
|
|
123
176
|
/** Whether result was from cache */
|
|
124
177
|
cached?: boolean;
|
|
178
|
+
|
|
179
|
+
/** Whether result was from persistent cache */
|
|
180
|
+
persistentCached?: boolean;
|
|
181
|
+
|
|
182
|
+
/** Whether embedding was normalized */
|
|
183
|
+
normalized?: boolean;
|
|
125
184
|
}
|
|
126
185
|
|
|
127
186
|
/**
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding-service.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/embedding-service.test.ts"],"names":[],"mappings":""}
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Tests for EmbeddingService
|
|
3
|
-
*/
|
|
4
|
-
import { describe, it, expect, beforeEach } from 'vitest';
|
|
5
|
-
import { createEmbeddingService, MockEmbeddingService, cosineSimilarity, euclideanDistance, dotProduct, computeSimilarity, } from '../index.js';
|
|
6
|
-
describe('EmbeddingService', () => {
|
|
7
|
-
describe('MockEmbeddingService', () => {
|
|
8
|
-
let service;
|
|
9
|
-
beforeEach(() => {
|
|
10
|
-
service = new MockEmbeddingService({ provider: 'mock', dimensions: 128 });
|
|
11
|
-
});
|
|
12
|
-
it('should generate embeddings with correct dimensions', async () => {
|
|
13
|
-
const result = await service.embed('Hello, world!');
|
|
14
|
-
expect(result.embedding).toHaveLength(128);
|
|
15
|
-
});
|
|
16
|
-
it('should generate deterministic embeddings for same text', async () => {
|
|
17
|
-
const result1 = await service.embed('test text');
|
|
18
|
-
const result2 = await service.embed('test text');
|
|
19
|
-
// Mock service should be deterministic
|
|
20
|
-
expect(Array.from(result1.embedding)).toEqual(Array.from(result2.embedding));
|
|
21
|
-
});
|
|
22
|
-
it('should handle batch embeddings', async () => {
|
|
23
|
-
const texts = ['text1', 'text2', 'text3'];
|
|
24
|
-
const results = await service.embedBatch(texts);
|
|
25
|
-
expect(results.embeddings).toHaveLength(3);
|
|
26
|
-
// Each embedding should have correct dimensions
|
|
27
|
-
results.embeddings.forEach((emb) => {
|
|
28
|
-
expect(emb.length).toBe(128);
|
|
29
|
-
});
|
|
30
|
-
});
|
|
31
|
-
});
|
|
32
|
-
describe('createEmbeddingService', () => {
|
|
33
|
-
it('should create mock service', () => {
|
|
34
|
-
const service = createEmbeddingService({
|
|
35
|
-
provider: 'mock',
|
|
36
|
-
dimensions: 64,
|
|
37
|
-
});
|
|
38
|
-
expect(service).toBeInstanceOf(MockEmbeddingService);
|
|
39
|
-
});
|
|
40
|
-
});
|
|
41
|
-
});
|
|
42
|
-
describe('Similarity Functions', () => {
|
|
43
|
-
const vec1 = new Float32Array([1, 0, 0]);
|
|
44
|
-
const vec2 = new Float32Array([1, 0, 0]);
|
|
45
|
-
const vec3 = new Float32Array([0, 1, 0]);
|
|
46
|
-
const vec4 = new Float32Array([-1, 0, 0]);
|
|
47
|
-
describe('cosineSimilarity', () => {
|
|
48
|
-
it('should return 1 for identical vectors', () => {
|
|
49
|
-
expect(cosineSimilarity(vec1, vec2)).toBeCloseTo(1);
|
|
50
|
-
});
|
|
51
|
-
it('should return 0 for orthogonal vectors', () => {
|
|
52
|
-
expect(cosineSimilarity(vec1, vec3)).toBeCloseTo(0);
|
|
53
|
-
});
|
|
54
|
-
it('should return -1 for opposite vectors', () => {
|
|
55
|
-
expect(cosineSimilarity(vec1, vec4)).toBeCloseTo(-1);
|
|
56
|
-
});
|
|
57
|
-
});
|
|
58
|
-
describe('euclideanDistance', () => {
|
|
59
|
-
it('should return 0 for identical vectors', () => {
|
|
60
|
-
expect(euclideanDistance(vec1, vec2)).toBeCloseTo(0);
|
|
61
|
-
});
|
|
62
|
-
it('should return sqrt(2) for unit orthogonal vectors', () => {
|
|
63
|
-
expect(euclideanDistance(vec1, vec3)).toBeCloseTo(Math.sqrt(2));
|
|
64
|
-
});
|
|
65
|
-
it('should return 2 for opposite unit vectors', () => {
|
|
66
|
-
expect(euclideanDistance(vec1, vec4)).toBeCloseTo(2);
|
|
67
|
-
});
|
|
68
|
-
});
|
|
69
|
-
describe('dotProduct', () => {
|
|
70
|
-
it('should return 1 for identical unit vectors', () => {
|
|
71
|
-
expect(dotProduct(vec1, vec2)).toBeCloseTo(1);
|
|
72
|
-
});
|
|
73
|
-
it('should return 0 for orthogonal vectors', () => {
|
|
74
|
-
expect(dotProduct(vec1, vec3)).toBeCloseTo(0);
|
|
75
|
-
});
|
|
76
|
-
it('should return -1 for opposite unit vectors', () => {
|
|
77
|
-
expect(dotProduct(vec1, vec4)).toBeCloseTo(-1);
|
|
78
|
-
});
|
|
79
|
-
});
|
|
80
|
-
describe('computeSimilarity', () => {
|
|
81
|
-
it('should use cosine metric by default', () => {
|
|
82
|
-
const result = computeSimilarity(vec1, vec2);
|
|
83
|
-
expect(result.metric).toBe('cosine');
|
|
84
|
-
expect(result.score).toBeCloseTo(1);
|
|
85
|
-
});
|
|
86
|
-
it('should support euclidean metric', () => {
|
|
87
|
-
const result = computeSimilarity(vec1, vec3, 'euclidean');
|
|
88
|
-
expect(result.metric).toBe('euclidean');
|
|
89
|
-
expect(result.score).toBeCloseTo(Math.sqrt(2));
|
|
90
|
-
});
|
|
91
|
-
it('should support dot product metric', () => {
|
|
92
|
-
const result = computeSimilarity(vec1, vec4, 'dot');
|
|
93
|
-
expect(result.metric).toBe('dot');
|
|
94
|
-
expect(result.score).toBeCloseTo(-1);
|
|
95
|
-
});
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
//# sourceMappingURL=embedding-service.test.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding-service.test.js","sourceRoot":"","sources":["../../src/__tests__/embedding-service.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC1D,OAAO,EACL,sBAAsB,EACtB,oBAAoB,EACpB,gBAAgB,EAChB,iBAAiB,EACjB,UAAU,EACV,iBAAiB,GAClB,MAAM,aAAa,CAAC;AAErB,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;QACpC,IAAI,OAA6B,CAAC;QAElC,UAAU,CAAC,GAAG,EAAE;YACd,OAAO,GAAG,IAAI,oBAAoB,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;QAC5E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;YACtE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACjD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAEjD,uCAAuC;YACvC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;QAC/E,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;YAC9C,MAAM,KAAK,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;YAC1C,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEhD,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC3C,gDAAgD;YAChD,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;gBACjC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/B,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACpC,MAAM,OAAO,GAAG,sBAAsB,CAAC;gBACrC,QAAQ,EAAE,MAAM;gBAChB,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,MAAM,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,oBAAoB,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAA
M,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,IAAI,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAE1C,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,gBAAgB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mDAAmD,EAAE,GAAG,EAAE;YAC3D,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,CAAC,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;YACpD,MAAM,CAAC,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAC7C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAAI,
EAAE,WAAW,CAAC,CAAC;YAC1D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,MAAM,GAAG,iBAAiB,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC;YACpD,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|