@sparkleideas/embeddings 3.0.0-alpha.12-patch.14 → 3.0.0-alpha.12-patch.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/embedding-service.d.ts.map +1 -1
- package/dist/embedding-service.js +21 -1
- package/dist/embedding-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/rvf-embedding-cache.d.ts +118 -0
- package/dist/rvf-embedding-cache.d.ts.map +1 -0
- package/dist/rvf-embedding-cache.js +458 -0
- package/dist/rvf-embedding-cache.js.map +1 -0
- package/dist/rvf-embedding-service.d.ts +79 -0
- package/dist/rvf-embedding-service.d.ts.map +1 -0
- package/dist/rvf-embedding-service.js +318 -0
- package/dist/rvf-embedding-service.js.map +1 -0
- package/dist/types.d.ts +15 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +2 -2
- package/src/embedding-service.ts +22 -1
- package/src/index.ts +10 -0
- package/src/rvf-embedding-cache.ts +578 -0
- package/src/rvf-embedding-service.ts +377 -0
- package/src/types.ts +20 -2
|
@@ -0,0 +1,578 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RVF Embedding Cache - Pure TypeScript Binary File Cache
|
|
3
|
+
*
|
|
4
|
+
* Replaces the sql.js-based PersistentEmbeddingCache with a lightweight
|
|
5
|
+
* pure-TS binary file format. No native dependencies required.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Map-based in-memory cache with periodic flush to binary file
|
|
9
|
+
* - LRU eviction tracked via access timestamps
|
|
10
|
+
* - TTL support for cache entries
|
|
11
|
+
* - Deterministic FNV-1a text hashing for keys
|
|
12
|
+
* - Binary format: RVEC magic + format version (uint32) + entry records
|
|
13
|
+
*
|
|
14
|
+
* Binary entry format:
|
|
15
|
+
* [4-byte key-hash][4-byte dims][dims*4 bytes float32][8-byte created-at][8-byte accessed-at][8-byte access-count] (v2; v1 records omit created-at)
|
|
16
|
+
*
|
|
17
|
+
* @module @sparkleideas/embeddings
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'fs';
import { dirname } from 'path';
|
|
22
|
+
|
|
23
|
+
/**
 * Reject cache paths that embed a NUL character.
 *
 * This is the only check performed here; the path is not normalized or
 * restricted in any other way.
 *
 * @param p - Candidate cache file path.
 * @throws Error if `p` contains a NUL character.
 */
function validatePath(p: string): void {
  const hasNullByte = p.indexOf('\0') !== -1;
  if (hasNullByte) {
    throw new Error('Cache path contains null bytes');
  }
}
|
|
27
|
+
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// Configuration
|
|
30
|
+
// ============================================================================
|
|
31
|
+
|
|
32
|
+
/**
 * Options accepted by the RVF embedding cache constructor.
 */
export interface RvfEmbeddingCacheConfig {
  /** Location of the binary cache file on disk. */
  cachePath: string;
  /** Upper bound on the number of cached entries; defaults to 10000. */
  maxSize?: number;
  /** Entry time-to-live in milliseconds; defaults to 7 days. */
  ttlMs?: number;
  /** Expected embedding length; when provided it is used for validation. */
  dimensions?: number;
}
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// Cache Entry
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
/** In-memory record kept for each cached embedding. */
interface CacheEntry {
  /** The stored embedding vector. */
  embedding: Float32Array;
  /** Epoch-millisecond timestamp the TTL is measured from. */
  createdAt: number;
  /** Epoch-millisecond timestamp of the latest access; orders LRU eviction. */
  accessedAt: number;
  /** Number of times the entry has been read or written. */
  accessCount: number;
}
|
|
56
|
+
|
|
57
|
+
// ============================================================================
|
|
58
|
+
// Constants
|
|
59
|
+
// ============================================================================
|
|
60
|
+
|
|
61
|
+
/** File signature: the ASCII bytes of "RVEC". */
const MAGIC = Uint8Array.of(0x52, 0x56, 0x45, 0x43); // 'R' 'V' 'E' 'C'

/** Entry lifetime used when the config omits one: 7 days, in milliseconds. */
const DEFAULT_TTL_MS = 7 * 24 * 3600 * 1000;

/** Entry cap used when the config omits one. */
const DEFAULT_MAX_SIZE = 10000;

/** Period of the background flush timer: 30 seconds, in milliseconds. */
const AUTO_FLUSH_INTERVAL_MS = 30 * 1000;

/** 32-bit FNV-1a offset basis. */
const FNV_OFFSET_BASIS = 0x811c9dc5;

/** 32-bit FNV-1a prime. */
const FNV_PRIME = 0x01000193;
|
|
78
|
+
|
|
79
|
+
// ============================================================================
|
|
80
|
+
// RVF Embedding Cache
|
|
81
|
+
// ============================================================================
|
|
82
|
+
|
|
83
|
+
/**
 * File-backed embedding cache with LRU eviction and TTL expiry, written in
 * plain TypeScript.
 *
 * Embeddings live in an in-memory Map keyed by the 32-bit FNV-1a hash of the
 * source text and are periodically flushed to a compact binary file. Because
 * the key is only the 32-bit hash, two distinct texts that hash alike share a
 * single entry.
 */
export class RvfEmbeddingCache {
  private readonly cachePath: string;
  private readonly maxSize: number;
  private readonly ttlMs: number;
  private readonly dimensions: number | undefined;

  /** Cached records keyed by text hash. */
  private entries: Map<number, CacheEntry> = new Map();
  /** Texts seen by set() during this session, mapped to their hash. */
  private textToHash: Map<string, number> = new Map();
  /** True when in-memory state differs from what was last written to disk. */
  private dirty = false;
  private flushTimer: ReturnType<typeof setInterval> | null = null;
  private initialized = false;

  /**
   * @param config - Cache options; `cachePath` is required.
   * @throws Error if `config.cachePath` contains a NUL character.
   */
  constructor(config: RvfEmbeddingCacheConfig) {
    this.cachePath = config.cachePath;
    this.maxSize = config.maxSize ?? DEFAULT_MAX_SIZE;
    this.ttlMs = config.ttlMs ?? DEFAULT_TTL_MS;
    this.dimensions = config.dimensions;
    validatePath(this.cachePath);
  }

  // --------------------------------------------------------------------------
  // Initialization
  // --------------------------------------------------------------------------

  /**
   * Lazily initialize the cache: create the parent directory, load the cache
   * file if it exists, drop expired entries, and start the auto-flush timer.
   * Subsequent calls are no-ops until close() resets the instance.
   */
  private async ensureInitialized(): Promise<void> {
    if (this.initialized) return;

    // Ensure parent directory exists
    const dir = dirname(this.cachePath);
    if (!existsSync(dir)) {
      mkdirSync(dir, { recursive: true });
    }

    // Load existing cache file
    if (existsSync(this.cachePath)) {
      this.loadFromFile();
    }

    // Clean expired entries on startup
    this.cleanExpired();

    // Start auto-flush timer
    this.startAutoFlush();

    this.initialized = true;
  }

  // --------------------------------------------------------------------------
  // Public API (matches PersistentEmbeddingCache)
  // --------------------------------------------------------------------------

  /**
   * Look up the embedding cached for `text`.
   *
   * A hit refreshes the entry's LRU timestamp and access count. An entry
   * older than the TTL is removed and reported as a miss.
   *
   * @param text - Text whose embedding is requested.
   * @returns The cached vector (the cache's own array, not a clone), or null
   *   when absent or expired.
   */
  async get(text: string): Promise<Float32Array | null> {
    await this.ensureInitialized();

    const hash = this.hashText(text);
    const entry = this.entries.get(hash);

    if (!entry) {
      return null;
    }

    // Check TTL
    const now = Date.now();
    if (now - entry.createdAt > this.ttlMs) {
      this.entries.delete(hash);
      this.textToHash.delete(text);
      this.dirty = true;
      return null;
    }

    // Update LRU tracking
    entry.accessedAt = now;
    entry.accessCount++;
    this.dirty = true;

    return entry.embedding;
  }

  /**
   * Store an embedding for `text`, replacing any previous value.
   *
   * Writing restarts the entry's TTL. Triggers LRU eviction if the cache
   * exceeds maxSize.
   *
   * @param text - Text the embedding belongs to.
   * @param embedding - Vector to store; it is copied, so later mutation by
   *   the caller does not affect the cache.
   * @throws Error if `dimensions` is configured and `embedding.length`
   *   differs from it.
   */
  async set(text: string, embedding: Float32Array): Promise<void> {
    await this.ensureInitialized();

    // Validate dimensions if configured
    if (this.dimensions !== undefined && embedding.length !== this.dimensions) {
      throw new Error(
        `Dimension mismatch: expected ${this.dimensions}, got ${embedding.length}`
      );
    }

    const hash = this.hashText(text);
    const now = Date.now();

    // Copy the embedding to avoid external mutation
    const copy = new Float32Array(embedding.length);
    copy.set(embedding);

    const existing = this.entries.get(hash);
    if (existing) {
      existing.embedding = copy;
      // Restart the TTL: the replacement value is fresh and must not inherit
      // the age of the value it overwrites, otherwise writing over an aged
      // entry would be discarded as expired on the very next read.
      existing.createdAt = now;
      existing.accessedAt = now;
      existing.accessCount++;
    } else {
      this.entries.set(hash, {
        embedding: copy,
        createdAt: now,
        accessedAt: now,
        accessCount: 1,
      });
      this.textToHash.set(text, hash);
    }

    this.dirty = true;

    // Evict if over capacity
    this.evictIfNeeded();
  }

  /**
   * Check whether the cache holds a non-expired embedding for `text`.
   * An expired entry is removed as a side effect. Unlike get(), a hit does
   * not update LRU tracking.
   */
  async has(text: string): Promise<boolean> {
    await this.ensureInitialized();

    const hash = this.hashText(text);
    const entry = this.entries.get(hash);

    if (!entry) return false;

    // Check TTL
    if (Date.now() - entry.createdAt > this.ttlMs) {
      this.entries.delete(hash);
      this.textToHash.delete(text);
      this.dirty = true;
      return false;
    }

    return true;
  }

  /**
   * Delete the entry for `text`.
   *
   * @returns true if an entry existed and was deleted.
   */
  async delete(text: string): Promise<boolean> {
    await this.ensureInitialized();

    const hash = this.hashText(text);
    const existed = this.entries.delete(hash);
    this.textToHash.delete(text);

    if (existed) {
      this.dirty = true;
    }

    return existed;
  }

  /**
   * Clear all entries from the cache and persist the empty state immediately.
   */
  async clear(): Promise<void> {
    await this.ensureInitialized();

    this.entries.clear();
    this.textToHash.clear();
    this.dirty = true;
    this.flushToFile();
  }

  /**
   * Return the number of entries currently held in memory. Expired entries
   * that have not yet been touched or cleaned are included in the count.
   */
  async size(): Promise<number> {
    await this.ensureInitialized();
    return this.entries.size;
  }

  /**
   * Stop the auto-flush timer, write pending changes to disk, and release the
   * in-memory state. The instance re-initializes from disk on its next use.
   */
  async close(): Promise<void> {
    this.stopAutoFlush();

    if (this.dirty) {
      this.flushToFile();
    }

    this.entries.clear();
    this.textToHash.clear();
    this.initialized = false;
  }

  // --------------------------------------------------------------------------
  // Hashing
  // --------------------------------------------------------------------------

  /**
   * FNV-1a 32-bit hash of the input text, folded over its UTF-16 code units.
   * Deterministic: same input always produces the same hash.
   *
   * @returns An unsigned 32-bit integer.
   */
  private hashText(text: string): number {
    let hash = FNV_OFFSET_BASIS;
    for (let i = 0; i < text.length; i++) {
      hash ^= text.charCodeAt(i);
      // Math.imul keeps the multiply in 32-bit space; >>> 0 makes it unsigned.
      hash = Math.imul(hash, FNV_PRIME) >>> 0;
    }
    return hash;
  }

  // --------------------------------------------------------------------------
  // Entry removal
  // --------------------------------------------------------------------------

  /**
   * Remove the given hashes from `entries` together with any matching
   * `textToHash` record, and mark the cache dirty if anything was requested.
   */
  private removeEntries(hashes: readonly number[]): void {
    if (hashes.length === 0) return;

    // Build reverse map once for O(1) lookup (hash → text)
    const hashToText = new Map<number, string>();
    for (const [text, hash] of this.textToHash) {
      hashToText.set(hash, text);
    }

    for (const hash of hashes) {
      this.entries.delete(hash);
      const text = hashToText.get(hash);
      if (text !== undefined) this.textToHash.delete(text);
    }

    this.dirty = true;
  }

  // --------------------------------------------------------------------------
  // LRU Eviction
  // --------------------------------------------------------------------------

  /**
   * If the cache exceeds maxSize, evict the least-recently-accessed entries
   * until we are back at 90% capacity.
   */
  private evictIfNeeded(): void {
    if (this.entries.size <= this.maxSize) return;

    const targetSize = Math.floor(this.maxSize * 0.9);
    const toEvict = this.entries.size - targetSize;

    // Oldest accessedAt first; the leading `toEvict` entries are the victims.
    const victims = [...this.entries.entries()]
      .sort((a, b) => a[1].accessedAt - b[1].accessedAt)
      .slice(0, toEvict)
      .map(([hash]) => hash);

    this.removeEntries(victims);
  }

  // --------------------------------------------------------------------------
  // TTL Cleanup
  // --------------------------------------------------------------------------

  /**
   * Remove all entries whose createdAt timestamp is older than the TTL.
   */
  private cleanExpired(): void {
    const cutoff = Date.now() - this.ttlMs;
    const expired: number[] = [];

    for (const [hash, entry] of this.entries) {
      if (entry.createdAt < cutoff) {
        expired.push(hash);
      }
    }

    this.removeEntries(expired);
  }

  // --------------------------------------------------------------------------
  // Auto-Flush Timer
  // --------------------------------------------------------------------------

  /**
   * Start the periodic flush timer if it is not already running. The timer is
   * unref'ed where supported so that it does not keep the process alive.
   */
  private startAutoFlush(): void {
    if (this.flushTimer) return;

    this.flushTimer = setInterval(() => {
      if (this.dirty) {
        this.flushToFile();
      }
    }, AUTO_FLUSH_INTERVAL_MS);
    if (this.flushTimer.unref) this.flushTimer.unref();
  }

  /** Stop the periodic flush timer if it is running. */
  private stopAutoFlush(): void {
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }

  // --------------------------------------------------------------------------
  // Binary Serialization
  // --------------------------------------------------------------------------

  /**
   * Write all entries to the binary cache file (format version 2).
   *
   * Layout, all multi-byte values little-endian:
   *   [4-byte magic "RVEC"]
   *   [4-byte format version (uint32) = 2]
   *   For each entry:
   *     [4-byte key-hash (uint32)]
   *     [4-byte dims (uint32)]
   *     [dims * 4 bytes float32 data]
   *     [8-byte createdAt (float64)]
   *     [8-byte accessedAt (float64)]
   *     [8-byte accessCount (float64)]
   *
   * The data is written to a uniquely named temp file and then renamed over
   * the cache path. Failures are logged rather than thrown; the cache then
   * stays dirty so a later flush retries.
   */
  private flushToFile(): void {
    // Tracked outside the try block so the catch handler can clean it up.
    let tmpPath: string | null = null;

    try {
      let totalSize = MAGIC.length + 4; // magic + version uint32
      for (const [, entry] of this.entries) {
        totalSize += 4 + 4 + entry.embedding.length * 4 + 8 + 8 + 8;
      }

      const buffer = new ArrayBuffer(totalSize);
      const view = new DataView(buffer);
      const bytes = new Uint8Array(buffer);
      let offset = 0;

      // Write magic
      bytes.set(MAGIC, 0);
      offset += MAGIC.length;

      // Write format version
      view.setUint32(offset, 2, true);
      offset += 4;

      // Write entries
      for (const [hash, entry] of this.entries) {
        // Key hash (uint32, little-endian)
        view.setUint32(offset, hash, true);
        offset += 4;

        // Dimensions (uint32, little-endian)
        view.setUint32(offset, entry.embedding.length, true);
        offset += 4;

        // Embedding data (float32 array, little-endian)
        for (let i = 0; i < entry.embedding.length; i++) {
          view.setFloat32(offset, entry.embedding[i], true);
          offset += 4;
        }

        // createdAt as float64 (little-endian) - v2: separate from accessedAt
        view.setFloat64(offset, entry.createdAt, true);
        offset += 8;

        // accessedAt as float64 (little-endian)
        view.setFloat64(offset, entry.accessedAt, true);
        offset += 8;

        // Access count as float64 (little-endian)
        view.setFloat64(offset, entry.accessCount, true);
        offset += 8;
      }

      // Ensure parent directory exists
      const dir = dirname(this.cachePath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }

      const tmpSuffix = Date.now().toString(36) + Math.random().toString(36).slice(2, 8);
      tmpPath = this.cachePath + '.tmp.' + tmpSuffix;
      writeFileSync(tmpPath, Buffer.from(buffer));
      renameSync(tmpPath, this.cachePath);
      tmpPath = null; // the rename consumed the temp file
      this.dirty = false;
    } catch (error) {
      console.error(
        '[rvf-embedding-cache] Flush error:',
        error instanceof Error ? error.message : error
      );

      // Every attempt uses a fresh temp name, so a failed write or rename
      // would otherwise leave one more orphan file behind on each retry.
      if (tmpPath !== null) {
        try {
          unlinkSync(tmpPath);
        } catch {
          // Best effort: the temp file may never have been created.
        }
      }
    }
  }

  /**
   * Load entries from the binary cache file into `entries`.
   *
   * Reads both v2 files (version header; createdAt, accessedAt and
   * accessCount per entry) and v1 files (no version header; accessedAt and
   * accessCount only, with accessedAt reused as createdAt). Loading stops at
   * the first truncated record. Records with non-finite timestamps, or whose
   * length differs from the configured `dimensions`, are skipped. Errors are
   * logged rather than thrown.
   */
  private loadFromFile(): void {
    try {
      const fileBuffer = readFileSync(this.cachePath);
      if (fileBuffer.length < MAGIC.length) return;

      const buffer = fileBuffer.buffer.slice(
        fileBuffer.byteOffset,
        fileBuffer.byteOffset + fileBuffer.byteLength
      );
      const view = new DataView(buffer);
      const bytes = new Uint8Array(buffer);
      let offset = 0;

      // Verify magic
      for (let i = 0; i < MAGIC.length; i++) {
        if (bytes[offset + i] !== MAGIC[i]) {
          console.warn('[rvf-embedding-cache] Invalid magic bytes, skipping load');
          return;
        }
      }
      offset += MAGIC.length;

      // Check for version header (v2+).
      // NOTE(review): v1 files carry no version field, so a v1 file whose
      // first key hash happens to equal 2 would be read as v2 here.
      let formatVersion = 1;
      if (offset + 4 <= buffer.byteLength) {
        const possibleVersion = view.getUint32(offset, true);
        if (possibleVersion === 2) {
          formatVersion = possibleVersion;
          offset += 4;
        }
      }

      let skipped = 0;

      // Read entries
      while (offset + 8 <= buffer.byteLength) {
        // Need at least 4 (hash) + 4 (dims) = 8 bytes for the header
        const hash = view.getUint32(offset, true);
        offset += 4;

        const dims = view.getUint32(offset, true);
        offset += 4;

        const entryDataSize = formatVersion === 2
          ? dims * 4 + 8 + 8 + 8  // v2: embedding + createdAt + accessedAt + accessCount
          : dims * 4 + 8 + 8;     // v1: embedding + accessedAt + accessCount
        if (offset + entryDataSize > buffer.byteLength) {
          console.warn('[rvf-embedding-cache] Truncated entry, stopping load');
          break;
        }

        // Read embedding
        const embedding = new Float32Array(dims);
        for (let i = 0; i < dims; i++) {
          embedding[i] = view.getFloat32(offset, true);
          offset += 4;
        }

        let createdAt: number;
        let accessedAt: number;
        let accessCount: number;

        if (formatVersion >= 2) {
          createdAt = view.getFloat64(offset, true);
          offset += 8;
          accessedAt = view.getFloat64(offset, true);
          offset += 8;
          accessCount = view.getFloat64(offset, true);
          offset += 8;
        } else {
          // v1: only accessedAt was stored, use it as createdAt too
          accessedAt = view.getFloat64(offset, true);
          offset += 8;
          accessCount = view.getFloat64(offset, true);
          offset += 8;
          createdAt = accessedAt;
        }

        // The record is fully consumed at this point, so skipping it keeps
        // the read offset aligned with the next record.
        // - Non-finite timestamps would never satisfy the TTL comparisons,
        //   so such an entry could never expire.
        // - set() rejects vectors of the wrong length; loading them from
        //   disk would let get() return what set() refuses to store.
        const timestampsValid = Number.isFinite(createdAt) && Number.isFinite(accessedAt);
        const dimsValid = this.dimensions === undefined || dims === this.dimensions;
        if (!timestampsValid || !dimsValid) {
          skipped++;
          continue;
        }

        this.entries.set(hash, {
          embedding,
          createdAt,
          accessedAt,
          accessCount,
        });
      }

      if (skipped > 0) {
        console.warn(`[rvf-embedding-cache] Skipped ${skipped} invalid entries during load`);
      }
    } catch (error) {
      console.warn(
        '[rvf-embedding-cache] Load error:',
        error instanceof Error ? error.message : error
      );
    }
  }
}
|