embedded-raptor 2.1.1 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +13 -1
- package/dist/cli.cjs +3 -3
- package/dist/cli.mjs +2 -2
- package/dist/{engine-Zax2PJSW.mjs → engine-CjiGgePO.mjs} +250 -96
- package/dist/{engine-DKn_V99U.cjs → engine-DZWQXLVm.cjs} +267 -96
- package/dist/engine.d.ts +16 -0
- package/dist/engine.d.ts.map +1 -1
- package/dist/index.cjs +5 -2
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.mjs +2 -2
- package/dist/lru-cache.d.ts +45 -0
- package/dist/lru-cache.d.ts.map +1 -0
- package/dist/storage-engine/file-lock.d.ts +6 -0
- package/dist/storage-engine/file-lock.d.ts.map +1 -1
- package/dist/storage-engine/index.d.ts +1 -1
- package/dist/storage-engine/index.d.ts.map +1 -1
- package/dist/storage-engine/migration.d.ts.map +1 -1
- package/dist/storage-engine/storage-engine.d.ts +5 -0
- package/dist/storage-engine/storage-engine.d.ts.map +1 -1
- package/dist/storage-engine/types.d.ts +2 -0
- package/dist/storage-engine/types.d.ts.map +1 -1
- package/dist/storage-engine/wal.d.ts +2 -1
- package/dist/storage-engine/wal.d.ts.map +1 -1
- package/dist/types.d.ts +4 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/dist/binary-file-reader.d.ts +0 -9
- package/dist/binary-file-reader.d.ts.map +0 -1
- package/dist/binary-format.d.ts +0 -22
- package/dist/binary-format.d.ts.map +0 -1
- package/dist/engine-BvJ0ls3b.mjs +0 -397
- package/dist/engine-Cuz0P5Od.mjs +0 -1164
- package/dist/engine-DLM7PWhV.cjs +0 -1203
- package/dist/engine-Iq0_dbnk.cjs +0 -436
package/LICENSE
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2025 Christoffer Artmann <artgaard@gmail.com>
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Christoffer Artmann <artgaard@gmail.com>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -109,10 +109,19 @@ Create a new embedding engine.
|
|
|
109
109
|
|
|
110
110
|
```typescript
|
|
111
111
|
const engine = new EmbeddingEngine({
|
|
112
|
-
storePath: './database.raptor' // Path to storage file
|
|
112
|
+
storePath: './database.raptor', // Path to storage file
|
|
113
|
+
embeddingCacheSize: 100 // Optional: cache up to 100 text-to-embedding mappings
|
|
113
114
|
})
|
|
114
115
|
```
|
|
115
116
|
|
|
117
|
+
**Options:**
|
|
118
|
+
|
|
119
|
+
- `storePath` - Path to the database file (required)
|
|
120
|
+
- `cacheDir` - Directory to cache downloaded models (default: `./.cache/models`)
|
|
121
|
+
- `readOnly` - Open database in read-only mode (default: `false`)
|
|
122
|
+
- `embeddingCacheSize` - Size of the LRU cache for text-to-embedding lookups
|
|
123
|
+
(default: `0` = disabled)
|
|
124
|
+
|
|
116
125
|
#### `store(key, text)`
|
|
117
126
|
|
|
118
127
|
Store a single text entry with auto-generated embedding.
|
|
@@ -221,6 +230,9 @@ dimensions)
|
|
|
221
230
|
- **Memory efficient**: Reads file in 64KB chunks, handles large databases
|
|
222
231
|
- **Fast search**: Cosine similarity comparison across all embeddings
|
|
223
232
|
- **Deduplication**: Latest entry automatically used for duplicate keys
|
|
233
|
+
- **Embedding cache**: Use `embeddingCacheSize` to cache text-to-embedding
|
|
234
|
+
mappings and avoid regenerating embeddings for repeated text inputs (~1.7KB
|
|
235
|
+
per cached entry)
|
|
224
236
|
|
|
225
237
|
## Contributing
|
|
226
238
|
|
package/dist/cli.cjs
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
const require_engine = require('./engine-
|
|
2
|
+
const require_engine = require('./engine-DZWQXLVm.cjs');
|
|
3
3
|
let node_path = require("node:path");
|
|
4
4
|
node_path = require_engine.__toESM(node_path);
|
|
5
|
-
let node_fs = require("node:fs");
|
|
6
|
-
node_fs = require_engine.__toESM(node_fs);
|
|
7
5
|
let cleye = require("cleye");
|
|
8
6
|
cleye = require_engine.__toESM(cleye);
|
|
7
|
+
let node_fs = require("node:fs");
|
|
8
|
+
node_fs = require_engine.__toESM(node_fs);
|
|
9
9
|
let node_url = require("node:url");
|
|
10
10
|
node_url = require_engine.__toESM(node_url);
|
|
11
11
|
|
package/dist/cli.mjs
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import {
|
|
2
|
+
import { o as fileExtensions, r as Wal, s as opType, t as EmbeddingEngine } from "./engine-CjiGgePO.mjs";
|
|
3
3
|
import { dirname, resolve } from "node:path";
|
|
4
|
-
import { readFileSync } from "node:fs";
|
|
5
4
|
import { cli, command } from "cleye";
|
|
5
|
+
import { readFileSync } from "node:fs";
|
|
6
6
|
import { fileURLToPath } from "node:url";
|
|
7
7
|
|
|
8
8
|
//#region src/commands/flags.ts
|
|
@@ -2,7 +2,6 @@ import { LlamaLogLevel, getLlama, resolveModelFile } from "node-llama-cpp";
|
|
|
2
2
|
import invariant from "tiny-invariant";
|
|
3
3
|
import { copyFile, mkdir, open, rm, stat } from "node:fs/promises";
|
|
4
4
|
import { dirname } from "node:path";
|
|
5
|
-
import { constants } from "node:fs";
|
|
6
5
|
|
|
7
6
|
//#region src/storage-engine/constants.ts
|
|
8
7
|
/**
|
|
@@ -163,7 +162,7 @@ function readKeyFromBuffer(data, startOffset = 0) {
|
|
|
163
162
|
function serializeHeader(dimension) {
|
|
164
163
|
const buffer = new Uint8Array(headerSize);
|
|
165
164
|
const view = new DataView(buffer.buffer);
|
|
166
|
-
view.setUint32(headerOffsets.magic, headerMagic,
|
|
165
|
+
view.setUint32(headerOffsets.magic, headerMagic, false);
|
|
167
166
|
view.setUint16(headerOffsets.version, headerVersionV2, true);
|
|
168
167
|
view.setUint32(headerOffsets.dimension, dimension, true);
|
|
169
168
|
return buffer;
|
|
@@ -174,7 +173,7 @@ function serializeHeader(dimension) {
|
|
|
174
173
|
function deserializeHeader(data) {
|
|
175
174
|
if (data.length < headerSize) return null;
|
|
176
175
|
const view = new DataView(data.buffer, data.byteOffset);
|
|
177
|
-
if (view.getUint32(headerOffsets.magic,
|
|
176
|
+
if (view.getUint32(headerOffsets.magic, false) !== headerMagic) return null;
|
|
178
177
|
return {
|
|
179
178
|
version: view.getUint16(headerOffsets.version, true),
|
|
180
179
|
dimension: view.getUint32(headerOffsets.dimension, true)
|
|
@@ -265,19 +264,106 @@ function deserializeWalEntry(data, startOffset = 0) {
|
|
|
265
264
|
};
|
|
266
265
|
}
|
|
267
266
|
|
|
267
|
+
//#endregion
|
|
268
|
+
//#region src/storage-engine/file-lock.ts
|
|
269
|
+
const defaultLockTimeout = 1e4;
|
|
270
|
+
const retryInterval = 100;
|
|
271
|
+
/**
 * Exclusive lock backed by a lock file, used to keep more than one process
 * from working on the same database at once.
 *
 * The lock is taken by creating `filePath` with the "wx" flag (create, and
 * fail if the path already exists), so only one contender can succeed.
 * The holder's PID is written into the file; nothing in this class reads
 * it back.
 */
var FileLock = class {
  locked = false;
  filePath;
  timeoutMs;
  /**
   * @param filePath - Path of the lock file to create.
   * @param timeoutMs - How long acquire() keeps retrying while the lock
   * file already exists. Defaults to `defaultLockTimeout`.
   */
  constructor(filePath, timeoutMs = defaultLockTimeout) {
    this.filePath = filePath;
    this.timeoutMs = timeoutMs;
  }
  /**
   * Acquire the lock, creating the parent directory if needed.
   *
   * While the lock file already exists, retries every `retryInterval` ms.
   *
   * @throws Error if this instance already holds the lock.
   * @throws DatabaseLockedError once `timeoutMs` has elapsed and the lock
   * file still exists.
   * @throws Any other file-system error, unchanged.
   */
  async acquire() {
    if (this.locked) throw new Error("Lock already acquired");
    await mkdir(dirname(this.filePath), { recursive: true });
    const startTime = Date.now();
    for (;;) {
      let fileHandle;
      try {
        fileHandle = await open(this.filePath, "wx");
      } catch (error) {
        const heldElsewhere = error instanceof Error && "code" in error && error.code === "EEXIST";
        if (!heldElsewhere) throw error;
        if (Date.now() - startTime >= this.timeoutMs) throw new DatabaseLockedError(`Database is locked by another process (timeout after ${this.timeoutMs}ms): ${this.filePath}`);
        await sleep(retryInterval);
        continue;
      }
      try {
        await fileHandle.write(`${process.pid}\n`);
        await fileHandle.close();
      } catch (error) {
        // The lock file exists but we are about to report failure with
        // `locked` still false, so release() would never remove it. Roll
        // back here, otherwise the stale file blocks every later open.
        // Cleanup is best-effort: the original error is the one to surface.
        await fileHandle.close().catch(() => {});
        await rm(this.filePath, { force: true }).catch(() => {});
        throw error;
      }
      this.locked = true;
      return;
    }
  }
  /**
   * Release the lock by deleting the lock file.
   * Does nothing if this instance does not hold the lock. The instance is
   * marked unlocked even when the deletion fails.
   */
  async release() {
    if (!this.locked) return;
    try {
      await rm(this.filePath, { force: true });
    } finally {
      this.locked = false;
    }
  }
  /**
   * Check if this instance currently holds the lock.
   */
  isLocked() {
    return this.locked;
  }
};
|
|
328
|
+
/**
 * Raised when a database cannot be opened because another process
 * already holds its lock.
 */
var DatabaseLockedError = class extends Error {
  // Own property on each instance, so it shadows Error.prototype.name.
  name = "DatabaseLockedError";
  constructor(message) {
    super(message);
  }
};
|
|
338
|
+
/**
 * Raised when a write is attempted on a database opened in read-only mode.
 * The message falls back to a fixed default when none is supplied.
 */
var ReadOnlyError = class extends Error {
  // Own property on each instance, so it shadows Error.prototype.name.
  name = "ReadOnlyError";
  constructor(message = "Cannot write to a read-only database") {
    super(message);
  }
};
|
|
347
|
+
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
350
|
+
|
|
268
351
|
//#endregion
|
|
269
352
|
//#region src/storage-engine/wal.ts
|
|
270
353
|
var Wal = class {
|
|
271
354
|
filePath;
|
|
355
|
+
readOnly;
|
|
272
356
|
fileHandle = null;
|
|
273
|
-
constructor(filePath) {
|
|
357
|
+
constructor(filePath, readOnly = false) {
|
|
274
358
|
this.filePath = filePath;
|
|
359
|
+
this.readOnly = readOnly;
|
|
275
360
|
}
|
|
276
361
|
/**
|
|
277
362
|
* Append a WAL entry and sync to disk.
|
|
278
363
|
* This is the commit point - once this returns, the operation is durable.
|
|
279
364
|
*/
|
|
280
365
|
async append(entry) {
|
|
366
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
281
367
|
const buffer = serializeWalEntry(entry);
|
|
282
368
|
await mkdir(dirname(this.filePath), { recursive: true });
|
|
283
369
|
this.fileHandle ??= await open(this.filePath, "a");
|
|
@@ -290,6 +376,7 @@ var Wal = class {
|
|
|
290
376
|
* @returns The number of entries written
|
|
291
377
|
*/
|
|
292
378
|
async appendBatch(entries) {
|
|
379
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
293
380
|
if (entries.length === 0) return 0;
|
|
294
381
|
await mkdir(dirname(this.filePath), { recursive: true });
|
|
295
382
|
this.fileHandle ??= await open(this.filePath, "a");
|
|
@@ -449,81 +536,6 @@ var KeyIndex = class KeyIndex {
|
|
|
449
536
|
}
|
|
450
537
|
};
|
|
451
538
|
|
|
452
|
-
//#endregion
|
|
453
|
-
//#region src/storage-engine/file-lock.ts
|
|
454
|
-
const defaultLockTimeout = 1e4;
|
|
455
|
-
const retryInterval = 100;
|
|
456
|
-
/**
|
|
457
|
-
* Exclusive file lock for preventing multiple processes from
|
|
458
|
-
* accessing the same database simultaneously.
|
|
459
|
-
*
|
|
460
|
-
* Uses atomic file creation with O_EXCL flag to ensure only one
|
|
461
|
-
* process can acquire the lock at a time.
|
|
462
|
-
*/
|
|
463
|
-
var FileLock = class {
|
|
464
|
-
locked = false;
|
|
465
|
-
filePath;
|
|
466
|
-
timeoutMs;
|
|
467
|
-
constructor(filePath, timeoutMs = defaultLockTimeout) {
|
|
468
|
-
this.filePath = filePath;
|
|
469
|
-
this.timeoutMs = timeoutMs;
|
|
470
|
-
}
|
|
471
|
-
/**
|
|
472
|
-
* Acquire an exclusive lock on the file.
|
|
473
|
-
* Creates the lock file if it doesn't exist.
|
|
474
|
-
* Retries for up to timeoutMs before throwing DatabaseLockedError.
|
|
475
|
-
*/
|
|
476
|
-
async acquire() {
|
|
477
|
-
if (this.locked) throw new Error("Lock already acquired");
|
|
478
|
-
await mkdir(dirname(this.filePath), { recursive: true });
|
|
479
|
-
const startTime = Date.now();
|
|
480
|
-
while (true) try {
|
|
481
|
-
const fileHandle = await open(this.filePath, constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY);
|
|
482
|
-
await fileHandle.write(`${process.pid}\n`);
|
|
483
|
-
await fileHandle.close();
|
|
484
|
-
this.locked = true;
|
|
485
|
-
return;
|
|
486
|
-
} catch (error) {
|
|
487
|
-
if (error instanceof Error && "code" in error && error.code === "EEXIST") {
|
|
488
|
-
if (Date.now() - startTime >= this.timeoutMs) throw new DatabaseLockedError(`Database is locked by another process (timeout after ${this.timeoutMs}ms): ${this.filePath}`);
|
|
489
|
-
await sleep(retryInterval);
|
|
490
|
-
continue;
|
|
491
|
-
}
|
|
492
|
-
throw error;
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
/**
|
|
496
|
-
* Release the lock by deleting the lock file.
|
|
497
|
-
*/
|
|
498
|
-
async release() {
|
|
499
|
-
if (!this.locked) return;
|
|
500
|
-
try {
|
|
501
|
-
await rm(this.filePath, { force: true });
|
|
502
|
-
} finally {
|
|
503
|
-
this.locked = false;
|
|
504
|
-
}
|
|
505
|
-
}
|
|
506
|
-
/**
|
|
507
|
-
* Check if this instance currently holds the lock.
|
|
508
|
-
*/
|
|
509
|
-
isLocked() {
|
|
510
|
-
return this.locked;
|
|
511
|
-
}
|
|
512
|
-
};
|
|
513
|
-
/**
|
|
514
|
-
* Error thrown when attempting to open a database that is
|
|
515
|
-
* already locked by another process.
|
|
516
|
-
*/
|
|
517
|
-
var DatabaseLockedError = class extends Error {
|
|
518
|
-
constructor(message) {
|
|
519
|
-
super(message);
|
|
520
|
-
this.name = "DatabaseLockedError";
|
|
521
|
-
}
|
|
522
|
-
};
|
|
523
|
-
function sleep(ms) {
|
|
524
|
-
return new Promise((resolve$1) => setTimeout(resolve$1, ms));
|
|
525
|
-
}
|
|
526
|
-
|
|
527
539
|
//#endregion
|
|
528
540
|
//#region src/storage-engine/mutex.ts
|
|
529
541
|
/**
|
|
@@ -743,6 +755,7 @@ var StorageEngine = class StorageEngine {
|
|
|
743
755
|
walPath;
|
|
744
756
|
lockPath;
|
|
745
757
|
dimension;
|
|
758
|
+
readOnly;
|
|
746
759
|
wal;
|
|
747
760
|
index;
|
|
748
761
|
fileLock;
|
|
@@ -751,7 +764,7 @@ var StorageEngine = class StorageEngine {
|
|
|
751
764
|
dataHandle = null;
|
|
752
765
|
dataHandlePromise = null;
|
|
753
766
|
sequenceCounter = 0n;
|
|
754
|
-
constructor(dataPath, walPath, lockPath, dimension, wal, index, fileLock, sequenceCounter, writeBatcher) {
|
|
767
|
+
constructor(dataPath, walPath, lockPath, dimension, wal, index, fileLock, sequenceCounter, writeBatcher, readOnly) {
|
|
755
768
|
this.dataPath = dataPath;
|
|
756
769
|
this.walPath = walPath;
|
|
757
770
|
this.lockPath = lockPath;
|
|
@@ -762,6 +775,7 @@ var StorageEngine = class StorageEngine {
|
|
|
762
775
|
this.writeMutex = new Mutex();
|
|
763
776
|
this.sequenceCounter = sequenceCounter;
|
|
764
777
|
this.writeBatcher = writeBatcher;
|
|
778
|
+
this.readOnly = readOnly;
|
|
765
779
|
}
|
|
766
780
|
/**
|
|
767
781
|
* Create or open a storage engine.
|
|
@@ -773,21 +787,32 @@ var StorageEngine = class StorageEngine {
|
|
|
773
787
|
const walPath = basePath + fileExtensions.wal;
|
|
774
788
|
const lockPath = basePath + fileExtensions.lock;
|
|
775
789
|
const dimension = options.dimension ?? 384;
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
790
|
+
const readOnly = options.readOnly ?? false;
|
|
791
|
+
if (!readOnly) await mkdir(dirname(dataPath), { recursive: true });
|
|
792
|
+
else {
|
|
793
|
+
const dataExists = await stat(dataPath).catch(() => null);
|
|
794
|
+
const walExists = await stat(walPath).catch(() => null);
|
|
795
|
+
if (!dataExists && !walExists) throw new Error(`Cannot open database in read-only mode: no database exists at ${dataPath}`);
|
|
796
|
+
}
|
|
797
|
+
let fileLock = null;
|
|
798
|
+
if (!readOnly) {
|
|
799
|
+
fileLock = new FileLock(lockPath, options.lockTimeout);
|
|
800
|
+
await fileLock.acquire();
|
|
801
|
+
}
|
|
779
802
|
try {
|
|
780
|
-
if (
|
|
781
|
-
|
|
803
|
+
if (!readOnly) {
|
|
804
|
+
if (await StorageEngine.checkNeedsMigration(dataPath)) throw new Error(`Database at ${dataPath} uses old format (v1). Please run migration first.`);
|
|
805
|
+
}
|
|
806
|
+
const wal = new Wal(walPath, readOnly);
|
|
782
807
|
const { index, maxSequence } = await KeyIndex.buildFromWal(wal, dataPath);
|
|
783
808
|
let writeBatcher = null;
|
|
784
|
-
if (options.batchingEnabled !== false) {
|
|
809
|
+
if (!readOnly && options.batchingEnabled !== false) {
|
|
785
810
|
writeBatcher = new WriteBatcher(dataPath, wal, index, dimension, options.batchOptions);
|
|
786
811
|
await writeBatcher.initialize();
|
|
787
812
|
}
|
|
788
|
-
return new StorageEngine(dataPath, walPath, lockPath, dimension, wal, index, fileLock, maxSequence + 1n, writeBatcher);
|
|
813
|
+
return new StorageEngine(dataPath, walPath, lockPath, dimension, wal, index, fileLock, maxSequence + 1n, writeBatcher, readOnly);
|
|
789
814
|
} catch (error) {
|
|
790
|
-
await fileLock.release();
|
|
815
|
+
if (fileLock) await fileLock.release();
|
|
791
816
|
throw error;
|
|
792
817
|
}
|
|
793
818
|
}
|
|
@@ -814,6 +839,7 @@ var StorageEngine = class StorageEngine {
|
|
|
814
839
|
* Implements: data → fsync → WAL → fsync → index
|
|
815
840
|
*/
|
|
816
841
|
async writeRecord(key, embedding, op = opType.insert) {
|
|
842
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
817
843
|
if (embedding.length !== this.dimension) throw new Error(`Embedding dimension mismatch: expected ${this.dimension}, got ${embedding.length}`);
|
|
818
844
|
if (this.writeBatcher) return this.writeRecordBatched(key, embedding, op, this.writeBatcher);
|
|
819
845
|
return this.writeRecordImmediate(key, embedding, op);
|
|
@@ -969,7 +995,13 @@ var StorageEngine = class StorageEngine {
|
|
|
969
995
|
this.dataHandle = null;
|
|
970
996
|
}
|
|
971
997
|
await this.wal.close();
|
|
972
|
-
await this.fileLock.release();
|
|
998
|
+
if (this.fileLock) await this.fileLock.release();
|
|
999
|
+
}
|
|
1000
|
+
/**
|
|
1001
|
+
* Check if the storage engine is in read-only mode.
|
|
1002
|
+
*/
|
|
1003
|
+
isReadOnly() {
|
|
1004
|
+
return this.readOnly;
|
|
973
1005
|
}
|
|
974
1006
|
/**
|
|
975
1007
|
* Append data to the data file.
|
|
@@ -1003,6 +1035,11 @@ var StorageEngine = class StorageEngine {
|
|
|
1003
1035
|
async getDataHandle() {
|
|
1004
1036
|
if (this.dataHandle) return this.dataHandle;
|
|
1005
1037
|
this.dataHandlePromise ??= (async () => {
|
|
1038
|
+
if (this.readOnly) {
|
|
1039
|
+
const handle$1 = await open(this.dataPath, "r");
|
|
1040
|
+
this.dataHandle = handle$1;
|
|
1041
|
+
return handle$1;
|
|
1042
|
+
}
|
|
1006
1043
|
await mkdir(dirname(this.dataPath), { recursive: true });
|
|
1007
1044
|
const handle = await open(this.dataPath, "r+").catch(async () => {
|
|
1008
1045
|
return open(this.dataPath, "w+");
|
|
@@ -1027,7 +1064,7 @@ async function detectVersion(filePath) {
|
|
|
1027
1064
|
const buffer = new Uint8Array(headerSize);
|
|
1028
1065
|
await fileHandle.read(buffer, 0, headerSize, 0);
|
|
1029
1066
|
const view = new DataView(buffer.buffer);
|
|
1030
|
-
if (view.getUint32(0,
|
|
1067
|
+
if (view.getUint32(0, false) !== headerMagic) return null;
|
|
1031
1068
|
return view.getUint16(4, true);
|
|
1032
1069
|
} finally {
|
|
1033
1070
|
await fileHandle.close();
|
|
@@ -1141,7 +1178,7 @@ var CandidateSet = class {
|
|
|
1141
1178
|
}
|
|
1142
1179
|
add(key, value) {
|
|
1143
1180
|
invariant(key, "Key must be provided.");
|
|
1144
|
-
invariant(value, "Value must be provided.");
|
|
1181
|
+
invariant(value !== void 0 && value !== null, "Value must be provided.");
|
|
1145
1182
|
if (this.heap.length < this.size) {
|
|
1146
1183
|
this.heap.push(new CandidateSetEntry(key, value));
|
|
1147
1184
|
this.bubbleUp(this.heap.length - 1);
|
|
@@ -1203,6 +1240,80 @@ var CandidateSetEntry = class {
|
|
|
1203
1240
|
}
|
|
1204
1241
|
};
|
|
1205
1242
|
|
|
1243
|
+
//#endregion
|
|
1244
|
+
//#region src/lru-cache.ts
|
|
1245
|
+
/**
 * Generic LRU (least recently used) cache.
 * Relies on Map preserving insertion order: the first key is the least
 * recently used entry and the last key is the most recently used one.
 */
var LRUCache = class {
  cache;
  maxSize;
  /**
   * Creates a new LRU cache with the specified maximum size.
   * @param maxSize - Maximum number of entries to cache (must be a positive integer)
   */
  constructor(maxSize) {
    invariant(typeof maxSize === "number", "maxSize must be a number");
    invariant(Number.isInteger(maxSize), "maxSize must be an integer");
    invariant(maxSize > 0, "maxSize must be a positive integer");
    this.maxSize = maxSize;
    this.cache = /* @__PURE__ */ new Map();
  }
  /**
   * Clears all entries from the cache.
   */
  clear() {
    this.cache.clear();
  }
  /**
   * Gets a value from the cache and moves it to the MRU position.
   * @param key - The key to look up
   * @returns The cached value, or undefined if the key is not present
   */
  get(key) {
    // Test membership rather than the value: an entry whose stored value is
    // `undefined` is still a hit and must be promoted, otherwise has() and
    // the eviction order disagree about it.
    if (!this.cache.has(key)) return;
    const value = this.cache.get(key);
    // Delete + set re-inserts the key at the end (MRU) of the Map's order.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }
  /**
   * Returns the maximum number of entries the cache can hold.
   */
  getMaxSize() {
    return this.maxSize;
  }
  /**
   * Checks if a key exists in the cache without affecting LRU order.
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  has(key) {
    return this.cache.has(key);
  }
  /**
   * Sets a value in the cache. If the cache is at capacity, evicts the LRU entry.
   * If the key already exists, updates the value and moves it to the MRU position.
   * @param key - The key to set
   * @param value - The value to cache
   */
  set(key, value) {
    // Map#delete returns true when the key existed. In that case the size
    // does not grow, so nothing needs to be evicted.
    const replaced = this.cache.delete(key);
    if (!replaced && this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, value);
  }
  /**
   * Returns the current number of entries in the cache.
   */
  size() {
    return this.cache.size;
  }
};
|
|
1316
|
+
|
|
1206
1317
|
//#endregion
|
|
1207
1318
|
//#region src/engine.ts
|
|
1208
1319
|
const defaultModelUri = "hf:CompendiumLabs/bge-small-en-v1.5-gguf/bge-small-en-v1.5-q8_0.gguf";
|
|
@@ -1213,16 +1324,20 @@ var EmbeddingEngine = class {
|
|
|
1213
1324
|
storePath;
|
|
1214
1325
|
cacheDir;
|
|
1215
1326
|
dimension;
|
|
1327
|
+
readOnly;
|
|
1216
1328
|
llama;
|
|
1217
1329
|
model;
|
|
1218
1330
|
embeddingContext;
|
|
1219
1331
|
initPromise;
|
|
1220
1332
|
storageInitPromise;
|
|
1221
1333
|
embeddingCache = null;
|
|
1334
|
+
textEmbeddingCache = null;
|
|
1222
1335
|
constructor(options) {
|
|
1223
1336
|
this.storePath = options.storePath;
|
|
1224
1337
|
this.cacheDir = options.cacheDir ?? defaultCacheDir;
|
|
1225
1338
|
this.dimension = defaultDimension;
|
|
1339
|
+
this.readOnly = options.readOnly ?? false;
|
|
1340
|
+
if (options.embeddingCacheSize && options.embeddingCacheSize > 0) this.textEmbeddingCache = new LRUCache(options.embeddingCacheSize);
|
|
1226
1341
|
}
|
|
1227
1342
|
/**
|
|
1228
1343
|
* Gets or initializes the storage engine
|
|
@@ -1236,10 +1351,11 @@ var EmbeddingEngine = class {
|
|
|
1236
1351
|
return this.storageEngine;
|
|
1237
1352
|
}
|
|
1238
1353
|
async initializeStorage() {
|
|
1239
|
-
await ensureV2Format(this.storePath, this.dimension);
|
|
1354
|
+
if (!this.readOnly) await ensureV2Format(this.storePath, this.dimension);
|
|
1240
1355
|
return StorageEngine.create({
|
|
1241
1356
|
dataPath: this.storePath,
|
|
1242
|
-
dimension: this.dimension
|
|
1357
|
+
dimension: this.dimension,
|
|
1358
|
+
readOnly: this.readOnly
|
|
1243
1359
|
});
|
|
1244
1360
|
}
|
|
1245
1361
|
/**
|
|
@@ -1306,13 +1422,20 @@ var EmbeddingEngine = class {
|
|
|
1306
1422
|
/**
|
|
1307
1423
|
* Internal method that returns embedding as Float32Array for performance
|
|
1308
1424
|
* Uses Float32Array throughout internal operations to avoid boxing overhead
|
|
1425
|
+
* Checks the text embedding cache first to avoid regenerating embeddings
|
|
1309
1426
|
*/
|
|
1310
1427
|
async generateEmbeddingFloat32(text) {
|
|
1428
|
+
if (this.textEmbeddingCache) {
|
|
1429
|
+
const cached = this.textEmbeddingCache.get(text);
|
|
1430
|
+
if (cached) return cached;
|
|
1431
|
+
}
|
|
1311
1432
|
await this.ensureModelLoaded();
|
|
1312
1433
|
invariant(this.embeddingContext, "Embedding context not initialized");
|
|
1313
1434
|
const truncatedText = this.truncateToContextSize(text);
|
|
1314
1435
|
const embedding = await this.embeddingContext.getEmbeddingFor(truncatedText);
|
|
1315
|
-
|
|
1436
|
+
const result = new Float32Array(embedding.vector);
|
|
1437
|
+
if (this.textEmbeddingCache) this.textEmbeddingCache.set(text, result);
|
|
1438
|
+
return result;
|
|
1316
1439
|
}
|
|
1317
1440
|
/**
|
|
1318
1441
|
* Retrieves an embedding entry by key
|
|
@@ -1375,6 +1498,7 @@ var EmbeddingEngine = class {
|
|
|
1375
1498
|
* @param text - Text to embed and store
|
|
1376
1499
|
*/
|
|
1377
1500
|
async store(key, text) {
|
|
1501
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
1378
1502
|
invariant(key, "Key must be provided.");
|
|
1379
1503
|
invariant(text, "Text must be provided.");
|
|
1380
1504
|
const embedding = await this.generateEmbeddingFloat32(text);
|
|
@@ -1387,17 +1511,25 @@ var EmbeddingEngine = class {
|
|
|
1387
1511
|
* Stores multiple text embeddings in batch
|
|
1388
1512
|
* More efficient than calling store() multiple times
|
|
1389
1513
|
* Generates embeddings in parallel and writes records sequentially
|
|
1514
|
+
* Uses text embedding cache to avoid regenerating embeddings for duplicate texts
|
|
1390
1515
|
* @param items - Array of {key, text} objects to store
|
|
1391
1516
|
*/
|
|
1392
1517
|
async storeMany(items) {
|
|
1518
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
1393
1519
|
invariant(items.length > 0, "Items array must not be empty.");
|
|
1394
1520
|
await this.ensureModelLoaded();
|
|
1395
1521
|
const embeddingContext = this.embeddingContext;
|
|
1396
1522
|
invariant(embeddingContext, "Embedding context not initialized");
|
|
1397
1523
|
const embeddingPromises = items.map(async (item) => {
|
|
1524
|
+
if (this.textEmbeddingCache) {
|
|
1525
|
+
const cached = this.textEmbeddingCache.get(item.text);
|
|
1526
|
+
if (cached) return cached;
|
|
1527
|
+
}
|
|
1398
1528
|
const truncatedText = this.truncateToContextSize(item.text);
|
|
1399
1529
|
const embedding = await embeddingContext.getEmbeddingFor(truncatedText);
|
|
1400
|
-
|
|
1530
|
+
const result = new Float32Array(embedding.vector);
|
|
1531
|
+
if (this.textEmbeddingCache) this.textEmbeddingCache.set(item.text, result);
|
|
1532
|
+
return result;
|
|
1401
1533
|
});
|
|
1402
1534
|
const embeddingsList = await Promise.all(embeddingPromises);
|
|
1403
1535
|
const storage = await this.ensureStorageEngine();
|
|
@@ -1419,6 +1551,7 @@ var EmbeddingEngine = class {
|
|
|
1419
1551
|
* @returns true if the entry was deleted, false if it didn't exist
|
|
1420
1552
|
*/
|
|
1421
1553
|
async delete(key) {
|
|
1554
|
+
if (this.readOnly) throw new ReadOnlyError();
|
|
1422
1555
|
invariant(key, "Key must be provided.");
|
|
1423
1556
|
const deleted = await (await this.ensureStorageEngine()).deleteRecord(key);
|
|
1424
1557
|
if (deleted && this.embeddingCache !== null) this.embeddingCache.delete(key);
|
|
@@ -1461,11 +1594,32 @@ var EmbeddingEngine = class {
|
|
|
1461
1594
|
return dotProduct / (magnitudeA * magnitudeB);
|
|
1462
1595
|
}
|
|
1463
1596
|
/**
|
|
1597
|
+
* Check if the engine is in read-only mode.
|
|
1598
|
+
*/
|
|
1599
|
+
isReadOnly() {
|
|
1600
|
+
return this.readOnly;
|
|
1601
|
+
}
|
|
1602
|
+
/**
|
|
1603
|
+
* Gets statistics about the text embedding cache.
|
|
1604
|
+
* @returns Cache stats if enabled, null if cache is disabled
|
|
1605
|
+
*/
|
|
1606
|
+
getTextEmbeddingCacheStats() {
|
|
1607
|
+
if (!this.textEmbeddingCache) return null;
|
|
1608
|
+
return {
|
|
1609
|
+
size: this.textEmbeddingCache.size(),
|
|
1610
|
+
maxSize: this.textEmbeddingCache.getMaxSize()
|
|
1611
|
+
};
|
|
1612
|
+
}
|
|
1613
|
+
/**
|
|
1464
1614
|
* Disposes of resources and closes the storage engine
|
|
1465
1615
|
* Call this when you're done using the engine to free up memory
|
|
1466
1616
|
*/
|
|
1467
1617
|
async dispose() {
|
|
1468
1618
|
this.embeddingCache = null;
|
|
1619
|
+
if (this.textEmbeddingCache) {
|
|
1620
|
+
this.textEmbeddingCache.clear();
|
|
1621
|
+
this.textEmbeddingCache = null;
|
|
1622
|
+
}
|
|
1469
1623
|
if (this.storageEngine) {
|
|
1470
1624
|
await this.storageEngine.close();
|
|
1471
1625
|
this.storageEngine = null;
|
|
@@ -1485,4 +1639,4 @@ var EmbeddingEngine = class {
|
|
|
1485
1639
|
};
|
|
1486
1640
|
|
|
1487
1641
|
//#endregion
|
|
1488
|
-
export {
|
|
1642
|
+
export { ReadOnlyError as a, DatabaseLockedError as i, LRUCache as n, fileExtensions as o, Wal as r, opType as s, EmbeddingEngine as t };
|