verso-db 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +81 -49
  3. package/dist/BinaryHeap.d.ts +16 -5
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +98 -17
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +170 -15
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -60
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +104 -0
  20. package/dist/SearchWorker.d.ts.map +1 -0
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts +19 -5
  24. package/dist/VectorDB.d.ts.map +1 -1
  25. package/dist/VectorDB.js +246 -0
  26. package/dist/VectorDB.js.map +1 -0
  27. package/dist/WorkerPool.d.ts +92 -0
  28. package/dist/WorkerPool.d.ts.map +1 -0
  29. package/dist/WorkerPool.js +266 -0
  30. package/dist/WorkerPool.js.map +1 -0
  31. package/dist/backends/JsDistanceBackend.d.ts +3 -20
  32. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  33. package/dist/backends/JsDistanceBackend.js +163 -0
  34. package/dist/backends/JsDistanceBackend.js.map +1 -0
  35. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  36. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  37. package/dist/encoding/DeltaEncoder.js +199 -0
  38. package/dist/encoding/DeltaEncoder.js.map +1 -0
  39. package/dist/errors.js +97 -0
  40. package/dist/errors.js.map +1 -0
  41. package/dist/index.d.ts +16 -17
  42. package/dist/index.d.ts.map +1 -1
  43. package/dist/index.js +61 -3419
  44. package/dist/index.js.map +1 -0
  45. package/dist/presets.d.ts +9 -9
  46. package/dist/presets.d.ts.map +1 -1
  47. package/dist/presets.js +205 -0
  48. package/dist/presets.js.map +1 -0
  49. package/dist/quantization/ScalarQuantizer.d.ts +10 -34
  50. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  51. package/dist/quantization/ScalarQuantizer.js +346 -0
  52. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  53. package/dist/storage/BatchWriter.d.ts.map +1 -1
  54. package/dist/storage/BatchWriter.js +351 -0
  55. package/dist/storage/BatchWriter.js.map +1 -0
  56. package/dist/storage/BunStorageBackend.d.ts +12 -5
  57. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  58. package/dist/storage/BunStorageBackend.js +182 -0
  59. package/dist/storage/BunStorageBackend.js.map +1 -0
  60. package/dist/storage/MemoryBackend.d.ts.map +1 -1
  61. package/dist/storage/MemoryBackend.js +109 -0
  62. package/dist/storage/MemoryBackend.js.map +1 -0
  63. package/dist/storage/OPFSBackend.d.ts +9 -1
  64. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  65. package/dist/storage/OPFSBackend.js +325 -0
  66. package/dist/storage/OPFSBackend.js.map +1 -0
  67. package/dist/storage/StorageBackend.d.ts +1 -1
  68. package/dist/storage/StorageBackend.js +12 -0
  69. package/dist/storage/StorageBackend.js.map +1 -0
  70. package/dist/storage/WriteAheadLog.d.ts +15 -11
  71. package/dist/storage/WriteAheadLog.d.ts.map +1 -1
  72. package/dist/storage/WriteAheadLog.js +321 -0
  73. package/dist/storage/WriteAheadLog.js.map +1 -0
  74. package/dist/storage/createStorageBackend.d.ts +4 -0
  75. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  76. package/dist/storage/createStorageBackend.js +119 -0
  77. package/dist/storage/createStorageBackend.js.map +1 -0
  78. package/dist/storage/index.d.ts +3 -3
  79. package/dist/storage/index.js +33 -0
  80. package/dist/storage/index.js.map +1 -0
  81. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  82. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  83. package/dist/storage/nodeFsRuntime.js +105 -0
  84. package/dist/storage/nodeFsRuntime.js.map +1 -0
  85. package/package.json +47 -23
  86. package/dist/Storage.d.ts +0 -54
  87. package/dist/Storage.d.ts.map +0 -1
  88. package/dist/backends/DistanceBackend.d.ts +0 -5
  89. package/dist/backends/DistanceBackend.d.ts.map +0 -1
  90. package/src/BinaryHeap.ts +0 -131
  91. package/src/Collection.ts +0 -695
  92. package/src/HNSWIndex.ts +0 -1839
  93. package/src/MaxBinaryHeap.ts +0 -175
  94. package/src/Storage.ts +0 -435
  95. package/src/VectorDB.ts +0 -109
  96. package/src/backends/DistanceBackend.ts +0 -17
  97. package/src/backends/JsDistanceBackend.ts +0 -227
  98. package/src/encoding/DeltaEncoder.ts +0 -217
  99. package/src/errors.ts +0 -110
  100. package/src/index.ts +0 -138
  101. package/src/presets.ts +0 -229
  102. package/src/quantization/ScalarQuantizer.ts +0 -383
  103. package/src/storage/BatchWriter.ts +0 -336
  104. package/src/storage/BunStorageBackend.ts +0 -161
  105. package/src/storage/MemoryBackend.ts +0 -120
  106. package/src/storage/OPFSBackend.ts +0 -250
  107. package/src/storage/StorageBackend.ts +0 -74
  108. package/src/storage/WriteAheadLog.ts +0 -326
  109. package/src/storage/createStorageBackend.ts +0 -137
  110. package/src/storage/index.ts +0 -53
@@ -0,0 +1,1186 @@
1
+ import { HNSWIndex } from './HNSWIndex.js';
2
+ import { DimensionMismatchError, DuplicateVectorError, VectorDBError } from './errors.js';
3
/** Shared frozen placeholder handed out for vectors that have no stored metadata. */
const EMPTY_METADATA = Object.freeze({});
/** Metadata filter operators accepted by queries; built once at module load instead of per call. */
const SUPPORTED_OPERATORS = new Set([
    '$eq', '$ne',
    '$gt', '$gte',
    '$lt', '$lte',
    '$in', '$nin',
]);
/** Version stamped into the state pointer file and required when it is read back. */
const COLLECTION_STATE_VERSION = 1;
7
+ export class Collection {
8
+ static MAX_INTERNAL_ID = 0xFFFFFFFE;
9
+ name;
10
+ dimension;
11
+ metric;
12
+ M; // HNSW M parameter
13
+ efConstruction; // HNSW efConstruction parameter
14
+ storage;
15
+ stateKey; // Storage key for atomic state pointer
16
+ defaultIndexKey;
17
+ defaultMetaKey;
18
+ defaultDeletedKey;
19
+ indexKey; // Storage key for HNSW index data
20
+ metaKey; // Storage key for metadata
21
+ deletedKey; // Storage key for deleted IDs
22
+ hnsw;
23
+ idMap; // Maps string IDs to internal numeric IDs
24
+ idReverseMap; // Maps internal numeric IDs to string IDs
25
+ metadata; // Stores metadata for each vector
26
+ deletedIds; // Tombstone markers for deleted vectors
27
+ nextNumericId; // Monotonic counter for generating unique internal IDs
28
+ activeCount; // O(1) counter for non-deleted vectors
29
+ autoPersist;
30
+ dirty;
31
+ mutationLock;
32
+ constructor(name, config, storage, options = {}) {
33
+ this.name = name;
34
+ this.dimension = config.dimension;
35
+ this.metric = config.metric ?? 'cosine';
36
+ this.M = config.M ?? 16;
37
+ this.efConstruction = config.efConstruction ?? 200;
38
+ this.storage = storage;
39
+ this.defaultIndexKey = `${name}/${name}.hnsw`;
40
+ this.defaultMetaKey = `${name}/${name}.meta`;
41
+ this.defaultDeletedKey = `${name}/${name}.deleted`;
42
+ this.stateKey = `${name}/${name}.state`;
43
+ this.indexKey = this.defaultIndexKey;
44
+ this.metaKey = this.defaultMetaKey;
45
+ this.deletedKey = this.defaultDeletedKey;
46
+ // Initialize HNSW index with the specified parameters
47
+ this.hnsw = new HNSWIndex(config.dimension, this.metric, this.M, this.efConstruction);
48
+ this.idMap = new Map();
49
+ this.idReverseMap = new Map();
50
+ this.metadata = new Map();
51
+ this.deletedIds = new Set();
52
+ this.nextNumericId = 0;
53
+ this.activeCount = 0;
54
+ this.autoPersist = options.autoPersist ?? false;
55
+ this.dirty = false;
56
+ this.mutationLock = Promise.resolve();
57
+ }
58
+ async init() {
59
+ // Load existing data if files exist
60
+ await this.loadFromDisk();
61
+ }
62
+ async withMutationLock(fn) {
63
+ const previous = this.mutationLock;
64
+ let release;
65
+ this.mutationLock = new Promise((resolve) => {
66
+ release = resolve;
67
+ });
68
+ await previous;
69
+ try {
70
+ return await fn();
71
+ }
72
+ finally {
73
+ release();
74
+ }
75
+ }
76
+ async flush() {
77
+ await this.withMutationLock(async () => {
78
+ if (!this.dirty)
79
+ return;
80
+ await this.saveToDiskUnlocked();
81
+ });
82
+ }
83
+ captureMutationSnapshot() {
84
+ return {
85
+ hnswBuffer: this.hnsw.serialize(),
86
+ idMap: new Map(this.idMap),
87
+ idReverseMap: new Map(this.idReverseMap),
88
+ metadata: new Map(this.metadata),
89
+ deletedIds: new Set(this.deletedIds),
90
+ nextNumericId: this.nextNumericId,
91
+ activeCount: this.activeCount,
92
+ indexKey: this.indexKey,
93
+ metaKey: this.metaKey,
94
+ deletedKey: this.deletedKey,
95
+ dirty: this.dirty,
96
+ };
97
+ }
98
+ restoreMutationSnapshot(snapshot) {
99
+ const restoredHnsw = HNSWIndex.deserialize(snapshot.hnswBuffer.slice(0));
100
+ if (this.hnsw && typeof this.hnsw.destroy === 'function') {
101
+ this.hnsw.destroy();
102
+ }
103
+ this.hnsw = restoredHnsw;
104
+ this.idMap = new Map(snapshot.idMap);
105
+ this.idReverseMap = new Map(snapshot.idReverseMap);
106
+ this.metadata = new Map(snapshot.metadata);
107
+ this.deletedIds = new Set(snapshot.deletedIds);
108
+ this.nextNumericId = snapshot.nextNumericId;
109
+ this.activeCount = snapshot.activeCount;
110
+ this.setActiveStorageKeys(snapshot.indexKey, snapshot.metaKey, snapshot.deletedKey);
111
+ this.dirty = snapshot.dirty;
112
+ }
113
+ /**
114
+ * Wrap a mutation in snapshot-capture + auto-persist.
115
+ * The mutation fn should set `this.dirty = true` when it mutates state.
116
+ * If autoPersist is on and dirty was set, saves after fn completes.
117
+ * On error, restores from snapshot.
118
+ */
119
+ async withAutoSave(fn) {
120
+ const snapshot = this.autoPersist ? this.captureMutationSnapshot() : null;
121
+ try {
122
+ const result = await fn();
123
+ if (this.dirty && this.autoPersist) {
124
+ await this.saveToDiskUnlocked();
125
+ }
126
+ return result;
127
+ }
128
+ catch (err) {
129
+ if (snapshot) {
130
+ this.restoreMutationSnapshot(snapshot);
131
+ }
132
+ throw err;
133
+ }
134
+ }
135
+ reserveNumericIds(count) {
136
+ if (!Number.isInteger(count) || count < 0) {
137
+ throw new VectorDBError(`Invalid ID reservation count: ${count}`, 'VALIDATION_ERROR');
138
+ }
139
+ if (count === 0)
140
+ return this.nextNumericId;
141
+ const startId = this.nextNumericId;
142
+ const endId = startId + count - 1;
143
+ if (!Number.isSafeInteger(endId) || endId > Collection.MAX_INTERNAL_ID) {
144
+ throw new VectorDBError(`Collection '${this.name}' cannot allocate ${count} internal IDs: exhausted ID space (max ${Collection.MAX_INTERNAL_ID})`, 'VALIDATION_ERROR');
145
+ }
146
+ this.nextNumericId = endId + 1;
147
+ return startId;
148
+ }
149
+ setActiveStorageKeys(indexKey, metaKey, deletedKey) {
150
+ this.indexKey = indexKey;
151
+ this.metaKey = metaKey;
152
+ this.deletedKey = deletedKey;
153
+ }
154
+ isValidStorageState(value) {
155
+ if (!value || typeof value !== 'object')
156
+ return false;
157
+ const state = value;
158
+ return (state.version === COLLECTION_STATE_VERSION &&
159
+ typeof state.indexKey === 'string' &&
160
+ typeof state.metaKey === 'string' &&
161
+ typeof state.deletedKey === 'string');
162
+ }
163
+ async loadStorageState() {
164
+ const stateBuffer = await this.storage.read(this.stateKey);
165
+ if (!stateBuffer)
166
+ return null;
167
+ try {
168
+ const raw = JSON.parse(new TextDecoder().decode(stateBuffer));
169
+ if (this.isValidStorageState(raw)) {
170
+ return raw;
171
+ }
172
+ }
173
+ catch {
174
+ // Ignore malformed state file and fall back to legacy keys.
175
+ }
176
+ return null;
177
+ }
178
+ getVersionedDataKeys(saveId) {
179
+ return {
180
+ indexKey: `${this.name}/${this.name}.${saveId}.hnsw`,
181
+ metaKey: `${this.name}/${this.name}.${saveId}.meta`,
182
+ deletedKey: `${this.name}/${this.name}.${saveId}.deleted`,
183
+ };
184
+ }
185
+ isVersionedDataKey(key) {
186
+ if (key === this.defaultIndexKey ||
187
+ key === this.defaultMetaKey ||
188
+ key === this.defaultDeletedKey) {
189
+ return false;
190
+ }
191
+ const prefix = `${this.name}/${this.name}.`;
192
+ return key.startsWith(prefix) && (key.endsWith('.hnsw') ||
193
+ key.endsWith('.meta') ||
194
+ key.endsWith('.deleted'));
195
+ }
196
+ validateLoadedIndexConfig(index) {
197
+ if (index.getDimension() !== this.dimension ||
198
+ index.getMetric() !== this.metric ||
199
+ index.getM() !== this.M ||
200
+ index.getEfConstruction() !== this.efConstruction) {
201
+ throw new VectorDBError(`Collection '${this.name}' index parameters do not match manifest configuration`, 'CORRUPT_COLLECTION');
202
+ }
203
+ }
204
+ async loadFromDisk() {
205
+ // Reset in-memory state before loading.
206
+ this.idMap.clear();
207
+ this.idReverseMap.clear();
208
+ this.metadata.clear();
209
+ this.deletedIds.clear();
210
+ this.nextNumericId = 0;
211
+ this.activeCount = 0;
212
+ const persistedState = await this.loadStorageState();
213
+ if (persistedState) {
214
+ this.setActiveStorageKeys(persistedState.indexKey, persistedState.metaKey, persistedState.deletedKey);
215
+ }
216
+ else {
217
+ this.setActiveStorageKeys(this.defaultIndexKey, this.defaultMetaKey, this.defaultDeletedKey);
218
+ }
219
+ // Load the binary HNSW index via StorageBackend
220
+ const indexData = await this.storage.read(this.indexKey);
221
+ if (indexData) {
222
+ try {
223
+ const loaded = HNSWIndex.deserialize(indexData);
224
+ this.validateLoadedIndexConfig(loaded);
225
+ this.hnsw = loaded;
226
+ }
227
+ catch (e) {
228
+ throw new VectorDBError(`Collection '${this.name}' index file '${this.indexKey}' is corrupt or unreadable: ${e instanceof Error ? e.message : String(e)}`, 'CORRUPT_COLLECTION');
229
+ }
230
+ }
231
+ // Load metadata via StorageBackend
232
+ const metaData = await this.storage.read(this.metaKey);
233
+ if (metaData) {
234
+ try {
235
+ const metaContent = new TextDecoder().decode(metaData);
236
+ const lines = metaContent.split('\n');
237
+ for (let i = 0; i < lines.length; i++) {
238
+ const line = lines[i];
239
+ if (line.length === 0 || line.trim().length === 0)
240
+ continue;
241
+ const parsed = this.parseMetadataLine(line);
242
+ if (!parsed) {
243
+ throw new VectorDBError(`Collection '${this.name}' metadata file '${this.metaKey}' is corrupt at line ${i + 1}`, 'CORRUPT_COLLECTION');
244
+ }
245
+ const { id, internalId, metadata } = parsed;
246
+ this.idMap.set(id, internalId);
247
+ this.idReverseMap.set(internalId, id);
248
+ if (metadata !== undefined) {
249
+ this.metadata.set(internalId, metadata);
250
+ }
251
+ }
252
+ }
253
+ catch (e) {
254
+ if (e instanceof VectorDBError) {
255
+ throw e;
256
+ }
257
+ throw new VectorDBError(`Collection '${this.name}' metadata file '${this.metaKey}' is corrupt or unreadable: ${e instanceof Error ? e.message : String(e)}`, 'CORRUPT_COLLECTION');
258
+ }
259
+ }
260
+ // Load deleted IDs via StorageBackend
261
+ const deletedData = await this.storage.read(this.deletedKey);
262
+ if (deletedData) {
263
+ try {
264
+ const deletedContent = new TextDecoder().decode(deletedData);
265
+ const deletedArray = JSON.parse(deletedContent);
266
+ if (!Array.isArray(deletedArray)) {
267
+ throw new VectorDBError('Deleted-ID payload is not an array', 'CORRUPT_COLLECTION');
268
+ }
269
+ for (let i = 0; i < deletedArray.length; i++) {
270
+ const numericId = deletedArray[i];
271
+ if (!Number.isInteger(numericId) ||
272
+ numericId < 0 ||
273
+ numericId > Collection.MAX_INTERNAL_ID) {
274
+ throw new VectorDBError(`Invalid deleted internal ID at index ${i}: ${numericId}`, 'CORRUPT_COLLECTION');
275
+ }
276
+ }
277
+ this.deletedIds = new Set(deletedArray);
278
+ }
279
+ catch (e) {
280
+ throw new VectorDBError(`Collection '${this.name}' deleted-ID file '${this.deletedKey}' is corrupt or unreadable: ${e instanceof Error ? e.message : String(e)}`, 'CORRUPT_COLLECTION');
281
+ }
282
+ }
283
+ this.reconcileLoadedState();
284
+ this.dirty = false;
285
+ }
286
+ async add(config) {
287
+ return this.addInternal(config, false);
288
+ }
289
+ async query(config) {
290
+ return this.executeQuery(config, 'Query vector');
291
+ }
292
+ /**
293
+ * Batch query for multiple vectors at once.
294
+ * Shares query semantics with query(), including adaptive candidate expansion
295
+ * for filters and tombstones.
296
+ *
297
+ * @param configs Array of query configurations
298
+ * @returns Array of query results, one per query
299
+ */
300
+ async queryBatch(configs) {
301
+ if (!Array.isArray(configs)) {
302
+ throw new VectorDBError('Query batch must be an array of query configs', 'VALIDATION_ERROR');
303
+ }
304
+ if (configs.length === 0)
305
+ return [];
306
+ for (let i = 0; i < configs.length; i++) {
307
+ if (!this.isPlainObject(configs[i])) {
308
+ throw new VectorDBError(`Query ${i} must be a plain object`, 'VALIDATION_ERROR');
309
+ }
310
+ }
311
+ const results = new Array(configs.length);
312
+ for (let i = 0; i < configs.length; i++) {
313
+ results[i] = this.executeQuery(configs[i], `Query ${i}`);
314
+ }
315
+ return results;
316
+ }
317
+ /**
318
+ * Brute-force KNN search for validation and correctness checking
319
+ * This checks all vectors and returns the true k nearest neighbors
320
+ */
321
+ async queryBruteForce(config) {
322
+ this.validateQueryInput(config, 'Query vector');
323
+ const { queryVector, k, filter } = config;
324
+ // Compute distances to all vectors, applying filter during scan
325
+ const queryFloat = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
326
+ const hasFilter = this.hasFilter(filter);
327
+ const distances = [];
328
+ for (const [numericId, vector] of this.hnsw.getAllVectors()) {
329
+ // Skip deleted vectors
330
+ if (this.deletedIds.has(numericId))
331
+ continue;
332
+ // Apply filter before distance computation when possible
333
+ if (hasFilter) {
334
+ const meta = this.metadata.get(numericId) || EMPTY_METADATA;
335
+ if (!this.matchesFilter(meta, filter))
336
+ continue;
337
+ }
338
+ const distance = this.hnsw.calculateDistance(queryFloat, vector);
339
+ distances.push({ id: numericId, distance });
340
+ }
341
+ // Sort by distance and take top k
342
+ distances.sort((a, b) => {
343
+ const diff = a.distance - b.distance;
344
+ return diff !== 0 ? diff : a.id - b.id;
345
+ });
346
+ const filteredResults = distances.slice(0, k);
347
+ // Prepare the output
348
+ const ids = [];
349
+ const distancesOut = [];
350
+ const metadata = [];
351
+ for (const result of filteredResults) {
352
+ const id = this.idReverseMap.get(result.id);
353
+ if (id) {
354
+ ids.push(id);
355
+ distancesOut.push(result.distance);
356
+ metadata.push(this.metadata.get(result.id) || EMPTY_METADATA);
357
+ }
358
+ }
359
+ return {
360
+ ids,
361
+ distances: distancesOut,
362
+ metadata
363
+ };
364
+ }
365
+ /**
366
+ * Upsert vectors (insert or update).
367
+ *
368
+ * For existing IDs, the old vectors are tombstone-deleted and new vectors
369
+ * are inserted. Use compact() periodically to reclaim space from
370
+ * tombstoned vectors.
371
+ *
372
+ * For new IDs, behaves identically to add().
373
+ */
374
+ async upsert(config) {
375
+ return this.addInternal(config, true);
376
+ }
377
+ async addInternal(config, isUpsert) {
378
+ await this.withMutationLock(async () => {
379
+ this.validateAddInput(config, isUpsert);
380
+ if (config.ids.length === 0)
381
+ return;
382
+ await this.withAutoSave(async () => {
383
+ const points = new Array(config.ids.length);
384
+ const startId = this.reserveNumericIds(config.ids.length);
385
+ for (let i = 0; i < config.ids.length; i++) {
386
+ points[i] = { id: startId + i, vector: new Float32Array(config.vectors[i]) };
387
+ }
388
+ await this.hnsw.addPointsBulk(points);
389
+ for (let i = 0; i < config.ids.length; i++) {
390
+ const id = config.ids[i];
391
+ const metadata = config.metadata ? config.metadata[i] : undefined;
392
+ if (isUpsert) {
393
+ const replacementId = this.idMap.get(id);
394
+ if (replacementId !== undefined) {
395
+ if (!this.deletedIds.has(replacementId)) {
396
+ this.activeCount--;
397
+ }
398
+ this.deletedIds.add(replacementId);
399
+ this.idReverseMap.delete(replacementId);
400
+ this.metadata.delete(replacementId);
401
+ }
402
+ }
403
+ const numericId = startId + i;
404
+ this.idMap.set(id, numericId);
405
+ this.idReverseMap.set(numericId, id);
406
+ if (metadata !== undefined) {
407
+ this.metadata.set(numericId, metadata);
408
+ }
409
+ this.activeCount++;
410
+ }
411
+ this.dirty = true;
412
+ });
413
+ });
414
+ }
415
+ /** Returns the vector dimension for this collection. */
416
+ getDimension() { return this.dimension; }
417
+ /** Returns the distance metric for this collection. */
418
+ getMetric() { return this.metric; }
419
+ /** Returns the HNSW M parameter for this collection. */
420
+ getM() { return this.M; }
421
+ /** Returns the HNSW efConstruction parameter for this collection. */
422
+ getEfConstruction() { return this.efConstruction; }
423
+ /**
424
+ * Returns the number of active (non-deleted) vectors in the collection.
425
+ * O(1) via maintained counter.
426
+ */
427
+ count() {
428
+ return this.activeCount;
429
+ }
430
+ /**
431
+ * Returns the total number of tracked vectors including deleted (tombstoned) ones.
432
+ * Use this to determine when compaction might be beneficial.
433
+ */
434
+ countWithDeleted() {
435
+ return this.activeCount + this.deletedIds.size;
436
+ }
437
+ /**
438
+ * Returns the number of deleted (tombstoned) vectors awaiting compaction.
439
+ */
440
+ deletedCount() {
441
+ return this.deletedIds.size;
442
+ }
443
+ /**
444
+ * Mark a vector as deleted (tombstone deletion).
445
+ * The vector remains in the index but is excluded from search results.
446
+ * Use compact() to permanently remove deleted vectors and reclaim space.
447
+ *
448
+ * @param id The string ID of the vector to delete
449
+ * @returns true if the vector was deleted, false if it didn't exist or was already deleted
450
+ */
451
+ deleteInternal(id) {
452
+ const numericId = this.idMap.get(id);
453
+ if (numericId === undefined)
454
+ return false;
455
+ if (this.deletedIds.has(numericId))
456
+ return false;
457
+ this.deletedIds.add(numericId);
458
+ this.activeCount--;
459
+ return true;
460
+ }
461
+ async delete(id) {
462
+ return this.withMutationLock(async () => {
463
+ if (!this.has(id))
464
+ return false;
465
+ return this.withAutoSave(async () => {
466
+ const deleted = this.deleteInternal(id);
467
+ if (!deleted)
468
+ return false;
469
+ this.dirty = true;
470
+ return true;
471
+ });
472
+ });
473
+ }
474
+ /**
475
+ * Mark multiple vectors as deleted (tombstone deletion).
476
+ *
477
+ * @param ids Array of string IDs to delete
478
+ * @returns Number of vectors that were successfully deleted
479
+ */
480
+ async deleteBatch(ids) {
481
+ return this.withMutationLock(async () => {
482
+ return this.withAutoSave(async () => {
483
+ let deleted = 0;
484
+ for (const id of ids) {
485
+ if (this.deleteInternal(id))
486
+ deleted++;
487
+ }
488
+ if (deleted === 0)
489
+ return 0;
490
+ this.dirty = true;
491
+ return deleted;
492
+ });
493
+ });
494
+ }
495
+ /**
496
+ * Check if a vector exists and is not deleted.
497
+ */
498
+ has(id) {
499
+ const numericId = this.idMap.get(id);
500
+ if (numericId === undefined)
501
+ return false;
502
+ return !this.deletedIds.has(numericId);
503
+ }
504
+ /**
505
+ * Check if a vector was deleted (tombstoned).
506
+ */
507
+ isDeleted(id) {
508
+ const numericId = this.idMap.get(id);
509
+ if (numericId === undefined)
510
+ return false;
511
+ return this.deletedIds.has(numericId);
512
+ }
513
+ /**
514
+ * Reorder the internal HNSW index for BFS cache locality.
515
+ * Remaps all internal IDs so that graph neighbors are stored
516
+ * contiguously in memory, improving search cache hit rates.
517
+ */
518
+ async reorderIndex() {
519
+ await this.withMutationLock(async () => {
520
+ await this.withAutoSave(async () => {
521
+ const oldToNew = this.hnsw.reorderForLocality();
522
+ if (oldToNew.size === 0)
523
+ return;
524
+ // Remap idMap and idReverseMap
525
+ const newIdMap = new Map();
526
+ const newIdReverseMap = new Map();
527
+ for (const [strId, oldNum] of this.idMap) {
528
+ const newNum = oldToNew.get(oldNum);
529
+ if (newNum !== undefined) {
530
+ newIdMap.set(strId, newNum);
531
+ newIdReverseMap.set(newNum, strId);
532
+ }
533
+ }
534
+ this.idMap = newIdMap;
535
+ this.idReverseMap = newIdReverseMap;
536
+ // Remap metadata
537
+ const newMetadata = new Map();
538
+ for (const [oldNum, meta] of this.metadata) {
539
+ const newNum = oldToNew.get(oldNum);
540
+ if (newNum !== undefined) {
541
+ newMetadata.set(newNum, meta);
542
+ }
543
+ }
544
+ this.metadata = newMetadata;
545
+ // Remap deletedIds
546
+ const newDeletedIds = new Set();
547
+ for (const oldNum of this.deletedIds) {
548
+ const newNum = oldToNew.get(oldNum);
549
+ if (newNum !== undefined) {
550
+ newDeletedIds.add(newNum);
551
+ }
552
+ }
553
+ this.deletedIds = newDeletedIds;
554
+ this.dirty = true;
555
+ });
556
+ });
557
+ }
558
+ async saveToDisk() {
559
+ await this.withMutationLock(async () => {
560
+ await this.saveToDiskUnlocked();
561
+ });
562
+ }
563
+ async saveToDiskUnlocked() {
564
+ // Ensure the collection directory exists
565
+ await this.storage.mkdir(this.name);
566
+ const saveId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
567
+ const nextKeys = this.getVersionedDataKeys(saveId);
568
+ const previousKeys = {
569
+ indexKey: this.indexKey,
570
+ metaKey: this.metaKey,
571
+ deletedKey: this.deletedKey,
572
+ };
573
+ // Prepare all data before writing
574
+ const indexBytes = new Uint8Array(this.hnsw.serialize());
575
+ const metaLines = [];
576
+ for (const [numericId, id] of this.idReverseMap) {
577
+ const meta = this.metadata.get(numericId);
578
+ metaLines.push(JSON.stringify({
579
+ id,
580
+ numericId,
581
+ metadata: meta ?? {},
582
+ }));
583
+ }
584
+ const metaBytes = new TextEncoder().encode(metaLines.join('\n'));
585
+ const deletedBytes = new TextEncoder().encode(JSON.stringify([...this.deletedIds]));
586
+ // Data writes in parallel
587
+ await Promise.all([
588
+ this.storage.write(nextKeys.indexKey, indexBytes),
589
+ this.storage.write(nextKeys.metaKey, metaBytes),
590
+ this.storage.write(nextKeys.deletedKey, deletedBytes),
591
+ ]);
592
+ // Commit pointer last. Crash before this write keeps previous committed state.
593
+ const state = {
594
+ version: COLLECTION_STATE_VERSION,
595
+ indexKey: nextKeys.indexKey,
596
+ metaKey: nextKeys.metaKey,
597
+ deletedKey: nextKeys.deletedKey,
598
+ };
599
+ await this.storage.write(this.stateKey, new TextEncoder().encode(JSON.stringify(state)));
600
+ this.setActiveStorageKeys(nextKeys.indexKey, nextKeys.metaKey, nextKeys.deletedKey);
601
+ // Shadow copies in parallel, non-fatal
602
+ await Promise.all([
603
+ this.storage.write(this.defaultIndexKey, indexBytes).catch(() => { }),
604
+ this.storage.write(this.defaultMetaKey, metaBytes).catch(() => { }),
605
+ this.storage.write(this.defaultDeletedKey, deletedBytes).catch(() => { }),
606
+ ]);
607
+ // Best-effort cleanup: only delete old versioned files. Legacy fixed keys are
608
+ // intentionally left untouched for backward compatibility.
609
+ const staleKeys = [previousKeys.indexKey, previousKeys.metaKey, previousKeys.deletedKey];
610
+ for (const key of staleKeys) {
611
+ if (!this.isVersionedDataKey(key))
612
+ continue;
613
+ if (key === this.indexKey || key === this.metaKey || key === this.deletedKey)
614
+ continue;
615
+ await this.storage.delete(key).catch(() => { });
616
+ }
617
+ this.dirty = false;
618
+ }
619
+ executeQuery(config, context) {
620
+ this.validateQueryInput(config, context);
621
+ const queryVector = config.queryVector instanceof Float32Array
622
+ ? config.queryVector
623
+ : new Float32Array(config.queryVector);
624
+ const filter = this.hasFilter(config.filter) ? config.filter : undefined;
625
+ const efSearch = config.efSearch ?? Math.max(config.k * 2, 50);
626
+ // Determine whether to use quantized search:
627
+ // - Explicit true/false from config takes precedence
628
+ // - Otherwise auto-detect: use quantized when quantization is enabled
629
+ const useQuantized = config.useQuantizedSearch ?? this.hnsw.isQuantizationEnabled();
630
+ const candidateMultiplier = config.candidateMultiplier ?? 3;
631
+ const candidates = this.searchCandidates(queryVector, config.k, efSearch, filter, useQuantized, candidateMultiplier);
632
+ return this.materializeResults(candidates, config.k);
633
+ }
634
+ searchCandidates(queryVector, k, efSearch, filter, useQuantized = false, candidateMultiplier = 3) {
635
+ const totalCandidates = this.countWithDeleted();
636
+ if (totalCandidates === 0)
637
+ return [];
638
+ let requestK = Math.min(Math.max(k * 2, 50), totalCandidates);
639
+ let requestEf = Math.max(efSearch, requestK);
640
+ let filtered = [];
641
+ while (true) {
642
+ const rawResults = useQuantized
643
+ ? this.hnsw.searchKNNQuantized(queryVector, requestK, candidateMultiplier, requestEf)
644
+ : this.hnsw.searchKNN(queryVector, requestK, requestEf);
645
+ filtered = this.filterAndDeduplicateResults(rawResults, filter);
646
+ if (filtered.length >= k || requestK >= totalCandidates) {
647
+ return filtered;
648
+ }
649
+ const nextK = Math.min(totalCandidates, requestK * 2);
650
+ if (nextK === requestK) {
651
+ return filtered;
652
+ }
653
+ requestK = nextK;
654
+ requestEf = Math.max(requestEf, requestK);
655
+ }
656
+ }
657
+ filterAndDeduplicateResults(results, filter) {
658
+ const filtered = [];
659
+ const hasFilter = !!filter;
660
+ for (const result of results) {
661
+ if (this.deletedIds.has(result.id)) {
662
+ continue;
663
+ }
664
+ if (hasFilter) {
665
+ const metadata = this.metadata.get(result.id) || EMPTY_METADATA;
666
+ if (!this.matchesFilter(metadata, filter)) {
667
+ continue;
668
+ }
669
+ }
670
+ filtered.push(result);
671
+ }
672
+ filtered.sort((a, b) => {
673
+ const diff = a.distance - b.distance;
674
+ return diff !== 0 ? diff : a.id - b.id;
675
+ });
676
+ return filtered;
677
+ }
678
+ materializeResults(candidates, k) {
679
+ const resultCount = Math.min(candidates.length, k);
680
+ const ids = new Array(resultCount);
681
+ const distances = new Array(resultCount);
682
+ const metadata = new Array(resultCount);
683
+ let outIdx = 0;
684
+ for (let i = 0; i < resultCount; i++) {
685
+ const candidate = candidates[i];
686
+ const id = this.idReverseMap.get(candidate.id);
687
+ if (!id)
688
+ continue;
689
+ ids[outIdx] = id;
690
+ distances[outIdx] = candidate.distance;
691
+ metadata[outIdx] = this.metadata.get(candidate.id) || EMPTY_METADATA;
692
+ outIdx++;
693
+ }
694
+ if (outIdx < resultCount) {
695
+ ids.length = outIdx;
696
+ distances.length = outIdx;
697
+ metadata.length = outIdx;
698
+ }
699
+ return { ids, distances, metadata };
700
+ }
701
+ validateQueryInput(config, context) {
702
+ if (!this.isPlainObject(config)) {
703
+ throw new VectorDBError(`${context} config must be a plain object`, 'VALIDATION_ERROR');
704
+ }
705
+ const queryVector = config.queryVector;
706
+ if (!(Array.isArray(queryVector) || queryVector instanceof Float32Array)) {
707
+ throw new VectorDBError(`${context} must include a queryVector (number[] or Float32Array)`, 'VALIDATION_ERROR');
708
+ }
709
+ if (!Number.isInteger(config.k) || config.k <= 0) {
710
+ throw new VectorDBError(`k must be a positive integer, got ${config.k}`, 'VALIDATION_ERROR');
711
+ }
712
+ if (config.queryVector.length !== this.dimension) {
713
+ throw new DimensionMismatchError(this.dimension, config.queryVector.length, context);
714
+ }
715
+ for (let i = 0; i < config.queryVector.length; i++) {
716
+ if (!Number.isFinite(config.queryVector[i])) {
717
+ throw new VectorDBError(`${context} contains non-finite value at dimension ${i}: ${config.queryVector[i]}`, 'VALIDATION_ERROR');
718
+ }
719
+ }
720
+ if (config.efSearch !== undefined) {
721
+ if (!Number.isInteger(config.efSearch) || config.efSearch <= 0) {
722
+ throw new VectorDBError(`efSearch must be a positive integer, got ${config.efSearch}`, 'VALIDATION_ERROR');
723
+ }
724
+ }
725
+ if (config.filter !== undefined) {
726
+ if (!this.isFilterObject(config.filter)) {
727
+ throw new VectorDBError('Filter must be a plain object', 'VALIDATION_ERROR');
728
+ }
729
+ this.validateFilterOperators(config.filter);
730
+ }
731
+ }
732
+ hasFilter(filter) {
733
+ return !!filter && Object.keys(filter).length > 0;
734
+ }
735
+ reconcileLoadedState() {
736
+ const indexVectors = this.hnsw.getAllVectors();
737
+ const existingIds = new Set();
738
+ let maxNumericId = -1;
739
+ for (const [id] of indexVectors) {
740
+ existingIds.add(id);
741
+ if (id > maxNumericId)
742
+ maxNumericId = id;
743
+ }
744
+ // Collect keys to delete (can't delete during iteration of same Map)
745
+ const staleIds = [];
746
+ for (const [id, numericId] of this.idMap) {
747
+ if (!existingIds.has(numericId)) {
748
+ staleIds.push(id);
749
+ }
750
+ }
751
+ for (const id of staleIds) {
752
+ this.idMap.delete(id);
753
+ }
754
+ this.idReverseMap.clear();
755
+ for (const [id, numericId] of this.idMap.entries()) {
756
+ if (this.idReverseMap.has(numericId)) {
757
+ throw new VectorDBError(`Collection '${this.name}' metadata is corrupt: duplicate numeric ID ${numericId}`, 'CORRUPT_COLLECTION');
758
+ }
759
+ this.idReverseMap.set(numericId, id);
760
+ }
761
+ const staleDeletedIds = [];
762
+ for (const numericId of this.deletedIds) {
763
+ if (!existingIds.has(numericId)) {
764
+ staleDeletedIds.push(numericId);
765
+ }
766
+ }
767
+ for (const id of staleDeletedIds) {
768
+ this.deletedIds.delete(id);
769
+ }
770
+ const staleMetaIds = [];
771
+ for (const numericId of this.metadata.keys()) {
772
+ if (!this.idReverseMap.has(numericId)) {
773
+ staleMetaIds.push(numericId);
774
+ }
775
+ }
776
+ for (const id of staleMetaIds) {
777
+ this.metadata.delete(id);
778
+ }
779
+ const missingMappings = [];
780
+ for (const numericId of existingIds) {
781
+ if (this.deletedIds.has(numericId))
782
+ continue;
783
+ if (!this.idReverseMap.has(numericId)) {
784
+ missingMappings.push(numericId);
785
+ }
786
+ }
787
+ if (missingMappings.length > 0) {
788
+ throw new VectorDBError(`Collection '${this.name}' metadata is inconsistent with index: missing ${missingMappings.length} active ID mapping(s)`, 'CORRUPT_COLLECTION');
789
+ }
790
+ if (maxNumericId > Collection.MAX_INTERNAL_ID) {
791
+ throw new VectorDBError(`Collection '${this.name}' contains unsupported internal ID ${maxNumericId} (max ${Collection.MAX_INTERNAL_ID})`, 'CORRUPT_COLLECTION');
792
+ }
793
+ this.nextNumericId = maxNumericId + 1;
794
+ this.activeCount = 0;
795
+ for (const numericId of this.idMap.values()) {
796
+ if (!this.deletedIds.has(numericId)) {
797
+ this.activeCount++;
798
+ }
799
+ }
800
+ }
801
+ matchesFilter(metadata, filter) {
802
+ for (const key of Object.keys(filter)) {
803
+ const value = filter[key];
804
+ const metaValue = Object.prototype.hasOwnProperty.call(metadata, key)
805
+ ? metadata[key]
806
+ : undefined;
807
+ if (this.isOperatorObject(value)) {
808
+ // Operator validity already checked by validateFilterOperators() in validateQueryInput()
809
+ if (metaValue === undefined && value.$ne === undefined && value.$nin === undefined) {
810
+ return false;
811
+ }
812
+ if (value.$eq !== undefined && !this.deepEqual(metaValue, value.$eq))
813
+ return false;
814
+ if (value.$ne !== undefined && this.deepEqual(metaValue, value.$ne))
815
+ return false;
816
+ // Comparison operators must reject undefined/missing fields explicitly,
817
+ // because JS comparisons like `undefined > 5` return false (not NaN/error),
818
+ // causing `!(undefined > 5)` to incorrectly pass.
819
+ if (value.$gt !== undefined && (metaValue === undefined || !(metaValue > value.$gt)))
820
+ return false;
821
+ if (value.$lt !== undefined && (metaValue === undefined || !(metaValue < value.$lt)))
822
+ return false;
823
+ if (value.$gte !== undefined && (metaValue === undefined || !(metaValue >= value.$gte)))
824
+ return false;
825
+ if (value.$lte !== undefined && (metaValue === undefined || !(metaValue <= value.$lte)))
826
+ return false;
827
+ if (value.$in !== undefined) {
828
+ if (!Array.isArray(value.$in) || !this.matchesInOperator(metaValue, value.$in))
829
+ return false;
830
+ }
831
+ if (value.$nin !== undefined) {
832
+ if (!Array.isArray(value.$nin))
833
+ return false;
834
+ if (this.matchesInOperator(metaValue, value.$nin))
835
+ return false;
836
+ }
837
+ }
838
+ else {
839
+ if (!this.deepEqual(metaValue, value))
840
+ return false;
841
+ }
842
+ }
843
+ return true;
844
+ }
845
+ validateFilterOperators(filter) {
846
+ for (const key of Object.keys(filter)) {
847
+ const value = filter[key];
848
+ if (!this.isOperatorObject(value))
849
+ continue;
850
+ for (const operator of Object.keys(value)) {
851
+ if (!SUPPORTED_OPERATORS.has(operator)) {
852
+ throw new VectorDBError(`Unsupported filter operator '${operator}'`, 'VALIDATION_ERROR');
853
+ }
854
+ }
855
+ }
856
+ }
857
+ validateAddInput(config, allowExistingIds) {
858
+ if (!this.isPlainObject(config)) {
859
+ throw new VectorDBError('Add config must be a plain object', 'VALIDATION_ERROR');
860
+ }
861
+ if (!Array.isArray(config.ids)) {
862
+ throw new VectorDBError('ids must be an array of strings', 'VALIDATION_ERROR');
863
+ }
864
+ if (!Array.isArray(config.vectors)) {
865
+ throw new VectorDBError('vectors must be an array of number[] or Float32Array', 'VALIDATION_ERROR');
866
+ }
867
+ if (config.vectors.length !== config.ids.length) {
868
+ throw new VectorDBError('Number of vectors must match number of IDs', 'VALIDATION_ERROR');
869
+ }
870
+ if (config.metadata !== undefined && !Array.isArray(config.metadata)) {
871
+ throw new VectorDBError('metadata must be an array when provided', 'VALIDATION_ERROR');
872
+ }
873
+ if (config.metadata && config.metadata.length !== config.ids.length) {
874
+ throw new VectorDBError('Number of metadata entries must match number of IDs', 'VALIDATION_ERROR');
875
+ }
876
+ if (config.metadata) {
877
+ for (let i = 0; i < config.metadata.length; i++) {
878
+ const meta = config.metadata[i];
879
+ if (meta === undefined)
880
+ continue;
881
+ if (!this.isPlainObject(meta)) {
882
+ throw new VectorDBError(`Metadata at index ${i} must be a plain object`, 'VALIDATION_ERROR');
883
+ }
884
+ this.validateMetadataValue(meta, `metadata[${i}]`, new Set());
885
+ }
886
+ }
887
+ const seenIds = new Set();
888
+ const duplicateIds = new Set();
889
+ for (let i = 0; i < config.ids.length; i++) {
890
+ const id = config.ids[i];
891
+ if (typeof id !== 'string') {
892
+ throw new VectorDBError(`ID at index ${i} must be a string`, 'VALIDATION_ERROR');
893
+ }
894
+ if (seenIds.has(id)) {
895
+ duplicateIds.add(id);
896
+ }
897
+ else {
898
+ seenIds.add(id);
899
+ }
900
+ if (!allowExistingIds && this.idMap.has(id)) {
901
+ duplicateIds.add(id);
902
+ }
903
+ }
904
+ if (duplicateIds.size > 0) {
905
+ throw new DuplicateVectorError([...duplicateIds]);
906
+ }
907
+ for (let i = 0; i < config.vectors.length; i++) {
908
+ const vector = config.vectors[i];
909
+ if (!(Array.isArray(vector) || vector instanceof Float32Array)) {
910
+ throw new VectorDBError(`Vector at index ${i} must be a number[] or Float32Array`, 'VALIDATION_ERROR');
911
+ }
912
+ if (vector.length !== this.dimension) {
913
+ throw new DimensionMismatchError(this.dimension, vector.length, `Vector at index ${i}`);
914
+ }
915
+ for (let d = 0; d < vector.length; d++) {
916
+ if (!Number.isFinite(vector[d])) {
917
+ throw new VectorDBError(`Vector at index ${i} contains non-finite value at dimension ${d}: ${vector[d]}`, 'VALIDATION_ERROR');
918
+ }
919
+ }
920
+ }
921
+ }
922
+ validateMetadataValue(value, path, seen) {
923
+ if (value === null)
924
+ return;
925
+ const valueType = typeof value;
926
+ if (valueType === 'string' || valueType === 'boolean')
927
+ return;
928
+ if (valueType === 'number') {
929
+ if (!Number.isFinite(value)) {
930
+ throw new VectorDBError(`${path} contains non-finite number ${value}`, 'VALIDATION_ERROR');
931
+ }
932
+ return;
933
+ }
934
+ if (valueType !== 'object') {
935
+ throw new VectorDBError(`${path} must contain only JSON-serializable values`, 'VALIDATION_ERROR');
936
+ }
937
+ const objectValue = value;
938
+ if (seen.has(objectValue)) {
939
+ throw new VectorDBError(`${path} contains a circular reference`, 'VALIDATION_ERROR');
940
+ }
941
+ seen.add(objectValue);
942
+ try {
943
+ if (Array.isArray(value)) {
944
+ for (let i = 0; i < value.length; i++) {
945
+ if (value[i] === undefined) {
946
+ throw new VectorDBError(`${path}[${i}] is undefined and cannot be persisted`, 'VALIDATION_ERROR');
947
+ }
948
+ this.validateMetadataValue(value[i], `${path}[${i}]`, seen);
949
+ }
950
+ return;
951
+ }
952
+ if (!this.isPlainObject(value)) {
953
+ throw new VectorDBError(`${path} must contain only plain JSON objects`, 'VALIDATION_ERROR');
954
+ }
955
+ for (const [key, nestedValue] of Object.entries(value)) {
956
+ if (nestedValue === undefined) {
957
+ throw new VectorDBError(`${path}.${key} is undefined and cannot be persisted`, 'VALIDATION_ERROR');
958
+ }
959
+ this.validateMetadataValue(nestedValue, `${path}.${key}`, seen);
960
+ }
961
+ }
962
+ finally {
963
+ seen.delete(objectValue);
964
+ }
965
+ }
966
+ parseMetadataLine(line) {
967
+ const trimmed = line.trim();
968
+ if (trimmed.length === 0)
969
+ return null;
970
+ // v2+ JSONL format
971
+ if (trimmed[0] === '{') {
972
+ const record = JSON.parse(trimmed);
973
+ if (typeof record.id !== 'string'
974
+ || !Number.isInteger(record.numericId)
975
+ || record.numericId < 0) {
976
+ return null;
977
+ }
978
+ if (record.metadata !== undefined && !this.isPlainObject(record.metadata)) {
979
+ return null;
980
+ }
981
+ return {
982
+ id: record.id,
983
+ internalId: record.numericId,
984
+ metadata: record.metadata === undefined ? undefined : record.metadata,
985
+ };
986
+ }
987
+ // Legacy tab-separated format
988
+ const parts = line.split('\t');
989
+ if (parts.length < 2)
990
+ return null;
991
+ const id = parts[0];
992
+ const internalId = parseInt(parts[1], 10);
993
+ if (!Number.isInteger(internalId) || internalId < 0)
994
+ return null;
995
+ let metadata;
996
+ if (parts.length > 2) {
997
+ const parsedMetadata = JSON.parse(parts.slice(2).join('\t'));
998
+ if (!this.isPlainObject(parsedMetadata))
999
+ return null;
1000
+ metadata = parsedMetadata;
1001
+ }
1002
+ return { id, internalId, metadata };
1003
+ }
1004
+ isPlainObject(value) {
1005
+ if (typeof value !== 'object' || value === null || Array.isArray(value)) {
1006
+ return false;
1007
+ }
1008
+ const prototype = Object.getPrototypeOf(value);
1009
+ return prototype === Object.prototype || prototype === null;
1010
+ }
1011
+ isFilterObject(value) {
1012
+ return typeof value === 'object' && value !== null && !Array.isArray(value);
1013
+ }
1014
+ isOperatorObject(value) {
1015
+ if (!this.isPlainObject(value))
1016
+ return false;
1017
+ const keys = Object.keys(value);
1018
+ if (keys.length === 0)
1019
+ return false;
1020
+ for (const key of keys) {
1021
+ if (!key.startsWith('$'))
1022
+ return false;
1023
+ }
1024
+ return true;
1025
+ }
1026
+ deepEqual(a, b) {
1027
+ if (Object.is(a, b))
1028
+ return true;
1029
+ if (Array.isArray(a) && Array.isArray(b)) {
1030
+ if (a.length !== b.length)
1031
+ return false;
1032
+ for (let i = 0; i < a.length; i++) {
1033
+ if (!this.deepEqual(a[i], b[i]))
1034
+ return false;
1035
+ }
1036
+ return true;
1037
+ }
1038
+ if (this.isPlainObject(a) && this.isPlainObject(b)) {
1039
+ const keysA = Object.keys(a);
1040
+ const keysB = Object.keys(b);
1041
+ if (keysA.length !== keysB.length)
1042
+ return false;
1043
+ for (const key of keysA) {
1044
+ if (!Object.prototype.hasOwnProperty.call(b, key))
1045
+ return false;
1046
+ if (!this.deepEqual(a[key], b[key]))
1047
+ return false;
1048
+ }
1049
+ return true;
1050
+ }
1051
+ return false;
1052
+ }
1053
+ matchesInOperator(metaValue, candidates) {
1054
+ if (Array.isArray(metaValue)) {
1055
+ for (const candidate of candidates) {
1056
+ for (const item of metaValue) {
1057
+ if (this.deepEqual(item, candidate))
1058
+ return true;
1059
+ }
1060
+ }
1061
+ return false;
1062
+ }
1063
+ for (const candidate of candidates) {
1064
+ if (this.deepEqual(metaValue, candidate))
1065
+ return true;
1066
+ }
1067
+ return false;
1068
+ }
1069
+ /**
1070
+ * Compact the collection by rebuilding the index without deleted vectors.
1071
+ * This permanently removes tombstoned vectors and reclaims space.
1072
+ *
1073
+ * @returns Number of vectors removed during compaction
1074
+ */
1075
+ async compact() {
1076
+ return this.withMutationLock(async () => {
1077
+ if (this.deletedIds.size === 0)
1078
+ return 0;
1079
+ const removedCount = this.deletedIds.size;
1080
+ // Collect all non-deleted vectors
1081
+ const activeVectors = [];
1082
+ for (const [numericId, vector] of this.hnsw.getAllVectors()) {
1083
+ if (!this.deletedIds.has(numericId)) {
1084
+ const stringId = this.idReverseMap.get(numericId);
1085
+ if (stringId) {
1086
+ activeVectors.push({
1087
+ id: stringId,
1088
+ numericId,
1089
+ vector,
1090
+ meta: this.metadata.get(numericId)
1091
+ });
1092
+ }
1093
+ }
1094
+ }
1095
+ // Rebuild into temporary state first so we can roll back on failure.
1096
+ const rebuiltHnsw = new HNSWIndex(this.dimension, this.metric, this.M, this.efConstruction);
1097
+ const rebuiltIdMap = new Map();
1098
+ const rebuiltIdReverseMap = new Map();
1099
+ const rebuiltMetadata = new Map();
1100
+ const rebuiltDeletedIds = new Set();
1101
+ // Prepare bulk insert data and re-populate mappings
1102
+ const points = new Array(activeVectors.length);
1103
+ for (let i = 0; i < activeVectors.length; i++) {
1104
+ const { id, vector, meta } = activeVectors[i];
1105
+ const newNumericId = i;
1106
+ rebuiltIdMap.set(id, newNumericId);
1107
+ rebuiltIdReverseMap.set(newNumericId, id);
1108
+ if (meta) {
1109
+ rebuiltMetadata.set(newNumericId, meta);
1110
+ }
1111
+ points[i] = { id: newNumericId, vector };
1112
+ }
1113
+ // Use bulk insert for better performance
1114
+ try {
1115
+ await rebuiltHnsw.addPointsBulk(points);
1116
+ }
1117
+ catch (err) {
1118
+ rebuiltHnsw.destroy();
1119
+ throw err;
1120
+ }
1121
+ const previousState = {
1122
+ hnsw: this.hnsw,
1123
+ idMap: this.idMap,
1124
+ idReverseMap: this.idReverseMap,
1125
+ metadata: this.metadata,
1126
+ deletedIds: this.deletedIds,
1127
+ nextNumericId: this.nextNumericId,
1128
+ activeCount: this.activeCount,
1129
+ indexKey: this.indexKey,
1130
+ metaKey: this.metaKey,
1131
+ deletedKey: this.deletedKey,
1132
+ dirty: this.dirty,
1133
+ };
1134
+ this.hnsw = rebuiltHnsw;
1135
+ this.idMap = rebuiltIdMap;
1136
+ this.idReverseMap = rebuiltIdReverseMap;
1137
+ this.metadata = rebuiltMetadata;
1138
+ this.deletedIds = rebuiltDeletedIds;
1139
+ this.nextNumericId = activeVectors.length;
1140
+ this.activeCount = activeVectors.length;
1141
+ this.dirty = true;
1142
+ try {
1143
+ await this.saveToDiskUnlocked();
1144
+ }
1145
+ catch (err) {
1146
+ this.hnsw.destroy();
1147
+ this.hnsw = previousState.hnsw;
1148
+ this.idMap = previousState.idMap;
1149
+ this.idReverseMap = previousState.idReverseMap;
1150
+ this.metadata = previousState.metadata;
1151
+ this.deletedIds = previousState.deletedIds;
1152
+ this.nextNumericId = previousState.nextNumericId;
1153
+ this.activeCount = previousState.activeCount;
1154
+ this.dirty = previousState.dirty;
1155
+ this.setActiveStorageKeys(previousState.indexKey, previousState.metaKey, previousState.deletedKey);
1156
+ throw err;
1157
+ }
1158
+ previousState.hnsw.destroy();
1159
+ return removedCount;
1160
+ });
1161
+ }
1162
+ /**
1163
+ * Destroy the collection, freeing all in-memory resources.
1164
+ * @param save Whether to persist data to storage before destroying (default: true).
1165
+ * Pass false when the collection is being deleted entirely.
1166
+ */
1167
+ async destroy(save = true) {
1168
+ await this.withMutationLock(async () => {
1169
+ if (save && this.dirty) {
1170
+ await this.saveToDiskUnlocked();
1171
+ }
1172
+ // Destroy the HNSW index to free memory
1173
+ if (this.hnsw && typeof this.hnsw.destroy === 'function') {
1174
+ this.hnsw.destroy();
1175
+ }
1176
+ // Clear all maps to free memory
1177
+ this.idMap.clear();
1178
+ this.idReverseMap.clear();
1179
+ this.metadata.clear();
1180
+ this.deletedIds.clear();
1181
+ this.activeCount = 0;
1182
+ this.dirty = false;
1183
+ });
1184
+ }
1185
+ }
1186
+ //# sourceMappingURL=Collection.js.map