verso-db 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +13 -7
- package/dist/BinaryHeap.d.ts +11 -1
- package/dist/BinaryHeap.d.ts.map +1 -1
- package/dist/BinaryHeap.js +138 -0
- package/dist/BinaryHeap.js.map +1 -0
- package/dist/Collection.d.ts +30 -4
- package/dist/Collection.d.ts.map +1 -1
- package/dist/Collection.js +1186 -0
- package/dist/Collection.js.map +1 -0
- package/dist/HNSWIndex.d.ts +59 -0
- package/dist/HNSWIndex.d.ts.map +1 -1
- package/dist/HNSWIndex.js +2818 -0
- package/dist/HNSWIndex.js.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +2 -64
- package/dist/MaxBinaryHeap.d.ts.map +1 -1
- package/dist/MaxBinaryHeap.js +5 -0
- package/dist/MaxBinaryHeap.js.map +1 -0
- package/dist/SearchWorker.d.ts +57 -4
- package/dist/SearchWorker.d.ts.map +1 -1
- package/dist/SearchWorker.js +573 -0
- package/dist/SearchWorker.js.map +1 -0
- package/dist/VectorDB.d.ts.map +1 -1
- package/dist/VectorDB.js +246 -0
- package/dist/VectorDB.js.map +1 -0
- package/dist/WorkerPool.d.ts +32 -2
- package/dist/WorkerPool.d.ts.map +1 -1
- package/dist/WorkerPool.js +266 -0
- package/dist/WorkerPool.js.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
- package/dist/backends/JsDistanceBackend.js +163 -0
- package/dist/backends/JsDistanceBackend.js.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +2 -2
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
- package/dist/encoding/DeltaEncoder.js +199 -0
- package/dist/encoding/DeltaEncoder.js.map +1 -0
- package/dist/errors.js +97 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -42
- package/dist/index.js.map +1 -9
- package/dist/presets.js +205 -0
- package/dist/presets.js.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +0 -34
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
- package/dist/quantization/ScalarQuantizer.js +346 -0
- package/dist/quantization/ScalarQuantizer.js.map +1 -0
- package/dist/storage/BatchWriter.js +351 -0
- package/dist/storage/BatchWriter.js.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +7 -3
- package/dist/storage/BunStorageBackend.d.ts.map +1 -1
- package/dist/storage/BunStorageBackend.js +182 -0
- package/dist/storage/BunStorageBackend.js.map +1 -0
- package/dist/storage/MemoryBackend.js +109 -0
- package/dist/storage/MemoryBackend.js.map +1 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -1
- package/dist/storage/OPFSBackend.js +325 -0
- package/dist/storage/OPFSBackend.js.map +1 -0
- package/dist/storage/StorageBackend.js +12 -0
- package/dist/storage/StorageBackend.js.map +1 -0
- package/dist/storage/WriteAheadLog.js +321 -0
- package/dist/storage/WriteAheadLog.js.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +4 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -1
- package/dist/storage/createStorageBackend.js +119 -0
- package/dist/storage/createStorageBackend.js.map +1 -0
- package/{src/storage/index.ts → dist/storage/index.js} +7 -27
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/nodeFsRuntime.d.ts +14 -0
- package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
- package/dist/storage/nodeFsRuntime.js +105 -0
- package/dist/storage/nodeFsRuntime.js.map +1 -0
- package/package.json +9 -7
- package/src/BinaryHeap.ts +0 -136
- package/src/Collection.ts +0 -1262
- package/src/HNSWIndex.ts +0 -2894
- package/src/MaxBinaryHeap.ts +0 -181
- package/src/SearchWorker.ts +0 -264
- package/src/VectorDB.ts +0 -319
- package/src/WorkerPool.ts +0 -222
- package/src/backends/JsDistanceBackend.ts +0 -171
- package/src/encoding/DeltaEncoder.ts +0 -236
- package/src/errors.ts +0 -110
- package/src/index.ts +0 -106
- package/src/presets.ts +0 -229
- package/src/quantization/ScalarQuantizer.ts +0 -487
- package/src/storage/BatchWriter.ts +0 -420
- package/src/storage/BunStorageBackend.ts +0 -199
- package/src/storage/MemoryBackend.ts +0 -122
- package/src/storage/OPFSBackend.ts +0 -348
- package/src/storage/StorageBackend.ts +0 -74
- package/src/storage/WriteAheadLog.ts +0 -379
- package/src/storage/createStorageBackend.ts +0 -137
package/src/Collection.ts
DELETED
|
@@ -1,1262 +0,0 @@
|
|
|
1
|
-
import { HNSWIndex, DistanceMetric } from './HNSWIndex';
|
|
2
|
-
import type { StorageBackend } from './storage/StorageBackend';
|
|
3
|
-
import { DimensionMismatchError, DuplicateVectorError, VectorDBError } from './errors';
|
|
4
|
-
|
|
5
|
-
/**
 * Input payload for inserting vectors into a collection.
 *
 * `ids`, `vectors` and (when supplied) `metadata` are parallel arrays:
 * entry `i` of each one describes the same vector.
 */
export interface AddConfig {
  /** Unique string identifier of each vector. */
  ids: string[];
  /** Vectors to insert; each one must match the collection dimension. */
  vectors: (number[] | Float32Array)[];
  /** Optional per-vector metadata (plain JSON objects only), same length as `ids`/`vectors`. */
  metadata?: Record<string, any>[];
}
|
|
16
|
-
|
|
17
|
-
/**
 * Options for a nearest-neighbour query against a collection.
 *
 * @example
 * ```typescript
 * // Plain k-NN query
 * const results = await collection.query({
 *   queryVector: [0.1, 0.2, 0.3],
 *   k: 10
 * });
 *
 * // k-NN query restricted by metadata
 * const results = await collection.query({
 *   queryVector: [0.1, 0.2, 0.3],
 *   k: 10,
 *   filter: {
 *     category: 'science',                     // exact match
 *     year: { $gte: 2020 },                    // greater than or equal
 *     status: { $in: ['active', 'pending'] }   // member of array
 *   }
 * });
 * ```
 */
export interface QueryConfig {
  /** Vector to search for; must match the collection dimension. */
  queryVector: number[] | Float32Array;
  /** How many nearest neighbours to return. */
  k: number;
  /**
   * Optional metadata filter using MongoDB-style operators:
   * - plain equality: `{ field: value }`
   * - `$gt`  greater than: `{ field: { $gt: 5 } }`
   * - `$gte` greater than or equal: `{ field: { $gte: 5 } }`
   * - `$lt`  less than: `{ field: { $lt: 10 } }`
   * - `$lte` less than or equal: `{ field: { $lte: 10 } }`
   * - `$ne`  not equal: `{ field: { $ne: 'excluded' } }`
   * - `$in`  member of array: `{ field: { $in: ['a', 'b', 'c'] } }`
   * - `$nin` not a member of array: `{ field: { $nin: ['x', 'y'] } }`
   */
  filter?: Record<string, any>;
  /** Search effort (higher = better recall, slower). Defaults to `max(k * 2, 50)`. */
  efSearch?: number;
  /** Search int8-quantized vectors and rescore in float32. Defaults to whether quantization is enabled on the index. */
  useQuantizedSearch?: boolean;
  /** Oversampling multiplier for the quantized rescore step (default 3). Higher = better recall, slower. */
  candidateMultiplier?: number;
}
|
|
64
|
-
|
|
65
|
-
/**
 * Result of a query, expressed as three parallel arrays:
 * entry `i` of each array describes the same hit.
 */
export interface QueryResult {
  /** External string IDs of the hits. */
  ids: string[];
  /** Distance from the query vector to each hit. */
  distances: number[];
  /** Metadata object of each hit. */
  metadata: Record<string, any>[];
}
|
|
70
|
-
|
|
71
|
-
/** Metadata filter operators understood by the collection; built once at module load rather than per call. */
const SUPPORTED_OPERATORS = new Set<string>([
  '$eq',
  '$ne',
  '$gt',
  '$gte',
  '$lt',
  '$lte',
  '$in',
  '$nin',
]);

/** Version number written into, and required from, the persisted collection state file. */
const COLLECTION_STATE_VERSION = 1;
|
|
74
|
-
|
|
75
|
-
/** Shape of the persisted state pointer that names the currently committed data files. */
interface CollectionStorageState {
  /** Format version of this state record. */
  version: number;
  /** Storage key holding the serialized HNSW index. */
  indexKey: string;
  /** Storage key holding the metadata lines. */
  metaKey: string;
  /** Storage key holding the deleted-ID list. */
  deletedKey: string;
}
|
|
81
|
-
|
|
82
|
-
export class Collection {
|
|
83
|
-
private static readonly MAX_INTERNAL_ID = 0xFFFFFFFE;
|
|
84
|
-
|
|
85
|
-
private name: string;
|
|
86
|
-
private dimension: number;
|
|
87
|
-
private metric: DistanceMetric;
|
|
88
|
-
private M: number; // HNSW M parameter
|
|
89
|
-
private efConstruction: number; // HNSW efConstruction parameter
|
|
90
|
-
private storage: StorageBackend;
|
|
91
|
-
private readonly stateKey: string; // Storage key for atomic state pointer
|
|
92
|
-
private readonly defaultIndexKey: string;
|
|
93
|
-
private readonly defaultMetaKey: string;
|
|
94
|
-
private readonly defaultDeletedKey: string;
|
|
95
|
-
private indexKey: string; // Storage key for HNSW index data
|
|
96
|
-
private metaKey: string; // Storage key for metadata
|
|
97
|
-
private deletedKey: string; // Storage key for deleted IDs
|
|
98
|
-
private hnsw: HNSWIndex;
|
|
99
|
-
private idMap: Map<string, number>; // Maps string IDs to internal numeric IDs
|
|
100
|
-
private idReverseMap: Map<number, string>; // Maps internal numeric IDs to string IDs
|
|
101
|
-
private metadata: Map<number, Record<string, any>>; // Stores metadata for each vector
|
|
102
|
-
private deletedIds: Set<number>; // Tombstone markers for deleted vectors
|
|
103
|
-
private nextNumericId: number; // Monotonic counter for generating unique internal IDs
|
|
104
|
-
private activeCount: number; // O(1) counter for non-deleted vectors
|
|
105
|
-
|
|
106
|
-
/**
 * Builds an empty in-memory collection bound to `storage`.
 *
 * Nothing is read from storage here; `init()` performs the load.
 *
 * @param name Collection name; also used as the storage key prefix (`<name>/<name>.*`).
 * @param config Index parameters. `metric` defaults to `'cosine'`, `M` to 16 and
 *   `efConstruction` to 200 when omitted.
 * @param storage Backend used for all reads and writes.
 */
constructor(
  name: string,
  config: { dimension: number; metric?: DistanceMetric; M?: number; efConstruction?: number },
  storage: StorageBackend
) {
  const keyStem = `${name}/${name}`;

  this.name = name;
  this.storage = storage;

  // Index parameters, with defaults for the optional ones.
  this.dimension = config.dimension;
  this.metric = config.metric ?? 'cosine';
  this.M = config.M ?? 16;
  this.efConstruction = config.efConstruction ?? 200;

  // Fixed (legacy) keys plus the state pointer key.
  this.defaultIndexKey = `${keyStem}.hnsw`;
  this.defaultMetaKey = `${keyStem}.meta`;
  this.defaultDeletedKey = `${keyStem}.deleted`;
  this.stateKey = `${keyStem}.state`;

  // Until a state pointer is loaded or written, the active keys are the fixed ones.
  this.indexKey = this.defaultIndexKey;
  this.metaKey = this.defaultMetaKey;
  this.deletedKey = this.defaultDeletedKey;

  this.hnsw = new HNSWIndex(config.dimension, this.metric, this.M, this.efConstruction);

  // Bookkeeping starts out empty.
  this.idMap = new Map();
  this.idReverseMap = new Map();
  this.metadata = new Map();
  this.deletedIds = new Set();
  this.nextNumericId = 0;
  this.activeCount = 0;
}
|
|
136
|
-
|
|
137
|
-
/**
 * Prepares the collection for use by loading whatever state exists in storage.
 */
async init(): Promise<void> {
  await this.loadFromDisk();
}
|
|
141
|
-
|
|
142
|
-
/**
 * Reserves a contiguous run of `count` internal numeric IDs.
 *
 * @param count Number of IDs to reserve; must be a non-negative integer.
 * @returns The first ID of the reserved run (the current counter when `count` is 0).
 * @throws VectorDBError (`VALIDATION_ERROR`) when `count` is invalid or the run
 *   would go past `Collection.MAX_INTERNAL_ID`.
 */
private reserveNumericIds(count: number): number {
  if (count < 0 || !Number.isInteger(count)) {
    throw new VectorDBError(`Invalid ID reservation count: ${count}`, 'VALIDATION_ERROR');
  }

  const firstId = this.nextNumericId;
  if (count === 0) return firstId;

  const lastId = firstId + count - 1;
  const fitsIdSpace = Number.isSafeInteger(lastId) && lastId <= Collection.MAX_INTERNAL_ID;
  if (!fitsIdSpace) {
    throw new VectorDBError(
      `Collection '${this.name}' cannot allocate ${count} internal IDs: exhausted ID space (max ${Collection.MAX_INTERNAL_ID})`,
      'VALIDATION_ERROR'
    );
  }

  // Advance the counter only after the whole run has been validated.
  this.nextNumericId = lastId + 1;
  return firstId;
}
|
|
161
|
-
|
|
162
|
-
/** Points the collection at the given index, metadata and deleted-ID storage keys. */
private setActiveStorageKeys(indexKey: string, metaKey: string, deletedKey: string): void {
  [this.indexKey, this.metaKey, this.deletedKey] = [indexKey, metaKey, deletedKey];
}
|
|
167
|
-
|
|
168
|
-
/**
 * Type guard for a decoded state record: it must be an object carrying the
 * current state version and string values for all three data keys.
 */
private isValidStorageState(value: unknown): value is CollectionStorageState {
  if (typeof value !== 'object' || !value) return false;

  const candidate = value as Record<string, unknown>;
  if (candidate.version !== COLLECTION_STATE_VERSION) return false;

  return ['indexKey', 'metaKey', 'deletedKey'].every(
    (field) => typeof candidate[field] === 'string'
  );
}
|
|
178
|
-
|
|
179
|
-
/**
 * Reads and validates the state pointer stored under `stateKey`.
 *
 * @returns The decoded state, or `null` when the key is absent, the payload is
 *   not valid JSON, or it fails `isValidStorageState` — callers then fall back
 *   to the legacy fixed keys.
 */
private async loadStorageState(): Promise<CollectionStorageState | null> {
  const bytes = await this.storage.read(this.stateKey);
  if (!bytes) return null;

  try {
    const decoded: unknown = JSON.parse(new TextDecoder().decode(bytes));
    return this.isValidStorageState(decoded) ? decoded : null;
  } catch {
    // A malformed state file is treated the same as a missing one.
    return null;
  }
}
|
|
194
|
-
|
|
195
|
-
/**
 * Derives the three data-file keys for one save, embedding `saveId` between
 * the collection name and the file extension.
 */
private getVersionedDataKeys(saveId: string): { indexKey: string; metaKey: string; deletedKey: string } {
  const stem = `${this.name}/${this.name}.${saveId}`;
  return {
    indexKey: `${stem}.hnsw`,
    metaKey: `${stem}.meta`,
    deletedKey: `${stem}.deleted`,
  };
}
|
|
202
|
-
|
|
203
|
-
/**
 * Tells whether `key` names a versioned data file of this collection: it must
 * start with `<name>/<name>.`, end in `.hnsw`, `.meta` or `.deleted`, and not
 * be one of the three fixed legacy keys.
 */
private isVersionedDataKey(key: string): boolean {
  const legacyKeys = [this.defaultIndexKey, this.defaultMetaKey, this.defaultDeletedKey];
  if (legacyKeys.includes(key)) return false;

  if (!key.startsWith(`${this.name}/${this.name}.`)) return false;

  return ['.hnsw', '.meta', '.deleted'].some((suffix) => key.endsWith(suffix));
}
|
|
218
|
-
|
|
219
|
-
/**
 * Verifies that a deserialized index was built with this collection's
 * dimension, metric, M and efConstruction.
 *
 * @throws VectorDBError (`CORRUPT_COLLECTION`) on any mismatch.
 */
private validateLoadedIndexConfig(index: HNSWIndex): void {
  const sameConfig =
    index.getDimension() === this.dimension &&
    index.getMetric() === this.metric &&
    index.getM() === this.M &&
    index.getEfConstruction() === this.efConstruction;
  if (sameConfig) return;

  throw new VectorDBError(
    `Collection '${this.name}' index parameters do not match manifest configuration`,
    'CORRUPT_COLLECTION'
  );
}
|
|
232
|
-
|
|
233
|
-
/**
 * Rebuilds all in-memory state from storage.
 *
 * Steps, in order:
 * 1. Clear the ID maps, metadata, tombstones and counters.
 * 2. Resolve the active data keys from the state pointer, falling back to the
 *    fixed legacy keys when no valid pointer exists.
 * 3. Load the HNSW index. A parameter mismatch (`CORRUPT_COLLECTION`) is
 *    rethrown; any other failure is logged and replaced by an empty index.
 * 4. Load the metadata file line by line, skipping (and logging) malformed lines.
 * 5. Load the deleted-ID list.
 * 6. Call `reconcileLoadedState()` on the result.
 *
 * Failures in steps 4 and 5 are non-fatal but are logged: a lost tombstone
 * file would otherwise make deleted vectors reappear without any trace.
 *
 * @throws VectorDBError (`CORRUPT_COLLECTION`) when the stored index does not
 *   match this collection's configuration.
 */
private async loadFromDisk(): Promise<void> {
  // Reset in-memory state before loading.
  this.idMap.clear();
  this.idReverseMap.clear();
  this.metadata.clear();
  this.deletedIds.clear();
  this.nextNumericId = 0;
  this.activeCount = 0;

  const persistedState = await this.loadStorageState();
  if (persistedState) {
    this.setActiveStorageKeys(persistedState.indexKey, persistedState.metaKey, persistedState.deletedKey);
  } else {
    this.setActiveStorageKeys(this.defaultIndexKey, this.defaultMetaKey, this.defaultDeletedKey);
  }

  // Load the binary HNSW index via StorageBackend
  const indexData = await this.storage.read(this.indexKey);
  if (indexData) {
    try {
      const loaded = HNSWIndex.deserialize(indexData);
      this.validateLoadedIndexConfig(loaded);
      this.hnsw = loaded;
    } catch (e) {
      if (e instanceof VectorDBError && e.code === 'CORRUPT_COLLECTION') {
        throw e;
      }
      console.warn(`Failed to load HNSW index from ${this.indexKey}:`, e);
      // Reset to an empty index to avoid stale in-memory graph state.
      this.hnsw = new HNSWIndex(this.dimension, this.metric, this.M, this.efConstruction);
    }
  }

  // Load metadata via StorageBackend
  const metaData = await this.storage.read(this.metaKey);
  if (metaData) {
    try {
      const lines = new TextDecoder().decode(metaData).split('\n');
      for (const line of lines) {
        // Blank and whitespace-only lines carry no record.
        if (line.trim().length === 0) continue;

        try {
          const parsed = this.parseMetadataLine(line);
          if (!parsed) continue;
          const { id, internalId, metadata } = parsed;

          this.idMap.set(id, internalId);
          this.idReverseMap.set(internalId, id);
          if (metadata !== undefined) {
            this.metadata.set(internalId, metadata);
          }
        } catch {
          // Skip malformed lines
          console.warn(`Skipping malformed line in metadata file: ${line}`);
        }
      }
    } catch (e) {
      // Non-fatal, but report it instead of silently continuing without metadata.
      console.warn(`Failed to load metadata from ${this.metaKey}:`, e);
    }
  }

  // Load deleted IDs via StorageBackend
  const deletedData = await this.storage.read(this.deletedKey);
  if (deletedData) {
    try {
      const deletedArray: unknown = JSON.parse(new TextDecoder().decode(deletedData));
      if (Array.isArray(deletedArray)) {
        // NOTE(review): entries are not validated here; presumably
        // reconcileLoadedState() deals with unexpected values — confirm.
        this.deletedIds = new Set(deletedArray);
      } else {
        console.warn(`Ignoring deleted-ID file ${this.deletedKey}: expected a JSON array`);
      }
    } catch (e) {
      // Non-fatal: start with an empty tombstone set, but make the loss visible.
      console.warn(`Failed to load deleted IDs from ${this.deletedKey}:`, e);
    }
  }

  this.reconcileLoadedState();
}
|
|
312
|
-
|
|
313
|
-
/**
 * Inserts new vectors into the collection.
 *
 * The input is validated first, a contiguous run of internal IDs is reserved,
 * the vectors are copied into `Float32Array`s and bulk-inserted into the HNSW
 * index, and only then are the ID maps, metadata and active counter updated.
 *
 * @param config IDs, vectors and optional metadata to insert.
 */
async add(config: AddConfig): Promise<void> {
  this.validateAddInput(config, false);

  const firstId = this.reserveNumericIds(config.ids.length);

  const points: Array<{ id: number; vector: Float32Array }> = [];
  for (let offset = 0; offset < config.ids.length; offset++) {
    points.push({ id: firstId + offset, vector: new Float32Array(config.vectors[offset]) });
  }

  await this.hnsw.addPointsBulk(points);

  // Bookkeeping happens only after the index accepted the whole batch.
  for (let offset = 0; offset < config.ids.length; offset++) {
    const externalId = config.ids[offset];
    const internalId = firstId + offset;
    const meta = config.metadata?.[offset];

    this.idMap.set(externalId, internalId);
    this.idReverseMap.set(internalId, externalId);
    if (meta !== undefined) {
      this.metadata.set(internalId, meta);
    }
  }

  this.activeCount += config.ids.length;
}
|
|
341
|
-
|
|
342
|
-
/**
 * Runs a single nearest-neighbour query through the shared query path.
 *
 * @param config Query vector, `k`, and optional filter / search tuning.
 * @returns The matching IDs, distances and metadata.
 */
async query(config: QueryConfig): Promise<QueryResult> {
  const result = this.executeQuery(config, 'Query vector');
  return result;
}
|
|
345
|
-
|
|
346
|
-
/**
 * Runs several queries in one call.
 *
 * Every query goes through the same path as `query()`. All entries are
 * shape-checked before the first query is executed, so one invalid entry
 * fails the whole batch up front.
 *
 * @param configs Array of query configurations
 * @returns One result per query, in input order
 * @throws VectorDBError (`VALIDATION_ERROR`) when `configs` is not an array or
 *   an entry is not a plain object.
 */
async queryBatch(configs: QueryConfig[]): Promise<QueryResult[]> {
  if (!Array.isArray(configs)) {
    throw new VectorDBError('Query batch must be an array of query configs', 'VALIDATION_ERROR');
  }
  if (configs.length === 0) return [];

  for (const [position, entry] of configs.entries()) {
    if (this.isPlainObject(entry)) continue;
    throw new VectorDBError(`Query ${position} must be a plain object`, 'VALIDATION_ERROR');
  }

  const results: QueryResult[] = [];
  for (const [position, entry] of configs.entries()) {
    results.push(this.executeQuery(entry, `Query ${position}`));
  }
  return results;
}
|
|
372
|
-
|
|
373
|
-
/**
 * Brute-force k-NN search, intended for validation and correctness checks.
 *
 * Scans every vector held by the index, skips tombstoned ones and (when a
 * filter is given) those whose metadata does not match, computes the exact
 * distance for the rest, and returns the `k` closest. Ties on distance are
 * broken by ascending internal ID so the output is deterministic.
 *
 * @param config Query vector, `k` and optional metadata filter.
 * @returns Up to `k` hits ordered by ascending distance.
 */
async queryBruteForce(config: QueryConfig): Promise<QueryResult> {
  this.validateQueryInput(config, 'Query vector');
  const { queryVector, k, filter } = config;

  // Compute distances to all vectors, applying filter during scan
  const queryFloat = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
  const hasFilter = this.hasFilter(filter);
  const distances: Array<{ id: number; distance: number }> = [];
  for (const [numericId, vector] of this.hnsw.getAllVectors()) {
    // Skip deleted vectors
    if (this.deletedIds.has(numericId)) continue;

    // Filter before the distance computation so rejected vectors cost nothing.
    if (hasFilter) {
      const meta = this.metadata.get(numericId) || {};
      if (!this.matchesFilter(meta, filter!)) continue;
    }

    const distance = this.hnsw.calculateDistance(queryFloat, vector);
    distances.push({ id: numericId, distance });
  }

  // Sort by distance (internal ID as tie-breaker) and take top k
  distances.sort((a, b) => {
    const diff = a.distance - b.distance;
    return diff !== 0 ? diff : a.id - b.id;
  });
  const nearest = distances.slice(0, k);

  // Prepare the output
  const ids: string[] = [];
  const distancesOut: number[] = [];
  const metadata: Array<Record<string, any>> = [];

  for (const result of nearest) {
    const id = this.idReverseMap.get(result.id);
    // Compare against undefined rather than relying on truthiness, so a
    // vector whose external ID is the empty string is still returned.
    if (id !== undefined) {
      ids.push(id);
      distancesOut.push(result.distance);
      metadata.push(this.metadata.get(result.id) || {});
    }
  }

  return {
    ids,
    distances: distancesOut,
    metadata
  };
}
|
|
426
|
-
|
|
427
|
-
/**
 * Inserts vectors, replacing any that already exist under the same ID.
 *
 * Every entry gets a freshly reserved internal ID and is inserted into the
 * index. If the external ID was already mapped, the previous internal ID is
 * tombstoned and its reverse mapping and metadata are dropped; `compact()`
 * can later reclaim the space held by tombstoned vectors.
 *
 * For IDs that are not yet present this behaves like `add()`.
 *
 * @param config IDs, vectors and optional metadata to insert or replace.
 */
async upsert(config: AddConfig): Promise<void> {
  this.validateAddInput(config, true);

  const firstId = this.reserveNumericIds(config.ids.length);

  const points: Array<{ id: number; vector: Float32Array }> = [];
  for (let offset = 0; offset < config.ids.length; offset++) {
    points.push({ id: firstId + offset, vector: new Float32Array(config.vectors[offset]) });
  }

  await this.hnsw.addPointsBulk(points);

  for (let offset = 0; offset < config.ids.length; offset++) {
    const externalId = config.ids[offset];
    const freshId = firstId + offset;

    const previousId = this.idMap.get(externalId);
    if (previousId !== undefined) {
      // Retire the old version; only a still-active one counts against activeCount.
      const wasActive = !this.deletedIds.has(previousId);
      if (wasActive) {
        this.activeCount--;
      }
      this.deletedIds.add(previousId);
      this.idReverseMap.delete(previousId);
      this.metadata.delete(previousId);
    }

    this.idMap.set(externalId, freshId);
    this.idReverseMap.set(freshId, externalId);

    const meta = config.metadata?.[offset];
    if (meta !== undefined) {
      this.metadata.set(freshId, meta);
    }
    this.activeCount++;
  }
}
|
|
470
|
-
|
|
471
|
-
/** Vector dimension this collection was configured with. */
getDimension(): number {
  return this.dimension;
}
|
|
473
|
-
|
|
474
|
-
/** Distance metric this collection was configured with. */
getMetric(): DistanceMetric {
  return this.metric;
}
|
|
476
|
-
|
|
477
|
-
/** HNSW `M` parameter this collection was configured with. */
getM(): number {
  return this.M;
}
|
|
479
|
-
|
|
480
|
-
/** HNSW `efConstruction` parameter this collection was configured with. */
getEfConstruction(): number {
  return this.efConstruction;
}
|
|
482
|
-
|
|
483
|
-
/**
 * Number of active (non-deleted) vectors.
 * Read from a maintained counter, so no scan is performed.
 */
count(): number {
  const active = this.activeCount;
  return active;
}
|
|
490
|
-
|
|
491
|
-
/**
 * Number of tracked vectors including tombstoned ones: the active counter
 * plus the size of the tombstone set. Useful for judging whether compaction
 * is worthwhile.
 */
countWithDeleted(): number {
  const tombstoned = this.deletedIds.size;
  return this.activeCount + tombstoned;
}
|
|
498
|
-
|
|
499
|
-
/**
 * Number of tombstoned vectors currently awaiting compaction.
 */
deletedCount(): number {
  const tombstoned = this.deletedIds.size;
  return tombstoned;
}
|
|
505
|
-
|
|
506
|
-
/**
 * Tombstones a vector.
 *
 * The vector is only marked as deleted: its internal ID is added to the
 * tombstone set and the active counter is decremented. Use `compact()` to
 * permanently remove deleted vectors and reclaim space.
 *
 * @param id The string ID of the vector to delete
 * @returns `true` when the vector was tombstoned by this call; `false` when
 *   the ID is unknown or already tombstoned
 */
delete(id: string): boolean {
  const internalId = this.idMap.get(id);
  if (internalId === undefined || this.deletedIds.has(internalId)) {
    return false;
  }

  this.deletedIds.add(internalId);
  this.activeCount--;
  return true;
}
|
|
523
|
-
|
|
524
|
-
/**
 * Tombstones several vectors by calling `delete()` on each ID in turn.
 *
 * @param ids Array of string IDs to delete
 * @returns How many of the IDs were actually tombstoned by this call
 */
deleteBatch(ids: string[]): number {
  let removed = 0;
  for (const id of ids) {
    if (!this.delete(id)) continue;
    removed += 1;
  }
  return removed;
}
|
|
537
|
-
|
|
538
|
-
/**
 * Whether `id` is known to the collection and not tombstoned.
 */
has(id: string): boolean {
  const internalId = this.idMap.get(id);
  return internalId !== undefined && !this.deletedIds.has(internalId);
}
|
|
546
|
-
|
|
547
|
-
/**
 * Whether `id` is known to the collection and currently tombstoned.
 * Unknown IDs yield `false`.
 */
isDeleted(id: string): boolean {
  const internalId = this.idMap.get(id);
  return internalId !== undefined && this.deletedIds.has(internalId);
}
|
|
555
|
-
|
|
556
|
-
/**
 * Reorders the HNSW index for cache locality and rewrites every structure
 * keyed by internal ID to match.
 *
 * `reorderForLocality()` returns an old-to-new internal ID map. When it is
 * empty nothing is changed. Otherwise the ID maps, the metadata map and the
 * tombstone set are rebuilt under the new IDs; entries whose old ID has no
 * counterpart in the map are dropped.
 */
reorderIndex(): void {
  const remap = this.hnsw.reorderForLocality();
  if (remap.size === 0) return;

  // External ID <-> internal ID, rebuilt in both directions at once.
  const remappedIdMap = new Map<string, number>();
  const remappedReverseMap = new Map<number, string>();
  this.idMap.forEach((oldId, externalId) => {
    const newId = remap.get(oldId);
    if (newId === undefined) return;
    remappedIdMap.set(externalId, newId);
    remappedReverseMap.set(newId, externalId);
  });

  // Metadata keyed by internal ID.
  const remappedMetadata = new Map<number, Record<string, any>>();
  this.metadata.forEach((meta, oldId) => {
    const newId = remap.get(oldId);
    if (newId === undefined) return;
    remappedMetadata.set(newId, meta);
  });

  // Tombstones.
  const remappedDeleted = new Set<number>();
  this.deletedIds.forEach((oldId) => {
    const newId = remap.get(oldId);
    if (newId === undefined) return;
    remappedDeleted.add(newId);
  });

  this.idMap = remappedIdMap;
  this.idReverseMap = remappedReverseMap;
  this.metadata = remappedMetadata;
  this.deletedIds = remappedDeleted;
}
|
|
598
|
-
|
|
599
|
-
/**
 * Persists the collection to storage.
 *
 * Write order:
 * 1. Ensure the collection directory exists.
 * 2. Write the serialized index, the metadata (one JSON object per line) and
 *    the deleted-ID list under fresh, uniquely named versioned keys.
 * 3. Write the state pointer naming those keys. This is the commit point: a
 *    crash before it leaves the previously committed state in effect.
 * 4. Best effort: mirror the data to the fixed legacy keys and delete the
 *    previously active versioned files. Failures here are ignored.
 */
async saveToDisk(): Promise<void> {
  await this.storage.mkdir(this.name);

  const timePart = Date.now().toString(36);
  const randomPart = Math.random().toString(36).slice(2, 10);
  const nextKeys = this.getVersionedDataKeys(`${timePart}-${randomPart}`);

  // Remember what was active before this save so it can be cleaned up afterwards.
  const supersededKeys = [this.indexKey, this.metaKey, this.deletedKey];
  const encoder = new TextEncoder();

  // Index: serialized binary.
  const indexBytes = new Uint8Array(this.hnsw.serialize());
  await this.storage.write(nextKeys.indexKey, indexBytes);

  // Metadata: JSON lines, which is safe for arbitrary string IDs.
  const metaLines = Array.from(this.idReverseMap, ([numericId, id]) =>
    JSON.stringify({
      id,
      numericId,
      metadata: this.metadata.get(numericId) ?? {},
    })
  );
  const metaBytes = encoder.encode(metaLines.join('\n'));
  await this.storage.write(nextKeys.metaKey, metaBytes);

  // Tombstones: a JSON array of internal IDs.
  const deletedBytes = encoder.encode(JSON.stringify(Array.from(this.deletedIds)));
  await this.storage.write(nextKeys.deletedKey, deletedBytes);

  // Commit: the pointer is written last.
  const state: CollectionStorageState = {
    version: COLLECTION_STATE_VERSION,
    ...nextKeys,
  };
  await this.storage.write(this.stateKey, encoder.encode(JSON.stringify(state)));

  this.setActiveStorageKeys(nextKeys.indexKey, nextKeys.metaKey, nextKeys.deletedKey);

  // Shadow copies at the legacy keys for tooling/tests that read fixed file
  // names directly. Non-fatal: the state file is the source of truth.
  const legacyCopies: Array<[string, Uint8Array]> = [
    [this.defaultIndexKey, indexBytes],
    [this.defaultMetaKey, metaBytes],
    [this.defaultDeletedKey, deletedBytes],
  ];
  for (const [legacyKey, bytes] of legacyCopies) {
    await this.storage.write(legacyKey, bytes).catch(() => {});
  }

  // Best-effort cleanup of the superseded versioned files. Legacy fixed keys
  // are never deleted, and neither is anything that is now active.
  for (const oldKey of supersededKeys) {
    const stillActive =
      oldKey === this.indexKey || oldKey === this.metaKey || oldKey === this.deletedKey;
    if (stillActive || !this.isVersionedDataKey(oldKey)) continue;
    await this.storage.delete(oldKey).catch(() => {});
  }
}
|
|
661
|
-
|
|
662
|
-
/**
 * Validates a query, resolves its search parameters and runs it.
 *
 * @param config Query configuration (vector, `k` and optional tuning knobs).
 * @param context Label handed to validation so error messages name the caller.
 * @returns The materialized result arrays, at most `config.k` entries long.
 * @throws Whatever `validateQueryInput` throws for an invalid `config`.
 */
private executeQuery(config: QueryConfig, context: string): QueryResult {
  this.validateQueryInput(config, context);

  const { k } = config;

  // Reuse a Float32Array as-is; copy a plain number[] into one.
  let vector: Float32Array;
  if (config.queryVector instanceof Float32Array) {
    vector = config.queryVector;
  } else {
    vector = new Float32Array(config.queryVector);
  }

  // An empty filter object is treated exactly like "no filter".
  const activeFilter = this.hasFilter(config.filter) ? config.filter : undefined;
  const ef = config.efSearch ?? Math.max(k * 2, 50);

  // An explicit true/false wins; otherwise follow the index's own setting.
  const quantized = config.useQuantizedSearch ?? this.hnsw.isQuantizationEnabled();
  const multiplier = config.candidateMultiplier ?? 3;

  return this.materializeResults(
    this.searchCandidates(vector, k, ef, activeFilter, quantized, multiplier),
    k
  );
}
|
|
680
|
-
|
|
681
|
-
/**
 * Fetches nearest-neighbour candidates from the index, doubling the request
 * size until at least `k` candidates survive filtering or the request already
 * covers everything `countWithDeleted()` reports.
 *
 * @param queryVector Query vector.
 * @param k Number of surviving candidates the caller wants.
 * @param efSearch Lower bound for the ef value passed to the index.
 * @param filter Optional metadata filter applied to each raw hit.
 * @param useQuantized When true, `searchKNNQuantized` is used instead of `searchKNN`.
 * @param candidateMultiplier Forwarded to `searchKNNQuantized` only.
 * @returns Surviving candidates as returned by `filterAndDeduplicateResults`;
 *          may hold more or fewer than `k` entries.
 */
private searchCandidates(
  queryVector: Float32Array,
  k: number,
  efSearch: number,
  filter?: Record<string, any>,
  useQuantized: boolean = false,
  candidateMultiplier: number = 3
): Array<{ id: number; distance: number }> {
  const indexSize = this.countWithDeleted();
  if (indexSize === 0) {
    return [];
  }

  // Start by over-fetching (at least 50, at least 2k) to leave room for
  // hits that the filter or tombstones will discard.
  let fetchK = Math.min(Math.max(k * 2, 50), indexSize);
  let fetchEf = Math.max(efSearch, fetchK);

  for (;;) {
    const hits = this.filterAndDeduplicateResults(
      useQuantized
        ? this.hnsw.searchKNNQuantized(queryVector, fetchK, candidateMultiplier, fetchEf)
        : this.hnsw.searchKNN(queryVector, fetchK, fetchEf),
      filter
    );

    const exhausted = fetchK >= indexSize;
    if (hits.length >= k || exhausted) {
      return hits;
    }

    // Widen the request; stop if it can no longer grow.
    const widened = Math.min(indexSize, fetchK * 2);
    if (widened === fetchK) {
      return hits;
    }
    fetchK = widened;
    fetchEf = Math.max(fetchEf, fetchK);
  }
}
|
|
714
|
-
|
|
715
|
-
/**
 * Drops tombstoned hits and hits whose metadata fails `filter`, then orders
 * the survivors by ascending distance with the numeric ID as tie-breaker.
 *
 * NOTE(review): despite the name, nothing in this method removes duplicate
 * IDs; it presumably relies on the index returning each ID once. Confirm.
 *
 * @param results Raw hits from the index.
 * @param filter Optional metadata filter; a falsy value disables filtering.
 * @returns A new, sorted array containing the surviving hit objects.
 */
private filterAndDeduplicateResults(
  results: Array<{ id: number; distance: number }>,
  filter?: Record<string, any>
): Array<{ id: number; distance: number }> {
  const survivors = results.filter((hit) => {
    if (this.deletedIds.has(hit.id)) return false;
    if (!filter) return true;
    // Entries without stored metadata are matched against an empty object.
    return this.matchesFilter(this.metadata.get(hit.id) || {}, filter);
  });

  survivors.sort((left, right) => {
    const byDistance = left.distance - right.distance;
    return byDistance !== 0 ? byDistance : left.id - right.id;
  });

  return survivors;
}
|
|
743
|
-
|
|
744
|
-
/**
 * Converts internal candidates into the public result shape, resolving each
 * numeric ID to its external string ID and attaching stored metadata.
 *
 * Candidates whose numeric ID has no external mapping are skipped, and the
 * scan continues past them so the result still holds up to `k` entries when
 * enough mapped candidates exist.
 *
 * @param candidates Candidates in the order they should be returned.
 * @param k Maximum number of results to produce.
 * @returns Parallel `ids`, `distances` and `metadata` arrays of equal length.
 */
private materializeResults(
  candidates: Array<{ id: number; distance: number }>,
  k: number
): QueryResult {
  const ids: string[] = [];
  const distances: number[] = [];
  const metadata: Array<Record<string, any>> = [];

  for (const candidate of candidates) {
    if (ids.length >= k) break;

    const id = this.idReverseMap.get(candidate.id);
    // Compare against undefined explicitly: the empty string is a legal
    // external ID (add-time validation only requires a string), and a
    // truthiness check would silently drop it from every result set.
    if (id === undefined) continue;

    ids.push(id);
    distances.push(candidate.distance);
    // Entries without stored metadata are reported with an empty object.
    metadata.push(this.metadata.get(candidate.id) || {});
  }

  return { ids, distances, metadata };
}
|
|
772
|
-
|
|
773
|
-
/**
 * Checks a query configuration and throws on the first problem found.
 *
 * Checks run in this order: config shape, query vector type, `k`, vector
 * dimension, finite vector components, `efSearch`, then `filter`.
 *
 * @param config Query configuration to check.
 * @param context Label used as the subject of most error messages.
 * @throws VectorDBError with code 'VALIDATION_ERROR' for invalid values.
 * @throws DimensionMismatchError when the vector length differs from the
 *         collection dimension.
 */
private validateQueryInput(config: QueryConfig, context: string): void {
  if (!this.isPlainObject(config)) {
    throw new VectorDBError(`${context} config must be a plain object`, 'VALIDATION_ERROR');
  }

  const { queryVector, k, efSearch, filter } = config;

  const isVectorLike = Array.isArray(queryVector) || queryVector instanceof Float32Array;
  if (!isVectorLike) {
    throw new VectorDBError(`${context} must include a queryVector (number[] or Float32Array)`, 'VALIDATION_ERROR');
  }

  if (!(Number.isInteger(k) && k > 0)) {
    throw new VectorDBError(
      `k must be a positive integer, got ${k}`,
      'VALIDATION_ERROR'
    );
  }

  const actualDimension = queryVector.length;
  if (actualDimension !== this.dimension) {
    throw new DimensionMismatchError(this.dimension, actualDimension, context);
  }

  // Reject NaN, +/-Infinity and non-number entries component by component.
  let dim = 0;
  while (dim < actualDimension) {
    const component = queryVector[dim];
    if (!Number.isFinite(component)) {
      throw new VectorDBError(
        `${context} contains non-finite value at dimension ${dim}: ${component}`,
        'VALIDATION_ERROR'
      );
    }
    dim++;
  }

  if (efSearch !== undefined && (!Number.isInteger(efSearch) || efSearch <= 0)) {
    throw new VectorDBError(
      `efSearch must be a positive integer, got ${efSearch}`,
      'VALIDATION_ERROR'
    );
  }

  if (filter === undefined) {
    return;
  }
  if (!this.isPlainObject(filter)) {
    throw new VectorDBError('Filter must be a plain object', 'VALIDATION_ERROR');
  }
  this.validateFilterOperators(filter);
}
|
|
819
|
-
|
|
820
|
-
/**
 * Reports whether `filter` is present and has at least one own enumerable key.
 *
 * @param filter Optional filter object.
 * @returns `false` for a missing or empty filter, `true` otherwise.
 */
private hasFilter(filter?: Record<string, any>): boolean {
  if (!filter) {
    return false;
  }
  return Object.keys(filter).length > 0;
}
|
|
823
|
-
|
|
824
|
-
/**
 * Brings the ID maps, tombstones, metadata and counters into agreement with
 * the vectors actually present in the index.
 *
 * Steps, in order:
 *  1. collect the numeric IDs present in the index and the largest one;
 *  2. drop external-ID mappings that point at IDs missing from the index;
 *  3. rebuild the reverse map, rejecting two external IDs sharing a numeric ID;
 *  4. drop tombstones for IDs missing from the index;
 *  5. drop metadata for IDs without a reverse mapping;
 *  6. reject indexed, non-deleted IDs that have no mapping;
 *  7. reject a largest ID above `Collection.MAX_INTERNAL_ID`;
 *  8. recompute `nextNumericId` and `activeCount`.
 *
 * @throws VectorDBError with code 'CORRUPT_COLLECTION' for steps 3, 6 and 7.
 */
private reconcileLoadedState(): void {
  // 1. What the index really contains.
  const indexedIds = new Set<number>();
  let highestId = -1;
  for (const [numericId] of this.hnsw.getAllVectors()) {
    indexedIds.add(numericId);
    if (numericId > highestId) {
      highestId = numericId;
    }
  }

  // 2. Forget mappings whose vector is gone (iterate over a snapshot).
  for (const [externalId, numericId] of [...this.idMap]) {
    if (!indexedIds.has(numericId)) {
      this.idMap.delete(externalId);
    }
  }

  // 3. Rebuild numeric -> external lookup from the cleaned forward map.
  this.idReverseMap.clear();
  for (const [externalId, numericId] of this.idMap) {
    if (this.idReverseMap.has(numericId)) {
      throw new VectorDBError(
        `Collection '${this.name}' metadata is corrupt: duplicate numeric ID ${numericId}`,
        'CORRUPT_COLLECTION'
      );
    }
    this.idReverseMap.set(numericId, externalId);
  }

  // 4. Tombstones for vectors that no longer exist are meaningless.
  for (const numericId of [...this.deletedIds]) {
    if (!indexedIds.has(numericId)) {
      this.deletedIds.delete(numericId);
    }
  }

  // 5. Metadata without an owning mapping is dropped.
  for (const numericId of [...this.metadata.keys()]) {
    if (!this.idReverseMap.has(numericId)) {
      this.metadata.delete(numericId);
    }
  }

  // 6. Every live vector must be reachable through an external ID.
  let unmappedActive = 0;
  for (const numericId of indexedIds) {
    if (!this.deletedIds.has(numericId) && !this.idReverseMap.has(numericId)) {
      unmappedActive++;
    }
  }
  if (unmappedActive > 0) {
    throw new VectorDBError(
      `Collection '${this.name}' metadata is inconsistent with index: missing ${unmappedActive} active ID mapping(s)`,
      'CORRUPT_COLLECTION'
    );
  }

  // 7. Guard the internal ID range.
  if (highestId > Collection.MAX_INTERNAL_ID) {
    throw new VectorDBError(
      `Collection '${this.name}' contains unsupported internal ID ${highestId} (max ${Collection.MAX_INTERNAL_ID})`,
      'CORRUPT_COLLECTION'
    );
  }

  // 8. Counters derived from the reconciled state.
  this.nextNumericId = highestId + 1;
  let live = 0;
  for (const numericId of this.idMap.values()) {
    if (!this.deletedIds.has(numericId)) {
      live++;
    }
  }
  this.activeCount = live;
}
|
|
892
|
-
|
|
893
|
-
/**
 * Tests one metadata record against a filter. Every filter key must match.
 *
 * A filter value is either a literal (compared with `deepEqual`) or an
 * operator object whose keys all start with `$` and may combine `$eq`,
 * `$ne`, `$gt`, `$lt`, `$gte`, `$lte`, `$in` and `$nin`. Only own properties
 * of `metadata` are consulted; a missing field is treated as `undefined`.
 *
 * @param metadata Stored metadata of the candidate.
 * @param filter Filter to evaluate.
 * @returns `true` when every filter key is satisfied.
 * @throws VectorDBError with code 'VALIDATION_ERROR' for an operator that is
 *         not in `SUPPORTED_OPERATORS`.
 */
private matchesFilter(metadata: Record<string, any>, filter: Record<string, any>): boolean {
  for (const key of Object.keys(filter)) {
    const value = filter[key];
    const metaValue = Object.prototype.hasOwnProperty.call(metadata, key)
      ? metadata[key]
      : undefined;

    // Literal filter value: plain deep equality.
    if (!this.isOperatorObject(value)) {
      if (!this.deepEqual(metaValue, value)) return false;
      continue;
    }

    for (const operator of Object.keys(value)) {
      if (!SUPPORTED_OPERATORS.has(operator)) {
        throw new VectorDBError(`Unsupported filter operator '${operator}'`, 'VALIDATION_ERROR');
      }
    }

    // A missing field can only satisfy the negative operators.
    if (metaValue === undefined && value.$ne === undefined && value.$nin === undefined) {
      return false;
    }

    if (value.$eq !== undefined && !this.deepEqual(metaValue, value.$eq)) return false;
    if (value.$ne !== undefined && this.deepEqual(metaValue, value.$ne)) return false;

    // Range operators must reject absent values explicitly. `undefined`
    // compares false against everything, so a negated comparison would pass;
    // `null` is coerced to 0 by the relational operators, so `null >= 0` or
    // `null < 1` would otherwise count as a match.
    const comparable = metaValue !== undefined && metaValue !== null;
    if (value.$gt !== undefined && !(comparable && metaValue > value.$gt)) return false;
    if (value.$lt !== undefined && !(comparable && metaValue < value.$lt)) return false;
    if (value.$gte !== undefined && !(comparable && metaValue >= value.$gte)) return false;
    if (value.$lte !== undefined && !(comparable && metaValue <= value.$lte)) return false;

    // A non-array operand for $in / $nin never matches.
    if (value.$in !== undefined) {
      if (!Array.isArray(value.$in) || !this.matchesInOperator(metaValue, value.$in)) return false;
    }

    if (value.$nin !== undefined) {
      if (!Array.isArray(value.$nin)) return false;
      if (this.matchesInOperator(metaValue, value.$nin)) return false;
    }
  }

  return true;
}
|
|
936
|
-
|
|
937
|
-
/**
 * Rejects filters that use an operator outside `SUPPORTED_OPERATORS`.
 * Only top-level filter values that are operator objects are inspected.
 *
 * @param filter Filter whose operator objects should be checked.
 * @throws VectorDBError with code 'VALIDATION_ERROR' naming the first
 *         unsupported operator encountered.
 */
private validateFilterOperators(filter: Record<string, any>): void {
  for (const condition of Object.values(filter)) {
    if (!this.isOperatorObject(condition)) continue;

    const unsupported = Object.keys(condition).find((name) => !SUPPORTED_OPERATORS.has(name));
    if (unsupported !== undefined) {
      throw new VectorDBError(`Unsupported filter operator '${unsupported}'`, 'VALIDATION_ERROR');
    }
  }
}
|
|
948
|
-
|
|
949
|
-
/**
 * Checks an add/upsert payload and throws on the first problem found.
 *
 * Checks run in this order: config shape, `ids`/`vectors` are arrays of equal
 * length, `metadata` shape and per-entry validity, ID types and duplicates,
 * then vector types, dimensions and finite components.
 *
 * @param config Payload with `ids`, `vectors` and optional `metadata`.
 * @param allowExistingIds When false, IDs already present in the collection
 *        are reported as duplicates.
 * @throws VectorDBError with code 'VALIDATION_ERROR' for malformed input.
 * @throws DuplicateVectorError listing every duplicated ID.
 * @throws DimensionMismatchError when a vector has the wrong length.
 */
private validateAddInput(config: AddConfig, allowExistingIds: boolean): void {
  if (!this.isPlainObject(config)) {
    throw new VectorDBError('Add config must be a plain object', 'VALIDATION_ERROR');
  }

  const { ids, vectors, metadata } = config;

  if (!Array.isArray(ids)) {
    throw new VectorDBError('ids must be an array of strings', 'VALIDATION_ERROR');
  }
  if (!Array.isArray(vectors)) {
    throw new VectorDBError('vectors must be an array of number[] or Float32Array', 'VALIDATION_ERROR');
  }
  if (vectors.length !== ids.length) {
    throw new VectorDBError('Number of vectors must match number of IDs', 'VALIDATION_ERROR');
  }

  if (metadata !== undefined) {
    if (!Array.isArray(metadata)) {
      throw new VectorDBError('metadata must be an array when provided', 'VALIDATION_ERROR');
    }
    if (metadata.length !== ids.length) {
      throw new VectorDBError('Number of metadata entries must match number of IDs', 'VALIDATION_ERROR');
    }

    for (const [index, meta] of metadata.entries()) {
      // An undefined slot means "no metadata for this vector".
      if (meta === undefined) continue;
      if (!this.isPlainObject(meta)) {
        throw new VectorDBError(
          `Metadata at index ${index} must be a plain object`,
          'VALIDATION_ERROR'
        );
      }
      // Serialization is attempted only to surface values that make
      // JSON.stringify throw; the output itself is discarded.
      try {
        JSON.stringify(meta);
      } catch {
        throw new VectorDBError(
          `Metadata at index ${index} is not JSON-serializable`,
          'VALIDATION_ERROR'
        );
      }
    }
  }

  // Collect every duplicate (within the batch or against stored IDs) so the
  // error can list them all at once.
  const seen = new Set<string>();
  const clashes = new Set<string>();
  for (const [index, id] of ids.entries()) {
    if (typeof id !== 'string') {
      throw new VectorDBError(
        `ID at index ${index} must be a string`,
        'VALIDATION_ERROR'
      );
    }
    if (seen.has(id)) {
      clashes.add(id);
    } else {
      seen.add(id);
    }
    if (!allowExistingIds && this.idMap.has(id)) {
      clashes.add(id);
    }
  }
  if (clashes.size > 0) {
    throw new DuplicateVectorError([...clashes]);
  }

  for (const [index, vector] of vectors.entries()) {
    const isVectorLike = Array.isArray(vector) || vector instanceof Float32Array;
    if (!isVectorLike) {
      throw new VectorDBError(
        `Vector at index ${index} must be a number[] or Float32Array`,
        'VALIDATION_ERROR'
      );
    }
    if (vector.length !== this.dimension) {
      throw new DimensionMismatchError(this.dimension, vector.length, `Vector at index ${index}`);
    }
    for (let d = 0; d < vector.length; d++) {
      const component = vector[d];
      if (!Number.isFinite(component)) {
        throw new VectorDBError(
          `Vector at index ${index} contains non-finite value at dimension ${d}: ${component}`,
          'VALIDATION_ERROR'
        );
      }
    }
  }
}
|
|
1038
|
-
|
|
1039
|
-
/**
 * Parses one line of the persisted metadata file.
 *
 * Two layouts are recognised:
 *  - JSON lines: `{"id": ..., "numericId": ..., "metadata": ...}` when the
 *    trimmed line starts with `{`;
 *  - legacy: `id<TAB>numericId[<TAB>metadataJson]`.
 *
 * @param line Raw line from the metadata file.
 * @returns The parsed record, or `null` for a blank line or a record whose
 *          fields have the wrong shape.
 * @throws SyntaxError from `JSON.parse` when the JSON text is malformed;
 *         this method does not catch it.
 */
private parseMetadataLine(line: string): { id: string; internalId: number; metadata?: Record<string, any> } | null {
  const trimmed = line.trim();
  if (trimmed.length === 0) return null;

  // v2+ JSONL format
  if (trimmed.startsWith('{')) {
    const { id, numericId, metadata } = JSON.parse(trimmed) as {
      id?: unknown;
      numericId?: unknown;
      metadata?: unknown;
    };

    if (typeof id !== 'string') return null;
    if (!Number.isInteger(numericId) || (numericId as number) < 0) return null;
    if (metadata !== undefined && !this.isPlainObject(metadata)) return null;

    return {
      id,
      internalId: numericId as number,
      metadata: metadata as Record<string, any> | undefined,
    };
  }

  // Legacy tab-separated format. The untrimmed line is split, so the ID
  // keeps any surrounding whitespace it was stored with.
  const [id, rawInternalId, ...metadataParts] = line.split('\t');
  if (rawInternalId === undefined) return null;

  const internalId = parseInt(rawInternalId, 10);
  if (!Number.isInteger(internalId) || internalId < 0) return null;

  let metadata: Record<string, any> | undefined;
  if (metadataParts.length > 0) {
    // The metadata JSON may itself contain tabs, so re-join the remainder.
    const parsed = JSON.parse(metadataParts.join('\t'));
    if (!this.isPlainObject(parsed)) return null;
    metadata = parsed as Record<string, any>;
  }

  return { id, internalId, metadata };
}
|
|
1080
|
-
|
|
1081
|
-
/**
 * Type guard for "non-null object that is not an array".
 *
 * No prototype check is made, so any non-array object passes.
 *
 * @param value Value to test.
 * @returns `true` when `value` is a non-null, non-array object.
 */
private isPlainObject(value: unknown): value is Record<string, any> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
|
1084
|
-
|
|
1085
|
-
/**
 * Type guard for a filter operator object: a non-empty object whose own
 * enumerable keys all begin with `$`.
 *
 * @param value Filter value to classify.
 * @returns `true` when `value` should be interpreted as operators rather
 *          than as a literal to compare against.
 */
private isOperatorObject(value: unknown): value is Record<string, any> {
  if (!this.isPlainObject(value)) return false;

  const names = Object.keys(value);
  return names.length > 0 && names.every((name) => name.startsWith('$'));
}
|
|
1094
|
-
|
|
1095
|
-
/**
 * Structural equality for arrays and objects, `Object.is` for everything else.
 *
 * Arrays are equal when they have the same length and pairwise-equal
 * elements. Objects are equal when they have the same number of own
 * enumerable keys and each key of `a` exists on `b` with an equal value.
 * An array is never equal to a non-array.
 *
 * @param a First value.
 * @param b Second value.
 * @returns `true` when the two values are considered equal.
 */
private deepEqual(a: unknown, b: unknown): boolean {
  if (Object.is(a, b)) return true;

  if (Array.isArray(a) && Array.isArray(b)) {
    if (a.length !== b.length) return false;
    for (const [index, element] of a.entries()) {
      if (!this.deepEqual(element, b[index])) return false;
    }
    return true;
  }

  if (this.isPlainObject(a) && this.isPlainObject(b)) {
    const ownKeys = Object.keys(a);
    if (ownKeys.length !== Object.keys(b).length) return false;
    return ownKeys.every(
      (key) => Object.prototype.hasOwnProperty.call(b, key) && this.deepEqual(a[key], b[key])
    );
  }

  return false;
}
|
|
1119
|
-
|
|
1120
|
-
/**
 * Membership test behind the `$in` / `$nin` operators.
 *
 * A scalar metadata value matches when it deep-equals any candidate. An
 * array metadata value matches when any of its elements deep-equals any
 * candidate.
 *
 * @param metaValue Stored metadata value (scalar or array).
 * @param candidates Operand list of the operator.
 * @returns `true` when at least one pairing is equal.
 */
private matchesInOperator(metaValue: unknown, candidates: unknown[]): boolean {
  // Treat a scalar as a one-element list so both cases share one loop.
  const items: unknown[] = Array.isArray(metaValue) ? metaValue : [metaValue];

  for (const candidate of candidates) {
    for (const item of items) {
      if (this.deepEqual(item, candidate)) return true;
    }
  }
  return false;
}
|
|
1135
|
-
|
|
1136
|
-
/**
 * Compact the collection by rebuilding the index without deleted vectors.
 * This permanently removes tombstoned vectors and reclaims space.
 *
 * Surviving vectors are renumbered densely from 0 in index iteration order.
 * The rebuilt state is swapped in and persisted with `saveToDisk()`; if
 * persisting fails, the previous in-memory state and storage keys are
 * restored and the error is re-thrown.
 *
 * NOTE(review): the replacement index is constructed from dimension, metric,
 * M and efConstruction only. If the live index carries further configuration
 * (for example quantization), it is not re-applied here. Confirm.
 *
 * @returns Number of vectors removed during compaction (the tombstone count
 *          at the time of the call); 0 when there was nothing to compact.
 */
async compact(): Promise<number> {
  const removedCount = this.deletedIds.size;
  if (removedCount === 0) return 0;

  // Build the replacement mappings and bulk-insert payload in one pass.
  const rebuiltIdMap = new Map<string, number>();
  const rebuiltIdReverseMap = new Map<number, string>();
  const rebuiltMetadata = new Map<number, Record<string, any>>();
  const rebuiltDeletedIds = new Set<number>();
  const points: Array<{ id: number; vector: Float32Array }> = [];

  for (const [numericId, vector] of this.hnsw.getAllVectors()) {
    if (this.deletedIds.has(numericId)) continue;

    const stringId = this.idReverseMap.get(numericId);
    // Compare against undefined explicitly: the empty string is a legal
    // external ID, and a truthiness check would silently discard that
    // vector during compaction.
    if (stringId === undefined) continue;

    const newNumericId = points.length;
    rebuiltIdMap.set(stringId, newNumericId);
    rebuiltIdReverseMap.set(newNumericId, stringId);

    const meta = this.metadata.get(numericId);
    if (meta) {
      rebuiltMetadata.set(newNumericId, meta);
    }

    points.push({ id: newNumericId, vector });
  }

  // Rebuild into temporary state first so we can roll back on failure.
  const rebuiltHnsw = new HNSWIndex(
    this.dimension,
    this.metric,
    this.M,
    this.efConstruction
  );

  // Use bulk insert for better performance; release the new index if it fails.
  try {
    await rebuiltHnsw.addPointsBulk(points);
  } catch (err) {
    rebuiltHnsw.destroy();
    throw err;
  }

  const previousState = {
    hnsw: this.hnsw,
    idMap: this.idMap,
    idReverseMap: this.idReverseMap,
    metadata: this.metadata,
    deletedIds: this.deletedIds,
    nextNumericId: this.nextNumericId,
    activeCount: this.activeCount,
    indexKey: this.indexKey,
    metaKey: this.metaKey,
    deletedKey: this.deletedKey,
  };

  this.hnsw = rebuiltHnsw;
  this.idMap = rebuiltIdMap;
  this.idReverseMap = rebuiltIdReverseMap;
  this.metadata = rebuiltMetadata;
  this.deletedIds = rebuiltDeletedIds;
  this.nextNumericId = points.length;
  this.activeCount = points.length;

  try {
    await this.saveToDisk();
  } catch (err) {
    // Persisting failed: discard the rebuilt index and restore everything.
    this.hnsw.destroy();
    this.hnsw = previousState.hnsw;
    this.idMap = previousState.idMap;
    this.idReverseMap = previousState.idReverseMap;
    this.metadata = previousState.metadata;
    this.deletedIds = previousState.deletedIds;
    this.nextNumericId = previousState.nextNumericId;
    this.activeCount = previousState.activeCount;
    this.setActiveStorageKeys(previousState.indexKey, previousState.metaKey, previousState.deletedKey);
    throw err;
  }

  // The new state is durable; the old index can be released.
  previousState.hnsw.destroy();
  return removedCount;
}
|
|
1239
|
-
|
|
1240
|
-
/**
 * Destroy the collection, freeing all in-memory resources.
 *
 * If saving is requested and `saveToDisk()` rejects, the rejection
 * propagates and nothing is released.
 *
 * @param save Whether to persist data to storage before destroying (default: true).
 *             Pass false when the collection is being deleted entirely.
 */
async destroy(save: boolean = true): Promise<void> {
  if (save) {
    await this.saveToDisk();
  }

  // Release the HNSW index if it exposes a destroy hook.
  const index = this.hnsw;
  if (index && typeof index.destroy === 'function') {
    index.destroy();
  }

  // Empty every lookup structure and reset the live counter.
  for (const store of [this.idMap, this.idReverseMap, this.metadata, this.deletedIds]) {
    store.clear();
  }
  this.activeCount = 0;
}
|
|
1262
|
-
}
|