verso-db 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +13 -7
  3. package/dist/BinaryHeap.d.ts +11 -1
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +30 -4
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +59 -0
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -64
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +57 -4
  20. package/dist/SearchWorker.d.ts.map +1 -1
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts.map +1 -1
  24. package/dist/VectorDB.js +246 -0
  25. package/dist/VectorDB.js.map +1 -0
  26. package/dist/WorkerPool.d.ts +32 -2
  27. package/dist/WorkerPool.d.ts.map +1 -1
  28. package/dist/WorkerPool.js +266 -0
  29. package/dist/WorkerPool.js.map +1 -0
  30. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  31. package/dist/backends/JsDistanceBackend.js +163 -0
  32. package/dist/backends/JsDistanceBackend.js.map +1 -0
  33. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  34. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  35. package/dist/encoding/DeltaEncoder.js +199 -0
  36. package/dist/encoding/DeltaEncoder.js.map +1 -0
  37. package/dist/errors.js +97 -0
  38. package/dist/errors.js.map +1 -0
  39. package/dist/index.d.ts +3 -3
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +61 -42
  42. package/dist/index.js.map +1 -9
  43. package/dist/presets.js +205 -0
  44. package/dist/presets.js.map +1 -0
  45. package/dist/quantization/ScalarQuantizer.d.ts +0 -34
  46. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  47. package/dist/quantization/ScalarQuantizer.js +346 -0
  48. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  49. package/dist/storage/BatchWriter.js +351 -0
  50. package/dist/storage/BatchWriter.js.map +1 -0
  51. package/dist/storage/BunStorageBackend.d.ts +7 -3
  52. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  53. package/dist/storage/BunStorageBackend.js +182 -0
  54. package/dist/storage/BunStorageBackend.js.map +1 -0
  55. package/dist/storage/MemoryBackend.js +109 -0
  56. package/dist/storage/MemoryBackend.js.map +1 -0
  57. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  58. package/dist/storage/OPFSBackend.js +325 -0
  59. package/dist/storage/OPFSBackend.js.map +1 -0
  60. package/dist/storage/StorageBackend.js +12 -0
  61. package/dist/storage/StorageBackend.js.map +1 -0
  62. package/dist/storage/WriteAheadLog.js +321 -0
  63. package/dist/storage/WriteAheadLog.js.map +1 -0
  64. package/dist/storage/createStorageBackend.d.ts +4 -0
  65. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  66. package/dist/storage/createStorageBackend.js +119 -0
  67. package/dist/storage/createStorageBackend.js.map +1 -0
  68. package/{src/storage/index.ts → dist/storage/index.js} +7 -27
  69. package/dist/storage/index.js.map +1 -0
  70. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  71. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  72. package/dist/storage/nodeFsRuntime.js +105 -0
  73. package/dist/storage/nodeFsRuntime.js.map +1 -0
  74. package/package.json +9 -7
  75. package/src/BinaryHeap.ts +0 -136
  76. package/src/Collection.ts +0 -1262
  77. package/src/HNSWIndex.ts +0 -2894
  78. package/src/MaxBinaryHeap.ts +0 -181
  79. package/src/SearchWorker.ts +0 -264
  80. package/src/VectorDB.ts +0 -319
  81. package/src/WorkerPool.ts +0 -222
  82. package/src/backends/JsDistanceBackend.ts +0 -171
  83. package/src/encoding/DeltaEncoder.ts +0 -236
  84. package/src/errors.ts +0 -110
  85. package/src/index.ts +0 -106
  86. package/src/presets.ts +0 -229
  87. package/src/quantization/ScalarQuantizer.ts +0 -487
  88. package/src/storage/BatchWriter.ts +0 -420
  89. package/src/storage/BunStorageBackend.ts +0 -199
  90. package/src/storage/MemoryBackend.ts +0 -122
  91. package/src/storage/OPFSBackend.ts +0 -348
  92. package/src/storage/StorageBackend.ts +0 -74
  93. package/src/storage/WriteAheadLog.ts +0 -379
  94. package/src/storage/createStorageBackend.ts +0 -137
package/src/MaxBinaryHeap.ts (file removed)
@@ -1,181 +0,0 @@
1
/**
 * MaxBinaryHeap - A max-heap implementation for HNSW result tracking.
 *
 * In HNSW search, we need to track the top-k closest results efficiently.
 * A max-heap keeps the FURTHEST element at the top, allowing O(log n) eviction
 * of the worst result when a better one is found.
 *
 * Entries are (id, distance) pairs held in two parallel typed arrays, so ids
 * are stored as unsigned 32-bit integers and distances as 32-bit floats.
 *
 * This complements BinaryHeap (min-heap) which is used for candidate exploration.
 */
export class MaxBinaryHeap {
  private ids: Uint32Array;
  private dists: Float32Array;
  private _size: number;
  private capacity: number;

  /**
   * @param capacity Initial number of slots to allocate. The heap grows on
   *   demand, so this is a sizing hint rather than a hard limit.
   * @throws RangeError (from the typed-array constructor) if `capacity` is
   *   negative or too large to allocate.
   */
  constructor(capacity: number) {
    this.ids = new Uint32Array(capacity);
    this.dists = new Float32Array(capacity);
    // Typed-array constructors truncate fractional lengths and turn NaN into 0.
    // Track the length that was really allocated so the growth check in push()
    // can never miss and write past the end of the arrays.
    this.capacity = this.ids.length;
    this._size = 0;
  }

  /**
   * Push an element onto the heap.
   * Auto-grows when at capacity to avoid silently dropping elements.
   *
   * @param id Identifier of the element (stored as an unsigned 32-bit integer).
   * @param dist Distance used as the ordering key (stored as a 32-bit float).
   */
  push(id: number, dist: number): void {
    if (this._size >= this.capacity) {
      this.grow();
    }

    this.ids[this._size] = id;
    this.dists[this._size] = dist;
    this._size++;
    this.heapifyUp(this._size - 1);
  }

  /**
   * Double the heap capacity when full (never less than 16 slots).
   */
  private grow(): void {
    const newCapacity = Math.max(this.capacity * 2, 16);
    const newIds = new Uint32Array(newCapacity);
    const newDists = new Float32Array(newCapacity);
    newIds.set(this.ids);
    newDists.set(this.dists);
    this.ids = newIds;
    this.dists = newDists;
    this.capacity = newCapacity;
  }

  /**
   * Remove and return the ID of the maximum element (furthest distance).
   * Returns -1 if heap is empty.
   */
  pop(): number {
    if (this._size === 0) return -1;

    const result = this.ids[0];
    this._size--;

    if (this._size > 0) {
      // Move the last leaf to the root, then sift it down into place.
      this.ids[0] = this.ids[this._size];
      this.dists[0] = this.dists[this._size];
      this.heapifyDown(0);
    }

    return result;
  }

  /**
   * Peek at the ID of the maximum element without removing it.
   * Returns -1 if heap is empty.
   */
  peek(): number {
    return this._size > 0 ? this.ids[0] : -1;
  }

  /**
   * Peek at the distance of the maximum element (furthest from query).
   * Returns -Infinity if heap is empty.
   */
  peekValue(): number {
    return this._size > 0 ? this.dists[0] : -Infinity;
  }

  /**
   * Get the current number of elements in the heap.
   */
  size(): number {
    return this._size;
  }

  /**
   * Check if the heap is empty.
   */
  isEmpty(): boolean {
    return this._size === 0;
  }

  /**
   * Clear all elements from the heap.
   * Only the element count is reset; the allocated arrays are kept for reuse.
   */
  clear(): void {
    this._size = 0;
  }

  /**
   * Get the capacity of the heap (number of currently allocated slots).
   */
  getCapacity(): number {
    return this.capacity;
  }

  /**
   * Heapify up: restore max-heap property after insertion.
   * Parent should be LARGER than children in a max-heap.
   */
  private heapifyUp(index: number): void {
    // Cache array references to avoid repeated property lookups
    const ids = this.ids;
    const dists = this.dists;

    while (index > 0) {
      // Use bitwise shift for faster integer division
      const parentIndex = (index - 1) >> 1;
      // MAX-HEAP: Parent should be >= child
      if (dists[parentIndex] >= dists[index]) break;

      // Inline swap for performance
      const tmpId = ids[index];
      ids[index] = ids[parentIndex];
      ids[parentIndex] = tmpId;

      const tmpDist = dists[index];
      dists[index] = dists[parentIndex];
      dists[parentIndex] = tmpDist;

      index = parentIndex;
    }
  }

  /**
   * Heapify down: restore max-heap property after removal.
   * Find largest among node and children, swap if needed.
   */
  private heapifyDown(index: number): void {
    // Cache array references to avoid repeated property lookups
    const ids = this.ids;
    const dists = this.dists;
    const size = this._size;

    while (true) {
      const leftChild = (index << 1) + 1; // 2 * index + 1
      const rightChild = leftChild + 1; // 2 * index + 2
      let largest = index;

      // MAX-HEAP: Find largest among node and children
      if (leftChild < size && dists[leftChild] > dists[largest]) {
        largest = leftChild;
      }

      if (rightChild < size && dists[rightChild] > dists[largest]) {
        largest = rightChild;
      }

      if (largest === index) break;

      // Inline swap for performance
      const tmpId = ids[index];
      ids[index] = ids[largest];
      ids[largest] = tmpId;

      const tmpDist = dists[index];
      dists[index] = dists[largest];
      dists[largest] = tmpDist;

      index = largest;
    }
  }
}
package/src/SearchWorker.ts (file removed)
@@ -1,264 +0,0 @@
1
- /**
2
- * Search worker entry point for parallel query processing.
3
- * Each worker maintains independent search state (visited array, heaps)
4
- * and operates on shared read-only vector data.
5
- */
6
-
7
- import { dotProductFast, l2SquaredFast, normalizeInPlace } from './backends/JsDistanceBackend';
8
- import { BinaryHeap } from './BinaryHeap';
9
- import { MaxBinaryHeap } from './MaxBinaryHeap';
10
-
11
/**
 * Snapshot of an index handed to a worker so it can answer searches on its own.
 */
export interface WorkerInitData {
  /** All vectors back to back: node `i` occupies `[i * dimension, (i + 1) * dimension)`. */
  flatVectors: Float32Array;
  /** Number of components per vector. */
  dimension: number;
  /** Number of nodes encoded in `graphData`. */
  nodeCount: number;
  /** Distance metric used to rank results. */
  metric: 'cosine' | 'euclidean' | 'dot_product';
  /** Node where every search starts; `-1` means the index is empty. */
  entryPointId: number;
  /** Highest graph layer; the greedy descent starts here and works down to layer 1. */
  maxLevel: number;
  /** Stored by the worker but not read by its search code (presumably max links per upper layer; confirm with HNSWIndex). */
  M: number;
  /** Stored by the worker but not read by its search code (presumably max links on layer 0; confirm with HNSWIndex). */
  M0: number;
  /**
   * Graph structure: serialized neighbor lists.
   * Per node: one uint8 layer count; per layer: a little-endian uint16
   * neighbor count followed by that many little-endian uint32 neighbor ids.
   */
  graphData: ArrayBuffer;
  /** Stored by the worker but not read by its search code (presumably each node's top layer; confirm with HNSWIndex). */
  nodeLevels: Uint8Array;
}

/** Message that delivers the index snapshot to a worker. */
interface WorkerInitMessage extends WorkerInitData {
  type: 'init';
}

/** Request for the `k` nearest neighbours of `query`. */
interface WorkerSearchMessage {
  type: 'search';
  /** Echoed back on the reply so the sender can match it to this request. */
  queryId: number;
  query: Float32Array;
  k: number;
  /** Size of the layer-0 candidate list; a falsy value selects `max(k * 2, 50)`. */
  efSearch: number;
}

/** Reply to a `WorkerSearchMessage`. */
interface WorkerSearchResult {
  type: 'result';
  queryId: number;
  /** Matches ordered by ascending distance. */
  results: { id: number; distance: number }[];
}
42
-
43
// Only run as worker if loaded in worker context.
// NOTE(review): a browser main thread also exposes `self.postMessage`, so this
// guard is true there as well - confirm the module is never loaded on a page.
if (typeof self !== 'undefined' && typeof (self as any).postMessage === 'function') {
  // Search state built from the 'init' message; null until that message arrives.
  let state: WorkerSearchState | null = null;

  // Single place that talks to the host, so the `any` cast stays contained.
  const reply = (message: unknown): void => {
    (self as any).postMessage(message);
  };

  /**
   * Message protocol:
   *  - { type: 'init', ...WorkerInitData }  -> { type: 'ready' }
   *  - { type: 'search', queryId, query, k, efSearch }
   *        -> { type: 'result', queryId, results }
   *        or { type: 'error', queryId, error } when the search cannot run.
   * Messages with any other `type` are ignored.
   */
  self.onmessage = (event: MessageEvent) => {
    const msg = event.data;

    if (msg.type === 'init') {
      state = new WorkerSearchState(msg);
      reply({ type: 'ready' });
    } else if (msg.type === 'search') {
      if (!state) {
        reply({ type: 'error', queryId: msg.queryId, error: 'Worker not initialized' });
        return;
      }

      try {
        const results = state.searchKNN(msg.query, msg.k, msg.efSearch);
        reply({ type: 'result', queryId: msg.queryId, results } as WorkerSearchResult);
      } catch (err: unknown) {
        // Without this, a throwing search leaves the request unanswered: the
        // failure would surface without the queryId needed to match it to a query.
        const error = err instanceof Error ? err.message : String(err);
        reply({ type: 'error', queryId: msg.queryId, error });
      }
    }
  };
}
63
-
64
/** Initial slot count for the per-worker candidate and result heaps. */
const WORKER_HEAP_INITIAL_CAPACITY = 512;

/**
 * Independent search state for a single worker.
 * Mirrors the essential search logic from HNSWIndex without mutation.
 *
 * The instance deserializes the graph once in its constructor and then reuses
 * its own visited array, heaps and query buffer for every search.
 */
export class WorkerSearchState {
  private flatVectors: Float32Array;
  private dimension: number;
  private nodeCount: number;
  private metric: 'cosine' | 'euclidean' | 'dot_product';
  private entryPointId: number;
  private maxLevel: number;
  private M: number;
  private M0: number;
  private nodeLevels: Uint8Array;
  // Deserialized neighbor lists: neighbors[nodeId][layer] = number[]
  private neighbors: number[][][];

  // Independent search state
  // visitedArray[id] === visitedGeneration means "seen during the current pass".
  private visitedArray: Uint16Array;
  private visitedGeneration: number = 1;
  private candidatesHeap: BinaryHeap;
  private resultsHeap: MaxBinaryHeap;
  // Scratch copy of the query, normalized in place for the cosine metric.
  private queryNormBuffer: Float32Array;
  private vectorsAreNormalized: boolean;

  private distanceFn: (a: Float32Array, b: Float32Array) => number;

  /**
   * @param init Index snapshot: flat vector buffer, serialized graph and
   *   search parameters.
   * @throws RangeError (from DataView) if `init.graphData` is shorter than the
   *   encoding of `init.nodeCount` nodes requires.
   */
  constructor(init: WorkerInitData) {
    this.flatVectors = init.flatVectors;
    this.dimension = init.dimension;
    this.nodeCount = init.nodeCount;
    this.metric = init.metric;
    this.entryPointId = init.entryPointId;
    this.maxLevel = init.maxLevel;
    this.M = init.M;
    this.M0 = init.M0;
    this.nodeLevels = init.nodeLevels;
    // Assumes stored vectors are already unit length for cosine; only the
    // query is normalized here - TODO confirm against the index builder.
    this.vectorsAreNormalized = init.metric === 'cosine';

    // Deserialize graph structure
    this.neighbors = this.deserializeGraph(init.graphData);

    // Initialize search state
    this.visitedArray = new Uint16Array(init.nodeCount + 1000);
    this.candidatesHeap = new BinaryHeap(WORKER_HEAP_INITIAL_CAPACITY);
    this.resultsHeap = new MaxBinaryHeap(WORKER_HEAP_INITIAL_CAPACITY);
    this.queryNormBuffer = new Float32Array(init.dimension);

    // Set up distance function (smaller always means closer)
    if (init.metric === 'cosine') {
      this.distanceFn = (a, b) => {
        const dot = dotProductFast(a, b);
        const d = 1 - dot;
        // Clamp rounding noise (and tiny negatives) to exactly 0.
        return d < 1e-10 ? 0 : d;
      };
    } else if (init.metric === 'euclidean') {
      this.distanceFn = (a, b) => Math.sqrt(l2SquaredFast(a, b));
    } else {
      // Negate so that a larger dot product ranks as a smaller distance.
      this.distanceFn = (a, b) => -dotProductFast(a, b);
    }
  }

  /**
   * Decode the serialized neighbor lists.
   *
   * Layout, repeated for each of `nodeCount` nodes: one uint8 layer count,
   * then per layer a little-endian uint16 neighbor count followed by that many
   * little-endian uint32 neighbor ids.
   *
   * @returns `neighbors[nodeId][layer]` as plain number arrays.
   */
  private deserializeGraph(buffer: ArrayBuffer): number[][][] {
    const view = new DataView(buffer);
    const neighbors: number[][][] = new Array(this.nodeCount);
    let offset = 0;

    for (let nodeId = 0; nodeId < this.nodeCount; nodeId++) {
      const numLayers = view.getUint8(offset++);
      const layers: number[][] = new Array(numLayers);

      for (let l = 0; l < numLayers; l++) {
        const numNeighbors = view.getUint16(offset, true);
        offset += 2;
        const neighborList: number[] = new Array(numNeighbors);

        for (let n = 0; n < numNeighbors; n++) {
          neighborList[n] = view.getUint32(offset, true);
          offset += 4;
        }
        layers[l] = neighborList;
      }
      neighbors[nodeId] = layers;
    }

    return neighbors;
  }

  /** Zero-copy view of one node's vector inside the flat buffer. */
  private getVector(nodeId: number): Float32Array {
    const offset = nodeId * this.dimension;
    return this.flatVectors.subarray(offset, offset + this.dimension);
  }

  /**
   * Start a new visited pass by bumping the generation counter instead of
   * zeroing the array. The array is only wiped when the counter nears the
   * Uint16 limit.
   */
  private clearVisited(): void {
    this.visitedGeneration++;
    if (this.visitedGeneration > 65000) {
      this.visitedArray.fill(0);
      this.visitedGeneration = 1;
    }
  }

  /** True if `id` was marked during the current pass. */
  private isVisited(id: number): boolean {
    return id < this.visitedArray.length && this.visitedArray[id] === this.visitedGeneration;
  }

  /** Mark `id` for the current pass, growing the array if `id` is out of range. */
  private markVisited(id: number): void {
    if (id >= this.visitedArray.length) {
      const newArr = new Uint16Array(Math.max(this.visitedArray.length * 2, id + 1000));
      newArr.set(this.visitedArray);
      this.visitedArray = newArr;
    }
    this.visitedArray[id] = this.visitedGeneration;
  }

  /**
   * Find up to `k` nearest neighbours of `query`.
   *
   * @param query Query vector. NOTE(review): its length is not checked against
   *   `dimension`; with the cosine metric a shorter query leaves stale values
   *   in the reused normalization buffer - confirm callers always pass
   *   `dimension` components.
   * @param k Maximum number of results; `k <= 0` yields an empty array.
   * @param efSearch Size of the layer-0 candidate list. A falsy value selects
   *   `max(k * 2, 50)`; values below `k` are raised to `k` so the search can
   *   return `k` results.
   * @returns Matches ordered by ascending distance. Distances are read back
   *   from the results heap, which stores them as 32-bit floats.
   */
  searchKNN(query: Float32Array, k: number, efSearch: number): Array<{ id: number; distance: number }> {
    if (this.entryPointId === -1 || this.nodeCount === 0) return [];
    // Nothing was asked for; also keeps a negative k away from `results.length = k`.
    if (k <= 0) return [];

    const requestedEf = efSearch || Math.max(k * 2, 50);
    // The result list holds at most `ef` entries, so ef < k would cap the
    // answer below k even when more neighbours are reachable.
    const effectiveEf = requestedEf < k ? k : requestedEf;

    let normalizedQuery = query;
    if (this.vectorsAreNormalized) {
      // Normalize a private copy so the caller's array is left untouched.
      this.queryNormBuffer.set(query);
      normalizeInPlace(this.queryNormBuffer);
      normalizedQuery = this.queryNormBuffer;
    }

    // Greedy descent from top to layer 1
    let currentBestId = this.entryPointId;
    let currentBestDist = this.distanceFn(normalizedQuery, this.getVector(this.entryPointId));

    for (let l = this.maxLevel; l > 0; l--) {
      this.clearVisited();
      this.markVisited(currentBestId);
      let improved = true;
      while (improved) {
        improved = false;
        // Nodes that do not reach this layer have no list here.
        const nodeNeighbors = this.neighbors[currentBestId]?.[l] || [];
        for (const nid of nodeNeighbors) {
          if (this.isVisited(nid)) continue;
          this.markVisited(nid);
          const dist = this.distanceFn(normalizedQuery, this.getVector(nid));
          if (dist < currentBestDist) {
            currentBestDist = dist;
            currentBestId = nid;
            improved = true;
          }
        }
      }
    }

    // Search layer 0
    this.clearVisited();
    this.candidatesHeap.clear();
    this.resultsHeap.clear();

    this.candidatesHeap.push(currentBestId, currentBestDist);
    this.resultsHeap.push(currentBestId, currentBestDist);
    this.markVisited(currentBestId);

    let furthestResultDist = currentBestDist;

    while (!this.candidatesHeap.isEmpty()) {
      const candDist = this.candidatesHeap.peekValue();
      // Stop once the result list is full and the best remaining candidate is
      // already worse than the worst kept result.
      if (candDist > furthestResultDist && this.resultsHeap.size() >= effectiveEf) break;

      const candId = this.candidatesHeap.pop();
      const nodeNeighbors = this.neighbors[candId]?.[0] || [];

      for (const neighborId of nodeNeighbors) {
        if (this.isVisited(neighborId)) continue;
        this.markVisited(neighborId);

        const distance = this.distanceFn(normalizedQuery, this.getVector(neighborId));

        if (this.resultsHeap.size() < effectiveEf || distance < furthestResultDist) {
          this.candidatesHeap.push(neighborId, distance);
          this.resultsHeap.push(neighborId, distance);

          // Evict the current worst result to keep at most `effectiveEf` entries.
          if (this.resultsHeap.size() > effectiveEf) {
            this.resultsHeap.pop();
          }
          furthestResultDist = this.resultsHeap.peekValue();
        }
      }
    }

    // Extract results in ascending distance order: the max-heap yields the
    // furthest entry first, so fill the array from the back.
    const resultCount = this.resultsHeap.size();
    const results: Array<{ id: number; distance: number }> = new Array(resultCount);
    let idx = resultCount - 1;
    while (!this.resultsHeap.isEmpty()) {
      const dist = this.resultsHeap.peekValue();
      const id = this.resultsHeap.pop();
      results[idx--] = { id, distance: dist };
    }

    // Floor so a fractional k cannot produce an invalid array length.
    if (results.length > k) results.length = Math.floor(k);
    return results;
  }
}