verso-db 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/dist/BinaryHeap.d.ts +25 -0
- package/dist/BinaryHeap.d.ts.map +1 -0
- package/dist/Collection.d.ts +156 -0
- package/dist/Collection.d.ts.map +1 -0
- package/dist/HNSWIndex.d.ts +357 -0
- package/dist/HNSWIndex.d.ts.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +63 -0
- package/dist/MaxBinaryHeap.d.ts.map +1 -0
- package/dist/Storage.d.ts +54 -0
- package/dist/Storage.d.ts.map +1 -0
- package/dist/VectorDB.d.ts +44 -0
- package/dist/VectorDB.d.ts.map +1 -0
- package/dist/backends/DistanceBackend.d.ts +5 -0
- package/dist/backends/DistanceBackend.d.ts.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts +37 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +61 -0
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
- package/dist/errors.d.ts +58 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3732 -0
- package/dist/presets.d.ts +91 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +114 -0
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
- package/dist/storage/BatchWriter.d.ts +104 -0
- package/dist/storage/BatchWriter.d.ts.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +58 -0
- package/dist/storage/BunStorageBackend.d.ts.map +1 -0
- package/dist/storage/MemoryBackend.d.ts +44 -0
- package/dist/storage/MemoryBackend.d.ts.map +1 -0
- package/dist/storage/OPFSBackend.d.ts +59 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -0
- package/dist/storage/StorageBackend.d.ts +66 -0
- package/dist/storage/StorageBackend.d.ts.map +1 -0
- package/dist/storage/WriteAheadLog.d.ts +111 -0
- package/dist/storage/WriteAheadLog.d.ts.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +40 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +98 -0
- package/src/BinaryHeap.ts +131 -0
- package/src/Collection.ts +695 -0
- package/src/HNSWIndex.ts +1839 -0
- package/src/MaxBinaryHeap.ts +175 -0
- package/src/Storage.ts +435 -0
- package/src/VectorDB.ts +109 -0
- package/src/backends/DistanceBackend.ts +17 -0
- package/src/backends/JsDistanceBackend.ts +227 -0
- package/src/encoding/DeltaEncoder.ts +217 -0
- package/src/errors.ts +110 -0
- package/src/index.ts +138 -0
- package/src/presets.ts +229 -0
- package/src/quantization/ScalarQuantizer.ts +383 -0
- package/src/storage/BatchWriter.ts +336 -0
- package/src/storage/BunStorageBackend.ts +161 -0
- package/src/storage/MemoryBackend.ts +120 -0
- package/src/storage/OPFSBackend.ts +250 -0
- package/src/storage/StorageBackend.ts +74 -0
- package/src/storage/WriteAheadLog.ts +326 -0
- package/src/storage/createStorageBackend.ts +137 -0
- package/src/storage/index.ts +53 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MaxBinaryHeap - A max-heap implementation for HNSW result tracking.
|
|
3
|
+
*
|
|
4
|
+
* In HNSW search, we need to track the top-k closest results efficiently.
|
|
5
|
+
* A max-heap keeps the FURTHEST element at the top, allowing O(log n) eviction
|
|
6
|
+
* of the worst result when a better one is found.
|
|
7
|
+
*
|
|
8
|
+
* This complements BinaryHeap (min-heap) which is used for candidate exploration.
|
|
9
|
+
*/
|
|
10
|
+
export class MaxBinaryHeap {
|
|
11
|
+
private ids: Uint32Array;
|
|
12
|
+
private dists: Float32Array;
|
|
13
|
+
private _size: number;
|
|
14
|
+
private capacity: number;
|
|
15
|
+
|
|
16
|
+
constructor(capacity: number) {
|
|
17
|
+
this.capacity = capacity;
|
|
18
|
+
this.ids = new Uint32Array(capacity);
|
|
19
|
+
this.dists = new Float32Array(capacity);
|
|
20
|
+
this._size = 0;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Push an element onto the heap.
|
|
25
|
+
* When at capacity, replaces the max element if the new one is closer.
|
|
26
|
+
*/
|
|
27
|
+
push(id: number, dist: number): void {
|
|
28
|
+
if (this._size >= this.capacity) {
|
|
29
|
+
// At capacity - replace max if new element is better (smaller distance)
|
|
30
|
+
if (dist < this.dists[0]) {
|
|
31
|
+
// Replace root with new element
|
|
32
|
+
this.ids[0] = id;
|
|
33
|
+
this.dists[0] = dist;
|
|
34
|
+
this.heapifyDown(0);
|
|
35
|
+
}
|
|
36
|
+
// Otherwise, new element is worse than max, don't add
|
|
37
|
+
return;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
this.ids[this._size] = id;
|
|
41
|
+
this.dists[this._size] = dist;
|
|
42
|
+
this._size++;
|
|
43
|
+
this.heapifyUp(this._size - 1);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Remove and return the ID of the maximum element (furthest distance).
|
|
48
|
+
* Returns -1 if heap is empty.
|
|
49
|
+
*/
|
|
50
|
+
pop(): number {
|
|
51
|
+
if (this._size === 0) return -1;
|
|
52
|
+
|
|
53
|
+
const result = this.ids[0];
|
|
54
|
+
this._size--;
|
|
55
|
+
|
|
56
|
+
if (this._size > 0) {
|
|
57
|
+
this.ids[0] = this.ids[this._size];
|
|
58
|
+
this.dists[0] = this.dists[this._size];
|
|
59
|
+
this.heapifyDown(0);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return result;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Peek at the ID of the maximum element without removing it.
|
|
67
|
+
* Returns -1 if heap is empty.
|
|
68
|
+
*/
|
|
69
|
+
peek(): number {
|
|
70
|
+
return this._size > 0 ? this.ids[0] : -1;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Peek at the distance of the maximum element (furthest from query).
|
|
75
|
+
* Returns -Infinity if heap is empty.
|
|
76
|
+
*/
|
|
77
|
+
peekValue(): number {
|
|
78
|
+
return this._size > 0 ? this.dists[0] : -Infinity;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Get the current number of elements in the heap.
|
|
83
|
+
*/
|
|
84
|
+
size(): number {
|
|
85
|
+
return this._size;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Check if the heap is empty.
|
|
90
|
+
*/
|
|
91
|
+
isEmpty(): boolean {
|
|
92
|
+
return this._size === 0;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
/**
|
|
96
|
+
* Clear all elements from the heap.
|
|
97
|
+
*/
|
|
98
|
+
clear(): void {
|
|
99
|
+
this._size = 0;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Get the capacity of the heap.
|
|
104
|
+
*/
|
|
105
|
+
getCapacity(): number {
|
|
106
|
+
return this.capacity;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Heapify up: restore max-heap property after insertion.
|
|
111
|
+
* Parent should be LARGER than children in a max-heap.
|
|
112
|
+
*/
|
|
113
|
+
private heapifyUp(index: number): void {
|
|
114
|
+
// Cache array references to avoid repeated property lookups
|
|
115
|
+
const ids = this.ids;
|
|
116
|
+
const dists = this.dists;
|
|
117
|
+
|
|
118
|
+
while (index > 0) {
|
|
119
|
+
// Use bitwise shift for faster integer division
|
|
120
|
+
const parentIndex = (index - 1) >> 1;
|
|
121
|
+
// MAX-HEAP: Parent should be >= child
|
|
122
|
+
if (dists[parentIndex] >= dists[index]) break;
|
|
123
|
+
|
|
124
|
+
// Inline swap for performance
|
|
125
|
+
const tmpId = ids[index];
|
|
126
|
+
ids[index] = ids[parentIndex];
|
|
127
|
+
ids[parentIndex] = tmpId;
|
|
128
|
+
|
|
129
|
+
const tmpDist = dists[index];
|
|
130
|
+
dists[index] = dists[parentIndex];
|
|
131
|
+
dists[parentIndex] = tmpDist;
|
|
132
|
+
|
|
133
|
+
index = parentIndex;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Heapify down: restore max-heap property after removal.
|
|
139
|
+
* Find largest among node and children, swap if needed.
|
|
140
|
+
*/
|
|
141
|
+
private heapifyDown(index: number): void {
|
|
142
|
+
// Cache array references to avoid repeated property lookups
|
|
143
|
+
const ids = this.ids;
|
|
144
|
+
const dists = this.dists;
|
|
145
|
+
const size = this._size;
|
|
146
|
+
|
|
147
|
+
while (true) {
|
|
148
|
+
const leftChild = (index << 1) + 1; // 2 * index + 1
|
|
149
|
+
const rightChild = leftChild + 1; // 2 * index + 2
|
|
150
|
+
let largest = index;
|
|
151
|
+
|
|
152
|
+
// MAX-HEAP: Find largest among node and children
|
|
153
|
+
if (leftChild < size && dists[leftChild] > dists[largest]) {
|
|
154
|
+
largest = leftChild;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
if (rightChild < size && dists[rightChild] > dists[largest]) {
|
|
158
|
+
largest = rightChild;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
if (largest === index) break;
|
|
162
|
+
|
|
163
|
+
// Inline swap for performance
|
|
164
|
+
const tmpId = ids[index];
|
|
165
|
+
ids[index] = ids[largest];
|
|
166
|
+
ids[largest] = tmpId;
|
|
167
|
+
|
|
168
|
+
const tmpDist = dists[index];
|
|
169
|
+
dists[index] = dists[largest];
|
|
170
|
+
dists[largest] = tmpDist;
|
|
171
|
+
|
|
172
|
+
index = largest;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
package/src/Storage.ts
ADDED
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
import * as path from 'path';
|
|
2
|
+
|
|
3
|
+
// Binary format for metadata storage
|
|
4
|
+
// Format: [count(4)] + repeated [id_len(4) + id + metadata_len(4) + metadata_json]
|
|
5
|
+
|
|
6
|
+
export class MetadataStorage {
|
|
7
|
+
private metadataMap: Map<string, any> | null;
|
|
8
|
+
private idList: string[] | null;
|
|
9
|
+
|
|
10
|
+
constructor() {
|
|
11
|
+
this.metadataMap = new Map();
|
|
12
|
+
this.idList = [];
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
private ensureInitialized(): void {
|
|
16
|
+
if (!this.metadataMap || !this.idList) {
|
|
17
|
+
throw new Error('MetadataStorage has been destroyed and cannot be used');
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
set(id: string, metadata: any): void {
|
|
22
|
+
this.ensureInitialized();
|
|
23
|
+
// Check Map presence first (O(1)) instead of array.includes() (O(n))
|
|
24
|
+
const isNew = !this.metadataMap!.has(id);
|
|
25
|
+
this.metadataMap!.set(id, metadata);
|
|
26
|
+
if (isNew) {
|
|
27
|
+
this.idList!.push(id);
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
get(id: string): any | undefined {
|
|
32
|
+
this.ensureInitialized();
|
|
33
|
+
return this.metadataMap!.get(id);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
has(id: string): boolean {
|
|
37
|
+
this.ensureInitialized();
|
|
38
|
+
return this.metadataMap!.has(id);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
delete(id: string): boolean {
|
|
42
|
+
this.ensureInitialized();
|
|
43
|
+
// Use indexOf + splice for O(n) vs filter which creates new array
|
|
44
|
+
const idx = this.idList!.indexOf(id);
|
|
45
|
+
if (idx !== -1) {
|
|
46
|
+
this.idList!.splice(idx, 1);
|
|
47
|
+
}
|
|
48
|
+
return this.metadataMap!.delete(id);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
getAllIds(): string[] {
|
|
52
|
+
this.ensureInitialized();
|
|
53
|
+
return [...this.idList!];
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
entries(): [string, any][] {
|
|
57
|
+
this.ensureInitialized();
|
|
58
|
+
return Array.from(this.metadataMap!.entries());
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
clear(): void {
|
|
62
|
+
this.ensureInitialized();
|
|
63
|
+
this.metadataMap!.clear();
|
|
64
|
+
this.idList = [];
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
destroy(): void {
|
|
68
|
+
if (this.metadataMap) this.metadataMap.clear();
|
|
69
|
+
this.idList = null;
|
|
70
|
+
this.metadataMap = null;
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
size(): number {
|
|
74
|
+
this.ensureInitialized();
|
|
75
|
+
return this.metadataMap!.size;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
async saveToDisk(filePath: string): Promise<void> {
|
|
79
|
+
const buffer = this.createBinaryBuffer();
|
|
80
|
+
await Bun.write(filePath, buffer);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
private createBinaryBuffer(): Buffer {
|
|
84
|
+
// Pre-serialize all metadata to avoid double JSON.stringify
|
|
85
|
+
const entries: Array<{ id: string; idLen: number; metaStr: string; metaLen: number }> = [];
|
|
86
|
+
let totalSize = 4; // count
|
|
87
|
+
|
|
88
|
+
for (const [id, metadata] of this.metadataMap!.entries()) {
|
|
89
|
+
const metaStr = JSON.stringify(metadata);
|
|
90
|
+
const idLen = Buffer.byteLength(id, 'utf8');
|
|
91
|
+
const metaLen = Buffer.byteLength(metaStr, 'utf8');
|
|
92
|
+
entries.push({ id, idLen, metaStr, metaLen });
|
|
93
|
+
totalSize += 4 + idLen + 4 + metaLen;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const buffer = Buffer.alloc(totalSize);
|
|
97
|
+
let offset = 0;
|
|
98
|
+
|
|
99
|
+
// Write count
|
|
100
|
+
buffer.writeUInt32LE(entries.length, offset);
|
|
101
|
+
offset += 4;
|
|
102
|
+
|
|
103
|
+
// Write each entry using cached values
|
|
104
|
+
for (const { id, idLen, metaStr, metaLen } of entries) {
|
|
105
|
+
buffer.writeUInt32LE(idLen, offset);
|
|
106
|
+
offset += 4;
|
|
107
|
+
buffer.write(id, offset, idLen, 'utf8');
|
|
108
|
+
offset += idLen;
|
|
109
|
+
|
|
110
|
+
buffer.writeUInt32LE(metaLen, offset);
|
|
111
|
+
offset += 4;
|
|
112
|
+
buffer.write(metaStr, offset, metaLen, 'utf8');
|
|
113
|
+
offset += metaLen;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return buffer;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
async loadFromDisk(filePath: string): Promise<void> {
|
|
120
|
+
try {
|
|
121
|
+
const file = Bun.file(filePath);
|
|
122
|
+
const arrayBuffer = await file.arrayBuffer();
|
|
123
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
124
|
+
this.loadFromBinaryBuffer(buffer);
|
|
125
|
+
} catch (error) {
|
|
126
|
+
// If file doesn't exist or is corrupted, start with empty storage
|
|
127
|
+
this.metadataMap!.clear();
|
|
128
|
+
this.idList = [];
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
private loadFromBinaryBuffer(buffer: Buffer): void {
|
|
133
|
+
let offset = 0;
|
|
134
|
+
|
|
135
|
+
// Read count
|
|
136
|
+
const count = buffer.readUInt32LE(offset);
|
|
137
|
+
offset += 4;
|
|
138
|
+
|
|
139
|
+
this.metadataMap!.clear();
|
|
140
|
+
this.idList = [];
|
|
141
|
+
|
|
142
|
+
// Read each entry
|
|
143
|
+
for (let i = 0; i < count; i++) {
|
|
144
|
+
// Read ID length and ID
|
|
145
|
+
const idLen = buffer.readUInt32LE(offset);
|
|
146
|
+
offset += 4;
|
|
147
|
+
const id = buffer.toString('utf8', offset, offset + idLen);
|
|
148
|
+
offset += idLen;
|
|
149
|
+
|
|
150
|
+
// Read metadata length and metadata
|
|
151
|
+
const metadataLen = buffer.readUInt32LE(offset);
|
|
152
|
+
offset += 4;
|
|
153
|
+
const metadataStr = buffer.toString('utf8', offset, offset + metadataLen);
|
|
154
|
+
offset += metadataLen;
|
|
155
|
+
|
|
156
|
+
const metadata = JSON.parse(metadataStr);
|
|
157
|
+
this.metadataMap!.set(id, metadata);
|
|
158
|
+
this.idList!.push(id);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// For efficient vector storage with memory mapping
|
|
164
|
+
export class VectorStorage {
|
|
165
|
+
private dimension: number;
|
|
166
|
+
private vectors: Float32Array;
|
|
167
|
+
private vectorCount: number;
|
|
168
|
+
private capacity: number;
|
|
169
|
+
private filePath: string | null = null;
|
|
170
|
+
private fileHandle: any = null; // Bun's file handle for memory mapping
|
|
171
|
+
|
|
172
|
+
constructor(dimension: number, initialCapacity: number = 1000) {
|
|
173
|
+
this.dimension = dimension;
|
|
174
|
+
this.capacity = initialCapacity;
|
|
175
|
+
this.vectorCount = 0;
|
|
176
|
+
this.vectors = new Float32Array(this.capacity * dimension);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
async initializeForMemoryMapping(filePath: string): Promise<void> {
|
|
180
|
+
this.filePath = filePath;
|
|
181
|
+
|
|
182
|
+
// Check if file exists and load it if it does
|
|
183
|
+
try {
|
|
184
|
+
const file = Bun.file(filePath);
|
|
185
|
+
if (await file.exists()) {
|
|
186
|
+
await this.loadFromDisk(filePath);
|
|
187
|
+
}
|
|
188
|
+
} catch (error) {
|
|
189
|
+
// File doesn't exist, create new one
|
|
190
|
+
// We'll create it when we save for the first time
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
addVector(vector: number[]): number {
|
|
195
|
+
const dim = this.dimension;
|
|
196
|
+
if (vector.length !== dim) {
|
|
197
|
+
throw new Error(`Vector dimension mismatch. Expected ${dim}, got ${vector.length}`);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Ensure capacity
|
|
201
|
+
if (this.vectorCount >= this.capacity) {
|
|
202
|
+
this.expandCapacity();
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
const index = this.vectorCount;
|
|
206
|
+
const start = index * dim;
|
|
207
|
+
|
|
208
|
+
// Copy vector values (cache start offset)
|
|
209
|
+
for (let i = 0; i < dim; i++) {
|
|
210
|
+
this.vectors[start + i] = vector[i];
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
this.vectorCount++;
|
|
214
|
+
return index;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
getVector(index: number): number[] {
|
|
218
|
+
if (index < 0 || index >= this.vectorCount) {
|
|
219
|
+
throw new Error(`Vector index out of bounds: ${index}`);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
const dim = this.dimension;
|
|
223
|
+
const vector = new Array<number>(dim);
|
|
224
|
+
const start = index * dim;
|
|
225
|
+
for (let i = 0; i < dim; i++) {
|
|
226
|
+
vector[i] = this.vectors[start + i];
|
|
227
|
+
}
|
|
228
|
+
return vector;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
getVectorAsTypedArray(index: number): Float32Array {
|
|
232
|
+
if (index < 0 || index >= this.vectorCount) {
|
|
233
|
+
throw new Error(`Vector index out of bounds: ${index}`);
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
const start = index * this.dimension;
|
|
237
|
+
return this.vectors.subarray(start, start + this.dimension);
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
getVectorCount(): number {
|
|
241
|
+
return this.vectorCount;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
getDimension(): number {
|
|
245
|
+
return this.dimension;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
private expandCapacity(): void {
|
|
249
|
+
const newCapacity = this.capacity * 2;
|
|
250
|
+
const newVectors = new Float32Array(newCapacity * this.dimension);
|
|
251
|
+
newVectors.set(this.vectors);
|
|
252
|
+
this.vectors = newVectors;
|
|
253
|
+
this.capacity = newCapacity;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
async saveToDisk(filePath: string): Promise<void> {
|
|
257
|
+
const actualSize = this.vectorCount * this.dimension * 4;
|
|
258
|
+
const buffer = Buffer.from(this.vectors.buffer, 0, actualSize);
|
|
259
|
+
|
|
260
|
+
// Ensure directory exists using Bun's file operations
|
|
261
|
+
const dir = path.dirname(filePath);
|
|
262
|
+
try {
|
|
263
|
+
await Bun.write(dir + '/.tmp', new Uint8Array([0])); // Create a temp file to test/create directory
|
|
264
|
+
await Bun.write(dir + '/.tmp', new Uint8Array()); // Remove the temp file
|
|
265
|
+
} catch (e) {
|
|
266
|
+
// Directory might already exist, which is fine
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
await Bun.write(filePath, buffer);
|
|
270
|
+
|
|
271
|
+
// Update file path if needed
|
|
272
|
+
if (this.filePath !== filePath) {
|
|
273
|
+
this.filePath = filePath;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
async loadFromDisk(filePath: string): Promise<void> {
|
|
278
|
+
try {
|
|
279
|
+
const file = Bun.file(filePath);
|
|
280
|
+
const arrayBuffer = await file.arrayBuffer();
|
|
281
|
+
const uint8Array = new Uint8Array(arrayBuffer);
|
|
282
|
+
const newVectors = new Float32Array(arrayBuffer);
|
|
283
|
+
|
|
284
|
+
// Update internal state
|
|
285
|
+
this.vectors = newVectors;
|
|
286
|
+
this.vectorCount = uint8Array.length / (4 * this.dimension);
|
|
287
|
+
this.capacity = this.vectors.length / this.dimension;
|
|
288
|
+
|
|
289
|
+
// Update file path
|
|
290
|
+
this.filePath = filePath;
|
|
291
|
+
} catch (error) {
|
|
292
|
+
// If file doesn't exist, start with empty storage
|
|
293
|
+
this.vectorCount = 0;
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
destroy(): void {
|
|
298
|
+
// Clear the vectors array to free memory
|
|
299
|
+
this.vectors = new Float32Array(0);
|
|
300
|
+
this.vectorCount = 0;
|
|
301
|
+
this.capacity = 0;
|
|
302
|
+
// Allow garbage collection
|
|
303
|
+
(this as any).vectors = null;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
// Create a MemoryMappedVectorStorage that uses Bun's memory mapping when available
|
|
308
|
+
export class MemoryMappedVectorStorage {
|
|
309
|
+
private dimension: number;
|
|
310
|
+
private vectorCount: number;
|
|
311
|
+
private filePath: string | null = null;
|
|
312
|
+
private mappedBuffer: ArrayBuffer | null = null;
|
|
313
|
+
private mappedView: Float32Array | null = null;
|
|
314
|
+
|
|
315
|
+
constructor(dimension: number) {
|
|
316
|
+
this.dimension = dimension;
|
|
317
|
+
this.vectorCount = 0;
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
async initializeMemoryMapping(filePath: string): Promise<void> {
|
|
321
|
+
this.filePath = filePath;
|
|
322
|
+
|
|
323
|
+
try {
|
|
324
|
+
// Check if file exists
|
|
325
|
+
const file = Bun.file(filePath);
|
|
326
|
+
const fileSize = (await file.stat()).size;
|
|
327
|
+
|
|
328
|
+
if (fileSize > 0) {
|
|
329
|
+
// Calculate expected vector count based on file size
|
|
330
|
+
this.vectorCount = fileSize / (4 * this.dimension);
|
|
331
|
+
|
|
332
|
+
// Create a file handle and memory map
|
|
333
|
+
this.mappedBuffer = await file.arrayBuffer();
|
|
334
|
+
this.mappedView = new Float32Array(this.mappedBuffer);
|
|
335
|
+
} else {
|
|
336
|
+
// Create empty file
|
|
337
|
+
await Bun.write(filePath, new ArrayBuffer(0));
|
|
338
|
+
}
|
|
339
|
+
} catch (error) {
|
|
340
|
+
// File doesn't exist, create new one
|
|
341
|
+
this.vectorCount = 0;
|
|
342
|
+
await Bun.write(filePath, new ArrayBuffer(0));
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
getVector(index: number): number[] {
|
|
347
|
+
if (!this.mappedView) {
|
|
348
|
+
throw new Error('Memory mapping not initialized');
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
if (index < 0 || index >= this.vectorCount) {
|
|
352
|
+
throw new Error(`Vector index out of bounds: ${index}`);
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
const dim = this.dimension;
|
|
356
|
+
const vector = new Array<number>(dim);
|
|
357
|
+
const start = index * dim;
|
|
358
|
+
for (let i = 0; i < dim; i++) {
|
|
359
|
+
vector[i] = this.mappedView[start + i];
|
|
360
|
+
}
|
|
361
|
+
return vector;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
getVectorAsTypedArray(index: number): Float32Array {
|
|
365
|
+
if (!this.mappedView) {
|
|
366
|
+
throw new Error('Memory mapping not initialized');
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
if (index < 0 || index >= this.vectorCount) {
|
|
370
|
+
throw new Error(`Vector index out of bounds: ${index}`);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
const start = index * this.dimension;
|
|
374
|
+
return this.mappedView.subarray(start, start + this.dimension);
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
getVectorCount(): number {
|
|
378
|
+
return this.vectorCount;
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
getDimension(): number {
|
|
382
|
+
return this.dimension;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
async appendVector(vector: number[]): Promise<void> {
|
|
386
|
+
if (vector.length !== this.dimension) {
|
|
387
|
+
throw new Error(`Vector dimension mismatch. Expected ${this.dimension}, got ${vector.length}`);
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
if (!this.filePath) {
|
|
391
|
+
throw new Error('File path not set');
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
// For memory mapping, we'll append to the file and reload the mapping
|
|
395
|
+
// In a more optimized version, we could use a growable buffer
|
|
396
|
+
const newVectorCount = this.vectorCount + 1;
|
|
397
|
+
const newSize = newVectorCount * this.dimension * 4;
|
|
398
|
+
|
|
399
|
+
// Read the current file and append the new vector
|
|
400
|
+
let currentBuffer: ArrayBuffer;
|
|
401
|
+
try {
|
|
402
|
+
currentBuffer = await Bun.file(this.filePath).arrayBuffer();
|
|
403
|
+
} catch {
|
|
404
|
+
currentBuffer = new ArrayBuffer(0);
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
const newBuffer = new ArrayBuffer(newSize);
|
|
408
|
+
const newView = new Float32Array(newBuffer);
|
|
409
|
+
|
|
410
|
+
if (currentBuffer.byteLength > 0) {
|
|
411
|
+
const currentView = new Float32Array(currentBuffer);
|
|
412
|
+
newView.set(currentView);
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
// Add the new vector
|
|
416
|
+
for (let i = 0; i < this.dimension; i++) {
|
|
417
|
+
newView[this.vectorCount * this.dimension + i] = vector[i];
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
// Write the new buffer to file
|
|
421
|
+
await Bun.write(this.filePath, new Uint8Array(newBuffer));
|
|
422
|
+
|
|
423
|
+
// Update our mapping
|
|
424
|
+
this.vectorCount = newVectorCount;
|
|
425
|
+
this.mappedBuffer = newBuffer;
|
|
426
|
+
this.mappedView = newView;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
destroy(): void {
|
|
430
|
+
// Clear the mapped buffer to free memory
|
|
431
|
+
this.mappedBuffer = null;
|
|
432
|
+
this.mappedView = null;
|
|
433
|
+
this.vectorCount = 0;
|
|
434
|
+
}
|
|
435
|
+
}
|
package/src/VectorDB.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { Collection } from './Collection';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import type { StorageBackend } from './storage/StorageBackend';
|
|
4
|
+
import { BunStorageBackend } from './storage/BunStorageBackend';
|
|
5
|
+
|
|
6
|
+
/** Distance metric used when comparing vectors in a collection. */
export type DistanceMetric = 'cosine' | 'euclidean' | 'dot_product';

/** Construction-time options for a VectorDB instance. */
export interface VectorDBConfig {
  /**
   * Path for storing collections on disk. Default: './vectordb_data'
   * Ignored if a custom storageBackend is provided.
   */
  storagePath?: string;
  /**
   * Custom storage backend for platform-specific storage (e.g., OPFS for browsers).
   * If not provided, defaults to BunStorageBackend with storagePath.
   */
  storageBackend?: StorageBackend;
}

/** Per-collection options passed to VectorDB.createCollection(). */
export interface CollectionConfig {
  /** Dimensionality every vector in the collection must have. */
  dimension: number;
  /** Distance metric for similarity comparisons; defaults are decided by Collection. */
  metric?: DistanceMetric;
  // HNSW tuning parameter — presumably the max links per graph node;
  // confirm semantics against HNSWIndex.
  M?: number;
  // HNSW tuning parameter — presumably the construction-time search width;
  // confirm semantics against HNSWIndex.
  efConstruction?: number;
}
|
|
27
|
+
|
|
28
|
+
export class VectorDB {
|
|
29
|
+
private collections: Map<string, Collection>;
|
|
30
|
+
private storagePath: string;
|
|
31
|
+
private storageBackend: StorageBackend;
|
|
32
|
+
private initialized: boolean = false;
|
|
33
|
+
|
|
34
|
+
constructor(config?: VectorDBConfig) {
|
|
35
|
+
this.collections = new Map();
|
|
36
|
+
this.storagePath = config?.storagePath || './vectordb_data';
|
|
37
|
+
// Use provided backend or create default BunStorageBackend
|
|
38
|
+
this.storageBackend = config?.storageBackend || new BunStorageBackend(this.storagePath);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Get the storage backend used by this VectorDB instance.
|
|
43
|
+
* Useful for custom storage operations or debugging.
|
|
44
|
+
*/
|
|
45
|
+
getStorageBackend(): StorageBackend {
|
|
46
|
+
return this.storageBackend;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Initialize the VectorDB - creates storage directory if needed.
|
|
51
|
+
* Called automatically on first operation if not called explicitly.
|
|
52
|
+
*/
|
|
53
|
+
async init(): Promise<void> {
|
|
54
|
+
if (this.initialized) return;
|
|
55
|
+
|
|
56
|
+
// Use storage backend to create root directory
|
|
57
|
+
await this.storageBackend.mkdir('');
|
|
58
|
+
|
|
59
|
+
this.initialized = true;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
async createCollection(name: string, config: CollectionConfig): Promise<Collection> {
|
|
63
|
+
// Ensure VectorDB is initialized
|
|
64
|
+
await this.init();
|
|
65
|
+
|
|
66
|
+
if (this.collections.has(name)) {
|
|
67
|
+
throw new Error(`Collection ${name} already exists`);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
const collectionPath = path.join(this.storagePath, name);
|
|
71
|
+
|
|
72
|
+
// Create collection directory using storage backend
|
|
73
|
+
await this.storageBackend.mkdir(name);
|
|
74
|
+
|
|
75
|
+
const collection = new Collection(name, config, collectionPath);
|
|
76
|
+
await collection.init();
|
|
77
|
+
this.collections.set(name, collection);
|
|
78
|
+
return collection;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
getCollection(name: string): Collection | undefined {
|
|
82
|
+
return this.collections.get(name);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
async listCollections(): Promise<string[]> {
|
|
86
|
+
return [...this.collections.keys()];
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async deleteCollection(name: string): Promise<void> {
|
|
90
|
+
const collection = this.collections.get(name);
|
|
91
|
+
if (!collection) {
|
|
92
|
+
throw new Error(`Collection ${name} does not exist`);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
await collection.destroy();
|
|
96
|
+
this.collections.delete(name);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
async close(): Promise<void> {
|
|
100
|
+
// Close all collections
|
|
101
|
+
for (const collection of this.collections.values()) {
|
|
102
|
+
await collection.destroy();
|
|
103
|
+
}
|
|
104
|
+
this.collections.clear();
|
|
105
|
+
|
|
106
|
+
// Allow garbage collection by preventing access to collections
|
|
107
|
+
// but keep the map object for potential introspection
|
|
108
|
+
}
|
|
109
|
+
}
|