verso-db 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/dist/BinaryHeap.d.ts +25 -0
- package/dist/BinaryHeap.d.ts.map +1 -0
- package/dist/Collection.d.ts +156 -0
- package/dist/Collection.d.ts.map +1 -0
- package/dist/HNSWIndex.d.ts +357 -0
- package/dist/HNSWIndex.d.ts.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +63 -0
- package/dist/MaxBinaryHeap.d.ts.map +1 -0
- package/dist/Storage.d.ts +54 -0
- package/dist/Storage.d.ts.map +1 -0
- package/dist/VectorDB.d.ts +44 -0
- package/dist/VectorDB.d.ts.map +1 -0
- package/dist/backends/DistanceBackend.d.ts +5 -0
- package/dist/backends/DistanceBackend.d.ts.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts +37 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +61 -0
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
- package/dist/errors.d.ts +58 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3732 -0
- package/dist/presets.d.ts +91 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +114 -0
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
- package/dist/storage/BatchWriter.d.ts +104 -0
- package/dist/storage/BatchWriter.d.ts.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +58 -0
- package/dist/storage/BunStorageBackend.d.ts.map +1 -0
- package/dist/storage/MemoryBackend.d.ts +44 -0
- package/dist/storage/MemoryBackend.d.ts.map +1 -0
- package/dist/storage/OPFSBackend.d.ts +59 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -0
- package/dist/storage/StorageBackend.d.ts +66 -0
- package/dist/storage/StorageBackend.d.ts.map +1 -0
- package/dist/storage/WriteAheadLog.d.ts +111 -0
- package/dist/storage/WriteAheadLog.d.ts.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +40 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +98 -0
- package/src/BinaryHeap.ts +131 -0
- package/src/Collection.ts +695 -0
- package/src/HNSWIndex.ts +1839 -0
- package/src/MaxBinaryHeap.ts +175 -0
- package/src/Storage.ts +435 -0
- package/src/VectorDB.ts +109 -0
- package/src/backends/DistanceBackend.ts +17 -0
- package/src/backends/JsDistanceBackend.ts +227 -0
- package/src/encoding/DeltaEncoder.ts +217 -0
- package/src/errors.ts +110 -0
- package/src/index.ts +138 -0
- package/src/presets.ts +229 -0
- package/src/quantization/ScalarQuantizer.ts +383 -0
- package/src/storage/BatchWriter.ts +336 -0
- package/src/storage/BunStorageBackend.ts +161 -0
- package/src/storage/MemoryBackend.ts +120 -0
- package/src/storage/OPFSBackend.ts +250 -0
- package/src/storage/StorageBackend.ts +74 -0
- package/src/storage/WriteAheadLog.ts +326 -0
- package/src/storage/createStorageBackend.ts +137 -0
- package/src/storage/index.ts +53 -0
package/src/presets.ts
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HNSW Parameter Presets
|
|
3
|
+
*
|
|
4
|
+
* These presets are optimized based on extensive benchmarking to achieve
|
|
5
|
+
* the target recall@10 >= 95% for different dataset sizes and dimensions.
|
|
6
|
+
*
|
|
7
|
+
* Key parameters:
|
|
8
|
+
* - M: Maximum connections per node (higher = better recall, more memory)
|
|
9
|
+
* - efConstruction: Beam width during index building (higher = better quality, slower build)
|
|
10
|
+
* - efSearch: Beam width during search (higher = better recall, slower search)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * A named bundle of HNSW tuning parameters (see the file-level comment for
 * how M / efConstruction / efSearch trade recall against speed and memory).
 */
export interface HNSWPreset {
  name: string; // preset identifier; also used as the key in the PRESETS registry
  description: string; // human-readable summary of the intended use case
  M: number; // maximum connections per node (higher = better recall, more memory)
  efConstruction: number; // beam width during index building
  efSearch: number; // beam width during search
  expectedRecall: number; // expected recall@10 as a fraction, e.g. 0.99
  targetDimensions: string; // informational dimension range, e.g. '<=128' or 'any'
  targetDatasetSize: string; // informational dataset-size range, e.g. '1K-100K' or 'any'
}
|
|
23
|
+
|
|
24
|
+
/**
 * Preset for low-dimensional vectors (128D or less)
 * Suitable for: Image features, word2vec, GloVe embeddings
 */
export const PRESET_LOW_DIM: HNSWPreset = {
  name: 'low-dim',
  description: 'Optimized for low-dimensional vectors (<=128D)',
  M: 16, // modest connectivity is sufficient at low dimension
  efConstruction: 200, // build-time beam width
  efSearch: 100, // query-time beam width
  expectedRecall: 0.99, // expected recall@10 (per the file-level benchmarking note)
  targetDimensions: '<=128',
  targetDatasetSize: '1K-100K',
};
|
|
38
|
+
|
|
39
|
+
/**
 * Preset for medium-dimensional vectors (256-512D)
 * Suitable for: Sentence embeddings, smaller transformer outputs
 */
export const PRESET_MEDIUM_DIM: HNSWPreset = {
  name: 'medium-dim',
  description: 'Optimized for medium-dimensional vectors (256-512D)',
  M: 24, // more connectivity than low-dim to hold recall at higher dimension
  efConstruction: 200,
  efSearch: 150,
  expectedRecall: 0.97,
  targetDimensions: '256-512',
  targetDatasetSize: '1K-100K',
};
|
|
53
|
+
|
|
54
|
+
/**
 * Preset for high-dimensional vectors (768D+)
 * Suitable for: BERT, GPT embeddings, Cohere, OpenAI embeddings
 * This is the recommended preset for RAG applications
 *
 * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
 * - efSearch=128: 99.2% recall, 168 QPS, 10.72ms P99
 */
export const PRESET_HIGH_DIM: HNSWPreset = {
  name: 'high-dim',
  description: 'Optimized for high-dimensional vectors (768D+)',
  M: 32, // higher M needed to preserve recall at 768D+
  efConstruction: 200,
  efSearch: 128, // value from the Cohere 1024D benchmark above
  expectedRecall: 0.99,
  targetDimensions: '>=768',
  targetDatasetSize: '1K-500K',
};
|
|
72
|
+
|
|
73
|
+
/**
 * Preset for very high-dimensional vectors (1536D+)
 * Suitable for: OpenAI text-embedding-ada-002, text-embedding-3-large
 *
 * Scaled from PRESET_HIGH_DIM benchmarks (higher M for higher dimensions);
 * NOTE(review): these values are extrapolated, not directly benchmarked.
 */
export const PRESET_VERY_HIGH_DIM: HNSWPreset = {
  name: 'very-high-dim',
  description: 'Optimized for very high-dimensional vectors (1536D+)',
  M: 48,
  efConstruction: 300,
  efSearch: 150,
  expectedRecall: 0.99,
  targetDimensions: '>=1536',
  targetDatasetSize: '1K-500K',
};
|
|
89
|
+
|
|
90
|
+
/**
 * Preset for small datasets (<10K vectors)
 * Prioritizes recall over speed since brute-force is viable
 */
export const PRESET_SMALL_DATASET: HNSWPreset = {
  name: 'small-dataset',
  description: 'Optimized for small datasets (<10K vectors)',
  M: 16,
  efConstruction: 200,
  efSearch: 200, // wide search beam; affordable on small graphs
  expectedRecall: 0.99,
  targetDimensions: 'any',
  targetDatasetSize: '<10K',
};
|
|
104
|
+
|
|
105
|
+
/**
 * Preset for large datasets (100K-1M vectors)
 * Balances recall with build time and memory
 *
 * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
 * - efSearch=128: 99.2% recall, 168 QPS
 */
export const PRESET_LARGE_DATASET: HNSWPreset = {
  name: 'large-dataset',
  description: 'Optimized for large datasets (100K-1M vectors)',
  M: 32,
  efConstruction: 200,
  efSearch: 128,
  expectedRecall: 0.99,
  targetDimensions: 'any',
  targetDatasetSize: '100K-1M',
};
|
|
122
|
+
|
|
123
|
+
/**
 * Preset for maximum recall (prioritizes accuracy over speed)
 * Use when recall is critical and latency is acceptable
 */
export const PRESET_MAX_RECALL: HNSWPreset = {
  name: 'max-recall',
  description: 'Maximum recall configuration',
  M: 48, // dense graph: highest memory cost of all presets
  efConstruction: 500, // slowest build of all presets
  efSearch: 400, // slowest search of all presets
  expectedRecall: 0.99,
  targetDimensions: 'any',
  targetDatasetSize: 'any',
};
|
|
137
|
+
|
|
138
|
+
/**
 * Preset for minimum latency (prioritizes speed over recall)
 * Use when latency is critical and 90% recall is acceptable
 */
export const PRESET_LOW_LATENCY: HNSWPreset = {
  name: 'low-latency',
  description: 'Minimum latency configuration (90% recall)',
  M: 12, // sparse graph: cheapest traversal and smallest memory footprint
  efConstruction: 100,
  efSearch: 50,
  expectedRecall: 0.90, // deliberately trades recall for speed
  targetDimensions: 'any',
  targetDatasetSize: 'any',
};
|
|
152
|
+
|
|
153
|
+
/**
 * All available presets, keyed by each preset's `name` field
 * (so `PRESETS[p.name] === p` for every entry).
 */
export const PRESETS: Record<string, HNSWPreset> = {
  'low-dim': PRESET_LOW_DIM,
  'medium-dim': PRESET_MEDIUM_DIM,
  'high-dim': PRESET_HIGH_DIM,
  'very-high-dim': PRESET_VERY_HIGH_DIM,
  'small-dataset': PRESET_SMALL_DATASET,
  'large-dataset': PRESET_LARGE_DATASET,
  'max-recall': PRESET_MAX_RECALL,
  'low-latency': PRESET_LOW_LATENCY,
};
|
|
166
|
+
|
|
167
|
+
/**
|
|
168
|
+
* Get recommended preset based on dimension and dataset size
|
|
169
|
+
*
|
|
170
|
+
* For high-dimensional vectors (768D+), dimension takes priority over dataset size
|
|
171
|
+
* because recall degrades significantly without higher M values.
|
|
172
|
+
*/
|
|
173
|
+
export function getRecommendedPreset(dimension: number, datasetSize?: number): HNSWPreset {
|
|
174
|
+
// For high-dimensional vectors, always use dimension-based presets
|
|
175
|
+
// (dimension matters more than dataset size for recall)
|
|
176
|
+
// Note: Check 1536 BEFORE 768 since 1536 >= 768 would match first
|
|
177
|
+
if (dimension >= 1536) return PRESET_VERY_HIGH_DIM;
|
|
178
|
+
if (dimension >= 768) return PRESET_HIGH_DIM;
|
|
179
|
+
|
|
180
|
+
// For lower dimensions, consider dataset size
|
|
181
|
+
if (datasetSize !== undefined) {
|
|
182
|
+
if (datasetSize < 10000) return PRESET_SMALL_DATASET;
|
|
183
|
+
if (datasetSize > 100000) return PRESET_LARGE_DATASET;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Dimension-based selection for medium dimensions
|
|
187
|
+
if (dimension <= 128) return PRESET_LOW_DIM;
|
|
188
|
+
if (dimension <= 512) return PRESET_MEDIUM_DIM;
|
|
189
|
+
|
|
190
|
+
return PRESET_HIGH_DIM;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Get preset by name
|
|
195
|
+
*/
|
|
196
|
+
export function getPreset(name: string): HNSWPreset | undefined {
|
|
197
|
+
return PRESETS[name];
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
/**
|
|
201
|
+
* RAG-specific preset recommendation
|
|
202
|
+
* For typical RAG applications using popular embedding models
|
|
203
|
+
*/
|
|
204
|
+
export function getRAGPreset(embeddingModel: string): HNSWPreset {
|
|
205
|
+
const model = embeddingModel.toLowerCase();
|
|
206
|
+
|
|
207
|
+
// OpenAI models
|
|
208
|
+
if (model.includes('ada-002') || model.includes('text-embedding-3')) {
|
|
209
|
+
return PRESET_VERY_HIGH_DIM;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Cohere models
|
|
213
|
+
if (model.includes('cohere') || model.includes('embed-')) {
|
|
214
|
+
return PRESET_HIGH_DIM;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
// BERT/Sentence Transformers
|
|
218
|
+
if (model.includes('bert') || model.includes('minilm') || model.includes('mpnet')) {
|
|
219
|
+
return PRESET_HIGH_DIM;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// E5 models
|
|
223
|
+
if (model.includes('e5-')) {
|
|
224
|
+
return model.includes('large') ? PRESET_HIGH_DIM : PRESET_MEDIUM_DIM;
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Default to high-dim for unknown models (most modern embeddings are 768D+)
|
|
228
|
+
return PRESET_HIGH_DIM;
|
|
229
|
+
}
|
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scalar Quantizer for Int8 vector compression
|
|
3
|
+
*
|
|
4
|
+
* Provides 4x memory reduction by converting Float32 vectors to Int8.
|
|
5
|
+
* Expected performance:
|
|
6
|
+
* - Memory: 4x reduction (32 bits → 8 bits per dimension)
|
|
7
|
+
* - Speed: ~3.5x faster distance calculations (smaller data, better cache)
|
|
8
|
+
* - Recall: <2% loss compared to float32
|
|
9
|
+
*
|
|
10
|
+
* Uses range quantization: maps [min, max] to [-128, 127]
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
/**
 * Per-dimension affine parameters produced by ScalarQuantizer.train().
 * quantize:   code  = round((value - offset) * scale - 128), clamped to [-128, 127]
 * dequantize: value = (code + 128) / scale + offset
 */
export interface QuantizationParams {
  min: Float32Array; // Per-dimension minimum seen during training
  max: Float32Array; // Per-dimension maximum seen during training
  scale: Float32Array; // Per-dimension scale factor: 255 / (max - min), or 1 for constant dims
  offset: Float32Array; // Per-dimension offset (equal to min)
}
|
|
19
|
+
|
|
20
|
+
export class ScalarQuantizer {
|
|
21
|
+
private dimension: number;
|
|
22
|
+
private params: QuantizationParams | null = null;
|
|
23
|
+
private trained: boolean = false;
|
|
24
|
+
|
|
25
|
+
constructor(dimension: number) {
|
|
26
|
+
this.dimension = dimension;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Train the quantizer on a set of vectors to determine optimal range
|
|
31
|
+
*/
|
|
32
|
+
train(vectors: Float32Array[]): void {
|
|
33
|
+
if (vectors.length === 0) {
|
|
34
|
+
throw new Error('Cannot train quantizer with empty vector set');
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const dim = this.dimension;
|
|
38
|
+
const min = new Float32Array(dim).fill(Infinity);
|
|
39
|
+
const max = new Float32Array(dim).fill(-Infinity);
|
|
40
|
+
|
|
41
|
+
// Find per-dimension min/max
|
|
42
|
+
for (const vector of vectors) {
|
|
43
|
+
for (let d = 0; d < dim; d++) {
|
|
44
|
+
if (vector[d] < min[d]) min[d] = vector[d];
|
|
45
|
+
if (vector[d] > max[d]) max[d] = vector[d];
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Compute scale and offset for mapping to [-128, 127]
|
|
50
|
+
const scale = new Float32Array(dim);
|
|
51
|
+
const offset = new Float32Array(dim);
|
|
52
|
+
|
|
53
|
+
for (let d = 0; d < dim; d++) {
|
|
54
|
+
const range = max[d] - min[d];
|
|
55
|
+
if (range === 0) {
|
|
56
|
+
// Handle constant dimensions
|
|
57
|
+
scale[d] = 1;
|
|
58
|
+
offset[d] = min[d];
|
|
59
|
+
} else {
|
|
60
|
+
// Map [min, max] to [-128, 127]
|
|
61
|
+
scale[d] = 255 / range;
|
|
62
|
+
offset[d] = min[d];
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
this.params = { min, max, scale, offset };
|
|
67
|
+
this.trained = true;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Get training status
|
|
72
|
+
*/
|
|
73
|
+
isTrained(): boolean {
|
|
74
|
+
return this.trained;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Get quantization parameters
|
|
79
|
+
*/
|
|
80
|
+
getParams(): QuantizationParams | null {
|
|
81
|
+
return this.params;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Set quantization parameters (for loading saved quantizer)
|
|
86
|
+
*/
|
|
87
|
+
setParams(params: QuantizationParams): void {
|
|
88
|
+
this.params = params;
|
|
89
|
+
this.trained = true;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Quantize a single float32 vector to int8
|
|
94
|
+
*/
|
|
95
|
+
quantize(vector: Float32Array): Int8Array {
|
|
96
|
+
if (!this.params) {
|
|
97
|
+
throw new Error('Quantizer not trained');
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
const dim = this.dimension;
|
|
101
|
+
const result = new Int8Array(dim);
|
|
102
|
+
const { scale, offset } = this.params;
|
|
103
|
+
|
|
104
|
+
for (let d = 0; d < dim; d++) {
|
|
105
|
+
// Map to [0, 255] then shift to [-128, 127]
|
|
106
|
+
const normalized = (vector[d] - offset[d]) * scale[d];
|
|
107
|
+
result[d] = Math.max(-128, Math.min(127, Math.round(normalized - 128)));
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return result;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Quantize multiple vectors
|
|
115
|
+
*/
|
|
116
|
+
quantizeBatch(vectors: Float32Array[]): Int8Array[] {
|
|
117
|
+
const result = new Array<Int8Array>(vectors.length);
|
|
118
|
+
for (let i = 0; i < vectors.length; i++) {
|
|
119
|
+
result[i] = this.quantize(vectors[i]);
|
|
120
|
+
}
|
|
121
|
+
return result;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Dequantize an int8 vector back to float32 (for rescoring)
|
|
126
|
+
*/
|
|
127
|
+
dequantize(vector: Int8Array): Float32Array {
|
|
128
|
+
if (!this.params) {
|
|
129
|
+
throw new Error('Quantizer not trained');
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const dim = this.dimension;
|
|
133
|
+
const result = new Float32Array(dim);
|
|
134
|
+
const { scale, offset } = this.params;
|
|
135
|
+
|
|
136
|
+
for (let d = 0; d < dim; d++) {
|
|
137
|
+
// Reverse the quantization
|
|
138
|
+
result[d] = ((vector[d] + 128) / scale[d]) + offset[d];
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
return result;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Serialize quantization parameters for saving
|
|
146
|
+
*/
|
|
147
|
+
serialize(): ArrayBuffer {
|
|
148
|
+
if (!this.params) {
|
|
149
|
+
throw new Error('Quantizer not trained');
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
const dim = this.dimension;
|
|
153
|
+
// 4 bytes header (dimension) + 4 * dim * 4 bytes (4 float arrays)
|
|
154
|
+
const buffer = new ArrayBuffer(4 + 4 * dim * 4);
|
|
155
|
+
const view = new DataView(buffer);
|
|
156
|
+
|
|
157
|
+
view.setInt32(0, dim, true);
|
|
158
|
+
|
|
159
|
+
let offset = 4;
|
|
160
|
+
for (const arr of [this.params.min, this.params.max, this.params.scale, this.params.offset]) {
|
|
161
|
+
for (let d = 0; d < dim; d++) {
|
|
162
|
+
view.setFloat32(offset, arr[d], true);
|
|
163
|
+
offset += 4;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
return buffer;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Load quantization parameters
|
|
172
|
+
*/
|
|
173
|
+
static deserialize(buffer: ArrayBuffer): ScalarQuantizer {
|
|
174
|
+
const view = new DataView(buffer);
|
|
175
|
+
const dim = view.getInt32(0, true);
|
|
176
|
+
|
|
177
|
+
const quantizer = new ScalarQuantizer(dim);
|
|
178
|
+
const min = new Float32Array(dim);
|
|
179
|
+
const max = new Float32Array(dim);
|
|
180
|
+
const scale = new Float32Array(dim);
|
|
181
|
+
const offsetArr = new Float32Array(dim);
|
|
182
|
+
|
|
183
|
+
let offset = 4;
|
|
184
|
+
for (const arr of [min, max, scale, offsetArr]) {
|
|
185
|
+
for (let d = 0; d < dim; d++) {
|
|
186
|
+
arr[d] = view.getFloat32(offset, true);
|
|
187
|
+
offset += 4;
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
quantizer.setParams({ min, max, scale, offset: offsetArr });
|
|
192
|
+
return quantizer;
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/**
|
|
197
|
+
* Fast Int8 distance calculations
|
|
198
|
+
* These are optimized for quantized vectors and provide significant speedup
|
|
199
|
+
*/
|
|
200
|
+
|
|
201
|
+
/**
|
|
202
|
+
* Compute dot product between two Int8 vectors
|
|
203
|
+
* Uses 8-wide unrolling for better ILP (instruction-level parallelism)
|
|
204
|
+
*/
|
|
205
|
+
export function dotProductInt8(a: Int8Array, b: Int8Array): number {
|
|
206
|
+
const len = a.length;
|
|
207
|
+
let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
|
208
|
+
let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
|
|
209
|
+
let i = 0;
|
|
210
|
+
|
|
211
|
+
// 8-wide unrolling for high-dimensional vectors
|
|
212
|
+
const limit8 = len - 7;
|
|
213
|
+
for (; i < limit8; i += 8) {
|
|
214
|
+
sum0 += a[i] * b[i];
|
|
215
|
+
sum1 += a[i + 1] * b[i + 1];
|
|
216
|
+
sum2 += a[i + 2] * b[i + 2];
|
|
217
|
+
sum3 += a[i + 3] * b[i + 3];
|
|
218
|
+
sum4 += a[i + 4] * b[i + 4];
|
|
219
|
+
sum5 += a[i + 5] * b[i + 5];
|
|
220
|
+
sum6 += a[i + 6] * b[i + 6];
|
|
221
|
+
sum7 += a[i + 7] * b[i + 7];
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Handle remaining elements
|
|
225
|
+
for (; i < len; i++) {
|
|
226
|
+
sum0 += a[i] * b[i];
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/**
|
|
233
|
+
* Compute L2 squared distance between two Int8 vectors
|
|
234
|
+
* Uses 8-wide unrolling for better ILP
|
|
235
|
+
*/
|
|
236
|
+
export function l2SquaredInt8(a: Int8Array, b: Int8Array): number {
|
|
237
|
+
const len = a.length;
|
|
238
|
+
let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
|
239
|
+
let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
|
|
240
|
+
let i = 0;
|
|
241
|
+
|
|
242
|
+
// 8-wide unrolling for high-dimensional vectors
|
|
243
|
+
const limit8 = len - 7;
|
|
244
|
+
for (; i < limit8; i += 8) {
|
|
245
|
+
const d0 = a[i] - b[i];
|
|
246
|
+
const d1 = a[i + 1] - b[i + 1];
|
|
247
|
+
const d2 = a[i + 2] - b[i + 2];
|
|
248
|
+
const d3 = a[i + 3] - b[i + 3];
|
|
249
|
+
const d4 = a[i + 4] - b[i + 4];
|
|
250
|
+
const d5 = a[i + 5] - b[i + 5];
|
|
251
|
+
const d6 = a[i + 6] - b[i + 6];
|
|
252
|
+
const d7 = a[i + 7] - b[i + 7];
|
|
253
|
+
sum0 += d0 * d0;
|
|
254
|
+
sum1 += d1 * d1;
|
|
255
|
+
sum2 += d2 * d2;
|
|
256
|
+
sum3 += d3 * d3;
|
|
257
|
+
sum4 += d4 * d4;
|
|
258
|
+
sum5 += d5 * d5;
|
|
259
|
+
sum6 += d6 * d6;
|
|
260
|
+
sum7 += d7 * d7;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// Handle remaining elements
|
|
264
|
+
for (; i < len; i++) {
|
|
265
|
+
const d = a[i] - b[i];
|
|
266
|
+
sum0 += d * d;
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Compute approximate cosine distance for Int8 vectors
|
|
274
|
+
* Note: This is approximate because quantization changes magnitude
|
|
275
|
+
* Uses 8-wide unrolling with separate accumulators for better ILP
|
|
276
|
+
*/
|
|
277
|
+
export function cosineDistanceInt8(a: Int8Array, b: Int8Array): number {
|
|
278
|
+
const len = a.length;
|
|
279
|
+
// Use separate accumulators for better ILP
|
|
280
|
+
let dot0 = 0, dot1 = 0, dot2 = 0, dot3 = 0;
|
|
281
|
+
let normA0 = 0, normA1 = 0, normA2 = 0, normA3 = 0;
|
|
282
|
+
let normB0 = 0, normB1 = 0, normB2 = 0, normB3 = 0;
|
|
283
|
+
|
|
284
|
+
let i = 0;
|
|
285
|
+
const limit8 = len - 7;
|
|
286
|
+
for (; i < limit8; i += 8) {
|
|
287
|
+
dot0 += a[i] * b[i] + a[i + 4] * b[i + 4];
|
|
288
|
+
dot1 += a[i + 1] * b[i + 1] + a[i + 5] * b[i + 5];
|
|
289
|
+
dot2 += a[i + 2] * b[i + 2] + a[i + 6] * b[i + 6];
|
|
290
|
+
dot3 += a[i + 3] * b[i + 3] + a[i + 7] * b[i + 7];
|
|
291
|
+
normA0 += a[i] * a[i] + a[i + 4] * a[i + 4];
|
|
292
|
+
normA1 += a[i + 1] * a[i + 1] + a[i + 5] * a[i + 5];
|
|
293
|
+
normA2 += a[i + 2] * a[i + 2] + a[i + 6] * a[i + 6];
|
|
294
|
+
normA3 += a[i + 3] * a[i + 3] + a[i + 7] * a[i + 7];
|
|
295
|
+
normB0 += b[i] * b[i] + b[i + 4] * b[i + 4];
|
|
296
|
+
normB1 += b[i + 1] * b[i + 1] + b[i + 5] * b[i + 5];
|
|
297
|
+
normB2 += b[i + 2] * b[i + 2] + b[i + 6] * b[i + 6];
|
|
298
|
+
normB3 += b[i + 3] * b[i + 3] + b[i + 7] * b[i + 7];
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
// Handle remaining elements
|
|
302
|
+
for (; i < len; i++) {
|
|
303
|
+
dot0 += a[i] * b[i];
|
|
304
|
+
normA0 += a[i] * a[i];
|
|
305
|
+
normB0 += b[i] * b[i];
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
const dot = dot0 + dot1 + dot2 + dot3;
|
|
309
|
+
const normA = normA0 + normA1 + normA2 + normA3;
|
|
310
|
+
const normB = normB0 + normB1 + normB2 + normB3;
|
|
311
|
+
|
|
312
|
+
const magnitude = Math.sqrt(normA * normB);
|
|
313
|
+
if (magnitude === 0) return 1;
|
|
314
|
+
|
|
315
|
+
const distance = 1 - (dot / magnitude);
|
|
316
|
+
return distance < 1e-10 ? 0 : distance;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
/**
|
|
320
|
+
* QuantizedVectorStore - Efficient storage for quantized vectors
|
|
321
|
+
*/
|
|
322
|
+
export class QuantizedVectorStore {
|
|
323
|
+
private quantizer: ScalarQuantizer;
|
|
324
|
+
private vectors: Int8Array[]; // Quantized vectors
|
|
325
|
+
private originalVectors: Float32Array[] | null; // Keep originals for rescoring
|
|
326
|
+
private keepOriginals: boolean;
|
|
327
|
+
|
|
328
|
+
constructor(dimension: number, keepOriginals = true) {
|
|
329
|
+
this.quantizer = new ScalarQuantizer(dimension);
|
|
330
|
+
this.vectors = [];
|
|
331
|
+
this.originalVectors = keepOriginals ? [] : null;
|
|
332
|
+
this.keepOriginals = keepOriginals;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
/**
|
|
336
|
+
* Train the quantizer and add vectors
|
|
337
|
+
*/
|
|
338
|
+
addVectors(vectors: Float32Array[]): void {
|
|
339
|
+
if (!this.quantizer.isTrained()) {
|
|
340
|
+
this.quantizer.train(vectors);
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
for (const v of vectors) {
|
|
344
|
+
this.vectors.push(this.quantizer.quantize(v));
|
|
345
|
+
if (this.keepOriginals && this.originalVectors) {
|
|
346
|
+
this.originalVectors.push(v);
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/**
|
|
352
|
+
* Get quantized vector by index
|
|
353
|
+
*/
|
|
354
|
+
getQuantized(index: number): Int8Array {
|
|
355
|
+
return this.vectors[index];
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
/**
|
|
359
|
+
* Get original float32 vector by index (for rescoring)
|
|
360
|
+
*/
|
|
361
|
+
getOriginal(index: number): Float32Array | null {
|
|
362
|
+
if (!this.originalVectors) return null;
|
|
363
|
+
return this.originalVectors[index];
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* Get number of vectors
|
|
368
|
+
*/
|
|
369
|
+
size(): number {
|
|
370
|
+
return this.vectors.length;
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
/**
|
|
374
|
+
* Calculate memory usage in bytes
|
|
375
|
+
*/
|
|
376
|
+
memoryUsage(): { quantized: number; original: number; total: number } {
|
|
377
|
+
const quantized = this.vectors.reduce((sum, v) => sum + v.length, 0);
|
|
378
|
+
const original = this.originalVectors
|
|
379
|
+
? this.originalVectors.reduce((sum, v) => sum + v.length * 4, 0)
|
|
380
|
+
: 0;
|
|
381
|
+
return { quantized, original, total: quantized + original };
|
|
382
|
+
}
|
|
383
|
+
}
|