@sparkleideas/plugins 3.0.0-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +401 -0
- package/__tests__/collection-manager.test.ts +332 -0
- package/__tests__/dependency-graph.test.ts +434 -0
- package/__tests__/enhanced-plugin-registry.test.ts +488 -0
- package/__tests__/plugin-registry.test.ts +368 -0
- package/__tests__/ruvector-bridge.test.ts +2429 -0
- package/__tests__/ruvector-integration.test.ts +1602 -0
- package/__tests__/ruvector-migrations.test.ts +1099 -0
- package/__tests__/ruvector-quantization.test.ts +846 -0
- package/__tests__/ruvector-streaming.test.ts +1088 -0
- package/__tests__/sdk.test.ts +325 -0
- package/__tests__/security.test.ts +348 -0
- package/__tests__/utils/ruvector-test-utils.ts +860 -0
- package/examples/plugin-creator/index.ts +636 -0
- package/examples/plugin-creator/plugin-creator.test.ts +312 -0
- package/examples/ruvector/README.md +288 -0
- package/examples/ruvector/attention-patterns.ts +394 -0
- package/examples/ruvector/basic-usage.ts +288 -0
- package/examples/ruvector/docker-compose.yml +75 -0
- package/examples/ruvector/gnn-analysis.ts +501 -0
- package/examples/ruvector/hyperbolic-hierarchies.ts +557 -0
- package/examples/ruvector/init-db.sql +119 -0
- package/examples/ruvector/quantization.ts +680 -0
- package/examples/ruvector/self-learning.ts +447 -0
- package/examples/ruvector/semantic-search.ts +576 -0
- package/examples/ruvector/streaming-large-data.ts +507 -0
- package/examples/ruvector/transactions.ts +594 -0
- package/examples/ruvector-plugins/hook-pattern-library.ts +486 -0
- package/examples/ruvector-plugins/index.ts +79 -0
- package/examples/ruvector-plugins/intent-router.ts +354 -0
- package/examples/ruvector-plugins/mcp-tool-optimizer.ts +424 -0
- package/examples/ruvector-plugins/reasoning-bank.ts +657 -0
- package/examples/ruvector-plugins/ruvector-plugins.test.ts +518 -0
- package/examples/ruvector-plugins/semantic-code-search.ts +498 -0
- package/examples/ruvector-plugins/shared/index.ts +20 -0
- package/examples/ruvector-plugins/shared/vector-utils.ts +257 -0
- package/examples/ruvector-plugins/sona-learning.ts +445 -0
- package/package.json +97 -0
- package/src/collections/collection-manager.ts +661 -0
- package/src/collections/index.ts +56 -0
- package/src/collections/official/index.ts +1040 -0
- package/src/core/base-plugin.ts +416 -0
- package/src/core/plugin-interface.ts +215 -0
- package/src/hooks/index.ts +685 -0
- package/src/index.ts +378 -0
- package/src/integrations/agentic-flow.ts +743 -0
- package/src/integrations/index.ts +88 -0
- package/src/integrations/ruvector/ARCHITECTURE.md +1245 -0
- package/src/integrations/ruvector/attention-advanced.ts +1040 -0
- package/src/integrations/ruvector/attention-executor.ts +782 -0
- package/src/integrations/ruvector/attention-mechanisms.ts +757 -0
- package/src/integrations/ruvector/attention.ts +1063 -0
- package/src/integrations/ruvector/gnn.ts +3050 -0
- package/src/integrations/ruvector/hyperbolic.ts +1948 -0
- package/src/integrations/ruvector/index.ts +394 -0
- package/src/integrations/ruvector/migrations/001_create_extension.sql +135 -0
- package/src/integrations/ruvector/migrations/002_create_vector_tables.sql +259 -0
- package/src/integrations/ruvector/migrations/003_create_indices.sql +328 -0
- package/src/integrations/ruvector/migrations/004_create_functions.sql +598 -0
- package/src/integrations/ruvector/migrations/005_create_attention_functions.sql +654 -0
- package/src/integrations/ruvector/migrations/006_create_gnn_functions.sql +728 -0
- package/src/integrations/ruvector/migrations/007_create_hyperbolic_functions.sql +762 -0
- package/src/integrations/ruvector/migrations/index.ts +35 -0
- package/src/integrations/ruvector/migrations/migrations.ts +647 -0
- package/src/integrations/ruvector/quantization.ts +2036 -0
- package/src/integrations/ruvector/ruvector-bridge.ts +2000 -0
- package/src/integrations/ruvector/self-learning.ts +2376 -0
- package/src/integrations/ruvector/streaming.ts +1737 -0
- package/src/integrations/ruvector/types.ts +1945 -0
- package/src/providers/index.ts +643 -0
- package/src/registry/dependency-graph.ts +568 -0
- package/src/registry/enhanced-plugin-registry.ts +994 -0
- package/src/registry/plugin-registry.ts +604 -0
- package/src/sdk/index.ts +563 -0
- package/src/security/index.ts +594 -0
- package/src/types/index.ts +446 -0
- package/src/workers/index.ts +700 -0
- package/tmp.json +0 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +23 -0
|
@@ -0,0 +1,2036 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RuVector PostgreSQL Bridge - Vector Quantization Module
|
|
3
|
+
*
|
|
4
|
+
* Comprehensive vector quantization for memory reduction:
|
|
5
|
+
* - Scalar Quantization (Int8): 4x memory reduction
|
|
6
|
+
* - Binary Quantization: 32x memory reduction
|
|
7
|
+
* - Product Quantization (PQ): High compression with codebooks
|
|
8
|
+
* - Optimized Product Quantization (OPQ): PQ with learned rotation
|
|
9
|
+
*
|
|
10
|
+
* @module @sparkleideas/plugins/integrations/ruvector/quantization
|
|
11
|
+
* @version 1.0.0
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// Type Definitions
|
|
16
|
+
// ============================================================================
|
|
17
|
+
|
|
18
|
+
/**
 * Quantization type options.
 */
export type QuantizationType = 'scalar' | 'binary' | 'pq' | 'opq';

/**
 * Base interface for all quantizers.
 *
 * Implementations compress float vectors into compact integer/bit
 * representations and reconstruct them lossily on demand.
 */
export interface IQuantizer {
  /** Quantization type */
  readonly type: QuantizationType;
  /** Original vector dimensions */
  readonly dimensions: number;
  /** Quantize a batch of vectors */
  quantize(vectors: number[][]): Uint8Array[] | Int8Array[];
  /** Dequantize back to float vectors (lossy) */
  dequantize(quantized: Uint8Array[] | Int8Array[]): number[][];
  /** Get compression ratio (original bytes / compressed bytes) */
  getCompressionRatio(): number;
  /** Get memory reduction string (e.g., "4x") */
  getMemoryReduction(): string;
}

/**
 * Options for scalar quantization.
 */
export interface ScalarQuantizationOptions {
  /** Vector dimensions */
  dimensions: number;
  /** Minimum value for calibration (auto-computed if not provided) */
  minValue?: number;
  /** Maximum value for calibration (auto-computed if not provided) */
  maxValue?: number;
  /** Use symmetric quantization around zero */
  symmetric?: boolean;
  /** Number of bits for quantization (default: 8) */
  bits?: number;
}

/**
 * Options for binary quantization.
 */
export interface BinaryQuantizationOptions {
  /** Vector dimensions */
  dimensions: number;
  /** Threshold for binarization (default: 0, use sign) */
  threshold?: number;
  /** Use learned thresholds per dimension */
  learnedThresholds?: number[];
}

/**
 * Options for product quantization.
 */
export interface ProductQuantizationOptions {
  /** Vector dimensions */
  dimensions: number;
  /** Number of subvectors (M) - must divide dimensions evenly */
  numSubvectors: number;
  /** Number of centroids per subvector (K) - typically 256 */
  numCentroids: number;
  /** Maximum iterations for k-means training */
  maxIterations?: number;
  /** Convergence tolerance */
  tolerance?: number;
  /** Random seed for reproducibility */
  seed?: number;
}

/**
 * Options for optimized product quantization.
 */
export interface OptimizedProductQuantizationOptions extends ProductQuantizationOptions {
  /** Number of OPQ iterations */
  opqIterations?: number;
  /** Learning rate for rotation optimization */
  learningRate?: number;
}

/**
 * General quantization options union type.
 */
export type QuantizationOptions =
  | ScalarQuantizationOptions
  | BinaryQuantizationOptions
  | ProductQuantizationOptions
  | OptimizedProductQuantizationOptions;

/**
 * Statistics from quantization operations.
 */
export interface QuantizationStats {
  /** Compression ratio (original size / compressed size) */
  compressionRatio: number;
  /** Memory reduction string (e.g., "4x", "32x") */
  memoryReduction: string;
  /** Recall@10 for approximate search (0-1) */
  recallAt10: number;
  /** Search speedup compared to exact search */
  searchSpeedup: number;
  /** Mean squared error from quantization */
  mse?: number;
  /** Training time in milliseconds */
  trainingTimeMs?: number;
}

/**
 * Calibration data for scalar quantization.
 * scale/zeroPoint map floats to the integer grid: q = round(x / scale) + zeroPoint.
 */
interface CalibrationData {
  minValue: number;
  maxValue: number;
  scale: number;
  zeroPoint: number;
}

/**
 * Codebook for product quantization.
 */
interface Codebook {
  /** Centroids [numCentroids, subvectorDim] */
  centroids: number[][];
  /** Assignment counts for statistics */
  counts: number[];
}
|
|
143
|
+
|
|
144
|
+
// ============================================================================
|
|
145
|
+
// Utility Functions
|
|
146
|
+
// ============================================================================
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Computes the Euclidean distance between two vectors.
|
|
150
|
+
*/
|
|
151
|
+
function euclideanDistance(a: number[], b: number[]): number {
|
|
152
|
+
let sum = 0;
|
|
153
|
+
for (let i = 0; i < a.length; i++) {
|
|
154
|
+
const diff = a[i] - b[i];
|
|
155
|
+
sum += diff * diff;
|
|
156
|
+
}
|
|
157
|
+
return Math.sqrt(sum);
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Computes the squared Euclidean distance.
|
|
162
|
+
*/
|
|
163
|
+
function squaredEuclideanDistance(a: number[], b: number[]): number {
|
|
164
|
+
let sum = 0;
|
|
165
|
+
for (let i = 0; i < a.length; i++) {
|
|
166
|
+
const diff = a[i] - b[i];
|
|
167
|
+
sum += diff * diff;
|
|
168
|
+
}
|
|
169
|
+
return sum;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Computes the dot product of two vectors.
|
|
174
|
+
*/
|
|
175
|
+
function dot(a: number[], b: number[]): number {
|
|
176
|
+
let sum = 0;
|
|
177
|
+
for (let i = 0; i < a.length; i++) {
|
|
178
|
+
sum += a[i] * b[i];
|
|
179
|
+
}
|
|
180
|
+
return sum;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Computes the norm of a vector.
|
|
185
|
+
*/
|
|
186
|
+
function norm(v: number[]): number {
|
|
187
|
+
return Math.sqrt(dot(v, v));
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Normalizes a vector to unit length.
|
|
192
|
+
*/
|
|
193
|
+
function normalize(v: number[]): number[] {
|
|
194
|
+
const n = norm(v);
|
|
195
|
+
if (n < 1e-10) return v.map(() => 0);
|
|
196
|
+
return v.map(x => x / n);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* Creates a zero-filled matrix.
|
|
201
|
+
*/
|
|
202
|
+
function zerosMatrix(rows: number, cols: number): number[][] {
|
|
203
|
+
return Array.from({ length: rows }, () => new Array(cols).fill(0));
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Creates an identity matrix.
|
|
208
|
+
*/
|
|
209
|
+
function identityMatrix(n: number): number[][] {
|
|
210
|
+
const result = zerosMatrix(n, n);
|
|
211
|
+
for (let i = 0; i < n; i++) {
|
|
212
|
+
result[i][i] = 1;
|
|
213
|
+
}
|
|
214
|
+
return result;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
/**
|
|
218
|
+
* Matrix-vector multiplication.
|
|
219
|
+
*/
|
|
220
|
+
function matVec(matrix: number[][], vec: number[]): number[] {
|
|
221
|
+
return matrix.map(row => dot(row, vec));
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Matrix-matrix multiplication.
|
|
226
|
+
*/
|
|
227
|
+
function matMul(a: number[][], b: number[][]): number[][] {
|
|
228
|
+
const rows = a.length;
|
|
229
|
+
const cols = b[0].length;
|
|
230
|
+
const inner = b.length;
|
|
231
|
+
|
|
232
|
+
const result = zerosMatrix(rows, cols);
|
|
233
|
+
for (let i = 0; i < rows; i++) {
|
|
234
|
+
for (let j = 0; j < cols; j++) {
|
|
235
|
+
let sum = 0;
|
|
236
|
+
for (let k = 0; k < inner; k++) {
|
|
237
|
+
sum += a[i][k] * b[k][j];
|
|
238
|
+
}
|
|
239
|
+
result[i][j] = sum;
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
return result;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* Matrix transpose.
|
|
247
|
+
*/
|
|
248
|
+
function transpose(matrix: number[][]): number[][] {
|
|
249
|
+
const rows = matrix.length;
|
|
250
|
+
const cols = matrix[0].length;
|
|
251
|
+
const result = zerosMatrix(cols, rows);
|
|
252
|
+
for (let i = 0; i < rows; i++) {
|
|
253
|
+
for (let j = 0; j < cols; j++) {
|
|
254
|
+
result[j][i] = matrix[i][j];
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
return result;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
/**
|
|
261
|
+
* Simple seeded random number generator (Mulberry32).
|
|
262
|
+
*/
|
|
263
|
+
function createRng(seed: number): () => number {
|
|
264
|
+
return function() {
|
|
265
|
+
seed = seed + 0x6d2b79f5 | 0;
|
|
266
|
+
let t = Math.imul(seed ^ seed >>> 15, 1 | seed);
|
|
267
|
+
t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t;
|
|
268
|
+
return ((t ^ t >>> 14) >>> 0) / 4294967296;
|
|
269
|
+
};
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// ============================================================================
|
|
273
|
+
// Scalar Quantization
|
|
274
|
+
// ============================================================================
|
|
275
|
+
|
|
276
|
+
/**
|
|
277
|
+
* ScalarQuantizer implements per-dimension scalar quantization.
|
|
278
|
+
*
|
|
279
|
+
* Quantizes float32 vectors to int8 for 4x memory reduction.
|
|
280
|
+
* Supports symmetric and asymmetric quantization schemes.
|
|
281
|
+
*
|
|
282
|
+
* @example
|
|
283
|
+
* ```typescript
|
|
284
|
+
* const quantizer = new ScalarQuantizer({ dimensions: 128 });
|
|
285
|
+
* quantizer.calibrate(trainingVectors);
|
|
286
|
+
* const quantized = quantizer.quantize(vectors);
|
|
287
|
+
* const reconstructed = quantizer.dequantize(quantized);
|
|
288
|
+
* ```
|
|
289
|
+
*/
|
|
290
|
+
export class ScalarQuantizer implements IQuantizer {
|
|
291
|
+
readonly type: QuantizationType = 'scalar';
|
|
292
|
+
readonly dimensions: number;
|
|
293
|
+
|
|
294
|
+
private calibration: CalibrationData;
|
|
295
|
+
private readonly symmetric: boolean;
|
|
296
|
+
private readonly bits: number;
|
|
297
|
+
private readonly qmin: number;
|
|
298
|
+
private readonly qmax: number;
|
|
299
|
+
private isCalibrated: boolean = false;
|
|
300
|
+
|
|
301
|
+
constructor(options: ScalarQuantizationOptions) {
|
|
302
|
+
this.dimensions = options.dimensions;
|
|
303
|
+
this.symmetric = options.symmetric ?? false;
|
|
304
|
+
this.bits = options.bits ?? 8;
|
|
305
|
+
|
|
306
|
+
// Compute quantization range based on bits
|
|
307
|
+
this.qmin = -(1 << (this.bits - 1));
|
|
308
|
+
this.qmax = (1 << (this.bits - 1)) - 1;
|
|
309
|
+
|
|
310
|
+
// Initialize with default calibration
|
|
311
|
+
this.calibration = {
|
|
312
|
+
minValue: options.minValue ?? -1,
|
|
313
|
+
maxValue: options.maxValue ?? 1,
|
|
314
|
+
scale: 1,
|
|
315
|
+
zeroPoint: 0,
|
|
316
|
+
};
|
|
317
|
+
|
|
318
|
+
if (options.minValue !== undefined && options.maxValue !== undefined) {
|
|
319
|
+
this.computeCalibration(options.minValue, options.maxValue);
|
|
320
|
+
this.isCalibrated = true;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Calibrates the quantizer using sample vectors.
|
|
326
|
+
*
|
|
327
|
+
* @param samples - Representative vectors for calibration
|
|
328
|
+
*/
|
|
329
|
+
calibrate(samples: number[][]): void {
|
|
330
|
+
if (samples.length === 0) {
|
|
331
|
+
throw new Error('Cannot calibrate with empty samples');
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// Find min and max across all dimensions and samples
|
|
335
|
+
let minValue = Infinity;
|
|
336
|
+
let maxValue = -Infinity;
|
|
337
|
+
|
|
338
|
+
for (const sample of samples) {
|
|
339
|
+
for (let i = 0; i < sample.length; i++) {
|
|
340
|
+
minValue = Math.min(minValue, sample[i]);
|
|
341
|
+
maxValue = Math.max(maxValue, sample[i]);
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Add small margin for numerical stability
|
|
346
|
+
const range = maxValue - minValue;
|
|
347
|
+
minValue -= range * 0.01;
|
|
348
|
+
maxValue += range * 0.01;
|
|
349
|
+
|
|
350
|
+
this.computeCalibration(minValue, maxValue);
|
|
351
|
+
this.isCalibrated = true;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
private computeCalibration(minValue: number, maxValue: number): void {
|
|
355
|
+
if (this.symmetric) {
|
|
356
|
+
// Symmetric quantization: use same scale for positive and negative
|
|
357
|
+
const absMax = Math.max(Math.abs(minValue), Math.abs(maxValue));
|
|
358
|
+
this.calibration = {
|
|
359
|
+
minValue: -absMax,
|
|
360
|
+
maxValue: absMax,
|
|
361
|
+
scale: (2 * absMax) / (this.qmax - this.qmin),
|
|
362
|
+
zeroPoint: 0,
|
|
363
|
+
};
|
|
364
|
+
} else {
|
|
365
|
+
// Asymmetric quantization: full range utilization
|
|
366
|
+
this.calibration = {
|
|
367
|
+
minValue,
|
|
368
|
+
maxValue,
|
|
369
|
+
scale: (maxValue - minValue) / (this.qmax - this.qmin),
|
|
370
|
+
zeroPoint: Math.round(this.qmin - minValue / ((maxValue - minValue) / (this.qmax - this.qmin))),
|
|
371
|
+
};
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
/**
|
|
376
|
+
* Quantizes float32 vectors to int8.
|
|
377
|
+
*
|
|
378
|
+
* @param vectors - Input vectors
|
|
379
|
+
* @returns Quantized int8 arrays
|
|
380
|
+
*/
|
|
381
|
+
quantize(vectors: number[][]): Int8Array[] {
|
|
382
|
+
if (!this.isCalibrated) {
|
|
383
|
+
// Auto-calibrate if not done
|
|
384
|
+
this.calibrate(vectors);
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
const { scale, zeroPoint } = this.calibration;
|
|
388
|
+
|
|
389
|
+
return vectors.map((vec) => {
|
|
390
|
+
const quantized = new Int8Array(vec.length);
|
|
391
|
+
for (let i = 0; i < vec.length; i++) {
|
|
392
|
+
const q = Math.round(vec[i] / scale) + zeroPoint;
|
|
393
|
+
quantized[i] = Math.max(this.qmin, Math.min(this.qmax, q));
|
|
394
|
+
}
|
|
395
|
+
return quantized;
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
/**
|
|
400
|
+
* Dequantizes int8 arrays back to float32 vectors.
|
|
401
|
+
*
|
|
402
|
+
* @param quantized - Quantized int8 arrays
|
|
403
|
+
* @returns Reconstructed float vectors (lossy)
|
|
404
|
+
*/
|
|
405
|
+
dequantize(quantized: Int8Array[]): number[][] {
|
|
406
|
+
const { scale, zeroPoint } = this.calibration;
|
|
407
|
+
|
|
408
|
+
return quantized.map((q) => {
|
|
409
|
+
const vec = new Array(q.length);
|
|
410
|
+
for (let i = 0; i < q.length; i++) {
|
|
411
|
+
vec[i] = (q[i] - zeroPoint) * scale;
|
|
412
|
+
}
|
|
413
|
+
return vec;
|
|
414
|
+
});
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* Computes approximate distance using quantized vectors.
|
|
419
|
+
*
|
|
420
|
+
* @param a - First quantized vector
|
|
421
|
+
* @param b - Second quantized vector
|
|
422
|
+
* @returns Approximate Euclidean distance
|
|
423
|
+
*/
|
|
424
|
+
quantizedDistance(a: Int8Array, b: Int8Array): number {
|
|
425
|
+
const { scale } = this.calibration;
|
|
426
|
+
let sum = 0;
|
|
427
|
+
for (let i = 0; i < a.length; i++) {
|
|
428
|
+
const diff = a[i] - b[i];
|
|
429
|
+
sum += diff * diff;
|
|
430
|
+
}
|
|
431
|
+
return Math.sqrt(sum) * scale;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
getCompressionRatio(): number {
|
|
435
|
+
// float32 (4 bytes) -> int8 (1 byte) = 4x
|
|
436
|
+
return 4;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
getMemoryReduction(): string {
|
|
440
|
+
return '4x';
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* Gets the current calibration data.
|
|
445
|
+
*/
|
|
446
|
+
getCalibration(): CalibrationData {
|
|
447
|
+
return { ...this.calibration };
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
/**
|
|
451
|
+
* Sets calibration data directly.
|
|
452
|
+
*/
|
|
453
|
+
setCalibration(calibration: CalibrationData): void {
|
|
454
|
+
this.calibration = { ...calibration };
|
|
455
|
+
this.isCalibrated = true;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// ============================================================================
|
|
460
|
+
// Binary Quantization
|
|
461
|
+
// ============================================================================
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* BinaryQuantizer implements binary quantization for extreme compression.
|
|
465
|
+
*
|
|
466
|
+
* Quantizes float32 vectors to binary (1 bit per dimension) for 32x memory reduction.
|
|
467
|
+
* Uses Hamming distance for fast comparison.
|
|
468
|
+
*
|
|
469
|
+
* @example
|
|
470
|
+
* ```typescript
|
|
471
|
+
* const quantizer = new BinaryQuantizer({ dimensions: 128 });
|
|
472
|
+
* const quantized = quantizer.quantize(vectors);
|
|
473
|
+
* const distance = quantizer.hammingDistance(quantized[0], quantized[1]);
|
|
474
|
+
* ```
|
|
475
|
+
*/
|
|
476
|
+
export class BinaryQuantizer implements IQuantizer {
|
|
477
|
+
readonly type: QuantizationType = 'binary';
|
|
478
|
+
readonly dimensions: number;
|
|
479
|
+
|
|
480
|
+
private threshold: number;
|
|
481
|
+
private learnedThresholds: number[] | null;
|
|
482
|
+
private readonly bytesPerVector: number;
|
|
483
|
+
|
|
484
|
+
constructor(options: BinaryQuantizationOptions) {
|
|
485
|
+
this.dimensions = options.dimensions;
|
|
486
|
+
this.threshold = options.threshold ?? 0;
|
|
487
|
+
this.learnedThresholds = options.learnedThresholds ?? null;
|
|
488
|
+
|
|
489
|
+
// Calculate bytes needed (ceil(dimensions / 8))
|
|
490
|
+
this.bytesPerVector = Math.ceil(this.dimensions / 8);
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
/**
|
|
494
|
+
* Learns optimal thresholds per dimension from training data.
|
|
495
|
+
*
|
|
496
|
+
* @param samples - Training vectors
|
|
497
|
+
*/
|
|
498
|
+
learnThresholds(samples: number[][]): void {
|
|
499
|
+
if (samples.length === 0) {
|
|
500
|
+
throw new Error('Cannot learn thresholds from empty samples');
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
// Compute median per dimension as threshold
|
|
504
|
+
this.learnedThresholds = new Array(this.dimensions);
|
|
505
|
+
|
|
506
|
+
for (let d = 0; d < this.dimensions; d++) {
|
|
507
|
+
const values = samples.map(s => s[d]).sort((a, b) => a - b);
|
|
508
|
+
const mid = Math.floor(values.length / 2);
|
|
509
|
+
this.learnedThresholds[d] = values.length % 2 === 0
|
|
510
|
+
? (values[mid - 1] + values[mid]) / 2
|
|
511
|
+
: values[mid];
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
/**
|
|
516
|
+
* Quantizes float32 vectors to binary.
|
|
517
|
+
*
|
|
518
|
+
* @param vectors - Input vectors
|
|
519
|
+
* @returns Binary quantized arrays (packed bits)
|
|
520
|
+
*/
|
|
521
|
+
quantize(vectors: number[][]): Uint8Array[] {
|
|
522
|
+
return vectors.map((vec) => {
|
|
523
|
+
const binary = new Uint8Array(this.bytesPerVector);
|
|
524
|
+
|
|
525
|
+
for (let i = 0; i < this.dimensions; i++) {
|
|
526
|
+
const threshold = this.learnedThresholds
|
|
527
|
+
? this.learnedThresholds[i]
|
|
528
|
+
: this.threshold;
|
|
529
|
+
|
|
530
|
+
if (vec[i] > threshold) {
|
|
531
|
+
const byteIdx = Math.floor(i / 8);
|
|
532
|
+
const bitIdx = i % 8;
|
|
533
|
+
binary[byteIdx] |= (1 << bitIdx);
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
return binary;
|
|
538
|
+
});
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
/**
|
|
542
|
+
* Dequantizes binary arrays back to float vectors.
|
|
543
|
+
* Note: This is highly lossy and mainly for debugging.
|
|
544
|
+
*
|
|
545
|
+
* @param quantized - Binary quantized arrays
|
|
546
|
+
* @returns Reconstructed vectors (-1 or +1 per dimension)
|
|
547
|
+
*/
|
|
548
|
+
dequantize(quantized: Uint8Array[]): number[][] {
|
|
549
|
+
return quantized.map((binary) => {
|
|
550
|
+
const vec = new Array(this.dimensions);
|
|
551
|
+
|
|
552
|
+
for (let i = 0; i < this.dimensions; i++) {
|
|
553
|
+
const byteIdx = Math.floor(i / 8);
|
|
554
|
+
const bitIdx = i % 8;
|
|
555
|
+
const bit = (binary[byteIdx] >> bitIdx) & 1;
|
|
556
|
+
vec[i] = bit === 1 ? 1 : -1;
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
return vec;
|
|
560
|
+
});
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/**
|
|
564
|
+
* Computes Hamming distance between two binary vectors.
|
|
565
|
+
*
|
|
566
|
+
* @param a - First binary vector
|
|
567
|
+
* @param b - Second binary vector
|
|
568
|
+
* @returns Hamming distance (number of differing bits)
|
|
569
|
+
*/
|
|
570
|
+
hammingDistance(a: Uint8Array, b: Uint8Array): number {
|
|
571
|
+
let distance = 0;
|
|
572
|
+
for (let i = 0; i < a.length; i++) {
|
|
573
|
+
const xor = a[i] ^ b[i];
|
|
574
|
+
// Count bits using Brian Kernighan's algorithm
|
|
575
|
+
let bits = xor;
|
|
576
|
+
while (bits) {
|
|
577
|
+
distance++;
|
|
578
|
+
bits &= bits - 1;
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
return distance;
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
/**
|
|
585
|
+
* Two-stage search: binary filter + rerank with exact distances.
|
|
586
|
+
*
|
|
587
|
+
* @param query - Query vector (float)
|
|
588
|
+
* @param candidates - Candidate vectors (float)
|
|
589
|
+
* @param k - Number of results to return
|
|
590
|
+
* @param filterRatio - Ratio of candidates to keep after binary filter (default: 10)
|
|
591
|
+
* @returns Indices of top-k candidates after reranking
|
|
592
|
+
*/
|
|
593
|
+
searchWithRerank(
|
|
594
|
+
query: number[],
|
|
595
|
+
candidates: number[][],
|
|
596
|
+
k: number,
|
|
597
|
+
filterRatio: number = 10
|
|
598
|
+
): number[] {
|
|
599
|
+
// Step 1: Quantize query and all candidates
|
|
600
|
+
const queryBinary = this.quantize([query])[0];
|
|
601
|
+
const candidatesBinary = this.quantize(candidates);
|
|
602
|
+
|
|
603
|
+
// Step 2: Compute Hamming distances
|
|
604
|
+
const distances: Array<{ index: number; hamming: number }> = [];
|
|
605
|
+
for (let i = 0; i < candidatesBinary.length; i++) {
|
|
606
|
+
distances.push({
|
|
607
|
+
index: i,
|
|
608
|
+
hamming: this.hammingDistance(queryBinary, candidatesBinary[i]),
|
|
609
|
+
});
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
// Step 3: Filter top candidates by Hamming distance
|
|
613
|
+
distances.sort((a, b) => a.hamming - b.hamming);
|
|
614
|
+
const numCandidates = Math.min(k * filterRatio, candidates.length);
|
|
615
|
+
const filtered = distances.slice(0, numCandidates);
|
|
616
|
+
|
|
617
|
+
// Step 4: Rerank filtered candidates with exact Euclidean distance
|
|
618
|
+
const reranked: Array<{ index: number; distance: number }> = [];
|
|
619
|
+
for (const { index } of filtered) {
|
|
620
|
+
reranked.push({
|
|
621
|
+
index,
|
|
622
|
+
distance: euclideanDistance(query, candidates[index]),
|
|
623
|
+
});
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
// Step 5: Sort by exact distance and return top-k
|
|
627
|
+
reranked.sort((a, b) => a.distance - b.distance);
|
|
628
|
+
return reranked.slice(0, k).map(r => r.index);
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* Batch Hamming distance computation.
|
|
633
|
+
*
|
|
634
|
+
* @param query - Query binary vector
|
|
635
|
+
* @param candidates - Candidate binary vectors
|
|
636
|
+
* @returns Array of Hamming distances
|
|
637
|
+
*/
|
|
638
|
+
batchHammingDistance(query: Uint8Array, candidates: Uint8Array[]): number[] {
|
|
639
|
+
return candidates.map(c => this.hammingDistance(query, c));
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
getCompressionRatio(): number {
|
|
643
|
+
// float32 (32 bits) -> binary (1 bit) = 32x
|
|
644
|
+
return 32;
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
getMemoryReduction(): string {
|
|
648
|
+
return '32x';
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// ============================================================================
|
|
653
|
+
// Product Quantization
|
|
654
|
+
// ============================================================================
|
|
655
|
+
|
|
656
|
+
/**
|
|
657
|
+
* ProductQuantizer implements product quantization for high compression.
|
|
658
|
+
*
|
|
659
|
+
* Splits vectors into M subvectors and quantizes each to K centroids.
|
|
660
|
+
* Memory: M * ceil(log2(K)) bits per vector (e.g., M=8, K=256 = 8 bytes)
|
|
661
|
+
*
|
|
662
|
+
* @example
|
|
663
|
+
* ```typescript
|
|
664
|
+
* const pq = new ProductQuantizer({
|
|
665
|
+
* dimensions: 128,
|
|
666
|
+
* numSubvectors: 8,
|
|
667
|
+
* numCentroids: 256
|
|
668
|
+
* });
|
|
669
|
+
* await pq.train(trainingVectors);
|
|
670
|
+
* const codes = pq.encode(vectors);
|
|
671
|
+
* const distances = pq.computeDistances(query, codes);
|
|
672
|
+
* ```
|
|
673
|
+
*/
|
|
674
|
+
export class ProductQuantizer implements IQuantizer {
|
|
675
|
+
readonly type: QuantizationType = 'pq';
|
|
676
|
+
readonly dimensions: number;
|
|
677
|
+
readonly numSubvectors: number;
|
|
678
|
+
readonly numCentroids: number;
|
|
679
|
+
readonly subvectorDim: number;
|
|
680
|
+
|
|
681
|
+
protected codebooks: Codebook[] = [];
|
|
682
|
+
protected isTrained: boolean = false;
|
|
683
|
+
protected readonly maxIterations: number;
|
|
684
|
+
protected readonly tolerance: number;
|
|
685
|
+
protected readonly rng: () => number;
|
|
686
|
+
|
|
687
|
+
constructor(options: ProductQuantizationOptions) {
|
|
688
|
+
this.dimensions = options.dimensions;
|
|
689
|
+
this.numSubvectors = options.numSubvectors;
|
|
690
|
+
this.numCentroids = options.numCentroids;
|
|
691
|
+
|
|
692
|
+
// Validate dimensions divisibility
|
|
693
|
+
if (options.dimensions % options.numSubvectors !== 0) {
|
|
694
|
+
throw new Error(
|
|
695
|
+
`Dimensions (${options.dimensions}) must be divisible by numSubvectors (${options.numSubvectors})`
|
|
696
|
+
);
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
this.subvectorDim = options.dimensions / options.numSubvectors;
|
|
700
|
+
this.maxIterations = options.maxIterations ?? 100;
|
|
701
|
+
this.tolerance = options.tolerance ?? 1e-6;
|
|
702
|
+
this.rng = createRng(options.seed ?? 42);
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
/**
|
|
706
|
+
* Trains codebooks from training data using k-means clustering.
|
|
707
|
+
*
|
|
708
|
+
* @param vectors - Training vectors
|
|
709
|
+
*/
|
|
710
|
+
async train(vectors: number[][]): Promise<void> {
|
|
711
|
+
if (vectors.length < this.numCentroids) {
|
|
712
|
+
throw new Error(
|
|
713
|
+
`Need at least ${this.numCentroids} training vectors, got ${vectors.length}`
|
|
714
|
+
);
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
this.codebooks = [];
|
|
718
|
+
|
|
719
|
+
// Train a codebook for each subvector
|
|
720
|
+
for (let m = 0; m < this.numSubvectors; m++) {
|
|
721
|
+
// Extract subvectors
|
|
722
|
+
const subvectors = this.extractSubvectors(vectors, m);
|
|
723
|
+
|
|
724
|
+
// Train codebook using k-means
|
|
725
|
+
const codebook = await this.trainCodebook(subvectors);
|
|
726
|
+
this.codebooks.push(codebook);
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
this.isTrained = true;
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
/**
|
|
733
|
+
* Extracts the m-th subvector from all vectors.
|
|
734
|
+
*/
|
|
735
|
+
protected extractSubvectors(vectors: number[][], m: number): number[][] {
|
|
736
|
+
const start = m * this.subvectorDim;
|
|
737
|
+
return vectors.map(v => v.slice(start, start + this.subvectorDim));
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
/**
 * Trains a single codebook using k-means clustering.
 *
 * Initializes centroids with k-means++, then alternates assignment and
 * update steps until the largest squared centroid shift falls below
 * `tolerance` or `maxIterations` is reached.
 */
protected async trainCodebook(subvectors: number[][]): Promise<Codebook> {
  const k = this.numCentroids;
  const dim = this.subvectorDim;

  // Initialize centroids using k-means++ initialization
  const centroids = this.kmeansppInit(subvectors, k);
  const counts = new Array(k).fill(0);

  // K-means iterations
  for (let iter = 0; iter < this.maxIterations; iter++) {
    // Assignment step: bucket each subvector by its nearest centroid index.
    const assignments: number[][] = Array.from({ length: k }, () => []);

    for (let i = 0; i < subvectors.length; i++) {
      const nearestIdx = this.findNearestCentroid(subvectors[i], centroids);
      assignments[nearestIdx].push(i);
    }

    // Update step: move each centroid to the mean of its assigned points.
    let maxShift = 0;
    for (let c = 0; c < k; c++) {
      if (assignments[c].length === 0) {
        // Reinitialize empty centroid at a random data point.
        // NOTE(review): counts[c] is not refreshed on this path, so it keeps
        // its value from the previous iteration — confirm this is intended.
        const randomIdx = Math.floor(this.rng() * subvectors.length);
        centroids[c] = [...subvectors[randomIdx]];
        continue;
      }

      const newCentroid = new Array(dim).fill(0);
      for (const idx of assignments[c]) {
        for (let d = 0; d < dim; d++) {
          newCentroid[d] += subvectors[idx][d];
        }
      }
      for (let d = 0; d < dim; d++) {
        newCentroid[d] /= assignments[c].length;
      }

      // Track the largest squared movement for the convergence test below.
      const shift = squaredEuclideanDistance(centroids[c], newCentroid);
      maxShift = Math.max(maxShift, shift);
      centroids[c] = newCentroid;
      counts[c] = assignments[c].length;
    }

    // Check convergence
    if (maxShift < this.tolerance) {
      break;
    }

    // Yield to event loop periodically so long trainings don't block it.
    if (iter % 10 === 0) {
      await new Promise(resolve => setTimeout(resolve, 0));
    }
  }

  return { centroids, counts };
}
|
|
800
|
+
|
|
801
|
+
/**
 * K-means++ initialization for better centroid selection.
 *
 * Picks the first centroid uniformly at random, then each subsequent one via
 * roulette-wheel sampling proportional to the squared distance from the
 * nearest already-chosen centroid.
 */
protected kmeansppInit(subvectors: number[][], k: number): number[][] {
  const centroids: number[][] = [];

  // First centroid: random
  const firstIdx = Math.floor(this.rng() * subvectors.length);
  centroids.push([...subvectors[firstIdx]]);

  // Remaining centroids: proportional to squared distance
  for (let c = 1; c < k; c++) {
    // Distance of every point to its nearest chosen centroid.
    const distances = subvectors.map(v => {
      let minDist = Infinity;
      for (const centroid of centroids) {
        const dist = squaredEuclideanDistance(v, centroid);
        minDist = Math.min(minDist, dist);
      }
      return minDist;
    });

    // Roulette-wheel selection over the total distance mass.
    const totalDist = distances.reduce((a, b) => a + b, 0);
    let threshold = this.rng() * totalDist;

    for (let i = 0; i < subvectors.length; i++) {
      threshold -= distances[i];
      if (threshold <= 0) {
        centroids.push([...subvectors[i]]);
        break;
      }
    }

    // Fallback if we didn't select (numerical issues, e.g. all distances 0)
    if (centroids.length <= c) {
      const fallbackIdx = Math.floor(this.rng() * subvectors.length);
      centroids.push([...subvectors[fallbackIdx]]);
    }
  }

  return centroids;
}
|
|
842
|
+
|
|
843
|
+
/**
|
|
844
|
+
* Finds the nearest centroid index for a subvector.
|
|
845
|
+
*/
|
|
846
|
+
protected findNearestCentroid(subvector: number[], centroids: number[][]): number {
|
|
847
|
+
let minDist = Infinity;
|
|
848
|
+
let minIdx = 0;
|
|
849
|
+
|
|
850
|
+
for (let i = 0; i < centroids.length; i++) {
|
|
851
|
+
const dist = squaredEuclideanDistance(subvector, centroids[i]);
|
|
852
|
+
if (dist < minDist) {
|
|
853
|
+
minDist = dist;
|
|
854
|
+
minIdx = i;
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
return minIdx;
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
/**
 * Encodes vectors to PQ codes.
 *
 * @param vectors - Input vectors
 * @returns PQ codes (one byte per subvector, assuming K=256)
 */
encode(vectors: number[][]): Uint8Array[] {
  if (!this.isTrained) {
    throw new Error('ProductQuantizer must be trained before encoding');
  }

  const encoded: Uint8Array[] = [];
  for (const vec of vectors) {
    const codes = new Uint8Array(this.numSubvectors);
    for (let m = 0; m < this.numSubvectors; m++) {
      const begin = m * this.subvectorDim;
      const subvector = vec.slice(begin, begin + this.subvectorDim);
      codes[m] = this.findNearestCentroid(subvector, this.codebooks[m].centroids);
    }
    encoded.push(codes);
  }
  return encoded;
}
|
|
884
|
+
|
|
885
|
+
/**
 * Implements IQuantizer interface - encodes vectors (alias of encode()).
 */
quantize(vectors: number[][]): Uint8Array[] {
  const codes = this.encode(vectors);
  return codes;
}
|
|
891
|
+
|
|
892
|
+
/**
 * Decodes PQ codes back to approximate vectors by concatenating the
 * centroid each code byte points at.
 *
 * @param codes - PQ codes
 * @returns Reconstructed vectors
 */
decode(codes: Uint8Array[]): number[][] {
  if (!this.isTrained) {
    throw new Error('ProductQuantizer must be trained before decoding');
  }

  const reconstructed: number[][] = [];
  for (const code of codes) {
    const vec: number[] = new Array(this.dimensions);
    for (let m = 0; m < this.numSubvectors; m++) {
      const centroid = this.codebooks[m].centroids[code[m]];
      const base = m * this.subvectorDim;
      for (let d = 0; d < this.subvectorDim; d++) {
        vec[base + d] = centroid[d];
      }
    }
    reconstructed.push(vec);
  }
  return reconstructed;
}
|
|
917
|
+
|
|
918
|
+
/**
 * Implements IQuantizer interface - decodes vectors (alias of decode()).
 */
dequantize(quantized: Uint8Array[]): number[][] {
  const vectors = this.decode(quantized);
  return vectors;
}
|
|
924
|
+
|
|
925
|
+
/**
 * Computes asymmetric distances from a query to encoded vectors.
 *
 * Asymmetric distance computation (ADC): the query stays exact (not
 * quantized) while database vectors are PQ codes; per-subvector squared
 * distances come from lookup tables and are summed, then square-rooted.
 *
 * @param query - Query vector (float)
 * @param codes - Database PQ codes
 * @returns Array of distances
 */
computeDistances(query: number[], codes: Uint8Array[]): number[] {
  if (!this.isTrained) {
    throw new Error('ProductQuantizer must be trained before computing distances');
  }

  // One table per subvector: squared distance to each centroid.
  const tables = this.buildDistanceTables(query);

  const distances: number[] = [];
  for (const code of codes) {
    let sum = 0;
    for (let m = 0; m < this.numSubvectors; m++) {
      sum += tables[m][code[m]];
    }
    distances.push(Math.sqrt(sum));
  }
  return distances;
}
|
|
954
|
+
|
|
955
|
+
/**
|
|
956
|
+
* Builds distance lookup tables for asymmetric distance computation.
|
|
957
|
+
*/
|
|
958
|
+
protected buildDistanceTables(query: number[]): number[][] {
|
|
959
|
+
const tables: number[][] = [];
|
|
960
|
+
|
|
961
|
+
for (let m = 0; m < this.numSubvectors; m++) {
|
|
962
|
+
const start = m * this.subvectorDim;
|
|
963
|
+
const querySubvector = query.slice(start, start + this.subvectorDim);
|
|
964
|
+
|
|
965
|
+
const table = new Array(this.numCentroids);
|
|
966
|
+
for (let c = 0; c < this.numCentroids; c++) {
|
|
967
|
+
table[c] = squaredEuclideanDistance(
|
|
968
|
+
querySubvector,
|
|
969
|
+
this.codebooks[m].centroids[c]
|
|
970
|
+
);
|
|
971
|
+
}
|
|
972
|
+
tables.push(table);
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
return tables;
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
/**
 * Computes symmetric distances between two sets of codes.
 *
 * Both sides are quantized; distances are approximated by summing
 * precomputed inter-centroid squared distances per subvector.
 *
 * @param codesA - First set of PQ codes
 * @param codesB - Second set of PQ codes
 * @returns Distance matrix (codesA.length x codesB.length)
 */
computeSymmetricDistances(codesA: Uint8Array[], codesB: Uint8Array[]): number[][] {
  if (!this.isTrained) {
    throw new Error('ProductQuantizer must be trained before computing distances');
  }

  // Precompute the symmetric inter-centroid distance table per subvector.
  const centroidDists: number[][][] = [];
  for (let m = 0; m < this.numSubvectors; m++) {
    const table = zerosMatrix(this.numCentroids, this.numCentroids);
    for (let i = 0; i < this.numCentroids; i++) {
      for (let j = i; j < this.numCentroids; j++) {
        const d = squaredEuclideanDistance(
          this.codebooks[m].centroids[i],
          this.codebooks[m].centroids[j]
        );
        // Fill both halves so lookups below never need ordering.
        table[i][j] = d;
        table[j][i] = d;
      }
    }
    centroidDists.push(table);
  }

  // Sum the per-subvector entries for every (a, b) pair.
  const result = zerosMatrix(codesA.length, codesB.length);
  codesA.forEach((a, i) => {
    codesB.forEach((b, j) => {
      let sum = 0;
      for (let m = 0; m < this.numSubvectors; m++) {
        sum += centroidDists[m][a[m]][b[m]];
      }
      result[i][j] = Math.sqrt(sum);
    });
  });

  return result;
}
|
|
1021
|
+
|
|
1022
|
+
/**
 * Compression ratio versus float32 storage:
 * (4 bytes * dimensions) original -> numSubvectors code bytes (for K=256).
 */
getCompressionRatio(): number {
  const originalBytes = 4 * this.dimensions;
  return originalBytes / this.numSubvectors;
}
|
|
1028
|
+
/**
 * Human-readable compression factor, e.g. "64.0x".
 */
getMemoryReduction(): string {
  return `${this.getCompressionRatio().toFixed(1)}x`;
}
|
|
1033
|
+
/**
 * Gets a deep copy of the trained codebooks, so callers cannot mutate
 * internal state through the returned arrays.
 */
getCodebooks(): Codebook[] {
  const copies: Codebook[] = [];
  for (const cb of this.codebooks) {
    copies.push({
      centroids: cb.centroids.map(row => row.slice()),
      counts: cb.counts.slice(),
    });
  }
  return copies;
}
|
|
1043
|
+
/**
|
|
1044
|
+
* Sets codebooks directly (for loading pretrained).
|
|
1045
|
+
*/
|
|
1046
|
+
setCodebooks(codebooks: Codebook[]): void {
|
|
1047
|
+
if (codebooks.length !== this.numSubvectors) {
|
|
1048
|
+
throw new Error(`Expected ${this.numSubvectors} codebooks, got ${codebooks.length}`);
|
|
1049
|
+
}
|
|
1050
|
+
this.codebooks = codebooks;
|
|
1051
|
+
this.isTrained = true;
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
/**
 * Checks if the quantizer is trained (train() or setCodebooks() completed).
 */
get trained(): boolean {
  return this.isTrained;
}
}
|
|
1061
|
+
|
|
1062
|
+
// ============================================================================
|
|
1063
|
+
// Optimized Product Quantization (OPQ)
|
|
1064
|
+
// ============================================================================
|
|
1065
|
+
|
|
1066
|
+
/**
|
|
1067
|
+
* OptimizedProductQuantizer extends PQ with learned rotation.
|
|
1068
|
+
*
|
|
1069
|
+
* Learns an orthogonal rotation matrix to minimize quantization error.
|
|
1070
|
+
* The rotation decorrelates dimensions and distributes variance evenly.
|
|
1071
|
+
*
|
|
1072
|
+
* @example
|
|
1073
|
+
* ```typescript
|
|
1074
|
+
* const opq = new OptimizedProductQuantizer({
|
|
1075
|
+
* dimensions: 128,
|
|
1076
|
+
* numSubvectors: 8,
|
|
1077
|
+
* numCentroids: 256,
|
|
1078
|
+
* opqIterations: 10
|
|
1079
|
+
* });
|
|
1080
|
+
* await opq.trainWithRotation(trainingVectors);
|
|
1081
|
+
* const codes = opq.encode(vectors);
|
|
1082
|
+
* ```
|
|
1083
|
+
*/
|
|
1084
|
+
export class OptimizedProductQuantizer extends ProductQuantizer {
|
|
1085
|
+
override readonly type: QuantizationType = 'opq';
|
|
1086
|
+
|
|
1087
|
+
private rotationMatrix: number[][] | null = null;
|
|
1088
|
+
private readonly opqIterations: number;
|
|
1089
|
+
private readonly learningRate: number;
|
|
1090
|
+
|
|
1091
|
+
constructor(options: OptimizedProductQuantizationOptions) {
|
|
1092
|
+
super(options);
|
|
1093
|
+
this.opqIterations = options.opqIterations ?? 10;
|
|
1094
|
+
this.learningRate = options.learningRate ?? 0.01;
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
/**
|
|
1098
|
+
* Trains the quantizer with rotation matrix optimization.
|
|
1099
|
+
*
|
|
1100
|
+
* @param vectors - Training vectors
|
|
1101
|
+
*/
|
|
1102
|
+
async trainWithRotation(vectors: number[][]): Promise<void> {
|
|
1103
|
+
// Initialize rotation matrix as identity
|
|
1104
|
+
this.rotationMatrix = identityMatrix(this.dimensions);
|
|
1105
|
+
|
|
1106
|
+
for (let opqIter = 0; opqIter < this.opqIterations; opqIter++) {
|
|
1107
|
+
// Step 1: Rotate vectors
|
|
1108
|
+
const rotatedVectors = this.rotateVectors(vectors);
|
|
1109
|
+
|
|
1110
|
+
// Step 2: Train PQ on rotated vectors
|
|
1111
|
+
await super.train(rotatedVectors);
|
|
1112
|
+
|
|
1113
|
+
// Step 3: Update rotation matrix using Procrustes analysis
|
|
1114
|
+
this.updateRotation(vectors);
|
|
1115
|
+
|
|
1116
|
+
// Yield to event loop
|
|
1117
|
+
await new Promise(resolve => setTimeout(resolve, 0));
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
// Final PQ training with final rotation
|
|
1121
|
+
const finalRotated = this.rotateVectors(vectors);
|
|
1122
|
+
await super.train(finalRotated);
|
|
1123
|
+
}
|
|
1124
|
+
|
|
1125
|
+
/**
|
|
1126
|
+
* Rotates vectors using the learned rotation matrix.
|
|
1127
|
+
*/
|
|
1128
|
+
private rotateVectors(vectors: number[][]): number[][] {
|
|
1129
|
+
if (!this.rotationMatrix) {
|
|
1130
|
+
return vectors;
|
|
1131
|
+
}
|
|
1132
|
+
return vectors.map(v => matVec(this.rotationMatrix!, v));
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
/**
|
|
1136
|
+
* Updates the rotation matrix using Procrustes analysis.
|
|
1137
|
+
* Minimizes ||X - R * decode(encode(R^T * X))||^2
|
|
1138
|
+
*/
|
|
1139
|
+
private updateRotation(vectors: number[][]): void {
|
|
1140
|
+
if (!this.rotationMatrix) return;
|
|
1141
|
+
|
|
1142
|
+
// Get reconstructed vectors
|
|
1143
|
+
const rotated = this.rotateVectors(vectors);
|
|
1144
|
+
const codes = this.encode(rotated);
|
|
1145
|
+
const reconstructed = this.decode(codes);
|
|
1146
|
+
|
|
1147
|
+
// Compute X^T * Y for Procrustes
|
|
1148
|
+
const xty = zerosMatrix(this.dimensions, this.dimensions);
|
|
1149
|
+
for (let i = 0; i < vectors.length; i++) {
|
|
1150
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1151
|
+
for (let k = 0; k < this.dimensions; k++) {
|
|
1152
|
+
xty[j][k] += vectors[i][j] * reconstructed[i][k];
|
|
1153
|
+
}
|
|
1154
|
+
}
|
|
1155
|
+
}
|
|
1156
|
+
|
|
1157
|
+
// SVD approximation using power iteration
|
|
1158
|
+
// For simplicity, we use gradient descent on the rotation
|
|
1159
|
+
const gradientUpdate = this.computeRotationGradient(vectors, reconstructed);
|
|
1160
|
+
|
|
1161
|
+
// Update rotation matrix
|
|
1162
|
+
for (let i = 0; i < this.dimensions; i++) {
|
|
1163
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1164
|
+
this.rotationMatrix![i][j] -= this.learningRate * gradientUpdate[i][j];
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
// Orthogonalize using Gram-Schmidt
|
|
1169
|
+
this.orthogonalize();
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
/**
|
|
1173
|
+
* Computes gradient for rotation update.
|
|
1174
|
+
*/
|
|
1175
|
+
private computeRotationGradient(
|
|
1176
|
+
original: number[][],
|
|
1177
|
+
reconstructed: number[][]
|
|
1178
|
+
): number[][] {
|
|
1179
|
+
const gradient = zerosMatrix(this.dimensions, this.dimensions);
|
|
1180
|
+
|
|
1181
|
+
for (let i = 0; i < original.length; i++) {
|
|
1182
|
+
const rotatedOrig = matVec(this.rotationMatrix!, original[i]);
|
|
1183
|
+
const error = rotatedOrig.map((v, j) => v - reconstructed[i][j]);
|
|
1184
|
+
|
|
1185
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1186
|
+
for (let k = 0; k < this.dimensions; k++) {
|
|
1187
|
+
gradient[j][k] += error[j] * original[i][k];
|
|
1188
|
+
}
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
// Normalize
|
|
1193
|
+
const scale = 1 / original.length;
|
|
1194
|
+
for (let i = 0; i < this.dimensions; i++) {
|
|
1195
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1196
|
+
gradient[i][j] *= scale;
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
return gradient;
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
/**
|
|
1204
|
+
* Orthogonalizes the rotation matrix using modified Gram-Schmidt.
|
|
1205
|
+
*/
|
|
1206
|
+
private orthogonalize(): void {
|
|
1207
|
+
if (!this.rotationMatrix) return;
|
|
1208
|
+
|
|
1209
|
+
for (let i = 0; i < this.dimensions; i++) {
|
|
1210
|
+
// Normalize column i
|
|
1211
|
+
let n = 0;
|
|
1212
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1213
|
+
n += this.rotationMatrix[j][i] * this.rotationMatrix[j][i];
|
|
1214
|
+
}
|
|
1215
|
+
n = Math.sqrt(n);
|
|
1216
|
+
if (n > 1e-10) {
|
|
1217
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1218
|
+
this.rotationMatrix[j][i] /= n;
|
|
1219
|
+
}
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
// Remove component from remaining columns
|
|
1223
|
+
for (let k = i + 1; k < this.dimensions; k++) {
|
|
1224
|
+
let projection = 0;
|
|
1225
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1226
|
+
projection += this.rotationMatrix[j][i] * this.rotationMatrix[j][k];
|
|
1227
|
+
}
|
|
1228
|
+
for (let j = 0; j < this.dimensions; j++) {
|
|
1229
|
+
this.rotationMatrix[j][k] -= projection * this.rotationMatrix[j][i];
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
|
|
1235
|
+
/**
|
|
1236
|
+
* Encodes vectors with rotation.
|
|
1237
|
+
*/
|
|
1238
|
+
override encode(vectors: number[][]): Uint8Array[] {
|
|
1239
|
+
const rotated = this.rotateVectors(vectors);
|
|
1240
|
+
return super.encode(rotated);
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1243
|
+
/**
|
|
1244
|
+
* Decodes codes and applies inverse rotation.
|
|
1245
|
+
*/
|
|
1246
|
+
override decode(codes: Uint8Array[]): number[][] {
|
|
1247
|
+
const decoded = super.decode(codes);
|
|
1248
|
+
if (!this.rotationMatrix) {
|
|
1249
|
+
return decoded;
|
|
1250
|
+
}
|
|
1251
|
+
// Apply inverse rotation (transpose for orthogonal matrix)
|
|
1252
|
+
const invRotation = transpose(this.rotationMatrix);
|
|
1253
|
+
return decoded.map(v => matVec(invRotation, v));
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
/**
|
|
1257
|
+
* Computes distances with rotation applied to query.
|
|
1258
|
+
*/
|
|
1259
|
+
override computeDistances(query: number[], codes: Uint8Array[]): number[] {
|
|
1260
|
+
const rotatedQuery = this.rotationMatrix
|
|
1261
|
+
? matVec(this.rotationMatrix, query)
|
|
1262
|
+
: query;
|
|
1263
|
+
return super.computeDistances(rotatedQuery, codes);
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
/**
|
|
1267
|
+
* Gets the rotation matrix.
|
|
1268
|
+
*/
|
|
1269
|
+
getRotationMatrix(): number[][] | null {
|
|
1270
|
+
return this.rotationMatrix ? this.rotationMatrix.map(r => [...r]) : null;
|
|
1271
|
+
}
|
|
1272
|
+
|
|
1273
|
+
/**
|
|
1274
|
+
* Sets the rotation matrix directly.
|
|
1275
|
+
*/
|
|
1276
|
+
setRotationMatrix(matrix: number[][]): void {
|
|
1277
|
+
if (matrix.length !== this.dimensions || matrix[0].length !== this.dimensions) {
|
|
1278
|
+
throw new Error(`Expected ${this.dimensions}x${this.dimensions} matrix`);
|
|
1279
|
+
}
|
|
1280
|
+
this.rotationMatrix = matrix.map(r => [...r]);
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
// ============================================================================
|
|
1285
|
+
// SQL Integration
|
|
1286
|
+
// ============================================================================
|
|
1287
|
+
|
|
1288
|
+
/**
|
|
1289
|
+
* QuantizationSQL generates SQL for quantized vector operations.
|
|
1290
|
+
*
|
|
1291
|
+
* Provides SQL statements for:
|
|
1292
|
+
* - Creating quantized storage tables
|
|
1293
|
+
* - Inserting quantized vectors
|
|
1294
|
+
* - Searching with quantized distances
|
|
1295
|
+
*/
|
|
1296
|
+
export class QuantizationSQL {
|
|
1297
|
+
/**
 * Generates SQL for creating a table with quantized vector storage.
 *
 * NOTE(review): tableName and additionalColumns are interpolated verbatim
 * into the DDL — callers must pass trusted identifiers only.
 *
 * @param tableName - Table name
 * @param type - Quantization type
 * @param options - Quantization options
 * @returns CREATE TABLE SQL statement
 */
static createQuantizedTable(
  tableName: string,
  type: QuantizationType,
  options?: {
    dimensions?: number;
    numSubvectors?: number;
    idType?: 'SERIAL' | 'BIGSERIAL' | 'UUID';
    additionalColumns?: string;
  }
): string {
  const {
    dimensions = 128,
    numSubvectors = 8,
    idType = 'BIGSERIAL',
    additionalColumns = '',
  } = options ?? {};

  let columnName: string;
  let vectorColumn: string;
  let comment: string;

  switch (type) {
    case 'scalar':
      columnName = 'quantized_vector';
      vectorColumn = `quantized_vector BYTEA NOT NULL`;
      comment = `Scalar quantized vectors (int8, ${dimensions} dims, 4x compression)`;
      break;

    case 'binary': {
      // Braces give the const its own block scope (no-case-declarations).
      const binaryBytes = Math.ceil(dimensions / 8);
      columnName = 'binary_vector';
      vectorColumn = `binary_vector BIT(${dimensions})`;
      comment = `Binary quantized vectors (${dimensions} dims, ${binaryBytes} bytes, 32x compression)`;
      break;
    }

    case 'pq':
    case 'opq':
      columnName = 'pq_codes';
      vectorColumn = `pq_codes BYTEA NOT NULL`;
      comment = `${type === 'opq' ? 'Optimized ' : ''}Product quantized vectors (M=${numSubvectors}, K=256)`;
      break;

    default:
      throw new Error(`Unknown quantization type: ${type}`);
  }

  const extraCols = additionalColumns ? `\n ${additionalColumns},` : '';

  // BUGFIX: the index previously always referenced quantized_vector, a column
  // that does not exist for 'binary'/'pq'/'opq' tables, so the generated DDL
  // failed. Index the column actually created for this quantization type.
  return `
-- Table for ${comment}
CREATE TABLE IF NOT EXISTS ${tableName} (
 id ${idType} PRIMARY KEY,${extraCols}
 original_vector vector(${dimensions}), -- Optional: keep original for reranking
 ${vectorColumn},
 metadata JSONB,
 created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP
);

-- Index for quantized search
CREATE INDEX IF NOT EXISTS idx_${tableName}_quantized ON ${tableName} (${columnName});

COMMENT ON TABLE ${tableName} IS '${comment}';
`.trim();
}
|
|
1365
|
+
|
|
1366
|
+
/**
 * Generates SQL for inserting a quantized vector.
 *
 * Placeholders: $1 = original vector, $2 = quantized bytes/codes,
 * $3 = metadata JSON.
 *
 * @param tableName - Table name (interpolated verbatim — trusted input only)
 * @param type - Quantization type
 * @returns INSERT SQL template with placeholders
 */
static insertQuantizedSQL(tableName: string, type: QuantizationType): string {
  // Column name depends on the quantization scheme the table was created with.
  const column = type === 'binary' ? 'binary_vector' :
    (type === 'pq' || type === 'opq') ? 'pq_codes' : 'quantized_vector';

  return `
INSERT INTO ${tableName} (original_vector, ${column}, metadata)
VALUES ($1::vector, $2, $3::jsonb)
RETURNING id;
`.trim();
}
|
|
1383
|
+
|
|
1384
|
+
/**
 * Generates SQL for batch insert of quantized vectors.
 *
 * Row i consumes placeholders $（3i+1)..$（3i+3): vector, codes, metadata.
 *
 * @param tableName - Table name (interpolated verbatim — trusted input only)
 * @param type - Quantization type
 * @param count - Number of vectors
 * @returns Batch INSERT SQL
 */
static batchInsertSQL(
  tableName: string,
  type: QuantizationType,
  count: number
): string {
  // Same column mapping as insertQuantizedSQL.
  const column = type === 'binary' ? 'binary_vector' :
    (type === 'pq' || type === 'opq') ? 'pq_codes' : 'quantized_vector';

  // Build one parenthesized tuple of placeholders per row.
  const values = Array.from({ length: count }, (_, i) => {
    const offset = i * 3;
    return `($${offset + 1}::vector, $${offset + 2}, $${offset + 3}::jsonb)`;
  }).join(',\n ');

  return `
INSERT INTO ${tableName} (original_vector, ${column}, metadata)
VALUES
 ${values}
RETURNING id;
`.trim();
}
|
|
1412
|
+
|
|
1413
|
+
/**
 * Generates SQL for scalar quantized search.
 *
 * With reranking, a two-stage query runs: a cheap approximate pass over the
 * quantized column keeps 10*k candidates, then the exact `<->` distance on
 * the stored original vectors selects the final top-k.
 * Placeholders: $1 = quantized query bytes; $2 = original query vector
 * (reranking variant only).
 *
 * @param tableName - Table name (interpolated verbatim — trusted input only)
 * @param k - Number of results
 * @param useReranking - Whether to rerank with original vectors
 * @returns Search SQL template
 */
static scalarSearchSQL(
  tableName: string,
  k: number,
  useReranking: boolean = true
): string {
  if (useReranking) {
    // Two-stage search: filter with quantized, rerank with original
    const filterK = k * 10;
    return `
WITH candidates AS (
 SELECT id, original_vector, metadata,
 ruvector_scalar_distance(quantized_vector, $1::bytea) AS approx_dist
 FROM ${tableName}
 ORDER BY approx_dist ASC
 LIMIT ${filterK}
)
SELECT id, metadata,
 original_vector <-> $2::vector AS exact_dist
FROM candidates
ORDER BY exact_dist ASC
LIMIT ${k};
`.trim();
  }

  // Single-stage: rank directly on the approximate quantized distance.
  return `
SELECT id, metadata,
 ruvector_scalar_distance(quantized_vector, $1::bytea) AS distance
FROM ${tableName}
ORDER BY distance ASC
LIMIT ${k};
`.trim();
}
|
|
1453
|
+
|
|
1454
|
+
/**
 * Generates SQL for binary quantized search with Hamming distance.
 *
 * Hamming distance is bit_count of the XOR (`#`) between the stored bit
 * vector and the query ($1). With reranking, 10*k candidates are kept and
 * reranked by the exact `<->` distance against $2.
 *
 * @param tableName - Table name (interpolated verbatim — trusted input only)
 * @param k - Number of results
 * @param useReranking - Whether to rerank with original vectors
 * @returns Search SQL template
 */
static binarySearchSQL(
  tableName: string,
  k: number,
  useReranking: boolean = true
): string {
  if (useReranking) {
    // Over-fetch 10x candidates so the exact rerank has headroom.
    const filterK = k * 10;
    return `
WITH candidates AS (
 SELECT id, original_vector, metadata,
 bit_count(binary_vector # $1::bit) AS hamming_dist
 FROM ${tableName}
 ORDER BY hamming_dist ASC
 LIMIT ${filterK}
)
SELECT id, metadata,
 original_vector <-> $2::vector AS exact_dist
FROM candidates
ORDER BY exact_dist ASC
LIMIT ${k};
`.trim();
  }

  // Single-stage: rank directly on the Hamming distance.
  return `
SELECT id, metadata,
 bit_count(binary_vector # $1::bit) AS hamming_distance
FROM ${tableName}
ORDER BY hamming_distance ASC
LIMIT ${k};
`.trim();
}
|
|
1493
|
+
|
|
1494
|
+
/**
 * Generates SQL for PQ search using distance lookup tables.
 *
 * The approximate distance per row is sqrt of the sum of per-subvector
 * squared distances, each looked up by the row's m-th code byte via
 * ruvector_pq_subvector_dist. With reranking, 10*k candidates are kept and
 * reranked with the exact `<->` distance against $2.
 *
 * @param tableName - Table name (interpolated verbatim — trusted input only)
 * @param k - Number of results
 * @param numSubvectors - Number of PQ subvectors
 * @param useReranking - Whether to rerank
 * @returns Search SQL template
 */
static pqSearchSQL(
  tableName: string,
  k: number,
  numSubvectors: number = 8,
  useReranking: boolean = true
): string {
  // One lookup term per subvector; summed inline in the SELECT.
  const distanceTerms = Array.from(
    { length: numSubvectors },
    (_, m) => `ruvector_pq_subvector_dist($1, ${m}, get_byte(pq_codes, ${m}))`
  ).join(' + ');

  if (useReranking) {
    // Over-fetch 10x candidates so the exact rerank has headroom.
    const filterK = k * 10;
    return `
WITH candidates AS (
 SELECT id, original_vector, metadata,
 sqrt(${distanceTerms}) AS approx_dist
 FROM ${tableName}
 ORDER BY approx_dist ASC
 LIMIT ${filterK}
)
SELECT id, metadata,
 original_vector <-> $2::vector AS exact_dist
FROM candidates
ORDER BY exact_dist ASC
LIMIT ${k};
`.trim();
  }

  return `
SELECT id, metadata,
 sqrt(${distanceTerms}) AS distance
FROM ${tableName}
ORDER BY distance ASC
LIMIT ${k};
`.trim();
}
|
|
1541
|
+
|
|
1542
|
+
/**
 * Generates SQL for creating PQ lookup tables: one table for the trained
 * codebooks and one for per-query precomputed distance entries.
 *
 * @param tableName - Lookup table name prefix (interpolated verbatim — trusted input only)
 * @param numSubvectors - Number of subvectors (M); only used in the table comment
 * @param numCentroids - Number of centroids (K); only used in the table comment
 * @returns CREATE TABLE SQL for lookup tables
 */
static createPQLookupTables(
  tableName: string,
  numSubvectors: number = 8,
  numCentroids: number = 256
): string {
  return `
-- PQ codebooks storage
CREATE TABLE IF NOT EXISTS ${tableName}_codebooks (
 subvector_id INTEGER NOT NULL,
 centroid_id INTEGER NOT NULL,
 centroid vector NOT NULL,
 PRIMARY KEY (subvector_id, centroid_id)
);

-- Precomputed distance lookup (for specific queries)
CREATE TABLE IF NOT EXISTS ${tableName}_distance_lookup (
 query_id BIGINT NOT NULL,
 subvector_id INTEGER NOT NULL,
 centroid_id INTEGER NOT NULL,
 squared_distance REAL NOT NULL,
 PRIMARY KEY (query_id, subvector_id, centroid_id)
);

CREATE INDEX IF NOT EXISTS idx_${tableName}_lookup_query
ON ${tableName}_distance_lookup (query_id, subvector_id);

COMMENT ON TABLE ${tableName}_codebooks IS 'PQ codebooks: M=${numSubvectors}, K=${numCentroids}';
`.trim();
}
|
|
1579
|
+
|
|
1580
|
+
/**
|
|
1581
|
+
* Generates SQL for inserting PQ codebooks.
|
|
1582
|
+
*
|
|
1583
|
+
* @param tableName - Base table name
|
|
1584
|
+
* @param codebooks - Trained codebooks
|
|
1585
|
+
* @returns INSERT SQL for codebooks
|
|
1586
|
+
*/
|
|
1587
|
+
static insertCodebooksSQL(
|
|
1588
|
+
tableName: string,
|
|
1589
|
+
codebooks: Array<{ centroids: number[][] }>
|
|
1590
|
+
): string {
|
|
1591
|
+
const values: string[] = [];
|
|
1592
|
+
|
|
1593
|
+
for (let m = 0; m < codebooks.length; m++) {
|
|
1594
|
+
for (let k = 0; k < codebooks[m].centroids.length; k++) {
|
|
1595
|
+
const centroidStr = `'[${codebooks[m].centroids[k].join(',')}]'`;
|
|
1596
|
+
values.push(`(${m}, ${k}, ${centroidStr}::vector)`);
|
|
1597
|
+
}
|
|
1598
|
+
}
|
|
1599
|
+
|
|
1600
|
+
return `
|
|
1601
|
+
INSERT INTO ${tableName}_codebooks (subvector_id, centroid_id, centroid)
|
|
1602
|
+
VALUES
|
|
1603
|
+
${values.join(',\n ')}
|
|
1604
|
+
ON CONFLICT (subvector_id, centroid_id) DO UPDATE
|
|
1605
|
+
SET centroid = EXCLUDED.centroid;
|
|
1606
|
+
`.trim();
|
|
1607
|
+
}
|
|
1608
|
+
|
|
1609
|
+
/**
 * Generates a PL/pgSQL function computing the asymmetric PQ distance between
 * a full-precision query vector and a PQ-encoded vector (one byte per
 * subvector code in `pq_codes`). The result is the square root of the sum of
 * per-subvector squared L2 distances.
 *
 * NOTE(review): the emitted SQL calls `vector_slice(...)`, which is not part
 * of stock pgvector (pgvector 0.7+ exposes `subvector()` instead) — confirm
 * the target extension (ruvector?) actually provides it. Also note the
 * dynamic EXECUTE does one codebook lookup per subvector per call; fine for
 * small M, but not vectorized.
 *
 * @param functionName - Function name to create
 * @param numSubvectors - Number of subvectors (M); baked into the loop bound
 * @returns CREATE FUNCTION SQL
 */
static createPQDistanceFunction(
  functionName: string = 'pq_asymmetric_distance',
  numSubvectors: number = 8
): string {
  // The loop bound and subvector split are baked in at SQL-generation time,
  // so the function is specific to this M.
  return `
    CREATE OR REPLACE FUNCTION ${functionName}(
      query_vector vector,
      pq_codes bytea,
      codebook_table text
    )
    RETURNS real AS $$
    DECLARE
      total_distance real := 0;
      m integer;
      code integer;
      subvector_dim integer;
      query_subvector vector;
      centroid vector;
    BEGIN
      subvector_dim := vector_dims(query_vector) / ${numSubvectors};

      FOR m IN 0..${numSubvectors - 1} LOOP
        code := get_byte(pq_codes, m);

        -- Extract query subvector
        query_subvector := vector_slice(query_vector, m * subvector_dim, (m + 1) * subvector_dim);

        -- Get centroid from codebook
        EXECUTE format('SELECT centroid FROM %I WHERE subvector_id = $1 AND centroid_id = $2',
          codebook_table || '_codebooks')
        INTO centroid
        USING m, code;

        -- Add squared distance
        total_distance := total_distance + (query_subvector <-> centroid)^2;
      END LOOP;

      RETURN sqrt(total_distance);
    END;
    $$ LANGUAGE plpgsql IMMUTABLE;
  `.trim();
}
|
|
1658
|
+
|
|
1659
|
+
/**
 * Generates SQL for OPQ (optimized PQ) rotation support: a table storing the
 * learned rotation matrix as (row, col, value) triples, and a PL/pgSQL
 * function applying that matrix to a vector via naive row-by-column
 * multiplication.
 *
 * NOTE(review): in the emitted function, `SELECT ... INTO sum` with no
 * matching row sets `sum` to NULL — a missing matrix entry silently poisons
 * the whole output vector rather than contributing 0. Also `v[j+1]`
 * subscripts a `vector` value directly; confirm the deployed extension
 * supports vector subscripting. The double loop issues O(dimensions^2)
 * scalar queries per rotated vector — correct but slow for large dimensions.
 *
 * @param tableName - Base table name (suffix `_rotation` is appended)
 * @param dimensions - Vector dimensions; baked into loop bounds and COMMENT
 * @returns SQL for rotation matrix storage
 */
static createOPQRotationTable(tableName: string, dimensions: number): string {
  return `
    -- OPQ rotation matrix storage
    CREATE TABLE IF NOT EXISTS ${tableName}_rotation (
      row_id INTEGER NOT NULL,
      col_id INTEGER NOT NULL,
      value REAL NOT NULL,
      PRIMARY KEY (row_id, col_id)
    );

    -- Function to apply rotation
    CREATE OR REPLACE FUNCTION ${tableName}_rotate_vector(v vector)
    RETURNS vector AS $$
    DECLARE
      result float8[];
      i integer;
      sum float8;
      j integer;
    BEGIN
      result := array_fill(0::float8, ARRAY[${dimensions}]);

      FOR i IN 0..${dimensions - 1} LOOP
        sum := 0;
        FOR j IN 0..${dimensions - 1} LOOP
          SELECT sum + r.value * v[j+1]
          INTO sum
          FROM ${tableName}_rotation r
          WHERE r.row_id = i AND r.col_id = j;
        END LOOP;
        result[i+1] := sum;
      END LOOP;

      RETURN result::vector;
    END;
    $$ LANGUAGE plpgsql IMMUTABLE;

    COMMENT ON TABLE ${tableName}_rotation IS 'OPQ rotation matrix (${dimensions}x${dimensions})';
  `.trim();
}
|
|
1705
|
+
|
|
1706
|
+
/**
 * Generates SQL for a view reporting storage statistics of a quantized
 * table: total/table/index sizes, row count, and average bytes per row
 * (0 when the table is empty, guarding the division).
 *
 * NOTE(review): the view body runs `(SELECT count(*) FROM table)` three
 * times per evaluation — a full scan each on plain heap tables; acceptable
 * for occasional diagnostics, costly if polled. `tableName` is interpolated
 * verbatim (no quoting) — trusted input only.
 *
 * @param tableName - Base table name
 * @returns CREATE VIEW SQL
 */
static createStatsView(tableName: string): string {
  return `
    CREATE OR REPLACE VIEW ${tableName}_quantization_stats AS
    SELECT
      pg_total_relation_size('${tableName}'::regclass) AS total_size_bytes,
      pg_relation_size('${tableName}'::regclass) AS table_size_bytes,
      pg_indexes_size('${tableName}'::regclass) AS index_size_bytes,
      (SELECT count(*) FROM ${tableName}) AS row_count,
      CASE
        WHEN (SELECT count(*) FROM ${tableName}) > 0
        THEN pg_relation_size('${tableName}'::regclass)::float / (SELECT count(*) FROM ${tableName})
        ELSE 0
      END AS avg_bytes_per_row;
  `.trim();
}
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
// ============================================================================
|
|
1730
|
+
// Factory and Utilities
|
|
1731
|
+
// ============================================================================
|
|
1732
|
+
|
|
1733
|
+
/**
|
|
1734
|
+
* Creates a quantizer based on the specified type.
|
|
1735
|
+
*
|
|
1736
|
+
* @param type - Quantization type
|
|
1737
|
+
* @param options - Type-specific options
|
|
1738
|
+
* @returns Configured quantizer instance
|
|
1739
|
+
*
|
|
1740
|
+
* @example
|
|
1741
|
+
* ```typescript
|
|
1742
|
+
* const scalar = createQuantizer('scalar', { dimensions: 128 });
|
|
1743
|
+
* const binary = createQuantizer('binary', { dimensions: 128 });
|
|
1744
|
+
* const pq = createQuantizer('pq', { dimensions: 128, numSubvectors: 8, numCentroids: 256 });
|
|
1745
|
+
* ```
|
|
1746
|
+
*/
|
|
1747
|
+
export function createQuantizer(
|
|
1748
|
+
type: 'scalar',
|
|
1749
|
+
options: ScalarQuantizationOptions
|
|
1750
|
+
): ScalarQuantizer;
|
|
1751
|
+
export function createQuantizer(
|
|
1752
|
+
type: 'binary',
|
|
1753
|
+
options: BinaryQuantizationOptions
|
|
1754
|
+
): BinaryQuantizer;
|
|
1755
|
+
export function createQuantizer(
|
|
1756
|
+
type: 'pq',
|
|
1757
|
+
options: ProductQuantizationOptions
|
|
1758
|
+
): ProductQuantizer;
|
|
1759
|
+
export function createQuantizer(
|
|
1760
|
+
type: 'opq',
|
|
1761
|
+
options: OptimizedProductQuantizationOptions
|
|
1762
|
+
): OptimizedProductQuantizer;
|
|
1763
|
+
export function createQuantizer(
|
|
1764
|
+
type: QuantizationType,
|
|
1765
|
+
options?: QuantizationOptions
|
|
1766
|
+
): IQuantizer;
|
|
1767
|
+
export function createQuantizer(
|
|
1768
|
+
type: QuantizationType,
|
|
1769
|
+
options?: QuantizationOptions
|
|
1770
|
+
): IQuantizer {
|
|
1771
|
+
switch (type) {
|
|
1772
|
+
case 'scalar':
|
|
1773
|
+
return new ScalarQuantizer(options as ScalarQuantizationOptions);
|
|
1774
|
+
case 'binary':
|
|
1775
|
+
return new BinaryQuantizer(options as BinaryQuantizationOptions);
|
|
1776
|
+
case 'pq':
|
|
1777
|
+
return new ProductQuantizer(options as ProductQuantizationOptions);
|
|
1778
|
+
case 'opq':
|
|
1779
|
+
return new OptimizedProductQuantizer(options as OptimizedProductQuantizationOptions);
|
|
1780
|
+
default:
|
|
1781
|
+
throw new Error(`Unknown quantization type: ${type}`);
|
|
1782
|
+
}
|
|
1783
|
+
}
|
|
1784
|
+
|
|
1785
|
+
/**
|
|
1786
|
+
* Computes quantization statistics by comparing original and reconstructed vectors.
|
|
1787
|
+
*
|
|
1788
|
+
* @param original - Original vectors
|
|
1789
|
+
* @param reconstructed - Reconstructed vectors after quantization
|
|
1790
|
+
* @param quantizer - The quantizer used
|
|
1791
|
+
* @returns Quantization statistics
|
|
1792
|
+
*/
|
|
1793
|
+
export function computeQuantizationStats(
|
|
1794
|
+
original: number[][],
|
|
1795
|
+
reconstructed: number[][],
|
|
1796
|
+
quantizer: IQuantizer
|
|
1797
|
+
): QuantizationStats {
|
|
1798
|
+
if (original.length !== reconstructed.length) {
|
|
1799
|
+
throw new Error('Original and reconstructed arrays must have same length');
|
|
1800
|
+
}
|
|
1801
|
+
|
|
1802
|
+
// Compute MSE
|
|
1803
|
+
let mse = 0;
|
|
1804
|
+
for (let i = 0; i < original.length; i++) {
|
|
1805
|
+
mse += squaredEuclideanDistance(original[i], reconstructed[i]);
|
|
1806
|
+
}
|
|
1807
|
+
mse /= original.length;
|
|
1808
|
+
|
|
1809
|
+
// Estimate recall@10 by comparing rankings
|
|
1810
|
+
// (simplified - real evaluation would use a test set)
|
|
1811
|
+
const recallAt10 = estimateRecall(original, reconstructed, 10);
|
|
1812
|
+
|
|
1813
|
+
return {
|
|
1814
|
+
compressionRatio: quantizer.getCompressionRatio(),
|
|
1815
|
+
memoryReduction: quantizer.getMemoryReduction(),
|
|
1816
|
+
recallAt10,
|
|
1817
|
+
searchSpeedup: quantizer.getCompressionRatio() * 0.8, // Approximate
|
|
1818
|
+
mse,
|
|
1819
|
+
};
|
|
1820
|
+
}
|
|
1821
|
+
|
|
1822
|
+
/**
|
|
1823
|
+
* Estimates recall@k by comparing original and reconstructed rankings.
|
|
1824
|
+
*/
|
|
1825
|
+
function estimateRecall(
|
|
1826
|
+
original: number[][],
|
|
1827
|
+
reconstructed: number[][],
|
|
1828
|
+
k: number
|
|
1829
|
+
): number {
|
|
1830
|
+
if (original.length < k + 1) {
|
|
1831
|
+
return 1.0; // Not enough data to evaluate
|
|
1832
|
+
}
|
|
1833
|
+
|
|
1834
|
+
let totalRecall = 0;
|
|
1835
|
+
const numQueries = Math.min(100, original.length);
|
|
1836
|
+
|
|
1837
|
+
for (let q = 0; q < numQueries; q++) {
|
|
1838
|
+
const query = original[q];
|
|
1839
|
+
|
|
1840
|
+
// Get true top-k using original vectors
|
|
1841
|
+
const trueDistances: Array<{ idx: number; dist: number }> = [];
|
|
1842
|
+
for (let i = 0; i < original.length; i++) {
|
|
1843
|
+
if (i !== q) {
|
|
1844
|
+
trueDistances.push({
|
|
1845
|
+
idx: i,
|
|
1846
|
+
dist: euclideanDistance(query, original[i]),
|
|
1847
|
+
});
|
|
1848
|
+
}
|
|
1849
|
+
}
|
|
1850
|
+
trueDistances.sort((a, b) => a.dist - b.dist);
|
|
1851
|
+
const trueTopK = new Set(trueDistances.slice(0, k).map(d => d.idx));
|
|
1852
|
+
|
|
1853
|
+
// Get approx top-k using reconstructed vectors
|
|
1854
|
+
const approxDistances: Array<{ idx: number; dist: number }> = [];
|
|
1855
|
+
for (let i = 0; i < reconstructed.length; i++) {
|
|
1856
|
+
if (i !== q) {
|
|
1857
|
+
approxDistances.push({
|
|
1858
|
+
idx: i,
|
|
1859
|
+
dist: euclideanDistance(query, reconstructed[i]),
|
|
1860
|
+
});
|
|
1861
|
+
}
|
|
1862
|
+
}
|
|
1863
|
+
approxDistances.sort((a, b) => a.dist - b.dist);
|
|
1864
|
+
const approxTopK = approxDistances.slice(0, k).map(d => d.idx);
|
|
1865
|
+
|
|
1866
|
+
// Count intersection
|
|
1867
|
+
let hits = 0;
|
|
1868
|
+
for (const idx of approxTopK) {
|
|
1869
|
+
if (trueTopK.has(idx)) {
|
|
1870
|
+
hits++;
|
|
1871
|
+
}
|
|
1872
|
+
}
|
|
1873
|
+
|
|
1874
|
+
totalRecall += hits / k;
|
|
1875
|
+
}
|
|
1876
|
+
|
|
1877
|
+
return totalRecall / numQueries;
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
/**
|
|
1881
|
+
* Serializes a quantizer to JSON for persistence.
|
|
1882
|
+
*
|
|
1883
|
+
* @param quantizer - Quantizer to serialize
|
|
1884
|
+
* @returns JSON-serializable object
|
|
1885
|
+
*/
|
|
1886
|
+
export function serializeQuantizer(quantizer: IQuantizer): Record<string, unknown> {
|
|
1887
|
+
const base = {
|
|
1888
|
+
type: quantizer.type,
|
|
1889
|
+
dimensions: quantizer.dimensions,
|
|
1890
|
+
};
|
|
1891
|
+
|
|
1892
|
+
if (quantizer instanceof ScalarQuantizer) {
|
|
1893
|
+
return {
|
|
1894
|
+
...base,
|
|
1895
|
+
calibration: quantizer.getCalibration(),
|
|
1896
|
+
};
|
|
1897
|
+
}
|
|
1898
|
+
|
|
1899
|
+
if (quantizer instanceof OptimizedProductQuantizer) {
|
|
1900
|
+
return {
|
|
1901
|
+
...base,
|
|
1902
|
+
numSubvectors: quantizer.numSubvectors,
|
|
1903
|
+
numCentroids: quantizer.numCentroids,
|
|
1904
|
+
codebooks: quantizer.getCodebooks(),
|
|
1905
|
+
rotationMatrix: quantizer.getRotationMatrix(),
|
|
1906
|
+
};
|
|
1907
|
+
}
|
|
1908
|
+
|
|
1909
|
+
if (quantizer instanceof ProductQuantizer) {
|
|
1910
|
+
return {
|
|
1911
|
+
...base,
|
|
1912
|
+
numSubvectors: quantizer.numSubvectors,
|
|
1913
|
+
numCentroids: quantizer.numCentroids,
|
|
1914
|
+
codebooks: quantizer.getCodebooks(),
|
|
1915
|
+
};
|
|
1916
|
+
}
|
|
1917
|
+
|
|
1918
|
+
if (quantizer instanceof BinaryQuantizer) {
|
|
1919
|
+
return base;
|
|
1920
|
+
}
|
|
1921
|
+
|
|
1922
|
+
return base;
|
|
1923
|
+
}
|
|
1924
|
+
|
|
1925
|
+
/**
|
|
1926
|
+
* Deserializes a quantizer from JSON.
|
|
1927
|
+
*
|
|
1928
|
+
* @param data - Serialized quantizer data
|
|
1929
|
+
* @returns Restored quantizer instance
|
|
1930
|
+
*/
|
|
1931
|
+
export function deserializeQuantizer(data: Record<string, unknown>): IQuantizer {
|
|
1932
|
+
const type = data.type as QuantizationType;
|
|
1933
|
+
const dimensions = data.dimensions as number;
|
|
1934
|
+
|
|
1935
|
+
switch (type) {
|
|
1936
|
+
case 'scalar': {
|
|
1937
|
+
const quantizer = new ScalarQuantizer({ dimensions });
|
|
1938
|
+
if (data.calibration) {
|
|
1939
|
+
quantizer.setCalibration(data.calibration as CalibrationData);
|
|
1940
|
+
}
|
|
1941
|
+
return quantizer;
|
|
1942
|
+
}
|
|
1943
|
+
|
|
1944
|
+
case 'binary': {
|
|
1945
|
+
return new BinaryQuantizer({ dimensions });
|
|
1946
|
+
}
|
|
1947
|
+
|
|
1948
|
+
case 'pq': {
|
|
1949
|
+
const quantizer = new ProductQuantizer({
|
|
1950
|
+
dimensions,
|
|
1951
|
+
numSubvectors: data.numSubvectors as number,
|
|
1952
|
+
numCentroids: data.numCentroids as number,
|
|
1953
|
+
});
|
|
1954
|
+
if (data.codebooks) {
|
|
1955
|
+
quantizer.setCodebooks(data.codebooks as Codebook[]);
|
|
1956
|
+
}
|
|
1957
|
+
return quantizer;
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
case 'opq': {
|
|
1961
|
+
const quantizer = new OptimizedProductQuantizer({
|
|
1962
|
+
dimensions,
|
|
1963
|
+
numSubvectors: data.numSubvectors as number,
|
|
1964
|
+
numCentroids: data.numCentroids as number,
|
|
1965
|
+
});
|
|
1966
|
+
if (data.codebooks) {
|
|
1967
|
+
quantizer.setCodebooks(data.codebooks as Codebook[]);
|
|
1968
|
+
}
|
|
1969
|
+
if (data.rotationMatrix) {
|
|
1970
|
+
quantizer.setRotationMatrix(data.rotationMatrix as number[][]);
|
|
1971
|
+
}
|
|
1972
|
+
return quantizer;
|
|
1973
|
+
}
|
|
1974
|
+
|
|
1975
|
+
default:
|
|
1976
|
+
throw new Error(`Unknown quantization type: ${type}`);
|
|
1977
|
+
}
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1980
|
+
// ============================================================================
|
|
1981
|
+
// Constants
|
|
1982
|
+
// ============================================================================
|
|
1983
|
+
|
|
1984
|
+
/**
|
|
1985
|
+
* Default configurations for different use cases.
|
|
1986
|
+
*/
|
|
1987
|
+
export const QUANTIZATION_PRESETS = {
|
|
1988
|
+
/** Fast search with good accuracy (scalar int8) */
|
|
1989
|
+
balanced: {
|
|
1990
|
+
type: 'scalar' as const,
|
|
1991
|
+
options: {
|
|
1992
|
+
dimensions: 128,
|
|
1993
|
+
symmetric: true,
|
|
1994
|
+
},
|
|
1995
|
+
},
|
|
1996
|
+
|
|
1997
|
+
/** Maximum compression (binary) */
|
|
1998
|
+
maxCompression: {
|
|
1999
|
+
type: 'binary' as const,
|
|
2000
|
+
options: {
|
|
2001
|
+
dimensions: 128,
|
|
2002
|
+
threshold: 0,
|
|
2003
|
+
},
|
|
2004
|
+
},
|
|
2005
|
+
|
|
2006
|
+
/** High accuracy with compression (PQ) */
|
|
2007
|
+
highAccuracy: {
|
|
2008
|
+
type: 'pq' as const,
|
|
2009
|
+
options: {
|
|
2010
|
+
dimensions: 128,
|
|
2011
|
+
numSubvectors: 16,
|
|
2012
|
+
numCentroids: 256,
|
|
2013
|
+
},
|
|
2014
|
+
},
|
|
2015
|
+
|
|
2016
|
+
/** Best accuracy (OPQ) */
|
|
2017
|
+
bestAccuracy: {
|
|
2018
|
+
type: 'opq' as const,
|
|
2019
|
+
options: {
|
|
2020
|
+
dimensions: 128,
|
|
2021
|
+
numSubvectors: 16,
|
|
2022
|
+
numCentroids: 256,
|
|
2023
|
+
opqIterations: 10,
|
|
2024
|
+
},
|
|
2025
|
+
},
|
|
2026
|
+
} as const;
|
|
2027
|
+
|
|
2028
|
+
/**
|
|
2029
|
+
* Memory reduction factors for each quantization type.
|
|
2030
|
+
*/
|
|
2031
|
+
export const MEMORY_REDUCTION = {
|
|
2032
|
+
scalar: 4, // float32 -> int8
|
|
2033
|
+
binary: 32, // float32 -> 1 bit
|
|
2034
|
+
pq: 16, // Typical for M=8, K=256
|
|
2035
|
+
opq: 16, // Same as PQ
|
|
2036
|
+
} as const;
|