verso-db 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/Collection.d.ts +17 -0
- package/dist/Collection.d.ts.map +1 -1
- package/dist/Collection.js +76 -1
- package/dist/Collection.js.map +1 -1
- package/dist/HNSWIndex.d.ts +27 -2
- package/dist/HNSWIndex.d.ts.map +1 -1
- package/dist/HNSWIndex.js +261 -96
- package/dist/HNSWIndex.js.map +1 -1
- package/package.json +1 -1
package/dist/HNSWIndex.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { dotProductFast, l2SquaredFast, normalizeInPlace } from './backends/JsDistanceBackend.js';
|
|
3
3
|
import { BinaryHeap } from './BinaryHeap.js';
|
|
4
4
|
import { MaxBinaryHeap } from './MaxBinaryHeap.js';
|
|
5
|
-
import { ScalarQuantizer
|
|
5
|
+
import { ScalarQuantizer } from './quantization/ScalarQuantizer.js';
|
|
6
6
|
import { deltaEncodeNeighbors, deltaDecodeNeighbors } from './encoding/DeltaEncoder.js';
|
|
7
7
|
import { VectorDBError } from './errors.js';
|
|
8
8
|
export class HNSWIndex {
|
|
@@ -73,8 +73,6 @@ export class HNSWIndex {
|
|
|
73
73
|
random;
|
|
74
74
|
// Quantization support for 3-4x faster search with Int8
|
|
75
75
|
scalarQuantizer = null;
|
|
76
|
-
// OPTIMIZATION: Use array instead of Map for int8 vectors too
|
|
77
|
-
int8Vectors = [];
|
|
78
76
|
quantizationEnabled = false;
|
|
79
77
|
// OPTIMIZATION: Contiguous Int8 storage for cache-friendly batch distance calculations
|
|
80
78
|
// Layout: [v0_d0...v0_dn, v1_d0...v1_dn, ...], offset: nodeId * dim
|
|
@@ -831,8 +829,7 @@ export class HNSWIndex {
|
|
|
831
829
|
// Keep quantized representation in sync for indexes that were
|
|
832
830
|
// quantized before new inserts.
|
|
833
831
|
if (this.quantizationEnabled && this.scalarQuantizer) {
|
|
834
|
-
|
|
835
|
-
// Also update contiguous Int8 storage
|
|
832
|
+
// Update contiguous Int8 storage
|
|
836
833
|
if (this.flatInt8Vectors) {
|
|
837
834
|
this.scalarQuantizer.quantizeInto(floatVector, this.flatInt8Vectors, id * this.dimension);
|
|
838
835
|
}
|
|
@@ -1120,21 +1117,36 @@ export class HNSWIndex {
|
|
|
1120
1117
|
const dim = this.dimension;
|
|
1121
1118
|
const sampleSize = Math.min(Math.ceil(Math.sqrt(n)), 2000);
|
|
1122
1119
|
const seedCount = Math.min(Math.ceil(Math.sqrt(n)), 500);
|
|
1123
|
-
// Random sample indices
|
|
1120
|
+
// Random sample indices. Bound the random-draw attempts so a degenerate RNG
|
|
1121
|
+
// (e.g. a constant function injected via the constructor) can't spin forever;
|
|
1122
|
+
// any shortfall is filled deterministically by scanning unused indices.
|
|
1124
1123
|
const sampleIndices = [];
|
|
1125
1124
|
const usedIndices = new Set();
|
|
1126
|
-
|
|
1125
|
+
const maxAttempts = sampleSize * 20;
|
|
1126
|
+
let attempts = 0;
|
|
1127
|
+
while (sampleIndices.length < sampleSize && attempts < maxAttempts) {
|
|
1128
|
+
attempts++;
|
|
1127
1129
|
const idx = Math.floor(this.randomFloat() * n);
|
|
1128
1130
|
if (!usedIndices.has(idx)) {
|
|
1129
1131
|
usedIndices.add(idx);
|
|
1130
1132
|
sampleIndices.push(idx);
|
|
1131
1133
|
}
|
|
1132
1134
|
}
|
|
1135
|
+
if (sampleIndices.length < sampleSize) {
|
|
1136
|
+
for (let i = 0; i < n && sampleIndices.length < sampleSize; i++) {
|
|
1137
|
+
if (!usedIndices.has(i)) {
|
|
1138
|
+
usedIndices.add(i);
|
|
1139
|
+
sampleIndices.push(i);
|
|
1140
|
+
}
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1133
1143
|
// Farthest-point sampling on the sample
|
|
1134
1144
|
// Start with a random point from the sample
|
|
1135
1145
|
const seedSamplePositions = [0]; // positions within sampleIndices
|
|
1136
1146
|
const minDists = new Float64Array(sampleSize);
|
|
1137
1147
|
minDists.fill(Infinity);
|
|
1148
|
+
// Seed 0 is already selected — mark it so the greedy loop doesn't re-pick it.
|
|
1149
|
+
minDists[0] = -1;
|
|
1138
1150
|
// Compute initial distances from first seed
|
|
1139
1151
|
const firstVec = points[sampleIndices[0]].vector;
|
|
1140
1152
|
for (let i = 1; i < sampleSize; i++) {
|
|
@@ -1359,7 +1371,6 @@ export class HNSWIndex {
|
|
|
1359
1371
|
this.setNode(newNode);
|
|
1360
1372
|
// Keep quantized representation in sync
|
|
1361
1373
|
if (this.quantizationEnabled && this.scalarQuantizer) {
|
|
1362
|
-
this.int8Vectors[id] = this.scalarQuantizer.quantize(vector);
|
|
1363
1374
|
if (this.flatInt8Vectors) {
|
|
1364
1375
|
this.scalarQuantizer.quantizeInto(vector, this.flatInt8Vectors, id * this.dimension);
|
|
1365
1376
|
}
|
|
@@ -1530,7 +1541,7 @@ export class HNSWIndex {
|
|
|
1530
1541
|
}
|
|
1531
1542
|
// Format version constants
|
|
1532
1543
|
static MAGIC = 0x484E5357; // "HNSW" in ASCII (big-endian: 0x48='H', 0x4E='N', 0x53='S', 0x57='W')
|
|
1533
|
-
static FORMAT_VERSION =
|
|
1544
|
+
static FORMAT_VERSION = 4; // v4: appended trailer with quantization params + calibration stats (graph/vector sections byte-identical to v3)
|
|
1534
1545
|
static HEADER_SIZE = 40; // 4 (magic) + 4 (version) + 28 (existing header) + 4 (vectorDataOffset)
|
|
1535
1546
|
static ensureReadable(bufferLength, offset, bytes, context) {
|
|
1536
1547
|
if (offset + bytes > bufferLength) {
|
|
@@ -1538,6 +1549,33 @@ export class HNSWIndex {
|
|
|
1538
1549
|
throw new VectorDBError(`Corrupt HNSW data: truncated ${context} (need ${bytes} bytes, only ${available} available)`, 'CORRUPT_INDEX');
|
|
1539
1550
|
}
|
|
1540
1551
|
}
|
|
1552
|
+
/**
|
|
1553
|
+
* Read a dimension-length float32 vector from a serialized buffer at the given
|
|
1554
|
+
* absolute byte offset. Callers must have already validated that
|
|
1555
|
+
* [absoluteOffset, absoluteOffset + dimension*4) fits within the buffer.
|
|
1556
|
+
*
|
|
1557
|
+
* Fast path: when the offset is 4-byte aligned we construct a Float32Array view
|
|
1558
|
+
* directly over the buffer and bulk-copy. Otherwise we fall back to a DataView
|
|
1559
|
+
* loop. Either way every component is validated for finiteness in a single pass.
|
|
1560
|
+
*/
|
|
1561
|
+
static readVectorBytes(buffer, view, absoluteOffset, dimension, nodeIndex) {
|
|
1562
|
+
HNSWIndex.ensureReadable(buffer.byteLength, absoluteOffset, dimension * 4, `node ${nodeIndex} vector payload`);
|
|
1563
|
+
const vector = new Float32Array(dimension);
|
|
1564
|
+
if (absoluteOffset % 4 === 0) {
|
|
1565
|
+
vector.set(new Float32Array(buffer, absoluteOffset, dimension));
|
|
1566
|
+
}
|
|
1567
|
+
else {
|
|
1568
|
+
for (let j = 0; j < dimension; j++) {
|
|
1569
|
+
vector[j] = view.getFloat32(absoluteOffset + j * 4, true);
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
for (let j = 0; j < dimension; j++) {
|
|
1573
|
+
if (!Number.isFinite(vector[j])) {
|
|
1574
|
+
throw new VectorDBError(`Corrupt HNSW data: node ${nodeIndex} vector component ${j} is non-finite (${vector[j]})`, 'CORRUPT_INDEX');
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
return vector;
|
|
1578
|
+
}
|
|
1541
1579
|
static wrapDeserializeError(error) {
|
|
1542
1580
|
if (error instanceof VectorDBError) {
|
|
1543
1581
|
return error;
|
|
@@ -1675,16 +1713,31 @@ export class HNSWIndex {
|
|
|
1675
1713
|
totalNeighbors += neighbors?.length ?? 0;
|
|
1676
1714
|
}
|
|
1677
1715
|
}
|
|
1678
|
-
//
|
|
1679
|
-
//
|
|
1680
|
-
// (
|
|
1681
|
-
//
|
|
1682
|
-
//
|
|
1683
|
-
// so
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1716
|
+
// Capacity sizing depends on whether this is a parallel build or a read-only pool.
|
|
1717
|
+
//
|
|
1718
|
+
// Parallel build (constructionMode === true): the graph grows after init and the
|
|
1719
|
+
// append-only update pattern (updateSharedGraphNodes) rewrites nodes, creating dead
|
|
1720
|
+
// space. We pre-allocate for ALL nodes (up to flatVectorsCapacity) at 3x the estimated
|
|
1721
|
+
// maximum so incremental updates have room. addPointsBulkParallel sets constructionMode
|
|
1722
|
+
// = true BEFORE pool.init() (which triggers this serialization), so this path is taken
|
|
1723
|
+
// during parallel build.
|
|
1724
|
+
//
|
|
1725
|
+
// Read-only pool (constructionMode === false): the graph is finished and immutable from
|
|
1726
|
+
// the workers' perspective. There are no incremental rewrites, so we size tightly to the
|
|
1727
|
+
// current node count. This avoids allocating ~864MB SABs for a finished 1M-node index.
|
|
1728
|
+
let neighborCapacity;
|
|
1729
|
+
let indexSize;
|
|
1730
|
+
if (this.constructionMode) {
|
|
1731
|
+
const nodeCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
|
|
1732
|
+
const estimatedMaxNeighbors = nodeCapacity * (this.M0 + this.M);
|
|
1733
|
+
neighborCapacity = Math.max(Math.ceil(totalNeighbors * 3), totalNeighbors + 10000, estimatedMaxNeighbors * 3);
|
|
1734
|
+
indexSize = nodeCapacity * maxLayerSlots * 2;
|
|
1735
|
+
}
|
|
1736
|
+
else {
|
|
1737
|
+
neighborCapacity = Math.max(totalNeighbors + 1024, 1024);
|
|
1738
|
+
// Index area must still cover every node's layer slots.
|
|
1739
|
+
indexSize = this.nodeCount * maxLayerSlots * 2;
|
|
1740
|
+
}
|
|
1688
1741
|
const graphIndex = new Uint32Array(new SharedArrayBuffer(indexSize * 4));
|
|
1689
1742
|
const graphNeighborData = new Uint32Array(new SharedArrayBuffer(neighborCapacity * 4));
|
|
1690
1743
|
// Shared metadata: [nodeCount, entryPointId, maxLevel]
|
|
@@ -1742,11 +1795,14 @@ export class HNSWIndex {
|
|
|
1742
1795
|
this.clearSharedGraph();
|
|
1743
1796
|
return;
|
|
1744
1797
|
}
|
|
1745
|
-
|
|
1746
|
-
|
|
1798
|
+
// Write the neighbor payload FIRST, then publish the index entry (offset +
|
|
1799
|
+
// count). A concurrent worker that reads the index entry will then always
|
|
1800
|
+
// see fully-written neighbor data, never not-yet-written zeros.
|
|
1747
1801
|
for (let n = 0; n < neighbors.length; n++) {
|
|
1748
1802
|
this.sharedGraphNeighborData[writeOffset + n] = neighbors[n];
|
|
1749
1803
|
}
|
|
1804
|
+
this.sharedGraphIndex[base] = writeOffset;
|
|
1805
|
+
this.sharedGraphIndex[base + 1] = neighbors.length;
|
|
1750
1806
|
writeOffset += neighbors.length;
|
|
1751
1807
|
}
|
|
1752
1808
|
}
|
|
@@ -1832,6 +1888,20 @@ export class HNSWIndex {
|
|
|
1832
1888
|
if (this.nodeCount === 0 || this.entryPointId === -1) {
|
|
1833
1889
|
return new Map();
|
|
1834
1890
|
}
|
|
1891
|
+
// Reorder rebuilds all flat storage from node.vector, which holds zeros for
|
|
1892
|
+
// unloaded lazy vectors. Materialize every vector first, then drop lazy state
|
|
1893
|
+
// so the rebuilt structures and remapped IDs are self-consistent.
|
|
1894
|
+
if (this.lazyLoadEnabled) {
|
|
1895
|
+
for (let i = 0; i < this.nodeCount; i++) {
|
|
1896
|
+
if (this.nodes[i]) {
|
|
1897
|
+
this.loadVector(i);
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
this.lazyLoadEnabled = false;
|
|
1901
|
+
this.vectorBuffer = null;
|
|
1902
|
+
this.vectorOffsets.clear();
|
|
1903
|
+
this.vectorsLoaded.clear();
|
|
1904
|
+
}
|
|
1835
1905
|
const dim = this.dimension;
|
|
1836
1906
|
const oldToNew = new Map();
|
|
1837
1907
|
const visited = new Set();
|
|
@@ -1840,8 +1910,9 @@ export class HNSWIndex {
|
|
|
1840
1910
|
let newId = 0;
|
|
1841
1911
|
queue.push(this.entryPointId);
|
|
1842
1912
|
visited.add(this.entryPointId);
|
|
1843
|
-
|
|
1844
|
-
|
|
1913
|
+
let head = 0;
|
|
1914
|
+
while (head < queue.length) {
|
|
1915
|
+
const oldId = queue[head++];
|
|
1845
1916
|
oldToNew.set(oldId, newId++);
|
|
1846
1917
|
const node = this.nodes[oldId];
|
|
1847
1918
|
if (!node)
|
|
@@ -1868,10 +1939,8 @@ export class HNSWIndex {
|
|
|
1868
1939
|
const newNodes = new Array(newId);
|
|
1869
1940
|
const newFlatVectors = HNSWIndex.allocateFloat32(Math.max(newId, this.flatVectorsCapacity) * dim, this.useSharedMemory);
|
|
1870
1941
|
let newFlatInt8Vectors = null;
|
|
1871
|
-
let newInt8Vectors = [];
|
|
1872
1942
|
if (this.quantizationEnabled && this.flatInt8Vectors) {
|
|
1873
1943
|
newFlatInt8Vectors = HNSWIndex.allocateInt8(Math.max(newId, this.flatInt8VectorsCapacity) * dim, this.useSharedMemory);
|
|
1874
|
-
newInt8Vectors = new Array(newId);
|
|
1875
1944
|
}
|
|
1876
1945
|
// Copy nodes with remapped IDs and neighbor lists
|
|
1877
1946
|
for (const [oldId, nid] of oldToNew) {
|
|
@@ -1899,9 +1968,6 @@ export class HNSWIndex {
|
|
|
1899
1968
|
newFlatInt8Vectors[newOffset + d] = this.flatInt8Vectors[oldOffset + d];
|
|
1900
1969
|
}
|
|
1901
1970
|
}
|
|
1902
|
-
if (this.int8Vectors[oldId]) {
|
|
1903
|
-
newInt8Vectors[nid] = this.int8Vectors[oldId];
|
|
1904
|
-
}
|
|
1905
1971
|
// Create new node with vector from flat storage
|
|
1906
1972
|
const vec = new Float32Array(dim);
|
|
1907
1973
|
vec.set(newFlatVectors.subarray(newOffset, newOffset + dim));
|
|
@@ -1922,7 +1988,6 @@ export class HNSWIndex {
|
|
|
1922
1988
|
this.flatInt8Vectors = newFlatInt8Vectors;
|
|
1923
1989
|
this.flatInt8VectorsCapacity = Math.max(newId, this.flatInt8VectorsCapacity);
|
|
1924
1990
|
}
|
|
1925
|
-
this.int8Vectors = newInt8Vectors;
|
|
1926
1991
|
this.nodeCount = newId;
|
|
1927
1992
|
this.nextAutoId = newId;
|
|
1928
1993
|
// Reset visited array for new ID space
|
|
@@ -1975,7 +2040,22 @@ export class HNSWIndex {
|
|
|
1975
2040
|
graphSize += nodeCount * 4; // vector offset table
|
|
1976
2041
|
const vectorDataOffset = graphSize;
|
|
1977
2042
|
const vectorDataSize = nodeCount * this.dimension * 4;
|
|
1978
|
-
|
|
2043
|
+
// v4 trailer (appended after the vector section). Graph + vector sections are
|
|
2044
|
+
// byte-identical to v3; v4 always writes at least the flags uint32 so a trailer
|
|
2045
|
+
// is guaranteed to exist. We do NOT store int8 vector bytes — they are rebuilt
|
|
2046
|
+
// by re-quantizing float32 vectors on load.
|
|
2047
|
+
const writeQuantParams = this.quantizationEnabled && this.scalarQuantizer !== null;
|
|
2048
|
+
const writeCalibration = this.calibrationStats !== null;
|
|
2049
|
+
let quantBlob = null;
|
|
2050
|
+
let trailerSize = 4; // flags uint32
|
|
2051
|
+
if (writeQuantParams) {
|
|
2052
|
+
quantBlob = this.scalarQuantizer.serialize();
|
|
2053
|
+
trailerSize += 4 + quantBlob.byteLength; // length prefix + blob
|
|
2054
|
+
}
|
|
2055
|
+
if (writeCalibration) {
|
|
2056
|
+
trailerSize += 16; // meanEntryDist (f64) + stdEntryDist (f64)
|
|
2057
|
+
}
|
|
2058
|
+
const totalSize = graphSize + vectorDataSize + trailerSize;
|
|
1979
2059
|
const buffer = new ArrayBuffer(totalSize);
|
|
1980
2060
|
const view = new DataView(buffer);
|
|
1981
2061
|
const uint8Array = new Uint8Array(buffer);
|
|
@@ -2034,14 +2114,50 @@ export class HNSWIndex {
|
|
|
2034
2114
|
view.setUint32(offset, i * this.dimension * 4, true); // Relative offset
|
|
2035
2115
|
offset += 4;
|
|
2036
2116
|
}
|
|
2037
|
-
// Write vectors at end (for lazy loading capability)
|
|
2117
|
+
// Write vectors at end (for lazy loading capability).
|
|
2118
|
+
// When this index is itself lazy-loaded, unloaded nodes still hold a
|
|
2119
|
+
// placeholder zero vector in node.vector, so read their bytes directly from
|
|
2120
|
+
// the backing buffer instead of materializing every vector into memory.
|
|
2121
|
+
const sourceView = this.lazyLoadEnabled && this.vectorBuffer ? new DataView(this.vectorBuffer) : null;
|
|
2038
2122
|
for (let i = 0; i < nodesArray.length; i++) {
|
|
2039
2123
|
const node = nodesArray[i];
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
|
|
2124
|
+
if (this.lazyLoadEnabled && !this.vectorsLoaded.has(node.id)) {
|
|
2125
|
+
const srcOffset = this.vectorOffsets.get(node.id);
|
|
2126
|
+
if (sourceView === null || srcOffset === undefined) {
|
|
2127
|
+
throw new VectorDBError(`Corrupt HNSW data: lazy node ${node.id} has no backing vector buffer to serialize`, 'CORRUPT_INDEX');
|
|
2128
|
+
}
|
|
2129
|
+
for (let j = 0; j < this.dimension; j++) {
|
|
2130
|
+
view.setFloat32(offset, sourceView.getFloat32(srcOffset + j * 4, true), true);
|
|
2131
|
+
offset += 4;
|
|
2132
|
+
}
|
|
2133
|
+
}
|
|
2134
|
+
else {
|
|
2135
|
+
for (let j = 0; j < this.dimension; j++) {
|
|
2136
|
+
view.setFloat32(offset, node.vector[j], true);
|
|
2137
|
+
offset += 4;
|
|
2138
|
+
}
|
|
2043
2139
|
}
|
|
2044
2140
|
}
|
|
2141
|
+
// Write v4 trailer. bit 0 = quantization params present, bit 1 = calibration stats present.
|
|
2142
|
+
let flags = 0;
|
|
2143
|
+
if (writeQuantParams)
|
|
2144
|
+
flags |= 1;
|
|
2145
|
+
if (writeCalibration)
|
|
2146
|
+
flags |= 2;
|
|
2147
|
+
view.setUint32(offset, flags, true);
|
|
2148
|
+
offset += 4;
|
|
2149
|
+
if (writeQuantParams && quantBlob !== null) {
|
|
2150
|
+
view.setUint32(offset, quantBlob.byteLength, true);
|
|
2151
|
+
offset += 4;
|
|
2152
|
+
uint8Array.set(new Uint8Array(quantBlob), offset);
|
|
2153
|
+
offset += quantBlob.byteLength;
|
|
2154
|
+
}
|
|
2155
|
+
if (writeCalibration && this.calibrationStats !== null) {
|
|
2156
|
+
view.setFloat64(offset, this.calibrationStats.meanEntryDist, true);
|
|
2157
|
+
offset += 8;
|
|
2158
|
+
view.setFloat64(offset, this.calibrationStats.stdEntryDist, true);
|
|
2159
|
+
offset += 8;
|
|
2160
|
+
}
|
|
2045
2161
|
return buffer;
|
|
2046
2162
|
}
|
|
2047
2163
|
/**
|
|
@@ -2079,6 +2195,9 @@ export class HNSWIndex {
|
|
|
2079
2195
|
ensureReadable(4, context);
|
|
2080
2196
|
const value = view.getFloat32(offset, true);
|
|
2081
2197
|
offset += 4;
|
|
2198
|
+
if (!Number.isFinite(value)) {
|
|
2199
|
+
throw new VectorDBError(`Corrupt HNSW data: ${context} is non-finite (${value})`, 'CORRUPT_INDEX');
|
|
2200
|
+
}
|
|
2082
2201
|
return value;
|
|
2083
2202
|
};
|
|
2084
2203
|
// Check for magic header (new format v1+)
|
|
@@ -2265,15 +2384,14 @@ export class HNSWIndex {
|
|
|
2265
2384
|
}
|
|
2266
2385
|
}
|
|
2267
2386
|
else {
|
|
2268
|
-
// Eager loading: load all vectors now
|
|
2387
|
+
// Eager loading: load all vectors now.
|
|
2388
|
+
// The offset table validation above already guarantees each vector's
|
|
2389
|
+
// full [offset, offset + dimension*4) range fits within the buffer, so
|
|
2390
|
+
// we bulk-copy instead of bounds-checking each component.
|
|
2269
2391
|
for (let i = 0; i < nodeCount; i++) {
|
|
2270
2392
|
const { id, level } = nodeMetadata[i];
|
|
2271
2393
|
const vectorOffset = index.vectorOffsets.get(id);
|
|
2272
|
-
const vector =
|
|
2273
|
-
for (let j = 0; j < dimension; j++) {
|
|
2274
|
-
HNSWIndex.ensureReadable(buffer.byteLength, vectorOffset + j * 4, 4, `node ${i} vector component ${j}`);
|
|
2275
|
-
vector[j] = view.getFloat32(vectorOffset + j * 4, true);
|
|
2276
|
-
}
|
|
2394
|
+
const vector = HNSWIndex.readVectorBytes(buffer, view, vectorOffset, dimension, i);
|
|
2277
2395
|
const node = { id, level, vector, neighbors: nodeNeighbors[i] };
|
|
2278
2396
|
index.setNode(node);
|
|
2279
2397
|
index.vectorsLoaded.add(id);
|
|
@@ -2368,6 +2486,56 @@ export class HNSWIndex {
|
|
|
2368
2486
|
}
|
|
2369
2487
|
}
|
|
2370
2488
|
}
|
|
2489
|
+
// v4 trailer: appended after the vector section. v0-v3 buffers have no
|
|
2490
|
+
// trailer, so this is skipped entirely for them (their parsing above is
|
|
2491
|
+
// byte-for-byte unchanged). The vector section runs from vectorDataOffset
|
|
2492
|
+
// for nodeCount * dimension * 4 bytes; the trailer starts immediately after.
|
|
2493
|
+
if (formatVersion >= 4) {
|
|
2494
|
+
let trailerOffset = vectorDataOffset + nodeCount * dimension * 4;
|
|
2495
|
+
const readTrailerUint32 = (context) => {
|
|
2496
|
+
HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, 4, context);
|
|
2497
|
+
const value = view.getUint32(trailerOffset, true);
|
|
2498
|
+
trailerOffset += 4;
|
|
2499
|
+
return value;
|
|
2500
|
+
};
|
|
2501
|
+
const readTrailerFloat64 = (context) => {
|
|
2502
|
+
HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, 8, context);
|
|
2503
|
+
const value = view.getFloat64(trailerOffset, true);
|
|
2504
|
+
trailerOffset += 8;
|
|
2505
|
+
return value;
|
|
2506
|
+
};
|
|
2507
|
+
const flags = readTrailerUint32('v4 trailer flags');
|
|
2508
|
+
const hasQuant = (flags & 1) !== 0;
|
|
2509
|
+
const hasCalibration = (flags & 2) !== 0;
|
|
2510
|
+
if (hasQuant) {
|
|
2511
|
+
const quantBlobLength = readTrailerUint32('v4 quant blob length');
|
|
2512
|
+
HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, quantBlobLength, 'v4 quant blob');
|
|
2513
|
+
const blobStart = trailerOffset;
|
|
2514
|
+
const blobEnd = trailerOffset + quantBlobLength;
|
|
2515
|
+
trailerOffset = blobEnd;
|
|
2516
|
+
if (lazyLoad) {
|
|
2517
|
+
// Lazy loading skips restoring quantization: the float32 vectors are
|
|
2518
|
+
// not materialized, so we cannot re-quantize them into flatInt8Vectors.
|
|
2519
|
+
// Leave quantizationEnabled = false; quantized blob bytes are consumed
|
|
2520
|
+
// (offset advanced) but the quantizer is not reconstructed. Searches
|
|
2521
|
+
// fall back to the float32 path, which lazy-loads vectors on demand.
|
|
2522
|
+
}
|
|
2523
|
+
else {
|
|
2524
|
+
// slice() returns a fresh ArrayBuffer with byteOffset 0, which
|
|
2525
|
+
// ScalarQuantizer.deserialize() expects.
|
|
2526
|
+
const quantBlob = buffer.slice(blobStart, blobEnd);
|
|
2527
|
+
const quantizer = ScalarQuantizer.deserialize(quantBlob);
|
|
2528
|
+
index.installQuantizer(quantizer);
|
|
2529
|
+
}
|
|
2530
|
+
}
|
|
2531
|
+
if (hasCalibration) {
|
|
2532
|
+
const meanEntryDist = readTrailerFloat64('v4 calibration meanEntryDist');
|
|
2533
|
+
const stdEntryDist = readTrailerFloat64('v4 calibration stdEntryDist');
|
|
2534
|
+
// Calibration stats work for lazy loads too (no vectors needed).
|
|
2535
|
+
index.calibrationStats = { meanEntryDist, stdEntryDist };
|
|
2536
|
+
}
|
|
2537
|
+
// Bytes after the trailer (if any) are ignored.
|
|
2538
|
+
}
|
|
2371
2539
|
if (nodeCount > 0 && observedMaxLevel !== maxLevel) {
|
|
2372
2540
|
throw new VectorDBError(`Corrupt HNSW data: maxLevel ${maxLevel} does not match highest node level ${observedMaxLevel}`, 'CORRUPT_INDEX');
|
|
2373
2541
|
}
|
|
@@ -2403,10 +2571,7 @@ export class HNSWIndex {
|
|
|
2403
2571
|
if (vectorOffset === undefined)
|
|
2404
2572
|
return null;
|
|
2405
2573
|
const view = new DataView(this.vectorBuffer);
|
|
2406
|
-
const vector =
|
|
2407
|
-
for (let j = 0; j < this.dimension; j++) {
|
|
2408
|
-
vector[j] = view.getFloat32(vectorOffset + j * 4, true);
|
|
2409
|
-
}
|
|
2574
|
+
const vector = HNSWIndex.readVectorBytes(this.vectorBuffer, view, vectorOffset, this.dimension, nodeId);
|
|
2410
2575
|
// Update node with loaded vector
|
|
2411
2576
|
node.vector = vector;
|
|
2412
2577
|
this.vectorsLoaded.add(nodeId);
|
|
@@ -2482,10 +2647,27 @@ export class HNSWIndex {
|
|
|
2482
2647
|
// Reset entry point and level so reuse after destroy doesn't crash
|
|
2483
2648
|
this.entryPointId = -1;
|
|
2484
2649
|
this.maxLevel = -1;
|
|
2485
|
-
// Clear quantization state
|
|
2486
|
-
|
|
2650
|
+
// Clear quantization state, including the contiguous int8 buffer and the
|
|
2651
|
+
// reusable query buffer (these can be capacity*dimension bytes).
|
|
2487
2652
|
this.quantizationEnabled = false;
|
|
2488
2653
|
this.scalarQuantizer = null;
|
|
2654
|
+
this.flatInt8Vectors = null;
|
|
2655
|
+
this.flatInt8VectorsCapacity = 0;
|
|
2656
|
+
this.queryInt8Buffer = null;
|
|
2657
|
+
// Release shared-graph SABs (sharedGraphIndex/NeighborData/Metadata).
|
|
2658
|
+
this.clearSharedGraph();
|
|
2659
|
+
// Shrink visited tracking back to a small array.
|
|
2660
|
+
this.visitedArraySize = 1024;
|
|
2661
|
+
this.visitedArray = new Uint16Array(this.visitedArraySize);
|
|
2662
|
+
this.visitedGeneration = 1;
|
|
2663
|
+
// Replace grown search heaps with small-capacity instances.
|
|
2664
|
+
this.heapCapacity = 500;
|
|
2665
|
+
this.candidatesHeap = new BinaryHeap(this.heapCapacity);
|
|
2666
|
+
this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
|
|
2667
|
+
this.selectionHeap = new BinaryHeap(Math.max(this.M * 2, 1));
|
|
2668
|
+
// Drop calibration stats and construction-time neighbor sets.
|
|
2669
|
+
this.calibrationStats = null;
|
|
2670
|
+
this.neighborSets.clear();
|
|
2489
2671
|
// Clear lazy-load state
|
|
2490
2672
|
this.lazyLoadEnabled = false;
|
|
2491
2673
|
this.vectorOffsets.clear();
|
|
@@ -2549,6 +2731,15 @@ export class HNSWIndex {
|
|
|
2549
2731
|
}
|
|
2550
2732
|
return result;
|
|
2551
2733
|
}
|
|
2734
|
+
/**
|
|
2735
|
+
* Get a single vector by its numeric node id, or null if absent.
|
|
2736
|
+
* Returns a defensive copy so callers cannot mutate internal storage.
|
|
2737
|
+
* Keyed consistently with getAllVectors() and Collection's numeric ids.
|
|
2738
|
+
*/
|
|
2739
|
+
getVectorById(id) {
|
|
2740
|
+
const vector = this.getNodeVector(id);
|
|
2741
|
+
return vector ? new Float32Array(vector) : null;
|
|
2742
|
+
}
|
|
2552
2743
|
// ============================================
|
|
2553
2744
|
// Quantized Search (Int8 with automatic rescoring)
|
|
2554
2745
|
// ============================================
|
|
@@ -2587,12 +2778,24 @@ export class HNSWIndex {
|
|
|
2587
2778
|
vectors.push(vector);
|
|
2588
2779
|
}
|
|
2589
2780
|
// Initialize and train scalar (int8) quantizer
|
|
2590
|
-
|
|
2591
|
-
|
|
2592
|
-
//
|
|
2593
|
-
this.
|
|
2594
|
-
|
|
2781
|
+
const quantizer = new ScalarQuantizer(this.dimension);
|
|
2782
|
+
quantizer.train(vectors);
|
|
2783
|
+
// Build contiguous Int8 storage + reusable query buffer and enable quantization.
|
|
2784
|
+
this.installQuantizer(quantizer);
|
|
2785
|
+
}
|
|
2786
|
+
/**
|
|
2787
|
+
* Build quantized state from an already-trained ScalarQuantizer.
|
|
2788
|
+
*
|
|
2789
|
+
* Shared by enableQuantization() (which trains the quantizer first) and
|
|
2790
|
+
* deserialize() (which restores the quantizer from a serialized blob). Mirrors
|
|
2791
|
+
* enableQuantization() exactly EXCEPT training: allocates the contiguous
|
|
2792
|
+
* flatInt8Vectors buffer, quantizes every live node's float vector into it,
|
|
2793
|
+
* allocates the reusable queryInt8Buffer, and flips quantizationEnabled on.
|
|
2794
|
+
*/
|
|
2795
|
+
installQuantizer(quantizer) {
|
|
2595
2796
|
const dim = this.dimension;
|
|
2797
|
+
this.scalarQuantizer = quantizer;
|
|
2798
|
+
// Build contiguous Int8 storage for cache-friendly batch distance calculations
|
|
2596
2799
|
this.flatInt8VectorsCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
|
|
2597
2800
|
this.flatInt8Vectors = HNSWIndex.allocateInt8(this.flatInt8VectorsCapacity * dim, this.useSharedMemory);
|
|
2598
2801
|
for (let i = 0; i < this.nodeCount; i++) {
|
|
@@ -2602,8 +2805,7 @@ export class HNSWIndex {
|
|
|
2602
2805
|
if (!vector) {
|
|
2603
2806
|
throw new VectorDBError(`Cannot quantize node ${node.id}: vector is unavailable`, 'CORRUPT_INDEX');
|
|
2604
2807
|
}
|
|
2605
|
-
|
|
2606
|
-
// Also store in contiguous buffer
|
|
2808
|
+
// Store in contiguous buffer
|
|
2607
2809
|
this.scalarQuantizer.quantizeInto(vector, this.flatInt8Vectors, node.id * dim);
|
|
2608
2810
|
}
|
|
2609
2811
|
}
|
|
@@ -2844,7 +3046,7 @@ export class HNSWIndex {
|
|
|
2844
3046
|
/**
|
|
2845
3047
|
* Search layer using Int8 quantized distances for speed.
|
|
2846
3048
|
* OPTIMIZED: Uses batch distance calculation on contiguous flatInt8Vectors.
|
|
2847
|
-
* Falls back to one-by-one
|
|
3049
|
+
* Falls back to one-by-one float32 distance if contiguous int8 storage is unavailable.
|
|
2848
3050
|
*/
|
|
2849
3051
|
searchLayerQuantized(query, nearest, layer, ef) {
|
|
2850
3052
|
// Quantize query once — reuse pre-allocated buffer when available
|
|
@@ -2889,20 +3091,6 @@ export class HNSWIndex {
|
|
|
2889
3091
|
entryDist = -sum;
|
|
2890
3092
|
}
|
|
2891
3093
|
}
|
|
2892
|
-
else if (int8Query) {
|
|
2893
|
-
const entryInt8 = this.int8Vectors[nearest.id];
|
|
2894
|
-
if (entryInt8) {
|
|
2895
|
-
if (this.metric === 'cosine') {
|
|
2896
|
-
entryDist = cosineDistanceInt8(int8Query, entryInt8);
|
|
2897
|
-
}
|
|
2898
|
-
else if (this.metric === 'dot_product') {
|
|
2899
|
-
entryDist = -dotProductInt8(int8Query, entryInt8);
|
|
2900
|
-
}
|
|
2901
|
-
else {
|
|
2902
|
-
entryDist = Math.sqrt(l2SquaredInt8(int8Query, entryInt8));
|
|
2903
|
-
}
|
|
2904
|
-
}
|
|
2905
|
-
}
|
|
2906
3094
|
this.candidatesHeap.push(nearest.id, entryDist);
|
|
2907
3095
|
this.resultsHeap.push(nearest.id, entryDist);
|
|
2908
3096
|
let furthestResultDist = entryDist;
|
|
@@ -2954,38 +3142,15 @@ export class HNSWIndex {
|
|
|
2954
3142
|
}
|
|
2955
3143
|
}
|
|
2956
3144
|
else {
|
|
2957
|
-
// Fallback: one-by-one (no contiguous storage
|
|
3145
|
+
// Fallback: one-by-one float32 (no contiguous int8 storage available)
|
|
2958
3146
|
for (const neighborId of neighbors) {
|
|
2959
3147
|
if (this.isVisited(neighborId))
|
|
2960
3148
|
continue;
|
|
2961
3149
|
this.markVisited(neighborId);
|
|
2962
|
-
|
|
2963
|
-
if (
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
if (this.metric === 'cosine') {
|
|
2967
|
-
distance = cosineDistanceInt8(int8Query, neighborInt8);
|
|
2968
|
-
}
|
|
2969
|
-
else if (this.metric === 'dot_product') {
|
|
2970
|
-
distance = -dotProductInt8(int8Query, neighborInt8);
|
|
2971
|
-
}
|
|
2972
|
-
else {
|
|
2973
|
-
distance = Math.sqrt(l2SquaredInt8(int8Query, neighborInt8));
|
|
2974
|
-
}
|
|
2975
|
-
}
|
|
2976
|
-
else {
|
|
2977
|
-
const neighborVector = this.getNodeVector(neighborId);
|
|
2978
|
-
if (!neighborVector)
|
|
2979
|
-
continue;
|
|
2980
|
-
distance = this.calculateDistance(query, neighborVector);
|
|
2981
|
-
}
|
|
2982
|
-
}
|
|
2983
|
-
else {
|
|
2984
|
-
const neighborVector = this.getNodeVector(neighborId);
|
|
2985
|
-
if (!neighborVector)
|
|
2986
|
-
continue;
|
|
2987
|
-
distance = this.calculateDistance(query, neighborVector);
|
|
2988
|
-
}
|
|
3150
|
+
const neighborVector = this.getNodeVector(neighborId);
|
|
3151
|
+
if (!neighborVector)
|
|
3152
|
+
continue;
|
|
3153
|
+
const distance = this.calculateDistance(query, neighborVector);
|
|
2989
3154
|
if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
|
|
2990
3155
|
this.candidatesHeap.push(neighborId, distance);
|
|
2991
3156
|
this.resultsHeap.push(neighborId, distance);
|