verso-db 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HNSWIndex.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import { dotProductFast, l2SquaredFast, normalizeInPlace } from './backends/JsDistanceBackend.js';
3
3
  import { BinaryHeap } from './BinaryHeap.js';
4
4
  import { MaxBinaryHeap } from './MaxBinaryHeap.js';
5
- import { ScalarQuantizer, l2SquaredInt8, cosineDistanceInt8, dotProductInt8 } from './quantization/ScalarQuantizer.js';
5
+ import { ScalarQuantizer } from './quantization/ScalarQuantizer.js';
6
6
  import { deltaEncodeNeighbors, deltaDecodeNeighbors } from './encoding/DeltaEncoder.js';
7
7
  import { VectorDBError } from './errors.js';
8
8
  export class HNSWIndex {
@@ -73,8 +73,6 @@ export class HNSWIndex {
73
73
  random;
74
74
  // Quantization support for 3-4x faster search with Int8
75
75
  scalarQuantizer = null;
76
- // OPTIMIZATION: Use array instead of Map for int8 vectors too
77
- int8Vectors = [];
78
76
  quantizationEnabled = false;
79
77
  // OPTIMIZATION: Contiguous Int8 storage for cache-friendly batch distance calculations
80
78
  // Layout: [v0_d0...v0_dn, v1_d0...v1_dn, ...], offset: nodeId * dim
@@ -831,8 +829,7 @@ export class HNSWIndex {
831
829
  // Keep quantized representation in sync for indexes that were
832
830
  // quantized before new inserts.
833
831
  if (this.quantizationEnabled && this.scalarQuantizer) {
834
- this.int8Vectors[id] = this.scalarQuantizer.quantize(floatVector);
835
- // Also update contiguous Int8 storage
832
+ // Update contiguous Int8 storage
836
833
  if (this.flatInt8Vectors) {
837
834
  this.scalarQuantizer.quantizeInto(floatVector, this.flatInt8Vectors, id * this.dimension);
838
835
  }
@@ -1120,21 +1117,36 @@ export class HNSWIndex {
1120
1117
  const dim = this.dimension;
1121
1118
  const sampleSize = Math.min(Math.ceil(Math.sqrt(n)), 2000);
1122
1119
  const seedCount = Math.min(Math.ceil(Math.sqrt(n)), 500);
1123
- // Random sample indices
1120
+ // Random sample indices. Bound the random-draw attempts so a degenerate RNG
1121
+ // (e.g. a constant function injected via the constructor) can't spin forever;
1122
+ // any shortfall is filled deterministically by scanning unused indices.
1124
1123
  const sampleIndices = [];
1125
1124
  const usedIndices = new Set();
1126
- while (sampleIndices.length < sampleSize) {
1125
+ const maxAttempts = sampleSize * 20;
1126
+ let attempts = 0;
1127
+ while (sampleIndices.length < sampleSize && attempts < maxAttempts) {
1128
+ attempts++;
1127
1129
  const idx = Math.floor(this.randomFloat() * n);
1128
1130
  if (!usedIndices.has(idx)) {
1129
1131
  usedIndices.add(idx);
1130
1132
  sampleIndices.push(idx);
1131
1133
  }
1132
1134
  }
1135
+ if (sampleIndices.length < sampleSize) {
1136
+ for (let i = 0; i < n && sampleIndices.length < sampleSize; i++) {
1137
+ if (!usedIndices.has(i)) {
1138
+ usedIndices.add(i);
1139
+ sampleIndices.push(i);
1140
+ }
1141
+ }
1142
+ }
1133
1143
  // Farthest-point sampling on the sample
1134
1144
  // Start with a random point from the sample
1135
1145
  const seedSamplePositions = [0]; // positions within sampleIndices
1136
1146
  const minDists = new Float64Array(sampleSize);
1137
1147
  minDists.fill(Infinity);
1148
+ // Seed 0 is already selected — mark it so the greedy loop doesn't re-pick it.
1149
+ minDists[0] = -1;
1138
1150
  // Compute initial distances from first seed
1139
1151
  const firstVec = points[sampleIndices[0]].vector;
1140
1152
  for (let i = 1; i < sampleSize; i++) {
@@ -1359,7 +1371,6 @@ export class HNSWIndex {
1359
1371
  this.setNode(newNode);
1360
1372
  // Keep quantized representation in sync
1361
1373
  if (this.quantizationEnabled && this.scalarQuantizer) {
1362
- this.int8Vectors[id] = this.scalarQuantizer.quantize(vector);
1363
1374
  if (this.flatInt8Vectors) {
1364
1375
  this.scalarQuantizer.quantizeInto(vector, this.flatInt8Vectors, id * this.dimension);
1365
1376
  }
@@ -1530,7 +1541,7 @@ export class HNSWIndex {
1530
1541
  }
1531
1542
  // Format version constants
1532
1543
  static MAGIC = 0x484E5357; // "HNSW" in ASCII (big-endian: 0x48='H', 0x4E='N', 0x53='S', 0x57='W')
1533
- static FORMAT_VERSION = 3; // v3: vector offset index for lazy loading
1544
+ static FORMAT_VERSION = 4; // v4: appended trailer with quantization params + calibration stats (graph/vector sections byte-identical to v3)
1534
1545
  static HEADER_SIZE = 40; // 4 (magic) + 4 (version) + 28 (existing header) + 4 (vectorDataOffset)
1535
1546
  static ensureReadable(bufferLength, offset, bytes, context) {
1536
1547
  if (offset + bytes > bufferLength) {
@@ -1538,6 +1549,33 @@ export class HNSWIndex {
1538
1549
  throw new VectorDBError(`Corrupt HNSW data: truncated ${context} (need ${bytes} bytes, only ${available} available)`, 'CORRUPT_INDEX');
1539
1550
  }
1540
1551
  }
1552
+ /**
1553
+ * Read a dimension-length float32 vector from a serialized buffer at the given
1554
+ * absolute byte offset. The whole range
1555
+ * [absoluteOffset, absoluteOffset + dimension*4) is bounds-checked once up front (CORRUPT_INDEX if truncated).
1556
+ *
1557
+ * Fast path: when the offset is 4-byte aligned we construct a Float32Array view
1558
+ * directly over the buffer and bulk-copy (host byte order, so this assumes a little-endian platform). Otherwise we fall back to a little-endian DataView
1559
+ * loop. Either way every component is validated for finiteness in a single pass.
1560
+ */
1561
+ static readVectorBytes(buffer, view, absoluteOffset, dimension, nodeIndex) {
1562
+ HNSWIndex.ensureReadable(buffer.byteLength, absoluteOffset, dimension * 4, `node ${nodeIndex} vector payload`);
1563
+ const vector = new Float32Array(dimension);
1564
+ if (absoluteOffset % 4 === 0) {
1565
+ vector.set(new Float32Array(buffer, absoluteOffset, dimension));
1566
+ }
1567
+ else {
1568
+ for (let j = 0; j < dimension; j++) {
1569
+ vector[j] = view.getFloat32(absoluteOffset + j * 4, true);
1570
+ }
1571
+ }
1572
+ for (let j = 0; j < dimension; j++) {
1573
+ if (!Number.isFinite(vector[j])) {
1574
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeIndex} vector component ${j} is non-finite (${vector[j]})`, 'CORRUPT_INDEX');
1575
+ }
1576
+ }
1577
+ return vector;
1578
+ }
1541
1579
  static wrapDeserializeError(error) {
1542
1580
  if (error instanceof VectorDBError) {
1543
1581
  return error;
@@ -1675,16 +1713,31 @@ export class HNSWIndex {
1675
1713
  totalNeighbors += neighbors?.length ?? 0;
1676
1714
  }
1677
1715
  }
1678
- // Pre-allocate with capacity for ALL nodes (not just current ones).
1679
- // During parallel build, serializeGraphToSharedBuffers is called after the seed phase
1680
- // (e.g. 500 nodes), but the full index may grow to flatVectorsCapacity nodes.
1681
- // Each node has up to M0 layer-0 neighbors + M upper-layer neighbors.
1682
- // The append-only write pattern creates dead space when nodes are rewritten,
1683
- // so we use 3x the estimated maximum to accommodate waste.
1684
- const nodeCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
1685
- const estimatedMaxNeighbors = nodeCapacity * (this.M0 + this.M);
1686
- const neighborCapacity = Math.max(Math.ceil(totalNeighbors * 3), totalNeighbors + 10000, estimatedMaxNeighbors * 3);
1687
- const indexSize = nodeCapacity * maxLayerSlots * 2;
1716
+ // Capacity sizing depends on whether this is a parallel build or a read-only pool.
1717
+ //
1718
+ // Parallel build (constructionMode === true): the graph grows after init and the
1719
+ // append-only update pattern (updateSharedGraphNodes) rewrites nodes, creating dead
1720
+ // space. We pre-allocate for ALL nodes (up to flatVectorsCapacity) at 3x the estimated
1721
+ // maximum so incremental updates have room. addPointsBulkParallel sets constructionMode
1722
+ // = true BEFORE pool.init() (which triggers this serialization), so this path is taken
1723
+ // during parallel build.
1724
+ //
1725
+ // Read-only pool (constructionMode === false): the graph is finished and immutable from
1726
+ // the workers' perspective. There are no incremental rewrites, so we size tightly to the
1727
+ // current node count. This avoids allocating ~864MB SABs for a finished 1M-node index.
1728
+ let neighborCapacity;
1729
+ let indexSize;
1730
+ if (this.constructionMode) {
1731
+ const nodeCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
1732
+ const estimatedMaxNeighbors = nodeCapacity * (this.M0 + this.M);
1733
+ neighborCapacity = Math.max(Math.ceil(totalNeighbors * 3), totalNeighbors + 10000, estimatedMaxNeighbors * 3);
1734
+ indexSize = nodeCapacity * maxLayerSlots * 2;
1735
+ }
1736
+ else {
1737
+ neighborCapacity = Math.max(totalNeighbors + 1024, 1024);
1738
+ // Index area must still cover every node's layer slots.
1739
+ indexSize = this.nodeCount * maxLayerSlots * 2;
1740
+ }
1688
1741
  const graphIndex = new Uint32Array(new SharedArrayBuffer(indexSize * 4));
1689
1742
  const graphNeighborData = new Uint32Array(new SharedArrayBuffer(neighborCapacity * 4));
1690
1743
  // Shared metadata: [nodeCount, entryPointId, maxLevel]
@@ -1742,11 +1795,14 @@ export class HNSWIndex {
1742
1795
  this.clearSharedGraph();
1743
1796
  return;
1744
1797
  }
1745
- this.sharedGraphIndex[base] = writeOffset;
1746
- this.sharedGraphIndex[base + 1] = neighbors.length;
1798
+ // Write the neighbor payload FIRST, then publish the index entry (offset +
1799
+ // count). This ordering is meant to keep a concurrent worker that reads the index
1800
+ // entry from seeing not-yet-written zeros. NOTE: these are plain (non-Atomics) stores, so cross-thread visibility order is not strictly guaranteed.
1747
1801
  for (let n = 0; n < neighbors.length; n++) {
1748
1802
  this.sharedGraphNeighborData[writeOffset + n] = neighbors[n];
1749
1803
  }
1804
+ this.sharedGraphIndex[base] = writeOffset;
1805
+ this.sharedGraphIndex[base + 1] = neighbors.length;
1750
1806
  writeOffset += neighbors.length;
1751
1807
  }
1752
1808
  }
@@ -1832,6 +1888,20 @@ export class HNSWIndex {
1832
1888
  if (this.nodeCount === 0 || this.entryPointId === -1) {
1833
1889
  return new Map();
1834
1890
  }
1891
+ // Reorder rebuilds all flat storage from node.vector, which holds zeros for
1892
+ // unloaded lazy vectors. Materialize every vector first, then drop lazy state
1893
+ // so the rebuilt structures and remapped IDs are self-consistent.
1894
+ if (this.lazyLoadEnabled) {
1895
+ for (let i = 0; i < this.nodeCount; i++) {
1896
+ if (this.nodes[i]) {
1897
+ this.loadVector(i);
1898
+ }
1899
+ }
1900
+ this.lazyLoadEnabled = false;
1901
+ this.vectorBuffer = null;
1902
+ this.vectorOffsets.clear();
1903
+ this.vectorsLoaded.clear();
1904
+ }
1835
1905
  const dim = this.dimension;
1836
1906
  const oldToNew = new Map();
1837
1907
  const visited = new Set();
@@ -1840,8 +1910,9 @@ export class HNSWIndex {
1840
1910
  let newId = 0;
1841
1911
  queue.push(this.entryPointId);
1842
1912
  visited.add(this.entryPointId);
1843
- while (queue.length > 0) {
1844
- const oldId = queue.shift();
1913
+ let head = 0;
1914
+ while (head < queue.length) {
1915
+ const oldId = queue[head++];
1845
1916
  oldToNew.set(oldId, newId++);
1846
1917
  const node = this.nodes[oldId];
1847
1918
  if (!node)
@@ -1868,10 +1939,8 @@ export class HNSWIndex {
1868
1939
  const newNodes = new Array(newId);
1869
1940
  const newFlatVectors = HNSWIndex.allocateFloat32(Math.max(newId, this.flatVectorsCapacity) * dim, this.useSharedMemory);
1870
1941
  let newFlatInt8Vectors = null;
1871
- let newInt8Vectors = [];
1872
1942
  if (this.quantizationEnabled && this.flatInt8Vectors) {
1873
1943
  newFlatInt8Vectors = HNSWIndex.allocateInt8(Math.max(newId, this.flatInt8VectorsCapacity) * dim, this.useSharedMemory);
1874
- newInt8Vectors = new Array(newId);
1875
1944
  }
1876
1945
  // Copy nodes with remapped IDs and neighbor lists
1877
1946
  for (const [oldId, nid] of oldToNew) {
@@ -1899,9 +1968,6 @@ export class HNSWIndex {
1899
1968
  newFlatInt8Vectors[newOffset + d] = this.flatInt8Vectors[oldOffset + d];
1900
1969
  }
1901
1970
  }
1902
- if (this.int8Vectors[oldId]) {
1903
- newInt8Vectors[nid] = this.int8Vectors[oldId];
1904
- }
1905
1971
  // Create new node with vector from flat storage
1906
1972
  const vec = new Float32Array(dim);
1907
1973
  vec.set(newFlatVectors.subarray(newOffset, newOffset + dim));
@@ -1922,7 +1988,6 @@ export class HNSWIndex {
1922
1988
  this.flatInt8Vectors = newFlatInt8Vectors;
1923
1989
  this.flatInt8VectorsCapacity = Math.max(newId, this.flatInt8VectorsCapacity);
1924
1990
  }
1925
- this.int8Vectors = newInt8Vectors;
1926
1991
  this.nodeCount = newId;
1927
1992
  this.nextAutoId = newId;
1928
1993
  // Reset visited array for new ID space
@@ -1975,7 +2040,22 @@ export class HNSWIndex {
1975
2040
  graphSize += nodeCount * 4; // vector offset table
1976
2041
  const vectorDataOffset = graphSize;
1977
2042
  const vectorDataSize = nodeCount * this.dimension * 4;
1978
- const totalSize = graphSize + vectorDataSize;
2043
+ // v4 trailer (appended after the vector section). Graph + vector sections are
2044
+ // byte-identical to v3; v4 always writes at least the flags uint32 so a trailer
2045
+ // is guaranteed to exist. We do NOT store int8 vector bytes — they are rebuilt
2046
+ // by re-quantizing float32 vectors on load.
2047
+ const writeQuantParams = this.quantizationEnabled && this.scalarQuantizer !== null;
2048
+ const writeCalibration = this.calibrationStats !== null;
2049
+ let quantBlob = null;
2050
+ let trailerSize = 4; // flags uint32
2051
+ if (writeQuantParams) {
2052
+ quantBlob = this.scalarQuantizer.serialize();
2053
+ trailerSize += 4 + quantBlob.byteLength; // length prefix + blob
2054
+ }
2055
+ if (writeCalibration) {
2056
+ trailerSize += 16; // meanEntryDist (f64) + stdEntryDist (f64)
2057
+ }
2058
+ const totalSize = graphSize + vectorDataSize + trailerSize;
1979
2059
  const buffer = new ArrayBuffer(totalSize);
1980
2060
  const view = new DataView(buffer);
1981
2061
  const uint8Array = new Uint8Array(buffer);
@@ -2034,14 +2114,50 @@ export class HNSWIndex {
2034
2114
  view.setUint32(offset, i * this.dimension * 4, true); // Relative offset
2035
2115
  offset += 4;
2036
2116
  }
2037
- // Write vectors at end (for lazy loading capability)
2117
+ // Write vectors at end (for lazy loading capability).
2118
+ // When this index is itself lazy-loaded, unloaded nodes still hold a
2119
+ // placeholder zero vector in node.vector, so read their bytes directly from
2120
+ // the backing buffer instead of materializing every vector into memory.
2121
+ const sourceView = this.lazyLoadEnabled && this.vectorBuffer ? new DataView(this.vectorBuffer) : null;
2038
2122
  for (let i = 0; i < nodesArray.length; i++) {
2039
2123
  const node = nodesArray[i];
2040
- for (let j = 0; j < this.dimension; j++) {
2041
- view.setFloat32(offset, node.vector[j], true);
2042
- offset += 4;
2124
+ if (this.lazyLoadEnabled && !this.vectorsLoaded.has(node.id)) {
2125
+ const srcOffset = this.vectorOffsets.get(node.id);
2126
+ if (sourceView === null || srcOffset === undefined) {
2127
+ throw new VectorDBError(`Corrupt HNSW data: lazy node ${node.id} has no backing vector buffer to serialize`, 'CORRUPT_INDEX');
2128
+ }
2129
+ for (let j = 0; j < this.dimension; j++) {
2130
+ view.setFloat32(offset, sourceView.getFloat32(srcOffset + j * 4, true), true);
2131
+ offset += 4;
2132
+ }
2133
+ }
2134
+ else {
2135
+ for (let j = 0; j < this.dimension; j++) {
2136
+ view.setFloat32(offset, node.vector[j], true);
2137
+ offset += 4;
2138
+ }
2043
2139
  }
2044
2140
  }
2141
+ // Write v4 trailer. bit 0 = quantization params present, bit 1 = calibration stats present.
2142
+ let flags = 0;
2143
+ if (writeQuantParams)
2144
+ flags |= 1;
2145
+ if (writeCalibration)
2146
+ flags |= 2;
2147
+ view.setUint32(offset, flags, true);
2148
+ offset += 4;
2149
+ if (writeQuantParams && quantBlob !== null) {
2150
+ view.setUint32(offset, quantBlob.byteLength, true);
2151
+ offset += 4;
2152
+ uint8Array.set(new Uint8Array(quantBlob), offset);
2153
+ offset += quantBlob.byteLength;
2154
+ }
2155
+ if (writeCalibration && this.calibrationStats !== null) {
2156
+ view.setFloat64(offset, this.calibrationStats.meanEntryDist, true);
2157
+ offset += 8;
2158
+ view.setFloat64(offset, this.calibrationStats.stdEntryDist, true);
2159
+ offset += 8;
2160
+ }
2045
2161
  return buffer;
2046
2162
  }
2047
2163
  /**
@@ -2079,6 +2195,9 @@ export class HNSWIndex {
2079
2195
  ensureReadable(4, context);
2080
2196
  const value = view.getFloat32(offset, true);
2081
2197
  offset += 4;
2198
+ if (!Number.isFinite(value)) {
2199
+ throw new VectorDBError(`Corrupt HNSW data: ${context} is non-finite (${value})`, 'CORRUPT_INDEX');
2200
+ }
2082
2201
  return value;
2083
2202
  };
2084
2203
  // Check for magic header (new format v1+)
@@ -2265,15 +2384,14 @@ export class HNSWIndex {
2265
2384
  }
2266
2385
  }
2267
2386
  else {
2268
- // Eager loading: load all vectors now
2387
+ // Eager loading: load all vectors now.
2388
+ // The offset table validation above already guarantees each vector's
2389
+ // full [offset, offset + dimension*4) range fits within the buffer, so
2390
+ // we bulk-copy instead of bounds-checking each component.
2269
2391
  for (let i = 0; i < nodeCount; i++) {
2270
2392
  const { id, level } = nodeMetadata[i];
2271
2393
  const vectorOffset = index.vectorOffsets.get(id);
2272
- const vector = new Float32Array(dimension);
2273
- for (let j = 0; j < dimension; j++) {
2274
- HNSWIndex.ensureReadable(buffer.byteLength, vectorOffset + j * 4, 4, `node ${i} vector component ${j}`);
2275
- vector[j] = view.getFloat32(vectorOffset + j * 4, true);
2276
- }
2394
+ const vector = HNSWIndex.readVectorBytes(buffer, view, vectorOffset, dimension, i);
2277
2395
  const node = { id, level, vector, neighbors: nodeNeighbors[i] };
2278
2396
  index.setNode(node);
2279
2397
  index.vectorsLoaded.add(id);
@@ -2368,6 +2486,56 @@ export class HNSWIndex {
2368
2486
  }
2369
2487
  }
2370
2488
  }
2489
+ // v4 trailer: appended after the vector section. v0-v3 buffers have no
2490
+ // trailer, so this is skipped entirely for them (their parsing above is
2491
+ // byte-for-byte unchanged). The vector section runs from vectorDataOffset
2492
+ // for nodeCount * dimension * 4 bytes; the trailer starts immediately after.
2493
+ if (formatVersion >= 4) {
2494
+ let trailerOffset = vectorDataOffset + nodeCount * dimension * 4;
2495
+ const readTrailerUint32 = (context) => {
2496
+ HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, 4, context);
2497
+ const value = view.getUint32(trailerOffset, true);
2498
+ trailerOffset += 4;
2499
+ return value;
2500
+ };
2501
+ const readTrailerFloat64 = (context) => {
2502
+ HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, 8, context);
2503
+ const value = view.getFloat64(trailerOffset, true);
2504
+ trailerOffset += 8;
2505
+ return value;
2506
+ };
2507
+ const flags = readTrailerUint32('v4 trailer flags');
2508
+ const hasQuant = (flags & 1) !== 0;
2509
+ const hasCalibration = (flags & 2) !== 0;
2510
+ if (hasQuant) {
2511
+ const quantBlobLength = readTrailerUint32('v4 quant blob length');
2512
+ HNSWIndex.ensureReadable(buffer.byteLength, trailerOffset, quantBlobLength, 'v4 quant blob');
2513
+ const blobStart = trailerOffset;
2514
+ const blobEnd = trailerOffset + quantBlobLength;
2515
+ trailerOffset = blobEnd;
2516
+ if (lazyLoad) {
2517
+ // Lazy loading skips restoring quantization: the float32 vectors are
2518
+ // not materialized, so we cannot re-quantize them into flatInt8Vectors.
2519
+ // Leave quantizationEnabled = false; quantized blob bytes are consumed
2520
+ // (offset advanced) but the quantizer is not reconstructed. Searches
2521
+ // fall back to the float32 path, which lazy-loads vectors on demand.
2522
+ }
2523
+ else {
2524
+ // slice() returns a fresh ArrayBuffer with byteOffset 0, which
2525
+ // ScalarQuantizer.deserialize() expects.
2526
+ const quantBlob = buffer.slice(blobStart, blobEnd);
2527
+ const quantizer = ScalarQuantizer.deserialize(quantBlob);
2528
+ index.installQuantizer(quantizer);
2529
+ }
2530
+ }
2531
+ if (hasCalibration) {
2532
+ const meanEntryDist = readTrailerFloat64('v4 calibration meanEntryDist');
2533
+ const stdEntryDist = readTrailerFloat64('v4 calibration stdEntryDist');
2534
+ // Calibration stats work for lazy loads too (no vectors needed).
2535
+ index.calibrationStats = { meanEntryDist, stdEntryDist };
2536
+ }
2537
+ // Bytes after the trailer (if any) are ignored.
2538
+ }
2371
2539
  if (nodeCount > 0 && observedMaxLevel !== maxLevel) {
2372
2540
  throw new VectorDBError(`Corrupt HNSW data: maxLevel ${maxLevel} does not match highest node level ${observedMaxLevel}`, 'CORRUPT_INDEX');
2373
2541
  }
@@ -2403,10 +2571,7 @@ export class HNSWIndex {
2403
2571
  if (vectorOffset === undefined)
2404
2572
  return null;
2405
2573
  const view = new DataView(this.vectorBuffer);
2406
- const vector = new Float32Array(this.dimension);
2407
- for (let j = 0; j < this.dimension; j++) {
2408
- vector[j] = view.getFloat32(vectorOffset + j * 4, true);
2409
- }
2574
+ const vector = HNSWIndex.readVectorBytes(this.vectorBuffer, view, vectorOffset, this.dimension, nodeId);
2410
2575
  // Update node with loaded vector
2411
2576
  node.vector = vector;
2412
2577
  this.vectorsLoaded.add(nodeId);
@@ -2482,10 +2647,27 @@ export class HNSWIndex {
2482
2647
  // Reset entry point and level so reuse after destroy doesn't crash
2483
2648
  this.entryPointId = -1;
2484
2649
  this.maxLevel = -1;
2485
- // Clear quantization state
2486
- this.int8Vectors = [];
2650
+ // Clear quantization state, including the contiguous int8 buffer and the
2651
+ // reusable query buffer (these can be capacity*dimension bytes).
2487
2652
  this.quantizationEnabled = false;
2488
2653
  this.scalarQuantizer = null;
2654
+ this.flatInt8Vectors = null;
2655
+ this.flatInt8VectorsCapacity = 0;
2656
+ this.queryInt8Buffer = null;
2657
+ // Release shared-graph SABs (sharedGraphIndex/NeighborData/Metadata).
2658
+ this.clearSharedGraph();
2659
+ // Shrink visited tracking back to a small array.
2660
+ this.visitedArraySize = 1024;
2661
+ this.visitedArray = new Uint16Array(this.visitedArraySize);
2662
+ this.visitedGeneration = 1;
2663
+ // Replace grown search heaps with small-capacity instances.
2664
+ this.heapCapacity = 500;
2665
+ this.candidatesHeap = new BinaryHeap(this.heapCapacity);
2666
+ this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
2667
+ this.selectionHeap = new BinaryHeap(Math.max(this.M * 2, 1));
2668
+ // Drop calibration stats and construction-time neighbor sets.
2669
+ this.calibrationStats = null;
2670
+ this.neighborSets.clear();
2489
2671
  // Clear lazy-load state
2490
2672
  this.lazyLoadEnabled = false;
2491
2673
  this.vectorOffsets.clear();
@@ -2549,6 +2731,15 @@ export class HNSWIndex {
2549
2731
  }
2550
2732
  return result;
2551
2733
  }
2734
+ /**
2735
+ * Get a single vector by its numeric node id, or null if absent.
2736
+ * Returns a defensive copy so callers cannot mutate internal storage.
2737
+ * Keyed consistently with getAllVectors() and Collection's numeric ids.
2738
+ */
2739
+ getVectorById(id) {
2740
+ const vector = this.getNodeVector(id);
2741
+ return vector ? new Float32Array(vector) : null;
2742
+ }
2552
2743
  // ============================================
2553
2744
  // Quantized Search (Int8 with automatic rescoring)
2554
2745
  // ============================================
@@ -2587,12 +2778,24 @@ export class HNSWIndex {
2587
2778
  vectors.push(vector);
2588
2779
  }
2589
2780
  // Initialize and train scalar (int8) quantizer
2590
- this.scalarQuantizer = new ScalarQuantizer(this.dimension);
2591
- this.scalarQuantizer.train(vectors);
2592
- // Quantize all existing vectors - use array instead of Map
2593
- this.int8Vectors = new Array(this.nodeCount);
2594
- // Also build contiguous Int8 storage for cache-friendly batch distance calculations
2781
+ const quantizer = new ScalarQuantizer(this.dimension);
2782
+ quantizer.train(vectors);
2783
+ // Build contiguous Int8 storage + reusable query buffer and enable quantization.
2784
+ this.installQuantizer(quantizer);
2785
+ }
2786
+ /**
2787
+ * Build quantized state from an already-trained ScalarQuantizer.
2788
+ *
2789
+ * Shared by enableQuantization() (which trains the quantizer first) and
2790
+ * deserialize() (which restores the quantizer from a serialized blob). Mirrors
2791
+ * enableQuantization() exactly EXCEPT training: allocates the contiguous
2792
+ * flatInt8Vectors buffer, quantizes every live node's float vector into it,
2793
+ * allocates the reusable queryInt8Buffer, and flips quantizationEnabled on.
2794
+ */
2795
+ installQuantizer(quantizer) {
2595
2796
  const dim = this.dimension;
2797
+ this.scalarQuantizer = quantizer;
2798
+ // Build contiguous Int8 storage for cache-friendly batch distance calculations
2596
2799
  this.flatInt8VectorsCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
2597
2800
  this.flatInt8Vectors = HNSWIndex.allocateInt8(this.flatInt8VectorsCapacity * dim, this.useSharedMemory);
2598
2801
  for (let i = 0; i < this.nodeCount; i++) {
@@ -2602,8 +2805,7 @@ export class HNSWIndex {
2602
2805
  if (!vector) {
2603
2806
  throw new VectorDBError(`Cannot quantize node ${node.id}: vector is unavailable`, 'CORRUPT_INDEX');
2604
2807
  }
2605
- this.int8Vectors[node.id] = this.scalarQuantizer.quantize(vector);
2606
- // Also store in contiguous buffer
2808
+ // Store in contiguous buffer
2607
2809
  this.scalarQuantizer.quantizeInto(vector, this.flatInt8Vectors, node.id * dim);
2608
2810
  }
2609
2811
  }
@@ -2844,7 +3046,7 @@ export class HNSWIndex {
2844
3046
  /**
2845
3047
  * Search layer using Int8 quantized distances for speed.
2846
3048
  * OPTIMIZED: Uses batch distance calculation on contiguous flatInt8Vectors.
2847
- * Falls back to one-by-one with int8Vectors[] if contiguous storage unavailable.
3049
+ * Falls back to one-by-one float32 distance if contiguous int8 storage is unavailable.
2848
3050
  */
2849
3051
  searchLayerQuantized(query, nearest, layer, ef) {
2850
3052
  // Quantize query once — reuse pre-allocated buffer when available
@@ -2889,20 +3091,6 @@ export class HNSWIndex {
2889
3091
  entryDist = -sum;
2890
3092
  }
2891
3093
  }
2892
- else if (int8Query) {
2893
- const entryInt8 = this.int8Vectors[nearest.id];
2894
- if (entryInt8) {
2895
- if (this.metric === 'cosine') {
2896
- entryDist = cosineDistanceInt8(int8Query, entryInt8);
2897
- }
2898
- else if (this.metric === 'dot_product') {
2899
- entryDist = -dotProductInt8(int8Query, entryInt8);
2900
- }
2901
- else {
2902
- entryDist = Math.sqrt(l2SquaredInt8(int8Query, entryInt8));
2903
- }
2904
- }
2905
- }
2906
3094
  this.candidatesHeap.push(nearest.id, entryDist);
2907
3095
  this.resultsHeap.push(nearest.id, entryDist);
2908
3096
  let furthestResultDist = entryDist;
@@ -2954,38 +3142,15 @@ export class HNSWIndex {
2954
3142
  }
2955
3143
  }
2956
3144
  else {
2957
- // Fallback: one-by-one (no contiguous storage or no quantizer)
3145
+ // Fallback: one-by-one float32 (no contiguous int8 storage available)
2958
3146
  for (const neighborId of neighbors) {
2959
3147
  if (this.isVisited(neighborId))
2960
3148
  continue;
2961
3149
  this.markVisited(neighborId);
2962
- let distance;
2963
- if (int8Query) {
2964
- const neighborInt8 = this.int8Vectors[neighborId];
2965
- if (neighborInt8) {
2966
- if (this.metric === 'cosine') {
2967
- distance = cosineDistanceInt8(int8Query, neighborInt8);
2968
- }
2969
- else if (this.metric === 'dot_product') {
2970
- distance = -dotProductInt8(int8Query, neighborInt8);
2971
- }
2972
- else {
2973
- distance = Math.sqrt(l2SquaredInt8(int8Query, neighborInt8));
2974
- }
2975
- }
2976
- else {
2977
- const neighborVector = this.getNodeVector(neighborId);
2978
- if (!neighborVector)
2979
- continue;
2980
- distance = this.calculateDistance(query, neighborVector);
2981
- }
2982
- }
2983
- else {
2984
- const neighborVector = this.getNodeVector(neighborId);
2985
- if (!neighborVector)
2986
- continue;
2987
- distance = this.calculateDistance(query, neighborVector);
2988
- }
3150
+ const neighborVector = this.getNodeVector(neighborId);
3151
+ if (!neighborVector)
3152
+ continue;
3153
+ const distance = this.calculateDistance(query, neighborVector);
2989
3154
  if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
2990
3155
  this.candidatesHeap.push(neighborId, distance);
2991
3156
  this.resultsHeap.push(neighborId, distance);