verso-db 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/HNSWIndex.js CHANGED
@@ -11,6 +11,7 @@ export class HNSWIndex {
11
11
  static MAX_NODE_ID = HNSWIndex.MISSING_ID_SENTINEL - 1;
12
12
  static MAX_ARRAY_BUFFER_BYTES = 0x7FFFFFFF;
13
13
  static MAX_NODE_SLOTS = 50_000_000;
14
+ static MAX_SERIALIZED_LEVEL = 31;
14
15
  /**
15
16
  * Allocate a Float32Array backed by SharedArrayBuffer when available.
16
17
  * This allows workers to read vector data without copying.
@@ -32,6 +33,12 @@ export class HNSWIndex {
32
33
  }
33
34
  /** Whether flatVectors/flatInt8Vectors use SharedArrayBuffer */
34
35
  useSharedMemory = false;
36
+ // Shared graph SAB state for zero-copy worker communication
37
+ sharedGraphIndex = null;
38
+ sharedGraphNeighborData = null;
39
+ sharedGraphMaxLayerSlots = 0;
40
+ sharedGraphWriteOffset = 0;
41
+ sharedMetadata = null;
35
42
  M; // Max number of connections per node per level
36
43
  M0; // Max number of connections for level 0 (typically M * 2)
37
44
  efConstruction; // Size of candidate list during construction
@@ -63,6 +70,7 @@ export class HNSWIndex {
63
70
  vectorsAreNormalized = false;
64
71
  // Cached distance function to avoid switch overhead
65
72
  distanceFn;
73
+ random;
66
74
  // Quantization support for 3-4x faster search with Int8
67
75
  scalarQuantizer = null;
68
76
  // OPTIMIZATION: Use array instead of Map for int8 vectors too
@@ -92,7 +100,7 @@ export class HNSWIndex {
92
100
  constructionMode = false;
93
101
  // Adaptive efSearch calibration stats
94
102
  calibrationStats = null;
95
- constructor(dimension, metric = 'cosine', M = 24, efConstruction = 200) {
103
+ constructor(dimension, metric = 'cosine', M = 24, efConstruction = 200, random = Math.random) {
96
104
  if (dimension <= 0 || !Number.isInteger(dimension)) {
97
105
  throw new VectorDBError(`Invalid dimension: must be a positive integer, got ${dimension}`, 'VALIDATION_ERROR');
98
106
  }
@@ -102,8 +110,12 @@ export class HNSWIndex {
102
110
  if (efConstruction <= 0 || !Number.isInteger(efConstruction)) {
103
111
  throw new VectorDBError(`Invalid efConstruction parameter: must be a positive integer, got ${efConstruction}`, 'VALIDATION_ERROR');
104
112
  }
113
+ if (typeof random !== 'function') {
114
+ throw new VectorDBError('Invalid random source: must be a function returning a number in [0, 1)', 'VALIDATION_ERROR');
115
+ }
105
116
  this.dimension = dimension;
106
117
  this.metric = metric;
118
+ this.random = random;
107
119
  this.M = M;
108
120
  this.M0 = M * 2;
109
121
  this.efConstruction = efConstruction;
@@ -125,6 +137,9 @@ export class HNSWIndex {
125
137
  this.visitedArraySize = 10000;
126
138
  this.visitedArray = new Uint16Array(this.visitedArraySize);
127
139
  this.visitedGeneration = 1;
140
+ // Pre-allocate batch distance buffers sized for max neighbors at layer 0
141
+ this.batchNeighborIds = new Uint32Array(this.M0 + 1);
142
+ this.batchDistances = new Float64Array(this.M0 + 1);
128
143
  // Pre-allocate searchLayer heaps - sized for typical ef values
129
144
  // Will be resized if needed for larger ef
130
145
  this.heapCapacity = Math.max(efConstruction * 2, 500);
@@ -223,9 +238,10 @@ export class HNSWIndex {
223
238
  this.nextAutoId = id + 1;
224
239
  }
225
240
  }
226
- // OPTIMIZATION: Reusable arrays for batch distance calculation
227
- batchNeighborIds = [];
228
- batchDistances = [];
241
+ // OPTIMIZATION: Pre-allocated typed arrays for batch distance calculation
242
+ // Sized to M0+1 (max neighbors at layer 0) to eliminate dynamic resizing
243
+ batchNeighborIds;
244
+ batchDistances;
229
245
  /**
230
246
  * OPTIMIZATION: Batch distance calculation for better cache locality
231
247
  * Computes distances from query to multiple neighbors at once
@@ -427,8 +443,18 @@ export class HNSWIndex {
427
443
  normalizeInPlace(vector);
428
444
  return vector;
429
445
  }
446
+ randomFloat() {
447
+ const value = this.random();
448
+ if (!Number.isFinite(value))
449
+ return 0;
450
+ if (value <= 0)
451
+ return 0;
452
+ if (value >= 1)
453
+ return 1 - Number.EPSILON;
454
+ return value;
455
+ }
430
456
  selectLevel() {
431
- const r = Math.random() || Number.MIN_VALUE;
457
+ const r = this.randomFloat() || Number.MIN_VALUE;
432
458
  const level = Math.floor(-Math.log(r) * this.levelMult);
433
459
  return Math.max(0, Math.min(level, this.maxLayers - 1));
434
460
  }
@@ -667,10 +693,6 @@ export class HNSWIndex {
667
693
  }
668
694
  // Calculate all distances at once (better cache utilization)
669
695
  if (batchCount > 0) {
670
- // Ensure batch arrays are large enough
671
- if (batchDists.length < batchCount) {
672
- this.batchDistances.length = batchCount;
673
- }
674
696
  this.calculateDistancesBatch(query, batchIds, batchDists, batchCount);
675
697
  // Process batch results
676
698
  for (let i = 0; i < batchCount; i++) {
@@ -774,23 +796,10 @@ export class HNSWIndex {
774
796
  if (this.nodes[id]) {
775
797
  throw new VectorDBError(`Duplicate node ID ${id}: node already exists`, 'DUPLICATE_VECTOR');
776
798
  }
777
- // Optimize: only copy when necessary
778
- // - Always copy arrays (need Float32Array)
779
- // - Copy Float32Array only if we need to normalize (modifies in place)
780
- // - Reuse input directly if skipNormalization is set (caller guarantees immutability)
781
- let floatVector;
782
- if (Array.isArray(vector)) {
783
- floatVector = new Float32Array(vector);
784
- }
785
- else if (this.vectorsAreNormalized && !options?.skipNormalization) {
786
- // Need to copy because normalizeVector modifies in place
787
- floatVector = new Float32Array(vector);
788
- }
789
- else {
790
- // No normalization needed and input is Float32Array - use directly
791
- // Note: caller should not modify this array after passing it
792
- floatVector = vector;
793
- }
799
+ // Always take ownership of vector data. Reusing caller-owned
800
+ // Float32Arrays lets later external mutation diverge node.vector from the
801
+ // flat vector store and corrupt search behavior.
802
+ let floatVector = new Float32Array(vector);
794
803
  if (floatVector.length !== this.dimension) {
795
804
  throw new VectorDBError(`Vector dimension ${floatVector.length} does not match expected ${this.dimension}`, 'DIMENSION_MISMATCH');
796
805
  }
@@ -1115,7 +1124,7 @@ export class HNSWIndex {
1115
1124
  const sampleIndices = [];
1116
1125
  const usedIndices = new Set();
1117
1126
  while (sampleIndices.length < sampleSize) {
1118
- const idx = Math.floor(Math.random() * n);
1127
+ const idx = Math.floor(this.randomFloat() * n);
1119
1128
  if (!usedIndices.has(idx)) {
1120
1129
  usedIndices.add(idx);
1121
1130
  sampleIndices.push(idx);
@@ -1297,12 +1306,11 @@ export class HNSWIndex {
1297
1306
  }
1298
1307
  // Pre-normalize all vectors if needed
1299
1308
  const normalizedPoints = points.map(p => {
1309
+ const vector = new Float32Array(p.vector);
1300
1310
  if (this.vectorsAreNormalized && !skipNorm) {
1301
- const v = new Float32Array(p.vector);
1302
- normalizeInPlace(v);
1303
- return { id: p.id, vector: v };
1311
+ normalizeInPlace(vector);
1304
1312
  }
1305
- return p;
1313
+ return { id: p.id, vector };
1306
1314
  });
1307
1315
  // Phase 1: Sequential seed insertion
1308
1316
  const seedSize = Math.min(Math.max(500, Math.floor(normalizedPoints.length * seedFraction)), normalizedPoints.length);
@@ -1400,17 +1408,27 @@ export class HNSWIndex {
1400
1408
  }
1401
1409
  batchCount++;
1402
1410
  // Send incremental graph update: ALL modified nodes (new + existing with new connections)
1403
- const graphUpdate = [];
1404
- for (const nid of modifiedNodeIds) {
1405
- const node = this.nodes[nid];
1406
- if (node) {
1407
- graphUpdate.push({ id: nid, neighbors: node.neighbors });
1411
+ if (this.hasSharedGraph()) {
1412
+ // Shared graph: write directly to SABs, workers see updates via shared memory
1413
+ this.updateSharedGraphNodes(modifiedNodeIds);
1414
+ this.updateSharedMetadata();
1415
+ }
1416
+ else {
1417
+ // Legacy: send graph data via postMessage
1418
+ const graphUpdate = [];
1419
+ for (const nid of modifiedNodeIds) {
1420
+ const node = this.nodes[nid];
1421
+ if (node) {
1422
+ graphUpdate.push({ id: nid, neighbors: node.neighbors });
1423
+ }
1408
1424
  }
1425
+ pool.broadcastGraphUpdate(graphUpdate, this.entryPointId, this.maxLevel);
1409
1426
  }
1410
- pool.broadcastGraphUpdate(graphUpdate, this.entryPointId, this.maxLevel);
1411
1427
  // Full re-sync periodically for accumulated neighbor changes
1412
- // (bidirectional connections modify existing nodes' neighbor lists)
1428
+ // With shared graph, this defragments dead space in neighbor data.
1429
+ // Without shared graph, this fixes accumulated drift from incremental updates.
1413
1430
  if (batchCount % resyncInterval === 0 && start + batchSize < normalizedPoints.length) {
1431
+ this.clearSharedGraph();
1414
1432
  pool.destroy();
1415
1433
  await pool.init(this);
1416
1434
  }
@@ -1572,7 +1590,6 @@ export class HNSWIndex {
1572
1590
  getSharedSearchData() {
1573
1591
  if (this.nodeCount === 0 || this.entryPointId === -1)
1574
1592
  return null;
1575
- const graphData = this.serializeGraphStructure();
1576
1593
  const nodeLevels = new Uint8Array(this.nodeCount);
1577
1594
  for (let i = 0; i < this.nodeCount; i++) {
1578
1595
  const node = this.nodes[i];
@@ -1606,7 +1623,7 @@ export class HNSWIndex {
1606
1623
  quantizationParams = this.scalarQuantizer.getParams();
1607
1624
  }
1608
1625
  }
1609
- return {
1626
+ const baseData = {
1610
1627
  flatVectors,
1611
1628
  flatInt8Vectors,
1612
1629
  dimension: this.dimension,
@@ -1616,14 +1633,154 @@ export class HNSWIndex {
1616
1633
  maxLevel: this.maxLevel,
1617
1634
  M: this.M,
1618
1635
  M0: this.M0,
1619
- graphData,
1620
1636
  nodeLevels,
1621
1637
  quantizationEnabled: this.quantizationEnabled,
1622
1638
  quantizationParams,
1623
1639
  };
1640
+ // Use shared graph SABs when SharedArrayBuffer is available
1641
+ if (this.useSharedMemory && typeof SharedArrayBuffer !== 'undefined') {
1642
+ const sharedGraph = this.serializeGraphToSharedBuffers();
1643
+ return {
1644
+ ...baseData,
1645
+ graphNeighborData: sharedGraph.graphNeighborData,
1646
+ graphIndex: sharedGraph.graphIndex,
1647
+ maxLayerSlots: sharedGraph.maxLayerSlots,
1648
+ sharedMetadata: sharedGraph.sharedMetadata,
1649
+ };
1650
+ }
1651
+ // Fallback: legacy serialized graph
1652
+ return {
1653
+ ...baseData,
1654
+ graphData: this.serializeGraphStructure(),
1655
+ };
1656
+ }
1657
+ /**
1658
+ * Serialize graph into SAB-backed flat typed arrays for zero-copy worker sharing.
1659
+ * Layout:
1660
+ * graphIndex[(nodeId * maxLayerSlots + layer) * 2] = offset into graphNeighborData
1661
+ * graphIndex[(nodeId * maxLayerSlots + layer) * 2 + 1] = neighbor count
1662
+ * graphNeighborData[offset..offset+count] = neighbor IDs
1663
+ *
1664
+ * Pre-allocates extra capacity for growth during parallel build.
1665
+ */
1666
+ serializeGraphToSharedBuffers() {
1667
+ const maxLayerSlots = Math.max(this.maxLevel + 4, 8);
1668
+ // Count total neighbors for sizing
1669
+ let totalNeighbors = 0;
1670
+ for (let i = 0; i < this.nodeCount; i++) {
1671
+ const node = this.nodes[i];
1672
+ if (!node)
1673
+ continue;
1674
+ for (const neighbors of node.neighbors) {
1675
+ totalNeighbors += neighbors?.length ?? 0;
1676
+ }
1677
+ }
1678
+ // Pre-allocate with capacity for ALL nodes (not just current ones).
1679
+ // During parallel build, serializeGraphToSharedBuffers is called after the seed phase
1680
+ // (e.g. 500 nodes), but the full index may grow to flatVectorsCapacity nodes.
1681
+ // Each node has up to M0 layer-0 neighbors + M upper-layer neighbors.
1682
+ // The append-only write pattern creates dead space when nodes are rewritten,
1683
+ // so we use 3x the estimated maximum to accommodate waste.
1684
+ const nodeCapacity = Math.max(this.nodeCount, this.flatVectorsCapacity);
1685
+ const estimatedMaxNeighbors = nodeCapacity * (this.M0 + this.M);
1686
+ const neighborCapacity = Math.max(Math.ceil(totalNeighbors * 3), totalNeighbors + 10000, estimatedMaxNeighbors * 3);
1687
+ const indexSize = nodeCapacity * maxLayerSlots * 2;
1688
+ const graphIndex = new Uint32Array(new SharedArrayBuffer(indexSize * 4));
1689
+ const graphNeighborData = new Uint32Array(new SharedArrayBuffer(neighborCapacity * 4));
1690
+ // Shared metadata: [nodeCount, entryPointId, maxLevel]
1691
+ const sharedMetadata = new Uint32Array(new SharedArrayBuffer(3 * 4));
1692
+ sharedMetadata[0] = this.nodeCount;
1693
+ sharedMetadata[1] = this.entryPointId;
1694
+ sharedMetadata[2] = this.maxLevel;
1695
+ // Serialize current graph
1696
+ let writeOffset = 0;
1697
+ for (let nodeId = 0; nodeId < this.nodeCount; nodeId++) {
1698
+ const node = this.nodes[nodeId];
1699
+ if (!node)
1700
+ continue;
1701
+ for (let l = 0; l < node.neighbors.length && l < maxLayerSlots; l++) {
1702
+ const neighbors = node.neighbors[l] ?? [];
1703
+ const base = (nodeId * maxLayerSlots + l) * 2;
1704
+ graphIndex[base] = writeOffset;
1705
+ graphIndex[base + 1] = neighbors.length;
1706
+ for (let n = 0; n < neighbors.length; n++) {
1707
+ graphNeighborData[writeOffset + n] = neighbors[n];
1708
+ }
1709
+ writeOffset += neighbors.length;
1710
+ }
1711
+ }
1712
+ // Store references for incremental updates during parallel build
1713
+ this.sharedGraphIndex = graphIndex;
1714
+ this.sharedGraphNeighborData = graphNeighborData;
1715
+ this.sharedGraphMaxLayerSlots = maxLayerSlots;
1716
+ this.sharedGraphWriteOffset = writeOffset;
1717
+ this.sharedMetadata = sharedMetadata;
1718
+ return { graphNeighborData, graphIndex, maxLayerSlots, sharedMetadata };
1719
+ }
1720
+ /**
1721
+ * Update shared graph SABs for specific nodes.
1722
+ * Called during parallel build after each batch to sync graph changes.
1723
+ * Workers see updates immediately via shared memory — no postMessage needed.
1724
+ */
1725
+ updateSharedGraphNodes(nodeIds) {
1726
+ if (!this.sharedGraphIndex || !this.sharedGraphNeighborData)
1727
+ return;
1728
+ const maxLayerSlots = this.sharedGraphMaxLayerSlots;
1729
+ let writeOffset = this.sharedGraphWriteOffset;
1730
+ for (const nodeId of nodeIds) {
1731
+ const node = this.nodes[nodeId];
1732
+ if (!node)
1733
+ continue;
1734
+ for (let l = 0; l < node.neighbors.length && l < maxLayerSlots; l++) {
1735
+ const neighbors = node.neighbors[l] ?? [];
1736
+ const base = (nodeId * maxLayerSlots + l) * 2;
1737
+ // If capacity exhausted, clear shared graph to force fallback to
1738
+ // broadcastGraphUpdate. Workers keep their current SAB data (stale but valid).
1739
+ // The periodic resync in addPointsBulkParallel will create fresh SABs.
1740
+ if (writeOffset + neighbors.length > this.sharedGraphNeighborData.length) {
1741
+ this.sharedGraphWriteOffset = writeOffset;
1742
+ this.clearSharedGraph();
1743
+ return;
1744
+ }
1745
+ this.sharedGraphIndex[base] = writeOffset;
1746
+ this.sharedGraphIndex[base + 1] = neighbors.length;
1747
+ for (let n = 0; n < neighbors.length; n++) {
1748
+ this.sharedGraphNeighborData[writeOffset + n] = neighbors[n];
1749
+ }
1750
+ writeOffset += neighbors.length;
1751
+ }
1752
+ }
1753
+ this.sharedGraphWriteOffset = writeOffset;
1754
+ }
1755
+ /**
1756
+ * Update shared metadata SAB with current index state.
1757
+ * Workers read these values during search.
1758
+ */
1759
+ updateSharedMetadata() {
1760
+ if (!this.sharedMetadata)
1761
+ return;
1762
+ this.sharedMetadata[0] = this.nodeCount;
1763
+ this.sharedMetadata[1] = this.entryPointId;
1764
+ this.sharedMetadata[2] = this.maxLevel;
1765
+ }
1766
+ /**
1767
+ * Check if shared graph SABs are active (for parallel build optimization).
1768
+ */
1769
+ hasSharedGraph() {
1770
+ return this.sharedGraphIndex !== null;
1771
+ }
1772
+ /**
1773
+ * Clear shared graph references (called when pool is destroyed/re-initialized).
1774
+ */
1775
+ clearSharedGraph() {
1776
+ this.sharedGraphIndex = null;
1777
+ this.sharedGraphNeighborData = null;
1778
+ this.sharedMetadata = null;
1779
+ this.sharedGraphWriteOffset = 0;
1624
1780
  }
1625
1781
  /**
1626
1782
  * Serialize graph structure (neighbor lists) into a compact ArrayBuffer.
1783
+ * Legacy format for non-SAB fallback.
1627
1784
  * Format per node: [numLayers:uint8] [numNeighbors:uint16, neighborId:uint32...] per layer
1628
1785
  */
1629
1786
  serializeGraphStructure() {
@@ -1959,6 +2116,18 @@ export class HNSWIndex {
1959
2116
  const entryPointId = readInt32('entryPointId');
1960
2117
  const nodeCount = readUint32('nodeCount');
1961
2118
  const vectorByteLength = dimension * 4;
2119
+ if (nodeCount > HNSWIndex.MAX_NODE_SLOTS) {
2120
+ throw new VectorDBError(`Corrupt HNSW data: node count ${nodeCount} exceeds maximum supported ${HNSWIndex.MAX_NODE_SLOTS}`, 'CORRUPT_INDEX');
2121
+ }
2122
+ if (maxLevel < -1 || maxLevel > HNSWIndex.MAX_SERIALIZED_LEVEL) {
2123
+ throw new VectorDBError(`Corrupt HNSW data: invalid maxLevel ${maxLevel}`, 'CORRUPT_INDEX');
2124
+ }
2125
+ if (nodeCount === 0 && (entryPointId !== -1 || maxLevel !== -1)) {
2126
+ throw new VectorDBError('Corrupt HNSW data: empty index has non-empty entry point metadata', 'CORRUPT_INDEX');
2127
+ }
2128
+ if (nodeCount > 0 && (entryPointId < 0 || maxLevel < 0)) {
2129
+ throw new VectorDBError('Corrupt HNSW data: populated index has missing entry point metadata', 'CORRUPT_INDEX');
2130
+ }
1962
2131
  // V3+ has vectorDataOffset in header
1963
2132
  let vectorDataOffset = 0;
1964
2133
  if (formatVersion >= 3) {
@@ -1971,6 +2140,41 @@ export class HNSWIndex {
1971
2140
  index.maxLevel = maxLevel;
1972
2141
  index.entryPointId = entryPointId;
1973
2142
  const indexToId = new Array(nodeCount);
2143
+ const seenNodeIds = new Set();
2144
+ let observedMaxLevel = -1;
2145
+ const validateSerializedNode = (id, level, nodeIndex) => {
2146
+ if (id > HNSWIndex.MAX_NODE_ID) {
2147
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeIndex} has reserved or unsupported ID ${id}`, 'CORRUPT_INDEX');
2148
+ }
2149
+ if (level > HNSWIndex.MAX_SERIALIZED_LEVEL || level > maxLevel) {
2150
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeIndex} has invalid level ${level}`, 'CORRUPT_INDEX');
2151
+ }
2152
+ if (seenNodeIds.has(id)) {
2153
+ throw new VectorDBError(`Corrupt HNSW data: duplicate node ID ${id}`, 'CORRUPT_INDEX');
2154
+ }
2155
+ seenNodeIds.add(id);
2156
+ if (level > observedMaxLevel)
2157
+ observedMaxLevel = level;
2158
+ };
2159
+ const validateNeighborList = (nodeId, layer, neighbors) => {
2160
+ const maxConnections = layer === 0 ? M * 2 : M;
2161
+ if (neighbors.length > maxConnections) {
2162
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeId} layer ${layer} has ${neighbors.length} neighbors, maximum is ${maxConnections}`, 'CORRUPT_INDEX');
2163
+ }
2164
+ const seenNeighbors = new Set();
2165
+ for (const neighborId of neighbors) {
2166
+ if (!seenNodeIds.has(neighborId)) {
2167
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeId} references missing neighbor ${neighborId}`, 'CORRUPT_INDEX');
2168
+ }
2169
+ if (neighborId === nodeId) {
2170
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeId} references itself as a neighbor`, 'CORRUPT_INDEX');
2171
+ }
2172
+ if (seenNeighbors.has(neighborId)) {
2173
+ throw new VectorDBError(`Corrupt HNSW data: node ${nodeId} has duplicate neighbor ${neighborId}`, 'CORRUPT_INDEX');
2174
+ }
2175
+ seenNeighbors.add(neighborId);
2176
+ }
2177
+ };
1974
2178
  if (formatVersion >= 3) {
1975
2179
  // V3 format: vectors at end, supports lazy loading
1976
2180
  const nodeMetadata = [];
@@ -1979,6 +2183,7 @@ export class HNSWIndex {
1979
2183
  for (let i = 0; i < nodeCount; i++) {
1980
2184
  const id = readUint32(`node ${i} id`);
1981
2185
  const level = readUint32(`node ${i} level`);
2186
+ validateSerializedNode(id, level, i);
1982
2187
  indexToId[i] = id;
1983
2188
  nodeMetadata.push({ id, level });
1984
2189
  }
@@ -2017,6 +2222,7 @@ export class HNSWIndex {
2017
2222
  }
2018
2223
  return indexToId[idx];
2019
2224
  });
2225
+ validateNeighborList(nodeMetadata[i].id, l, neighbors[l]);
2020
2226
  }
2021
2227
  nodeNeighbors.push(neighbors);
2022
2228
  }
@@ -2081,6 +2287,7 @@ export class HNSWIndex {
2081
2287
  for (let i = 0; i < nodeCount; i++) {
2082
2288
  const id = readUint32(`node ${i} id`);
2083
2289
  const level = readUint32(`node ${i} level`);
2290
+ validateSerializedNode(id, level, i);
2084
2291
  indexToId[i] = id;
2085
2292
  const vector = new Float32Array(dimension);
2086
2293
  for (let j = 0; j < dimension; j++) {
@@ -2120,6 +2327,7 @@ export class HNSWIndex {
2120
2327
  }
2121
2328
  return indexToId[idx];
2122
2329
  });
2330
+ validateNeighborList(id, l, neighbors[l]);
2123
2331
  }
2124
2332
  const node = { id, level, vector, neighbors };
2125
2333
  index.setNode(node);
@@ -2130,6 +2338,7 @@ export class HNSWIndex {
2130
2338
  for (let i = 0; i < nodeCount; i++) {
2131
2339
  const id = readUint32(`node ${i} id`);
2132
2340
  const level = readUint32(`node ${i} level`);
2341
+ validateSerializedNode(id, level, i);
2133
2342
  indexToId[i] = id;
2134
2343
  const vector = new Float32Array(dimension);
2135
2344
  for (let j = 0; j < dimension; j++) {
@@ -2155,9 +2364,20 @@ export class HNSWIndex {
2155
2364
  }
2156
2365
  node.neighbors[l][j] = indexToId[neighborIndex];
2157
2366
  }
2367
+ validateNeighborList(node.id, l, node.neighbors[l]);
2158
2368
  }
2159
2369
  }
2160
2370
  }
2371
+ if (nodeCount > 0 && observedMaxLevel !== maxLevel) {
2372
+ throw new VectorDBError(`Corrupt HNSW data: maxLevel ${maxLevel} does not match highest node level ${observedMaxLevel}`, 'CORRUPT_INDEX');
2373
+ }
2374
+ if (nodeCount > 0 && !seenNodeIds.has(entryPointId)) {
2375
+ throw new VectorDBError(`Corrupt HNSW data: entry point ${entryPointId} is not present in node table`, 'CORRUPT_INDEX');
2376
+ }
2377
+ const entryPoint = nodeCount > 0 ? index.nodes[entryPointId] : undefined;
2378
+ if (entryPoint && entryPoint.level !== maxLevel) {
2379
+ throw new VectorDBError(`Corrupt HNSW data: entry point level ${entryPoint.level} does not match maxLevel ${maxLevel}`, 'CORRUPT_INDEX');
2380
+ }
2161
2381
  return index;
2162
2382
  }
2163
2383
  catch (error) {
@@ -2324,7 +2544,7 @@ export class HNSWIndex {
2324
2544
  continue;
2325
2545
  const vector = this.getNodeVector(node.id);
2326
2546
  if (vector) {
2327
- result.set(node.id, vector);
2547
+ result.set(node.id, new Float32Array(vector));
2328
2548
  }
2329
2549
  }
2330
2550
  return result;
@@ -2718,9 +2938,6 @@ export class HNSWIndex {
2718
2938
  }
2719
2939
  }
2720
2940
  if (batchCount > 0) {
2721
- if (batchDists.length < batchCount) {
2722
- this.batchDistances.length = batchCount;
2723
- }
2724
2941
  this.calculateDistancesBatchInt8(int8Query, batchIds, batchDists, batchCount);
2725
2942
  for (let i = 0; i < batchCount; i++) {
2726
2943
  const neighborId = batchIds[i];