faiss 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/Index.h +1 -1
  5. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +6 -7
  6. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +3 -3
  7. data/vendor/faiss/faiss/IndexHNSW.cpp +173 -143
  8. data/vendor/faiss/faiss/IndexIVF.cpp +2 -2
  9. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  10. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -1
  11. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +3 -3
  12. data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -3
  13. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -3
  14. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +4 -13
  15. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  16. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  17. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +68 -6
  18. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -0
  19. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +1 -1
  20. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +902 -12
  21. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +702 -10
  22. data/vendor/faiss/faiss/factory_tools.cpp +4 -0
  23. data/vendor/faiss/faiss/gpu/GpuResources.h +3 -2
  24. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +11 -12
  25. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +3 -3
  26. data/vendor/faiss/faiss/gpu_metal/MetalDistance.h +87 -0
  27. data/vendor/faiss/faiss/gpu_metal/MetalIndex.h +7 -0
  28. data/vendor/faiss/faiss/gpu_metal/MetalIndexIVFFlat.h +181 -0
  29. data/vendor/faiss/faiss/gpu_metal/MetalKernels.h +48 -3
  30. data/vendor/faiss/faiss/gpu_metal/MetalPythonBridge.h +45 -0
  31. data/vendor/faiss/faiss/gpu_metal/impl/MetalIVFFlat.h +193 -0
  32. data/vendor/faiss/faiss/impl/HNSW.cpp +556 -199
  33. data/vendor/faiss/faiss/impl/HNSW.h +51 -13
  34. data/vendor/faiss/faiss/impl/NSG.cpp +15 -11
  35. data/vendor/faiss/faiss/impl/Panorama.h +11 -0
  36. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -2
  37. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +1 -1
  38. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +7 -1
  39. data/vendor/faiss/faiss/impl/ResultHandler.h +1 -0
  40. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +271 -8
  41. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +50 -0
  42. data/vendor/faiss/faiss/impl/VisitedTable.cpp +10 -10
  43. data/vendor/faiss/faiss/impl/VisitedTable.h +69 -34
  44. data/vendor/faiss/faiss/impl/fast_scan/dispatching.h +3 -1
  45. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.cpp +35 -43
  46. data/vendor/faiss/faiss/impl/hnsw/MinimaxHeap.h +64 -15
  47. data/vendor/faiss/faiss/impl/hnsw/avx2.cpp +86 -40
  48. data/vendor/faiss/faiss/impl/hnsw/avx512.cpp +81 -50
  49. data/vendor/faiss/faiss/impl/index_read.cpp +100 -39
  50. data/vendor/faiss/faiss/impl/index_write.cpp +1 -0
  51. data/vendor/faiss/faiss/impl/io_macros.h +25 -0
  52. data/vendor/faiss/faiss/impl/platform_macros.h +12 -8
  53. data/vendor/faiss/faiss/impl/pq_code_distance/avx2.cpp +2 -0
  54. data/vendor/faiss/faiss/impl/pq_code_distance/avx512.cpp +2 -0
  55. data/vendor/faiss/faiss/impl/pq_code_distance/neon.cpp +2 -0
  56. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +20 -0
  57. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +36 -0
  58. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-sve.cpp +5 -0
  59. data/vendor/faiss/faiss/impl/pq_code_distance/pq_scan_impl.h +105 -0
  60. data/vendor/faiss/faiss/impl/pq_code_distance/rvv.cpp +2 -0
  61. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +6 -0
  62. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +327 -18
  63. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +264 -27
  64. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-impl.h +553 -0
  65. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512-spr.cpp +559 -0
  66. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +199 -27
  67. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +366 -3
  68. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +144 -19
  69. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-rvv.cpp +26 -0
  70. data/vendor/faiss/faiss/impl/simd_dispatch.h +65 -8
  71. data/vendor/faiss/faiss/index_factory.cpp +5 -1
  72. data/vendor/faiss/faiss/index_io.h +16 -0
  73. data/vendor/faiss/faiss/invlists/DirectMap.cpp +4 -1
  74. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +13 -13
  75. data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -2
  76. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +119 -22
  77. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +15 -5
  78. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +3 -2
  79. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +2 -1
  80. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +65 -24
  81. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +3 -2
  82. data/vendor/faiss/faiss/utils/bf16.h +34 -0
  83. data/vendor/faiss/faiss/utils/distances_simd.cpp +0 -1
  84. data/vendor/faiss/faiss/utils/hamming.cpp +8 -8
  85. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx2.cpp +2 -1
  86. data/vendor/faiss/faiss/utils/hamming_distance/hamming_avx512_spr.cpp +15 -0
  87. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512.h +6 -30
  88. data/vendor/faiss/faiss/utils/hamming_distance/hamming_computer-avx512_spr.h +171 -0
  89. data/vendor/faiss/faiss/utils/partitioning.cpp +0 -2
  90. data/vendor/faiss/faiss/utils/simd_impl/partitioning_simdlib256.h +14 -68
  91. data/vendor/faiss/faiss/utils/simd_impl/rabitq_avx512_spr.cpp +343 -0
  92. data/vendor/faiss/faiss/utils/simd_levels.cpp +12 -2
  93. metadata +12 -2
@@ -16,96 +16,127 @@
16
16
 
17
17
  namespace faiss {
18
18
 
19
- template <>
20
- int MinimaxHeap::pop_min_tpl<SIMDLevel::AVX512>(float* vmin_out) {
21
- assert(k > 0);
19
+ namespace {
20
+
21
+ /// Templated AVX512 implementation of "pop best" for both CMax (returns
22
+ /// the smallest distance) and CMin (returns the largest similarity).
23
+ template <class HC>
24
+ int pop_best_avx512(MinimaxHeapT<HC>& heap, float* vmin_out) {
25
+ using storage_idx_t = typename MinimaxHeapT<HC>::storage_idx_t;
22
26
  static_assert(
23
27
  std::is_same<storage_idx_t, int32_t>::value,
24
28
  "This code expects storage_idx_t to be int32_t");
29
+ assert(heap.k > 0);
25
30
 
26
- int32_t min_idx = -1;
27
- float min_dis = std::numeric_limits<float>::infinity();
31
+ constexpr float worst_v = HC::is_max
32
+ ? std::numeric_limits<float>::infinity()
33
+ : -std::numeric_limits<float>::infinity();
28
34
 
29
- __m512i min_indices = _mm512_set1_epi32(-1);
30
- __m512 min_distances =
31
- _mm512_set1_ps(std::numeric_limits<float>::infinity());
35
+ int32_t best_idx = -1;
36
+ float best_dis = worst_v;
37
+
38
+ __m512i best_indices = _mm512_set1_epi32(-1);
39
+ __m512 best_distances = _mm512_set1_ps(worst_v);
32
40
  __m512i current_indices = _mm512_setr_epi32(
33
41
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
34
42
  __m512i offset = _mm512_set1_epi32(16);
35
43
 
36
- // The following loop tracks the rightmost index with the min distance.
37
- // -1 index values are ignored.
38
- const size_t k16 = (k / 16) * 16;
44
+ auto best_vs_cand_mask = [](__m512 best_d, __m512 cand_d) -> __mmask16 {
45
+ // Returns the mask of lanes where the current best is already
46
+ // (strictly) better than the candidate.
47
+ if constexpr (HC::is_max) {
48
+ return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_LT_OS);
49
+ } else {
50
+ return _mm512_cmp_ps_mask(best_d, cand_d, _CMP_GT_OS);
51
+ }
52
+ };
53
+
54
+ const size_t k16 = (heap.k / 16) * 16;
39
55
  for (size_t iii = 0; iii < k16; iii += 16) {
40
56
  __m512i indices =
41
- _mm512_loadu_si512((const __m512i*)(ids.data() + iii));
42
- __m512 distances = _mm512_loadu_ps(dis.data() + iii);
57
+ _mm512_loadu_si512((const __m512i*)(heap.ids.data() + iii));
58
+ __m512 distances = _mm512_loadu_ps(heap.dis.data() + iii);
43
59
 
44
- // This mask filters out -1 values among indices.
45
60
  __mmask16 m1mask =
46
61
  _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
47
-
48
- __mmask16 dmask =
49
- _mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
62
+ __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
50
63
  __mmask16 finalmask = m1mask | dmask;
51
64
 
52
- const __m512i min_indices_new = _mm512_mask_blend_epi32(
53
- finalmask, current_indices, min_indices);
54
- const __m512 min_distances_new =
55
- _mm512_mask_blend_ps(finalmask, distances, min_distances);
65
+ const __m512i best_indices_new = _mm512_mask_blend_epi32(
66
+ finalmask, current_indices, best_indices);
67
+ const __m512 best_distances_new =
68
+ _mm512_mask_blend_ps(finalmask, distances, best_distances);
56
69
 
57
- min_indices = min_indices_new;
58
- min_distances = min_distances_new;
70
+ best_indices = best_indices_new;
71
+ best_distances = best_distances_new;
59
72
 
60
73
  current_indices = _mm512_add_epi32(current_indices, offset);
61
74
  }
62
75
 
63
- // leftovers
64
- if (k16 != static_cast<size_t>(k)) {
65
- const __mmask16 kmask = (1 << (k - k16)) - 1;
76
+ // Leftovers.
77
+ if (k16 != static_cast<size_t>(heap.k)) {
78
+ const __mmask16 kmask = (1 << (heap.k - k16)) - 1;
66
79
 
67
80
  __m512i indices = _mm512_mask_loadu_epi32(
68
- _mm512_set1_epi32(-1), kmask, ids.data() + k16);
69
- __m512 distances = _mm512_maskz_loadu_ps(kmask, dis.data() + k16);
81
+ _mm512_set1_epi32(-1), kmask, heap.ids.data() + k16);
82
+ __m512 distances = _mm512_maskz_loadu_ps(kmask, heap.dis.data() + k16);
70
83
 
71
- // This mask filters out -1 values among indices.
72
84
  __mmask16 m1mask =
73
85
  _mm512_cmpgt_epi32_mask(_mm512_setzero_si512(), indices);
74
-
75
- __mmask16 dmask =
76
- _mm512_cmp_ps_mask(min_distances, distances, _CMP_LT_OS);
86
+ __mmask16 dmask = best_vs_cand_mask(best_distances, distances);
77
87
  __mmask16 finalmask = m1mask | dmask;
78
88
 
79
- const __m512i min_indices_new = _mm512_mask_blend_epi32(
80
- finalmask, current_indices, min_indices);
81
- const __m512 min_distances_new =
82
- _mm512_mask_blend_ps(finalmask, distances, min_distances);
89
+ const __m512i best_indices_new = _mm512_mask_blend_epi32(
90
+ finalmask, current_indices, best_indices);
91
+ const __m512 best_distances_new =
92
+ _mm512_mask_blend_ps(finalmask, distances, best_distances);
83
93
 
84
- min_indices = min_indices_new;
85
- min_distances = min_distances_new;
94
+ best_indices = best_indices_new;
95
+ best_distances = best_distances_new;
86
96
  }
87
97
 
88
- // grab min distance
89
- min_dis = _mm512_reduce_min_ps(min_distances);
90
- // blend
91
- __mmask16 mindmask =
92
- _mm512_cmpeq_ps_mask(min_distances, _mm512_set1_ps(min_dis));
93
- // pick the max one
94
- min_idx = _mm512_mask_reduce_max_epi32(mindmask, min_indices);
98
+ // Horizontal best: min for CMax (distance), max for CMin (similarity).
99
+ if constexpr (HC::is_max) {
100
+ best_dis = _mm512_reduce_min_ps(best_distances);
101
+ } else {
102
+ best_dis = _mm512_reduce_max_ps(best_distances);
103
+ }
104
+ // Tiebreak by picking the rightmost (largest) index among lanes
105
+ // matching the best distance, matching the original behavior.
106
+ __mmask16 best_lane_mask =
107
+ _mm512_cmpeq_ps_mask(best_distances, _mm512_set1_ps(best_dis));
108
+ best_idx = _mm512_mask_reduce_max_epi32(best_lane_mask, best_indices);
95
109
 
96
- if (min_idx == -1) {
110
+ if (best_idx == -1) {
97
111
  return -1;
98
112
  }
99
113
 
100
114
  if (vmin_out) {
101
- *vmin_out = min_dis;
115
+ *vmin_out = best_dis;
102
116
  }
103
- int ret = ids[min_idx];
104
- ids[min_idx] = -1;
105
- --nvalid;
117
+ int ret = heap.ids[best_idx];
118
+ heap.ids[best_idx] = -1;
119
+ --heap.nvalid;
106
120
  return ret;
107
121
  }
108
122
 
123
+ } // namespace
124
+
125
+ // Explicit specializations for AVX512
126
+ template <>
127
+ int pop_min_tpl<CMax<float, int32_t>, SIMDLevel::AVX512>(
128
+ MinimaxHeapT<CMax<float, int32_t>>* heap,
129
+ float* vmin_out) {
130
+ return pop_best_avx512<CMax<float, int32_t>>(*heap, vmin_out);
131
+ }
132
+
133
+ template <>
134
+ int pop_min_tpl<CMin<float, int32_t>, SIMDLevel::AVX512>(
135
+ MinimaxHeapT<CMin<float, int32_t>>* heap,
136
+ float* vmin_out) {
137
+ return pop_best_avx512<CMin<float, int32_t>>(*heap, vmin_out);
138
+ }
139
+
109
140
  } // namespace faiss
110
141
 
111
142
  #endif // COMPILE_SIMD_AVX512
@@ -86,6 +86,7 @@ namespace faiss {
86
86
  namespace {
87
87
  size_t deserialization_loop_limit_ = 0;
88
88
  size_t deserialization_vector_byte_limit_ = uint64_t{1} << 40; // 1 TB
89
+ size_t deserialization_lattice_r2_limit_ = 0;
89
90
 
90
91
  #ifdef FAISS_ENABLE_SVS
91
92
  // Read and validate an SVSStorageKind from the stream. Centralizes the
@@ -122,6 +123,14 @@ void set_deserialization_vector_byte_limit(size_t value) {
122
123
  deserialization_vector_byte_limit_ = value;
123
124
  }
124
125
 
126
+ size_t get_deserialization_lattice_r2_limit() {
127
+ return deserialization_lattice_r2_limit_;
128
+ }
129
+
130
+ void set_deserialization_lattice_r2_limit(size_t value) {
131
+ deserialization_lattice_r2_limit_ = value;
132
+ }
133
+
125
134
  #define FAISS_CHECK_DESERIALIZATION_LOOP_LIMIT(val, field_name) \
126
135
  do { \
127
136
  auto limit_ = get_deserialization_loop_limit(); \
@@ -279,7 +288,7 @@ static void read_index_header(Index& idx, IOReader* f) {
279
288
  idx_t dummy;
280
289
  READ1(dummy);
281
290
  READ1(dummy);
282
- READ1(idx.is_trained);
291
+ READ1_BOOL(idx.is_trained);
283
292
  int metric_type_int;
284
293
  READ1(metric_type_int);
285
294
  idx.metric_type = metric_type_from_int(metric_type_int);
@@ -307,7 +316,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
307
316
  if (h == fourcc("Pcam")) {
308
317
  READ1(pca->epsilon);
309
318
  }
310
- READ1(pca->random_rotation);
319
+ READ1_BOOL(pca->random_rotation);
311
320
  if (h != fourcc("PCAm")) {
312
321
  READ1(pca->balanced_bins);
313
322
  }
@@ -323,7 +332,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
323
332
  } else if (h == fourcc("LTra")) {
324
333
  lt = std::make_unique<LinearTransform>();
325
334
  }
326
- READ1(lt->have_bias);
335
+ READ1_BOOL(lt->have_bias);
327
336
  READVECTOR(lt->A);
328
337
  READVECTOR(lt->b);
329
338
  FAISS_THROW_IF_NOT(
@@ -347,7 +356,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
347
356
  auto itqt = std::make_unique<ITQTransform>();
348
357
 
349
358
  READVECTOR(itqt->mean);
350
- READ1(itqt->do_pca);
359
+ READ1_BOOL(itqt->do_pca);
351
360
  {
352
361
  // Read, dereference, discard.
353
362
  auto sub_vt = read_VectorTransform_up(f);
@@ -376,7 +385,7 @@ std::unique_ptr<VectorTransform> read_VectorTransform_up(IOReader* f) {
376
385
  }
377
386
  READ1(vt->d_in);
378
387
  READ1(vt->d_out);
379
- READ1(vt->is_trained);
388
+ READ1_BOOL(vt->is_trained);
380
389
  FAISS_THROW_IF_NOT_FMT(
381
390
  vt->d_in >= 0,
382
391
  "invalid VectorTransform d_in=%d (must be >= 0)",
@@ -762,7 +771,7 @@ static void read_ResidualQuantizer_old(ResidualQuantizer& rq, IOReader* f) {
762
771
  "ResidualQuantizer nbits size %zd != M %zd",
763
772
  rq.nbits.size(),
764
773
  rq.M);
765
- READ1(rq.is_trained);
774
+ READ1_BOOL(rq.is_trained);
766
775
  READ1(rq.train_type);
767
776
  READ1(rq.max_beam_size);
768
777
  READVECTOR(rq.codebooks);
@@ -780,7 +789,7 @@ static void read_AdditiveQuantizer(AdditiveQuantizer& aq, IOReader* f) {
780
789
  FAISS_THROW_IF_NOT_FMT(
781
790
  aq.M > 0, "invalid AdditiveQuantizer M %zd, must be > 0", aq.M);
782
791
  READVECTOR(aq.nbits);
783
- READ1(aq.is_trained);
792
+ READ1_BOOL(aq.is_trained);
784
793
  READVECTOR(aq.codebooks);
785
794
  FAISS_THROW_IF_NOT_FMT(
786
795
  aq.nbits.size() == aq.M,
@@ -1035,6 +1044,8 @@ void read_ScalarQuantizer(
1035
1044
  ivsc->d,
1036
1045
  idx.d);
1037
1046
  READVECTOR(ivsc->trained);
1047
+ // Populate bits/code_size before the validation block uses ivsc->bits.
1048
+ ivsc->set_derived_sizes();
1038
1049
  // Validate trained vector size matches the quantizer type and dimension.
1039
1050
  // UNIFORM/NON_UNIFORM qtypes require training data; other qtypes
1040
1051
  // (fp16, bf16, 8bit_direct*) need none.
@@ -1075,6 +1086,16 @@ void read_ScalarQuantizer(
1075
1086
  case ScalarQuantizer::QT_8bit_tqmse:
1076
1087
  expected = 256 + 255;
1077
1088
  break;
1089
+ case ScalarQuantizer::QT_2bit_tq:
1090
+ case ScalarQuantizer::QT_3bit_tq:
1091
+ case ScalarQuantizer::QT_4bit_tq:
1092
+ case ScalarQuantizer::QT_5bit_tq: {
1093
+ // k centroids + (k-1) boundaries + 3 extra (seed + qjl_type)
1094
+ size_t mse_bits = ivsc->bits - 1;
1095
+ size_t k = size_t(1) << mse_bits;
1096
+ expected = k + (k - 1) + 3;
1097
+ break;
1098
+ }
1078
1099
  }
1079
1100
  if (ivsc->trained.empty() && expected > 0) {
1080
1101
  // Empty trained is only valid for untrained indices.
@@ -1102,7 +1123,19 @@ void read_ScalarQuantizer(
1102
1123
  }
1103
1124
  }
1104
1125
  }
1105
- ivsc->set_derived_sizes();
1126
+
1127
+ // TurboQ full types: extract seed and qjl_type from trained,
1128
+ // regenerate projection matrix.
1129
+ if (ScalarQuantizer::TurboQuantRefine::is_turboq_full(ivsc->qtype) &&
1130
+ ivsc->trained.size() >= 3) {
1131
+ size_t n = ivsc->trained.size();
1132
+ ivsc->turboq_refine.qjl_type =
1133
+ static_cast<uint8_t>(ivsc->trained[n - 1]);
1134
+ ivsc->turboq_refine.seed =
1135
+ ScalarQuantizer::TurboQuantRefine::unpack_seed(
1136
+ ivsc->trained[n - 3], ivsc->trained[n - 2]);
1137
+ ivsc->turboq_refine.init_projection(ivsc->d);
1138
+ }
1106
1139
  }
1107
1140
 
1108
1141
  static void validate_HNSW(const HNSW& hnsw) {
@@ -1246,7 +1279,7 @@ static void read_NSG(NSG& nsg, IOReader* f) {
1246
1279
  READ1(nsg.C);
1247
1280
  READ1(nsg.search_L);
1248
1281
  READ1(nsg.enterpoint);
1249
- READ1(nsg.is_built);
1282
+ READ1_BOOL(nsg.is_built);
1250
1283
 
1251
1284
  FAISS_THROW_IF_NOT_FMT(
1252
1285
  nsg.ntotal >= 0, "invalid NSG ntotal %d", nsg.ntotal);
@@ -1298,7 +1331,7 @@ static void read_NNDescent(NNDescent& nnd, IOReader* f) {
1298
1331
  READ1(nnd.iter);
1299
1332
  READ1(nnd.search_L);
1300
1333
  READ1(nnd.random_seed);
1301
- READ1(nnd.has_built);
1334
+ READ1_BOOL(nnd.has_built);
1302
1335
 
1303
1336
  FAISS_THROW_IF_NOT_FMT(
1304
1337
  nnd.ntotal >= 0, "invalid NNDescent ntotal %d", nnd.ntotal);
@@ -1437,7 +1470,7 @@ static std::unique_ptr<IndexIVFPQ> read_ivfpq(
1437
1470
 
1438
1471
  std::vector<std::vector<idx_t>> ids;
1439
1472
  read_ivf_header(ivpq.get(), f, legacy ? &ids : nullptr);
1440
- READ1(ivpq->by_residual);
1473
+ READ1_BOOL(ivpq->by_residual);
1441
1474
  READ1(ivpq->code_size);
1442
1475
  read_ProductQuantizer(&ivpq->pq, f);
1443
1476
 
@@ -1506,7 +1539,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1506
1539
  d, n_levels, batch_size);
1507
1540
  }
1508
1541
  READ1(idxp->ntotal);
1509
- READ1(idxp->is_trained);
1542
+ READ1_BOOL(idxp->is_trained);
1510
1543
  READVECTOR(idxp->codes);
1511
1544
  READVECTOR(idxp->cum_sums);
1512
1545
  idxp->verbose = false;
@@ -1531,8 +1564,8 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1531
1564
  auto idxl = std::make_unique<IndexLSH>();
1532
1565
  read_index_header(*idxl, f);
1533
1566
  READ1(idxl->nbits);
1534
- READ1(idxl->rotate_data);
1535
- READ1(idxl->train_thresholds);
1567
+ READ1_BOOL(idxl->rotate_data);
1568
+ READ1_BOOL(idxl->train_thresholds);
1536
1569
  READVECTOR(idxl->thresholds);
1537
1570
  int code_size_i;
1538
1571
  READ1(code_size_i);
@@ -1578,7 +1611,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1578
1611
  idxp->codes.size() == idxp->ntotal * idxp->code_size);
1579
1612
  if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
1580
1613
  READ1(idxp->search_type);
1581
- READ1(idxp->encode_signs);
1614
+ READ1_BOOL(idxp->encode_signs);
1582
1615
  READ1(idxp->polysemous_ht);
1583
1616
  }
1584
1617
  // Old versions of PQ all had metric_type set to INNER_PRODUCT
@@ -1741,7 +1774,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1741
1774
  READ1(idxaqfs->ntotal2);
1742
1775
  READ1(idxaqfs->M2);
1743
1776
 
1744
- READ1(idxaqfs->rescale_norm);
1777
+ READ1_BOOL(idxaqfs->rescale_norm);
1745
1778
  READ1(idxaqfs->norm_scale);
1746
1779
  READ1(idxaqfs->max_train_points);
1747
1780
 
@@ -1791,7 +1824,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1791
1824
  validate_aq_dimension_match(
1792
1825
  *ivaqfs->aq, ivaqfs->d, "IndexIVFAdditiveQuantizerFastScan");
1793
1826
 
1794
- READ1(ivaqfs->by_residual);
1827
+ READ1_BOOL(ivaqfs->by_residual);
1795
1828
  READ1(ivaqfs->implem);
1796
1829
  READ1(ivaqfs->bbs);
1797
1830
  READ1(ivaqfs->qbs);
@@ -1804,7 +1837,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1804
1837
  READ1(ivaqfs->qbs2);
1805
1838
  READ1(ivaqfs->M2);
1806
1839
 
1807
- READ1(ivaqfs->rescale_norm);
1840
+ READ1_BOOL(ivaqfs->rescale_norm);
1808
1841
  READ1(ivaqfs->norm_scale);
1809
1842
  READ1(ivaqfs->max_train_points);
1810
1843
 
@@ -1902,6 +1935,24 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1902
1935
  nsq);
1903
1936
  FAISS_THROW_IF_NOT_FMT(
1904
1937
  r2 > 0, "invalid IndexLattice r2 %d (must be > 0)", r2);
1938
+ {
1939
+ // ZnSphereCodecRec constructor populates a decode cache
1940
+ // whose build cost grows polynomially in r2. The
1941
+ // in-codec memory cap (lattice_Zn.cpp) bounds the cache
1942
+ // size but not the CPU cost of building it, so for small
1943
+ // dsq the cap permits enough decode() iterations to far
1944
+ // exceed reasonable load-time budgets. Callers that
1945
+ // operate on untrusted index payloads can opt in to a
1946
+ // tighter bound via set_deserialization_lattice_r2_limit;
1947
+ // the default of 0 preserves existing behavior.
1948
+ auto limit_ = get_deserialization_lattice_r2_limit();
1949
+ FAISS_THROW_IF_NOT_FMT(
1950
+ limit_ == 0 || static_cast<size_t>(r2) <= limit_,
1951
+ "IndexLattice r2=%d exceeds "
1952
+ "deserialization_lattice_r2_limit of %zd",
1953
+ r2,
1954
+ limit_);
1955
+ }
1905
1956
  int dsq = d / nsq;
1906
1957
  FAISS_THROW_IF_NOT_FMT(
1907
1958
  dsq >= 2 && (dsq & (dsq - 1)) == 0,
@@ -1956,7 +2007,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1956
2007
  if (h == fourcc("IwSQ")) {
1957
2008
  ivsc->by_residual = true;
1958
2009
  } else {
1959
- READ1(ivsc->by_residual);
2010
+ READ1_BOOL(ivsc->by_residual);
1960
2011
  }
1961
2012
  read_InvertedLists(*ivsc, f, io_flags);
1962
2013
  idx = std::move(ivsc);
@@ -1995,7 +2046,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
1995
2046
  iva->code_size,
1996
2047
  iva->aq->code_size,
1997
2048
  "IndexIVFAdditiveQuantizer");
1998
- READ1(iva->by_residual);
2049
+ READ1_BOOL(iva->by_residual);
1999
2050
  READ1(iva->use_precomputed_table);
2000
2051
  read_InvertedLists(*iva, f, io_flags);
2001
2052
  idx = std::move(iva);
@@ -2022,7 +2073,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2022
2073
  read_index_header(*indep, f);
2023
2074
  indep->quantizer = read_index(f, io_flags);
2024
2075
  bool has_vt;
2025
- READ1(has_vt);
2076
+ READ1_BOOL(has_vt);
2026
2077
  if (has_vt) {
2027
2078
  indep->vt = read_VectorTransform(f);
2028
2079
  }
@@ -2137,6 +2188,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2137
2188
  : std::make_unique<IndexIDMap>();
2138
2189
  read_index_header(*idxmap, f);
2139
2190
  idxmap->index = read_index(f, io_flags);
2191
+ FAISS_THROW_IF_NOT_MSG(idxmap->index, "IndexIDMap inner index is null");
2140
2192
  idxmap->own_fields = true;
2141
2193
  READVECTOR(idxmap->id_map);
2142
2194
  FAISS_THROW_IF_NOT_FMT(
@@ -2217,11 +2269,11 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2217
2269
  READVECTOR(idx_panorama->cum_sums);
2218
2270
  }
2219
2271
  if (h == fourcc("IHNc") || h == fourcc("IHc2")) {
2220
- READ1(idxhnsw->keep_max_size_level0);
2272
+ READ1_BOOL(idxhnsw->keep_max_size_level0);
2221
2273
  auto idx_hnsw_cagra = dynamic_cast<IndexHNSWCagra*>(idxhnsw.get());
2222
2274
  FAISS_THROW_IF_NOT_MSG(
2223
2275
  idx_hnsw_cagra, "dynamic_cast to IndexHNSWCagra failed");
2224
- READ1(idx_hnsw_cagra->base_level_only);
2276
+ READ1_BOOL(idx_hnsw_cagra->base_level_only);
2225
2277
  READ1(idx_hnsw_cagra->num_base_level_search_entrypoints);
2226
2278
  if (h == fourcc("IHc2")) {
2227
2279
  READ1(idx_hnsw_cagra->numeric_type_);
@@ -2237,6 +2289,12 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2237
2289
  idxhnsw->hnsw.levels.size(),
2238
2290
  idxhnsw->ntotal);
2239
2291
  idxhnsw->hnsw.is_panorama = (h == fourcc("IHfP"));
2292
+ // `HNSW::is_similarity` is intentionally not serialized, so we
2293
+ // re-derive it here from the persisted metric type. Without this,
2294
+ // a saved IP/similarity index would come back configured as a
2295
+ // distance index and silently produce wrong rankings on search.
2296
+ idxhnsw->hnsw.is_similarity =
2297
+ is_similarity_metric(idxhnsw->metric_type);
2240
2298
  idxhnsw->storage = read_index(f, io_flags);
2241
2299
  idxhnsw->own_fields = idxhnsw->storage != nullptr;
2242
2300
  // Cross-check storage ntotal and d against index
@@ -2372,7 +2430,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2372
2430
  } else if (h == fourcc("IwPf")) {
2373
2431
  auto ivpq = std::make_unique<IndexIVFPQFastScan>();
2374
2432
  read_ivf_header(ivpq.get(), f);
2375
- READ1(ivpq->by_residual);
2433
+ READ1_BOOL(ivpq->by_residual);
2376
2434
  READ1(ivpq->code_size);
2377
2435
  READ1(ivpq->bbs);
2378
2436
  READ1(ivpq->M2);
@@ -2512,7 +2570,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2512
2570
  read_ivf_header(ivrq.get(), f);
2513
2571
  read_RaBitQuantizer(ivrq->rabitq, f, ivrq->d, false);
2514
2572
  READ1(ivrq->code_size);
2515
- READ1(ivrq->by_residual);
2573
+ READ1_BOOL(ivrq->by_residual);
2516
2574
  READ1(ivrq->qb);
2517
2575
  // qb=0: Not quantized - direct distance computation on given float32s.
2518
2576
  // qb>0 && qb<=8: Scalar-quantized with qb bits of precision.
@@ -2535,7 +2593,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2535
2593
  read_RaBitQuantizer(
2536
2594
  ivrq->rabitq, f, ivrq->d, true); // Reads nb_bits from file
2537
2595
  READ1(ivrq->code_size);
2538
- READ1(ivrq->by_residual);
2596
+ READ1_BOOL(ivrq->by_residual);
2539
2597
  READ1(ivrq->qb);
2540
2598
  // qb=0: Not quantized - direct distance computation on given float32s.
2541
2599
  // qb>0 && qb<=8: Scalar-quantized with qb bits of precision.
@@ -2572,9 +2630,10 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2572
2630
  READ1(svs->construction_window_size);
2573
2631
  READ1(svs->max_candidate_pool_size);
2574
2632
  READ1(svs->prune_to);
2575
- READ1(svs->use_full_search_history);
2633
+ READ1_BOOL(svs->use_full_search_history);
2576
2634
 
2577
2635
  svs->storage_kind = read_svs_storage_kind(f);
2636
+ READ1_BOOL(svs->is_static);
2578
2637
 
2579
2638
  if (h == fourcc("ISVL")) {
2580
2639
  auto* leanvec = dynamic_cast<IndexSVSVamanaLeanVec*>(svs.get());
@@ -2584,7 +2643,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2584
2643
  }
2585
2644
 
2586
2645
  bool initialized;
2587
- READ1(initialized);
2646
+ READ1_BOOL(initialized);
2588
2647
  if (initialized) {
2589
2648
  faiss::svs_io::ReaderStreambuf rbuf(
2590
2649
  f, get_deserialization_vector_byte_limit());
@@ -2593,7 +2652,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2593
2652
  }
2594
2653
  if (h == fourcc("ISVL")) {
2595
2654
  bool trained;
2596
- READ1(trained);
2655
+ READ1_BOOL(trained);
2597
2656
  if (trained) {
2598
2657
  faiss::svs_io::ReaderStreambuf rbuf(
2599
2658
  f, get_deserialization_vector_byte_limit());
@@ -2616,7 +2675,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2616
2675
  read_index_header(*svs, f);
2617
2676
 
2618
2677
  bool initialized;
2619
- READ1(initialized);
2678
+ READ1_BOOL(initialized);
2620
2679
  if (initialized) {
2621
2680
  faiss::svs_io::ReaderStreambuf rbuf(
2622
2681
  f, get_deserialization_vector_byte_limit());
@@ -2639,7 +2698,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2639
2698
  READ1(svs_ivf->num_centroids);
2640
2699
  READ1(svs_ivf->minibatch_size);
2641
2700
  READ1(svs_ivf->num_iterations);
2642
- READ1(svs_ivf->is_hierarchical);
2701
+ READ1_BOOL(svs_ivf->is_hierarchical);
2643
2702
  READ1(svs_ivf->training_fraction);
2644
2703
  READ1(svs_ivf->hierarchical_level1_clusters);
2645
2704
  READ1(svs_ivf->seed);
@@ -2648,7 +2707,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2648
2707
  READ1(svs_ivf->num_threads);
2649
2708
  READ1(svs_ivf->intra_query_threads);
2650
2709
  svs_ivf->storage_kind = read_svs_storage_kind(f);
2651
- READ1(svs_ivf->is_static);
2710
+ READ1_BOOL(svs_ivf->is_static);
2652
2711
  if (h == fourcc("ISIL")) {
2653
2712
  auto* leanvec = dynamic_cast<IndexSVSIVFLeanVec*>(svs_ivf.get());
2654
2713
  FAISS_THROW_IF_NOT_MSG(
@@ -2657,7 +2716,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2657
2716
  }
2658
2717
 
2659
2718
  bool initialized;
2660
- READ1(initialized);
2719
+ READ1_BOOL(initialized);
2661
2720
  if (initialized) {
2662
2721
  faiss::svs_io::ReaderStreambuf rbuf(f);
2663
2722
  std::istream is(&rbuf);
@@ -2665,7 +2724,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2665
2724
  }
2666
2725
  if (h == fourcc("ISIL")) {
2667
2726
  bool trained;
2668
- READ1(trained);
2727
+ READ1_BOOL(trained);
2669
2728
  if (trained) {
2670
2729
  faiss::svs_io::ReaderStreambuf rbuf(f);
2671
2730
  std::istream is(&rbuf);
@@ -2687,7 +2746,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2687
2746
  auto ivrqfs = std::make_unique<IndexIVFRaBitQFastScan>();
2688
2747
  read_ivf_header(ivrqfs.get(), f);
2689
2748
  read_RaBitQuantizer(ivrqfs->rabitq, f, ivrqfs->d);
2690
- READ1(ivrqfs->by_residual);
2749
+ READ1_BOOL(ivrqfs->by_residual);
2691
2750
  READ1(ivrqfs->code_size);
2692
2751
  READ1(ivrqfs->bbs);
2693
2752
  READ1(ivrqfs->qbs2);
@@ -2698,7 +2757,7 @@ std::unique_ptr<Index> read_index_up(IOReader* f, int io_flags) {
2698
2757
  ivrqfs->qb > 0 && ivrqfs->qb <= 8,
2699
2758
  "invalid RaBitQ qb=%d (must be in [1, 8])",
2700
2759
  ivrqfs->qb);
2701
- READ1(ivrqfs->centered);
2760
+ READ1_BOOL(ivrqfs->centered);
2702
2761
 
2703
2762
  std::vector<uint8_t> legacy_flat_storage;
2704
2763
  if (is_legacy) {
@@ -2825,7 +2884,7 @@ static void read_index_binary_header(IndexBinary& idx, IOReader* f) {
2825
2884
  READ1(idx.d);
2826
2885
  READ1(idx.code_size);
2827
2886
  READ1(idx.ntotal);
2828
- READ1(idx.is_trained);
2887
+ READ1_BOOL(idx.is_trained);
2829
2888
  int metric_type_int;
2830
2889
  READ1(metric_type_int);
2831
2890
  idx.metric_type = metric_type_from_int(metric_type_int);
@@ -2984,6 +3043,8 @@ std::unique_ptr<IndexBinary> read_index_binary_up(IOReader* f, int io_flags) {
2984
3043
  read_index_binary_header(*idxff, f);
2985
3044
  idxff->own_fields = true;
2986
3045
  idxff->index = read_index(f, io_flags);
3046
+ FAISS_THROW_IF_NOT_MSG(
3047
+ idxff->index, "IndexBinaryFromFloat inner index is null");
2987
3048
  idx = std::move(idxff);
2988
3049
  } else if (h == fourcc("IBHf")) {
2989
3050
  auto idxhnsw = std::make_unique<IndexBinaryHNSW>();
@@ -3009,8 +3070,8 @@ std::unique_ptr<IndexBinary> read_index_binary_up(IOReader* f, int io_flags) {
3009
3070
  } else if (h == fourcc("IBHc")) {
3010
3071
  auto idxhnsw = std::make_unique<IndexBinaryHNSWCagra>();
3011
3072
  read_index_binary_header(*idxhnsw, f);
3012
- READ1(idxhnsw->keep_max_size_level0);
3013
- READ1(idxhnsw->base_level_only);
3073
+ READ1_BOOL(idxhnsw->keep_max_size_level0);
3074
+ READ1_BOOL(idxhnsw->base_level_only);
3014
3075
  READ1(idxhnsw->num_base_level_search_entrypoints);
3015
3076
  read_HNSW(idxhnsw->hnsw, f);
3016
3077
  idxhnsw->hnsw.is_panorama = false;
@@ -1044,6 +1044,7 @@ void write_index(const Index* idx, IOWriter* f, int io_flags) {
1044
1044
  WRITE1(svs->prune_to);
1045
1045
  WRITE1(svs->use_full_search_history);
1046
1046
  WRITE1(svs->storage_kind);
1047
+ WRITE1(svs->is_static);
1047
1048
 
1048
1049
  if (lean != nullptr) {
1049
1050
  WRITE1(lean->leanvec_d);
@@ -35,6 +35,31 @@ size_t get_deserialization_vector_byte_limit();
35
35
 
36
36
  #define READ1(x) READANDCHECK(&(x), 1)
37
37
 
38
+ // Reads a single byte into a bool, rejecting any byte that is not the
39
+ // canonical encoding for the platform's bool representation. Reading a
40
+ // non-canonical byte directly into a bool is undefined behavior and
41
+ // trips UBSan's invalid-bool-load check. To stay ABI-portable, we
42
+ // assign via the language-defined conversion (b != 0) and then compare
43
+ // the resulting bool's storage byte back against the byte we read - the
44
+ // roundtrip succeeds iff the input byte was already canonical on this
45
+ // platform. FAISS only ever writes the canonical encoding via
46
+ // WRITE1(bool), so well-formed indices roundtrip cleanly; corrupt or
47
+ // attacker-controlled input that places a non-canonical byte at a bool
48
+ // offset is rejected as a FaissException.
49
+ #define READ1_BOOL(x) \
50
+ { \
51
+ static_assert( \
52
+ sizeof(x) == 1, "READ1_BOOL: destination must be 1 byte"); \
53
+ uint8_t b; \
54
+ READANDCHECK(&b, 1); \
55
+ (x) = (b != 0); \
56
+ FAISS_THROW_IF_NOT_FMT( \
57
+ *reinterpret_cast<const uint8_t*>(&(x)) == b, \
58
+ "invalid bool encoding 0x%02x for %s", \
59
+ b, \
60
+ #x); \
61
+ }
62
+
38
63
  #define READ1_DUMMY(x_type) \
39
64
  { \
40
65
  x_type x = {}; \