faiss 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/faiss/index.cpp +25 -6
  4. data/ext/faiss/index_binary.cpp +17 -4
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  8. data/vendor/faiss/faiss/AutoTune.h +1 -1
  9. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  10. data/vendor/faiss/faiss/Clustering.h +2 -2
  11. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  12. data/vendor/faiss/faiss/IVFlib.h +1 -1
  13. data/vendor/faiss/faiss/Index.h +10 -10
  14. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  15. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  18. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  25. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  26. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  27. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  29. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  30. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  31. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  32. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  33. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  34. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  37. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  38. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  39. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  40. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  42. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  43. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  44. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  46. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  48. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  50. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  52. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  53. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  55. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  57. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  62. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  63. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  64. data/vendor/faiss/faiss/MetricType.h +1 -1
  65. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  66. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  67. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  68. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  69. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  72. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  73. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  75. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  77. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  79. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  80. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  81. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  82. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  84. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  85. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  86. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  89. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  91. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  92. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  93. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  94. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  95. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  97. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  98. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  101. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  103. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  104. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  105. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  107. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  108. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  109. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  110. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  111. data/vendor/faiss/faiss/impl/io.h +4 -4
  112. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  113. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  114. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  115. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  117. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  118. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  121. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  122. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  123. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  124. data/vendor/faiss/faiss/index_factory.h +1 -1
  125. data/vendor/faiss/faiss/index_io.h +1 -1
  126. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  127. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  128. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  129. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  130. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  131. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  132. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  133. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  134. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  136. data/vendor/faiss/faiss/utils/distances.h +2 -2
  137. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  138. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  139. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  140. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  141. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  142. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  143. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  144. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  145. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  146. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  149. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  150. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  151. data/vendor/faiss/faiss/utils/utils.h +2 -2
  152. metadata +12 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 61316c4fa0bbf7f85dfaeb6890d75b35e20e013a5ed25b8d87a3d252dfea2f50
- data.tar.gz: af0de6135077f184092dbfec00f9bf7492eb38da66d5ef5f4f2d020ce0486648
+ metadata.gz: f3306dbb81c9168dcd82435c99d3404ab8501ac9b948adb2db7238ab89c01003
+ data.tar.gz: b14a120e9cc67baba96816b52cf34388aae79fa690fc511918543921b64380d5
  SHA512:
- metadata.gz: 6d21e6186d57eec3852ab285f0bcd89affacb56577c661b514763478ef6d610f808a7d1e4cbc062278aec6cbc11959b204de237b9669dcfb58de4b3070be2c1e
- data.tar.gz: a65646746c2f558b48285d8c192d246ce4a95cead1f4413349e8eac4393ac4aa47ad3919764e7caa1abf517edb57153e385d7a012a3dbb22258a98c2795aa5b1
+ metadata.gz: b7af3f6540d283d886301137e5fed5dff32b0c0659e9c46d6c45808439e94a0a91f8c953d5897b480568169ec23cacc4224db523d6ab9b3737d7f0df1ce8190c
+ data.tar.gz: fbe44473d9e3c61427aee597fed40264ba83a8184c0fe5280b6bf181d9bcc396583f033a69e8d3ec643ee0d41ef435dfeef4aa4441ade67703cbd8259326cb81
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+ ## 0.5.0 (2025-11-12)
+
+ - Updated Faiss to 1.13.0
+ - Added support for releasing GVL
+
  ## 0.4.3 (2025-10-26)

  - Fixed error with Rice 4.7
data/ext/faiss/index.cpp CHANGED
@@ -111,19 +111,28 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "add_with_ids",
- [](faiss::Index &self, numo::SFloat objects, numo::Int64 ids) {
+ [](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  if (ids.ndim() != 1 || ids.shape()[0] != n) {
  throw Rice::Exception(rb_eArgError, "expected ids to be 1d array with size %d", n);
@@ -132,13 +141,20 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "search",
- [](faiss::Index &self, numo::SFloat objects, size_t k) {
+ [](Rice::Object rb_self, numo::SFloat objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);

  auto distances = numo::SFloat({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
@@ -147,7 +163,10 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "nprobe=",
- [](faiss::Index &self, double val) {
+ [](Rice::Object rb_self, double val) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
  })
  .define_method(
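The frozen-index path above is what the changelog's "Added support for releasing GVL" entry refers to: mutating methods (train, add, add_with_ids, nprobe=) now raise on a frozen index, while search on a frozen index runs without holding the GVL. A minimal Ruby sketch of how this might be used, assuming the gem's documented IndexFlatL2 and Numo APIs (the dimensions and counts below are illustrative, not taken from this diff):

    require "faiss"
    require "numo/narray"

    # Build and fill the index while it is still mutable.
    index = Faiss::IndexFlatL2.new(64)
    index.add(Numo::SFloat.new(1000, 64).rand)

    # Freezing signals that no further mutation will happen; a frozen index
    # searches without holding the GVL, so other Ruby threads keep running.
    index.freeze

    distances, ids = index.search(Numo::SFloat.new(10, 64).rand, 5)

    # Calling index.add(...) after freeze would raise FrozenError (rb_check_frozen).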
data/ext/faiss/index_binary.cpp CHANGED
@@ -27,25 +27,38 @@ void init_index_binary(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "search",
- [](faiss::IndexBinary &self, numo::UInt8 objects, size_t k) {
+ [](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);

  auto distances = numo::Int32({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
data/ext/faiss/kmeans.cpp CHANGED
@@ -32,11 +32,11 @@ void init_kmeans(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](Rice::Object self, numo::SFloat objects) {
- auto self_ptr = Rice::detail::From_Ruby<faiss::Clustering*>().convert(self.value());
- auto n = check_shape(objects, self_ptr->d);
- auto index = faiss::IndexFlatL2(self_ptr->d);
- self.iv_set("@index", Rice::Object(Rice::detail::To_Ruby<faiss::IndexFlatL2>().convert(index)));
- self_ptr->train(n, objects.read_ptr(), index);
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ auto &self = *Rice::Data_Object<faiss::Clustering>{rb_self};
+ auto n = check_shape(objects, self.d);
+ auto index = faiss::IndexFlatL2(self.d);
+ rb_self.iv_set("@index", index);
+ self.train(n, objects.read_ptr(), index);
  });
  }
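For context, the Clustering binding above backs Faiss::Kmeans in the gem; the rewritten lambda now stores the flat index in @index directly. A short usage sketch, assuming the gem's documented Kmeans API (dimension and cluster counts here are illustrative):

    require "faiss"
    require "numo/narray"

    kmeans = Faiss::Kmeans.new(64, 8)              # d = 64, k = 8 clusters
    kmeans.train(Numo::SFloat.new(1000, 64).rand)  # trains against an internal IndexFlatL2 (@index)
    kmeans.centroids                               # => Numo::SFloat of shape [8, 64]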
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Faiss
- VERSION = "0.4.3"
+ VERSION = "0.5.0"
  end
data/vendor/faiss/faiss/AutoTune.cpp CHANGED
@@ -20,15 +20,14 @@
  #include <faiss/utils/utils.h>

  #include <faiss/IndexHNSW.h>
+ #include <faiss/IndexIDMap.h>
  #include <faiss/IndexIVF.h>
- #include <faiss/IndexIVFFlat.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQR.h>
  #include <faiss/IndexPQ.h>
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexRefine.h>
  #include <faiss/IndexShardsIVF.h>
- #include <faiss/MetaIndexes.h>

  namespace faiss {

@@ -126,7 +125,7 @@ bool OperatingPoints::add(
  }
  } else {
  int i;
- // stricto sensu this should be a bissection
+ // stricto sensu this should be a bisection
  for (i = 0; i < a.size(); i++) {
  if (a[i].perf >= perf) {
  break;
data/vendor/faiss/faiss/AutoTune.h CHANGED
@@ -32,7 +32,7 @@ struct AutoTuneCriterion {

  AutoTuneCriterion(idx_t nq, idx_t nnn);

- /** Intitializes the gt_D and gt_I vectors. Must be called before evaluating
+ /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
  *
  * @param gt_D_in size nq * gt_nnn
  * @param gt_I_in size nq * gt_nnn
data/vendor/faiss/faiss/Clustering.cpp CHANGED
@@ -212,7 +212,7 @@ void compute_centroids(
  * It works by slightly changing the centroids to make 2 clusters from
  * a single one. Takes the same arguments as compute_centroids.
  *
- * @return nb of spliting operations (larger is worse)
+ * @return nb of splitting operations (larger is worse)
  */
  int split_clusters(
  size_t d,
@@ -242,7 +242,7 @@ int split_clusters(
  centroids + cj * d,
  sizeof(*centroids) * d);

- /* small symmetric pertubation */
+ /* small symmetric perturbation */
  for (size_t j = 0; j < d; j++) {
  if (j % 2 == 0) {
  centroids[ci * d + j] *= 1 + EPS;
data/vendor/faiss/faiss/Clustering.h CHANGED
@@ -73,7 +73,7 @@ struct ClusteringIterationStats {
  * points to the centroids. Therefore, at each iteration the centroids
  * are added to the index.
  *
- * On output, the centoids table is set to the latest version
+ * On output, the centroids table is set to the latest version
  * of the centroids and they are also added to the index. If the
  * centroids table it is not empty on input, it is also used for
  * initialization.
@@ -109,7 +109,7 @@ struct Clustering : ClusteringParameters {

  /** run with encoded vectors
  *
- * win addition to train()'s parameters takes a codec as parameter
+ * in addition to train()'s parameters takes a codec as parameter
  * to decode the input vectors.
  *
  * @param codec codec used to decode the vectors (nullptr =
data/vendor/faiss/faiss/IVFlib.cpp CHANGED
@@ -9,7 +9,6 @@
  #include <omp.h>

  #include <memory>
- #include <numeric>

  #include <faiss/IndexAdditiveQuantizer.h>
  #include <faiss/IndexIVFAdditiveQuantizer.h>
@@ -58,7 +57,7 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
  ivf0->check_compatible_for_merge(*ivf1);
  }

- // TODO: check as thoroughfully for other index types
+ // TODO: check as thoroughly for other index types
  }

  const IndexIVF* try_extract_index_ivf(const Index* index) {
data/vendor/faiss/faiss/IVFlib.h CHANGED
@@ -100,7 +100,7 @@ struct SlidingIndexWindow {
  std::vector<std::vector<size_t>> sizes;

  /// index should be initially empty and trained
- SlidingIndexWindow(Index* index);
+ explicit SlidingIndexWindow(Index* index);

  /** Add one index to the current index and remove the oldest one.
  *
data/vendor/faiss/faiss/Index.h CHANGED
@@ -17,7 +17,7 @@
  #include <sstream>

  #define FAISS_VERSION_MAJOR 1
- #define FAISS_VERSION_MINOR 12
+ #define FAISS_VERSION_MINOR 13
  #define FAISS_VERSION_PATCH 0

  // Macro to combine the version components into a single string
@@ -78,7 +78,7 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
  }
  }

- /** Parent class for the optional search paramenters.
+ /** Parent class for the optional search parameters.
  *
  * Sub-classes with additional search parameters should inherit this class.
  * Ownership of the object fields is always to the caller.
@@ -125,11 +125,11 @@ struct Index {
  /** Perform training on a representative set of vectors
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d
+ * @param x training vectors, size n * d
  */
  virtual void train(idx_t n, const float* x);

- virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void train_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  train(n, static_cast<const float*>(x));
  } else {
@@ -147,7 +147,7 @@ struct Index {
  */
  virtual void add(idx_t n, const float* x) = 0;

- virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void add_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  add(n, static_cast<const float*>(x));
  } else {
@@ -165,7 +165,7 @@ struct Index {
  * @param xids if non-null, ids to store for the vectors (size n)
  */
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
- virtual void add_with_idsEx(
+ virtual void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -196,7 +196,7 @@ struct Index {
  idx_t* labels,
  const SearchParameters* params = nullptr) const = 0;

- virtual void searchEx(
+ virtual void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -258,7 +258,7 @@ struct Index {
  *
  * this function may not be defined for some indexes
  * @param key id of the vector to reconstruct
- * @param recons reconstucted vector (size d)
+ * @param recons reconstructed vector (size d)
  */
  virtual void reconstruct(idx_t key, float* recons) const;

@@ -268,7 +268,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param n number of vectors to reconstruct
  * @param keys ids of the vectors to reconstruct (size n)
- * @param recons reconstucted vector (size n * d)
+ * @param recons reconstructed vector (size n * d)
  */
  virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
  const;
@@ -278,7 +278,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param i0 index of the first vector in the sequence
  * @param ni number of vectors in the sequence
- * @param recons reconstucted vector (size ni * d)
+ * @param recons reconstructed vector (size ni * d)
  */
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;

data/vendor/faiss/faiss/Index2Layer.cpp CHANGED
@@ -82,7 +82,7 @@ void Index2Layer::train(idx_t n, const float* x) {

  std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);

- std::vector<idx_t> assign(n); // assignement to coarse centroids
+ std::vector<idx_t> assign(n); // assignment to coarse centroids
  q1.quantizer->assign(n, x, assign.data());
  std::vector<float> residuals(n * d);
  for (idx_t i = 0; i < n; i++) {
data/vendor/faiss/faiss/Index2Layer.h CHANGED
@@ -23,7 +23,7 @@ struct IndexIVFPQ;

  /** Same as an IndexIVFPQ without the inverted lists: codes are stored
  * sequentially
  *
- * The class is mainly inteded to store encoded vectors that can be
+ * The class is mainly intended to store encoded vectors that can be
  * accessed randomly, the search function is not implemented.
  */
  struct Index2Layer : IndexFlatCodes {
@@ -47,7 +47,7 @@ struct Index2Layer : IndexFlatCodes {
  MetricType metric = METRIC_L2);

  Index2Layer();
- ~Index2Layer();
+ ~Index2Layer() override;

  void train(idx_t n, const float* x) override;
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp CHANGED
@@ -11,6 +11,7 @@
  #include <memory>

  #include <faiss/impl/FaissAssert.h>
+ #include <faiss/impl/FastScanDistancePostProcessing.h>
  #include <faiss/impl/LocalSearchQuantizer.h>
  #include <faiss/impl/LookupTableScaler.h>
  #include <faiss/impl/ResidualQuantizer.h>
@@ -123,7 +124,8 @@ void IndexAdditiveQuantizerFastScan::estimate_norm_scale(
  }

  std::vector<float> dis_tables(n * M * ksub);
- compute_float_LUT(dis_tables.data(), n, x);
+ FastScanDistancePostProcessing empty_context;
+ compute_float_LUT(dis_tables.data(), n, x, empty_context);

  // here we compute the mean of scales for each query
  // TODO: try max of scales
@@ -153,7 +155,8 @@ void IndexAdditiveQuantizerFastScan::compute_codes(
  void IndexAdditiveQuantizerFastScan::compute_float_LUT(
  float* lut,
  idx_t n,
- const float* x) const {
+ const float* x,
+ const FastScanDistancePostProcessing&) const {
  if (metric_type == METRIC_INNER_PRODUCT) {
  aq->compute_LUT(n, x, lut, 1.0f);
  } else {
@@ -200,10 +203,12 @@ void IndexAdditiveQuantizerFastScan::search(
  }

  NormTableScaler scaler(norm_scale);
+ FastScanDistancePostProcessing context;
+ context.norm_scaler = &scaler;
  if (metric_type == METRIC_L2) {
- search_dispatch_implem<true>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<true>(n, x, k, distances, labels, context);
  } else {
- search_dispatch_implem<false>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<false>(n, x, k, distances, labels, context);
  }
  }

data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h CHANGED
@@ -62,7 +62,11 @@ struct IndexAdditiveQuantizerFastScan : IndexFastScan {

  void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;

- void compute_float_LUT(float* lut, idx_t n, const float* x) const override;
+ void compute_float_LUT(
+ float* lut,
+ idx_t n,
+ const float* x,
+ const FastScanDistancePostProcessing& context) const override;

  void search(
  idx_t n,
data/vendor/faiss/faiss/IndexBinary.h CHANGED
@@ -49,10 +49,10 @@ struct IndexBinary {
  /** Perform training on a representative set of vectors.
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d / 8
+ * @param x training vectors, size n * d / 8
  */
  virtual void train(idx_t n, const uint8_t* x);
- virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void train_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::UInt8) {
  train(n, static_cast<const uint8_t*>(x));
  } else {
@@ -66,7 +66,7 @@ struct IndexBinary {
  * @param x input matrix, size n * d / 8
  */
  virtual void add(idx_t n, const uint8_t* x) = 0;
- virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void add_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::UInt8) {
  add(n, static_cast<const uint8_t*>(x));
  } else {
@@ -82,7 +82,7 @@ struct IndexBinary {
  * @param xids if non-null, ids to store for the vectors (size n)
  */
  virtual void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids);
- virtual void add_with_idsEx(
+ virtual void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -111,7 +111,7 @@ struct IndexBinary {
  int32_t* distances,
  idx_t* labels,
  const SearchParameters* params = nullptr) const = 0;
- virtual void searchEx(
+ virtual void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -172,14 +172,14 @@ struct IndexBinary {
  *
  * This function may not be defined for some indexes.
  * @param key id of the vector to reconstruct
- * @param recons reconstucted vector (size d / 8)
+ * @param recons reconstructed vector (size d / 8)
  */
  virtual void reconstruct(idx_t key, uint8_t* recons) const;

  /** Reconstruct vectors i0 to i0 + ni - 1.
  *
  * This function may not be defined for some indexes.
- * @param recons reconstucted vectors (size ni * d / 8)
+ * @param recons reconstructed vectors (size ni * d / 8)
  */
  virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const;

data/vendor/faiss/faiss/IndexBinaryFromFloat.h CHANGED
@@ -32,7 +32,7 @@ struct IndexBinaryFromFloat : IndexBinary {

  explicit IndexBinaryFromFloat(Index* index);

- ~IndexBinaryFromFloat();
+ ~IndexBinaryFromFloat() override;

  void add(idx_t n, const uint8_t* x) override;

data/vendor/faiss/faiss/IndexBinaryHNSW.cpp CHANGED
@@ -290,7 +290,9 @@ struct FlatHammingDis : DistanceComputer {

  ~FlatHammingDis() override {
  #pragma omp critical
- { hnsw_stats.ndis += ndis; }
+ {
+ hnsw_stats.ndis += ndis;
+ }
  }
  };

data/vendor/faiss/faiss/IndexBinaryHNSW.h CHANGED
@@ -36,7 +36,7 @@ struct IndexBinaryHNSW : IndexBinary {

  // When set to true, all neighbors in level 0 are filled up
  // to the maximum size allowed (2 * M). This option is used by
- // IndexBinaryHHNSW to create a full base layer graph that is
+ // IndexBinaryHNSW to create a full base layer graph that is
  // used when GpuIndexBinaryCagra::copyFrom(IndexBinaryHNSW*) is called.
  bool keep_max_size_level0 = false;

data/vendor/faiss/faiss/IndexBinaryHash.cpp CHANGED
@@ -177,8 +177,8 @@ void search_single_query_template(
  struct Run_search_single_query {
  using T = void;
  template <class HammingComputer, class... Types>
- T f(Types... args) {
- search_single_query_template<HammingComputer>(args...);
+ T f(Types*... args) {
+ search_single_query_template<HammingComputer>(*args...);
  }
  };

@@ -192,7 +192,7 @@ void search_single_query(
  size_t& ndis) {
  Run_search_single_query r;
  dispatch_HammingComputer(
- index.code_size, r, index, q, res, n0, nlist, ndis);
+ index.code_size, r, &index, &q, &res, &n0, &nlist, &ndis);
  }

  } // anonymous namespace
data/vendor/faiss/faiss/IndexBinaryHash.h CHANGED
@@ -66,10 +66,10 @@ struct IndexBinaryHash : IndexBinary {
  };

  struct IndexBinaryHashStats {
- size_t nq; // nb of queries run
- size_t n0; // nb of empty lists
- size_t nlist; // nb of non-empty inverted lists scanned
- size_t ndis; // nb of distancs computed
+ size_t nq; // nb of queries run
+ size_t n0; // nb of empty lists
+ size_t nlist; // nb of non-empty inverted lists scanned
+ size_t ndis{}; // nb of distances computed

  IndexBinaryHashStats() {
  reset();
@@ -99,7 +99,7 @@ struct IndexBinaryMultiHash : IndexBinary {

  IndexBinaryMultiHash();

- ~IndexBinaryMultiHash();
+ ~IndexBinaryMultiHash() override;

  void reset() override;

data/vendor/faiss/faiss/IndexBinaryIVF.cpp CHANGED
@@ -492,12 +492,13 @@ void search_knn_hamming_count(

  std::vector<HCounterState<HammingComputer>> cs;
  for (size_t i = 0; i < nx; ++i) {
- cs.push_back(HCounterState<HammingComputer>(
- all_counters.data() + i * nBuckets,
- all_ids_per_dis.get() + i * nBuckets * k,
- x + i * ivf->code_size,
- ivf->d,
- k));
+ cs.push_back(
+ HCounterState<HammingComputer>(
+ all_counters.data() + i * nBuckets,
+ all_ids_per_dis.get() + i * nBuckets * k,
+ x + i * ivf->code_size,
+ ivf->d,
+ k));
  }

  size_t nlistv = 0, ndis = 0;