faiss 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +2 -0
  4. data/ext/faiss/index.cpp +33 -6
  5. data/ext/faiss/index_binary.cpp +17 -4
  6. data/ext/faiss/kmeans.cpp +6 -6
  7. data/lib/faiss/version.rb +1 -1
  8. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  9. data/vendor/faiss/faiss/AutoTune.h +1 -1
  10. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  11. data/vendor/faiss/faiss/Clustering.h +2 -2
  12. data/vendor/faiss/faiss/IVFlib.cpp +26 -51
  13. data/vendor/faiss/faiss/IVFlib.h +1 -1
  14. data/vendor/faiss/faiss/Index.cpp +11 -0
  15. data/vendor/faiss/faiss/Index.h +34 -11
  16. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  17. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  21. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +8 -2
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  26. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  27. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  28. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  29. data/vendor/faiss/faiss/IndexFastScan.h +102 -7
  30. data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
  31. data/vendor/faiss/faiss/IndexFlat.h +81 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +93 -2
  33. data/vendor/faiss/faiss/IndexHNSW.h +58 -2
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  35. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  36. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  37. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  41. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  42. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  43. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  44. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +251 -0
  45. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  50. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +99 -8
  51. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -1
  52. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +828 -0
  53. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +252 -0
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  56. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  57. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  58. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  59. data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
  60. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  61. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  62. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
  64. data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
  65. data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -13
  66. data/vendor/faiss/faiss/IndexRaBitQ.h +11 -2
  67. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +731 -0
  68. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +175 -0
  69. data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
  70. data/vendor/faiss/faiss/IndexRefine.h +17 -0
  71. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  72. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  73. data/vendor/faiss/faiss/MetricType.h +1 -1
  74. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  75. data/vendor/faiss/faiss/clone_index.cpp +5 -1
  76. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  77. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
  78. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  79. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  80. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  81. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +11 -7
  82. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
  83. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  84. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  85. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  86. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  87. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  89. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  90. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  91. data/vendor/faiss/faiss/impl/DistanceComputer.h +77 -6
  92. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  93. data/vendor/faiss/faiss/impl/HNSW.cpp +295 -16
  94. data/vendor/faiss/faiss/impl/HNSW.h +35 -6
  95. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  96. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  97. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  98. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  100. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  101. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  102. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
  104. data/vendor/faiss/faiss/impl/Panorama.h +204 -0
  105. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  106. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  107. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  108. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  109. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  110. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  111. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  112. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
  113. data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
  114. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +294 -0
  115. data/vendor/faiss/faiss/impl/RaBitQUtils.h +330 -0
  116. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +304 -223
  117. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +72 -4
  118. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
  119. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
  120. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  121. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  122. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +7 -10
  123. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -4
  124. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  125. data/vendor/faiss/faiss/impl/index_read.cpp +238 -10
  126. data/vendor/faiss/faiss/impl/index_write.cpp +212 -19
  127. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  128. data/vendor/faiss/faiss/impl/io.h +4 -4
  129. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  130. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  131. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  132. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  133. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  134. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  135. data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  137. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  138. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  139. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  140. data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
  141. data/vendor/faiss/faiss/impl/svs_io.h +67 -0
  142. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  143. data/vendor/faiss/faiss/index_factory.cpp +217 -8
  144. data/vendor/faiss/faiss/index_factory.h +1 -1
  145. data/vendor/faiss/faiss/index_io.h +1 -1
  146. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
  147. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  148. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +115 -1
  149. data/vendor/faiss/faiss/invlists/InvertedLists.h +46 -0
  150. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  151. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  152. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
  153. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
  154. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
  155. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
  156. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
  157. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
  158. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
  159. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
  160. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
  161. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  162. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  163. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  164. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  165. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  166. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  167. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  168. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  169. data/vendor/faiss/faiss/utils/distances.cpp +0 -3
  170. data/vendor/faiss/faiss/utils/distances.h +2 -2
  171. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  172. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  173. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  174. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  176. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  177. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  178. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  179. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  181. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  183. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  184. data/vendor/faiss/faiss/utils/utils.cpp +9 -2
  185. data/vendor/faiss/faiss/utils/utils.h +2 -2
  186. metadata +29 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 61316c4fa0bbf7f85dfaeb6890d75b35e20e013a5ed25b8d87a3d252dfea2f50
- data.tar.gz: af0de6135077f184092dbfec00f9bf7492eb38da66d5ef5f4f2d020ce0486648
+ metadata.gz: 439ff96f613cd71e2a32197194d3814cf5ea516bd31489fdc2bd7a98747ec8ff
+ data.tar.gz: b2eaf07886acc74aaee4e6f1956a8c09f1d76da0ebaca14b6d4f3e889d728ef7
  SHA512:
- metadata.gz: 6d21e6186d57eec3852ab285f0bcd89affacb56577c661b514763478ef6d610f808a7d1e4cbc062278aec6cbc11959b204de237b9669dcfb58de4b3070be2c1e
- data.tar.gz: a65646746c2f558b48285d8c192d246ce4a95cead1f4413349e8eac4393ac4aa47ad3919764e7caa1abf517edb57153e385d7a012a3dbb22258a98c2795aa5b1
+ metadata.gz: 03d9a8aa01c86c176437adfab3038b91eb9b9991b2446f4b442114fac8e16133f213eec779d8bdedf80f13c643e96ec2ae39cfa9e1de4313148d53efa565d5c8
+ data.tar.gz: aa3ef7993a7411cbd480134e054528f069931c57ec55a8dd74c33642fa9debf203507f5744fa1ad178c04d142ac6efd482d63e6df1ced32e890fedd1def73769
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
+ ## 0.5.1 (2025-12-20)
+
+ - Updated Faiss to 1.13.2
+ - Added `reconstruct` method
+
+ ## 0.5.0 (2025-11-12)
+
+ - Updated Faiss to 1.13.0
+ - Added support for releasing GVL
+
  ## 0.4.3 (2025-10-26)

  - Fixed error with Rice 4.7
data/README.md CHANGED
@@ -53,6 +53,8 @@ Search
  distances, ids = index.search(objects, 3)
  ```

+ > Use `index.freeze` to release the GVL for searches
+
  Save an index

  ```ruby
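
For context, the README note added above corresponds to the following usage pattern. This is a minimal sketch based on the gem's README; the index type, dimensionality, and data are illustrative:

```ruby
require "faiss"
require "numo/narray"

objects = Numo::SFloat.new(100, 4).rand  # illustrative data: 100 vectors of dimension 4
index = Faiss::IndexFlatL2.new(4)
index.add(objects)

# A frozen index can no longer be modified (train/add/add_with_ids raise on a frozen object),
# but searches on it release the GVL, so multiple Ruby threads can search in parallel.
index.freeze
distances, ids = index.search(objects, 3)
```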
data/ext/faiss/index.cpp CHANGED
@@ -111,19 +111,28 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::Index &self, numo::SFloat objects) {
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "add_with_ids",
- [](faiss::Index &self, numo::SFloat objects, numo::Int64 ids) {
+ [](Rice::Object rb_self, numo::SFloat objects, numo::Int64 ids) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);
  if (ids.ndim() != 1 || ids.shape()[0] != n) {
  throw Rice::Exception(rb_eArgError, "expected ids to be 1d array with size %d", n);
@@ -132,13 +141,20 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "search",
- [](faiss::Index &self, numo::SFloat objects, size_t k) {
+ [](Rice::Object rb_self, numo::SFloat objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  auto n = check_shape(objects, self.d);

  auto distances = numo::SFloat({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
@@ -147,9 +163,20 @@ void init_index(Rice::Module& m) {
  })
  .define_method(
  "nprobe=",
- [](faiss::Index &self, double val) {
+ [](Rice::Object rb_self, double val) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::Index>{rb_self};
  faiss::ParameterSpace().set_index_parameter(&self, "nprobe", val);
  })
+ .define_method(
+ "reconstruct",
+ [](faiss::Index &self, int64_t key) {
+ auto d = static_cast<std::size_t>(self.d);
+ auto recons = numo::SFloat({d});
+ self.reconstruct(key, recons.write_ptr());
+ return recons;
+ })
  .define_method(
  "save",
  [](faiss::Index &self, Rice::String fname) {
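
The `reconstruct` binding added above returns the stored vector for a given id as a d-dimensional Numo::SFloat. A minimal sketch of calling it from Ruby (index type and data are illustrative):

```ruby
index = Faiss::IndexFlatL2.new(4)
index.add(Numo::SFloat.new(10, 4).rand)

vector = index.reconstruct(0)  # => Numo::SFloat of length 4, the vector stored with id 0
```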
data/ext/faiss/index_binary.cpp CHANGED
@@ -27,25 +27,38 @@ void init_index_binary(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.train(n, objects.read_ptr());
  })
  .define_method(
  "add",
- [](faiss::IndexBinary &self, numo::UInt8 objects) {
+ [](Rice::Object rb_self, numo::UInt8 objects) {
+ rb_check_frozen(rb_self.value());
+
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);
  self.add(n, objects.read_ptr());
  })
  .define_method(
  "search",
- [](faiss::IndexBinary &self, numo::UInt8 objects, size_t k) {
+ [](Rice::Object rb_self, numo::UInt8 objects, size_t k) {
+ auto &self = *Rice::Data_Object<faiss::IndexBinary>{rb_self};
  auto n = check_shape(objects, self.d / 8);

  auto distances = numo::Int32({n, k});
  auto labels = numo::Int64({n, k});

- self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ if (rb_self.is_frozen()) {
+ Rice::detail::no_gvl([&] {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ });
+ } else {
+ self.search(n, objects.read_ptr(), k, distances.write_ptr(), labels.write_ptr());
+ }

  Rice::Array ret;
  ret.push(std::move(distances), false);
data/ext/faiss/kmeans.cpp CHANGED
@@ -32,11 +32,11 @@ void init_kmeans(Rice::Module& m) {
  })
  .define_method(
  "train",
- [](Rice::Object self, numo::SFloat objects) {
- auto self_ptr = Rice::detail::From_Ruby<faiss::Clustering*>().convert(self.value());
- auto n = check_shape(objects, self_ptr->d);
- auto index = faiss::IndexFlatL2(self_ptr->d);
- self.iv_set("@index", Rice::Object(Rice::detail::To_Ruby<faiss::IndexFlatL2>().convert(index)));
- self_ptr->train(n, objects.read_ptr(), index);
+ [](Rice::Object rb_self, numo::SFloat objects) {
+ auto &self = *Rice::Data_Object<faiss::Clustering>{rb_self};
+ auto n = check_shape(objects, self.d);
+ auto index = faiss::IndexFlatL2(self.d);
+ rb_self.iv_set("@index", index);
+ self.train(n, objects.read_ptr(), index);
  });
  }
data/lib/faiss/version.rb CHANGED
@@ -1,3 +1,3 @@
  module Faiss
- VERSION = "0.4.3"
+ VERSION = "0.5.1"
  end
data/vendor/faiss/faiss/AutoTune.cpp CHANGED
@@ -20,15 +20,14 @@
  #include <faiss/utils/utils.h>

  #include <faiss/IndexHNSW.h>
+ #include <faiss/IndexIDMap.h>
  #include <faiss/IndexIVF.h>
- #include <faiss/IndexIVFFlat.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQR.h>
  #include <faiss/IndexPQ.h>
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexRefine.h>
  #include <faiss/IndexShardsIVF.h>
- #include <faiss/MetaIndexes.h>

  namespace faiss {

@@ -126,7 +125,7 @@ bool OperatingPoints::add(
  }
  } else {
  int i;
- // stricto sensu this should be a bissection
+ // stricto sensu this should be a bisection
  for (i = 0; i < a.size(); i++) {
  if (a[i].perf >= perf) {
  break;
data/vendor/faiss/faiss/AutoTune.h CHANGED
@@ -32,7 +32,7 @@ struct AutoTuneCriterion {

  AutoTuneCriterion(idx_t nq, idx_t nnn);

- /** Intitializes the gt_D and gt_I vectors. Must be called before evaluating
+ /** Initializes the gt_D and gt_I vectors. Must be called before evaluating
  *
  * @param gt_D_in size nq * gt_nnn
  * @param gt_I_in size nq * gt_nnn
data/vendor/faiss/faiss/Clustering.cpp CHANGED
@@ -212,7 +212,7 @@ void compute_centroids(
  * It works by slightly changing the centroids to make 2 clusters from
  * a single one. Takes the same arguments as compute_centroids.
  *
- * @return nb of spliting operations (larger is worse)
+ * @return nb of splitting operations (larger is worse)
  */
  int split_clusters(
  size_t d,
@@ -242,7 +242,7 @@ int split_clusters(
  centroids + cj * d,
  sizeof(*centroids) * d);

- /* small symmetric pertubation */
+ /* small symmetric perturbation */
  for (size_t j = 0; j < d; j++) {
  if (j % 2 == 0) {
  centroids[ci * d + j] *= 1 + EPS;
data/vendor/faiss/faiss/Clustering.h CHANGED
@@ -73,7 +73,7 @@ struct ClusteringIterationStats {
  * points to the centroids. Therefore, at each iteration the centroids
  * are added to the index.
  *
- * On output, the centoids table is set to the latest version
+ * On output, the centroids table is set to the latest version
  * of the centroids and they are also added to the index. If the
  * centroids table it is not empty on input, it is also used for
  * initialization.
@@ -109,7 +109,7 @@ struct Clustering : ClusteringParameters {

  /** run with encoded vectors
  *
- * win addition to train()'s parameters takes a codec as parameter
+ * in addition to train()'s parameters takes a codec as parameter
  * to decode the input vectors.
  *
  * @param codec codec used to decode the vectors (nullptr =
data/vendor/faiss/faiss/IVFlib.cpp CHANGED
@@ -9,7 +9,6 @@
  #include <omp.h>

  #include <memory>
- #include <numeric>

  #include <faiss/IndexAdditiveQuantizer.h>
  #include <faiss/IndexIVFAdditiveQuantizer.h>
@@ -58,7 +57,7 @@ void check_compatible_for_merge(const Index* index0, const Index* index1) {
  ivf0->check_compatible_for_merge(*ivf1);
  }

- // TODO: check as thoroughfully for other index types
+ // TODO: check as thoroughly for other index types
  }

  const IndexIVF* try_extract_index_ivf(const Index* index) {
@@ -559,19 +558,6 @@ void handle_ivf(
  const std::string& filename_template,
  ShardingFunction* sharding_function,
  bool generate_ids) {
- std::vector<faiss::IndexIVF*> sharded_indexes(shard_count);
- auto clone = static_cast<faiss::IndexIVF*>(faiss::clone_index(index));
- clone->quantizer->reset();
- for (int64_t i = 0; i < shard_count; i++) {
- sharded_indexes[i] =
- static_cast<faiss::IndexIVF*>(faiss::clone_index(clone));
- if (generate_ids) {
- // Assume the quantizer does not natively support add_with_ids.
- sharded_indexes[i]->quantizer =
- new IndexIDMap2(sharded_indexes[i]->quantizer);
- }
- }
-
  // assign centroids to each sharded Index based on sharding_function, and
  // add them to the quantizer of each sharded index
  std::vector<std::vector<float>> sharded_centroids(shard_count);
@@ -589,27 +575,29 @@ void handle_ivf(
  &reconstructed[index->quantizer->d]);
  delete[] reconstructed;
  }
+
+ auto clone = static_cast<faiss::IndexIVF*>(faiss::clone_index(index));
+ clone->quantizer->reset();
  for (int64_t i = 0; i < shard_count; i++) {
+ auto sharded_index =
+ static_cast<faiss::IndexIVF*>(faiss::clone_index(clone));
  if (generate_ids) {
- sharded_indexes[i]->quantizer->add_with_ids(
+ // Assume the quantizer does not natively support add_with_ids.
+ sharded_index->quantizer =
+ new IndexIDMap2(sharded_index->quantizer);
+ sharded_index->quantizer->add_with_ids(
  sharded_centroids[i].size() / index->quantizer->d,
  sharded_centroids[i].data(),
  xids[i].data());
  } else {
- sharded_indexes[i]->quantizer->add(
+ sharded_index->quantizer->add(
  sharded_centroids[i].size() / index->quantizer->d,
  sharded_centroids[i].data());
  }
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
  char fname[256];
  snprintf(fname, 256, filename_template.c_str(), i);
- faiss::write_index(sharded_indexes[i], fname);
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
- delete sharded_indexes[i];
+ faiss::write_index(sharded_index, fname);
+ delete sharded_index;
  }
  }

@@ -619,22 +607,6 @@ void handle_binary_ivf(
  const std::string& filename_template,
  ShardingFunction* sharding_function,
  bool generate_ids) {
- std::vector<faiss::IndexBinaryIVF*> sharded_indexes(shard_count);
-
- auto clone = static_cast<faiss::IndexBinaryIVF*>(
- faiss::clone_binary_index(index));
- clone->quantizer->reset();
-
- for (int64_t i = 0; i < shard_count; i++) {
- sharded_indexes[i] = static_cast<faiss::IndexBinaryIVF*>(
- faiss::clone_binary_index(clone));
- if (generate_ids) {
- // Assume the quantizer does not natively support add_with_ids.
- sharded_indexes[i]->quantizer =
- new IndexBinaryIDMap2(sharded_indexes[i]->quantizer);
- }
- }
-
  // assign centroids to each sharded Index based on sharding_function, and
  // add them to the quantizer of each sharded index
  int64_t reconstruction_size = index->quantizer->d / 8;
@@ -653,27 +625,30 @@ void handle_binary_ivf(
  &reconstructed[reconstruction_size]);
  delete[] reconstructed;
  }
+
+ auto clone = static_cast<faiss::IndexBinaryIVF*>(
+ faiss::clone_binary_index(index));
+ clone->quantizer->reset();
  for (int64_t i = 0; i < shard_count; i++) {
+ auto sharded_index = static_cast<faiss::IndexBinaryIVF*>(
+ faiss::clone_binary_index(clone));
  if (generate_ids) {
- sharded_indexes[i]->quantizer->add_with_ids(
+ // Assume the quantizer does not natively support add_with_ids.
+ sharded_index->quantizer =
+ new IndexBinaryIDMap2(sharded_index->quantizer);
+ sharded_index->quantizer->add_with_ids(
  sharded_centroids[i].size() / reconstruction_size,
  sharded_centroids[i].data(),
  xids[i].data());
  } else {
- sharded_indexes[i]->quantizer->add(
+ sharded_index->quantizer->add(
  sharded_centroids[i].size() / reconstruction_size,
  sharded_centroids[i].data());
  }
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
  char fname[256];
  snprintf(fname, 256, filename_template.c_str(), i);
- faiss::write_index_binary(sharded_indexes[i], fname);
- }
-
- for (int64_t i = 0; i < shard_count; i++) {
- delete sharded_indexes[i];
+ faiss::write_index_binary(sharded_index, fname);
+ delete sharded_index;
  }
  }

data/vendor/faiss/faiss/IVFlib.h CHANGED
@@ -100,7 +100,7 @@ struct SlidingIndexWindow {
  std::vector<std::vector<size_t>> sizes;

  /// index should be initially empty and trained
- SlidingIndexWindow(Index* index);
+ explicit SlidingIndexWindow(Index* index);

  /** Add one index to the current index and remove the oldest one.
  *
data/vendor/faiss/faiss/Index.cpp CHANGED
@@ -104,6 +104,17 @@ void Index::search_and_reconstruct(
  }
  }

+ void Index::search_subset(
+ idx_t n,
+ const float* x,
+ idx_t k_base,
+ const idx_t* base_labels,
+ idx_t k,
+ float* distances,
+ idx_t* labels) const {
+ FAISS_THROW_MSG("search_subset not implemented for this type of index");
+ }
+
  void Index::compute_residual(const float* x, float* residual, idx_t key) const {
  reconstruct(key, residual);
  for (size_t i = 0; i < d; i++) {
data/vendor/faiss/faiss/Index.h CHANGED
@@ -17,8 +17,8 @@
  #include <sstream>

  #define FAISS_VERSION_MAJOR 1
- #define FAISS_VERSION_MINOR 12
- #define FAISS_VERSION_PATCH 0
+ #define FAISS_VERSION_MINOR 13
+ #define FAISS_VERSION_PATCH 2

  // Macro to combine the version components into a single string
  #ifndef FAISS_STRINGIFY
@@ -78,7 +78,7 @@ inline size_t get_numeric_type_size(NumericType numeric_type) {
  }
  }

- /** Parent class for the optional search paramenters.
+ /** Parent class for the optional search parameters.
  *
  * Sub-classes with additional search parameters should inherit this class.
  * Ownership of the object fields is always to the caller.
@@ -125,11 +125,11 @@ struct Index {
  /** Perform training on a representative set of vectors
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d
+ * @param x training vectors, size n * d
  */
  virtual void train(idx_t n, const float* x);

- virtual void trainEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void train_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  train(n, static_cast<const float*>(x));
  } else {
@@ -147,7 +147,7 @@ struct Index {
  */
  virtual void add(idx_t n, const float* x) = 0;

- virtual void addEx(idx_t n, const void* x, NumericType numeric_type) {
+ virtual void add_ex(idx_t n, const void* x, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  add(n, static_cast<const float*>(x));
  } else {
@@ -165,7 +165,7 @@ struct Index {
  * @param xids if non-null, ids to store for the vectors (size n)
  */
  virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
- virtual void add_with_idsEx(
+ virtual void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -196,7 +196,7 @@ struct Index {
  idx_t* labels,
  const SearchParameters* params = nullptr) const = 0;

- virtual void searchEx(
+ virtual void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -258,7 +258,7 @@ struct Index {
  *
  * this function may not be defined for some indexes
  * @param key id of the vector to reconstruct
- * @param recons reconstucted vector (size d)
+ * @param recons reconstructed vector (size d)
  */
  virtual void reconstruct(idx_t key, float* recons) const;

@@ -268,7 +268,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param n number of vectors to reconstruct
  * @param keys ids of the vectors to reconstruct (size n)
- * @param recons reconstucted vector (size n * d)
+ * @param recons reconstructed vector (size n * d)
  */
  virtual void reconstruct_batch(idx_t n, const idx_t* keys, float* recons)
  const;
@@ -278,7 +278,7 @@ struct Index {
  * this function may not be defined for some indexes
  * @param i0 index of the first vector in the sequence
  * @param ni number of vectors in the sequence
- * @param recons reconstucted vector (size ni * d)
+ * @param recons reconstructed vector (size ni * d)
  */
  virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;

@@ -304,6 +304,29 @@ struct Index {
  float* recons,
  const SearchParameters* params = nullptr) const;

+ /** Similar to search, but operates on a potentially different subset
+ * of the dataset for each query.
+ *
+ * The default implementation fails with an assertion, as it is
+ * not supported by all indexes.
+ *
+ * @param n number of vectors
+ * @param x input vectors, size n * d
+ * @param k_base number of vectors to search from
+ * @param base_labels ids of the vectors to search from
+ * @param k desired number of results per query
+ * @param distances output pairwise distances, size n*k
+ * @param labels output labels of the NNs, size n*k
+ */
+ virtual void search_subset(
+ idx_t n,
+ const float* x,
+ idx_t k_base,
+ const idx_t* base_labels,
+ idx_t k,
+ float* distances,
+ idx_t* labels) const;
+
  /** Computes a residual vector after indexing encoding.
  *
  * The residual vector is the difference between a vector and the
data/vendor/faiss/faiss/Index2Layer.cpp CHANGED
@@ -82,7 +82,7 @@ void Index2Layer::train(idx_t n, const float* x) {

  std::unique_ptr<const float[]> del_x(x_in == x ? nullptr : x);

- std::vector<idx_t> assign(n); // assignement to coarse centroids
+ std::vector<idx_t> assign(n); // assignment to coarse centroids
  q1.quantizer->assign(n, x, assign.data());
  std::vector<float> residuals(n * d);
  for (idx_t i = 0; i < n; i++) {
data/vendor/faiss/faiss/Index2Layer.h CHANGED
@@ -23,7 +23,7 @@ struct IndexIVFPQ;
  /** Same as an IndexIVFPQ without the inverted lists: codes are stored
  * sequentially
  *
- * The class is mainly inteded to store encoded vectors that can be
+ * The class is mainly intended to store encoded vectors that can be
  * accessed randomly, the search function is not implemented.
  */
  struct Index2Layer : IndexFlatCodes {
@@ -47,7 +47,7 @@ struct Index2Layer : IndexFlatCodes {
  MetricType metric = METRIC_L2);

  Index2Layer();
- ~Index2Layer();
+ ~Index2Layer() override;
  void train(idx_t n, const float* x) override;

data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp CHANGED
@@ -86,6 +86,7 @@ struct AQDistanceComputerLUT : FlatCodesDistanceComputer {

  float bias;
  void set_query(const float* x) final {
+ q = x;
  // this is quite sub-optimal for multiple queries
  aq.compute_LUT(1, x, LUT.data());
  if (is_IP) {
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp CHANGED
@@ -11,6 +11,7 @@
  #include <memory>

  #include <faiss/impl/FaissAssert.h>
+ #include <faiss/impl/FastScanDistancePostProcessing.h>
  #include <faiss/impl/LocalSearchQuantizer.h>
  #include <faiss/impl/LookupTableScaler.h>
  #include <faiss/impl/ResidualQuantizer.h>
@@ -123,7 +124,8 @@ void IndexAdditiveQuantizerFastScan::estimate_norm_scale(
  }

  std::vector<float> dis_tables(n * M * ksub);
- compute_float_LUT(dis_tables.data(), n, x);
+ FastScanDistancePostProcessing empty_context;
+ compute_float_LUT(dis_tables.data(), n, x, empty_context);

  // here we compute the mean of scales for each query
  // TODO: try max of scales
@@ -153,7 +155,8 @@ void IndexAdditiveQuantizerFastScan::compute_codes(
  void IndexAdditiveQuantizerFastScan::compute_float_LUT(
  float* lut,
  idx_t n,
- const float* x) const {
+ const float* x,
+ const FastScanDistancePostProcessing&) const {
  if (metric_type == METRIC_INNER_PRODUCT) {
  aq->compute_LUT(n, x, lut, 1.0f);
  } else {
@@ -200,10 +203,12 @@ void IndexAdditiveQuantizerFastScan::search(
  }

  NormTableScaler scaler(norm_scale);
+ FastScanDistancePostProcessing context;
+ context.norm_scaler = &scaler;
  if (metric_type == METRIC_L2) {
- search_dispatch_implem<true>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<true>(n, x, k, distances, labels, context);
  } else {
- search_dispatch_implem<false>(n, x, k, distances, labels, &scaler);
+ search_dispatch_implem<false>(n, x, k, distances, labels, context);
  }
  }

data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h CHANGED
@@ -62,7 +62,11 @@ struct IndexAdditiveQuantizerFastScan : IndexFastScan {

  void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;

- void compute_float_LUT(float* lut, idx_t n, const float* x) const override;
+ void compute_float_LUT(
+ float* lut,
+ idx_t n,
+ const float* x,
+ const FastScanDistancePostProcessing& context) const override;

  void search(
  idx_t n,