faiss 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/IndexAdditiveQuantizer.h>
11
+ #include <faiss/IndexFastScan.h>
12
+ #include <faiss/impl/AdditiveQuantizer.h>
13
+ #include <faiss/impl/ProductAdditiveQuantizer.h>
14
+ #include <faiss/utils/AlignedTable.h>
15
+
16
+ namespace faiss {
17
+
18
+ /** Fast scan version of IndexAQ. Works for 4-bit AQ for now.
19
+ *
20
+ * The codes are not stored sequentially but grouped in blocks of size bbs.
21
+ * This makes it possible to compute distances quickly with SIMD instructions.
22
+ *
23
+ * Implementations:
24
+ * 12: blocked loop with internal loop on Q with qbs
25
+ * 13: same with reservoir accumulator to store results
26
+ * 14: no qbs with heap accumulator
27
+ * 15: no qbs with reservoir accumulator
28
+ */
29
+
30
+ struct IndexAdditiveQuantizerFastScan : IndexFastScan {
31
+ AdditiveQuantizer* aq;
32
+ using Search_type_t = AdditiveQuantizer::Search_type_t;
33
+
34
+ bool rescale_norm = true;
35
+ int norm_scale = 1;
36
+
37
+ // max number of training vectors
38
+ size_t max_train_points = 0;
39
+
40
+ explicit IndexAdditiveQuantizerFastScan(
41
+ AdditiveQuantizer* aq,
42
+ MetricType metric = METRIC_L2,
43
+ int bbs = 32);
44
+
45
+ void init(
46
+ AdditiveQuantizer* aq,
47
+ MetricType metric = METRIC_L2,
48
+ int bbs = 32);
49
+
50
+ IndexAdditiveQuantizerFastScan();
51
+
52
+ ~IndexAdditiveQuantizerFastScan() override;
53
+
54
+ /// build from an existing IndexAQ
55
+ explicit IndexAdditiveQuantizerFastScan(
56
+ const IndexAdditiveQuantizer& orig,
57
+ int bbs = 32);
58
+
59
+ void train(idx_t n, const float* x) override;
60
+
61
+ void estimate_norm_scale(idx_t n, const float* x);
62
+
63
+ void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
64
+
65
+ void compute_float_LUT(float* lut, idx_t n, const float* x) const override;
66
+
67
+ void search(
68
+ idx_t n,
69
+ const float* x,
70
+ idx_t k,
71
+ float* distances,
72
+ idx_t* labels,
73
+ const SearchParameters* params = nullptr) const override;
74
+
75
+ /** Decode a set of vectors.
76
+ *
77
+ * NOTE: The codes in the IndexAdditiveQuantizerFastScan object are non-
78
+ * contiguous. But this method requires a contiguous representation.
79
+ *
80
+ * @param n number of vectors
81
+ * @param bytes input encoded vectors, size n * code_size
82
+ * @param x output vectors, size n * d
83
+ */
84
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
85
+ };
86
+
87
+ /** Index based on a residual quantizer. Stored vectors are
88
+ * approximated by residual quantization codes.
89
+ * Can also be used as a codec
90
+ */
91
+ struct IndexResidualQuantizerFastScan : IndexAdditiveQuantizerFastScan {
92
+ /// The residual quantizer used to encode the vectors
93
+ ResidualQuantizer rq;
94
+
95
+ /** Constructor.
96
+ *
97
+ * @param d dimensionality of the input vectors
98
+ * @param M number of subquantizers
99
+ * @param nbits number of bit per subvector index
100
+ * @param metric metric type
101
+ * @param search_type AQ search type
102
+ */
103
+ IndexResidualQuantizerFastScan(
104
+ int d, ///< dimensionality of the input vectors
105
+ size_t M, ///< number of subquantizers
106
+ size_t nbits, ///< number of bit per subvector index
107
+ MetricType metric = METRIC_L2,
108
+ Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
109
+ int bbs = 32);
110
+
111
+ IndexResidualQuantizerFastScan();
112
+ };
113
+
114
+ /** Index based on a local search quantizer. Stored vectors are
115
+ * approximated by local search quantization codes.
116
+ * Can also be used as a codec
117
+ */
118
+ struct IndexLocalSearchQuantizerFastScan : IndexAdditiveQuantizerFastScan {
119
+ LocalSearchQuantizer lsq;
120
+
121
+ /** Constructor.
122
+ *
123
+ * @param d dimensionality of the input vectors
124
+ * @param M number of subquantizers
125
+ * @param nbits number of bit per subvector index
126
+ * @param metric metric type
127
+ * @param search_type AQ search type
128
+ */
129
+ IndexLocalSearchQuantizerFastScan(
130
+ int d, ///< dimensionality of the input vectors
131
+ size_t M, ///< number of subquantizers
132
+ size_t nbits, ///< number of bit per subvector index
133
+ MetricType metric = METRIC_L2,
134
+ Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
135
+ int bbs = 32);
136
+
137
+ IndexLocalSearchQuantizerFastScan();
138
+ };
139
+
140
+ /** Index based on a product residual quantizer. Stored vectors are
141
+ * approximated by product residual quantization codes.
142
+ * Can also be used as a codec
143
+ */
144
+ struct IndexProductResidualQuantizerFastScan : IndexAdditiveQuantizerFastScan {
145
+ /// The product residual quantizer used to encode the vectors
146
+ ProductResidualQuantizer prq;
147
+
148
+ /** Constructor.
149
+ *
150
+ * @param d dimensionality of the input vectors
151
+ * @param nsplits number of residual quantizers
152
+ * @param Msub number of subquantizers per RQ
153
+ * @param nbits number of bit per subvector index
154
+ * @param metric metric type
155
+ * @param search_type AQ search type
156
+ */
157
+ IndexProductResidualQuantizerFastScan(
158
+ int d, ///< dimensionality of the input vectors
159
+ size_t nsplits, ///< number of residual quantizers
160
+ size_t Msub, ///< number of subquantizers per RQ
161
+ size_t nbits, ///< number of bit per subvector index
162
+ MetricType metric = METRIC_L2,
163
+ Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
164
+ int bbs = 32);
165
+
166
+ IndexProductResidualQuantizerFastScan();
167
+ };
168
+
169
+ /** Index based on a product local search quantizer. Stored vectors are
170
+ * approximated by product local search quantization codes.
171
+ * Can also be used as a codec
172
+ */
173
+ struct IndexProductLocalSearchQuantizerFastScan
174
+ : IndexAdditiveQuantizerFastScan {
175
+ /// The product local search quantizer used to encode the vectors
176
+ ProductLocalSearchQuantizer plsq;
177
+
178
+ /** Constructor.
179
+ *
180
+ * @param d dimensionality of the input vectors
181
+ * @param nsplits number of local search quantizers
182
+ * @param Msub number of subquantizers per LSQ
183
+ * @param nbits number of bit per subvector index
184
+ * @param metric metric type
185
+ * @param search_type AQ search type
186
+ */
187
+ IndexProductLocalSearchQuantizerFastScan(
188
+ int d, ///< dimensionality of the input vectors
189
+ size_t nsplits, ///< number of local search quantizers
190
+ size_t Msub, ///< number of subquantizers per LSQ
191
+ size_t nbits, ///< number of bit per subvector index
192
+ MetricType metric = METRIC_L2,
193
+ Search_type_t search_type = AdditiveQuantizer::ST_norm_rq2x4,
194
+ int bbs = 32);
195
+
196
+ IndexProductLocalSearchQuantizerFastScan();
197
+ };
198
+
199
+ } // namespace faiss
@@ -21,8 +21,12 @@ void IndexBinary::train(idx_t, const uint8_t*) {
21
21
  // Does nothing by default.
22
22
  }
23
23
 
24
- void IndexBinary::range_search(idx_t, const uint8_t*, int, RangeSearchResult*)
25
- const {
24
+ void IndexBinary::range_search(
25
+ idx_t,
26
+ const uint8_t*,
27
+ int,
28
+ RangeSearchResult*,
29
+ const SearchParameters*) const {
26
30
  FAISS_THROW_MSG("range search not implemented");
27
31
  }
28
32
 
@@ -57,10 +61,11 @@ void IndexBinary::search_and_reconstruct(
57
61
  idx_t k,
58
62
  int32_t* distances,
59
63
  idx_t* labels,
60
- uint8_t* recons) const {
64
+ uint8_t* recons,
65
+ const SearchParameters* params) const {
61
66
  FAISS_THROW_IF_NOT(k > 0);
62
67
 
63
- search(n, x, k, distances, labels);
68
+ search(n, x, k, distances, labels, params);
64
69
  for (idx_t i = 0; i < n; ++i) {
65
70
  for (idx_t j = 0; j < k; ++j) {
66
71
  idx_t ij = i * k + j;
@@ -82,4 +87,15 @@ void IndexBinary::display() const {
82
87
  ntotal);
83
88
  }
84
89
 
90
+ void IndexBinary::merge_from(
91
+ IndexBinary& /* otherIndex */,
92
+ idx_t /* add_id */) {
93
+ FAISS_THROW_MSG("merge_from() not implemented");
94
+ }
95
+
96
+ void IndexBinary::check_compatible_for_merge(
97
+ const IndexBinary& /* otherIndex */) const {
98
+ FAISS_THROW_MSG("check_compatible_for_merge() not implemented");
99
+ }
100
+
85
101
  } // namespace faiss
@@ -97,7 +97,8 @@ struct IndexBinary {
97
97
  const uint8_t* x,
98
98
  idx_t k,
99
99
  int32_t* distances,
100
- idx_t* labels) const = 0;
100
+ idx_t* labels,
101
+ const SearchParameters* params = nullptr) const = 0;
101
102
 
102
103
  /** Query n vectors of dimension d to the index.
103
104
  *
@@ -117,7 +118,8 @@ struct IndexBinary {
117
118
  idx_t n,
118
119
  const uint8_t* x,
119
120
  int radius,
120
- RangeSearchResult* result) const;
121
+ RangeSearchResult* result,
122
+ const SearchParameters* params = nullptr) const;
121
123
 
122
124
  /** Return the indexes of the k vectors closest to the query x.
123
125
  *
@@ -164,10 +166,23 @@ struct IndexBinary {
164
166
  idx_t k,
165
167
  int32_t* distances,
166
168
  idx_t* labels,
167
- uint8_t* recons) const;
169
+ uint8_t* recons,
170
+ const SearchParameters* params = nullptr) const;
168
171
 
169
172
  /** Display the actual class name and some more info. */
170
173
  void display() const;
174
+
175
+ /** moves the entries from another dataset to self.
176
+ * On output, other is empty.
177
+ * add_id is added to all moved ids
178
+ * (for sequential ids, this would be this->ntotal) */
179
+ virtual void merge_from(IndexBinary& otherIndex, idx_t add_id = 0);
180
+
181
+ /** check that the two indexes are compatible (ie, they are
182
+ * trained in the same way and have the same
183
+ * parameters). Otherwise throw. */
184
+ virtual void check_compatible_for_merge(
185
+ const IndexBinary& otherIndex) const;
171
186
  };
172
187
 
173
188
  } // namespace faiss
@@ -11,6 +11,7 @@
11
11
 
12
12
  #include <faiss/impl/AuxIndexStructures.h>
13
13
  #include <faiss/impl/FaissAssert.h>
14
+ #include <faiss/impl/IDSelector.h>
14
15
  #include <faiss/utils/Heap.h>
15
16
  #include <faiss/utils/hamming.h>
16
17
  #include <faiss/utils/utils.h>
@@ -35,7 +36,10 @@ void IndexBinaryFlat::search(
35
36
  const uint8_t* x,
36
37
  idx_t k,
37
38
  int32_t* distances,
38
- idx_t* labels) const {
39
+ idx_t* labels,
40
+ const SearchParameters* params) const {
41
+ FAISS_THROW_IF_NOT_MSG(
42
+ !params, "search params not supported for this index");
39
43
  FAISS_THROW_IF_NOT(k > 0);
40
44
 
41
45
  const idx_t block_size = query_batch_size;
@@ -101,7 +105,10 @@ void IndexBinaryFlat::range_search(
101
105
  idx_t n,
102
106
  const uint8_t* x,
103
107
  int radius,
104
- RangeSearchResult* result) const {
108
+ RangeSearchResult* result,
109
+ const SearchParameters* params) const {
110
+ FAISS_THROW_IF_NOT_MSG(
111
+ !params, "search params not supported for this index");
105
112
  hamming_range_search(x, xb.data(), n, ntotal, radius, code_size, result);
106
113
  }
107
114
 
@@ -39,13 +39,15 @@ struct IndexBinaryFlat : IndexBinary {
39
39
  const uint8_t* x,
40
40
  idx_t k,
41
41
  int32_t* distances,
42
- idx_t* labels) const override;
42
+ idx_t* labels,
43
+ const SearchParameters* params = nullptr) const override;
43
44
 
44
45
  void range_search(
45
46
  idx_t n,
46
47
  const uint8_t* x,
47
48
  int radius,
48
- RangeSearchResult* result) const override;
49
+ RangeSearchResult* result,
50
+ const SearchParameters* params = nullptr) const override;
49
51
 
50
52
  void reconstruct(idx_t key, uint8_t* recons) const override;
51
53
 
@@ -52,7 +52,10 @@ void IndexBinaryFromFloat::search(
52
52
  const uint8_t* x,
53
53
  idx_t k,
54
54
  int32_t* distances,
55
- idx_t* labels) const {
55
+ idx_t* labels,
56
+ const SearchParameters* params) const {
57
+ FAISS_THROW_IF_NOT_MSG(
58
+ !params, "search params not supported for this index");
56
59
  FAISS_THROW_IF_NOT(k > 0);
57
60
 
58
61
  constexpr idx_t bs = 32768;
@@ -43,7 +43,8 @@ struct IndexBinaryFromFloat : IndexBinary {
43
43
  const uint8_t* x,
44
44
  idx_t k,
45
45
  int32_t* distances,
46
- idx_t* labels) const override;
46
+ idx_t* labels,
47
+ const SearchParameters* params = nullptr) const override;
47
48
 
48
49
  void train(idx_t n, const uint8_t* x) override;
49
50
  };
@@ -26,6 +26,7 @@
26
26
 
27
27
  #include <faiss/IndexBinaryFlat.h>
28
28
  #include <faiss/impl/AuxIndexStructures.h>
29
+ #include <faiss/impl/DistanceComputer.h>
29
30
  #include <faiss/impl/FaissAssert.h>
30
31
  #include <faiss/utils/Heap.h>
31
32
  #include <faiss/utils/hamming.h>
@@ -194,7 +195,10 @@ void IndexBinaryHNSW::search(
194
195
  const uint8_t* x,
195
196
  idx_t k,
196
197
  int32_t* distances,
197
- idx_t* labels) const {
198
+ idx_t* labels,
199
+ const SearchParameters* params) const {
200
+ FAISS_THROW_IF_NOT_MSG(
201
+ !params, "search params not supported for this index");
198
202
  FAISS_THROW_IF_NOT(k > 0);
199
203
 
200
204
  #pragma omp parallel
@@ -47,7 +47,8 @@ struct IndexBinaryHNSW : IndexBinary {
47
47
  const uint8_t* x,
48
48
  idx_t k,
49
49
  int32_t* distances,
50
- idx_t* labels) const override;
50
+ idx_t* labels,
51
+ const SearchParameters* params = nullptr) const override;
51
52
 
52
53
  void reconstruct(idx_t key, uint8_t* recons) const override;
53
54
 
@@ -12,6 +12,7 @@
12
12
  #include <cinttypes>
13
13
  #include <cstdio>
14
14
  #include <memory>
15
+ #include <unordered_set>
15
16
 
16
17
  #include <faiss/utils/hamming.h>
17
18
  #include <faiss/utils/utils.h>
@@ -216,7 +217,10 @@ void IndexBinaryHash::range_search(
216
217
  idx_t n,
217
218
  const uint8_t* x,
218
219
  int radius,
219
- RangeSearchResult* result) const {
220
+ RangeSearchResult* result,
221
+ const SearchParameters* params) const {
222
+ FAISS_THROW_IF_NOT_MSG(
223
+ !params, "search params not supported for this index");
220
224
  size_t nlist = 0, ndis = 0, n0 = 0;
221
225
 
222
226
  #pragma omp parallel if (n > 100) reduction(+ : ndis, n0, nlist)
@@ -244,7 +248,10 @@ void IndexBinaryHash::search(
244
248
  const uint8_t* x,
245
249
  idx_t k,
246
250
  int32_t* distances,
247
- idx_t* labels) const {
251
+ idx_t* labels,
252
+ const SearchParameters* params) const {
253
+ FAISS_THROW_IF_NOT_MSG(
254
+ !params, "search params not supported for this index");
248
255
  FAISS_THROW_IF_NOT(k > 0);
249
256
 
250
257
  using HeapForL2 = CMax<int32_t, idx_t>;
@@ -431,7 +438,10 @@ void IndexBinaryMultiHash::range_search(
431
438
  idx_t n,
432
439
  const uint8_t* x,
433
440
  int radius,
434
- RangeSearchResult* result) const {
441
+ RangeSearchResult* result,
442
+ const SearchParameters* params) const {
443
+ FAISS_THROW_IF_NOT_MSG(
444
+ !params, "search params not supported for this index");
435
445
  size_t nlist = 0, ndis = 0, n0 = 0;
436
446
 
437
447
  #pragma omp parallel if (n > 100) reduction(+ : ndis, n0, nlist)
@@ -459,7 +469,10 @@ void IndexBinaryMultiHash::search(
459
469
  const uint8_t* x,
460
470
  idx_t k,
461
471
  int32_t* distances,
462
- idx_t* labels) const {
472
+ idx_t* labels,
473
+ const SearchParameters* params) const {
474
+ FAISS_THROW_IF_NOT_MSG(
475
+ !params, "search params not supported for this index");
463
476
  FAISS_THROW_IF_NOT(k > 0);
464
477
 
465
478
  using HeapForL2 = CMax<int32_t, idx_t>;
@@ -50,14 +50,16 @@ struct IndexBinaryHash : IndexBinary {
50
50
  idx_t n,
51
51
  const uint8_t* x,
52
52
  int radius,
53
- RangeSearchResult* result) const override;
53
+ RangeSearchResult* result,
54
+ const SearchParameters* params = nullptr) const override;
54
55
 
55
56
  void search(
56
57
  idx_t n,
57
58
  const uint8_t* x,
58
59
  idx_t k,
59
60
  int32_t* distances,
60
- idx_t* labels) const override;
61
+ idx_t* labels,
62
+ const SearchParameters* params = nullptr) const override;
61
63
 
62
64
  void display() const;
63
65
  size_t hashtable_size() const;
@@ -107,14 +109,16 @@ struct IndexBinaryMultiHash : IndexBinary {
107
109
  idx_t n,
108
110
  const uint8_t* x,
109
111
  int radius,
110
- RangeSearchResult* result) const override;
112
+ RangeSearchResult* result,
113
+ const SearchParameters* params = nullptr) const override;
111
114
 
112
115
  void search(
113
116
  idx_t n,
114
117
  const uint8_t* x,
115
118
  idx_t k,
116
119
  int32_t* distances,
117
- idx_t* labels) const override;
120
+ idx_t* labels,
121
+ const SearchParameters* params = nullptr) const override;
118
122
 
119
123
  size_t hashtable_size() const;
120
124
  };
@@ -125,7 +125,10 @@ void IndexBinaryIVF::search(
125
125
  const uint8_t* x,
126
126
  idx_t k,
127
127
  int32_t* distances,
128
- idx_t* labels) const {
128
+ idx_t* labels,
129
+ const SearchParameters* params) const {
130
+ FAISS_THROW_IF_NOT_MSG(
131
+ !params, "search params not supported for this index");
129
132
  FAISS_THROW_IF_NOT(k > 0);
130
133
  FAISS_THROW_IF_NOT(nprobe > 0);
131
134
 
@@ -175,7 +178,10 @@ void IndexBinaryIVF::search_and_reconstruct(
175
178
  idx_t k,
176
179
  int32_t* distances,
177
180
  idx_t* labels,
178
- uint8_t* recons) const {
181
+ uint8_t* recons,
182
+ const SearchParameters* params) const {
183
+ FAISS_THROW_IF_NOT_MSG(
184
+ !params, "search params not supported for this index");
179
185
  const size_t nprobe = std::min(nlist, this->nprobe);
180
186
  FAISS_THROW_IF_NOT(k > 0);
181
187
  FAISS_THROW_IF_NOT(nprobe > 0);
@@ -279,22 +285,28 @@ void IndexBinaryIVF::train(idx_t n, const uint8_t* x) {
279
285
  is_trained = true;
280
286
  }
281
287
 
282
- void IndexBinaryIVF::merge_from(IndexBinaryIVF& other, idx_t add_id) {
283
- // minimal sanity checks
284
- FAISS_THROW_IF_NOT(other.d == d);
285
- FAISS_THROW_IF_NOT(other.nlist == nlist);
286
- FAISS_THROW_IF_NOT(other.code_size == code_size);
288
+ void IndexBinaryIVF::check_compatible_for_merge(
289
+ const IndexBinary& otherIndex) const {
290
+ auto other = dynamic_cast<const IndexBinaryIVF*>(&otherIndex);
291
+ FAISS_THROW_IF_NOT(other);
292
+ FAISS_THROW_IF_NOT(other->d == d);
293
+ FAISS_THROW_IF_NOT(other->nlist == nlist);
294
+ FAISS_THROW_IF_NOT(other->code_size == code_size);
287
295
  FAISS_THROW_IF_NOT_MSG(
288
- direct_map.no() && other.direct_map.no(),
296
+ direct_map.no() && other->direct_map.no(),
289
297
  "direct map copy not implemented");
290
298
  FAISS_THROW_IF_NOT_MSG(
291
299
  typeid(*this) == typeid(other),
292
300
  "can only merge indexes of the same type");
301
+ }
293
302
 
294
- invlists->merge_from(other.invlists, add_id);
295
-
296
- ntotal += other.ntotal;
297
- other.ntotal = 0;
303
+ void IndexBinaryIVF::merge_from(IndexBinary& otherIndex, idx_t add_id) {
304
+ // minimal sanity checks
305
+ check_compatible_for_merge(otherIndex);
306
+ auto other = static_cast<IndexBinaryIVF*>(&otherIndex);
307
+ invlists->merge_from(other->invlists, add_id);
308
+ ntotal += other->ntotal;
309
+ other->ntotal = 0;
298
310
  }
299
311
 
300
312
  void IndexBinaryIVF::replace_invlists(InvertedLists* il, bool own) {
@@ -650,7 +662,10 @@ void IndexBinaryIVF::range_search(
650
662
  idx_t n,
651
663
  const uint8_t* x,
652
664
  int radius,
653
- RangeSearchResult* res) const {
665
+ RangeSearchResult* res,
666
+ const SearchParameters* params) const {
667
+ FAISS_THROW_IF_NOT_MSG(
668
+ !params, "search params not supported for this index");
654
669
  const size_t nprobe = std::min(nlist, this->nprobe);
655
670
  std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
656
671
  std::unique_ptr<int32_t[]> coarse_dis(new int32_t[n * nprobe]);
@@ -123,13 +123,15 @@ struct IndexBinaryIVF : IndexBinary {
123
123
  const uint8_t* x,
124
124
  idx_t k,
125
125
  int32_t* distances,
126
- idx_t* labels) const override;
126
+ idx_t* labels,
127
+ const SearchParameters* params = nullptr) const override;
127
128
 
128
129
  void range_search(
129
130
  idx_t n,
130
131
  const uint8_t* x,
131
132
  int radius,
132
- RangeSearchResult* result) const override;
133
+ RangeSearchResult* result,
134
+ const SearchParameters* params = nullptr) const override;
133
135
 
134
136
  void range_search_preassigned(
135
137
  idx_t n,
@@ -167,7 +169,8 @@ struct IndexBinaryIVF : IndexBinary {
167
169
  idx_t k,
168
170
  int32_t* distances,
169
171
  idx_t* labels,
170
- uint8_t* recons) const override;
172
+ uint8_t* recons,
173
+ const SearchParameters* params = nullptr) const override;
171
174
 
172
175
  /** Reconstruct a vector given the location in terms of (inv list index +
173
176
  * inv list offset) instead of the id.
@@ -184,10 +187,10 @@ struct IndexBinaryIVF : IndexBinary {
184
187
  /// Dataset manipulation functions
185
188
  size_t remove_ids(const IDSelector& sel) override;
186
189
 
187
- /** moves the entries from another dataset to self. On output,
188
- * other is empty. add_id is added to all moved ids (for
189
- * sequential ids, this would be this->ntotal */
190
- virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
190
+ void merge_from(IndexBinary& other, idx_t add_id) override;
191
+
192
+ void check_compatible_for_merge(
193
+ const IndexBinary& otherIndex) const override;
191
194
 
192
195
  size_t get_list_size(size_t list_no) const {
193
196
  return invlists->list_size(list_no);