faiss 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +0 -1
  12. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  13. data/vendor/faiss/faiss/Clustering.h +31 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +20 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  27. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  28. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  29. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  30. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  31. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  33. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  35. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  36. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  37. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  42. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  43. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  46. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  48. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  49. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  50. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  52. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  53. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  56. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  57. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  58. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  59. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  60. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  61. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  62. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  64. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  65. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  66. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  67. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  69. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  70. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  71. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  72. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  73. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  74. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  75. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  76. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  77. data/vendor/faiss/faiss/clone_index.h +3 -0
  78. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  79. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  82. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  83. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  88. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  90. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  92. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  93. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  97. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  98. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  99. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  101. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  103. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  104. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  105. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  106. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  107. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  108. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  109. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  110. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  111. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  112. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  113. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  115. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  118. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  119. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  121. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  124. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  125. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  126. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  127. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  128. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  129. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  133. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  135. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  136. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  137. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  138. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  139. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  140. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  141. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  142. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  143. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  144. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  145. data/vendor/faiss/faiss/utils/distances.h +81 -4
  146. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  148. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  150. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  152. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  153. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  154. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  155. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  156. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  157. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  158. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  159. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  160. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  161. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  162. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  163. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  164. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  165. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  166. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  167. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  168. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  169. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  170. data/vendor/faiss/faiss/utils/utils.h +57 -20
  171. metadata +10 -3
@@ -230,18 +230,18 @@ struct ProductQuantizer;
230
230
  *
231
231
  */
232
232
  struct OPQMatrix : LinearTransform {
233
- int M; ///< nb of subquantizers
234
- int niter; ///< Number of outer training iterations
235
- int niter_pq; ///< Number of training iterations for the PQ
236
- int niter_pq_0; ///< same, for the first outer iteration
233
+ int M; ///< nb of subquantizers
234
+ int niter = 50; ///< Number of outer training iterations
235
+ int niter_pq = 4; ///< Number of training iterations for the PQ
236
+ int niter_pq_0 = 40; ///< same, for the first outer iteration
237
237
 
238
238
  /// if there are too many training points, resample
239
- size_t max_train_points;
240
- bool verbose;
239
+ size_t max_train_points = 256 * 256;
240
+ bool verbose = false;
241
241
 
242
242
  /// if non-NULL, use this product quantizer for training
243
243
  /// should be constructed with (d_out, M, _)
244
- ProductQuantizer* pq;
244
+ ProductQuantizer* pq = nullptr;
245
245
 
246
246
  /// if d2 != -1, output vectors of this dimension
247
247
  explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
@@ -17,6 +17,8 @@
17
17
  #include <faiss/Index2Layer.h>
18
18
  #include <faiss/IndexAdditiveQuantizer.h>
19
19
  #include <faiss/IndexAdditiveQuantizerFastScan.h>
20
+ #include <faiss/IndexBinary.h>
21
+ #include <faiss/IndexBinaryFlat.h>
20
22
  #include <faiss/IndexFlat.h>
21
23
  #include <faiss/IndexHNSW.h>
22
24
  #include <faiss/IndexIVF.h>
@@ -35,6 +37,7 @@
35
37
  #include <faiss/IndexRefine.h>
36
38
  #include <faiss/IndexRowwiseMinMax.h>
37
39
  #include <faiss/IndexScalarQuantizer.h>
40
+
38
41
  #include <faiss/MetaIndexes.h>
39
42
  #include <faiss/VectorTransform.h>
40
43
 
@@ -60,9 +63,10 @@ Index* clone_index(const Index* index) {
60
63
  // assumes there is a copy constructor ready. Always try from most
61
64
  // specific to most general. Most indexes don't have complicated
62
65
  // structs, the default copy constructor often just works.
63
- #define TRYCLONE(classname, obj) \
64
- if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
65
- return new classname(*clo); \
66
+ #define TRYCLONE(classname, obj) \
67
+ if (const classname* clo##classname = \
68
+ dynamic_cast<const classname*>(obj)) { \
69
+ return new classname(*clo##classname); \
66
70
  } else
67
71
 
68
72
  VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
@@ -234,13 +238,6 @@ Index* clone_AdditiveQuantizerIndex(const Index* index) {
234
238
 
235
239
  namespace {
236
240
 
237
- IndexHNSW* clone_HNSW(const IndexHNSW* ihnsw) {
238
- TRYCLONE(IndexHNSWFlat, ihnsw)
239
- TRYCLONE(IndexHNSWPQ, ihnsw)
240
- TRYCLONE(IndexHNSWSQ, ihnsw)
241
- return new IndexHNSW(*ihnsw);
242
- }
243
-
244
241
  InvertedLists* clone_InvertedLists(const InvertedLists* invlists) {
245
242
  if (auto* ails = dynamic_cast<const ArrayInvertedLists*>(invlists)) {
246
243
  return new ArrayInvertedLists(*ails);
@@ -385,4 +382,12 @@ Quantizer* clone_Quantizer(const Quantizer* quant) {
385
382
  FAISS_THROW_MSG("Did not recognize quantizer to clone");
386
383
  }
387
384
 
385
+ IndexBinary* clone_binary_index(const IndexBinary* index) {
386
+ if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
387
+ return new IndexBinaryFlat(*ii);
388
+ } else {
389
+ FAISS_THROW_MSG("cannot clone this type of index");
390
+ }
391
+ }
392
+
388
393
  } // namespace faiss
@@ -17,6 +17,7 @@ struct Index;
17
17
  struct IndexIVF;
18
18
  struct VectorTransform;
19
19
  struct Quantizer;
20
+ struct IndexBinary;
20
21
 
21
22
  /* cloning functions */
22
23
  Index* clone_index(const Index*);
@@ -33,4 +34,6 @@ struct Cloner {
33
34
 
34
35
  Quantizer* clone_Quantizer(const Quantizer* quant);
35
36
 
37
+ IndexBinary* clone_binary_index(const IndexBinary* index);
38
+
36
39
  } // namespace faiss
@@ -7,10 +7,12 @@
7
7
 
8
8
  #include <faiss/gpu/GpuCloner.h>
9
9
  #include <faiss/impl/FaissAssert.h>
10
+ #include <memory>
10
11
  #include <typeinfo>
11
12
 
12
13
  #include <faiss/gpu/StandardGpuResources.h>
13
14
 
15
+ #include <faiss/IndexBinaryFlat.h>
14
16
  #include <faiss/IndexFlat.h>
15
17
  #include <faiss/IndexIVF.h>
16
18
  #include <faiss/IndexIVFFlat.h>
@@ -21,6 +23,7 @@
21
23
  #include <faiss/IndexShardsIVF.h>
22
24
  #include <faiss/MetaIndexes.h>
23
25
  #include <faiss/gpu/GpuIndex.h>
26
+ #include <faiss/gpu/GpuIndexBinaryFlat.h>
24
27
  #include <faiss/gpu/GpuIndexFlat.h>
25
28
  #include <faiss/gpu/GpuIndexIVFFlat.h>
26
29
  #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -121,6 +124,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
121
124
  GpuIndexFlatConfig config;
122
125
  config.device = device;
123
126
  config.useFloat16 = useFloat16;
127
+ config.use_raft = use_raft;
124
128
  return new GpuIndexFlat(provider, ifl, config);
125
129
  } else if (
126
130
  dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +133,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
129
133
  GpuIndexFlatConfig config;
130
134
  config.device = device;
131
135
  config.useFloat16 = true;
136
+ FAISS_THROW_IF_NOT_MSG(
137
+ !use_raft, "this type of index is not implemented for RAFT");
132
138
  GpuIndexFlat* gif = new GpuIndexFlat(
133
139
  provider, index->d, index->metric_type, config);
134
140
  // transfer data by blocks
@@ -146,6 +152,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
146
152
  config.device = device;
147
153
  config.indicesOptions = indicesOptions;
148
154
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
155
+ config.use_raft = use_raft;
149
156
 
150
157
  GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
151
158
  provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +169,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
162
169
  config.device = device;
163
170
  config.indicesOptions = indicesOptions;
164
171
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
172
+ FAISS_THROW_IF_NOT_MSG(
173
+ !use_raft, "this type of index is not implemented for RAFT");
165
174
 
166
175
  GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
167
176
  provider,
@@ -194,6 +203,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
194
203
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
195
204
  config.useFloat16LookupTables = useFloat16;
196
205
  config.usePrecomputedTables = usePrecomputed;
206
+ config.use_raft = use_raft;
207
+ config.interleavedLayout = use_raft;
197
208
 
198
209
  GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
199
210
 
@@ -229,7 +240,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
229
240
  : GpuMultipleClonerOptions(options) {
230
241
  FAISS_THROW_IF_NOT(provider.size() == devices.size());
231
242
  for (size_t i = 0; i < provider.size(); i++) {
232
- sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
243
+ sub_cloners.emplace_back(provider[i], devices[i], options);
233
244
  }
234
245
  }
235
246
 
@@ -298,8 +309,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
298
309
  !dynamic_cast<const IndexFlat*>(quantizer)) {
299
310
  // then we flatten the coarse quantizer so that everything remains
300
311
  // on GPU
301
- new_quantizer.reset(
302
- new IndexFlat(quantizer->d, quantizer->metric_type));
312
+ new_quantizer = std::make_unique<IndexFlat>(
313
+ quantizer->d, quantizer->metric_type);
303
314
  std::vector<float> centroids(quantizer->d * quantizer->ntotal);
304
315
  quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
305
316
  new_quantizer->add(quantizer->ntotal, centroids.data());
@@ -309,6 +320,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
309
320
 
310
321
  std::vector<faiss::Index*> shards(n);
311
322
 
323
+ #pragma omp parallel for
312
324
  for (idx_t i = 0; i < n; i++) {
313
325
  // make a shallow copy
314
326
  if (reserveVecs) {
@@ -321,7 +333,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
321
333
  const_cast<Index*>(quantizer),
322
334
  index_ivfpq->d,
323
335
  index_ivfpq->nlist,
324
- index_ivfpq->code_size,
336
+ index_ivfpq->pq.M,
325
337
  index_ivfpq->pq.nbits);
326
338
  idx2.metric_type = index_ivfpq->metric_type;
327
339
  idx2.pq = index_ivfpq->pq;
@@ -473,5 +485,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
473
485
  return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
474
486
  }
475
487
 
488
+ /*********************************************
489
+ * Cloning binary indexes
490
+ *********************************************/
491
+
492
+ faiss::IndexBinary* index_binary_gpu_to_cpu(
493
+ const faiss::IndexBinary* gpu_index) {
494
+ if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
495
+ IndexBinaryFlat* ret = new IndexBinaryFlat();
496
+ ii->copyTo(ret);
497
+ return ret;
498
+ } else {
499
+ FAISS_THROW_MSG("cannot clone this type of index");
500
+ }
501
+ }
502
+
503
+ faiss::IndexBinary* index_binary_cpu_to_gpu(
504
+ GpuResourcesProvider* provider,
505
+ int device,
506
+ const faiss::IndexBinary* index,
507
+ const GpuClonerOptions* options) {
508
+ if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
509
+ GpuIndexBinaryFlatConfig config;
510
+ config.device = device;
511
+ if (options) {
512
+ config.use_raft = options->use_raft;
513
+ }
514
+ return new GpuIndexBinaryFlat(provider, ii, config);
515
+ } else {
516
+ FAISS_THROW_MSG("cannot clone this type of index");
517
+ }
518
+ }
519
+
520
+ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
521
+ std::vector<GpuResourcesProvider*>& provider,
522
+ std::vector<int>& devices,
523
+ const faiss::IndexBinary* index,
524
+ const GpuMultipleClonerOptions* options) {
525
+ GpuMultipleClonerOptions defaults;
526
+ FAISS_THROW_IF_NOT(devices.size() == provider.size());
527
+ int n = devices.size();
528
+ if (n == 1) {
529
+ return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
530
+ }
531
+ if (!options) {
532
+ options = &defaults;
533
+ }
534
+ if (options->shard) {
535
+ auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
536
+ FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
537
+ IndexBinaryShards* ret = new IndexBinaryShards(true, true);
538
+ for (int i = 0; i < n; i++) {
539
+ IndexBinaryFlat fig(fi->d);
540
+ size_t i0 = i * fi->ntotal / n;
541
+ size_t i1 = (i + 1) * fi->ntotal / n;
542
+ fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
543
+ ret->addIndex(index_binary_cpu_to_gpu(
544
+ provider[i], devices[i], &fig, options));
545
+ }
546
+ ret->own_indices = true;
547
+ return ret;
548
+ } else { // replicas
549
+ IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
550
+ for (int i = 0; i < n; i++) {
551
+ ret->addIndex(index_binary_cpu_to_gpu(
552
+ provider[i], devices[i], index, options));
553
+ }
554
+ ret->own_indices = true;
555
+ return ret;
556
+ }
557
+ }
558
+
476
559
  } // namespace gpu
477
560
  } // namespace faiss
@@ -11,10 +11,12 @@
11
11
 
12
12
  #include <faiss/Clustering.h>
13
13
  #include <faiss/Index.h>
14
+ #include <faiss/IndexBinary.h>
14
15
  #include <faiss/clone_index.h>
15
16
  #include <faiss/gpu/GpuClonerOptions.h>
16
17
  #include <faiss/gpu/GpuIndex.h>
17
18
  #include <faiss/gpu/GpuIndicesOptions.h>
19
+
18
20
  namespace faiss {
19
21
  namespace gpu {
20
22
 
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
95
97
  virtual ~GpuProgressiveDimIndexFactory() override;
96
98
  };
97
99
 
100
+ /*********************************************
101
+ * Cloning binary indexes
102
+ *********************************************/
103
+
104
+ faiss::IndexBinary* index_binary_gpu_to_cpu(
105
+ const faiss::IndexBinary* gpu_index);
106
+
107
+ /// converts any CPU index that can be converted to GPU
108
+ faiss::IndexBinary* index_binary_cpu_to_gpu(
109
+ GpuResourcesProvider* provider,
110
+ int device,
111
+ const faiss::IndexBinary* index,
112
+ const GpuClonerOptions* options = nullptr);
113
+
114
+ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
115
+ std::vector<GpuResourcesProvider*>& provider,
116
+ std::vector<int>& devices,
117
+ const faiss::IndexBinary* index,
118
+ const GpuMultipleClonerOptions* options = nullptr);
119
+
98
120
  } // namespace gpu
99
121
  } // namespace faiss
@@ -36,6 +36,13 @@ struct GpuClonerOptions {
36
36
 
37
37
  /// Set verbose options on the index
38
38
  bool verbose = false;
39
+
40
+ /// use the RAFT implementation
41
+ #if defined USE_NVIDIA_RAFT
42
+ bool use_raft = true;
43
+ #else
44
+ bool use_raft = false;
45
+ #endif
39
46
  };
40
47
 
41
48
  struct GpuMultipleClonerOptions : public GpuClonerOptions {
@@ -9,6 +9,7 @@
9
9
 
10
10
  #include <faiss/Index.h>
11
11
 
12
+ #pragma GCC visibility push(default)
12
13
  namespace faiss {
13
14
  namespace gpu {
14
15
 
@@ -28,44 +29,24 @@ enum class IndicesDataType {
28
29
 
29
30
  /// Arguments to brute-force GPU k-nearest neighbor searching
30
31
  struct GpuDistanceParams {
31
- GpuDistanceParams()
32
- : metric(faiss::MetricType::METRIC_L2),
33
- metricArg(0),
34
- k(0),
35
- dims(0),
36
- vectors(nullptr),
37
- vectorType(DistanceDataType::F32),
38
- vectorsRowMajor(true),
39
- numVectors(0),
40
- vectorNorms(nullptr),
41
- queries(nullptr),
42
- queryType(DistanceDataType::F32),
43
- queriesRowMajor(true),
44
- numQueries(0),
45
- outDistances(nullptr),
46
- ignoreOutDistances(false),
47
- outIndicesType(IndicesDataType::I64),
48
- outIndices(nullptr),
49
- device(-1) {}
50
-
51
32
  //
52
33
  // Search parameters
53
34
  //
54
35
 
55
36
  /// Search parameter: distance metric
56
- faiss::MetricType metric;
37
+ faiss::MetricType metric = METRIC_L2;
57
38
 
58
39
  /// Search parameter: distance metric argument (if applicable)
59
40
  /// For metric == METRIC_Lp, this is the p-value
60
- float metricArg;
41
+ float metricArg = 0;
61
42
 
62
43
  /// Search parameter: return k nearest neighbors
63
44
  /// If the value provided is -1, then we report all pairwise distances
64
45
  /// without top-k filtering
65
- int k;
46
+ int k = 0;
66
47
 
67
48
  /// Vector dimensionality
68
- int dims;
49
+ int dims = 0;
69
50
 
70
51
  //
71
52
  // Vectors being queried
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
74
55
  /// If vectorsRowMajor is true, this is
75
56
  /// numVectors x dims, with dims innermost; otherwise,
76
57
  /// dims x numVectors, with numVectors innermost
77
- const void* vectors;
78
- DistanceDataType vectorType;
79
- bool vectorsRowMajor;
80
- idx_t numVectors;
58
+ const void* vectors = nullptr;
59
+ DistanceDataType vectorType = DistanceDataType::F32;
60
+ bool vectorsRowMajor = true;
61
+ idx_t numVectors = 0;
81
62
 
82
63
  /// Precomputed L2 norms for each vector in `vectors`, which can be
83
64
  /// optionally provided in advance to speed computation for METRIC_L2
84
- const float* vectorNorms;
65
+ const float* vectorNorms = nullptr;
85
66
 
86
67
  //
87
68
  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
91
72
  /// If queriesRowMajor is true, this is
92
73
  /// numQueries x dims, with dims innermost; otherwise,
93
74
  /// dims x numQueries, with numQueries innermost
94
- const void* queries;
95
- DistanceDataType queryType;
96
- bool queriesRowMajor;
97
- idx_t numQueries;
75
+ const void* queries = nullptr;
76
+ DistanceDataType queryType = DistanceDataType::F32;
77
+ bool queriesRowMajor = true;
78
+ idx_t numQueries = 0;
98
79
 
99
80
  //
100
81
  // Output results
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
103
84
  /// A region of memory size numQueries x k, with k
104
85
  /// innermost (row major) if k > 0, or if k == -1, a region of memory of
105
86
  /// size numQueries x numVectors
106
- float* outDistances;
87
+ float* outDistances = nullptr;
107
88
 
108
89
  /// Do we only care about the indices reported, rather than the output
109
90
  /// distances? Not used if k == -1 (all pairwise distances)
110
- bool ignoreOutDistances;
91
+ bool ignoreOutDistances = false;
111
92
 
112
93
  /// A region of memory size numQueries x k, with k
113
94
  /// innermost (row major). Not used if k == -1 (all pairwise distances)
114
- IndicesDataType outIndicesType;
115
- void* outIndices;
95
+ IndicesDataType outIndicesType = IndicesDataType::I64;
96
+ void* outIndices = nullptr;
116
97
 
117
98
  //
118
99
  // Execution information
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
123
104
  /// (via cudaGetDevice/cudaSetDevice) is used
124
105
  /// Otherwise, an integer 0 <= device < numDevices indicates the device for
125
106
  /// execution
126
- int device;
107
+ int device = -1;
108
+
109
+ /// Should the index dispatch down to RAFT?
110
+ /// TODO: change default to true if RAFT is enabled
111
+ bool use_raft = false;
127
112
  };
128
113
 
114
+ /// A function that determines whether RAFT should be used based on various
115
+ /// conditions (such as unsupported architecture)
116
+ bool should_use_raft(GpuDistanceParams args);
117
+
129
118
  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
130
119
  /// neighbor searches on an externally-provided region of memory (e.g., from a
131
120
  /// pytorch tensor).
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
140
129
  /// nearest neighbors with respect to the given metric
141
130
  void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
142
131
 
132
+ // bfKnn which takes two extra parameters to control the maximum GPU
133
+ // memory allowed for vectors and queries, the latter including the
134
+ // memory required for the results.
135
+ // If 0, the corresponding input must fit into GPU memory.
136
+ // If greater than 0, the function will use at most this much GPU
137
+ // memory (in bytes) for vectors and queries respectively.
138
+ // Vectors are broken up into chunks of size vectorsMemoryLimit,
139
+ // and queries are broken up into chunks of size queriesMemoryLimit.
140
+ // The tiles resulting from the product of the query and vector
141
+ // chunks are processed sequentially on the GPU.
142
+ // Only supported for row major matrices and k > 0. The input that
143
+ // needs sharding must reside on the CPU.
144
+ void bfKnn_tiling(
145
+ GpuResourcesProvider* resources,
146
+ const GpuDistanceParams& args,
147
+ size_t vectorsMemoryLimit,
148
+ size_t queriesMemoryLimit);
149
+
143
150
  /// Deprecated legacy implementation
144
151
  void bruteForceKnn(
145
152
  GpuResourcesProvider* resources,
@@ -167,3 +174,4 @@ void bruteForceKnn(
167
174
 
168
175
  } // namespace gpu
169
176
  } // namespace faiss
177
+ #pragma GCC visibility pop
@@ -4,6 +4,21 @@
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
+ /*
8
+ * Copyright (c) 2023, NVIDIA CORPORATION.
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ */
7
22
 
8
23
  #pragma once
9
24
 
@@ -14,17 +29,26 @@ namespace faiss {
14
29
  namespace gpu {
15
30
 
16
31
  struct GpuIndexConfig {
17
- inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
18
-
19
32
  /// GPU device on which the index is resident
20
- int device;
33
+ int device = 0;
21
34
 
22
35
  /// What memory space to use for primary storage.
23
36
  /// On Pascal and above (CC 6+) architectures, allows GPUs to use
24
37
  /// more memory than is available on the GPU.
25
- MemorySpace memorySpace;
38
+ MemorySpace memorySpace = MemorySpace::Device;
39
+
40
+ /// Should the index dispatch down to RAFT?
41
+ #if defined USE_NVIDIA_RAFT
42
+ bool use_raft = true;
43
+ #else
44
+ bool use_raft = false;
45
+ #endif
26
46
  };
27
47
 
48
+ /// A centralized function that determines whether RAFT should
49
+ /// be used based on various conditions (such as unsupported architecture)
50
+ bool should_use_raft(GpuIndexConfig config_);
51
+
28
52
  class GpuIndex : public faiss::Index {
29
53
  public:
30
54
  GpuIndex(
@@ -24,15 +24,13 @@ namespace gpu {
24
24
  class FlatIndex;
25
25
 
26
26
  struct GpuIndexFlatConfig : public GpuIndexConfig {
27
- inline GpuIndexFlatConfig() : useFloat16(false) {}
28
-
29
27
  /// Whether or not data is stored as float16
30
- bool useFloat16;
28
+ bool ALIGNED(8) useFloat16 = false;
31
29
 
32
30
  /// Deprecated: no longer used
33
31
  /// Previously used to indicate whether internal storage of vectors is
34
32
  /// transposed
35
- bool storeTransposed;
33
+ bool storeTransposed = false;
36
34
  };
37
35
 
38
36
  /// Wrapper around the GPU implementation that looks like
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
115
113
  }
116
114
 
117
115
  protected:
116
+ void resetIndex_(int dims);
117
+
118
118
  /// Flat index does not require IDs as there is no storage available for
119
119
  /// them
120
120
  bool addImplRequiresIDs_() const override;
@@ -21,10 +21,8 @@ class GpuIndexFlat;
21
21
  class IVFBase;
22
22
 
23
23
  struct GpuIndexIVFConfig : public GpuIndexConfig {
24
- inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
25
-
26
24
  /// Index storage options for the GPU
27
- IndicesOptions indicesOptions;
25
+ IndicesOptions indicesOptions = INDICES_64_BIT;
28
26
 
29
27
  /// Configuration for the coarse quantizer object
30
28
  GpuIndexFlatConfig flatConfig;
@@ -75,10 +73,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
75
73
  virtual void updateQuantizer() = 0;
76
74
 
77
75
  /// Returns the number of inverted lists we're managing
78
- idx_t getNumLists() const;
76
+ virtual idx_t getNumLists() const;
79
77
 
80
78
  /// Returns the number of vectors present in a particular inverted list
81
- idx_t getListLength(idx_t listId) const;
79
+ virtual idx_t getListLength(idx_t listId) const;
82
80
 
83
81
  /// Return the encoded vector data contained in a particular inverted list,
84
82
  /// for debugging purposes.
@@ -86,12 +84,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
86
84
  /// GPU-side representation.
87
85
  /// Otherwise, it is converted to the CPU format.
88
86
  /// compliant format, while the native GPU format may differ.
89
- std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
90
- const;
87
+ virtual std::vector<uint8_t> getListVectorData(
88
+ idx_t listId,
89
+ bool gpuFormat = false) const;
91
90
 
92
91
  /// Return the vector indices contained in a particular inverted list, for
93
92
  /// debugging purposes.
94
- std::vector<idx_t> getListIndices(idx_t listId) const;
93
+ virtual std::vector<idx_t> getListIndices(idx_t listId) const;
95
94
 
96
95
  void search_preassigned(
97
96
  idx_t n,
@@ -123,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
123
122
  int getCurrentNProbe_(const SearchParameters* params) const;
124
123
  void verifyIVFSettings_() const;
125
124
  bool addImplRequiresIDs_() const override;
126
- void trainQuantizer_(idx_t n, const float* x);
125
+ virtual void trainQuantizer_(idx_t n, const float* x);
127
126
 
128
127
  /// Called from GpuIndex for add/add_with_ids
129
128
  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
@@ -8,6 +8,8 @@
8
8
  #pragma once
9
9
 
10
10
  #include <faiss/gpu/GpuIndexIVF.h>
11
+ #include <faiss/impl/ScalarQuantizer.h>
12
+
11
13
  #include <memory>
12
14
 
13
15
  namespace faiss {
@@ -21,11 +23,9 @@ class IVFFlat;
21
23
  class GpuIndexFlat;
22
24
 
23
25
  struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
24
- inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
25
-
26
26
  /// Use the alternative memory layout for the IVF lists
27
27
  /// (currently the default)
28
- bool interleavedLayout;
28
+ bool interleavedLayout = true;
29
29
  };
30
30
 
31
31
  /// Wrapper around the GPU implementation that looks like
@@ -87,6 +87,21 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
87
87
  /// Trains the coarse quantizer based on the given vector data
88
88
  void train(idx_t n, const float* x) override;
89
89
 
90
+ protected:
91
+ /// Initialize appropriate index
92
+ void setIndex_(
93
+ GpuResources* resources,
94
+ int dim,
95
+ int nlist,
96
+ faiss::MetricType metric,
97
+ float metricArg,
98
+ bool useResidual,
99
+ /// Optional ScalarQuantizer
100
+ faiss::ScalarQuantizer* scalarQ,
101
+ bool interleavedLayout,
102
+ IndicesOptions indicesOptions,
103
+ MemorySpace space);
104
+
90
105
  protected:
91
106
  /// Our configuration options
92
107
  const GpuIndexIVFFlatConfig ivfFlatConfig_;