faiss 0.3.0 → 0.3.1

Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +0 -1
  12. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  13. data/vendor/faiss/faiss/Clustering.h +31 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +20 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  27. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  28. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  29. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  30. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  31. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  33. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  35. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  36. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  37. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  42. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  43. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  46. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  48. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  49. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  50. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  52. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  53. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  56. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  57. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  58. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  59. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  60. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  61. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  62. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  64. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  65. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  66. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  67. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  69. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  70. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  71. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  72. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  73. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  74. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  75. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  76. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  77. data/vendor/faiss/faiss/clone_index.h +3 -0
  78. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  79. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  82. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  83. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  88. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  90. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  92. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  93. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  97. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  98. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  99. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  101. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  103. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  104. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  105. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  106. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  107. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  108. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  109. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  110. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  111. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  112. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  113. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  115. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  118. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  119. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  121. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  124. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  125. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  126. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  127. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  128. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  129. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  133. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  135. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  136. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  137. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  138. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  139. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  140. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  141. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  142. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  143. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  144. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  145. data/vendor/faiss/faiss/utils/distances.h +81 -4
  146. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  148. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  150. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  152. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  153. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  154. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  155. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  156. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  157. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  158. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  159. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  160. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  161. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  162. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  163. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  164. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  165. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  166. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  167. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  168. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  169. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  170. data/vendor/faiss/faiss/utils/utils.h +57 -20
  171. metadata +10 -3
data/vendor/faiss/faiss/VectorTransform.h

@@ -230,18 +230,18 @@ struct ProductQuantizer;
  *
  */
 struct OPQMatrix : LinearTransform {
-    int M;          ///< nb of subquantizers
-    int niter;      ///< Number of outer training iterations
-    int niter_pq;   ///< Number of training iterations for the PQ
-    int niter_pq_0; ///< same, for the first outer iteration
+    int M;               ///< nb of subquantizers
+    int niter = 50;      ///< Number of outer training iterations
+    int niter_pq = 4;    ///< Number of training iterations for the PQ
+    int niter_pq_0 = 40; ///< same, for the first outer iteration
 
     /// if there are too many training points, resample
-    size_t max_train_points;
-    bool verbose;
+    size_t max_train_points = 256 * 256;
+    bool verbose = false;
 
     /// if non-NULL, use this product quantizer for training
     /// should be constructed with (d_out, M, _)
-    ProductQuantizer* pq;
+    ProductQuantizer* pq = nullptr;
 
     /// if d2 != -1, output vectors of this dimension
     explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
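
With the defaults moved into the class body, an OPQMatrix is usable right after construction and training parameters are overridden by plain assignment. A minimal sketch of the pattern (the dimensions and iteration count are illustrative, not from this diff):

    #include <faiss/VectorTransform.h>

    int main() {
        // learn a rotation for 64-dim vectors feeding a PQ with 8 subquantizers
        faiss::OPQMatrix opq(64, 8);
        opq.niter = 25;     // override the new in-class default of 50
        opq.verbose = true; // defaults to false; no constructor needed
        // opq.train(n, x); // OPQ training on n vectors x would go here
        return 0;
    }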
data/vendor/faiss/faiss/clone_index.cpp

@@ -17,6 +17,8 @@
 #include <faiss/Index2Layer.h>
 #include <faiss/IndexAdditiveQuantizer.h>
 #include <faiss/IndexAdditiveQuantizerFastScan.h>
+#include <faiss/IndexBinary.h>
+#include <faiss/IndexBinaryFlat.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexHNSW.h>
 #include <faiss/IndexIVF.h>
@@ -35,6 +37,7 @@
 #include <faiss/IndexRefine.h>
 #include <faiss/IndexRowwiseMinMax.h>
 #include <faiss/IndexScalarQuantizer.h>
+
 #include <faiss/MetaIndexes.h>
 #include <faiss/VectorTransform.h>
 
@@ -60,9 +63,10 @@ Index* clone_index(const Index* index) {
 // assumes there is a copy constructor ready. Always try from most
 // specific to most general. Most indexes don't have complicated
 // structs, the default copy constructor often just works.
-#define TRYCLONE(classname, obj)                                      \
-    if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
-        return new classname(*clo);                                   \
+#define TRYCLONE(classname, obj)                       \
+    if (const classname* clo##classname =              \
+                dynamic_cast<const classname*>(obj)) { \
+        return new classname(*clo##classname);         \
     } else
 
 VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
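
The local is now token-pasted with the class name (clo##classname), presumably so that chained TRYCLONE invocations, which nest into each other's else branches, declare distinct locals instead of repeatedly shadowing a single clo. Roughly, a chain of two expands to (class names illustrative):

    if (const IndexFlat* cloIndexFlat = dynamic_cast<const IndexFlat*>(obj)) {
        return new IndexFlat(*cloIndexFlat);
    } else if (const IndexRefineFlat* cloIndexRefineFlat =
                       dynamic_cast<const IndexRefineFlat*>(obj)) {
        return new IndexRefineFlat(*cloIndexRefineFlat);
    } else {
        // fall through to the generic cloning path
    }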
@@ -234,13 +238,6 @@ Index* clone_AdditiveQuantizerIndex(const Index* index) {
 
 namespace {
 
-IndexHNSW* clone_HNSW(const IndexHNSW* ihnsw) {
-    TRYCLONE(IndexHNSWFlat, ihnsw)
-    TRYCLONE(IndexHNSWPQ, ihnsw)
-    TRYCLONE(IndexHNSWSQ, ihnsw)
-    return new IndexHNSW(*ihnsw);
-}
-
 InvertedLists* clone_InvertedLists(const InvertedLists* invlists) {
     if (auto* ails = dynamic_cast<const ArrayInvertedLists*>(invlists)) {
         return new ArrayInvertedLists(*ails);
@@ -385,4 +382,12 @@ Quantizer* clone_Quantizer(const Quantizer* quant) {
     FAISS_THROW_MSG("Did not recognize quantizer to clone");
 }
 
+IndexBinary* clone_binary_index(const IndexBinary* index) {
+    if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
+        return new IndexBinaryFlat(*ii);
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
 } // namespace faiss
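
A hedged usage sketch of the new CPU-side entry point (the dimension and data are illustrative):

    #include <faiss/IndexBinaryFlat.h>
    #include <faiss/clone_index.h>
    #include <cstdint>
    #include <vector>

    int main() {
        faiss::IndexBinaryFlat index(64);  // 64-bit binary codes
        std::vector<uint8_t> code(8, 0);   // one 8-byte vector
        index.add(1, code.data());

        // deep-copies an IndexBinaryFlat; any other binary type throws
        faiss::IndexBinary* copy = faiss::clone_binary_index(&index);
        delete copy;
        return 0;
    }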
data/vendor/faiss/faiss/clone_index.h

@@ -17,6 +17,7 @@ struct Index;
 struct IndexIVF;
 struct VectorTransform;
 struct Quantizer;
+struct IndexBinary;
 
 /* cloning functions */
 Index* clone_index(const Index*);
@@ -33,4 +34,6 @@ struct Cloner {
 
 Quantizer* clone_Quantizer(const Quantizer* quant);
 
+IndexBinary* clone_binary_index(const IndexBinary* index);
+
 } // namespace faiss
data/vendor/faiss/faiss/gpu/GpuCloner.cpp

@@ -7,10 +7,12 @@
 
 #include <faiss/gpu/GpuCloner.h>
 #include <faiss/impl/FaissAssert.h>
+#include <memory>
 #include <typeinfo>
 
 #include <faiss/gpu/StandardGpuResources.h>
 
+#include <faiss/IndexBinaryFlat.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexIVFFlat.h>
@@ -21,6 +23,7 @@
 #include <faiss/IndexShardsIVF.h>
 #include <faiss/MetaIndexes.h>
 #include <faiss/gpu/GpuIndex.h>
+#include <faiss/gpu/GpuIndexBinaryFlat.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -121,6 +124,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = useFloat16;
+        config.use_raft = use_raft;
         return new GpuIndexFlat(provider, ifl, config);
     } else if (
             dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +133,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = true;
+        FAISS_THROW_IF_NOT_MSG(
+                !use_raft, "this type of index is not implemented for RAFT");
         GpuIndexFlat* gif = new GpuIndexFlat(
                 provider, index->d, index->metric_type, config);
         // transfer data by blocks
@@ -146,6 +152,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        config.use_raft = use_raft;
 
         GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
                 provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +169,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        FAISS_THROW_IF_NOT_MSG(
+                !use_raft, "this type of index is not implemented for RAFT");
 
         GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
                 provider,
@@ -194,6 +203,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
         config.useFloat16LookupTables = useFloat16;
         config.usePrecomputedTables = usePrecomputed;
+        config.use_raft = use_raft;
+        config.interleavedLayout = use_raft;
 
         GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
 
@@ -229,7 +240,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
         : GpuMultipleClonerOptions(options) {
     FAISS_THROW_IF_NOT(provider.size() == devices.size());
     for (size_t i = 0; i < provider.size(); i++) {
-        sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
+        sub_cloners.emplace_back(provider[i], devices[i], options);
     }
 }
 
@@ -298,8 +309,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             !dynamic_cast<const IndexFlat*>(quantizer)) {
         // then we flatten the coarse quantizer so that everything remains
         // on GPU
-        new_quantizer.reset(
-                new IndexFlat(quantizer->d, quantizer->metric_type));
+        new_quantizer = std::make_unique<IndexFlat>(
+                quantizer->d, quantizer->metric_type);
         std::vector<float> centroids(quantizer->d * quantizer->ntotal);
         quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
         new_quantizer->add(quantizer->ntotal, centroids.data());
@@ -309,6 +320,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
 
     std::vector<faiss::Index*> shards(n);
 
+#pragma omp parallel for
     for (idx_t i = 0; i < n; i++) {
         // make a shallow copy
         if (reserveVecs) {
@@ -321,7 +333,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
                 const_cast<Index*>(quantizer),
                 index_ivfpq->d,
                 index_ivfpq->nlist,
-                index_ivfpq->code_size,
+                index_ivfpq->pq.M,
                 index_ivfpq->pq.nbits);
         idx2.metric_type = index_ivfpq->metric_type;
         idx2.pq = index_ivfpq->pq;
@@ -473,5 +485,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
     return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
 }
 
+/*********************************************
+ * Cloning binary indexes
+ *********************************************/
+
+faiss::IndexBinary* index_binary_gpu_to_cpu(
+        const faiss::IndexBinary* gpu_index) {
+    if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
+        IndexBinaryFlat* ret = new IndexBinaryFlat();
+        ii->copyTo(ret);
+        return ret;
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
+faiss::IndexBinary* index_binary_cpu_to_gpu(
+        GpuResourcesProvider* provider,
+        int device,
+        const faiss::IndexBinary* index,
+        const GpuClonerOptions* options) {
+    if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
+        GpuIndexBinaryFlatConfig config;
+        config.device = device;
+        if (options) {
+            config.use_raft = options->use_raft;
+        }
+        return new GpuIndexBinaryFlat(provider, ii, config);
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
+faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
+        std::vector<GpuResourcesProvider*>& provider,
+        std::vector<int>& devices,
+        const faiss::IndexBinary* index,
+        const GpuMultipleClonerOptions* options) {
+    GpuMultipleClonerOptions defaults;
+    FAISS_THROW_IF_NOT(devices.size() == provider.size());
+    int n = devices.size();
+    if (n == 1) {
+        return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
+    }
+    if (!options) {
+        options = &defaults;
+    }
+    if (options->shard) {
+        auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
+        FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
+        IndexBinaryShards* ret = new IndexBinaryShards(true, true);
+        for (int i = 0; i < n; i++) {
+            IndexBinaryFlat fig(fi->d);
+            size_t i0 = i * fi->ntotal / n;
+            size_t i1 = (i + 1) * fi->ntotal / n;
+            fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
+            ret->addIndex(index_binary_cpu_to_gpu(
+                    provider[i], devices[i], &fig, options));
+        }
+        ret->own_indices = true;
+        return ret;
+    } else { // replicas
+        IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
+        for (int i = 0; i < n; i++) {
+            ret->addIndex(index_binary_cpu_to_gpu(
+                    provider[i], devices[i], index, options));
+        }
+        ret->own_indices = true;
+        return ret;
+    }
+}
+
 } // namespace gpu
 } // namespace faiss
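
A hedged round-trip sketch for the new GPU binary cloning entry points (device 0 and the data are illustrative; assumes a GPU build of faiss):

    #include <faiss/IndexBinaryFlat.h>
    #include <faiss/gpu/GpuCloner.h>
    #include <faiss/gpu/StandardGpuResources.h>
    #include <cstdint>
    #include <vector>

    int main() {
        faiss::IndexBinaryFlat cpu_index(256);  // 256-bit codes
        std::vector<uint8_t> codes(32, 0);      // one 32-byte vector
        cpu_index.add(1, codes.data());

        faiss::gpu::StandardGpuResources res;
        faiss::IndexBinary* gpu_index =
                faiss::gpu::index_binary_cpu_to_gpu(&res, 0, &cpu_index);

        // ... search on gpu_index ...

        faiss::IndexBinary* back =
                faiss::gpu::index_binary_gpu_to_cpu(gpu_index);
        delete back;
        delete gpu_index;
        return 0;
    }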
data/vendor/faiss/faiss/gpu/GpuCloner.h

@@ -11,10 +11,12 @@
 
 #include <faiss/Clustering.h>
 #include <faiss/Index.h>
+#include <faiss/IndexBinary.h>
 #include <faiss/clone_index.h>
 #include <faiss/gpu/GpuClonerOptions.h>
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndicesOptions.h>
+
 namespace faiss {
 namespace gpu {
 
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
     virtual ~GpuProgressiveDimIndexFactory() override;
 };
 
+/*********************************************
+ * Cloning binary indexes
+ *********************************************/
+
+faiss::IndexBinary* index_binary_gpu_to_cpu(
+        const faiss::IndexBinary* gpu_index);
+
+/// converts any CPU index that can be converted to GPU
+faiss::IndexBinary* index_binary_cpu_to_gpu(
+        GpuResourcesProvider* provider,
+        int device,
+        const faiss::IndexBinary* index,
+        const GpuClonerOptions* options = nullptr);
+
+faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
+        std::vector<GpuResourcesProvider*>& provider,
+        std::vector<int>& devices,
+        const faiss::IndexBinary* index,
+        const GpuMultipleClonerOptions* options = nullptr);
+
 } // namespace gpu
 } // namespace faiss
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h

@@ -36,6 +36,13 @@ struct GpuClonerOptions {
 
     /// Set verbose options on the index
     bool verbose = false;
+
+    /// use the RAFT implementation
+#if defined USE_NVIDIA_RAFT
+    bool use_raft = true;
+#else
+    bool use_raft = false;
+#endif
 };
 
 struct GpuMultipleClonerOptions : public GpuClonerOptions {
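
In a USE_NVIDIA_RAFT build the cloners therefore dispatch to RAFT by default, while plain CUDA builds keep use_raft = false. A sketch of overriding the default per clone (index size illustrative):

    #include <faiss/IndexFlat.h>
    #include <faiss/gpu/GpuCloner.h>
    #include <faiss/gpu/GpuClonerOptions.h>
    #include <faiss/gpu/StandardGpuResources.h>

    int main() {
        faiss::IndexFlatL2 cpu_index(128);
        faiss::gpu::StandardGpuResources res;

        faiss::gpu::GpuClonerOptions options;
        options.use_raft = false; // force the classic CUDA path in a RAFT build

        faiss::Index* gpu_index =
                faiss::gpu::index_cpu_to_gpu(&res, 0, &cpu_index, &options);
        delete gpu_index;
        return 0;
    }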
data/vendor/faiss/faiss/gpu/GpuDistance.h

@@ -9,6 +9,7 @@
 
 #include <faiss/Index.h>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -28,44 +29,24 @@ enum class IndicesDataType {
 
 /// Arguments to brute-force GPU k-nearest neighbor searching
 struct GpuDistanceParams {
-    GpuDistanceParams()
-            : metric(faiss::MetricType::METRIC_L2),
-              metricArg(0),
-              k(0),
-              dims(0),
-              vectors(nullptr),
-              vectorType(DistanceDataType::F32),
-              vectorsRowMajor(true),
-              numVectors(0),
-              vectorNorms(nullptr),
-              queries(nullptr),
-              queryType(DistanceDataType::F32),
-              queriesRowMajor(true),
-              numQueries(0),
-              outDistances(nullptr),
-              ignoreOutDistances(false),
-              outIndicesType(IndicesDataType::I64),
-              outIndices(nullptr),
-              device(-1) {}
-
     //
     // Search parameters
     //
 
     /// Search parameter: distance metric
-    faiss::MetricType metric;
+    faiss::MetricType metric = METRIC_L2;
 
     /// Search parameter: distance metric argument (if applicable)
     /// For metric == METRIC_Lp, this is the p-value
-    float metricArg;
+    float metricArg = 0;
 
     /// Search parameter: return k nearest neighbors
     /// If the value provided is -1, then we report all pairwise distances
     /// without top-k filtering
-    int k;
+    int k = 0;
 
     /// Vector dimensionality
-    int dims;
+    int dims = 0;
 
     //
     // Vectors being queried
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
     /// If vectorsRowMajor is true, this is
     /// numVectors x dims, with dims innermost; otherwise,
     /// dims x numVectors, with numVectors innermost
-    const void* vectors;
-    DistanceDataType vectorType;
-    bool vectorsRowMajor;
-    idx_t numVectors;
+    const void* vectors = nullptr;
+    DistanceDataType vectorType = DistanceDataType::F32;
+    bool vectorsRowMajor = true;
+    idx_t numVectors = 0;
 
     /// Precomputed L2 norms for each vector in `vectors`, which can be
     /// optionally provided in advance to speed computation for METRIC_L2
-    const float* vectorNorms;
+    const float* vectorNorms = nullptr;
 
     //
     // The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
     /// If queriesRowMajor is true, this is
     /// numQueries x dims, with dims innermost; otherwise,
     /// dims x numQueries, with numQueries innermost
-    const void* queries;
-    DistanceDataType queryType;
-    bool queriesRowMajor;
-    idx_t numQueries;
+    const void* queries = nullptr;
+    DistanceDataType queryType = DistanceDataType::F32;
+    bool queriesRowMajor = true;
+    idx_t numQueries = 0;
 
     //
     // Output results
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
     /// A region of memory size numQueries x k, with k
     /// innermost (row major) if k > 0, or if k == -1, a region of memory of
     /// size numQueries x numVectors
-    float* outDistances;
+    float* outDistances = nullptr;
 
     /// Do we only care about the indices reported, rather than the output
     /// distances? Not used if k == -1 (all pairwise distances)
-    bool ignoreOutDistances;
+    bool ignoreOutDistances = false;
 
     /// A region of memory size numQueries x k, with k
     /// innermost (row major). Not used if k == -1 (all pairwise distances)
-    IndicesDataType outIndicesType;
-    void* outIndices;
+    IndicesDataType outIndicesType = IndicesDataType::I64;
+    void* outIndices = nullptr;
 
     //
     // Execution information
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
     /// (via cudaGetDevice/cudaSetDevice) is used
     /// Otherwise, an integer 0 <= device < numDevices indicates the device for
     /// execution
-    int device;
+    int device = -1;
+
+    /// Should the index dispatch down to RAFT?
+    /// TODO: change default to true if RAFT is enabled
+    bool use_raft = false;
 };
 
+/// A function that determines whether RAFT should be used based on various
+/// conditions (such as unsupported architecture)
+bool should_use_raft(GpuDistanceParams args);
+
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
 /// nearest neighbors with respect to the given metric
 void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
 
+// bfKnn which takes two extra parameters to control the maximum GPU
+// memory allowed for vectors and queries, the latter including the
+// memory required for the results.
+// If 0, the corresponding input must fit into GPU memory.
+// If greater than 0, the function will use at most this much GPU
+// memory (in bytes) for vectors and queries respectively.
+// Vectors are broken up into chunks of size vectorsMemoryLimit,
+// and queries are broken up into chunks of size queriesMemoryLimit.
+// The tiles resulting from the product of the query and vector
+// chunks are processed sequentially on the GPU.
+// Only supported for row major matrices and k > 0. The input that
+// needs sharding must reside on the CPU.
+void bfKnn_tiling(
+        GpuResourcesProvider* resources,
+        const GpuDistanceParams& args,
+        size_t vectorsMemoryLimit,
+        size_t queriesMemoryLimit);
+
 /// Deprecated legacy implementation
 void bruteForceKnn(
         GpuResourcesProvider* resources,
@@ -167,3 +174,4 @@ void bruteForceKnn(
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
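
Because every field of GpuDistanceParams now carries a default member initializer, callers set only the fields they need. A minimal bfKnn sketch (sizes and data are illustrative); the same args struct would feed bfKnn_tiling when the inputs must be processed in memory-limited chunks:

    #include <faiss/gpu/GpuDistance.h>
    #include <faiss/gpu/StandardGpuResources.h>
    #include <vector>

    int main() {
        int d = 32, nb = 1000, nq = 10, k = 5;
        std::vector<float> base(nb * d), queries(nq * d); // fill with real data
        std::vector<float> distances(nq * k);
        std::vector<faiss::idx_t> labels(nq * k);         // matches I64 default

        faiss::gpu::StandardGpuResources res;

        faiss::gpu::GpuDistanceParams args; // everything else keeps its default
        args.k = k;
        args.dims = d;
        args.vectors = base.data();
        args.numVectors = nb;
        args.queries = queries.data();
        args.numQueries = nq;
        args.outDistances = distances.data();
        args.outIndices = labels.data();

        faiss::gpu::bfKnn(&res, args);
        return 0;
    }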
data/vendor/faiss/faiss/gpu/GpuIndex.h

@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
@@ -14,17 +29,26 @@ namespace faiss {
 namespace gpu {
 
 struct GpuIndexConfig {
-    inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
-
     /// GPU device on which the index is resident
-    int device;
+    int device = 0;
 
     /// What memory space to use for primary storage.
     /// On Pascal and above (CC 6+) architectures, allows GPUs to use
     /// more memory than is available on the GPU.
-    MemorySpace memorySpace;
+    MemorySpace memorySpace = MemorySpace::Device;
+
+    /// Should the index dispatch down to RAFT?
+#if defined USE_NVIDIA_RAFT
+    bool use_raft = true;
+#else
+    bool use_raft = false;
+#endif
 };
 
+/// A centralized function that determines whether RAFT should
+/// be used based on various conditions (such as unsupported architecture)
+bool should_use_raft(GpuIndexConfig config_);
+
 class GpuIndex : public faiss::Index {
   public:
     GpuIndex(
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h

@@ -24,15 +24,13 @@ namespace gpu {
 class FlatIndex;
 
 struct GpuIndexFlatConfig : public GpuIndexConfig {
-    inline GpuIndexFlatConfig() : useFloat16(false) {}
-
     /// Whether or not data is stored as float16
-    bool useFloat16;
+    bool ALIGNED(8) useFloat16 = false;
 
     /// Deprecated: no longer used
     /// Previously used to indicate whether internal storage of vectors is
     /// transposed
-    bool storeTransposed;
+    bool storeTransposed = false;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
     }
 
   protected:
+    void resetIndex_(int dims);
+
     /// Flat index does not require IDs as there is no storage available for
     /// them
     bool addImplRequiresIDs_() const override;
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h

@@ -21,10 +21,8 @@ class GpuIndexFlat;
 class IVFBase;
 
 struct GpuIndexIVFConfig : public GpuIndexConfig {
-    inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
-
     /// Index storage options for the GPU
-    IndicesOptions indicesOptions;
+    IndicesOptions indicesOptions = INDICES_64_BIT;
 
     /// Configuration for the coarse quantizer object
     GpuIndexFlatConfig flatConfig;
@@ -75,10 +73,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     virtual void updateQuantizer() = 0;
 
     /// Returns the number of inverted lists we're managing
-    idx_t getNumLists() const;
+    virtual idx_t getNumLists() const;
 
     /// Returns the number of vectors present in a particular inverted list
-    idx_t getListLength(idx_t listId) const;
+    virtual idx_t getListLength(idx_t listId) const;
 
     /// Return the encoded vector data contained in a particular inverted list,
     /// for debugging purposes.
@@ -86,12 +84,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     /// GPU-side representation.
     /// Otherwise, it is converted to the CPU format.
     /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
-            const;
+    virtual std::vector<uint8_t> getListVectorData(
+            idx_t listId,
+            bool gpuFormat = false) const;
 
     /// Return the vector indices contained in a particular inverted list, for
     /// debugging purposes.
-    std::vector<idx_t> getListIndices(idx_t listId) const;
+    virtual std::vector<idx_t> getListIndices(idx_t listId) const;
 
     void search_preassigned(
             idx_t n,
@@ -123,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     int getCurrentNProbe_(const SearchParameters* params) const;
     void verifyIVFSettings_() const;
     bool addImplRequiresIDs_() const override;
-    void trainQuantizer_(idx_t n, const float* x);
+    virtual void trainQuantizer_(idx_t n, const float* x);
 
     /// Called from GpuIndex for add/add_with_ids
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h

@@ -8,6 +8,8 @@
 #pragma once
 
 #include <faiss/gpu/GpuIndexIVF.h>
+#include <faiss/impl/ScalarQuantizer.h>
+
 #include <memory>
 
 namespace faiss {
@@ -21,11 +23,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -87,6 +87,21 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// Trains the coarse quantizer based on the given vector data
     void train(idx_t n, const float* x) override;
 
+  protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            int nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            bool useResidual,
+            /// Optional ScalarQuantizer
+            faiss::ScalarQuantizer* scalarQ,
+            bool interleavedLayout,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
   protected:
     /// Our configuration options
     const GpuIndexIVFFlatConfig ivfFlatConfig_;