faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -7,11 +7,16 @@
7
7
 
8
8
  #include <faiss/gpu/GpuCloner.h>
9
9
  #include <faiss/impl/FaissAssert.h>
10
+ #include <memory>
10
11
  #include <typeinfo>
11
12
 
12
13
  #include <faiss/gpu/StandardGpuResources.h>
13
14
 
15
+ #include <faiss/IndexBinaryFlat.h>
14
16
  #include <faiss/IndexFlat.h>
17
+ #if defined USE_NVIDIA_RAFT
18
+ #include <faiss/IndexHNSW.h>
19
+ #endif
15
20
  #include <faiss/IndexIVF.h>
16
21
  #include <faiss/IndexIVFFlat.h>
17
22
  #include <faiss/IndexIVFPQ.h>
@@ -21,6 +26,10 @@
21
26
  #include <faiss/IndexShardsIVF.h>
22
27
  #include <faiss/MetaIndexes.h>
23
28
  #include <faiss/gpu/GpuIndex.h>
29
+ #include <faiss/gpu/GpuIndexBinaryFlat.h>
30
+ #if defined USE_NVIDIA_RAFT
31
+ #include <faiss/gpu/GpuIndexCagra.h>
32
+ #endif
24
33
  #include <faiss/gpu/GpuIndexFlat.h>
25
34
  #include <faiss/gpu/GpuIndexIVFFlat.h>
26
35
  #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -82,7 +91,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
82
91
  // objective is to make a single component out of them
83
92
  // (inverse op of ToGpuClonerMultiple)
84
93
 
85
- } else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
94
+ }
95
+ #if defined USE_NVIDIA_RAFT
96
+ else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
97
+ IndexHNSWCagra* res = new IndexHNSWCagra();
98
+ icg->copyTo(res);
99
+ return res;
100
+ }
101
+ #endif
102
+ else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
86
103
  int nshard = ish->count();
87
104
  FAISS_ASSERT(nshard > 0);
88
105
  Index* res = clone_Index(ish->at(0));
@@ -121,6 +138,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
121
138
  GpuIndexFlatConfig config;
122
139
  config.device = device;
123
140
  config.useFloat16 = useFloat16;
141
+ config.use_raft = use_raft;
124
142
  return new GpuIndexFlat(provider, ifl, config);
125
143
  } else if (
126
144
  dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +147,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
129
147
  GpuIndexFlatConfig config;
130
148
  config.device = device;
131
149
  config.useFloat16 = true;
150
+ FAISS_THROW_IF_NOT_MSG(
151
+ !use_raft, "this type of index is not implemented for RAFT");
132
152
  GpuIndexFlat* gif = new GpuIndexFlat(
133
153
  provider, index->d, index->metric_type, config);
134
154
  // transfer data by blocks
@@ -146,6 +166,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
146
166
  config.device = device;
147
167
  config.indicesOptions = indicesOptions;
148
168
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
169
+ config.use_raft = use_raft;
170
+ config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
149
171
 
150
172
  GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
151
173
  provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +184,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
162
184
  config.device = device;
163
185
  config.indicesOptions = indicesOptions;
164
186
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
187
+ FAISS_THROW_IF_NOT_MSG(
188
+ !use_raft, "this type of index is not implemented for RAFT");
165
189
 
166
190
  GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
167
191
  provider,
@@ -194,6 +218,9 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
194
218
  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
195
219
  config.useFloat16LookupTables = useFloat16;
196
220
  config.usePrecomputedTables = usePrecomputed;
221
+ config.use_raft = use_raft;
222
+ config.interleavedLayout = use_raft;
223
+ config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
197
224
 
198
225
  GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
199
226
 
@@ -202,9 +229,25 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
202
229
  }
203
230
 
204
231
  return res;
205
- } else {
206
- // default: use CPU cloner
207
- return Cloner::clone_Index(index);
232
+ }
233
+ #if defined USE_NVIDIA_RAFT
234
+ else if (auto icg = dynamic_cast<const faiss::IndexHNSWCagra*>(index)) {
235
+ GpuIndexCagraConfig config;
236
+ config.device = device;
237
+ GpuIndexCagra* res =
238
+ new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
239
+ res->copyFrom(icg);
240
+ return res;
241
+ }
242
+ #endif
243
+ else {
244
+ // use CPU cloner for IDMap and PreTransform
245
+ auto index_idmap = dynamic_cast<const IndexIDMap*>(index);
246
+ auto index_pt = dynamic_cast<const IndexPreTransform*>(index);
247
+ if (index_idmap || index_pt) {
248
+ return Cloner::clone_Index(index);
249
+ }
250
+ FAISS_THROW_MSG("This index type is not implemented on GPU.");
208
251
  }
209
252
  }
210
253
 
@@ -229,7 +272,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
229
272
  : GpuMultipleClonerOptions(options) {
230
273
  FAISS_THROW_IF_NOT(provider.size() == devices.size());
231
274
  for (size_t i = 0; i < provider.size(); i++) {
232
- sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
275
+ sub_cloners.emplace_back(provider[i], devices[i], options);
233
276
  }
234
277
  }
235
278
 
@@ -298,8 +341,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
298
341
  !dynamic_cast<const IndexFlat*>(quantizer)) {
299
342
  // then we flatten the coarse quantizer so that everything remains
300
343
  // on GPU
301
- new_quantizer.reset(
302
- new IndexFlat(quantizer->d, quantizer->metric_type));
344
+ new_quantizer = std::make_unique<IndexFlat>(
345
+ quantizer->d, quantizer->metric_type);
303
346
  std::vector<float> centroids(quantizer->d * quantizer->ntotal);
304
347
  quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
305
348
  new_quantizer->add(quantizer->ntotal, centroids.data());
@@ -309,6 +352,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
309
352
 
310
353
  std::vector<faiss::Index*> shards(n);
311
354
 
355
+ #pragma omp parallel for
312
356
  for (idx_t i = 0; i < n; i++) {
313
357
  // make a shallow copy
314
358
  if (reserveVecs) {
@@ -321,7 +365,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
321
365
  const_cast<Index*>(quantizer),
322
366
  index_ivfpq->d,
323
367
  index_ivfpq->nlist,
324
- index_ivfpq->code_size,
368
+ index_ivfpq->pq.M,
325
369
  index_ivfpq->pq.nbits);
326
370
  idx2.metric_type = index_ivfpq->metric_type;
327
371
  idx2.pq = index_ivfpq->pq;
@@ -473,5 +517,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
473
517
  return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
474
518
  }
475
519
 
520
+ /*********************************************
521
+ * Cloning binary indexes
522
+ *********************************************/
523
+
524
+ faiss::IndexBinary* index_binary_gpu_to_cpu(
525
+ const faiss::IndexBinary* gpu_index) {
526
+ if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
527
+ IndexBinaryFlat* ret = new IndexBinaryFlat();
528
+ ii->copyTo(ret);
529
+ return ret;
530
+ } else {
531
+ FAISS_THROW_MSG("cannot clone this type of index");
532
+ }
533
+ }
534
+
535
+ faiss::IndexBinary* index_binary_cpu_to_gpu(
536
+ GpuResourcesProvider* provider,
537
+ int device,
538
+ const faiss::IndexBinary* index,
539
+ const GpuClonerOptions* options) {
540
+ if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
541
+ GpuIndexBinaryFlatConfig config;
542
+ config.device = device;
543
+ if (options) {
544
+ config.use_raft = options->use_raft;
545
+ }
546
+ return new GpuIndexBinaryFlat(provider, ii, config);
547
+ } else {
548
+ FAISS_THROW_MSG("cannot clone this type of index");
549
+ }
550
+ }
551
+
552
+ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
553
+ std::vector<GpuResourcesProvider*>& provider,
554
+ std::vector<int>& devices,
555
+ const faiss::IndexBinary* index,
556
+ const GpuMultipleClonerOptions* options) {
557
+ GpuMultipleClonerOptions defaults;
558
+ FAISS_THROW_IF_NOT(devices.size() == provider.size());
559
+ int n = devices.size();
560
+ if (n == 1) {
561
+ return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
562
+ }
563
+ if (!options) {
564
+ options = &defaults;
565
+ }
566
+ if (options->shard) {
567
+ auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
568
+ FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
569
+ IndexBinaryShards* ret = new IndexBinaryShards(true, true);
570
+ for (int i = 0; i < n; i++) {
571
+ IndexBinaryFlat fig(fi->d);
572
+ size_t i0 = i * fi->ntotal / n;
573
+ size_t i1 = (i + 1) * fi->ntotal / n;
574
+ fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
575
+ ret->addIndex(index_binary_cpu_to_gpu(
576
+ provider[i], devices[i], &fig, options));
577
+ }
578
+ ret->own_indices = true;
579
+ return ret;
580
+ } else { // replicas
581
+ IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
582
+ for (int i = 0; i < n; i++) {
583
+ ret->addIndex(index_binary_cpu_to_gpu(
584
+ provider[i], devices[i], index, options));
585
+ }
586
+ ret->own_indices = true;
587
+ return ret;
588
+ }
589
+ }
590
+
476
591
  } // namespace gpu
477
592
  } // namespace faiss
@@ -11,10 +11,12 @@
11
11
 
12
12
  #include <faiss/Clustering.h>
13
13
  #include <faiss/Index.h>
14
+ #include <faiss/IndexBinary.h>
14
15
  #include <faiss/clone_index.h>
15
16
  #include <faiss/gpu/GpuClonerOptions.h>
16
17
  #include <faiss/gpu/GpuIndex.h>
17
18
  #include <faiss/gpu/GpuIndicesOptions.h>
19
+
18
20
  namespace faiss {
19
21
  namespace gpu {
20
22
 
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
95
97
  virtual ~GpuProgressiveDimIndexFactory() override;
96
98
  };
97
99
 
100
+ /*********************************************
101
+ * Cloning binary indexes
102
+ *********************************************/
103
+
104
+ faiss::IndexBinary* index_binary_gpu_to_cpu(
105
+ const faiss::IndexBinary* gpu_index);
106
+
107
+ /// converts any CPU index that can be converted to GPU
108
+ faiss::IndexBinary* index_binary_cpu_to_gpu(
109
+ GpuResourcesProvider* provider,
110
+ int device,
111
+ const faiss::IndexBinary* index,
112
+ const GpuClonerOptions* options = nullptr);
113
+
114
+ faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
115
+ std::vector<GpuResourcesProvider*>& provider,
116
+ std::vector<int>& devices,
117
+ const faiss::IndexBinary* index,
118
+ const GpuMultipleClonerOptions* options = nullptr);
119
+
98
120
  } // namespace gpu
99
121
  } // namespace faiss
@@ -36,6 +36,19 @@ struct GpuClonerOptions {
36
36
 
37
37
  /// Set verbose options on the index
38
38
  bool verbose = false;
39
+
40
+ /// use the RAFT implementation
41
+ #if defined USE_NVIDIA_RAFT
42
+ bool use_raft = true;
43
+ #else
44
+ bool use_raft = false;
45
+ #endif
46
+
47
+ /// This flag controls the CPU fallback logic for the coarse quantizer
48
+ /// component of the index. When set to false (default), the cloner will
49
+ /// throw an exception for indices not implemented on GPU. When set to
50
+ /// true, it will fall back to a CPU implementation.
51
+ bool allowCpuCoarseQuantizer = false;
39
52
  };
40
53
 
41
54
  struct GpuMultipleClonerOptions : public GpuClonerOptions {
@@ -9,6 +9,7 @@
9
9
 
10
10
  #include <faiss/Index.h>
11
11
 
12
+ #pragma GCC visibility push(default)
12
13
  namespace faiss {
13
14
  namespace gpu {
14
15
 
@@ -28,44 +29,24 @@ enum class IndicesDataType {
28
29
 
29
30
  /// Arguments to brute-force GPU k-nearest neighbor searching
30
31
  struct GpuDistanceParams {
31
- GpuDistanceParams()
32
- : metric(faiss::MetricType::METRIC_L2),
33
- metricArg(0),
34
- k(0),
35
- dims(0),
36
- vectors(nullptr),
37
- vectorType(DistanceDataType::F32),
38
- vectorsRowMajor(true),
39
- numVectors(0),
40
- vectorNorms(nullptr),
41
- queries(nullptr),
42
- queryType(DistanceDataType::F32),
43
- queriesRowMajor(true),
44
- numQueries(0),
45
- outDistances(nullptr),
46
- ignoreOutDistances(false),
47
- outIndicesType(IndicesDataType::I64),
48
- outIndices(nullptr),
49
- device(-1) {}
50
-
51
32
  //
52
33
  // Search parameters
53
34
  //
54
35
 
55
36
  /// Search parameter: distance metric
56
- faiss::MetricType metric;
37
+ faiss::MetricType metric = METRIC_L2;
57
38
 
58
39
  /// Search parameter: distance metric argument (if applicable)
59
40
  /// For metric == METRIC_Lp, this is the p-value
60
- float metricArg;
41
+ float metricArg = 0;
61
42
 
62
43
  /// Search parameter: return k nearest neighbors
63
44
  /// If the value provided is -1, then we report all pairwise distances
64
45
  /// without top-k filtering
65
- int k;
46
+ int k = 0;
66
47
 
67
48
  /// Vector dimensionality
68
- int dims;
49
+ int dims = 0;
69
50
 
70
51
  //
71
52
  // Vectors being queried
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
74
55
  /// If vectorsRowMajor is true, this is
75
56
  /// numVectors x dims, with dims innermost; otherwise,
76
57
  /// dims x numVectors, with numVectors innermost
77
- const void* vectors;
78
- DistanceDataType vectorType;
79
- bool vectorsRowMajor;
80
- idx_t numVectors;
58
+ const void* vectors = nullptr;
59
+ DistanceDataType vectorType = DistanceDataType::F32;
60
+ bool vectorsRowMajor = true;
61
+ idx_t numVectors = 0;
81
62
 
82
63
  /// Precomputed L2 norms for each vector in `vectors`, which can be
83
64
  /// optionally provided in advance to speed computation for METRIC_L2
84
- const float* vectorNorms;
65
+ const float* vectorNorms = nullptr;
85
66
 
86
67
  //
87
68
  // The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
91
72
  /// If queriesRowMajor is true, this is
92
73
  /// numQueries x dims, with dims innermost; otherwise,
93
74
  /// dims x numQueries, with numQueries innermost
94
- const void* queries;
95
- DistanceDataType queryType;
96
- bool queriesRowMajor;
97
- idx_t numQueries;
75
+ const void* queries = nullptr;
76
+ DistanceDataType queryType = DistanceDataType::F32;
77
+ bool queriesRowMajor = true;
78
+ idx_t numQueries = 0;
98
79
 
99
80
  //
100
81
  // Output results
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
103
84
  /// A region of memory size numQueries x k, with k
104
85
  /// innermost (row major) if k > 0, or if k == -1, a region of memory of
105
86
  /// size numQueries x numVectors
106
- float* outDistances;
87
+ float* outDistances = nullptr;
107
88
 
108
89
  /// Do we only care about the indices reported, rather than the output
109
90
  /// distances? Not used if k == -1 (all pairwise distances)
110
- bool ignoreOutDistances;
91
+ bool ignoreOutDistances = false;
111
92
 
112
93
  /// A region of memory size numQueries x k, with k
113
94
  /// innermost (row major). Not used if k == -1 (all pairwise distances)
114
- IndicesDataType outIndicesType;
115
- void* outIndices;
95
+ IndicesDataType outIndicesType = IndicesDataType::I64;
96
+ void* outIndices = nullptr;
116
97
 
117
98
  //
118
99
  // Execution information
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
123
104
  /// (via cudaGetDevice/cudaSetDevice) is used
124
105
  /// Otherwise, an integer 0 <= device < numDevices indicates the device for
125
106
  /// execution
126
- int device;
107
+ int device = -1;
108
+
109
+ /// Should the index dispatch down to RAFT?
110
+ /// TODO: change default to true if RAFT is enabled
111
+ bool use_raft = false;
127
112
  };
128
113
 
114
+ /// A function that determines whether RAFT should be used based on various
115
+ /// conditions (such as unsupported architecture)
116
+ bool should_use_raft(GpuDistanceParams args);
117
+
129
118
  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
130
119
  /// neighbor searches on an externally-provided region of memory (e.g., from a
131
120
  /// pytorch tensor).
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
140
129
  /// nearest neighbors with respect to the given metric
141
130
  void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
142
131
 
132
+ // bfKnn which takes two extra parameters to control the maximum GPU
133
+ // memory allowed for vectors and queries, the latter including the
134
+ // memory required for the results.
135
+ // If 0, the corresponding input must fit into GPU memory.
136
+ // If greater than 0, the function will use at most this much GPU
137
+ // memory (in bytes) for vectors and queries respectively.
138
+ // Vectors are broken up into chunks of size vectorsMemoryLimit,
139
+ // and queries are broken up into chunks of size queriesMemoryLimit.
140
+ // The tiles resulting from the product of the query and vector
141
+ // chunks are processed sequentially on the GPU.
142
+ // Only supported for row major matrices and k > 0. The input that
143
+ // needs sharding must reside on the CPU.
144
+ void bfKnn_tiling(
145
+ GpuResourcesProvider* resources,
146
+ const GpuDistanceParams& args,
147
+ size_t vectorsMemoryLimit,
148
+ size_t queriesMemoryLimit);
149
+
143
150
  /// Deprecated legacy implementation
144
151
  void bruteForceKnn(
145
152
  GpuResourcesProvider* resources,
@@ -167,3 +174,4 @@ void bruteForceKnn(
167
174
 
168
175
  } // namespace gpu
169
176
  } // namespace faiss
177
+ #pragma GCC visibility pop
@@ -15,7 +15,7 @@
15
15
  /// Assertions
16
16
  ///
17
17
 
18
- #ifdef __CUDA_ARCH__
18
+ #if defined(__CUDA_ARCH__) || defined(USE_AMD_ROCM)
19
19
  #define GPU_FAISS_ASSERT(X) assert(X)
20
20
  #define GPU_FAISS_ASSERT_MSG(X, MSG) assert(X)
21
21
  #define GPU_FAISS_ASSERT_FMT(X, FMT, ...) assert(X)
@@ -4,6 +4,21 @@
4
4
  * This source code is licensed under the MIT license found in the
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
+ /*
8
+ * Copyright (c) 2023, NVIDIA CORPORATION.
9
+ *
10
+ * Licensed under the Apache License, Version 2.0 (the "License");
11
+ * you may not use this file except in compliance with the License.
12
+ * You may obtain a copy of the License at
13
+ *
14
+ * http://www.apache.org/licenses/LICENSE-2.0
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ */
7
22
 
8
23
  #pragma once
9
24
 
@@ -14,17 +29,26 @@ namespace faiss {
14
29
  namespace gpu {
15
30
 
16
31
  struct GpuIndexConfig {
17
- inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
18
-
19
32
  /// GPU device on which the index is resident
20
- int device;
33
+ int device = 0;
21
34
 
22
35
  /// What memory space to use for primary storage.
23
36
  /// On Pascal and above (CC 6+) architectures, allows GPUs to use
24
37
  /// more memory than is available on the GPU.
25
- MemorySpace memorySpace;
38
+ MemorySpace memorySpace = MemorySpace::Device;
39
+
40
+ /// Should the index dispatch down to RAFT?
41
+ #if defined USE_NVIDIA_RAFT
42
+ bool use_raft = true;
43
+ #else
44
+ bool use_raft = false;
45
+ #endif
26
46
  };
27
47
 
48
+ /// A centralized function that determines whether RAFT should
49
+ /// be used based on various conditions (such as unsupported architecture)
50
+ bool should_use_raft(GpuIndexConfig config_);
51
+
28
52
  class GpuIndex : public faiss::Index {
29
53
  public:
30
54
  GpuIndex(
@@ -60,19 +84,14 @@ class GpuIndex : public faiss::Index {
60
84
 
61
85
  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
62
86
  /// performed as needed
63
- void assign(
64
- idx_t n,
65
- const float* x,
66
- idx_t* labels,
67
- // faiss::Index has idx_t for k
68
- idx_t k = 1) const override;
87
+ void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
88
+ const override;
69
89
 
70
90
  /// `x`, `distances` and `labels` can be resident on the CPU or any
71
91
  /// GPU; copies are performed as needed
72
92
  void search(
73
93
  idx_t n,
74
94
  const float* x,
75
- // faiss::Index has idx_t for k
76
95
  idx_t k,
77
96
  float* distances,
78
97
  idx_t* labels,
@@ -83,7 +102,6 @@ class GpuIndex : public faiss::Index {
83
102
  void search_and_reconstruct(
84
103
  idx_t n,
85
104
  const float* x,
86
- // faiss::Index has idx_t for k
87
105
  idx_t k,
88
106
  float* distances,
89
107
  idx_t* labels,