faiss 0.2.7 → 0.3.1

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/lib/faiss.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  12. data/vendor/faiss/faiss/AutoTune.h +0 -1
  13. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  14. data/vendor/faiss/faiss/Clustering.h +31 -21
  15. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  16. data/vendor/faiss/faiss/Index.cpp +1 -1
  17. data/vendor/faiss/faiss/Index.h +20 -5
  18. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  21. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  34. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  38. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  59. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  60. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  61. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  62. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  63. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  64. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  65. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  66. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  67. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  69. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  70. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  71. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  72. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  73. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  74. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  75. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  76. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  77. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  78. data/vendor/faiss/faiss/clone_index.h +3 -0
  79. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  80. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  81. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  82. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  90. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  92. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  93. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  97. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  98. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  99. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  101. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  104. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  105. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  106. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  107. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  108. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  109. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  110. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  111. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  113. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  119. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  125. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  126. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  127. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  128. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  129. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  133. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  135. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  136. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  137. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  138. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  139. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  140. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  141. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  142. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  143. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  144. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  145. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  146. data/vendor/faiss/faiss/utils/distances.h +81 -4
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  148. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  150. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  152. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  153. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  154. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  155. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  156. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  157. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  158. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  159. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  160. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  161. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  162. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  163. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  164. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  165. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  166. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  167. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  168. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  169. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  170. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  171. data/vendor/faiss/faiss/utils/utils.h +57 -20
  172. metadata +11 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11

@@ -23,24 +23,19 @@ class GpuIndexFlat;
 class IVFPQ;
 
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFPQConfig()
-            : useFloat16LookupTables(false),
-              usePrecomputedTables(false),
-              interleavedLayout(false),
-              useMMCodeDistance(false) {}
-
     /// Whether or not float16 residual distance tables are used in the
     /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
     /// 16384, this is required.
-    bool useFloat16LookupTables;
+    bool useFloat16LookupTables = false;
 
     /// Whether or not we enable the precomputed table option for
     /// search, which can substantially increase the memory requirement.
-    bool usePrecomputedTables;
+    bool usePrecomputedTables = false;
 
     /// Use the alternative memory layout for the IVF lists
-    /// WARNING: this is a feature under development, do not use!
-    bool interleavedLayout;
+    /// WARNING: this is a feature under development, and is only supported with
+    /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+    bool interleavedLayout = false;
 
     /// Use GEMM-backed computation of PQ code distances for the no precomputed
     /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
     /// Note that MM code distance is enabled automatically if one uses a number
     /// of dimensions per sub-quantizer that is not natively specialized (an odd
     /// number like 7 or so).
-    bool useMMCodeDistance;
+    bool useMMCodeDistance = false;
 };
 
 /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     ProductQuantizer pq;
 
    protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            idx_t nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            int numSubQuantizers,
+            int bitsPerSubQuantizer,
+            bool useFloat16LookupTables,
+            bool useMMCodeDistance,
+            bool interleavedLayout,
+            float* pqCentroidData,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Throws errors if configuration settings are improper
     void verifyPQSettings_() const;
 
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3

@@ -18,11 +18,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
};
 
 /// Wrapper around the GPU implementation that looks like
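
A pattern that recurs throughout this release: hand-written default constructors are replaced by C++11 in-class default member initializers, as in the two config structs above. A minimal sketch of the idea (the struct and members here are illustrative, not from faiss):

// Before: defaults live in a user-written constructor, away from the
// member declarations, and must track declaration order.
struct ConfigOld {
    inline ConfigOld() : useFloat16(false), nprobe(1) {}
    bool useFloat16;
    int nprobe;
};

// After: each default sits on the member itself; every constructor,
// present or added later, picks it up automatically.
struct ConfigNew {
    bool useFloat16 = false;
    int nprobe = 1;
};

Both forms default-construct identically; the initializer style simply deletes a constructor that existed only to zero fields, which is why these hunks are net removals.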
data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3

@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
 // GpuResources
 //
 
-GpuResources::~GpuResources() {}
+GpuResources::~GpuResources() = default;
 
 cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
     return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
     return getDefaultStream(getCurrentDevice());
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+    return getRaftHandle(getCurrentDevice());
+}
+#endif
+
 std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
     return getAlternateStreams(getCurrentDevice());
 }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 // GpuResourcesProvider
 //
 
-GpuResourcesProvider::~GpuResourcesProvider() {}
+GpuResourcesProvider::~GpuResourcesProvider() = default;
 
 //
 // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
         std::shared_ptr<GpuResources> p)
         : res_(p) {}
 
-GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
 std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
     return res_;
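
Several empty destructor bodies in this file become `= default`. A defaulted destructor is compiler-generated rather than user-provided, which states intent directly and, where nothing else forces a non-trivial destructor, preserves properties such as trivial destructibility that an empty `{}` body would forfeit. A minimal sketch (the type is illustrative):

struct Resource {
    // ~Resource() {}        // user-provided, even though it does nothing
    ~Resource() = default;   // compiler-generated; same behavior, clearer intent
};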
data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11

@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
 #include <faiss/impl/FaissAssert.h>
+
 #include <memory>
 #include <utility>
 #include <vector>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
 /// Information on what/where an allocation is
 struct AllocInfo {
-    inline AllocInfo()
-            : type(AllocType::Other),
-              device(0),
-              space(MemorySpace::Device),
-              stream(nullptr) {}
+    inline AllocInfo() {}
 
     inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
             : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
     std::string toString() const;
 
     /// The internal category of the allocation
-    AllocType type;
+    AllocType type = AllocType::Other;
 
     /// The device on which the allocation is happening
-    int device;
+    int device = 0;
 
     /// The memory space of the allocation
-    MemorySpace space;
+    MemorySpace space = MemorySpace::Device;
 
     /// The stream on which new work on the memory will be ordered (e.g., if a
     /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
     ///
     /// The memory manager guarantees that the returned memory is free to use
     /// without data races on this stream specified.
-    cudaStream_t stream;
+    cudaStream_t stream = nullptr;
 };
 
 /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
 /// Information on what/where an allocation is, along with how big it should be
 struct AllocRequest : public AllocInfo {
-    inline AllocRequest() : AllocInfo(), size(0) {}
+    inline AllocRequest() {}
 
     inline AllocRequest(const AllocInfo& info, size_t sz)
             : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
     std::string toString() const;
 
     /// The size in bytes of the allocation
-    size_t size;
+    size_t size = 0;
+
+#if defined USE_NVIDIA_RAFT
+    rmm::mr::device_memory_resource* mr = nullptr;
+#endif
 };
 
 /// A RAII object that manages a temporary memory request
@@ -190,6 +211,13 @@ class GpuResources {
     /// given device
     virtual cudaStream_t getDefaultStream(int device) = 0;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    virtual raft::device_resources& getRaftHandle(int device) = 0;
+    raft::device_resources& getRaftHandleCurrentDevice();
+#endif
+
     /// Overrides the default stream for a device to the user-supplied stream.
     /// The resources object does not own this stream (i.e., it will not destroy
     /// it).
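
When faiss is built with USE_NVIDIA_RAFT, every GpuResources implementation now exposes a per-device raft::device_resources handle. A hedged sketch of how calling code might use it, assuming raft's usual handle accessors get_stream() and sync_stream() (the function below is illustrative, not part of faiss):

#if defined USE_NVIDIA_RAFT
// Illustrative only: fetch the handle for the current device and look at
// the CUDA stream on which raft primitives will be ordered.
void useRaftHandle(faiss::gpu::GpuResources* res) {
    raft::device_resources& handle = res->getRaftHandleCurrentDevice();
    rmm::cuda_stream_view stream = handle.get_stream();
    // ... launch raft primitives with `handle` ...
    handle.sync_stream(); // wait for work ordered on the handle's stream
}
#endif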
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17

@@ -4,6 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#include <memory>
+#endif
 
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -66,7 +89,12 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
 //
 
 StandardGpuResourcesImpl::StandardGpuResourcesImpl()
-        : pinnedMemAlloc_(nullptr),
+        :
+#if defined USE_NVIDIA_RAFT
+          mmr_(new rmm::mr::managed_memory_resource),
+          pmr_(new rmm::mr::pinned_memory_resource),
+#endif
+          pinnedMemAlloc_(nullptr),
           pinnedMemAllocSize_(0),
           // let the adjustment function determine the memory size for us by
           // passing in a huge value that will then be adjusted
@@ -74,7 +102,8 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
                   -1,
                   std::numeric_limits<size_t>::max())),
           pinnedMemSize_(kDefaultPinnedMemoryAllocation),
-          allocLogging_(false) {}
+          allocLogging_(false) {
+}
 
 StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
@@ -129,6 +158,9 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     }
 
     if (pinnedMemAlloc_) {
+#if defined USE_NVIDIA_RAFT
+        pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
+#else
         auto err = cudaFreeHost(pinnedMemAlloc_);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -136,6 +168,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
                 pinnedMemAlloc_,
                 (int)err,
                 cudaGetErrorString(err));
+#endif
     }
 }
@@ -187,11 +220,11 @@ void StandardGpuResourcesImpl::setTempMemory(size_t size) {
             p.second.reset();
 
             // Allocate new
-            p.second = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+            p.second = std::make_unique<StackDeviceMemory>(
                     this,
                    p.first,
                     // adjust for this specific device
-                    getDefaultTempMemForGPU(device, tempMemSize_)));
+                    getDefaultTempMemForGPU(device, tempMemSize_));
        }
    }
}
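
The StackDeviceMemory allocations above also move from wrapping a raw `new` in a `unique_ptr` to `std::make_unique`. A minimal illustration of the difference (the Pool type is a placeholder):

#include <memory>

struct Pool {
    Pool(int /*device*/, size_t /*bytes*/) {}
};

std::unique_ptr<Pool> makePool(int device, size_t bytes) {
    // Old style: spells the type twice and uses a naked `new`.
    // return std::unique_ptr<Pool>(new Pool(device, bytes));

    // make_unique: one expression, no naked new, and exception-safe when
    // embedded in larger expressions.
    return std::make_unique<Pool>(device, bytes);
}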
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp (continued)

@@ -274,6 +307,19 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+#if defined USE_NVIDIA_RAFT
+        // If this is the first device that we're initializing, create our
+        // pinned memory allocation
+        if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+            try {
+                pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
+            } catch (const std::bad_alloc& rmm_ex) {
+                FAISS_THROW_MSG("CUDA memory allocation error");
+            }
+
+            pinnedMemAllocSize_ = pinnedMemSize_;
+        }
+#else
         auto err = cudaHostAlloc(
                 &pinnedMemAlloc_, pinnedMemSize_, cudaHostAllocDefault);
 
@@ -286,6 +332,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
                 cudaGetErrorString(err));
 
         pinnedMemAllocSize_ = pinnedMemSize_;
+#endif
     }
 
     // Make sure that device properties for all devices are cached
@@ -307,12 +354,16 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
             device);
 
     // Create streams
-    cudaStream_t defaultStream = 0;
+    cudaStream_t defaultStream = nullptr;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&defaultStream, cudaStreamNonBlocking));
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
@@ -321,7 +372,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     std::vector<cudaStream_t> deviceStreams;
     for (int j = 0; j < kNumStreams; ++j) {
-        cudaStream_t stream = 0;
+        cudaStream_t stream = nullptr;
         CUDA_VERIFY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
         deviceStreams.push_back(stream);
@@ -330,7 +381,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     alternateStreams_[device] = std::move(deviceStreams);
 
     // Create cuBLAS handle
-    cublasHandle_t blasHandle = 0;
+    cublasHandle_t blasHandle = nullptr;
     auto blasStatus = cublasCreate(&blasHandle);
     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
     blasHandles_[device] = blasHandle;
@@ -348,11 +399,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     allocs_[device] = std::unordered_map<void*, AllocRequest>();
 
     FAISS_ASSERT(tempMemory_.count(device) == 0);
-    auto mem = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+    auto mem = std::make_unique<StackDeviceMemory>(
             this,
             device,
             // adjust for this specific device
-            getDefaultTempMemForGPU(device, tempMemSize_)));
+            getDefaultTempMemForGPU(device, tempMemSize_));
 
     tempMemory_.emplace(device, std::move(mem));
 }
@@ -375,6 +426,25 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) {
     return defaultStreams_[device];
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) {
+    initializeForDevice(device);
+
+    auto it = raftHandles_.find(device);
+    if (it == raftHandles_.end()) {
+        // Make sure we are using the stream the user may have already assigned
+        // to the current GpuResources
+        raftHandles_.emplace(device, getDefaultStream(device));
+
+        // Initialize cublas handle
+        raftHandles_[device].get_cublas_handle();
+    }
+
+    // Otherwise, our base default handle
+    return raftHandles_[device];
+}
+#endif
+
 std::vector<cudaStream_t> StandardGpuResourcesImpl::getAlternateStreams(
         int device) {
     initializeForDevice(device);
@@ -406,8 +476,6 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
     void* p = nullptr;
 
     if (adjReq.space == MemorySpace::Temporary) {
-        // If we don't have enough space in our temporary memory manager, we
-        // need to allocate this request separately
         auto& tempMem = tempMemory_[adjReq.device];
 
         if (adjReq.size > tempMem->getSizeAvailable()) {
@@ -428,15 +496,25 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         // Otherwise, we can handle this locally
         p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size);
-
     } else if (adjReq.space == MemorySpace::Device) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            rmm::mr::device_memory_resource* current_mr =
+                    rmm::mr::get_per_device_resource(
+                            rmm::cuda_device_id{adjReq.device});
+            p = current_mr->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = current_mr;
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMalloc(&p, adjReq.size);
 
         // Throw if we fail to allocate
         if (err != cudaSuccess) {
             // FIXME: as of CUDA 11, a memory allocation error appears to be
-            // presented via cudaGetLastError as well, and needs to be cleared.
-            // Just call the function to clear it
+            // presented via cudaGetLastError as well, and needs to be
+            // cleared. Just call the function to clear it
             cudaGetLastError();
 
             std::stringstream ss;
@@ -451,7 +529,20 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else if (adjReq.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            // for now, use our own managed MR to do Unified Memory allocations.
+            // TODO: change this to use the current device resource once RMM has
+            // a way to retrieve a "guaranteed" managed memory resource for a
+            // device.
+            p = mmr_->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = mmr_.get();
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMallocManaged(&p, adjReq.size);
 
         if (err != cudaSuccess) {
@@ -472,6 +563,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)adjReq.space);
     }
@@ -505,10 +597,12 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
 
     if (req.space == MemorySpace::Temporary) {
         tempMemory_[device]->deallocMemory(device, req.stream, req.size, p);
-
     } else if (
             req.space == MemorySpace::Device ||
             req.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        req.mr->deallocate_async(p, req.size, req.stream);
+#else
        auto err = cudaFree(p);
        FAISS_ASSERT_FMT(
                err == cudaSuccess,
@@ -516,7 +610,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
                p,
                (int)err,
                cudaGetErrorString(err));
-
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)req.space);
     }
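
With RAFT enabled, device and unified allocations above are routed through RMM memory resources rather than raw cudaMalloc/cudaFree, and the resource that produced a block is recorded in AllocRequest::mr so the matching deallocate_async returns it to the same resource. A hedged sketch of that pattern in isolation (function names are illustrative, not faiss APIs):

#if defined USE_NVIDIA_RAFT
#include <rmm/mr/device/per_device_resource.hpp>

// Illustrative only: stream-ordered allocation through the per-device RMM
// resource, remembering the resource for the matching deallocation.
void* rmmAlloc(
        int device,
        size_t bytes,
        cudaStream_t stream,
        rmm::mr::device_memory_resource*& mrOut) {
    auto* mr = rmm::mr::get_per_device_resource(rmm::cuda_device_id{device});
    void* p = mr->allocate_async(bytes, stream); // throws on failure
    mrOut = mr; // this same resource must free the block
    return p;
}

void rmmFree(
        rmm::mr::device_memory_resource* mr,
        void* p,
        size_t bytes,
        cudaStream_t stream) {
    mr->deallocate_async(p, bytes, stream);
}
#endif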
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp (continued)

@@ -561,7 +655,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
 StandardGpuResources::StandardGpuResources()
         : res_(new StandardGpuResourcesImpl) {}
 
-StandardGpuResources::~StandardGpuResources() {}
+StandardGpuResources::~StandardGpuResources() = default;
 
 std::shared_ptr<GpuResources> StandardGpuResources::getResources() {
     return res_;
@@ -600,6 +694,12 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) {
     return res_->getDefaultStream(device);
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResources::getRaftHandle(int device) {
+    return res_->getRaftHandle(device);
+}
+#endif
+
 size_t StandardGpuResources::getTempMemoryAvailable(int device) const {
     return res_->getTempMemoryAvailable(device);
 }
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3

@@ -4,9 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#endif
+
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StackDeviceMemory.h>
@@ -15,6 +35,7 @@
 #include <unordered_map>
 #include <vector>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -58,6 +79,12 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// this stream upon exit from an index or other Faiss GPU call.
     cudaStream_t getDefaultStream(int device) override;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device) override;
+#endif
+
     /// Called to change the work ordering streams to the null stream
     /// for all devices
     void setDefaultNullStreamAllDevices();
@@ -92,7 +119,7 @@ class StandardGpuResourcesImpl : public GpuResources {
 
     cudaStream_t getAsyncCopyStream(int device) override;
 
-   private:
+   protected:
     /// Have GPU resources been initialized for this device yet?
     bool isInitialized(int device) const;
 
@@ -100,7 +127,7 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// memory size
     static size_t getDefaultTempMemForGPU(int device, size_t requested);
 
-   private:
+   protected:
     /// Set of currently outstanding memory allocations per device
     /// device -> (alloc request, allocated ptr)
     std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
@@ -124,6 +151,27 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// cuBLAS handle for each device
     std::unordered_map<int, cublasHandle_t> blasHandles_;
 
+#if defined USE_NVIDIA_RAFT
+    /// raft handle for each device
+    std::unordered_map<int, raft::device_resources> raftHandles_;
+
+    /**
+     * FIXME: Integrating these in a separate code path for now. Ultimately,
+     * it would be nice if we use a simple memory resource abstraction
+     * in FAISS so we could plug in whether to use RMM's memory resources
+     * or the default.
+     *
+     * There's enough duplicated logic that it doesn't *seem* to make sense
+     * to create a subclass only for the RMM memory resources.
+     */
+
+    // managed_memory_resource
+    std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
+
+    // pinned_memory_resource
+    std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
+#endif
+
     /// Pinned memory allocation for use with this GPU
     void* pinnedMemAlloc_;
     size_t pinnedMemAllocSize_;
@@ -183,10 +231,15 @@ class StandardGpuResources : public GpuResourcesProvider {
     /// Export a description of memory used for Python
     std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
             const;
-
     /// Returns the current default stream
     cudaStream_t getDefaultStream(int device);
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device);
+#endif
+
     /// Returns the current amount of temp memory available
     size_t getTempMemoryAvailable(int device) const;
 
@@ -203,3 +256,4 @@ class StandardGpuResources : public GpuResourcesProvider {
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
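
The new pragma pair wraps the entire header so that its classes keep default (exported) symbol visibility even when the library is compiled with -fvisibility=hidden. A minimal sketch of the mechanism (names illustrative):

#pragma GCC visibility push(default)
namespace example {
// Declarations in this region are exported from the shared object and
// remain linkable from user code under -fvisibility=hidden builds.
struct Exported {
    void method();
};
} // namespace example
#pragma GCC visibility pop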
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1

@@ -42,7 +42,7 @@ int main(int argc, char** argv) {
 
     cudaProfilerStop();
 
-    auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
+    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
     printf("using seed %ld\n", seed);
 
     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);