faiss 0.2.7 → 0.3.1

Files changed (172)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/lib/faiss.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  12. data/vendor/faiss/faiss/AutoTune.h +0 -1
  13. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  14. data/vendor/faiss/faiss/Clustering.h +31 -21
  15. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  16. data/vendor/faiss/faiss/Index.cpp +1 -1
  17. data/vendor/faiss/faiss/Index.h +20 -5
  18. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  21. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  34. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  38. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  59. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  60. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  61. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  62. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  63. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  64. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  65. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  66. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  67. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  69. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  70. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  71. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  72. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  73. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  74. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  75. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  76. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  77. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  78. data/vendor/faiss/faiss/clone_index.h +3 -0
  79. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  80. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  81. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  82. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  90. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  92. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  93. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  97. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  98. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  99. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  101. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  104. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  105. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  106. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  107. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  108. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  109. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  110. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  111. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  113. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  119. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  125. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  126. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  127. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  128. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  129. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  133. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  135. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  136. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  137. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  138. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  139. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  140. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  141. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  142. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  143. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  144. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  145. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  146. data/vendor/faiss/faiss/utils/distances.h +81 -4
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  148. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  150. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  152. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  153. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  154. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  155. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  156. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  157. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  158. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  159. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  160. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  161. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  162. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  163. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  164. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  165. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  166. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  167. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  168. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  169. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  170. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  171. data/vendor/faiss/faiss/utils/utils.h +57 -20
  172. metadata +11 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h

@@ -23,24 +23,19 @@ class GpuIndexFlat;
 class IVFPQ;
 
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFPQConfig()
-            : useFloat16LookupTables(false),
-              usePrecomputedTables(false),
-              interleavedLayout(false),
-              useMMCodeDistance(false) {}
-
     /// Whether or not float16 residual distance tables are used in the
     /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
     /// 16384, this is required.
-    bool useFloat16LookupTables;
+    bool useFloat16LookupTables = false;
 
     /// Whether or not we enable the precomputed table option for
     /// search, which can substantially increase the memory requirement.
-    bool usePrecomputedTables;
+    bool usePrecomputedTables = false;
 
     /// Use the alternative memory layout for the IVF lists
-    /// WARNING: this is a feature under development, do not use!
-    bool interleavedLayout;
+    /// WARNING: this is a feature under development, and is only supported with
+    /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+    bool interleavedLayout = false;
 
     /// Use GEMM-backed computation of PQ code distances for the no precomputed
     /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
     /// Note that MM code distance is enabled automatically if one uses a number
     /// of dimensions per sub-quantizer that is not natively specialized (an odd
     /// number like 7 or so).
-    bool useMMCodeDistance;
+    bool useMMCodeDistance = false;
 };
 
 /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     ProductQuantizer pq;
 
    protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            idx_t nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            int numSubQuantizers,
+            int bitsPerSubQuantizer,
+            bool useFloat16LookupTables,
+            bool useMMCodeDistance,
+            bool interleavedLayout,
+            float* pqCentroidData,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Throws errors if configuration settings are improper
     void verifyPQSettings_() const;
 
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h

@@ -18,11 +18,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
};
 
 /// Wrapper around the GPU implementation that looks like
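Both config structs above trade hand-written default constructors for in-class member initializers, so a value-initialized config keeps the same defaults with less code. A minimal usage sketch follows; the GpuIndexIVFPQ constructor signature is the usual one from the public faiss GPU API and is assumed here rather than shown in this diff:

#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    faiss::gpu::StandardGpuResources res;

    // Every field starts from its in-class default (all false for IVFPQ),
    // so only the options being changed need to be touched.
    faiss::gpu::GpuIndexIVFPQConfig config;
    config.useFloat16LookupTables = true;

    // d=128, nlist=1024, 16 sub-quantizers at 8 bits per code.
    faiss::gpu::GpuIndexIVFPQ index(
            &res, 128, 1024, 16, 8, faiss::METRIC_L2, config);
    return 0;
}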
data/vendor/faiss/faiss/gpu/GpuResources.cpp

@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
 // GpuResources
 //
 
-GpuResources::~GpuResources() {}
+GpuResources::~GpuResources() = default;
 
 cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
     return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
     return getDefaultStream(getCurrentDevice());
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+    return getRaftHandle(getCurrentDevice());
+}
+#endif
+
 std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
     return getAlternateStreams(getCurrentDevice());
 }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 // GpuResourcesProvider
 //
 
-GpuResourcesProvider::~GpuResourcesProvider() {}
+GpuResourcesProvider::~GpuResourcesProvider() = default;
 
 //
 // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
         std::shared_ptr<GpuResources> p)
         : res_(p) {}
 
-GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
 std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
     return res_;
data/vendor/faiss/faiss/gpu/GpuResources.h

@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
 #include <faiss/impl/FaissAssert.h>
+
 #include <memory>
 #include <utility>
 #include <vector>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
 /// Information on what/where an allocation is
 struct AllocInfo {
-    inline AllocInfo()
-            : type(AllocType::Other),
-              device(0),
-              space(MemorySpace::Device),
-              stream(nullptr) {}
+    inline AllocInfo() {}
 
     inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
             : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
     std::string toString() const;
 
     /// The internal category of the allocation
-    AllocType type;
+    AllocType type = AllocType::Other;
 
     /// The device on which the allocation is happening
-    int device;
+    int device = 0;
 
     /// The memory space of the allocation
-    MemorySpace space;
+    MemorySpace space = MemorySpace::Device;
 
     /// The stream on which new work on the memory will be ordered (e.g., if a
     /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
     ///
     /// The memory manager guarantees that the returned memory is free to use
     /// without data races on this stream specified.
-    cudaStream_t stream;
+    cudaStream_t stream = nullptr;
 };
 
 /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
 /// Information on what/where an allocation is, along with how big it should be
 struct AllocRequest : public AllocInfo {
-    inline AllocRequest() : AllocInfo(), size(0) {}
+    inline AllocRequest() {}
 
     inline AllocRequest(const AllocInfo& info, size_t sz)
             : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
     std::string toString() const;
 
     /// The size in bytes of the allocation
-    size_t size;
+    size_t size = 0;
+
+#if defined USE_NVIDIA_RAFT
+    rmm::mr::device_memory_resource* mr = nullptr;
+#endif
 };
 
 /// A RAII object that manages a temporary memory request
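With the default member initializers above, a default-constructed AllocInfo or AllocRequest is already in a well-defined state, and the USE_NVIDIA_RAFT build adds an mr field so deallocMemory can return a block to the same RMM resource that served it. A small sketch of building a request by hand, using only the constructors shown above (sizes and device illustrative):

#include <faiss/gpu/GpuResources.h>

using namespace faiss::gpu;

// Request 1 MiB of ordinary device memory on device 0, ordered on the
// default (null) stream. Note that AllocRequest{} alone would also be
// valid now: Other / device 0 / Device space / null stream / size 0.
AllocRequest makeExampleRequest() {
    AllocInfo info(
            AllocType::Other, /*dev=*/0, MemorySpace::Device, /*st=*/nullptr);
    return AllocRequest(info, /*sz=*/1 << 20);
}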
@@ -190,6 +211,13 @@ class GpuResources {
     /// given device
     virtual cudaStream_t getDefaultStream(int device) = 0;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    virtual raft::device_resources& getRaftHandle(int device) = 0;
+    raft::device_resources& getRaftHandleCurrentDevice();
+#endif
+
     /// Overrides the default stream for a device to the user-supplied stream.
     /// The resources object does not own this stream (i.e., it will not destroy
     /// it).
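The new pair of accessors follows the existing pattern in this class (compare getDefaultStream / getDefaultStreamCurrentDevice): a pure virtual per-device getter plus a non-virtual convenience wrapper for the current device. A sketch of how RAFT-enabled caller code might use it; the sync_stream() call is ordinary raft::device_resources usage, not part of this diff:

#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/StandardGpuResources.h>
#include <raft/core/device_resources.hpp>

void waitForRaftWork(faiss::gpu::StandardGpuResources& provider, int device) {
    // Hands back a handle bound to the same stream Faiss uses on `device`.
    raft::device_resources& handle = provider.getRaftHandle(device);

    // Block until work enqueued on that handle's stream has completed.
    handle.sync_stream();
}
#endif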
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp

@@ -4,6 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#include <memory>
+#endif
 
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -66,7 +89,12 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
 //
 
 StandardGpuResourcesImpl::StandardGpuResourcesImpl()
-        : pinnedMemAlloc_(nullptr),
+        :
+#if defined USE_NVIDIA_RAFT
+          mmr_(new rmm::mr::managed_memory_resource),
+          pmr_(new rmm::mr::pinned_memory_resource),
+#endif
+          pinnedMemAlloc_(nullptr),
           pinnedMemAllocSize_(0),
           // let the adjustment function determine the memory size for us by
           // passing in a huge value that will then be adjusted
@@ -74,7 +102,8 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
                   -1,
                   std::numeric_limits<size_t>::max())),
           pinnedMemSize_(kDefaultPinnedMemoryAllocation),
-          allocLogging_(false) {}
+          allocLogging_(false) {
+}
 
 StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     // The temporary memory allocator has allocated memory through us, so clean
@@ -129,6 +158,9 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     }
 
     if (pinnedMemAlloc_) {
+#if defined USE_NVIDIA_RAFT
+        pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
+#else
         auto err = cudaFreeHost(pinnedMemAlloc_);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -136,6 +168,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
                 pinnedMemAlloc_,
                 (int)err,
                 cudaGetErrorString(err));
+#endif
     }
 }
 
@@ -187,11 +220,11 @@ void StandardGpuResourcesImpl::setTempMemory(size_t size) {
             p.second.reset();
 
             // Allocate new
-            p.second = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+            p.second = std::make_unique<StackDeviceMemory>(
                     this,
                     p.first,
                     // adjust for this specific device
-                    getDefaultTempMemForGPU(device, tempMemSize_)));
+                    getDefaultTempMemForGPU(device, tempMemSize_));
         }
     }
 }
@@ -274,6 +307,19 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+#if defined USE_NVIDIA_RAFT
+        // If this is the first device that we're initializing, create our
+        // pinned memory allocation
+        if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+            try {
+                pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
+            } catch (const std::bad_alloc& rmm_ex) {
+                FAISS_THROW_MSG("CUDA memory allocation error");
+            }
+
+            pinnedMemAllocSize_ = pinnedMemSize_;
+        }
+#else
         auto err = cudaHostAlloc(
                 &pinnedMemAlloc_, pinnedMemSize_, cudaHostAllocDefault);
 
@@ -286,6 +332,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
                 cudaGetErrorString(err));
 
         pinnedMemAllocSize_ = pinnedMemSize_;
+#endif
     }
 
     // Make sure that device properties for all devices are cached
@@ -307,12 +354,16 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
             device);
 
     // Create streams
-    cudaStream_t defaultStream = 0;
+    cudaStream_t defaultStream = nullptr;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&defaultStream, cudaStreamNonBlocking));
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
@@ -321,7 +372,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     std::vector<cudaStream_t> deviceStreams;
     for (int j = 0; j < kNumStreams; ++j) {
-        cudaStream_t stream = 0;
+        cudaStream_t stream = nullptr;
         CUDA_VERIFY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
         deviceStreams.push_back(stream);
@@ -330,7 +381,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     alternateStreams_[device] = std::move(deviceStreams);
 
     // Create cuBLAS handle
-    cublasHandle_t blasHandle = 0;
+    cublasHandle_t blasHandle = nullptr;
     auto blasStatus = cublasCreate(&blasHandle);
     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
     blasHandles_[device] = blasHandle;
@@ -348,11 +399,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     allocs_[device] = std::unordered_map<void*, AllocRequest>();
 
     FAISS_ASSERT(tempMemory_.count(device) == 0);
-    auto mem = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+    auto mem = std::make_unique<StackDeviceMemory>(
            this,
            device,
            // adjust for this specific device
-            getDefaultTempMemForGPU(device, tempMemSize_)));
+            getDefaultTempMemForGPU(device, tempMemSize_));
 
     tempMemory_.emplace(device, std::move(mem));
 }
@@ -375,6 +426,25 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) {
     return defaultStreams_[device];
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) {
+    initializeForDevice(device);
+
+    auto it = raftHandles_.find(device);
+    if (it == raftHandles_.end()) {
+        // Make sure we are using the stream the user may have already assigned
+        // to the current GpuResources
+        raftHandles_.emplace(device, getDefaultStream(device));
+
+        // Initialize cublas handle
+        raftHandles_[device].get_cublas_handle();
+    }
+
+    // Otherwise, our base default handle
+    return raftHandles_[device];
+}
+#endif
+
 std::vector<cudaStream_t> StandardGpuResourcesImpl::getAlternateStreams(
         int device) {
     initializeForDevice(device);
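getRaftHandle builds each raft::device_resources lazily, keyed by device, and only on a cache miss constructs one bound to that device's default stream. The same find-or-emplace idiom in isolation (a generic sketch, not faiss code):

#include <unordered_map>

// Look the key up first; construct the (possibly expensive) value only on
// a miss, then hand back a reference either way.
template <typename K, typename V, typename MakeV>
V& findOrCreate(std::unordered_map<K, V>& map, const K& key, MakeV make) {
    auto it = map.find(key);
    if (it == map.end()) {
        it = map.emplace(key, make()).first;
    }
    return it->second;
}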
@@ -406,8 +476,6 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
     void* p = nullptr;
 
     if (adjReq.space == MemorySpace::Temporary) {
-        // If we don't have enough space in our temporary memory manager, we
-        // need to allocate this request separately
         auto& tempMem = tempMemory_[adjReq.device];
 
         if (adjReq.size > tempMem->getSizeAvailable()) {
@@ -428,15 +496,25 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         // Otherwise, we can handle this locally
         p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size);
-
     } else if (adjReq.space == MemorySpace::Device) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            rmm::mr::device_memory_resource* current_mr =
+                    rmm::mr::get_per_device_resource(
+                            rmm::cuda_device_id{adjReq.device});
+            p = current_mr->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = current_mr;
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMalloc(&p, adjReq.size);
 
         // Throw if we fail to allocate
         if (err != cudaSuccess) {
             // FIXME: as of CUDA 11, a memory allocation error appears to be
-            // presented via cudaGetLastError as well, and needs to be cleared.
-            // Just call the function to clear it
+            // presented via cudaGetLastError as well, and needs to be
+            // cleared. Just call the function to clear it
             cudaGetLastError();
 
             std::stringstream ss;
@@ -451,7 +529,20 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else if (adjReq.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            // for now, use our own managed MR to do Unified Memory allocations.
+            // TODO: change this to use the current device resource once RMM has
+            // a way to retrieve a "guaranteed" managed memory resource for a
+            // device.
+            p = mmr_->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = mmr_.get();
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMallocManaged(&p, adjReq.size);
 
         if (err != cudaSuccess) {
@@ -472,6 +563,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)adjReq.space);
     }
@@ -505,10 +597,12 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
 
     if (req.space == MemorySpace::Temporary) {
         tempMemory_[device]->deallocMemory(device, req.stream, req.size, p);
-
     } else if (
             req.space == MemorySpace::Device ||
             req.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        req.mr->deallocate_async(p, req.size, req.stream);
+#else
         auto err = cudaFree(p);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -516,7 +610,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
                 p,
                 (int)err,
                 cudaGetErrorString(err));
-
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)req.space);
     }
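Under USE_NVIDIA_RAFT, device and unified allocations become stream-ordered through RMM: allocMemory fetches the per-device memory resource, allocates on the request's stream, and records the resource in AllocRequest::mr so deallocMemory can free through the same resource on the same stream. The same pairing using RMM directly (a sketch assuming RMM is available; not faiss code):

#include <cuda_runtime.h>
#include <cstddef>
#include <rmm/mr/device/per_device_resource.hpp>

// Stream-ordered round trip mirroring allocMemory/deallocMemory: the block
// must go back to the resource (and stream) that produced it.
void roundTrip(int device, std::size_t bytes, cudaStream_t stream) {
    rmm::mr::device_memory_resource* mr =
            rmm::mr::get_per_device_resource(rmm::cuda_device_id{device});

    void* p = mr->allocate_async(bytes, stream); // ordered on `stream`
    // ... launch kernels that use p on `stream` ...
    mr->deallocate_async(p, bytes, stream);      // ordered on `stream`
}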
@@ -561,7 +655,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
 StandardGpuResources::StandardGpuResources()
         : res_(new StandardGpuResourcesImpl) {}
 
-StandardGpuResources::~StandardGpuResources() {}
+StandardGpuResources::~StandardGpuResources() = default;
 
 std::shared_ptr<GpuResources> StandardGpuResources::getResources() {
     return res_;
@@ -600,6 +694,12 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) {
     return res_->getDefaultStream(device);
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResources::getRaftHandle(int device) {
+    return res_->getRaftHandle(device);
+}
+#endif
+
 size_t StandardGpuResources::getTempMemoryAvailable(int device) const {
     return res_->getTempMemoryAvailable(device);
 }
data/vendor/faiss/faiss/gpu/StandardGpuResources.h

@@ -4,9 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#endif
+
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StackDeviceMemory.h>
@@ -15,6 +35,7 @@
 #include <unordered_map>
 #include <vector>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -58,6 +79,12 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// this stream upon exit from an index or other Faiss GPU call.
     cudaStream_t getDefaultStream(int device) override;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device) override;
+#endif
+
     /// Called to change the work ordering streams to the null stream
     /// for all devices
     void setDefaultNullStreamAllDevices();
@@ -92,7 +119,7 @@ class StandardGpuResourcesImpl : public GpuResources {
 
     cudaStream_t getAsyncCopyStream(int device) override;
 
-   private:
+   protected:
     /// Have GPU resources been initialized for this device yet?
     bool isInitialized(int device) const;
 
@@ -100,7 +127,7 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// memory size
     static size_t getDefaultTempMemForGPU(int device, size_t requested);
 
-   private:
+   protected:
     /// Set of currently outstanding memory allocations per device
     /// device -> (alloc request, allocated ptr)
     std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
@@ -124,6 +151,27 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// cuBLAS handle for each device
     std::unordered_map<int, cublasHandle_t> blasHandles_;
 
+#if defined USE_NVIDIA_RAFT
+    /// raft handle for each device
+    std::unordered_map<int, raft::device_resources> raftHandles_;
+
+    /**
+     * FIXME: Integrating these in a separate code path for now. Ultimately,
+     * it would be nice if we use a simple memory resource abstraction
+     * in FAISS so we could plug in whether to use RMM's memory resources
+     * or the default.
+     *
+     * There's enough duplicated logic that it doesn't *seem* to make sense
+     * to create a subclass only for the RMM memory resources.
+     */
+
+    // managed_memory_resource
+    std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
+
+    // pinned_memory_resource
+    std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
+#endif
+
     /// Pinned memory allocation for use with this GPU
     void* pinnedMemAlloc_;
     size_t pinnedMemAllocSize_;
@@ -183,10 +231,15 @@ class StandardGpuResources : public GpuResourcesProvider {
     /// Export a description of memory used for Python
     std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
             const;
-
     /// Returns the current default stream
     cudaStream_t getDefaultStream(int device);
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device);
+#endif
+
     /// Returns the current amount of temp memory available
     size_t getTempMemoryAvailable(int device) const;
 
@@ -203,3 +256,4 @@ class StandardGpuResources : public GpuResourcesProvider {
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp

@@ -42,7 +42,7 @@ int main(int argc, char** argv) {
 
     cudaProfilerStop();
 
-    auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
+    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
     printf("using seed %ld\n", seed);
 
     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
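The PerfClustering change is purely cosmetic: FLAGS_seed is a 64-bit gflags value, so the plain int literal -1 is promoted to the flag's type before the comparison and behaves identically to -1L. A self-contained sketch of the pattern (assuming gflags; the flag name mirrors the benchmark's):

#include <gflags/gflags.h>
#include <cstdio>
#include <ctime>

DEFINE_int64(seed, -1, "RNG seed; -1 derives one from the clock");

int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // -1 promotes to int64_t here, matching the behavior before the edit.
    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
    printf("using seed %ld\n", (long)seed);
    return 0;
}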