faiss 0.2.7 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +11 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h

```diff
@@ -23,24 +23,19 @@ class GpuIndexFlat;
 class IVFPQ;
 
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFPQConfig()
-            : useFloat16LookupTables(false),
-              usePrecomputedTables(false),
-              interleavedLayout(false),
-              useMMCodeDistance(false) {}
-
     /// Whether or not float16 residual distance tables are used in the
     /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
     /// 16384, this is required.
-    bool useFloat16LookupTables;
+    bool useFloat16LookupTables = false;
 
     /// Whether or not we enable the precomputed table option for
     /// search, which can substantially increase the memory requirement.
-    bool usePrecomputedTables;
+    bool usePrecomputedTables = false;
 
     /// Use the alternative memory layout for the IVF lists
-    /// WARNING: this is a feature under development, do not use!
-    bool interleavedLayout;
+    /// WARNING: this is a feature under development, and is only supported with
+    /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+    bool interleavedLayout = false;
 
     /// Use GEMM-backed computation of PQ code distances for the no precomputed
     /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
     /// Note that MM code distance is enabled automatically if one uses a number
     /// of dimensions per sub-quantizer that is not natively specialized (an odd
     /// number like 7 or so).
-    bool useMMCodeDistance;
+    bool useMMCodeDistance = false;
 };
 
 /// IVFPQ index for the GPU
```
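The change above swaps a hand-written default constructor for C++11 in-class member initializers, so each default now sits next to the field it documents, and a default-constructed config is fully initialized. A minimal usage sketch (the helper function is hypothetical; it assumes a CUDA-enabled faiss build):

```cpp
#include <faiss/gpu/GpuIndexIVFPQ.h>

// Hypothetical helper: callers only touch the fields they override.
faiss::gpu::GpuIndexIVFPQConfig makeConfig(int subQuantizers, int bitsPerCode) {
    faiss::gpu::GpuIndexIVFPQConfig config;
    // Per the doc comment above, float16 lookup tables are required once
    // subQuantizers * 2^bitsPerCode > 16384.
    if (subQuantizers * (1 << bitsPerCode) > 16384) {
        config.useFloat16LookupTables = true;
    }
    // interleavedLayout keeps its default (false): per the new warning it is
    // only supported with RAFT enabled.
    return config;
}
```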
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h (continued)

```diff
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     ProductQuantizer pq;
 
    protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            idx_t nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            int numSubQuantizers,
+            int bitsPerSubQuantizer,
+            bool useFloat16LookupTables,
+            bool useMMCodeDistance,
+            bool interleavedLayout,
+            float* pqCentroidData,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Throws errors if configuration settings are improper
     void verifyPQSettings_() const;
 
```
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h

```diff
@@ -18,11 +18,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
 };
 
 /// Wrapper around the GPU implementation that looks like
```
data/vendor/faiss/faiss/gpu/GpuResources.cpp

```diff
@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
 // GpuResources
 //
 
-GpuResources::~GpuResources() {}
+GpuResources::~GpuResources() = default;
 
 cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
     return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
     return getDefaultStream(getCurrentDevice());
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+    return getRaftHandle(getCurrentDevice());
+}
+#endif
+
 std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
     return getAlternateStreams(getCurrentDevice());
 }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 // GpuResourcesProvider
 //
 
-GpuResourcesProvider::~GpuResourcesProvider() {}
+GpuResourcesProvider::~GpuResourcesProvider() = default;
 
 //
 // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
         std::shared_ptr<GpuResources> p)
         : res_(p) {}
 
-GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
 std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
     return res_;
```
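The empty destructor bodies become explicitly defaulted definitions. Behavior is unchanged, but the out-of-line `= default` keeps the key function (and thus the vtable emission) in this translation unit while stating that nothing custom happens on destruction. The idiom in isolation (names hypothetical):

```cpp
#include <cstddef>

// resources_base.h -- hypothetical polymorphic base.
struct ResourcesBase {
    // Declared but not defined here, so the vtable is emitted in exactly
    // one translation unit: the one providing the definition below.
    virtual ~ResourcesBase();
    virtual void* alloc(std::size_t bytes) = 0;
};

// resources_base.cpp
// Before: ResourcesBase::~ResourcesBase() {}
// After: same semantics, clearer intent.
ResourcesBase::~ResourcesBase() = default;
```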
data/vendor/faiss/faiss/gpu/GpuResources.h

```diff
@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
 #include <faiss/impl/FaissAssert.h>
+
 #include <memory>
 #include <utility>
 #include <vector>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
 /// Information on what/where an allocation is
 struct AllocInfo {
-    inline AllocInfo()
-            : type(AllocType::Other),
-              device(0),
-              space(MemorySpace::Device),
-              stream(nullptr) {}
+    inline AllocInfo() {}
 
     inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
             : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
     std::string toString() const;
 
     /// The internal category of the allocation
-    AllocType type;
+    AllocType type = AllocType::Other;
 
     /// The device on which the allocation is happening
-    int device;
+    int device = 0;
 
     /// The memory space of the allocation
-    MemorySpace space;
+    MemorySpace space = MemorySpace::Device;
 
     /// The stream on which new work on the memory will be ordered (e.g., if a
     /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
     ///
     /// The memory manager guarantees that the returned memory is free to use
     /// without data races on this stream specified.
-    cudaStream_t stream;
+    cudaStream_t stream = nullptr;
 };
 
 /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
 /// Information on what/where an allocation is, along with how big it should be
 struct AllocRequest : public AllocInfo {
-    inline AllocRequest() : size(0) {}
+    inline AllocRequest() {}
 
     inline AllocRequest(const AllocInfo& info, size_t sz)
             : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
     std::string toString() const;
 
     /// The size in bytes of the allocation
-    size_t size;
+    size_t size = 0;
+
+#if defined USE_NVIDIA_RAFT
+    rmm::mr::device_memory_resource* mr = nullptr;
+#endif
 };
 
 /// A RAII object that manages a temporary memory request
@@ -190,6 +211,13 @@ class GpuResources {
     /// given device
     virtual cudaStream_t getDefaultStream(int device) = 0;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    virtual raft::device_resources& getRaftHandle(int device) = 0;
+    raft::device_resources& getRaftHandleCurrentDevice();
+#endif
+
     /// Overrides the default stream for a device to the user-supplied stream.
     /// The resources object does not own this stream (i.e., it will not destroy
     /// it).
```
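With the member defaults in place, a default-constructed `AllocInfo` carries the same values the old constructor set (type `Other`, device 0, `MemorySpace::Device`, null stream), and `AllocRequest` gains an optional RMM resource pointer recording which allocator produced the block. A hedged sketch of building a request, using the `makeSpaceAlloc` helper visible in the hunk above (the stream is assumed to come from `getDefaultStreamCurrentDevice()`):

```cpp
#include <faiss/gpu/GpuResources.h>

// Sketch only: construct an allocation request against the new defaults.
faiss::gpu::AllocRequest makeRequest(cudaStream_t stream, size_t bytes) {
    faiss::gpu::AllocInfo info = faiss::gpu::makeSpaceAlloc(
            faiss::gpu::AllocType::Other,
            faiss::gpu::MemorySpace::Device,
            stream);
    // size is recorded; mr stays nullptr until an RMM resource fulfils
    // the request (RAFT builds only).
    return faiss::gpu::AllocRequest(info, bytes);
}
```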
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp

```diff
@@ -4,6 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#include <memory>
+#endif
 
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -66,7 +89,12 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
 //
 
 StandardGpuResourcesImpl::StandardGpuResourcesImpl()
-        : pinnedMemAlloc_(nullptr),
+        :
+#if defined USE_NVIDIA_RAFT
+          mmr_(new rmm::mr::managed_memory_resource),
+          pmr_(new rmm::mr::pinned_memory_resource),
+#endif
+          pinnedMemAlloc_(nullptr),
           pinnedMemAllocSize_(0),
           // let the adjustment function determine the memory size for us by
           // passing in a huge value that will then be adjusted
@@ -74,7 +102,8 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
                   -1,
                   std::numeric_limits<size_t>::max())),
           pinnedMemSize_(kDefaultPinnedMemoryAllocation),
-          allocLogging_(false) {}
+          allocLogging_(false) {
+}
 
 StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     // The temporary memory allocator has allocated memory through us, so clean
@@ -129,6 +158,9 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     }
 
     if (pinnedMemAlloc_) {
+#if defined USE_NVIDIA_RAFT
+        pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
+#else
         auto err = cudaFreeHost(pinnedMemAlloc_);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -136,6 +168,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
                 pinnedMemAlloc_,
                 (int)err,
                 cudaGetErrorString(err));
+#endif
     }
 }
 
@@ -187,11 +220,11 @@ void StandardGpuResourcesImpl::setTempMemory(size_t size) {
             p.second.reset();
 
             // Allocate new
-            p.second = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+            p.second = std::make_unique<StackDeviceMemory>(
                     this,
                     p.first,
                     // adjust for this specific device
-                    getDefaultTempMemForGPU(device, tempMemSize_)));
+                    getDefaultTempMemForGPU(device, tempMemSize_));
         }
     }
 }
@@ -274,6 +307,19 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+#if defined USE_NVIDIA_RAFT
+        // If this is the first device that we're initializing, create our
+        // pinned memory allocation
+        if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+            try {
+                pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
+            } catch (const std::bad_alloc& rmm_ex) {
+                FAISS_THROW_MSG("CUDA memory allocation error");
+            }
+
+            pinnedMemAllocSize_ = pinnedMemSize_;
+        }
+#else
         auto err = cudaHostAlloc(
                 &pinnedMemAlloc_, pinnedMemSize_, cudaHostAllocDefault);
 
@@ -286,6 +332,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
                 cudaGetErrorString(err));
 
         pinnedMemAllocSize_ = pinnedMemSize_;
+#endif
     }
 
     // Make sure that device properties for all devices are cached
@@ -307,12 +354,16 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
             device);
 
     // Create streams
-    cudaStream_t defaultStream = 0;
+    cudaStream_t defaultStream = nullptr;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&defaultStream, cudaStreamNonBlocking));
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
@@ -321,7 +372,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     std::vector<cudaStream_t> deviceStreams;
     for (int j = 0; j < kNumStreams; ++j) {
-        cudaStream_t stream = 0;
+        cudaStream_t stream = nullptr;
         CUDA_VERIFY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
         deviceStreams.push_back(stream);
@@ -330,7 +381,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     alternateStreams_[device] = std::move(deviceStreams);
 
     // Create cuBLAS handle
-    cublasHandle_t blasHandle = 0;
+    cublasHandle_t blasHandle = nullptr;
     auto blasStatus = cublasCreate(&blasHandle);
     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
     blasHandles_[device] = blasHandle;
@@ -348,11 +399,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     allocs_[device] = std::unordered_map<void*, AllocRequest>();
 
     FAISS_ASSERT(tempMemory_.count(device) == 0);
-    auto mem = std::unique_ptr<StackDeviceMemory>(new StackDeviceMemory(
+    auto mem = std::make_unique<StackDeviceMemory>(
             this,
             device,
             // adjust for this specific device
-            getDefaultTempMemForGPU(device, tempMemSize_)));
+            getDefaultTempMemForGPU(device, tempMemSize_));
 
     tempMemory_.emplace(device, std::move(mem));
 }
@@ -375,6 +426,25 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) {
     return defaultStreams_[device];
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) {
+    initializeForDevice(device);
+
+    auto it = raftHandles_.find(device);
+    if (it == raftHandles_.end()) {
+        // Make sure we are using the stream the user may have already assigned
+        // to the current GpuResources
+        raftHandles_.emplace(device, getDefaultStream(device));
+
+        // Initialize cublas handle
+        raftHandles_[device].get_cublas_handle();
+    }
+
+    // Otherwise, our base default handle
+    return raftHandles_[device];
+}
+#endif
+
 std::vector<cudaStream_t> StandardGpuResourcesImpl::getAlternateStreams(
         int device) {
     initializeForDevice(device);
```
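`getRaftHandle` creates one `raft::device_resources` per device on first request, keyed by device id and bound to the device's already-chosen default stream. Reduced to its shape, with `Handle` standing in for `raft::device_resources` and `int` for the stream type, the lazy per-device cache looks like this sketch:

```cpp
#include <unordered_map>

// Handle stands in for raft::device_resources: constructible from the
// stream that all of its work should be ordered on.
struct Handle {
    explicit Handle(int stream) : stream(stream) {}
    int stream;
};

struct PerDeviceHandles {
    Handle& get(int device, int defaultStream) {
        auto it = handles.find(device);
        if (it == handles.end()) {
            // First request for this device: bind the handle to the stream
            // the user may already have assigned, as getRaftHandle does.
            it = handles.emplace(device, Handle(defaultStream)).first;
        }
        return it->second;
    }

    std::unordered_map<int, Handle> handles;
};
```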
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp (continued)

```diff
@@ -406,8 +476,6 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
     void* p = nullptr;
 
     if (adjReq.space == MemorySpace::Temporary) {
-        // If we don't have enough space in our temporary memory manager, we
-        // need to allocate this request separately
         auto& tempMem = tempMemory_[adjReq.device];
 
         if (adjReq.size > tempMem->getSizeAvailable()) {
@@ -428,15 +496,25 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         // Otherwise, we can handle this locally
         p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size);
-
     } else if (adjReq.space == MemorySpace::Device) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            rmm::mr::device_memory_resource* current_mr =
+                    rmm::mr::get_per_device_resource(
+                            rmm::cuda_device_id{adjReq.device});
+            p = current_mr->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = current_mr;
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMalloc(&p, adjReq.size);
 
         // Throw if we fail to allocate
         if (err != cudaSuccess) {
             // FIXME: as of CUDA 11, a memory allocation error appears to be
-            // presented via cudaGetLastError as well, and needs to be cleared.
-            // Just call the function to clear it
+            // presented via cudaGetLastError as well, and needs to be
+            // cleared. Just call the function to clear it
             cudaGetLastError();
 
             std::stringstream ss;
@@ -451,7 +529,20 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else if (adjReq.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            // for now, use our own managed MR to do Unified Memory allocations.
+            // TODO: change this to use the current device resource once RMM has
+            // a way to retrieve a "guaranteed" managed memory resource for a
+            // device.
+            p = mmr_->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = mmr_.get();
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMallocManaged(&p, adjReq.size);
 
         if (err != cudaSuccess) {
@@ -472,6 +563,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
             FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)adjReq.space);
     }
```
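The allocation paths swap raw `cudaMalloc`/`cudaMallocManaged` for RMM memory resources: the block is carved out asynchronously on the request's stream, and the resource that produced it is remembered in `AllocRequest::mr` so `deallocMemory` can return it to the same pool. A standalone sketch of that stream-ordered pairing, mirroring the calls in the diff and assuming an RMM-enabled build:

```cpp
#include <cuda_runtime.h>
#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

// Allocate on whatever resource the application installed for this device
// (pool, arena, plain CUDA resource, ...), remembering the producer.
void* streamOrderedAlloc(
        int device,
        size_t bytes,
        cudaStream_t stream,
        rmm::mr::device_memory_resource** usedMr) {
    rmm::mr::device_memory_resource* mr =
            rmm::mr::get_per_device_resource(rmm::cuda_device_id{device});
    void* p = mr->allocate_async(bytes, stream); // ordered on `stream`
    *usedMr = mr; // remember the producer, as AllocRequest::mr does
    return p;
}

// Return the block to the same resource, with the same size and a stream.
void streamOrderedFree(
        rmm::mr::device_memory_resource* mr,
        void* p,
        size_t bytes,
        cudaStream_t stream) {
    mr->deallocate_async(p, bytes, stream);
}
```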
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp (continued)

```diff
@@ -505,10 +597,12 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
 
     if (req.space == MemorySpace::Temporary) {
         tempMemory_[device]->deallocMemory(device, req.stream, req.size, p);
-
     } else if (
             req.space == MemorySpace::Device ||
             req.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        req.mr->deallocate_async(p, req.size, req.stream);
+#else
         auto err = cudaFree(p);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -516,7 +610,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
                 p,
                 (int)err,
                 cudaGetErrorString(err));
-
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)req.space);
     }
@@ -561,7 +655,7 @@
 StandardGpuResources::StandardGpuResources()
         : res_(new StandardGpuResourcesImpl) {}
 
-StandardGpuResources::~StandardGpuResources() {}
+StandardGpuResources::~StandardGpuResources() = default;
 
 std::shared_ptr<GpuResources> StandardGpuResources::getResources() {
     return res_;
@@ -600,6 +694,12 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) {
     return res_->getDefaultStream(device);
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResources::getRaftHandle(int device) {
+    return res_->getRaftHandle(device);
+}
+#endif
+
 size_t StandardGpuResources::getTempMemoryAvailable(int device) const {
     return res_->getTempMemoryAvailable(device);
 }
```
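From the caller's side the new surface is small: a RAFT-enabled build can borrow the per-device handle from the same resources object it already hands to GPU indexes. A hedged usage sketch:

```cpp
#include <faiss/gpu/StandardGpuResources.h>

void useHandle() {
    faiss::gpu::StandardGpuResources provider;
    std::shared_ptr<faiss::gpu::GpuResources> res = provider.getResources();

#if defined USE_NVIDIA_RAFT
    // Lazily initializes device 0 (streams, cuBLAS, raft handle) on first use.
    raft::device_resources& handle = res->getRaftHandle(0);
    // ... pass `handle` to raft primitives that should share Faiss's streams.
    (void)handle;
#endif
    (void)res;
}
```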
data/vendor/faiss/faiss/gpu/StandardGpuResources.h

```diff
@@ -4,9 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#endif
+
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StackDeviceMemory.h>
@@ -15,6 +35,7 @@
 #include <unordered_map>
 #include <vector>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -58,6 +79,12 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// this stream upon exit from an index or other Faiss GPU call.
     cudaStream_t getDefaultStream(int device) override;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device) override;
+#endif
+
     /// Called to change the work ordering streams to the null stream
     /// for all devices
     void setDefaultNullStreamAllDevices();
@@ -92,7 +119,7 @@ class StandardGpuResourcesImpl : public GpuResources {
 
     cudaStream_t getAsyncCopyStream(int device) override;
 
-   private:
+   protected:
    /// Have GPU resources been initialized for this device yet?
     bool isInitialized(int device) const;
 
@@ -100,7 +127,7 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// memory size
     static size_t getDefaultTempMemForGPU(int device, size_t requested);
 
-   private:
+   protected:
     /// Set of currently outstanding memory allocations per device
     /// device -> (alloc request, allocated ptr)
     std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
@@ -124,6 +151,27 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// cuBLAS handle for each device
     std::unordered_map<int, cublasHandle_t> blasHandles_;
 
+#if defined USE_NVIDIA_RAFT
+    /// raft handle for each device
+    std::unordered_map<int, raft::device_resources> raftHandles_;
+
+    /**
+     * FIXME: Integrating these in a separate code path for now. Ultimately,
+     * it would be nice if we use a simple memory resource abstraction
+     * in FAISS so we could plug in whether to use RMM's memory resources
+     * or the default.
+     *
+     * There's enough duplicated logic that it doesn't *seem* to make sense
+     * to create a subclass only for the RMM memory resources.
+     */
+
+    // managed_memory_resource
+    std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
+
+    // pinned_memory_resource
+    std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
+#endif
+
     /// Pinned memory allocation for use with this GPU
     void* pinnedMemAlloc_;
     size_t pinnedMemAllocSize_;
@@ -183,10 +231,15 @@ class StandardGpuResources : public GpuResourcesProvider {
     /// Export a description of memory used for Python
     std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
             const;
-
     /// Returns the current default stream
     cudaStream_t getDefaultStream(int device);
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device);
+#endif
+
     /// Returns the current amount of temp memory available
     size_t getTempMemoryAvailable(int device) const;
 
@@ -203,3 +256,4 @@ class StandardGpuResources : public GpuResourcesProvider {
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
```
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp

```diff
@@ -42,7 +42,7 @@ int main(int argc, char** argv) {
 
     cudaProfilerStop();
 
-    auto seed = FLAGS_seed != -
+    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
     printf("using seed %ld\n", seed);
 
     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
```