faiss 0.2.7 → 0.3.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +11 -4
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h:

```diff
@@ -23,24 +23,19 @@ class GpuIndexFlat;
 class IVFPQ;
 
 struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFPQConfig()
-            : useFloat16LookupTables(false),
-              usePrecomputedTables(false),
-              interleavedLayout(false),
-              useMMCodeDistance(false) {}
-
     /// Whether or not float16 residual distance tables are used in the
     /// list scanning kernels. When subQuantizers * 2^bitsPerCode >
     /// 16384, this is required.
-    bool useFloat16LookupTables;
+    bool useFloat16LookupTables = false;
 
     /// Whether or not we enable the precomputed table option for
     /// search, which can substantially increase the memory requirement.
-    bool usePrecomputedTables;
+    bool usePrecomputedTables = false;
 
     /// Use the alternative memory layout for the IVF lists
-    /// WARNING: this is a feature under development,
-    bool interleavedLayout;
+    /// WARNING: this is a feature under development, and is only supported with
+    /// RAFT enabled for the index. Do not use if RAFT is not enabled.
+    bool interleavedLayout = false;
 
     /// Use GEMM-backed computation of PQ code distances for the no precomputed
     /// table version of IVFPQ.
@@ -50,7 +45,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
     /// Note that MM code distance is enabled automatically if one uses a number
     /// of dimensions per sub-quantizer that is not natively specialized (an odd
     /// number like 7 or so).
-    bool useMMCodeDistance;
+    bool useMMCodeDistance = false;
 };
 
 /// IVFPQ index for the GPU
@@ -139,6 +134,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     ProductQuantizer pq;
 
    protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            idx_t nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            int numSubQuantizers,
+            int bitsPerSubQuantizer,
+            bool useFloat16LookupTables,
+            bool useMMCodeDistance,
+            bool interleavedLayout,
+            float* pqCentroidData,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
     /// Throws errors if configuration settings are improper
     void verifyPQSettings_() const;
 
```
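The config hunks above replace a hand-written default constructor with C++11 in-class default member initializers. A minimal, self-contained sketch of the pattern (illustrative only, not faiss code):

```cpp
#include <iostream>

// Old style: defaults live in a hand-written constructor.
struct ConfigOld {
    ConfigOld() : useFloat16(false), usePrecomputed(false) {}
    bool useFloat16;
    bool usePrecomputed;
};

// New style: C++11 in-class default member initializers, as in the hunks
// above. Same defaults, no constructor to keep in sync.
struct ConfigNew {
    bool useFloat16 = false;
    bool usePrecomputed = false;
};

int main() {
    ConfigNew cfg;
    cfg.useFloat16 = true; // override one default, keep the rest
    std::cout << cfg.useFloat16 << " " << cfg.usePrecomputed << "\n";
    return 0;
}
```

The net behavior is identical, but a field added later cannot silently go uninitialized, and the constructor boilerplate disappears.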
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h:

```diff
@@ -18,11 +18,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
 };
 
 /// Wrapper around the GPU implementation that looks like
```
data/vendor/faiss/faiss/gpu/GpuResources.cpp:

```diff
@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -143,7 +158,7 @@ GpuMemoryReservation::~GpuMemoryReservation() {
 // GpuResources
 //
 
-GpuResources::~GpuResources()
+GpuResources::~GpuResources() = default;
 
 cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
     return getBlasHandle(getCurrentDevice());
@@ -153,6 +168,12 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
     return getDefaultStream(getCurrentDevice());
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& GpuResources::getRaftHandleCurrentDevice() {
+    return getRaftHandle(getCurrentDevice());
+}
+#endif
+
 std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
     return getAlternateStreams(getCurrentDevice());
 }
@@ -182,7 +203,7 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 // GpuResourcesProvider
 //
 
-GpuResourcesProvider::~GpuResourcesProvider()
+GpuResourcesProvider::~GpuResourcesProvider() = default;
 
 //
 // GpuResourcesProviderFromResourceInstance
@@ -192,7 +213,7 @@ GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
         std::shared_ptr<GpuResources> p)
         : res_(p) {}
 
-GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance()
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() = default;
 
 std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
     return res_;
```
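The destructor changes here swap empty bodies for `= default` definitions that stay out-of-line. A small sketch of the idiom, assuming the same shape (polymorphic base, destructor declared in the header, defaulted in one .cpp):

```cpp
// Sketch (not faiss code): a polymorphic base keeps its destructor
// declaration in the header and the defaulted definition in one .cpp,
// pinning the vtable to a single translation unit without an
// empty-brace body.
struct Base {
    virtual ~Base(); // header: declaration only
    virtual int id() const { return 0; }
};

Base::~Base() = default; // .cpp: defaulted out-of-line

struct Derived : Base {
    int id() const override { return 1; }
};

int main() {
    Base* b = new Derived();
    int r = b->id();
    delete b; // virtual dispatch runs ~Derived, then ~Base
    return r == 1 ? 0 : 1;
}
```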
data/vendor/faiss/faiss/gpu/GpuResources.h:

```diff
@@ -4,16 +4,37 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
 #include <cublas_v2.h>
 #include <cuda_runtime.h>
 #include <faiss/impl/FaissAssert.h>
+
 #include <memory>
 #include <utility>
 #include <vector>
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#endif
+
 namespace faiss {
 namespace gpu {
 
@@ -82,11 +103,7 @@ std::string memorySpaceToString(MemorySpace s);
 
 /// Information on what/where an allocation is
 struct AllocInfo {
-    inline AllocInfo()
-            : type(AllocType::Other),
-              device(0),
-              space(MemorySpace::Device),
-              stream(nullptr) {}
+    inline AllocInfo() {}
 
     inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
             : type(at), device(dev), space(sp), stream(st) {}
@@ -95,13 +112,13 @@ struct AllocInfo {
     std::string toString() const;
 
     /// The internal category of the allocation
-    AllocType type;
+    AllocType type = AllocType::Other;
 
     /// The device on which the allocation is happening
-    int device;
+    int device = 0;
 
     /// The memory space of the allocation
-    MemorySpace space;
+    MemorySpace space = MemorySpace::Device;
 
     /// The stream on which new work on the memory will be ordered (e.g., if a
     /// piece of memory cached and to be returned for this call was last used on
@@ -111,7 +128,7 @@ struct AllocInfo {
     ///
     /// The memory manager guarantees that the returned memory is free to use
     /// without data races on this stream specified.
-    cudaStream_t stream;
+    cudaStream_t stream = nullptr;
 };
 
 /// Create an AllocInfo for the current device with MemorySpace::Device
@@ -125,7 +142,7 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
 
 /// Information on what/where an allocation is, along with how big it should be
 struct AllocRequest : public AllocInfo {
-    inline AllocRequest()
+    inline AllocRequest() {}
 
     inline AllocRequest(const AllocInfo& info, size_t sz)
             : AllocInfo(info), size(sz) {}
@@ -142,7 +159,11 @@ struct AllocRequest : public AllocInfo {
     std::string toString() const;
 
     /// The size in bytes of the allocation
-    size_t size;
+    size_t size = 0;
+
+#if defined USE_NVIDIA_RAFT
+    rmm::mr::device_memory_resource* mr = nullptr;
+#endif
 };
 
 /// A RAII object that manages a temporary memory request
@@ -190,6 +211,13 @@ class GpuResources {
     /// given device
     virtual cudaStream_t getDefaultStream(int device) = 0;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    virtual raft::device_resources& getRaftHandle(int device) = 0;
+    raft::device_resources& getRaftHandleCurrentDevice();
+#endif
+
     /// Overrides the default stream for a device to the user-supplied stream.
     /// The resources object does not own this stream (i.e., it will not destroy
     /// it).
```
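The RAFT additions in GpuResources.h are all guarded by `USE_NVIDIA_RAFT`, so the raft/rmm headers and the `getRaftHandle` surface exist only in feature builds. A compile-time sketch of that pattern; `USE_OPTIONAL_BACKEND`, `BackendHandle`, and `Resources` are hypothetical names, not faiss API:

```cpp
#include <iostream>

// Feature macro normally set by the build system, e.g.
// -DUSE_OPTIONAL_BACKEND; all names below are hypothetical.
// #define USE_OPTIONAL_BACKEND

#if defined USE_OPTIONAL_BACKEND
struct BackendHandle {
    int deviceId = 0;
};
#endif

struct Resources {
    void* defaultStream = nullptr; // always part of the interface

#if defined USE_OPTIONAL_BACKEND
    // Only feature builds see the optional type in the interface; other
    // builds never parse the backend's headers.
    BackendHandle& getBackendHandle() {
        return handle_;
    }

   private:
    BackendHandle handle_;
#endif
};

int main() {
    Resources r;
    std::cout << (r.defaultStream == nullptr) << "\n";
    return 0;
}
```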
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp:

```diff
@@ -4,6 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/device/managed_memory_resource.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#include <memory>
+#endif
 
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
@@ -66,7 +89,12 @@ std::string allocsToString(const std::unordered_map<void*, AllocRequest>& map) {
 //
 
 StandardGpuResourcesImpl::StandardGpuResourcesImpl()
-        :
+        :
+#if defined USE_NVIDIA_RAFT
+          mmr_(new rmm::mr::managed_memory_resource),
+          pmr_(new rmm::mr::pinned_memory_resource),
+#endif
+          pinnedMemAlloc_(nullptr),
           pinnedMemAllocSize_(0),
           // let the adjustment function determine the memory size for us by
           // passing in a huge value that will then be adjusted
@@ -74,7 +102,8 @@ StandardGpuResourcesImpl::StandardGpuResourcesImpl()
                   -1,
                   std::numeric_limits<size_t>::max())),
           pinnedMemSize_(kDefaultPinnedMemoryAllocation),
-          allocLogging_(false) {
+          allocLogging_(false) {
+}
 
 StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     // The temporary memory allocator has allocated memory through us, so clean
@@ -129,6 +158,9 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
     }
 
     if (pinnedMemAlloc_) {
+#if defined USE_NVIDIA_RAFT
+        pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_);
+#else
         auto err = cudaFreeHost(pinnedMemAlloc_);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -136,6 +168,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
                 pinnedMemAlloc_,
                 (int)err,
                 cudaGetErrorString(err));
+#endif
     }
 }
 
@@ -187,11 +220,11 @@ void StandardGpuResourcesImpl::setTempMemory(size_t size) {
             p.second.reset();
 
             // Allocate new
-            p.second = std::
+            p.second = std::make_unique<StackDeviceMemory>(
                     this,
                     p.first,
                     // adjust for this specific device
-                    getDefaultTempMemForGPU(device, tempMemSize_))
+                    getDefaultTempMemForGPU(device, tempMemSize_));
         }
     }
 }
@@ -274,6 +307,19 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+#if defined USE_NVIDIA_RAFT
+        // If this is the first device that we're initializing, create our
+        // pinned memory allocation
+        if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
+            try {
+                pinnedMemAlloc_ = pmr_->allocate(pinnedMemSize_);
+            } catch (const std::bad_alloc& rmm_ex) {
+                FAISS_THROW_MSG("CUDA memory allocation error");
+            }
+
+            pinnedMemAllocSize_ = pinnedMemSize_;
+        }
+#else
         auto err = cudaHostAlloc(
                 &pinnedMemAlloc_, pinnedMemSize_, cudaHostAllocDefault);
 
@@ -286,6 +332,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
                 cudaGetErrorString(err));
 
         pinnedMemAllocSize_ = pinnedMemSize_;
+#endif
     }
 
     // Make sure that device properties for all devices are cached
@@ -307,12 +354,16 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
             device);
 
     // Create streams
-    cudaStream_t defaultStream =
+    cudaStream_t defaultStream = nullptr;
    CUDA_VERIFY(
             cudaStreamCreateWithFlags(&defaultStream, cudaStreamNonBlocking));
 
     defaultStreams_[device] = defaultStream;
 
+#if defined USE_NVIDIA_RAFT
+    raftHandles_.emplace(std::make_pair(device, defaultStream));
+#endif
+
     cudaStream_t asyncCopyStream = 0;
     CUDA_VERIFY(
             cudaStreamCreateWithFlags(&asyncCopyStream, cudaStreamNonBlocking));
@@ -321,7 +372,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
 
     std::vector<cudaStream_t> deviceStreams;
     for (int j = 0; j < kNumStreams; ++j) {
-        cudaStream_t stream =
+        cudaStream_t stream = nullptr;
         CUDA_VERIFY(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
 
         deviceStreams.push_back(stream);
@@ -330,7 +381,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     alternateStreams_[device] = std::move(deviceStreams);
 
     // Create cuBLAS handle
-    cublasHandle_t blasHandle =
+    cublasHandle_t blasHandle = nullptr;
     auto blasStatus = cublasCreate(&blasHandle);
     FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
     blasHandles_[device] = blasHandle;
@@ -348,11 +399,11 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
     allocs_[device] = std::unordered_map<void*, AllocRequest>();
 
     FAISS_ASSERT(tempMemory_.count(device) == 0);
-    auto mem = std::
+    auto mem = std::make_unique<StackDeviceMemory>(
             this,
             device,
             // adjust for this specific device
-            getDefaultTempMemForGPU(device, tempMemSize_))
+            getDefaultTempMemForGPU(device, tempMemSize_));
 
     tempMemory_.emplace(device, std::move(mem));
 }
@@ -375,6 +426,25 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) {
     return defaultStreams_[device];
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) {
+    initializeForDevice(device);
+
+    auto it = raftHandles_.find(device);
+    if (it == raftHandles_.end()) {
+        // Make sure we are using the stream the user may have already assigned
+        // to the current GpuResources
+        raftHandles_.emplace(device, getDefaultStream(device));
+
+        // Initialize cublas handle
+        raftHandles_[device].get_cublas_handle();
+    }
+
+    // Otherwise, our base default handle
+    return raftHandles_[device];
+}
+#endif
+
 std::vector<cudaStream_t> StandardGpuResourcesImpl::getAlternateStreams(
         int device) {
     initializeForDevice(device);
@@ -406,8 +476,6 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
     void* p = nullptr;
 
     if (adjReq.space == MemorySpace::Temporary) {
-        // If we don't have enough space in our temporary memory manager, we
-        // need to allocate this request separately
         auto& tempMem = tempMemory_[adjReq.device];
 
         if (adjReq.size > tempMem->getSizeAvailable()) {
@@ -428,15 +496,25 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         // Otherwise, we can handle this locally
         p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size);
-
     } else if (adjReq.space == MemorySpace::Device) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            rmm::mr::device_memory_resource* current_mr =
+                    rmm::mr::get_per_device_resource(
+                            rmm::cuda_device_id{adjReq.device});
+            p = current_mr->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = current_mr;
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMalloc(&p, adjReq.size);
 
         // Throw if we fail to allocate
         if (err != cudaSuccess) {
             // FIXME: as of CUDA 11, a memory allocation error appears to be
-            // presented via cudaGetLastError as well, and needs to be
-            // Just call the function to clear it
+            // presented via cudaGetLastError as well, and needs to be
+            // cleared. Just call the function to clear it
             cudaGetLastError();
 
             std::stringstream ss;
@@ -451,7 +529,20 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else if (adjReq.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        try {
+            // for now, use our own managed MR to do Unified Memory allocations.
+            // TODO: change this to use the current device resource once RMM has
+            // a way to retrieve a "guaranteed" managed memory resource for a
+            // device.
+            p = mmr_->allocate_async(adjReq.size, adjReq.stream);
+            adjReq.mr = mmr_.get();
+        } catch (const std::bad_alloc& rmm_ex) {
+            FAISS_THROW_MSG("CUDA memory allocation error");
+        }
+#else
         auto err = cudaMallocManaged(&p, adjReq.size);
 
         if (err != cudaSuccess) {
@@ -472,6 +563,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) {
 
         FAISS_THROW_IF_NOT_FMT(err == cudaSuccess, "%s", str.c_str());
         }
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)adjReq.space);
     }
@@ -505,10 +597,12 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
 
     if (req.space == MemorySpace::Temporary) {
        tempMemory_[device]->deallocMemory(device, req.stream, req.size, p);
-
     } else if (
             req.space == MemorySpace::Device ||
             req.space == MemorySpace::Unified) {
+#if defined USE_NVIDIA_RAFT
+        req.mr->deallocate_async(p, req.size, req.stream);
+#else
         auto err = cudaFree(p);
         FAISS_ASSERT_FMT(
                 err == cudaSuccess,
@@ -516,7 +610,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) {
                 p,
                 (int)err,
                 cudaGetErrorString(err));
-
+#endif
     } else {
         FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int)req.space);
     }
@@ -561,7 +655,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
 StandardGpuResources::StandardGpuResources()
         : res_(new StandardGpuResourcesImpl) {}
 
-StandardGpuResources::~StandardGpuResources()
+StandardGpuResources::~StandardGpuResources() = default;
 
 std::shared_ptr<GpuResources> StandardGpuResources::getResources() {
     return res_;
@@ -600,6 +694,12 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) {
     return res_->getDefaultStream(device);
 }
 
+#if defined USE_NVIDIA_RAFT
+raft::device_resources& StandardGpuResources::getRaftHandle(int device) {
+    return res_->getRaftHandle(device);
+}
+#endif
+
 size_t StandardGpuResources::getTempMemoryAvailable(int device) const {
     return res_->getTempMemoryAvailable(device);
 }
```
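A key detail in the RAFT allocation path above: `allocMemory` stores the producing RMM memory resource in the request (`adjReq.mr`), and `deallocMemory` frees through that same pointer, so different memory spaces can use different allocators without extra routing logic. A plain-C++ sketch of the bookkeeping with no CUDA/RMM dependency (all names illustrative):

```cpp
#include <cstdlib>
#include <iostream>
#include <unordered_map>

// Abstract memory resource, standing in for rmm::mr::device_memory_resource.
struct MemoryResource {
    virtual ~MemoryResource() = default;
    virtual void* allocate(std::size_t n) = 0;
    virtual void deallocate(void* p, std::size_t n) = 0;
};

struct HeapResource : MemoryResource {
    void* allocate(std::size_t n) override {
        return std::malloc(n);
    }
    void deallocate(void* p, std::size_t) override {
        std::free(p);
    }
};

// Mirrors the AllocRequest change above: the request remembers which
// resource produced the block.
struct AllocRequest {
    std::size_t size = 0;
    MemoryResource* mr = nullptr;
};

int main() {
    HeapResource heap;
    std::unordered_map<void*, AllocRequest> live; // like allocs_[device]

    AllocRequest req;
    req.size = 128;
    void* p = heap.allocate(req.size);
    req.mr = &heap; // record the producing resource
    live[p] = req;

    auto it = live.find(p);
    it->second.mr->deallocate(p, it->second.size); // symmetric free
    live.erase(it);
    std::cout << "ok\n";
    return 0;
}
```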
data/vendor/faiss/faiss/gpu/StandardGpuResources.h:

```diff
@@ -4,9 +4,29 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
+#if defined USE_NVIDIA_RAFT
+#include <raft/core/device_resources.hpp>
+#include <rmm/mr/host/pinned_memory_resource.hpp>
+#endif
+
 #include <faiss/gpu/GpuResources.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/utils/StackDeviceMemory.h>
@@ -15,6 +35,7 @@
 #include <unordered_map>
 #include <vector>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -58,6 +79,12 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// this stream upon exit from an index or other Faiss GPU call.
     cudaStream_t getDefaultStream(int device) override;
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device) override;
+#endif
+
     /// Called to change the work ordering streams to the null stream
     /// for all devices
     void setDefaultNullStreamAllDevices();
@@ -92,7 +119,7 @@ class StandardGpuResourcesImpl : public GpuResources {
 
     cudaStream_t getAsyncCopyStream(int device) override;
 
-   private:
+   protected:
     /// Have GPU resources been initialized for this device yet?
     bool isInitialized(int device) const;
 
@@ -100,7 +127,7 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// memory size
     static size_t getDefaultTempMemForGPU(int device, size_t requested);
 
-   private:
+   protected:
     /// Set of currently outstanding memory allocations per device
     /// device -> (alloc request, allocated ptr)
     std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
@@ -124,6 +151,27 @@ class StandardGpuResourcesImpl : public GpuResources {
     /// cuBLAS handle for each device
     std::unordered_map<int, cublasHandle_t> blasHandles_;
 
+#if defined USE_NVIDIA_RAFT
+    /// raft handle for each device
+    std::unordered_map<int, raft::device_resources> raftHandles_;
+
+    /**
+     * FIXME: Integrating these in a separate code path for now. Ultimately,
+     * it would be nice if we use a simple memory resource abstraction
+     * in FAISS so we could plug in whether to use RMM's memory resources
+     * or the default.
+     *
+     * There's enough duplicated logic that it doesn't *seem* to make sense
+     * to create a subclass only for the RMM memory resources.
+     */
+
+    // managed_memory_resource
+    std::unique_ptr<rmm::mr::device_memory_resource> mmr_;
+
+    // pinned_memory_resource
+    std::unique_ptr<rmm::mr::host_memory_resource> pmr_;
+#endif
+
     /// Pinned memory allocation for use with this GPU
     void* pinnedMemAlloc_;
     size_t pinnedMemAllocSize_;
@@ -183,10 +231,15 @@ class StandardGpuResources : public GpuResourcesProvider {
     /// Export a description of memory used for Python
     std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
             const;
-
     /// Returns the current default stream
     cudaStream_t getDefaultStream(int device);
 
+#if defined USE_NVIDIA_RAFT
+    /// Returns the raft handle for the given device which can be used to
+    /// make calls to other raft primitives.
+    raft::device_resources& getRaftHandle(int device);
+#endif
+
     /// Returns the current amount of temp memory available
     size_t getTempMemoryAvailable(int device) const;
 
@@ -203,3 +256,4 @@ class StandardGpuResources : public GpuResourcesProvider {
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
```
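StandardGpuResources.h now brackets its declarations in `#pragma GCC visibility push(default)` / `pop`, which re-exports the enclosed symbols when the library is otherwise built with `-fvisibility=hidden`. A minimal sketch (the `mylib` namespace is hypothetical):

```cpp
// Compile with -fvisibility=hidden to see the effect: everything between
// push(default) and pop stays exported from a shared library; everything
// outside the bracket is hidden.
#pragma GCC visibility push(default)
namespace mylib { // hypothetical library namespace

struct Exported {
    int value() const {
        return 42;
    }
};

} // namespace mylib
#pragma GCC visibility pop

int main() {
    return mylib::Exported{}.value() == 42 ? 0 : 1;
}
```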
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp:

```diff
@@ -42,7 +42,7 @@ int main(int argc, char** argv) {
 
     cudaProfilerStop();
 
-    auto seed = FLAGS_seed != -
+    auto seed = FLAGS_seed != -1 ? FLAGS_seed : time(nullptr);
     printf("using seed %ld\n", seed);
 
     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
```