faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/gpu/utils/DeviceMemory.h>
|
|
10
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
|
|
13
|
+
namespace faiss { namespace gpu {
|
|
14
|
+
|
|
15
|
+
/// Builds an empty reservation: no owning allocator, no data pointer,
/// zero size, device 0 and stream 0.
DeviceMemoryReservation::DeviceMemoryReservation()
        : state_(nullptr),
          device_(0),
          data_(nullptr),
          size_(0),
          stream_(0) {}
|
|
22
|
+
|
|
23
|
+
/// Builds a reservation for the region starting at `p` with length `size`
/// on `device`, associated with `stream`. `state` is the allocator that
/// the region is handed back to when the reservation is destroyed.
DeviceMemoryReservation::DeviceMemoryReservation(DeviceMemory* state,
                                                 int device,
                                                 void* p,
                                                 size_t size,
                                                 cudaStream_t stream)
        : state_(state), device_(device), data_(p), size_(size), stream_(stream) {
}
|
|
34
|
+
|
|
35
|
+
/// Move constructor: copies every field from `m`, then clears `m`'s data
/// pointer so that `m`'s destructor does not return the region.
DeviceMemoryReservation::DeviceMemoryReservation(
        DeviceMemoryReservation&& m) noexcept
        : state_(m.state_),
          device_(m.device_),
          data_(m.data_),
          size_(m.size_),
          stream_(m.stream_) {
    // Only data_ is reset on the source; its other fields keep their values
    m.data_ = nullptr;
}
|
|
46
|
+
|
|
47
|
+
/// Hands the held region, if there is one, back to its allocator.
DeviceMemoryReservation::~DeviceMemoryReservation() {
    if (!data_) {
        // Nothing is held (default-constructed or moved-from)
        return;
    }

    // A held region must have an allocator to be returned to
    FAISS_ASSERT(state_);
    state_->returnAllocation(*this);

    data_ = nullptr;
}
|
|
55
|
+
|
|
56
|
+
/// Move assignment: returns any region currently held by this object to
/// its allocator, then takes over the region described by `m`.
///
/// Self-assignment (`x = std::move(x)`) is a no-op. Without the guard the
/// region would be returned to the allocator and this object left empty
/// while the caller still expects it to hold the memory.
///
/// @param m reservation to take the region from; its data pointer is
///          cleared so its destructor does not return the region again
/// @return reference to this object
DeviceMemoryReservation&
DeviceMemoryReservation::operator=(DeviceMemoryReservation&& m) {
    if (this == &m) {
        return *this;
    }

    // Release what we currently hold before overwriting our fields
    if (data_) {
        FAISS_ASSERT(state_);
        state_->returnAllocation(*this);
    }

    state_ = m.state_;
    device_ = m.device_;
    data_ = m.data_;
    size_ = m.size_;
    stream_ = m.stream_;

    // Ownership has moved to this object
    m.data_ = nullptr;

    return *this;
}
|
|
73
|
+
|
|
74
|
+
/// Out-of-line definition of the virtual destructor; it does no work.
DeviceMemory::~DeviceMemory() = default;
|
|
76
|
+
|
|
77
|
+
} } // namespace
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <cuda_runtime.h>
|
|
12
|
+
#include <string>
|
|
13
|
+
|
|
14
|
+
namespace faiss { namespace gpu {
|
|
15
|
+
|
|
16
|
+
class DeviceMemory;
|
|
17
|
+
|
|
18
|
+
class DeviceMemoryReservation {
|
|
19
|
+
public:
|
|
20
|
+
DeviceMemoryReservation();
|
|
21
|
+
DeviceMemoryReservation(DeviceMemory* state,
|
|
22
|
+
int device, void* p, size_t size,
|
|
23
|
+
cudaStream_t stream);
|
|
24
|
+
DeviceMemoryReservation(DeviceMemoryReservation&& m) noexcept;
|
|
25
|
+
~DeviceMemoryReservation();
|
|
26
|
+
|
|
27
|
+
DeviceMemoryReservation& operator=(DeviceMemoryReservation&& m);
|
|
28
|
+
|
|
29
|
+
int device() { return device_; }
|
|
30
|
+
void* get() { return data_; }
|
|
31
|
+
size_t size() { return size_; }
|
|
32
|
+
cudaStream_t stream() { return stream_; }
|
|
33
|
+
|
|
34
|
+
private:
|
|
35
|
+
DeviceMemory* state_;
|
|
36
|
+
|
|
37
|
+
int device_;
|
|
38
|
+
void* data_;
|
|
39
|
+
size_t size_;
|
|
40
|
+
cudaStream_t stream_;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
/// Manages temporary memory allocations on a GPU device
|
|
44
|
+
class DeviceMemory {
|
|
45
|
+
public:
|
|
46
|
+
virtual ~DeviceMemory();
|
|
47
|
+
|
|
48
|
+
/// Returns the device we are managing memory for
|
|
49
|
+
virtual int getDevice() const = 0;
|
|
50
|
+
|
|
51
|
+
/// Obtains a temporary memory allocation for our device,
|
|
52
|
+
/// whose usage is ordered with respect to the given stream.
|
|
53
|
+
virtual DeviceMemoryReservation getMemory(cudaStream_t stream,
|
|
54
|
+
size_t size) = 0;
|
|
55
|
+
|
|
56
|
+
/// Returns the current size available without calling cudaMalloc
|
|
57
|
+
virtual size_t getSizeAvailable() const = 0;
|
|
58
|
+
|
|
59
|
+
/// Returns a string containing our current memory manager state
|
|
60
|
+
virtual std::string toString() const = 0;
|
|
61
|
+
|
|
62
|
+
/// Returns the high-water mark of cudaMalloc allocations for our
|
|
63
|
+
/// device
|
|
64
|
+
virtual size_t getHighWaterCudaMalloc() const = 0;
|
|
65
|
+
|
|
66
|
+
protected:
|
|
67
|
+
friend class DeviceMemoryReservation;
|
|
68
|
+
virtual void returnAllocation(DeviceMemoryReservation& m) = 0;
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
} } // namespace
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
#include <cuda_runtime.h>
|
|
13
|
+
#include <cublas_v2.h>
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
namespace faiss { namespace gpu {
|
|
17
|
+
|
|
18
|
+
/// Returns the current thread-local GPU device
|
|
19
|
+
int getCurrentDevice();
|
|
20
|
+
|
|
21
|
+
/// Sets the current thread-local GPU device
|
|
22
|
+
void setCurrentDevice(int device);
|
|
23
|
+
|
|
24
|
+
/// Returns the number of available GPU devices
|
|
25
|
+
int getNumDevices();
|
|
26
|
+
|
|
27
|
+
/// Starts the CUDA profiler (exposed via SWIG)
|
|
28
|
+
void profilerStart();
|
|
29
|
+
|
|
30
|
+
/// Stops the CUDA profiler (exposed via SWIG)
|
|
31
|
+
void profilerStop();
|
|
32
|
+
|
|
33
|
+
/// Synchronizes the CPU against all devices (equivalent to
|
|
34
|
+
/// cudaDeviceSynchronize for each device)
|
|
35
|
+
void synchronizeAllDevices();
|
|
36
|
+
|
|
37
|
+
/// Returns a cached cudaDeviceProp for the given device
|
|
38
|
+
const cudaDeviceProp& getDeviceProperties(int device);
|
|
39
|
+
|
|
40
|
+
/// Returns the cached cudaDeviceProp for the current device
|
|
41
|
+
const cudaDeviceProp& getCurrentDeviceProperties();
|
|
42
|
+
|
|
43
|
+
/// Returns the maximum number of threads available for the given GPU
|
|
44
|
+
/// device
|
|
45
|
+
int getMaxThreads(int device);
|
|
46
|
+
|
|
47
|
+
/// Equivalent to getMaxThreads(getCurrentDevice())
|
|
48
|
+
int getMaxThreadsCurrentDevice();
|
|
49
|
+
|
|
50
|
+
/// Returns the maximum smem available for the given GPU device
|
|
51
|
+
size_t getMaxSharedMemPerBlock(int device);
|
|
52
|
+
|
|
53
|
+
/// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
|
|
54
|
+
size_t getMaxSharedMemPerBlockCurrentDevice();
|
|
55
|
+
|
|
56
|
+
/// For a given pointer, returns whether or not it is located on
|
|
57
|
+
/// a device (deviceId >= 0) or the host (-1).
|
|
58
|
+
int getDeviceForAddress(const void* p);
|
|
59
|
+
|
|
60
|
+
/// Does the given device support full unified memory sharing host
|
|
61
|
+
/// memory?
|
|
62
|
+
bool getFullUnifiedMemSupport(int device);
|
|
63
|
+
|
|
64
|
+
/// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
|
|
65
|
+
bool getFullUnifiedMemSupportCurrentDevice();
|
|
66
|
+
|
|
67
|
+
/// Returns the maximum k-selection value supported based on the CUDA SDK that
|
|
68
|
+
/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
|
|
69
|
+
/// non-CUDA files
|
|
70
|
+
int getMaxKSelection();
|
|
71
|
+
|
|
72
|
+
/// Scoped device switch: sets the current device on construction and
/// restores the previously current device on destruction
class DeviceScope {
 public:
  /// Makes `device` the current device
  explicit DeviceScope(int device);

  /// Restores the device that was current before construction
  ~DeviceScope();

 private:
  /// Device to restore on destruction
  int prevDevice_;
};
|
|
82
|
+
|
|
83
|
+
/// RAII object to manage a cublasHandle_t
|
|
84
|
+
class CublasHandleScope {
|
|
85
|
+
public:
|
|
86
|
+
CublasHandleScope();
|
|
87
|
+
~CublasHandleScope();
|
|
88
|
+
|
|
89
|
+
cublasHandle_t get() { return blasHandle_; }
|
|
90
|
+
|
|
91
|
+
private:
|
|
92
|
+
cublasHandle_t blasHandle_;
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
// RAII object to manage a cudaEvent_t
|
|
96
|
+
class CudaEvent {
|
|
97
|
+
public:
|
|
98
|
+
/// Creates an event and records it in this stream
|
|
99
|
+
explicit CudaEvent(cudaStream_t stream);
|
|
100
|
+
CudaEvent(const CudaEvent& event) = delete;
|
|
101
|
+
CudaEvent(CudaEvent&& event) noexcept;
|
|
102
|
+
~CudaEvent();
|
|
103
|
+
|
|
104
|
+
inline cudaEvent_t get() { return event_; }
|
|
105
|
+
|
|
106
|
+
/// Wait on this event in this stream
|
|
107
|
+
void streamWaitOnEvent(cudaStream_t stream);
|
|
108
|
+
|
|
109
|
+
/// Have the CPU wait for the completion of this event
|
|
110
|
+
void cpuWaitOnEvent();
|
|
111
|
+
|
|
112
|
+
CudaEvent& operator=(CudaEvent&& event) noexcept;
|
|
113
|
+
CudaEvent& operator=(CudaEvent& event) = delete;
|
|
114
|
+
|
|
115
|
+
private:
|
|
116
|
+
cudaEvent_t event_;
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
/// Evaluates the CUDA call X once and asserts that it returned
/// cudaSuccess; the assertion message carries the numeric error code and
/// its cudaGetErrorString() text
#define CUDA_VERIFY(X)                                              \
  do {                                                              \
    auto err__ = (X);                                               \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",      \
                     (int) err__, cudaGetErrorString(err__));       \
  } while (0)
|
|
126
|
+
|
|
127
|
+
/// Probe for CUDA errors. Uncomment the define below (or define it at
/// build time) to make the probe synchronous.
// #define FAISS_GPU_SYNC_ERROR 1

#if defined(FAISS_GPU_SYNC_ERROR)
// Synchronous variant: verifies the result of cudaDeviceSynchronize()
#define CUDA_TEST_ERROR()                       \
  do {                                          \
    CUDA_VERIFY(cudaDeviceSynchronize());       \
  } while (0)
#else
// Default variant: verifies the result of cudaGetLastError()
#define CUDA_TEST_ERROR()                       \
  do {                                          \
    CUDA_VERIFY(cudaGetLastError());            \
  } while (0)
#endif
|
|
141
|
+
|
|
142
|
+
/// Call for a collection of streams to wait on
|
|
143
|
+
template <typename L1, typename L2>
|
|
144
|
+
void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
|
|
145
|
+
// For all the streams we are waiting on, create an event
|
|
146
|
+
std::vector<cudaEvent_t> events;
|
|
147
|
+
for (auto& stream : listWaitOn) {
|
|
148
|
+
cudaEvent_t event;
|
|
149
|
+
CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
|
|
150
|
+
CUDA_VERIFY(cudaEventRecord(event, stream));
|
|
151
|
+
events.push_back(event);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// For all the streams that are waiting, issue a wait
|
|
155
|
+
for (auto& stream : listWaiting) {
|
|
156
|
+
for (auto& event : events) {
|
|
157
|
+
CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
for (auto& event : events) {
|
|
162
|
+
CUDA_VERIFY(cudaEventDestroy(event));
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/// These versions allow usage of initializer_list as arguments, since
|
|
167
|
+
/// otherwise {...} doesn't have a type
|
|
168
|
+
template <typename L1>
|
|
169
|
+
void streamWait(const L1& a,
|
|
170
|
+
const std::initializer_list<cudaStream_t>& b) {
|
|
171
|
+
streamWaitBase(a, b);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
template <typename L2>
|
|
175
|
+
void streamWait(const std::initializer_list<cudaStream_t>& a,
|
|
176
|
+
const L2& b) {
|
|
177
|
+
streamWaitBase(a, b);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
inline void streamWait(const std::initializer_list<cudaStream_t>& a,
|
|
181
|
+
const std::initializer_list<cudaStream_t>& b) {
|
|
182
|
+
streamWaitBase(a, b);
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
} } // namespace
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/gpu/utils/MemorySpace.h>
|
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
|
11
|
+
#include <cuda_runtime.h>
|
|
12
|
+
|
|
13
|
+
namespace faiss { namespace gpu {
|
|
14
|
+
|
|
15
|
+
/// Allocates CUDA memory for a given memory space
|
|
16
|
+
void allocMemorySpaceV(MemorySpace space, void** p, size_t size) {
|
|
17
|
+
switch (space) {
|
|
18
|
+
case MemorySpace::Device:
|
|
19
|
+
{
|
|
20
|
+
auto err = cudaMalloc(p, size);
|
|
21
|
+
|
|
22
|
+
// Throw if we fail to allocate
|
|
23
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
24
|
+
err == cudaSuccess,
|
|
25
|
+
"failed to cudaMalloc %zu bytes (error %d %s)",
|
|
26
|
+
size, (int) err, cudaGetErrorString(err));
|
|
27
|
+
}
|
|
28
|
+
break;
|
|
29
|
+
case MemorySpace::Unified:
|
|
30
|
+
{
|
|
31
|
+
#ifdef FAISS_UNIFIED_MEM
|
|
32
|
+
auto err = cudaMallocManaged(p, size);
|
|
33
|
+
|
|
34
|
+
// Throw if we fail to allocate
|
|
35
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
36
|
+
err == cudaSuccess,
|
|
37
|
+
"failed to cudaMallocManaged %zu bytes (error %d %s)",
|
|
38
|
+
size, (int) err, cudaGetErrorString(err));
|
|
39
|
+
#else
|
|
40
|
+
FAISS_THROW_MSG("Attempting to allocate via cudaMallocManaged "
|
|
41
|
+
"without CUDA 8+ support");
|
|
42
|
+
#endif
|
|
43
|
+
}
|
|
44
|
+
break;
|
|
45
|
+
case MemorySpace::HostPinned:
|
|
46
|
+
{
|
|
47
|
+
auto err = cudaHostAlloc(p, size, cudaHostAllocDefault);
|
|
48
|
+
|
|
49
|
+
// Throw if we fail to allocate
|
|
50
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
51
|
+
err == cudaSuccess,
|
|
52
|
+
"failed to cudaHostAlloc %zu bytes (error %d %s)",
|
|
53
|
+
size, (int) err, cudaGetErrorString(err));
|
|
54
|
+
}
|
|
55
|
+
break;
|
|
56
|
+
default:
|
|
57
|
+
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// We'll allow allocation to fail, but free should always succeed and be a
|
|
63
|
+
// fatal error if it doesn't free
|
|
64
|
+
void freeMemorySpace(MemorySpace space, void* p) {
|
|
65
|
+
switch (space) {
|
|
66
|
+
case MemorySpace::Device:
|
|
67
|
+
case MemorySpace::Unified:
|
|
68
|
+
{
|
|
69
|
+
auto err = cudaFree(p);
|
|
70
|
+
FAISS_ASSERT_FMT(err == cudaSuccess,
|
|
71
|
+
"Failed to cudaFree pointer %p (error %d %s)",
|
|
72
|
+
p, (int) err, cudaGetErrorString(err));
|
|
73
|
+
}
|
|
74
|
+
break;
|
|
75
|
+
case MemorySpace::HostPinned:
|
|
76
|
+
{
|
|
77
|
+
auto err = cudaFreeHost(p);
|
|
78
|
+
FAISS_ASSERT_FMT(err == cudaSuccess,
|
|
79
|
+
"Failed to cudaFreeHost pointer %p (error %d %s)",
|
|
80
|
+
p, (int) err, cudaGetErrorString(err));
|
|
81
|
+
}
|
|
82
|
+
break;
|
|
83
|
+
default:
|
|
84
|
+
FAISS_ASSERT_FMT(false, "unknown MemorySpace %d", (int) space);
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
} }
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <cuda.h>
|
|
12
|
+
|
|
13
|
+
// Usage of CUDA Unified Memory is enabled when building against
// CUDA_VERSION 8000 or newer
#if defined(CUDA_VERSION) && CUDA_VERSION >= 8000
#define FAISS_UNIFIED_MEM 1
#endif
|
|
17
|
+
|
|
18
|
+
namespace faiss { namespace gpu {
|
|
19
|
+
|
|
20
|
+
/// Memory spaces that allocations can be placed in
enum MemorySpace {
  /// Allocated with cudaMalloc, released with cudaFree
  Device = 1,
  /// Allocated with cudaMallocManaged, released with cudaFree
  Unified = 2,
  /// Allocated with cudaHostAlloc, released with cudaFreeHost
  HostPinned = 3,
};
|
|
28
|
+
|
|
29
|
+
/// All memory allocations and de-allocations come through these functions
|
|
30
|
+
|
|
31
|
+
/// Allocates CUDA memory for a given memory space (void pointer)
|
|
32
|
+
/// Throws a FaissException if we are unable to allocate the memory
|
|
33
|
+
void allocMemorySpaceV(MemorySpace space, void** p, size_t size);
|
|
34
|
+
|
|
35
|
+
template <typename T>
|
|
36
|
+
inline void allocMemorySpace(MemorySpace space, T** p, size_t size) {
|
|
37
|
+
allocMemorySpaceV(space, (void**)(void*) p, size);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/// Frees CUDA memory for a given memory space
|
|
41
|
+
/// Asserts if we are unable to free the region
|
|
42
|
+
void freeMemorySpace(MemorySpace space, void* p);
|
|
43
|
+
|
|
44
|
+
} }
|