faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,138 +5,138 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
10
|
#include <faiss/gpu/GpuResources.h>
|
|
12
|
-
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
|
13
11
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
12
|
+
#include <faiss/gpu/utils/StackDeviceMemory.h>
|
|
14
13
|
#include <functional>
|
|
15
14
|
#include <map>
|
|
16
15
|
#include <unordered_map>
|
|
17
16
|
#include <vector>
|
|
18
17
|
|
|
19
|
-
namespace faiss {
|
|
18
|
+
namespace faiss {
|
|
19
|
+
namespace gpu {
|
|
20
20
|
|
|
21
21
|
/// Standard implementation of the GpuResources object that provides for a
|
|
22
22
|
/// temporary memory manager
|
|
23
23
|
class StandardGpuResourcesImpl : public GpuResources {
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
public:
|
|
25
|
+
StandardGpuResourcesImpl();
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
~StandardGpuResourcesImpl() override;
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
/// Disable allocation of temporary memory; all temporary memory
|
|
30
|
+
/// requests will call cudaMalloc / cudaFree at the point of use
|
|
31
|
+
void noTempMemory();
|
|
32
32
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
/// Specify that we wish to use a certain fixed size of memory on
|
|
34
|
+
/// all devices as temporary memory. This is the upper bound for the GPU
|
|
35
|
+
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
|
36
|
+
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
|
37
|
+
/// To avoid any temporary memory allocation, pass 0.
|
|
38
|
+
void setTempMemory(size_t size);
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
40
|
+
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
|
41
|
+
/// transfers
|
|
42
|
+
void setPinnedMemory(size_t size);
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
44
|
+
/// Called to change the stream for work ordering. We do not own `stream`;
|
|
45
|
+
/// i.e., it will not be destroyed when the GpuResources object gets cleaned
|
|
46
|
+
/// up.
|
|
47
|
+
/// We are guaranteed that all Faiss GPU work is ordered with respect to
|
|
48
|
+
/// this stream upon exit from an index or other Faiss GPU call.
|
|
49
|
+
void setDefaultStream(int device, cudaStream_t stream) override;
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
/// Revert the default stream to the original stream managed by this
|
|
52
|
+
/// resources object, in case someone called `setDefaultStream`.
|
|
53
|
+
void revertDefaultStream(int device);
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
55
|
+
/// Returns the stream for the given device on which all Faiss GPU work is
|
|
56
|
+
/// ordered.
|
|
57
|
+
/// We are guaranteed that all Faiss GPU work is ordered with respect to
|
|
58
|
+
/// this stream upon exit from an index or other Faiss GPU call.
|
|
59
|
+
cudaStream_t getDefaultStream(int device) override;
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
61
|
+
/// Called to change the work ordering streams to the null stream
|
|
62
|
+
/// for all devices
|
|
63
|
+
void setDefaultNullStreamAllDevices();
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
|
66
|
+
/// standard output
|
|
67
|
+
void setLogMemoryAllocations(bool enable);
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
69
|
+
public:
|
|
70
|
+
/// Internal system calls
|
|
71
71
|
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
/// Initialize resources for this device
|
|
73
|
+
void initializeForDevice(int device) override;
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
cublasHandle_t getBlasHandle(int device) override;
|
|
76
76
|
|
|
77
|
-
|
|
77
|
+
std::vector<cudaStream_t> getAlternateStreams(int device) override;
|
|
78
78
|
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
/// Allocate non-temporary GPU memory
|
|
80
|
+
void* allocMemory(const AllocRequest& req) override;
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
/// Returns a previous allocation
|
|
83
|
+
void deallocMemory(int device, void* in) override;
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
size_t getTempMemoryAvailable(int device) const override;
|
|
86
86
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
87
|
+
/// Export a description of memory used for Python
|
|
88
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
|
|
89
|
+
const;
|
|
90
90
|
|
|
91
|
-
|
|
91
|
+
std::pair<void*, size_t> getPinnedMemory() override;
|
|
92
92
|
|
|
93
|
-
|
|
93
|
+
cudaStream_t getAsyncCopyStream(int device) override;
|
|
94
94
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
private:
|
|
96
|
+
/// Have GPU resources been initialized for this device yet?
|
|
97
|
+
bool isInitialized(int device) const;
|
|
98
98
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
/// Adjust the default temporary memory allocation based on the total GPU
|
|
100
|
+
/// memory size
|
|
101
|
+
static size_t getDefaultTempMemForGPU(int device, size_t requested);
|
|
102
102
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
103
|
+
private:
|
|
104
|
+
/// Set of currently outstanding memory allocations per device
|
|
105
|
+
/// device -> (alloc request, allocated ptr)
|
|
106
|
+
std::unordered_map<int, std::unordered_map<void*, AllocRequest>> allocs_;
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
|
|
108
|
+
/// Temporary memory provider, per each device
|
|
109
|
+
std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> tempMemory_;
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
111
|
+
/// Our default stream that work is ordered on, one per each device
|
|
112
|
+
std::unordered_map<int, cudaStream_t> defaultStreams_;
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
114
|
+
/// This contains particular streams as set by the user for
|
|
115
|
+
/// ordering, if any
|
|
116
|
+
std::unordered_map<int, cudaStream_t> userDefaultStreams_;
|
|
117
117
|
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
/// Other streams we can use, per each device
|
|
119
|
+
std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
|
|
120
120
|
|
|
121
|
-
|
|
122
|
-
|
|
121
|
+
/// Async copy stream to use for GPU <-> CPU pinned memory copies
|
|
122
|
+
std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
124
|
+
/// cuBLAS handle for each device
|
|
125
|
+
std::unordered_map<int, cublasHandle_t> blasHandles_;
|
|
126
126
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
127
|
+
/// Pinned memory allocation for use with this GPU
|
|
128
|
+
void* pinnedMemAlloc_;
|
|
129
|
+
size_t pinnedMemAllocSize_;
|
|
130
130
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
131
|
+
/// Another option is to use a specified amount of memory on all
|
|
132
|
+
/// devices
|
|
133
|
+
size_t tempMemSize_;
|
|
134
134
|
|
|
135
|
-
|
|
136
|
-
|
|
135
|
+
/// Amount of pinned memory we should allocate
|
|
136
|
+
size_t pinnedMemSize_;
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
/// Whether or not we log every GPU memory allocation and deallocation
|
|
139
|
+
bool allocLogging_;
|
|
140
140
|
};
|
|
141
141
|
|
|
142
142
|
/// Default implementation of GpuResources that allocates a cuBLAS
|
|
@@ -144,61 +144,62 @@ class StandardGpuResourcesImpl : public GpuResources {
|
|
|
144
144
|
/// Internally, the Faiss GPU code uses the instance managed by getResources,
|
|
145
145
|
/// but this is the user-facing object that is internally reference counted.
|
|
146
146
|
class StandardGpuResources : public GpuResourcesProvider {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
147
|
+
public:
|
|
148
|
+
StandardGpuResources();
|
|
149
|
+
~StandardGpuResources() override;
|
|
150
150
|
|
|
151
|
-
|
|
151
|
+
std::shared_ptr<GpuResources> getResources() override;
|
|
152
152
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
153
|
+
/// Disable allocation of temporary memory; all temporary memory
|
|
154
|
+
/// requests will call cudaMalloc / cudaFree at the point of use
|
|
155
|
+
void noTempMemory();
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
157
|
+
/// Specify that we wish to use a certain fixed size of memory on
|
|
158
|
+
/// all devices as temporary memory. This is the upper bound for the GPU
|
|
159
|
+
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
|
160
|
+
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
|
161
|
+
/// To avoid any temporary memory allocation, pass 0.
|
|
162
|
+
void setTempMemory(size_t size);
|
|
163
163
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
164
|
+
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
|
165
|
+
/// transfers
|
|
166
|
+
void setPinnedMemory(size_t size);
|
|
167
167
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
168
|
+
/// Called to change the stream for work ordering. We do not own `stream`;
|
|
169
|
+
/// i.e., it will not be destroyed when the GpuResources object gets cleaned
|
|
170
|
+
/// up.
|
|
171
|
+
/// We are guaranteed that all Faiss GPU work is ordered with respect to
|
|
172
|
+
/// this stream upon exit from an index or other Faiss GPU call.
|
|
173
|
+
void setDefaultStream(int device, cudaStream_t stream);
|
|
174
174
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
175
|
+
/// Revert the default stream to the original stream managed by this
|
|
176
|
+
/// resources object, in case someone called `setDefaultStream`.
|
|
177
|
+
void revertDefaultStream(int device);
|
|
178
178
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
179
|
+
/// Called to change the work ordering streams to the null stream
|
|
180
|
+
/// for all devices
|
|
181
|
+
void setDefaultNullStreamAllDevices();
|
|
182
182
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
183
|
+
/// Export a description of memory used for Python
|
|
184
|
+
std::map<int, std::map<std::string, std::pair<int, size_t>>> getMemoryInfo()
|
|
185
|
+
const;
|
|
186
186
|
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
/// Returns the current default stream
|
|
188
|
+
cudaStream_t getDefaultStream(int device);
|
|
189
189
|
|
|
190
|
-
|
|
191
|
-
|
|
190
|
+
/// Returns the current amount of temp memory available
|
|
191
|
+
size_t getTempMemoryAvailable(int device) const;
|
|
192
192
|
|
|
193
|
-
|
|
194
|
-
|
|
193
|
+
/// Synchronize our default stream with the CPU
|
|
194
|
+
void syncDefaultStreamCurrentDevice();
|
|
195
195
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
196
|
+
/// If enabled, will print every GPU memory allocation and deallocation to
|
|
197
|
+
/// standard output
|
|
198
|
+
void setLogMemoryAllocations(bool enable);
|
|
199
199
|
|
|
200
|
-
|
|
201
|
-
|
|
200
|
+
private:
|
|
201
|
+
std::shared_ptr<StandardGpuResourcesImpl> res_;
|
|
202
202
|
};
|
|
203
203
|
|
|
204
|
-
}
|
|
204
|
+
} // namespace gpu
|
|
205
|
+
} // namespace faiss
|