faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,55 +5,59 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
|
-
#include <faiss/impl/FaissAssert.h>
|
|
12
|
-
#include <cuda_runtime.h>
|
|
13
10
|
#include <cublas_v2.h>
|
|
11
|
+
#include <cuda_runtime.h>
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
14
13
|
#include <memory>
|
|
15
14
|
#include <utility>
|
|
16
15
|
#include <vector>
|
|
17
16
|
|
|
18
|
-
namespace faiss {
|
|
17
|
+
namespace faiss {
|
|
18
|
+
namespace gpu {
|
|
19
19
|
|
|
20
20
|
class GpuResources;
|
|
21
21
|
|
|
22
22
|
enum AllocType {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
23
|
+
/// Unknown allocation type or miscellaneous (not currently categorized)
|
|
24
|
+
Other = 0,
|
|
25
|
+
|
|
26
|
+
/// Primary data storage for GpuIndexFlat (the raw matrix of vectors and
|
|
27
|
+
/// vector norms if needed)
|
|
28
|
+
FlatData = 1,
|
|
29
|
+
|
|
30
|
+
/// Primary data storage for GpuIndexIVF* (the storage for each individual
|
|
31
|
+
/// IVF
|
|
32
|
+
/// list)
|
|
33
|
+
IVFLists = 2,
|
|
34
|
+
|
|
35
|
+
/// Quantizer (PQ, SQ) dictionary information
|
|
36
|
+
Quantizer = 3,
|
|
37
|
+
|
|
38
|
+
/// For GpuIndexIVFPQ, "precomputed codes" for more efficient PQ lookup
|
|
39
|
+
/// require the use of possibly large tables. These are marked separately
|
|
40
|
+
/// from
|
|
41
|
+
/// Quantizer as these can frequently be 100s - 1000s of MiB in size
|
|
42
|
+
QuantizerPrecomputedCodes = 4,
|
|
43
|
+
|
|
44
|
+
///
|
|
45
|
+
/// StandardGpuResources implementation specific types
|
|
46
|
+
///
|
|
47
|
+
|
|
48
|
+
/// When using StandardGpuResources, temporary memory allocations
|
|
49
|
+
/// (MemorySpace::Temporary) come out of a stack region of memory that is
|
|
50
|
+
/// allocated up front for each gpu (e.g., 1.5 GiB upon initialization).
|
|
51
|
+
/// This
|
|
52
|
+
/// allocation by StandardGpuResources is marked with this AllocType.
|
|
53
|
+
TemporaryMemoryBuffer = 10,
|
|
54
|
+
|
|
55
|
+
/// When using StandardGpuResources, any MemorySpace::Temporary allocations
|
|
56
|
+
/// that cannot be satisfied within the TemporaryMemoryBuffer region fall
|
|
57
|
+
/// back
|
|
58
|
+
/// to calling cudaMalloc which are sized to just the request at hand. These
|
|
59
|
+
/// "overflow" temporary allocations are marked with this AllocType.
|
|
60
|
+
TemporaryMemoryOverflow = 11,
|
|
57
61
|
};
|
|
58
62
|
|
|
59
63
|
/// Convert an AllocType to string
|
|
@@ -61,16 +65,17 @@ std::string allocTypeToString(AllocType t);
|
|
|
61
65
|
|
|
62
66
|
/// Memory regions accessible to the GPU
|
|
63
67
|
enum MemorySpace {
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
/// Temporary device memory (guaranteed to no longer be used upon exit of a
|
|
69
|
+
/// top-level index call, and where the streams using it have completed GPU
|
|
70
|
+
/// work). Typically backed by Device memory (cudaMalloc/cudaFree).
|
|
71
|
+
Temporary = 0,
|
|
68
72
|
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
/// Managed using cudaMalloc/cudaFree (typical GPU device memory)
|
|
74
|
+
Device = 1,
|
|
71
75
|
|
|
72
|
-
|
|
73
|
-
|
|
76
|
+
/// Managed using cudaMallocManaged/cudaFree (typical Unified CPU/GPU
|
|
77
|
+
/// memory)
|
|
78
|
+
Unified = 2,
|
|
74
79
|
};
|
|
75
80
|
|
|
76
81
|
/// Convert a MemorySpace to string
|
|
@@ -78,44 +83,36 @@ std::string memorySpaceToString(MemorySpace s);
|
|
|
78
83
|
|
|
79
84
|
/// Information on what/where an allocation is
|
|
80
85
|
struct AllocInfo {
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
/// piece of memory cached and to be returned for this call was last used on
|
|
112
|
-
/// stream 3 and a new memory request is for stream 4, the memory manager will
|
|
113
|
-
/// synchronize stream 4 to wait for the completion of stream 3 via events or
|
|
114
|
-
/// other stream synchronization.
|
|
115
|
-
///
|
|
116
|
-
/// The memory manager guarantees that the returned memory is free to use
|
|
117
|
-
/// without data races on this stream specified.
|
|
118
|
-
cudaStream_t stream;
|
|
86
|
+
inline AllocInfo()
|
|
87
|
+
: type(AllocType::Other),
|
|
88
|
+
device(0),
|
|
89
|
+
space(MemorySpace::Device),
|
|
90
|
+
stream(nullptr) {}
|
|
91
|
+
|
|
92
|
+
inline AllocInfo(AllocType at, int dev, MemorySpace sp, cudaStream_t st)
|
|
93
|
+
: type(at), device(dev), space(sp), stream(st) {}
|
|
94
|
+
|
|
95
|
+
/// Returns a string representation of this info
|
|
96
|
+
std::string toString() const;
|
|
97
|
+
|
|
98
|
+
/// The internal category of the allocation
|
|
99
|
+
AllocType type;
|
|
100
|
+
|
|
101
|
+
/// The device on which the allocation is happening
|
|
102
|
+
int device;
|
|
103
|
+
|
|
104
|
+
/// The memory space of the allocation
|
|
105
|
+
MemorySpace space;
|
|
106
|
+
|
|
107
|
+
/// The stream on which new work on the memory will be ordered (e.g., if a
|
|
108
|
+
/// piece of memory cached and to be returned for this call was last used on
|
|
109
|
+
/// stream 3 and a new memory request is for stream 4, the memory manager
|
|
110
|
+
/// will synchronize stream 4 to wait for the completion of stream 3 via
|
|
111
|
+
/// events or other stream synchronization.
|
|
112
|
+
///
|
|
113
|
+
/// The memory manager guarantees that the returned memory is free to use
|
|
114
|
+
/// without data races on this stream specified.
|
|
115
|
+
cudaStream_t stream;
|
|
119
116
|
};
|
|
120
117
|
|
|
121
118
|
/// Create an AllocInfo for the current device with MemorySpace::Device
|
|
@@ -129,140 +126,139 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st);
|
|
|
129
126
|
|
|
130
127
|
/// Information on what/where an allocation is, along with how big it should be
|
|
131
128
|
struct AllocRequest : public AllocInfo {
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
/// Returns a string representation of this request
|
|
153
|
-
std::string toString() const;
|
|
154
|
-
|
|
155
|
-
/// The size in bytes of the allocation
|
|
156
|
-
size_t size;
|
|
129
|
+
inline AllocRequest() : AllocInfo(), size(0) {}
|
|
130
|
+
|
|
131
|
+
inline AllocRequest(const AllocInfo& info, size_t sz)
|
|
132
|
+
: AllocInfo(info), size(sz) {}
|
|
133
|
+
|
|
134
|
+
inline AllocRequest(
|
|
135
|
+
AllocType at,
|
|
136
|
+
int dev,
|
|
137
|
+
MemorySpace sp,
|
|
138
|
+
cudaStream_t st,
|
|
139
|
+
size_t sz)
|
|
140
|
+
: AllocInfo(at, dev, sp, st), size(sz) {}
|
|
141
|
+
|
|
142
|
+
/// Returns a string representation of this request
|
|
143
|
+
std::string toString() const;
|
|
144
|
+
|
|
145
|
+
/// The size in bytes of the allocation
|
|
146
|
+
size_t size;
|
|
157
147
|
};
|
|
158
148
|
|
|
159
149
|
/// A RAII object that manages a temporary memory request
|
|
160
150
|
struct GpuMemoryReservation {
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
151
|
+
GpuMemoryReservation();
|
|
152
|
+
GpuMemoryReservation(
|
|
153
|
+
GpuResources* r,
|
|
154
|
+
int dev,
|
|
155
|
+
cudaStream_t str,
|
|
156
|
+
void* p,
|
|
157
|
+
size_t sz);
|
|
158
|
+
GpuMemoryReservation(GpuMemoryReservation&& m) noexcept;
|
|
159
|
+
~GpuMemoryReservation();
|
|
160
|
+
|
|
161
|
+
GpuMemoryReservation& operator=(GpuMemoryReservation&& m);
|
|
162
|
+
|
|
163
|
+
inline void* get() {
|
|
164
|
+
return data;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
void release();
|
|
168
|
+
|
|
169
|
+
GpuResources* res;
|
|
170
|
+
int device;
|
|
171
|
+
cudaStream_t stream;
|
|
172
|
+
void* data;
|
|
173
|
+
size_t size;
|
|
181
174
|
};
|
|
182
175
|
|
|
183
176
|
/// Base class of GPU-side resource provider; hides provision of
|
|
184
177
|
/// cuBLAS handles, CUDA streams and all device memory allocation performed
|
|
185
178
|
class GpuResources {
|
|
186
|
-
|
|
187
|
-
|
|
179
|
+
public:
|
|
180
|
+
virtual ~GpuResources();
|
|
188
181
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
182
|
+
/// Call to pre-allocate resources for a particular device. If this is
|
|
183
|
+
/// not called, then resources will be allocated at the first time
|
|
184
|
+
/// of demand
|
|
185
|
+
virtual void initializeForDevice(int device) = 0;
|
|
193
186
|
|
|
194
|
-
|
|
195
|
-
|
|
187
|
+
/// Returns the cuBLAS handle that we use for the given device
|
|
188
|
+
virtual cublasHandle_t getBlasHandle(int device) = 0;
|
|
196
189
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
190
|
+
/// Returns the stream that we order all computation on for the
|
|
191
|
+
/// given device
|
|
192
|
+
virtual cudaStream_t getDefaultStream(int device) = 0;
|
|
200
193
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
194
|
+
/// Overrides the default stream for a device to the user-supplied stream.
|
|
195
|
+
/// The resources object does not own this stream (i.e., it will not destroy
|
|
196
|
+
/// it).
|
|
197
|
+
virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
|
|
204
198
|
|
|
205
|
-
|
|
206
|
-
|
|
199
|
+
/// Returns the set of alternative streams that we use for the given device
|
|
200
|
+
virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;
|
|
207
201
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
202
|
+
/// Memory management
|
|
203
|
+
/// Returns an allocation from the given memory space, ordered with respect
|
|
204
|
+
/// to the given stream (i.e., the first user will be a kernel in this
|
|
205
|
+
/// stream). All allocations are sized internally to be the next highest
|
|
206
|
+
/// multiple of 16 bytes, and all allocations returned are guaranteed to be
|
|
207
|
+
/// 16 byte aligned.
|
|
208
|
+
virtual void* allocMemory(const AllocRequest& req) = 0;
|
|
214
209
|
|
|
215
|
-
|
|
216
|
-
|
|
210
|
+
/// Returns a previous allocation
|
|
211
|
+
virtual void deallocMemory(int device, void* in) = 0;
|
|
217
212
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
213
|
+
/// For MemorySpace::Temporary, how much space is immediately available
|
|
214
|
+
/// without cudaMalloc allocation?
|
|
215
|
+
virtual size_t getTempMemoryAvailable(int device) const = 0;
|
|
221
216
|
|
|
222
|
-
|
|
223
|
-
|
|
217
|
+
/// Returns the available CPU pinned memory buffer
|
|
218
|
+
virtual std::pair<void*, size_t> getPinnedMemory() = 0;
|
|
224
219
|
|
|
225
|
-
|
|
226
|
-
|
|
220
|
+
/// Returns the stream on which we perform async CPU <-> GPU copies
|
|
221
|
+
virtual cudaStream_t getAsyncCopyStream(int device) = 0;
|
|
227
222
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
223
|
+
///
|
|
224
|
+
/// Functions provided by default
|
|
225
|
+
///
|
|
231
226
|
|
|
232
|
-
|
|
233
|
-
|
|
227
|
+
/// Calls getBlasHandle with the current device
|
|
228
|
+
cublasHandle_t getBlasHandleCurrentDevice();
|
|
234
229
|
|
|
235
|
-
|
|
236
|
-
|
|
230
|
+
/// Calls getDefaultStream with the current device
|
|
231
|
+
cudaStream_t getDefaultStreamCurrentDevice();
|
|
237
232
|
|
|
238
|
-
|
|
239
|
-
|
|
233
|
+
/// Calls getTempMemoryAvailable with the current device
|
|
234
|
+
size_t getTempMemoryAvailableCurrentDevice() const;
|
|
240
235
|
|
|
241
|
-
|
|
242
|
-
|
|
236
|
+
/// Returns a temporary memory allocation via a RAII object
|
|
237
|
+
GpuMemoryReservation allocMemoryHandle(const AllocRequest& req);
|
|
243
238
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
239
|
+
/// Synchronizes the CPU with respect to the default stream for the
|
|
240
|
+
/// given device
|
|
241
|
+
// equivalent to cudaStreamSynchronize(getDefaultStream(device))
|
|
242
|
+
void syncDefaultStream(int device);
|
|
248
243
|
|
|
249
|
-
|
|
250
|
-
|
|
244
|
+
/// Calls syncDefaultStream for the current device
|
|
245
|
+
void syncDefaultStreamCurrentDevice();
|
|
251
246
|
|
|
252
|
-
|
|
253
|
-
|
|
247
|
+
/// Calls getAlternateStreams for the current device
|
|
248
|
+
std::vector<cudaStream_t> getAlternateStreamsCurrentDevice();
|
|
254
249
|
|
|
255
|
-
|
|
256
|
-
|
|
250
|
+
/// Calls getAsyncCopyStream for the current device
|
|
251
|
+
cudaStream_t getAsyncCopyStreamCurrentDevice();
|
|
257
252
|
};
|
|
258
253
|
|
|
259
254
|
/// Interface for a provider of a shared resources object
|
|
260
255
|
class GpuResourcesProvider {
|
|
261
|
-
|
|
262
|
-
|
|
256
|
+
public:
|
|
257
|
+
virtual ~GpuResourcesProvider();
|
|
263
258
|
|
|
264
|
-
|
|
265
|
-
|
|
259
|
+
/// Returns the shared resources object
|
|
260
|
+
virtual std::shared_ptr<GpuResources> getResources() = 0;
|
|
266
261
|
};
|
|
267
262
|
|
|
268
|
-
}
|
|
263
|
+
} // namespace gpu
|
|
264
|
+
} // namespace faiss
|