faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,124 +5,122 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
|
-
#include <faiss/gpu/GpuIndexIVF.h>
|
|
12
10
|
#include <faiss/IndexScalarQuantizer.h>
|
|
11
|
+
#include <faiss/gpu/GpuIndexIVF.h>
|
|
13
12
|
#include <memory>
|
|
14
13
|
|
|
15
|
-
namespace faiss {
|
|
14
|
+
namespace faiss {
|
|
15
|
+
namespace gpu {
|
|
16
16
|
|
|
17
17
|
class IVFFlat;
|
|
18
18
|
class GpuIndexFlat;
|
|
19
19
|
|
|
20
20
|
struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
|
|
21
|
-
|
|
22
|
-
: interleavedLayout(true) {
|
|
23
|
-
}
|
|
21
|
+
inline GpuIndexIVFScalarQuantizerConfig() : interleavedLayout(true) {}
|
|
24
22
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
23
|
+
/// Use the alternative memory layout for the IVF lists
|
|
24
|
+
/// (currently the default)
|
|
25
|
+
bool interleavedLayout;
|
|
28
26
|
};
|
|
29
27
|
|
|
30
28
|
/// Wrapper around the GPU implementation that looks like
|
|
31
29
|
/// faiss::IndexIVFScalarQuantizer
|
|
32
30
|
class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
std::unique_ptr<IVFFlat> index_;
|
|
31
|
+
public:
|
|
32
|
+
/// Construct from a pre-existing faiss::IndexIVFScalarQuantizer instance,
|
|
33
|
+
/// copying data over to the given GPU, if the input index is trained.
|
|
34
|
+
GpuIndexIVFScalarQuantizer(
|
|
35
|
+
GpuResourcesProvider* provider,
|
|
36
|
+
const faiss::IndexIVFScalarQuantizer* index,
|
|
37
|
+
GpuIndexIVFScalarQuantizerConfig config =
|
|
38
|
+
GpuIndexIVFScalarQuantizerConfig());
|
|
39
|
+
|
|
40
|
+
/// Constructs a new instance with an empty flat quantizer; the user
|
|
41
|
+
/// provides the number of lists desired.
|
|
42
|
+
GpuIndexIVFScalarQuantizer(
|
|
43
|
+
GpuResourcesProvider* provider,
|
|
44
|
+
int dims,
|
|
45
|
+
int nlist,
|
|
46
|
+
faiss::ScalarQuantizer::QuantizerType qtype,
|
|
47
|
+
faiss::MetricType metric = MetricType::METRIC_L2,
|
|
48
|
+
bool encodeResidual = true,
|
|
49
|
+
GpuIndexIVFScalarQuantizerConfig config =
|
|
50
|
+
GpuIndexIVFScalarQuantizerConfig());
|
|
51
|
+
|
|
52
|
+
~GpuIndexIVFScalarQuantizer() override;
|
|
53
|
+
|
|
54
|
+
/// Reserve GPU memory in our inverted lists for this number of vectors
|
|
55
|
+
void reserveMemory(size_t numVecs);
|
|
56
|
+
|
|
57
|
+
/// Initialize ourselves from the given CPU index; will overwrite
|
|
58
|
+
/// all data in ourselves
|
|
59
|
+
void copyFrom(const faiss::IndexIVFScalarQuantizer* index);
|
|
60
|
+
|
|
61
|
+
/// Copy ourselves to the given CPU index; will overwrite all data
|
|
62
|
+
/// in the index instance
|
|
63
|
+
void copyTo(faiss::IndexIVFScalarQuantizer* index) const;
|
|
64
|
+
|
|
65
|
+
/// After adding vectors, one can call this to reclaim device memory
|
|
66
|
+
/// to exactly the amount needed. Returns space reclaimed in bytes
|
|
67
|
+
size_t reclaimMemory();
|
|
68
|
+
|
|
69
|
+
/// Clears out all inverted lists, but retains the coarse and scalar
|
|
70
|
+
/// quantizer information
|
|
71
|
+
void reset() override;
|
|
72
|
+
|
|
73
|
+
/// Trains the coarse and scalar quantizer based on the given vector data
|
|
74
|
+
void train(Index::idx_t n, const float* x) override;
|
|
75
|
+
|
|
76
|
+
/// Returns the number of vectors present in a particular inverted list
|
|
77
|
+
int getListLength(int listId) const override;
|
|
78
|
+
|
|
79
|
+
/// Return the encoded vector data contained in a particular inverted list,
|
|
80
|
+
/// for debugging purposes.
|
|
81
|
+
/// If gpuFormat is true, the data is returned as it is encoded in the
|
|
82
|
+
/// GPU-side representation.
|
|
83
|
+
/// Otherwise, it is converted to the CPU format.
|
|
84
|
+
/// compliant format, while the native GPU format may differ.
|
|
85
|
+
std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
|
|
86
|
+
const override;
|
|
87
|
+
|
|
88
|
+
/// Return the vector indices contained in a particular inverted list, for
|
|
89
|
+
/// debugging purposes.
|
|
90
|
+
std::vector<Index::idx_t> getListIndices(int listId) const override;
|
|
91
|
+
|
|
92
|
+
protected:
|
|
93
|
+
/// Called from GpuIndex for add/add_with_ids
|
|
94
|
+
void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
|
|
95
|
+
|
|
96
|
+
/// Called from GpuIndex for search
|
|
97
|
+
void searchImpl_(
|
|
98
|
+
int n,
|
|
99
|
+
const float* x,
|
|
100
|
+
int k,
|
|
101
|
+
float* distances,
|
|
102
|
+
Index::idx_t* labels) const override;
|
|
103
|
+
|
|
104
|
+
/// Called from train to handle SQ residual training
|
|
105
|
+
void trainResiduals_(Index::idx_t n, const float* x);
|
|
106
|
+
|
|
107
|
+
public:
|
|
108
|
+
/// Exposed like the CPU version
|
|
109
|
+
faiss::ScalarQuantizer sq;
|
|
110
|
+
|
|
111
|
+
/// Exposed like the CPU version
|
|
112
|
+
bool by_residual;
|
|
113
|
+
|
|
114
|
+
protected:
|
|
115
|
+
/// Our configuration options
|
|
116
|
+
const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;
|
|
117
|
+
|
|
118
|
+
/// Desired inverted list memory reservation
|
|
119
|
+
size_t reserveMemoryVecs_;
|
|
120
|
+
|
|
121
|
+
/// Instance that we own; contains the inverted list
|
|
122
|
+
std::unique_ptr<IVFFlat> index_;
|
|
126
123
|
};
|
|
127
124
|
|
|
128
|
-
}
|
|
125
|
+
} // namespace gpu
|
|
126
|
+
} // namespace faiss
|
|
@@ -5,26 +5,27 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#pragma once
|
|
10
9
|
|
|
11
|
-
namespace faiss {
|
|
10
|
+
namespace faiss {
|
|
11
|
+
namespace gpu {
|
|
12
12
|
|
|
13
13
|
/// How user vector index data is stored on the GPU
|
|
14
14
|
enum IndicesOptions {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
15
|
+
/// The user indices are only stored on the CPU; the GPU returns
|
|
16
|
+
/// (inverted list, offset) to the CPU which is then translated to
|
|
17
|
+
/// the real user index.
|
|
18
|
+
INDICES_CPU = 0,
|
|
19
|
+
/// The indices are not stored at all, on either the CPU or
|
|
20
|
+
/// GPU. Only (inverted list, offset) is returned to the user as the
|
|
21
|
+
/// index.
|
|
22
|
+
INDICES_IVF = 1,
|
|
23
|
+
/// Indices are stored as 32 bit integers on the GPU, but returned
|
|
24
|
+
/// as 64 bit integers
|
|
25
|
+
INDICES_32_BIT = 2,
|
|
26
|
+
/// Indices are stored as 64 bit integers on the GPU
|
|
27
|
+
INDICES_64_BIT = 3,
|
|
28
28
|
};
|
|
29
29
|
|
|
30
|
-
}
|
|
30
|
+
} // namespace gpu
|
|
31
|
+
} // namespace faiss
|
|
@@ -5,76 +5,72 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/gpu/GpuResources.h>
|
|
10
9
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
11
10
|
#include <sstream>
|
|
12
11
|
|
|
13
|
-
namespace faiss {
|
|
12
|
+
namespace faiss {
|
|
13
|
+
namespace gpu {
|
|
14
14
|
|
|
15
15
|
std::string allocTypeToString(AllocType t) {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
16
|
+
switch (t) {
|
|
17
|
+
case AllocType::Other:
|
|
18
|
+
return "Other";
|
|
19
|
+
case AllocType::FlatData:
|
|
20
|
+
return "FlatData";
|
|
21
|
+
case AllocType::IVFLists:
|
|
22
|
+
return "IVFLists";
|
|
23
|
+
case AllocType::Quantizer:
|
|
24
|
+
return "Quantizer";
|
|
25
|
+
case AllocType::QuantizerPrecomputedCodes:
|
|
26
|
+
return "QuantizerPrecomputedCodes";
|
|
27
|
+
case AllocType::TemporaryMemoryBuffer:
|
|
28
|
+
return "TemporaryMemoryBuffer";
|
|
29
|
+
case AllocType::TemporaryMemoryOverflow:
|
|
30
|
+
return "TemporaryMemoryOverflow";
|
|
31
|
+
default:
|
|
32
|
+
return "Unknown";
|
|
33
|
+
}
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
std::string memorySpaceToString(MemorySpace s) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
37
|
+
switch (s) {
|
|
38
|
+
case MemorySpace::Temporary:
|
|
39
|
+
return "Temporary";
|
|
40
|
+
case MemorySpace::Device:
|
|
41
|
+
return "Device";
|
|
42
|
+
case MemorySpace::Unified:
|
|
43
|
+
return "Unified";
|
|
44
|
+
default:
|
|
45
|
+
return "Unknown";
|
|
46
|
+
}
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
-
std::string
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
<< " dev " << device
|
|
54
|
-
<< " space " << memorySpaceToString(space)
|
|
55
|
-
<< " stream " << (void*) stream;
|
|
49
|
+
std::string AllocInfo::toString() const {
|
|
50
|
+
std::stringstream ss;
|
|
51
|
+
ss << "type " << allocTypeToString(type) << " dev " << device << " space "
|
|
52
|
+
<< memorySpaceToString(space) << " stream " << (void*)stream;
|
|
56
53
|
|
|
57
|
-
|
|
54
|
+
return ss.str();
|
|
58
55
|
}
|
|
59
56
|
|
|
60
|
-
std::string
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
ss << AllocInfo::toString() << " size " << size << " bytes";
|
|
57
|
+
std::string AllocRequest::toString() const {
|
|
58
|
+
std::stringstream ss;
|
|
59
|
+
ss << AllocInfo::toString() << " size " << size << " bytes";
|
|
64
60
|
|
|
65
|
-
|
|
61
|
+
return ss.str();
|
|
66
62
|
}
|
|
67
63
|
|
|
68
64
|
AllocInfo makeDevAlloc(AllocType at, cudaStream_t st) {
|
|
69
|
-
|
|
65
|
+
return AllocInfo(at, getCurrentDevice(), MemorySpace::Device, st);
|
|
70
66
|
}
|
|
71
67
|
|
|
72
68
|
AllocInfo makeTempAlloc(AllocType at, cudaStream_t st) {
|
|
73
|
-
|
|
69
|
+
return AllocInfo(at, getCurrentDevice(), MemorySpace::Temporary, st);
|
|
74
70
|
}
|
|
75
71
|
|
|
76
72
|
AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
|
|
77
|
-
|
|
73
|
+
return AllocInfo(at, getCurrentDevice(), sp, st);
|
|
78
74
|
}
|
|
79
75
|
|
|
80
76
|
//
|
|
@@ -82,119 +78,111 @@ AllocInfo makeSpaceAlloc(AllocType at, MemorySpace sp, cudaStream_t st) {
|
|
|
82
78
|
//
|
|
83
79
|
|
|
84
80
|
GpuMemoryReservation::GpuMemoryReservation()
|
|
85
|
-
|
|
86
|
-
device(0),
|
|
87
|
-
stream(nullptr),
|
|
88
|
-
data(nullptr),
|
|
89
|
-
size(0) {
|
|
90
|
-
}
|
|
81
|
+
: res(nullptr), device(0), stream(nullptr), data(nullptr), size(0) {}
|
|
91
82
|
|
|
92
|
-
GpuMemoryReservation::GpuMemoryReservation(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
stream(str),
|
|
100
|
-
data(p),
|
|
101
|
-
size(sz) {
|
|
102
|
-
}
|
|
83
|
+
GpuMemoryReservation::GpuMemoryReservation(
|
|
84
|
+
GpuResources* r,
|
|
85
|
+
int dev,
|
|
86
|
+
cudaStream_t str,
|
|
87
|
+
void* p,
|
|
88
|
+
size_t sz)
|
|
89
|
+
: res(r), device(dev), stream(str), data(p), size(sz) {}
|
|
103
90
|
|
|
104
91
|
GpuMemoryReservation::GpuMemoryReservation(GpuMemoryReservation&& m) noexcept {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
92
|
+
res = m.res;
|
|
93
|
+
m.res = nullptr;
|
|
94
|
+
device = m.device;
|
|
95
|
+
m.device = 0;
|
|
96
|
+
stream = m.stream;
|
|
97
|
+
m.stream = nullptr;
|
|
98
|
+
data = m.data;
|
|
99
|
+
m.data = nullptr;
|
|
100
|
+
size = m.size;
|
|
101
|
+
m.size = 0;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
GpuMemoryReservation& GpuMemoryReservation::operator=(
|
|
105
|
+
GpuMemoryReservation&& m) {
|
|
106
|
+
// Can't be both a valid allocation and the same allocation
|
|
107
|
+
FAISS_ASSERT(
|
|
108
|
+
!(res && res == m.res && device == m.device && data == m.data));
|
|
109
|
+
|
|
110
|
+
release();
|
|
111
|
+
res = m.res;
|
|
112
|
+
m.res = nullptr;
|
|
113
|
+
device = m.device;
|
|
114
|
+
m.device = 0;
|
|
115
|
+
stream = m.stream;
|
|
116
|
+
m.stream = nullptr;
|
|
117
|
+
data = m.data;
|
|
118
|
+
m.data = nullptr;
|
|
119
|
+
size = m.size;
|
|
120
|
+
m.size = 0;
|
|
121
|
+
|
|
122
|
+
return *this;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
void GpuMemoryReservation::release() {
|
|
126
|
+
if (res) {
|
|
127
|
+
res->deallocMemory(device, data);
|
|
128
|
+
res = nullptr;
|
|
129
|
+
device = 0;
|
|
130
|
+
stream = nullptr;
|
|
131
|
+
data = nullptr;
|
|
132
|
+
size = 0;
|
|
133
|
+
}
|
|
137
134
|
}
|
|
138
135
|
|
|
139
136
|
GpuMemoryReservation::~GpuMemoryReservation() {
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
137
|
+
if (res) {
|
|
138
|
+
res->deallocMemory(device, data);
|
|
139
|
+
}
|
|
143
140
|
}
|
|
144
141
|
|
|
145
142
|
//
|
|
146
143
|
// GpuResources
|
|
147
144
|
//
|
|
148
145
|
|
|
149
|
-
GpuResources::~GpuResources() {
|
|
150
|
-
}
|
|
146
|
+
GpuResources::~GpuResources() {}
|
|
151
147
|
|
|
152
|
-
cublasHandle_t
|
|
153
|
-
|
|
154
|
-
return getBlasHandle(getCurrentDevice());
|
|
148
|
+
cublasHandle_t GpuResources::getBlasHandleCurrentDevice() {
|
|
149
|
+
return getBlasHandle(getCurrentDevice());
|
|
155
150
|
}
|
|
156
151
|
|
|
157
|
-
cudaStream_t
|
|
158
|
-
|
|
159
|
-
return getDefaultStream(getCurrentDevice());
|
|
152
|
+
cudaStream_t GpuResources::getDefaultStreamCurrentDevice() {
|
|
153
|
+
return getDefaultStream(getCurrentDevice());
|
|
160
154
|
}
|
|
161
155
|
|
|
162
|
-
std::vector<cudaStream_t>
|
|
163
|
-
|
|
164
|
-
return getAlternateStreams(getCurrentDevice());
|
|
156
|
+
std::vector<cudaStream_t> GpuResources::getAlternateStreamsCurrentDevice() {
|
|
157
|
+
return getAlternateStreams(getCurrentDevice());
|
|
165
158
|
}
|
|
166
159
|
|
|
167
|
-
cudaStream_t
|
|
168
|
-
|
|
169
|
-
return getAsyncCopyStream(getCurrentDevice());
|
|
160
|
+
cudaStream_t GpuResources::getAsyncCopyStreamCurrentDevice() {
|
|
161
|
+
return getAsyncCopyStream(getCurrentDevice());
|
|
170
162
|
}
|
|
171
163
|
|
|
172
|
-
void
|
|
173
|
-
|
|
174
|
-
CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
|
|
164
|
+
void GpuResources::syncDefaultStream(int device) {
|
|
165
|
+
CUDA_VERIFY(cudaStreamSynchronize(getDefaultStream(device)));
|
|
175
166
|
}
|
|
176
167
|
|
|
177
|
-
void
|
|
178
|
-
|
|
179
|
-
syncDefaultStream(getCurrentDevice());
|
|
168
|
+
void GpuResources::syncDefaultStreamCurrentDevice() {
|
|
169
|
+
syncDefaultStream(getCurrentDevice());
|
|
180
170
|
}
|
|
181
171
|
|
|
182
|
-
GpuMemoryReservation
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
this, req.device, req.stream, allocMemory(req), req.size);
|
|
172
|
+
GpuMemoryReservation GpuResources::allocMemoryHandle(const AllocRequest& req) {
|
|
173
|
+
return GpuMemoryReservation(
|
|
174
|
+
this, req.device, req.stream, allocMemory(req), req.size);
|
|
186
175
|
}
|
|
187
176
|
|
|
188
|
-
size_t
|
|
189
|
-
|
|
190
|
-
return getTempMemoryAvailable(getCurrentDevice());
|
|
177
|
+
size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
|
|
178
|
+
return getTempMemoryAvailable(getCurrentDevice());
|
|
191
179
|
}
|
|
192
180
|
|
|
193
181
|
//
|
|
194
182
|
// GpuResourcesProvider
|
|
195
183
|
//
|
|
196
184
|
|
|
197
|
-
GpuResourcesProvider::~GpuResourcesProvider() {
|
|
198
|
-
}
|
|
185
|
+
GpuResourcesProvider::~GpuResourcesProvider() {}
|
|
199
186
|
|
|
200
|
-
}
|
|
187
|
+
} // namespace gpu
|
|
188
|
+
} // namespace faiss
|