faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
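The headline additions in the vendored faiss sources are the 4-bit "fast scan" family (IndexPQFastScan, IndexIVFPQFastScan, the pq4_fast_scan kernels, simdlib, partitioning and quantize_lut utilities), the IndexRefine wrapper, and the new invlists/ directory. As a hedged sketch only, assuming the vendored copy matches upstream faiss behaviour: upstream reaches the fast-scan path through index_factory with the "x4fs" suffix. The "PQ32x4fs" spec and toy data below are illustrative, not taken from this diff.

#include <faiss/index_factory.h>
#include <faiss/Index.h>
#include <cstdlib>
#include <memory>
#include <vector>

int main() {
    int d = 64; // dimensionality; must be divisible by the 32 sub-quantizers
    // "x4fs" selects 4-bit PQ codes searched with the SIMD fast-scan kernels
    // (IndexPQFastScan upstream); plain "PQ32" would use the classic path.
    std::unique_ptr<faiss::Index> index(faiss::index_factory(d, "PQ32x4fs"));

    std::vector<float> xb(10000 * d);
    for (auto& v : xb) v = rand() / float(RAND_MAX); // toy training data

    index->train(10000, xb.data());
    index->add(10000, xb.data());
    return 0;
}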
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp
@@ -192,7 +192,7 @@ void IndexIVFScalarQuantizer::encode_vectors(idx_t n, const float* x,
     size_t coarse_size = include_listnos ? coarse_code_size () : 0;
     memset(codes, 0, (code_size + coarse_size) * n);

-#pragma omp parallel if(n >
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);

@@ -222,7 +222,7 @@ void IndexIVFScalarQuantizer::sa_decode (idx_t n, const uint8_t *codes,
     std::unique_ptr<ScalarQuantizer::Quantizer> squant (sq.select_quantizer ());
     size_t coarse_size = coarse_code_size ();

-#pragma omp parallel if(n >
+#pragma omp parallel if(n > 1000)
     {
         std::vector<float> residual (d);

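Both hunks above complete the same OpenMP guard: the parallel regions in encode_vectors and sa_decode now spin up a thread team only when the batch holds more than 1000 vectors, so small calls avoid fork/join overhead. A self-contained sketch of the pattern; encode_one and the per-vector work are hypothetical stand-ins for the faiss internals:

#include <cstddef>
#include <cstdint>
#include <vector>

void encode_batch(size_t n, size_t d, size_t code_size,
                  const float* x, uint8_t* codes) {
    // Only spawn the OpenMP team for large batches; otherwise run serially.
#pragma omp parallel if (n > 1000)
    {
        // Per-thread scratch buffer, mirroring the faiss hunks above.
        std::vector<float> residual(d);

#pragma omp for
        for (long i = 0; i < static_cast<long>(n); i++) {
            // Hypothetical per-vector encode, e.g.
            // encode_one(x + i * d, residual.data(), codes + i * code_size);
            codes[i * code_size] = static_cast<uint8_t>(x[i * d]); // placeholder
        }
    }
}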
data/vendor/faiss/faiss/IndexScalarQuantizer.h
@@ -82,7 +82,7 @@ struct IndexScalarQuantizer: Index {


 /** An IVF implementation where the components of the residuals are
- * encoded with a scalar
+ * encoded with a scalar quantizer. All distance computations
  * are asymmetric, so the encoded vectors are decoded and approximate
  * distances are computed.
  */
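The completed comment describes the asymmetric distance scheme: stored vectors are decoded from their scalar-quantized codes and compared against the full-precision query, so computed distances are approximate. A minimal illustration of that scheme; decode is a hypothetical stand-in for the ScalarQuantizer decoder, not a faiss API:

#include <cstddef>
#include <cstdint>
#include <vector>

// Asymmetric L2: the database side is decoded from its compact code, the
// query stays in full precision, and the result approximates the true
// distance to the original (pre-quantization) vector.
float asymmetric_l2(const float* query, const uint8_t* code, size_t d,
                    void (*decode)(const uint8_t* code, float* x, size_t d)) {
    std::vector<float> decoded(d);
    decode(code, decoded.data(), d);

    float sum = 0;
    for (size_t j = 0; j < d; j++) {
        float diff = query[j] - decoded[j];
        sum += diff * diff;
    }
    return sum; // approximate squared L2 distance
}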
data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -36,9 +36,12 @@ class GpuIndex : public faiss::Index {
            float metricArg,
            GpuIndexConfig config);

-
-
-
+  /// Returns the device that this index is resident on
+  int getDevice() const;
+
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();

   /// Set the minimum data size for searches (in MiB) for which we use
   /// CPU -> GPU paging
@@ -50,7 +53,7 @@ class GpuIndex : public faiss::Index {
   /// `x` can be resident on the CPU or any GPU; copies are performed
   /// as needed
   /// Handles paged adds if the add set is too large; calls addInternal_
-  void add(
+  void add(Index::idx_t, const float* x) override;

   /// `x` and `ids` can be resident on the CPU or any GPU; copies are
   /// performed as needed
@@ -59,6 +62,13 @@ class GpuIndex : public faiss::Index {
              const float* x,
              const Index::idx_t* ids) override;

+  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
+  /// performed as needed
+  void assign(Index::idx_t n,
+              const float* x,
+              Index::idx_t* labels,
+              Index::idx_t k = 1) const override;
+
   /// `x`, `distances` and `labels` can be resident on the CPU or any
   /// GPU; copies are performed as needed
   void search(Index::idx_t n,
@@ -136,11 +146,8 @@ private:
   /// Manages streams, cuBLAS handles and scratch memory for devices
   std::shared_ptr<GpuResources> resources_;

-  ///
-  const
-
-  /// The memory space of our primary storage on the GPU
-  const MemorySpace memorySpace_;
+  /// Our configuration options
+  const GpuIndexConfig config_;

   /// Size above which we page copies from the CPU to GPU
   size_t minPagedSize_;
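Taken together, the GpuIndex.h hunks make the public surface explicit: getDevice() and getResources() are exposed, add() gains its full override signature, a GPU-aware assign() is added, and the loose member state collapses into one immutable config_. A hedged usage sketch, assuming a GPU-enabled build with the upstream-style GpuIndexFlatL2 constructor:

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void label_queries(const float* xb, int nb, const float* xq, int nq, int d) {
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatL2 index(&res, d);
    index.add(nb, xb);

    // New override: nearest-neighbor ids only, no distances buffer; `labels`
    // may live on the CPU or any GPU, copies are performed as needed.
    std::vector<faiss::Index::idx_t> labels(nq);
    index.assign(nq, xq, labels.data(), /*k=*/1);

    int device = index.getDevice();        // now part of the public API
    auto resources = index.getResources(); // shared_ptr<GpuResources>
    (void)device; (void)resources;
}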
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -38,6 +38,13 @@ class GpuIndexBinaryFlat : public IndexBinary {

   ~GpuIndexBinaryFlat() override;

+  /// Returns the device that this index is resident on
+  int getDevice() const;
+
+  /// Returns a reference to our GpuResources object that manages memory, stream
+  /// and handle resources on the GPU
+  std::shared_ptr<GpuResources> getResources();
+
   /// Initialize ourselves from the given CPU index; will overwrite
   /// all data in ourselves
   void copyFrom(const faiss::IndexBinaryFlat* index);
@@ -80,7 +87,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
   std::shared_ptr<GpuResources> resources_;

   /// Configuration options
-  GpuIndexBinaryFlatConfig
+  const GpuIndexBinaryFlatConfig binaryFlatConfig_;

   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<BinaryFlatIndex> data_;
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
@@ -21,7 +21,7 @@ struct IndexFlatIP;

 namespace faiss { namespace gpu {

-
+class FlatIndex;

 struct GpuIndexFlatConfig : public GpuIndexConfig {
   inline GpuIndexFlatConfig()
@@ -87,27 +87,27 @@ class GpuIndexFlat : public GpuIndex {
   void train(Index::idx_t n, const float* x) override;

   /// Overrides to avoid excessive copies
-  void add(
+  void add(Index::idx_t, const float* x) override;

   /// Reconstruction methods; prefer the batch reconstruct as it will
   /// be more efficient
-  void reconstruct(
+  void reconstruct(Index::idx_t key, float* out) const override;

   /// Batch reconstruction method
-  void reconstruct_n(
-
+  void reconstruct_n(Index::idx_t i0,
+                     Index::idx_t num,
                      float* out) const override;

   /// Compute residual
   void compute_residual(const float* x,
                         float* residual,
-
+                        Index::idx_t key) const override;

   /// Compute residual (batch mode)
-  void compute_residual_n(
+  void compute_residual_n(Index::idx_t n,
                           const float* xs,
                           float* residuals,
-                          const
+                          const Index::idx_t* keys) const override;

   /// For internal access
   inline FlatIndex* getGpuData() { return data_.get(); }
@@ -126,11 +126,11 @@ class GpuIndexFlat : public GpuIndex {
                    const float* x,
                    int k,
                    float* distances,
-
+                   Index::idx_t* labels) const override;

  protected:
-  /// Our
-  const GpuIndexFlatConfig
+  /// Our configuration options
+  const GpuIndexFlatConfig flatConfig_;

   /// Holds our GPU data containing the list of vectors
   std::unique_ptr<FlatIndex> data_;
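The GpuIndexFlat.h hunk mostly restores override signatures that the diff renderer had truncated; the substantive points are the comment's advice to prefer batch reconstruction and the new immutable flatConfig_. A short hedged sketch of the batch call, assuming the upstream GpuIndexFlat API:

#include <faiss/gpu/GpuIndexFlat.h>
#include <vector>

// Copy vectors [0, num) back from the GPU in one reconstruct_n call rather
// than num single-key reconstruct calls, as the header comment recommends.
std::vector<float> first_vectors(const faiss::gpu::GpuIndexFlat& index,
                                 faiss::Index::idx_t num) {
    std::vector<float> out(num * index.d);
    index.reconstruct_n(/*i0=*/0, num, out.data());
    return out;
}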
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
@@ -56,6 +56,22 @@ class GpuIndexIVF : public GpuIndex {
   /// Returns the number of inverted lists we're managing
   int getNumLists() const;

+  /// Returns the number of vectors present in a particular inverted list
+  virtual int getListLength(int listId) const = 0;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  virtual std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const = 0;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  virtual std::vector<Index::idx_t> getListIndices(int listId) const = 0;
+
   /// Return the quantizer we're using
   GpuIndexFlat* getQuantizer();

@@ -67,7 +83,7 @@ class GpuIndexIVF : public GpuIndex {

  protected:
   bool addImplRequiresIDs_() const override;
-  void trainQuantizer_(
+  void trainQuantizer_(Index::idx_t n, const float* x);

  public:
   /// Exposing this like the CPU version for manipulation
@@ -83,7 +99,8 @@ class GpuIndexIVF : public GpuIndex {
   GpuIndexFlat* quantizer;

  protected:
-
+  /// Our configuration options
+  const GpuIndexIVFConfig ivfConfig_;
 };

 } } // namespace
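Making getListLength, getListVectorData and getListIndices pure virtuals on GpuIndexIVF means every GPU IVF variant (flat, PQ, and scalar quantizer, below) can be inspected through the base class. A hedged debugging sketch using only the methods declared in these hunks:

#include <faiss/gpu/GpuIndexIVF.h>
#include <cstdio>

// Print the occupancy of each non-empty inverted list; works with any
// GpuIndexIVF subclass now that the accessors live on the base class.
void dump_lists(const faiss::gpu::GpuIndexIVF& index) {
    for (int listId = 0; listId < index.getNumLists(); listId++) {
        int len = index.getListLength(listId);
        if (len == 0) continue;

        auto ids = index.getListIndices(listId);
        // false => convert codes from the GPU layout to the CPU format
        auto codes = index.getListVectorData(listId, /*gpuFormat=*/false);

        std::printf("list %d: %d vectors, %zu code bytes, first id %lld\n",
                    listId, len, codes.size(), (long long)ids[0]);
    }
}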
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -19,6 +19,13 @@ class IVFFlat;
 class GpuIndexFlat;

 struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFFlatConfig()
+      : interleavedLayout(true) {
+  }
+
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };

 /// Wrapper around the GPU implementation that looks like
@@ -56,10 +63,28 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();

+  /// Clears out all inverted lists, but retains the coarse centroid information
   void reset() override;

+  /// Trains the coarse quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;

+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
+
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -73,8 +98,9 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
                    float* distances,
                    Index::idx_t* labels) const override;

-
-
+ protected:
+  /// Our configuration options
+  const GpuIndexIVFFlatConfig ivfFlatConfig_;

   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;
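Note the config defaults: for GpuIndexIVFFlat (and the scalar quantizer further down) interleavedLayout now defaults to true, while for IVFPQ it stays false and is flagged as under development. A hedged sketch of opting out, assuming the upstream constructor signature:

#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

faiss::gpu::GpuIndexIVFFlat*
make_ivfflat(faiss::gpu::StandardGpuResources* res, int d, int nlist) {
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.interleavedLayout = false; // revert to the pre-existing layout

    return new faiss::gpu::GpuIndexIVFFlat(res, d, nlist,
                                           faiss::METRIC_L2, config);
}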
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
@@ -23,7 +23,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {
   inline GpuIndexIVFPQConfig()
       : useFloat16LookupTables(false),
         usePrecomputedTables(false),
-
+        interleavedLayout(false),
         useMMCodeDistance(false) {
   }

@@ -38,7 +38,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig {

   /// Use the alternative memory layout for the IVF lists
   /// WARNING: this is a feature under development, do not use!
-  bool
+  bool interleavedLayout;

   /// Use GEMM-backed computation of PQ code distances for the no precomputed
   /// table version of IVFPQ.
@@ -108,19 +108,24 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
   /// product centroid information
   void reset() override;

+  /// Trains the coarse and product quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;

-  ///
-
-  int getListLength(int listId) const;
+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;

-  ///
-  ///
-
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;

-  ///
-  ///
-  std::vector<
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;

  protected:
   /// Called from GpuIndex for add/add_with_ids
@@ -135,13 +140,18 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
                    float* distances,
                    Index::idx_t* labels) const override;

-
+  /// Throws errors if configuration settings are improper
   void verifySettings_() const;

+  /// Trains the PQ quantizer based on the given vector data
   void trainResidualQuantizer_(Index::idx_t n, const float* x);

-
-
+ protected:
+  /// Our configuration options that we were initialized with
+  const GpuIndexIVFPQConfig ivfpqConfig_;
+
+  /// Runtime override: whether or not we use precomputed tables
+  bool usePrecomputedTables_;

   /// Number of sub-quantizers per encoded vector
   int subQuantizers_;
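For IVFPQ the new layout option is wired into the config but defaults to false per the in-header warning, and precomputed tables split into the immutable ivfpqConfig_ plus a runtime usePrecomputedTables_ flag. A hedged construction sketch, assuming the upstream GpuIndexIVFPQ constructor:

#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/StandardGpuResources.h>

faiss::gpu::GpuIndexIVFPQ*
make_ivfpq(faiss::gpu::StandardGpuResources* res, int d, int nlist, int m) {
    faiss::gpu::GpuIndexIVFPQConfig config;
    config.useFloat16LookupTables = true; // existing option
    // config.interleavedLayout stays false: "feature under development"

    return new faiss::gpu::GpuIndexIVFPQ(res, d, nlist, m, /*bitsPerCode=*/8,
                                         faiss::METRIC_L2, config);
}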
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -18,6 +18,13 @@ class IVFFlat;
 class GpuIndexFlat;

 struct GpuIndexIVFScalarQuantizerConfig : public GpuIndexIVFConfig {
+  inline GpuIndexIVFScalarQuantizerConfig()
+      : interleavedLayout(true) {
+  }
+
+  /// Use the alternative memory layout for the IVF lists
+  /// (currently the default)
+  bool interleavedLayout;
 };

 /// Wrapper around the GPU implementation that looks like
@@ -61,10 +68,29 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// to exactly the amount needed. Returns space reclaimed in bytes
   size_t reclaimMemory();

+  /// Clears out all inverted lists, but retains the coarse and scalar quantizer
+  /// information
   void reset() override;

+  /// Trains the coarse and scalar quantizer based on the given vector data
   void train(Index::idx_t n, const float* x) override;

+  /// Returns the number of vectors present in a particular inverted list
+  int getListLength(int listId) const override;
+
+  /// Return the encoded vector data contained in a particular inverted list,
+  /// for debugging purposes.
+  /// If gpuFormat is true, the data is returned as it is encoded in the
+  /// GPU-side representation.
+  /// Otherwise, it is converted to the CPU format.
+  /// compliant format, while the native GPU format may differ.
+  std::vector<uint8_t>
+  getListVectorData(int listId, bool gpuFormat = false) const override;
+
+  /// Return the vector indices contained in a particular inverted list, for
+  /// debugging purposes.
+  std::vector<Index::idx_t> getListIndices(int listId) const override;
+
  protected:
   /// Called from GpuIndex for add/add_with_ids
   void addImpl_(int n,
@@ -88,8 +114,9 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
   /// Exposed like the CPU version
   bool by_residual;

-
-
+ protected:
+  /// Our configuration options
+  const GpuIndexIVFScalarQuantizerConfig ivfSQConfig_;

   /// Desired inverted list memory reservation
   size_t reserveMemoryVecs_;
data/vendor/faiss/faiss/gpu/GpuResources.h
@@ -198,6 +198,10 @@ class GpuResources {
   /// given device
   virtual cudaStream_t getDefaultStream(int device) = 0;

+  /// Overrides the default stream for a device to the user-supplied stream. The
+  /// resources object does not own this stream (i.e., it will not destroy it).
+  virtual void setDefaultStream(int device, cudaStream_t stream) = 0;
+
   /// Returns the set of alternative streams that we use for the given device
   virtual std::vector<cudaStream_t> getAlternateStreams(int device) = 0;

data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp
@@ -101,12 +101,8 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() {
   for (auto& entry : defaultStreams_) {
     DeviceScope scope(entry.first);

-
-
-      // The user did not specify this stream, thus we are the ones
-      // who have created it
-      CUDA_VERIFY(cudaStreamDestroy(entry.second));
-    }
+    // We created these streams, so are responsible for destroying them
+    CUDA_VERIFY(cudaStreamDestroy(entry.second));
   }

   for (auto& entry : alternateStreams_) {
@@ -210,16 +206,47 @@ StandardGpuResourcesImpl::setPinnedMemory(size_t size) {

 void
 StandardGpuResourcesImpl::setDefaultStream(int device, cudaStream_t stream) {
-
-
-
-
-  it
+  if (isInitialized(device)) {
+    // A new series of calls may not be ordered with what was the previous
+    // stream, so if the stream being specified is different, then we need to
+    // ensure ordering between the two (new stream waits on old).
+    auto it = userDefaultStreams_.find(device);
+    cudaStream_t prevStream = nullptr;
+
+    if (it != userDefaultStreams_.end()) {
+      prevStream = it->second;
+    } else {
+      FAISS_ASSERT(defaultStreams_.count(device));
+      prevStream = defaultStreams_[device];
+    }
+
+    if (prevStream != stream) {
+      streamWait({stream}, {prevStream});
+    }
   }

   userDefaultStreams_[device] = stream;
 }

+void
+StandardGpuResourcesImpl::revertDefaultStream(int device) {
+  if (isInitialized(device)) {
+    auto it = userDefaultStreams_.find(device);
+
+    if (it != userDefaultStreams_.end()) {
+      // There was a user stream set that we need to synchronize against
+      cudaStream_t prevStream = userDefaultStreams_[device];
+
+      FAISS_ASSERT(defaultStreams_.count(device));
+      cudaStream_t newStream = defaultStreams_[device];
+
+      streamWait({newStream}, {prevStream});
+    }
+  }
+
+  userDefaultStreams_.erase(device);
+}
+
 void
 StandardGpuResourcesImpl::setDefaultNullStreamAllDevices() {
   for (int dev = 0; dev < getNumDevices(); ++dev) {
@@ -274,14 +301,8 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {

   // Create streams
   cudaStream_t defaultStream = 0;
-
-
-    // We already have a stream provided by the user
-    defaultStream = it->second;
-  } else {
-    CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
-                                          cudaStreamNonBlocking));
-  }
+  CUDA_VERIFY(cudaStreamCreateWithFlags(&defaultStream,
+                                        cudaStreamNonBlocking));

   defaultStreams_[device] = defaultStream;

@@ -308,15 +329,14 @@ StandardGpuResourcesImpl::initializeForDevice(int device) {
   FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
   blasHandles_[device] = blasHandle;

-  //
-
-  //
-  if
-
-  }
-#endif
+  // For CUDA 10 on V100, enabling tensor core usage would enable automatic
+  // rounding down of inputs to f16 (though accumulate in f32) which results in
+  // unacceptable loss of precision in general.
+  // For CUDA 11 / A100, only enable tensor core support if it doesn't result in
+  // a loss of precision.
 #if CUDA_VERSION >= 11000
-  cublasSetMathMode(blasHandle,
+  cublasSetMathMode(blasHandle,
+                    CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
 #endif

   FAISS_ASSERT(allocs_.count(device) == 0);
@@ -341,6 +361,14 @@ StandardGpuResourcesImpl::getBlasHandle(int device) {
 cudaStream_t
 StandardGpuResourcesImpl::getDefaultStream(int device) {
   initializeForDevice(device);
+
+  auto it = userDefaultStreams_.find(device);
+  if (it != userDefaultStreams_.end()) {
+    // There is a user override stream set
+    return it->second;
+  }
+
+  // Otherwise, our base default stream
   return defaultStreams_[device];
 }

@@ -539,6 +567,11 @@ StandardGpuResources::setDefaultStream(int device, cudaStream_t stream) {
   res_->setDefaultStream(device, stream);
 }

+void
+StandardGpuResources::revertDefaultStream(int device) {
+  res_->revertDefaultStream(device);
+}
+
 void
 StandardGpuResources::setDefaultNullStreamAllDevices() {
   res_->setDefaultNullStreamAllDevices();