faiss 0.2.4 → 0.2.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3

data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp
@@ -16,6 +16,7 @@
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 #include <faiss/gpu/GpuIndexIVFScalarQuantizer.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/impl/FaissAssert.h>
 

data/vendor/faiss/faiss/gpu/GpuCloner.cpp
@@ -121,7 +121,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = useFloat16;
-        config.storeTransposed = storeTransposed;
         return new GpuIndexFlat(provider, ifl, config);
     } else if (
             dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -147,7 +146,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
 
         GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
                 provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -164,7 +162,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
 
         GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
                 provider,
@@ -195,7 +192,6 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-        config.flatConfig.storeTransposed = storeTransposed;
         config.useFloat16LookupTables = useFloat16;
         config.usePrecomputedTables = usePrecomputed;
 

data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -74,7 +74,19 @@ class GpuIndex : public faiss::Index {
             const float* x,
             Index::idx_t k,
             float* distances,
-            Index::idx_t* labels
+            Index::idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
+
+    /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
+    /// any GPU; copies are performed as needed
+    void search_and_reconstruct(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            float* recons,
+            const SearchParameters* params = nullptr) const override;
 
     /// Overridden to force GPU indices to provide their own GPU-friendly
     /// implementation
@@ -111,7 +123,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* distances,
-            Index::idx_t* labels
+            Index::idx_t* labels,
+            const SearchParameters* params) const = 0;
 
    private:
     /// Handles paged adds if the add set is too large, passes to
@@ -127,7 +140,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData
+            Index::idx_t* outIndicesData,
+            const SearchParameters* params) const;
 
     /// Calls searchImpl_ for a single page of GPU-resident data,
     /// handling paging of the data and copies from the CPU
@@ -136,7 +150,8 @@ class GpuIndex : public faiss::Index {
             const float* x,
             int k,
             float* outDistancesData,
-            Index::idx_t* outIndicesData
+            Index::idx_t* outIndicesData,
+            const SearchParameters* params) const;
 
    protected:
     /// Manages streams, cuBLAS handles and scratch memory for devices
@@ -149,5 +164,14 @@ class GpuIndex : public faiss::Index {
     size_t minPagedSize_;
 };
 
+/// If the given index is a GPU index, this returns the index instance
+GpuIndex* tryCastGpuIndex(faiss::Index* index);
+
+/// Is the given index instance a GPU index?
+bool isGpuIndex(faiss::Index* index);
+
+/// Does the given CPU index instance have a corresponding GPU implementation?
+bool isGpuIndexImplemented(faiss::Index* index);
+
 } // namespace gpu
 } // namespace faiss
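
The hunks above extend every GPU search entry point with an optional SearchParameters* argument (defaulting to nullptr) and add free helper functions for identifying GPU indexes. Below is a minimal usage sketch, not taken from the gem: it assumes faiss built with GPU support, and StandardGpuResources / GpuIndexFlatL2 are existing faiss classes that do not appear in this diff; sizes and data are placeholders.

// Sketch: updated GpuIndex::search signature plus the new helper functions.
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

int main() {
    int d = 64, nb = 1000, nq = 4, k = 5;
    std::vector<float> xb(size_t(nb) * d, 0.5f);
    std::vector<float> xq(size_t(nq) * d, 0.5f);

    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlatL2 index(&res, d);
    index.add(nb, xb.data());

    std::vector<float> dis(size_t(nq) * k);
    std::vector<faiss::Index::idx_t> ids(size_t(nq) * k);
    // The new trailing SearchParameters* parameter defaults to nullptr.
    index.search(nq, xq.data(), k, dis.data(), ids.data());

    // Free functions declared in GpuIndex.h above.
    faiss::Index* base = &index;
    if (faiss::gpu::isGpuIndex(base)) {
        faiss::gpu::GpuIndex* g = faiss::gpu::tryCastGpuIndex(base);
        (void)g;
    }
    return 0;
}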

data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -62,7 +62,8 @@ class GpuIndexBinaryFlat : public IndexBinary {
             const uint8_t* x,
             faiss::IndexBinary::idx_t k,
             int32_t* distances,
-            faiss::IndexBinary::idx_t* labels
+            faiss::IndexBinary::idx_t* labels,
+            const faiss::SearchParameters* params = nullptr) const override;
 
     void reconstruct(faiss::IndexBinary::idx_t key, uint8_t* recons)
             const override;

data/vendor/faiss/faiss/gpu/GpuIndexFlat.h
@@ -24,17 +24,14 @@ namespace gpu {
 class FlatIndex;
 
 struct GpuIndexFlatConfig : public GpuIndexConfig {
-    inline GpuIndexFlatConfig() : useFloat16(false)
+    inline GpuIndexFlatConfig() : useFloat16(false) {}
 
     /// Whether or not data is stored as float16
     bool useFloat16;
 
-    ///
-    ///
-    ///
-    /// substantially slow down any add() calls made, as all data must
-    /// be transposed, and will increase storage requirements (we store
-    /// data in both transposed and non-transposed layouts).
+    /// Deprecated: no longer used
+    /// Previously used to indicate whether internal storage of vectors is
+    /// transposed
     bool storeTransposed;
 };
 
@@ -98,6 +95,10 @@ class GpuIndexFlat : public GpuIndex {
     void reconstruct_n(Index::idx_t i0, Index::idx_t num, float* out)
             const override;
 
+    /// Batch reconstruction method
+    void reconstruct_batch(Index::idx_t n, const Index::idx_t* keys, float* out)
+            const override;
+
     /// Compute residual
     void compute_residual(const float* x, float* residual, Index::idx_t key)
             const override;
@@ -128,7 +129,8 @@ class GpuIndexFlat : public GpuIndex {
             const float* x,
             int k,
             float* distances,
-            Index::idx_t* labels
+            Index::idx_t* labels,
+            const SearchParameters* params) const override;
 
    protected:
     /// Our configuration options
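
GpuIndexFlat also gains a reconstruct_batch() override above. A small illustrative sketch of a call, assuming `index` is an already-populated GPU flat index containing at least 43 vectors (the helper function name is hypothetical):

#include <faiss/gpu/GpuIndexFlat.h>
#include <vector>

// Decode a handful of stored vectors back to float in one call.
std::vector<float> reconstruct_some(const faiss::gpu::GpuIndexFlat& index) {
    std::vector<faiss::Index::idx_t> keys = {0, 10, 42};
    std::vector<float> out(keys.size() * index.d);
    index.reconstruct_batch(
            static_cast<faiss::Index::idx_t>(keys.size()),
            keys.data(),
            out.data());
    return out;
}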

data/vendor/faiss/faiss/gpu/GpuIndexIVF.h
@@ -8,18 +8,17 @@
 #pragma once
 
 #include <faiss/Clustering.h>
+#include <faiss/IndexIVF.h> // for SearchParametersIVF
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndicesOptions.h>
-
-namespace faiss {
-struct IndexIVF;
-}
+#include <memory>
 
 namespace faiss {
 namespace gpu {
 
 class GpuIndexFlat;
+class IVFBase;
 
 struct GpuIndexIVFConfig : public GpuIndexConfig {
     inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
@@ -31,8 +30,13 @@ struct GpuIndexIVFConfig : public GpuIndexConfig {
     GpuIndexFlatConfig flatConfig;
 };
 
+/// Base class of all GPU IVF index types. This (for now) deliberately does not
+/// inherit from IndexIVF, as many of the public data members and functionality
+/// in IndexIVF is not supported in the same manner on the GPU.
 class GpuIndexIVF : public GpuIndex {
    public:
+    /// Version that auto-constructs a flat coarse quantizer based on the
+    /// desired metric
     GpuIndexIVF(
             GpuResourcesProvider* provider,
             int dims,
@@ -41,6 +45,17 @@ class GpuIndexIVF : public GpuIndex {
             int nlist,
             GpuIndexIVFConfig config = GpuIndexIVFConfig());
 
+    /// Version that takes a coarse quantizer instance. The GpuIndexIVF does not
+    /// own the coarseQuantizer instance by default (functions like IndexIVF).
+    GpuIndexIVF(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            faiss::MetricType metric,
+            float metricArg,
+            int nlist,
+            GpuIndexIVFConfig config = GpuIndexIVFConfig());
+
     ~GpuIndexIVF() override;
 
    private:
@@ -54,11 +69,16 @@ class GpuIndexIVF : public GpuIndex {
     /// Copy what we have to the CPU equivalent
     void copyTo(faiss::IndexIVF* index) const;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    virtual void updateQuantizer() = 0;
+
     /// Returns the number of inverted lists we're managing
     int getNumLists() const;
 
     /// Returns the number of vectors present in a particular inverted list
-
+    int getListLength(int listId) const;
 
     /// Return the encoded vector data contained in a particular inverted list,
     /// for debugging purposes.
@@ -66,16 +86,12 @@ class GpuIndexIVF : public GpuIndex {
     /// GPU-side representation.
     /// Otherwise, it is converted to the CPU format.
     /// compliant format, while the native GPU format may differ.
-
-
-            bool gpuFormat = false) const = 0;
+    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
+            const;
 
     /// Return the vector indices contained in a particular inverted list, for
     /// debugging purposes.
-
-
-    /// Return the quantizer we're using
-    GpuIndexFlat* getQuantizer();
+    std::vector<Index::idx_t> getListIndices(int listId) const;
 
     /// Sets the number of list probes per query
     void setNumProbes(int nprobe);
@@ -83,10 +99,49 @@ class GpuIndexIVF : public GpuIndex {
     /// Returns our current number of list probes per query
     int getNumProbes() const;
 
+    /// Same interface as faiss::IndexIVF, in order to search a set of vectors
+    /// pre-quantized by the IVF quantizer. Does not include IndexIVFStats as
+    /// that can only be obtained on the host via a GPU d2h copy.
+    /// @param n nb of vectors to query
+    /// @param x query vectors, size nx * d
+    /// @param assign coarse quantization indices, size nx * nprobe
+    /// @param centroid_dis
+    ///            distances to coarse centroids, size nx * nprobe
+    /// @param distance
+    ///            output distances, size n * k
+    /// @param labels output labels, size n * k
+    /// @param store_pairs store inv list index + inv list offset
+    ///                    instead in upper/lower 32 bit of result,
+    ///                    instead of ids (used for reranking).
+    /// @param params used to override the object's search parameters
+    void search_preassigned(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            const idx_t* assign,
+            const float* centroid_dis,
+            float* distances,
+            idx_t* labels,
+            bool store_pairs,
+            const SearchParametersIVF* params = nullptr) const;
+
    protected:
+    void verifyIVFSettings_() const;
     bool addImplRequiresIDs_() const override;
     void trainQuantizer_(Index::idx_t n, const float* x);
 
+    /// Called from GpuIndex for add/add_with_ids
+    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
+
+    /// Called from GpuIndex for search
+    void searchImpl_(
+            int n,
+            const float* x,
+            int k,
+            float* distances,
+            Index::idx_t* labels,
+            const SearchParameters* params) const override;
+
    public:
     /// Exposing this like the CPU version for manipulation
     ClusteringParameters cp;
@@ -97,12 +152,18 @@ class GpuIndexIVF : public GpuIndex {
     /// Exposing this like the CPU version for manipulation
     int nprobe;
 
-    ///
-
+    /// A user-pluggable coarse quantizer
+    Index* quantizer;
+
+    /// Whether or not we own the coarse quantizer
+    bool own_fields;
 
    protected:
     /// Our configuration options
     const GpuIndexIVFConfig ivfConfig_;
+
+    /// For a trained/initialized index, this is a reference to the base class
+    std::shared_ptr<IVFBase> baseIndex_;
 };
 
 } // namespace gpu
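
The GpuIndexIVF base class above now exposes a user-pluggable `quantizer` member and a search_preassigned() method that mirrors the CPU IndexIVF interface. The sketch below shows the intended call pattern under the assumption that the index is already trained and populated; the helper function name and buffer sizes are illustrative and not part of faiss.

#include <faiss/gpu/GpuIndexIVF.h>
#include <vector>

// Quantize queries against the coarse quantizer first, then search only the
// preassigned inverted lists.
void search_with_precomputed_assignment(
        const faiss::gpu::GpuIndexIVF& index,
        int nq,
        const float* xq,
        int k,
        float* distances,
        faiss::Index::idx_t* labels) {
    int nprobe = index.nprobe;
    std::vector<faiss::Index::idx_t> assign(size_t(nq) * nprobe);
    std::vector<float> centroid_dis(size_t(nq) * nprobe);

    // Coarse quantization via the public quantizer member.
    index.quantizer->search(
            nq, xq, nprobe, centroid_dis.data(), assign.data());

    // IVF search restricted to the precomputed assignment.
    index.search_preassigned(
            nq, xq, k, assign.data(), centroid_dis.data(),
            distances, labels, /*store_pairs=*/false);
}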

data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h
@@ -40,12 +40,22 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
             GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
 
     /// Constructs a new instance with an empty flat quantizer; the user
-    /// provides the number of lists desired.
+    /// provides the number of IVF lists desired.
     GpuIndexIVFFlat(
             GpuResourcesProvider* provider,
             int dims,
             int nlist,
-            faiss::MetricType metric,
+            faiss::MetricType metric = faiss::METRIC_L2,
+            GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
+
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFFlat(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            faiss::MetricType metric = faiss::METRIC_L2,
             GpuIndexIVFFlatConfig config = GpuIndexIVFFlatConfig());
 
     ~GpuIndexIVFFlat() override;
@@ -69,37 +79,14 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
-   protected:
-    /// Called from GpuIndex for add/add_with_ids
-    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
-
    protected:
     /// Our configuration options
     const GpuIndexIVFFlatConfig ivfFlatConfig_;
@@ -107,8 +94,8 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// Desired inverted list memory reservation
     size_t reserveMemoryVecs_;
 
-    /// Instance that we own; contains the inverted
-    std::
+    /// Instance that we own; contains the inverted lists
+    std::shared_ptr<IVFFlat> index_;
 };
 
 } // namespace gpu
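
The new constructor above lets the caller supply its own CPU or GPU coarse quantizer instead of having the index build one; the same pattern is added to GpuIndexIVFPQ and GpuIndexIVFScalarQuantizer in the hunks that follow. A hedged construction sketch (dimensions, list count, and data are placeholders; nothing here is taken from the gem itself):

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

int main() {
    int d = 64, nlist = 256, nb = 10000;
    std::vector<float> xb(size_t(nb) * d, 0.25f);

    faiss::gpu::StandardGpuResources res;

    // The coarse quantizer may live on the CPU or the GPU; here a GPU flat
    // index is used. Per the comment above, the IVF index does not take
    // ownership of it by default.
    faiss::gpu::GpuIndexFlatL2 coarse(&res, d);

    faiss::gpu::GpuIndexIVFFlat ivf(
            &res, &coarse, d, nlist, faiss::METRIC_L2);
    ivf.train(nb, xb.data());
    ivf.add(nb, xb.data());

    // If the coarse quantizer is later modified outside of train(), the IVF
    // index must be notified:
    // ivf.updateQuantizer();
    return 0;
}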

data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h
@@ -63,14 +63,27 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
             const faiss::IndexIVFPQ* index,
             GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
 
-    ///
+    /// Constructs a new instance with an empty flat quantizer; the user
+    /// provides the number of IVF lists desired.
     GpuIndexIVFPQ(
             GpuResourcesProvider* provider,
             int dims,
             int nlist,
             int subQuantizers,
             int bitsPerCode,
-            faiss::MetricType metric,
+            faiss::MetricType metric = faiss::METRIC_L2,
+            GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
+
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFPQ(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            int subQuantizers,
+            int bitsPerCode,
+            faiss::MetricType metric = faiss::METRIC_L2,
             GpuIndexIVFPQConfig config = GpuIndexIVFPQConfig());
 
     ~GpuIndexIVFPQ() override;
@@ -112,44 +125,22 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
     /// product centroid information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse and product quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
    public:
     /// Like the CPU version, we expose a publically-visible ProductQuantizer
     /// for manipulation
     ProductQuantizer pq;
 
    protected:
-    /// Called from GpuIndex for add/add_with_ids
-    void addImpl_(int n, const float* x, const Index::idx_t* ids) override;
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
-
     /// Throws errors if configuration settings are improper
-    void
+    void verifyPQSettings_() const;
 
     /// Trains the PQ quantizer based on the given vector data
     void trainResidualQuantizer_(Index::idx_t n, const float* x);
@@ -172,7 +163,7 @@ class GpuIndexIVFPQ : public GpuIndexIVF {
 
     /// The product quantizer instance that we own; contains the
     /// inverted lists
-    std::
+    std::shared_ptr<IVFPQ> index_;
 };
 
 } // namespace gpu

data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h
@@ -38,7 +38,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
             GpuIndexIVFScalarQuantizerConfig());
 
     /// Constructs a new instance with an empty flat quantizer; the user
-    /// provides the number of lists desired.
+    /// provides the number of IVF lists desired.
     GpuIndexIVFScalarQuantizer(
             GpuResourcesProvider* provider,
             int dims,
@@ -49,6 +49,19 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
             GpuIndexIVFScalarQuantizerConfig config =
                     GpuIndexIVFScalarQuantizerConfig());
 
+    /// Constructs a new instance with a provided CPU or GPU coarse quantizer;
+    /// the user provides the number of IVF lists desired.
+    GpuIndexIVFScalarQuantizer(
+            GpuResourcesProvider* provider,
+            Index* coarseQuantizer,
+            int dims,
+            int nlist,
+            faiss::ScalarQuantizer::QuantizerType qtype,
+            faiss::MetricType metric = MetricType::METRIC_L2,
+            bool encodeResidual = true,
+            GpuIndexIVFScalarQuantizerConfig config =
+                    GpuIndexIVFScalarQuantizerConfig());
+
     ~GpuIndexIVFScalarQuantizer() override;
 
     /// Reserve GPU memory in our inverted lists for this number of vectors
@@ -70,36 +83,17 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
     /// quantizer information
     void reset() override;
 
+    /// Should be called if the user ever changes the state of the IVF coarse
+    /// quantizer manually (e.g., substitutes a new instance or changes vectors
+    /// in the coarse quantizer outside the scope of training)
+    void updateQuantizer() override;
+
     /// Trains the coarse and scalar quantizer based on the given vector data
     void train(Index::idx_t n, const float* x) override;
 
-    /// Returns the number of vectors present in a particular inverted list
-    int getListLength(int listId) const override;
-
-    /// Return the encoded vector data contained in a particular inverted list,
-    /// for debugging purposes.
-    /// If gpuFormat is true, the data is returned as it is encoded in the
-    /// GPU-side representation.
-    /// Otherwise, it is converted to the CPU format.
-    /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(int listId, bool gpuFormat = false)
-            const override;
-
-    /// Return the vector indices contained in a particular inverted list, for
-    /// debugging purposes.
-    std::vector<Index::idx_t> getListIndices(int listId) const override;
-
    protected:
-    ///
-    void
-
-    /// Called from GpuIndex for search
-    void searchImpl_(
-            int n,
-            const float* x,
-            int k,
-            float* distances,
-            Index::idx_t* labels) const override;
+    /// Validates index SQ parameters
+    void verifySQSettings_() const;
 
     /// Called from train to handle SQ residual training
     void trainResiduals_(Index::idx_t n, const float* x);
@@ -119,7 +113,7 @@ class GpuIndexIVFScalarQuantizer : public GpuIndexIVF {
     size_t reserveMemoryVecs_;
 
     /// Instance that we own; contains the inverted list
-    std::
+    std::shared_ptr<IVFFlat> index_;
 };
 
 } // namespace gpu

data/vendor/faiss/faiss/gpu/GpuResources.cpp
@@ -184,5 +184,19 @@ size_t GpuResources::getTempMemoryAvailableCurrentDevice() const {
 
 GpuResourcesProvider::~GpuResourcesProvider() {}
 
+//
+// GpuResourcesProviderFromResourceInstance
+//
+
+GpuResourcesProviderFromInstance::GpuResourcesProviderFromInstance(
+        std::shared_ptr<GpuResources> p)
+        : res_(p) {}
+
+GpuResourcesProviderFromInstance::~GpuResourcesProviderFromInstance() {}
+
+std::shared_ptr<GpuResources> GpuResourcesProviderFromInstance::getResources() {
+    return res_;
+}
+
 } // namespace gpu
 } // namespace faiss

data/vendor/faiss/faiss/gpu/GpuResources.h
@@ -28,8 +28,7 @@ enum AllocType {
     FlatData = 1,
 
     /// Primary data storage for GpuIndexIVF* (the storage for each individual
-    /// IVF
-    /// list)
+    /// IVF list)
     IVFLists = 2,
 
     /// Quantizer (PQ, SQ) dictionary information
@@ -251,7 +250,8 @@ class GpuResources {
     cudaStream_t getAsyncCopyStreamCurrentDevice();
 };
 
-/// Interface for a provider of a shared resources object
+/// Interface for a provider of a shared resources object. This is to avoid
+/// interfacing std::shared_ptr to Python
 class GpuResourcesProvider {
    public:
     virtual ~GpuResourcesProvider();
@@ -260,5 +260,18 @@ class GpuResourcesProvider {
     virtual std::shared_ptr<GpuResources> getResources() = 0;
 };
 
+/// A simple wrapper for a GpuResources object to make a GpuResourcesProvider
+/// out of it again
+class GpuResourcesProviderFromInstance : public GpuResourcesProvider {
+   public:
+    explicit GpuResourcesProviderFromInstance(std::shared_ptr<GpuResources> p);
+    ~GpuResourcesProviderFromInstance() override;
+
+    std::shared_ptr<GpuResources> getResources() override;
+
+   private:
+    std::shared_ptr<GpuResources> res_;
+};
+
 } // namespace gpu
 } // namespace faiss
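
GpuResourcesProviderFromInstance, declared above, re-wraps an existing std::shared_ptr<GpuResources> as a provider so the same underlying resources can be handed to several indexes. A small illustrative sketch, not taken from the gem; StandardGpuResources is an existing faiss provider whose getResources() returns the shared instance:

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/StandardGpuResources.h>

int main() {
    faiss::gpu::StandardGpuResources standard;

    // Wrap the shared GpuResources object as a provider again.
    faiss::gpu::GpuResourcesProviderFromInstance provider(
            standard.getResources());

    // Both indexes now share the same scratch memory, streams, and handles.
    faiss::gpu::GpuIndexFlatL2 a(&provider, 128);
    faiss::gpu::GpuIndexFlatL2 b(&provider, 64);
    (void)a;
    (void)b;
    return 0;
}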

data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp
@@ -268,6 +268,9 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
         return;
     }
 
+    FAISS_ASSERT(device < getNumDevices());
+    DeviceScope scope(device);
+
     // If this is the first device that we're initializing, create our
     // pinned memory allocation
     if (defaultStreams_.empty() && pinnedMemSize_ > 0) {
@@ -285,9 +288,6 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) {
         pinnedMemAllocSize_ = pinnedMemSize_;
     }
 
-    FAISS_ASSERT(device < getNumDevices());
-    DeviceScope scope(device);
-
     // Make sure that device properties for all devices are cached
     auto& prop = getDeviceProperties(device);
 