faiss 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +10 -3
|
@@ -230,18 +230,18 @@ struct ProductQuantizer;
|
|
|
230
230
|
*
|
|
231
231
|
*/
|
|
232
232
|
struct OPQMatrix : LinearTransform {
|
|
233
|
-
int M;
|
|
234
|
-
int niter; ///< Number of outer training iterations
|
|
235
|
-
int niter_pq;
|
|
236
|
-
int niter_pq_0; ///< same, for the first outer iteration
|
|
233
|
+
int M; ///< nb of subquantizers
|
|
234
|
+
int niter = 50; ///< Number of outer training iterations
|
|
235
|
+
int niter_pq = 4; ///< Number of training iterations for the PQ
|
|
236
|
+
int niter_pq_0 = 40; ///< same, for the first outer iteration
|
|
237
237
|
|
|
238
238
|
/// if there are too many training points, resample
|
|
239
|
-
size_t max_train_points;
|
|
240
|
-
bool verbose;
|
|
239
|
+
size_t max_train_points = 256 * 256;
|
|
240
|
+
bool verbose = false;
|
|
241
241
|
|
|
242
242
|
/// if non-NULL, use this product quantizer for training
|
|
243
243
|
/// should be constructed with (d_out, M, _)
|
|
244
|
-
ProductQuantizer* pq;
|
|
244
|
+
ProductQuantizer* pq = nullptr;
|
|
245
245
|
|
|
246
246
|
/// if d2 != -1, output vectors of this dimension
|
|
247
247
|
explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
|
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
#include <faiss/Index2Layer.h>
|
|
18
18
|
#include <faiss/IndexAdditiveQuantizer.h>
|
|
19
19
|
#include <faiss/IndexAdditiveQuantizerFastScan.h>
|
|
20
|
+
#include <faiss/IndexBinary.h>
|
|
21
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
20
22
|
#include <faiss/IndexFlat.h>
|
|
21
23
|
#include <faiss/IndexHNSW.h>
|
|
22
24
|
#include <faiss/IndexIVF.h>
|
|
@@ -35,6 +37,7 @@
|
|
|
35
37
|
#include <faiss/IndexRefine.h>
|
|
36
38
|
#include <faiss/IndexRowwiseMinMax.h>
|
|
37
39
|
#include <faiss/IndexScalarQuantizer.h>
|
|
40
|
+
|
|
38
41
|
#include <faiss/MetaIndexes.h>
|
|
39
42
|
#include <faiss/VectorTransform.h>
|
|
40
43
|
|
|
@@ -60,9 +63,10 @@ Index* clone_index(const Index* index) {
|
|
|
60
63
|
// assumes there is a copy constructor ready. Always try from most
|
|
61
64
|
// specific to most general. Most indexes don't have complicated
|
|
62
65
|
// structs, the default copy constructor often just works.
|
|
63
|
-
#define TRYCLONE(classname, obj)
|
|
64
|
-
if (const classname* clo =
|
|
65
|
-
|
|
66
|
+
#define TRYCLONE(classname, obj) \
|
|
67
|
+
if (const classname* clo##classname = \
|
|
68
|
+
dynamic_cast<const classname*>(obj)) { \
|
|
69
|
+
return new classname(*clo##classname); \
|
|
66
70
|
} else
|
|
67
71
|
|
|
68
72
|
VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
|
|
@@ -234,13 +238,6 @@ Index* clone_AdditiveQuantizerIndex(const Index* index) {
|
|
|
234
238
|
|
|
235
239
|
namespace {
|
|
236
240
|
|
|
237
|
-
IndexHNSW* clone_HNSW(const IndexHNSW* ihnsw) {
|
|
238
|
-
TRYCLONE(IndexHNSWFlat, ihnsw)
|
|
239
|
-
TRYCLONE(IndexHNSWPQ, ihnsw)
|
|
240
|
-
TRYCLONE(IndexHNSWSQ, ihnsw)
|
|
241
|
-
return new IndexHNSW(*ihnsw);
|
|
242
|
-
}
|
|
243
|
-
|
|
244
241
|
InvertedLists* clone_InvertedLists(const InvertedLists* invlists) {
|
|
245
242
|
if (auto* ails = dynamic_cast<const ArrayInvertedLists*>(invlists)) {
|
|
246
243
|
return new ArrayInvertedLists(*ails);
|
|
@@ -385,4 +382,12 @@ Quantizer* clone_Quantizer(const Quantizer* quant) {
|
|
|
385
382
|
FAISS_THROW_MSG("Did not recognize quantizer to clone");
|
|
386
383
|
}
|
|
387
384
|
|
|
385
|
+
IndexBinary* clone_binary_index(const IndexBinary* index) {
|
|
386
|
+
if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
|
|
387
|
+
return new IndexBinaryFlat(*ii);
|
|
388
|
+
} else {
|
|
389
|
+
FAISS_THROW_MSG("cannot clone this type of index");
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
388
393
|
} // namespace faiss
|
|
@@ -17,6 +17,7 @@ struct Index;
|
|
|
17
17
|
struct IndexIVF;
|
|
18
18
|
struct VectorTransform;
|
|
19
19
|
struct Quantizer;
|
|
20
|
+
struct IndexBinary;
|
|
20
21
|
|
|
21
22
|
/* cloning functions */
|
|
22
23
|
Index* clone_index(const Index*);
|
|
@@ -33,4 +34,6 @@ struct Cloner {
|
|
|
33
34
|
|
|
34
35
|
Quantizer* clone_Quantizer(const Quantizer* quant);
|
|
35
36
|
|
|
37
|
+
IndexBinary* clone_binary_index(const IndexBinary* index);
|
|
38
|
+
|
|
36
39
|
} // namespace faiss
|
|
@@ -7,10 +7,12 @@
|
|
|
7
7
|
|
|
8
8
|
#include <faiss/gpu/GpuCloner.h>
|
|
9
9
|
#include <faiss/impl/FaissAssert.h>
|
|
10
|
+
#include <memory>
|
|
10
11
|
#include <typeinfo>
|
|
11
12
|
|
|
12
13
|
#include <faiss/gpu/StandardGpuResources.h>
|
|
13
14
|
|
|
15
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
14
16
|
#include <faiss/IndexFlat.h>
|
|
15
17
|
#include <faiss/IndexIVF.h>
|
|
16
18
|
#include <faiss/IndexIVFFlat.h>
|
|
@@ -21,6 +23,7 @@
|
|
|
21
23
|
#include <faiss/IndexShardsIVF.h>
|
|
22
24
|
#include <faiss/MetaIndexes.h>
|
|
23
25
|
#include <faiss/gpu/GpuIndex.h>
|
|
26
|
+
#include <faiss/gpu/GpuIndexBinaryFlat.h>
|
|
24
27
|
#include <faiss/gpu/GpuIndexFlat.h>
|
|
25
28
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
|
26
29
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
|
@@ -121,6 +124,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
121
124
|
GpuIndexFlatConfig config;
|
|
122
125
|
config.device = device;
|
|
123
126
|
config.useFloat16 = useFloat16;
|
|
127
|
+
config.use_raft = use_raft;
|
|
124
128
|
return new GpuIndexFlat(provider, ifl, config);
|
|
125
129
|
} else if (
|
|
126
130
|
dynamic_cast<const IndexScalarQuantizer*>(index) &&
|
|
@@ -129,6 +133,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
129
133
|
GpuIndexFlatConfig config;
|
|
130
134
|
config.device = device;
|
|
131
135
|
config.useFloat16 = true;
|
|
136
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
137
|
+
!use_raft, "this type of index is not implemented for RAFT");
|
|
132
138
|
GpuIndexFlat* gif = new GpuIndexFlat(
|
|
133
139
|
provider, index->d, index->metric_type, config);
|
|
134
140
|
// transfer data by blocks
|
|
@@ -146,6 +152,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
146
152
|
config.device = device;
|
|
147
153
|
config.indicesOptions = indicesOptions;
|
|
148
154
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
155
|
+
config.use_raft = use_raft;
|
|
149
156
|
|
|
150
157
|
GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
|
|
151
158
|
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
|
|
@@ -162,6 +169,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
162
169
|
config.device = device;
|
|
163
170
|
config.indicesOptions = indicesOptions;
|
|
164
171
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
172
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
173
|
+
!use_raft, "this type of index is not implemented for RAFT");
|
|
165
174
|
|
|
166
175
|
GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
|
|
167
176
|
provider,
|
|
@@ -194,6 +203,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
194
203
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
195
204
|
config.useFloat16LookupTables = useFloat16;
|
|
196
205
|
config.usePrecomputedTables = usePrecomputed;
|
|
206
|
+
config.use_raft = use_raft;
|
|
207
|
+
config.interleavedLayout = use_raft;
|
|
197
208
|
|
|
198
209
|
GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
|
|
199
210
|
|
|
@@ -229,7 +240,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
|
|
|
229
240
|
: GpuMultipleClonerOptions(options) {
|
|
230
241
|
FAISS_THROW_IF_NOT(provider.size() == devices.size());
|
|
231
242
|
for (size_t i = 0; i < provider.size(); i++) {
|
|
232
|
-
sub_cloners.
|
|
243
|
+
sub_cloners.emplace_back(provider[i], devices[i], options);
|
|
233
244
|
}
|
|
234
245
|
}
|
|
235
246
|
|
|
@@ -298,8 +309,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
298
309
|
!dynamic_cast<const IndexFlat*>(quantizer)) {
|
|
299
310
|
// then we flatten the coarse quantizer so that everything remains
|
|
300
311
|
// on GPU
|
|
301
|
-
new_quantizer
|
|
302
|
-
|
|
312
|
+
new_quantizer = std::make_unique<IndexFlat>(
|
|
313
|
+
quantizer->d, quantizer->metric_type);
|
|
303
314
|
std::vector<float> centroids(quantizer->d * quantizer->ntotal);
|
|
304
315
|
quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
|
|
305
316
|
new_quantizer->add(quantizer->ntotal, centroids.data());
|
|
@@ -309,6 +320,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
309
320
|
|
|
310
321
|
std::vector<faiss::Index*> shards(n);
|
|
311
322
|
|
|
323
|
+
#pragma omp parallel for
|
|
312
324
|
for (idx_t i = 0; i < n; i++) {
|
|
313
325
|
// make a shallow copy
|
|
314
326
|
if (reserveVecs) {
|
|
@@ -321,7 +333,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
321
333
|
const_cast<Index*>(quantizer),
|
|
322
334
|
index_ivfpq->d,
|
|
323
335
|
index_ivfpq->nlist,
|
|
324
|
-
index_ivfpq->
|
|
336
|
+
index_ivfpq->pq.M,
|
|
325
337
|
index_ivfpq->pq.nbits);
|
|
326
338
|
idx2.metric_type = index_ivfpq->metric_type;
|
|
327
339
|
idx2.pq = index_ivfpq->pq;
|
|
@@ -473,5 +485,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
|
|
|
473
485
|
return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
|
|
474
486
|
}
|
|
475
487
|
|
|
488
|
+
/*********************************************
|
|
489
|
+
* Cloning binary indexes
|
|
490
|
+
*********************************************/
|
|
491
|
+
|
|
492
|
+
faiss::IndexBinary* index_binary_gpu_to_cpu(
|
|
493
|
+
const faiss::IndexBinary* gpu_index) {
|
|
494
|
+
if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
|
|
495
|
+
IndexBinaryFlat* ret = new IndexBinaryFlat();
|
|
496
|
+
ii->copyTo(ret);
|
|
497
|
+
return ret;
|
|
498
|
+
} else {
|
|
499
|
+
FAISS_THROW_MSG("cannot clone this type of index");
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu(
|
|
504
|
+
GpuResourcesProvider* provider,
|
|
505
|
+
int device,
|
|
506
|
+
const faiss::IndexBinary* index,
|
|
507
|
+
const GpuClonerOptions* options) {
|
|
508
|
+
if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
|
|
509
|
+
GpuIndexBinaryFlatConfig config;
|
|
510
|
+
config.device = device;
|
|
511
|
+
if (options) {
|
|
512
|
+
config.use_raft = options->use_raft;
|
|
513
|
+
}
|
|
514
|
+
return new GpuIndexBinaryFlat(provider, ii, config);
|
|
515
|
+
} else {
|
|
516
|
+
FAISS_THROW_MSG("cannot clone this type of index");
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
|
|
521
|
+
std::vector<GpuResourcesProvider*>& provider,
|
|
522
|
+
std::vector<int>& devices,
|
|
523
|
+
const faiss::IndexBinary* index,
|
|
524
|
+
const GpuMultipleClonerOptions* options) {
|
|
525
|
+
GpuMultipleClonerOptions defaults;
|
|
526
|
+
FAISS_THROW_IF_NOT(devices.size() == provider.size());
|
|
527
|
+
int n = devices.size();
|
|
528
|
+
if (n == 1) {
|
|
529
|
+
return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
|
|
530
|
+
}
|
|
531
|
+
if (!options) {
|
|
532
|
+
options = &defaults;
|
|
533
|
+
}
|
|
534
|
+
if (options->shard) {
|
|
535
|
+
auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
|
|
536
|
+
FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
|
|
537
|
+
IndexBinaryShards* ret = new IndexBinaryShards(true, true);
|
|
538
|
+
for (int i = 0; i < n; i++) {
|
|
539
|
+
IndexBinaryFlat fig(fi->d);
|
|
540
|
+
size_t i0 = i * fi->ntotal / n;
|
|
541
|
+
size_t i1 = (i + 1) * fi->ntotal / n;
|
|
542
|
+
fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
|
|
543
|
+
ret->addIndex(index_binary_cpu_to_gpu(
|
|
544
|
+
provider[i], devices[i], &fig, options));
|
|
545
|
+
}
|
|
546
|
+
ret->own_indices = true;
|
|
547
|
+
return ret;
|
|
548
|
+
} else { // replicas
|
|
549
|
+
IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
|
|
550
|
+
for (int i = 0; i < n; i++) {
|
|
551
|
+
ret->addIndex(index_binary_cpu_to_gpu(
|
|
552
|
+
provider[i], devices[i], index, options));
|
|
553
|
+
}
|
|
554
|
+
ret->own_indices = true;
|
|
555
|
+
return ret;
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
|
|
476
559
|
} // namespace gpu
|
|
477
560
|
} // namespace faiss
|
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
|
|
12
12
|
#include <faiss/Clustering.h>
|
|
13
13
|
#include <faiss/Index.h>
|
|
14
|
+
#include <faiss/IndexBinary.h>
|
|
14
15
|
#include <faiss/clone_index.h>
|
|
15
16
|
#include <faiss/gpu/GpuClonerOptions.h>
|
|
16
17
|
#include <faiss/gpu/GpuIndex.h>
|
|
17
18
|
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
19
|
+
|
|
18
20
|
namespace faiss {
|
|
19
21
|
namespace gpu {
|
|
20
22
|
|
|
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
|
|
|
95
97
|
virtual ~GpuProgressiveDimIndexFactory() override;
|
|
96
98
|
};
|
|
97
99
|
|
|
100
|
+
/*********************************************
|
|
101
|
+
* Cloning binary indexes
|
|
102
|
+
*********************************************/
|
|
103
|
+
|
|
104
|
+
faiss::IndexBinary* index_binary_gpu_to_cpu(
|
|
105
|
+
const faiss::IndexBinary* gpu_index);
|
|
106
|
+
|
|
107
|
+
/// converts any CPU index that can be converted to GPU
|
|
108
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu(
|
|
109
|
+
GpuResourcesProvider* provider,
|
|
110
|
+
int device,
|
|
111
|
+
const faiss::IndexBinary* index,
|
|
112
|
+
const GpuClonerOptions* options = nullptr);
|
|
113
|
+
|
|
114
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
|
|
115
|
+
std::vector<GpuResourcesProvider*>& provider,
|
|
116
|
+
std::vector<int>& devices,
|
|
117
|
+
const faiss::IndexBinary* index,
|
|
118
|
+
const GpuMultipleClonerOptions* options = nullptr);
|
|
119
|
+
|
|
98
120
|
} // namespace gpu
|
|
99
121
|
} // namespace faiss
|
|
@@ -36,6 +36,13 @@ struct GpuClonerOptions {
|
|
|
36
36
|
|
|
37
37
|
/// Set verbose options on the index
|
|
38
38
|
bool verbose = false;
|
|
39
|
+
|
|
40
|
+
/// use the RAFT implementation
|
|
41
|
+
#if defined USE_NVIDIA_RAFT
|
|
42
|
+
bool use_raft = true;
|
|
43
|
+
#else
|
|
44
|
+
bool use_raft = false;
|
|
45
|
+
#endif
|
|
39
46
|
};
|
|
40
47
|
|
|
41
48
|
struct GpuMultipleClonerOptions : public GpuClonerOptions {
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/Index.h>
|
|
11
11
|
|
|
12
|
+
#pragma GCC visibility push(default)
|
|
12
13
|
namespace faiss {
|
|
13
14
|
namespace gpu {
|
|
14
15
|
|
|
@@ -28,44 +29,24 @@ enum class IndicesDataType {
|
|
|
28
29
|
|
|
29
30
|
/// Arguments to brute-force GPU k-nearest neighbor searching
|
|
30
31
|
struct GpuDistanceParams {
|
|
31
|
-
GpuDistanceParams()
|
|
32
|
-
: metric(faiss::MetricType::METRIC_L2),
|
|
33
|
-
metricArg(0),
|
|
34
|
-
k(0),
|
|
35
|
-
dims(0),
|
|
36
|
-
vectors(nullptr),
|
|
37
|
-
vectorType(DistanceDataType::F32),
|
|
38
|
-
vectorsRowMajor(true),
|
|
39
|
-
numVectors(0),
|
|
40
|
-
vectorNorms(nullptr),
|
|
41
|
-
queries(nullptr),
|
|
42
|
-
queryType(DistanceDataType::F32),
|
|
43
|
-
queriesRowMajor(true),
|
|
44
|
-
numQueries(0),
|
|
45
|
-
outDistances(nullptr),
|
|
46
|
-
ignoreOutDistances(false),
|
|
47
|
-
outIndicesType(IndicesDataType::I64),
|
|
48
|
-
outIndices(nullptr),
|
|
49
|
-
device(-1) {}
|
|
50
|
-
|
|
51
32
|
//
|
|
52
33
|
// Search parameters
|
|
53
34
|
//
|
|
54
35
|
|
|
55
36
|
/// Search parameter: distance metric
|
|
56
|
-
faiss::MetricType metric;
|
|
37
|
+
faiss::MetricType metric = METRIC_L2;
|
|
57
38
|
|
|
58
39
|
/// Search parameter: distance metric argument (if applicable)
|
|
59
40
|
/// For metric == METRIC_Lp, this is the p-value
|
|
60
|
-
float metricArg;
|
|
41
|
+
float metricArg = 0;
|
|
61
42
|
|
|
62
43
|
/// Search parameter: return k nearest neighbors
|
|
63
44
|
/// If the value provided is -1, then we report all pairwise distances
|
|
64
45
|
/// without top-k filtering
|
|
65
|
-
int k;
|
|
46
|
+
int k = 0;
|
|
66
47
|
|
|
67
48
|
/// Vector dimensionality
|
|
68
|
-
int dims;
|
|
49
|
+
int dims = 0;
|
|
69
50
|
|
|
70
51
|
//
|
|
71
52
|
// Vectors being queried
|
|
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
|
|
|
74
55
|
/// If vectorsRowMajor is true, this is
|
|
75
56
|
/// numVectors x dims, with dims innermost; otherwise,
|
|
76
57
|
/// dims x numVectors, with numVectors innermost
|
|
77
|
-
const void* vectors;
|
|
78
|
-
DistanceDataType vectorType;
|
|
79
|
-
bool vectorsRowMajor;
|
|
80
|
-
idx_t numVectors;
|
|
58
|
+
const void* vectors = nullptr;
|
|
59
|
+
DistanceDataType vectorType = DistanceDataType::F32;
|
|
60
|
+
bool vectorsRowMajor = true;
|
|
61
|
+
idx_t numVectors = 0;
|
|
81
62
|
|
|
82
63
|
/// Precomputed L2 norms for each vector in `vectors`, which can be
|
|
83
64
|
/// optionally provided in advance to speed computation for METRIC_L2
|
|
84
|
-
const float* vectorNorms;
|
|
65
|
+
const float* vectorNorms = nullptr;
|
|
85
66
|
|
|
86
67
|
//
|
|
87
68
|
// The query vectors (i.e., find k-nearest neighbors in `vectors` for each
|
|
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
|
|
|
91
72
|
/// If queriesRowMajor is true, this is
|
|
92
73
|
/// numQueries x dims, with dims innermost; otherwise,
|
|
93
74
|
/// dims x numQueries, with numQueries innermost
|
|
94
|
-
const void* queries;
|
|
95
|
-
DistanceDataType queryType;
|
|
96
|
-
bool queriesRowMajor;
|
|
97
|
-
idx_t numQueries;
|
|
75
|
+
const void* queries = nullptr;
|
|
76
|
+
DistanceDataType queryType = DistanceDataType::F32;
|
|
77
|
+
bool queriesRowMajor = true;
|
|
78
|
+
idx_t numQueries = 0;
|
|
98
79
|
|
|
99
80
|
//
|
|
100
81
|
// Output results
|
|
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
|
|
|
103
84
|
/// A region of memory size numQueries x k, with k
|
|
104
85
|
/// innermost (row major) if k > 0, or if k == -1, a region of memory of
|
|
105
86
|
/// size numQueries x numVectors
|
|
106
|
-
float* outDistances;
|
|
87
|
+
float* outDistances = nullptr;
|
|
107
88
|
|
|
108
89
|
/// Do we only care about the indices reported, rather than the output
|
|
109
90
|
/// distances? Not used if k == -1 (all pairwise distances)
|
|
110
|
-
bool ignoreOutDistances;
|
|
91
|
+
bool ignoreOutDistances = false;
|
|
111
92
|
|
|
112
93
|
/// A region of memory size numQueries x k, with k
|
|
113
94
|
/// innermost (row major). Not used if k == -1 (all pairwise distances)
|
|
114
|
-
IndicesDataType outIndicesType;
|
|
115
|
-
void* outIndices;
|
|
95
|
+
IndicesDataType outIndicesType = IndicesDataType::I64;
|
|
96
|
+
void* outIndices = nullptr;
|
|
116
97
|
|
|
117
98
|
//
|
|
118
99
|
// Execution information
|
|
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
|
|
|
123
104
|
/// (via cudaGetDevice/cudaSetDevice) is used
|
|
124
105
|
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
|
|
125
106
|
/// execution
|
|
126
|
-
int device;
|
|
107
|
+
int device = -1;
|
|
108
|
+
|
|
109
|
+
/// Should the index dispatch down to RAFT?
|
|
110
|
+
/// TODO: change default to true if RAFT is enabled
|
|
111
|
+
bool use_raft = false;
|
|
127
112
|
};
|
|
128
113
|
|
|
114
|
+
/// A function that determines whether RAFT should be used based on various
|
|
115
|
+
/// conditions (such as unsupported architecture)
|
|
116
|
+
bool should_use_raft(GpuDistanceParams args);
|
|
117
|
+
|
|
129
118
|
/// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
|
|
130
119
|
/// neighbor searches on an externally-provided region of memory (e.g., from a
|
|
131
120
|
/// pytorch tensor).
|
|
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
|
|
|
140
129
|
/// nearest neighbors with respect to the given metric
|
|
141
130
|
void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
|
|
142
131
|
|
|
132
|
+
// bfKnn which takes two extra parameters to control the maximum GPU
|
|
133
|
+
// memory allowed for vectors and queries, the latter including the
|
|
134
|
+
// memory required for the results.
|
|
135
|
+
// If 0, the corresponding input must fit into GPU memory.
|
|
136
|
+
// If greater than 0, the function will use at most this much GPU
|
|
137
|
+
// memory (in bytes) for vectors and queries respectively.
|
|
138
|
+
// Vectors are broken up into chunks of size vectorsMemoryLimit,
|
|
139
|
+
// and queries are broken up into chunks of size queriesMemoryLimit.
|
|
140
|
+
// The tiles resulting from the product of the query and vector
|
|
141
|
+
// chunks are processed sequentially on the GPU.
|
|
142
|
+
// Only supported for row major matrices and k > 0. The input that
|
|
143
|
+
// needs sharding must reside on the CPU.
|
|
144
|
+
void bfKnn_tiling(
|
|
145
|
+
GpuResourcesProvider* resources,
|
|
146
|
+
const GpuDistanceParams& args,
|
|
147
|
+
size_t vectorsMemoryLimit,
|
|
148
|
+
size_t queriesMemoryLimit);
|
|
149
|
+
|
|
143
150
|
/// Deprecated legacy implementation
|
|
144
151
|
void bruteForceKnn(
|
|
145
152
|
GpuResourcesProvider* resources,
|
|
@@ -167,3 +174,4 @@ void bruteForceKnn(
|
|
|
167
174
|
|
|
168
175
|
} // namespace gpu
|
|
169
176
|
} // namespace faiss
|
|
177
|
+
#pragma GCC visibility pop
|
|
@@ -4,6 +4,21 @@
|
|
|
4
4
|
* This source code is licensed under the MIT license found in the
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
|
+
/*
|
|
8
|
+
* Copyright (c) 2023, NVIDIA CORPORATION.
|
|
9
|
+
*
|
|
10
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
+
* you may not use this file except in compliance with the License.
|
|
12
|
+
* You may obtain a copy of the License at
|
|
13
|
+
*
|
|
14
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
+
*
|
|
16
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
* See the License for the specific language governing permissions and
|
|
20
|
+
* limitations under the License.
|
|
21
|
+
*/
|
|
7
22
|
|
|
8
23
|
#pragma once
|
|
9
24
|
|
|
@@ -14,17 +29,26 @@ namespace faiss {
|
|
|
14
29
|
namespace gpu {
|
|
15
30
|
|
|
16
31
|
struct GpuIndexConfig {
|
|
17
|
-
inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
|
|
18
|
-
|
|
19
32
|
/// GPU device on which the index is resident
|
|
20
|
-
int device;
|
|
33
|
+
int device = 0;
|
|
21
34
|
|
|
22
35
|
/// What memory space to use for primary storage.
|
|
23
36
|
/// On Pascal and above (CC 6+) architectures, allows GPUs to use
|
|
24
37
|
/// more memory than is available on the GPU.
|
|
25
|
-
MemorySpace memorySpace;
|
|
38
|
+
MemorySpace memorySpace = MemorySpace::Device;
|
|
39
|
+
|
|
40
|
+
/// Should the index dispatch down to RAFT?
|
|
41
|
+
#if defined USE_NVIDIA_RAFT
|
|
42
|
+
bool use_raft = true;
|
|
43
|
+
#else
|
|
44
|
+
bool use_raft = false;
|
|
45
|
+
#endif
|
|
26
46
|
};
|
|
27
47
|
|
|
48
|
+
/// A centralized function that determines whether RAFT should
|
|
49
|
+
/// be used based on various conditions (such as unsupported architecture)
|
|
50
|
+
bool should_use_raft(GpuIndexConfig config_);
|
|
51
|
+
|
|
28
52
|
class GpuIndex : public faiss::Index {
|
|
29
53
|
public:
|
|
30
54
|
GpuIndex(
|
|
@@ -24,15 +24,13 @@ namespace gpu {
|
|
|
24
24
|
class FlatIndex;
|
|
25
25
|
|
|
26
26
|
struct GpuIndexFlatConfig : public GpuIndexConfig {
|
|
27
|
-
inline GpuIndexFlatConfig() : useFloat16(false) {}
|
|
28
|
-
|
|
29
27
|
/// Whether or not data is stored as float16
|
|
30
|
-
bool useFloat16;
|
|
28
|
+
bool ALIGNED(8) useFloat16 = false;
|
|
31
29
|
|
|
32
30
|
/// Deprecated: no longer used
|
|
33
31
|
/// Previously used to indicate whether internal storage of vectors is
|
|
34
32
|
/// transposed
|
|
35
|
-
bool storeTransposed;
|
|
33
|
+
bool storeTransposed = false;
|
|
36
34
|
};
|
|
37
35
|
|
|
38
36
|
/// Wrapper around the GPU implementation that looks like
|
|
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
|
|
|
115
113
|
}
|
|
116
114
|
|
|
117
115
|
protected:
|
|
116
|
+
void resetIndex_(int dims);
|
|
117
|
+
|
|
118
118
|
/// Flat index does not require IDs as there is no storage available for
|
|
119
119
|
/// them
|
|
120
120
|
bool addImplRequiresIDs_() const override;
|
|
@@ -21,10 +21,8 @@ class GpuIndexFlat;
|
|
|
21
21
|
class IVFBase;
|
|
22
22
|
|
|
23
23
|
struct GpuIndexIVFConfig : public GpuIndexConfig {
|
|
24
|
-
inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
|
|
25
|
-
|
|
26
24
|
/// Index storage options for the GPU
|
|
27
|
-
IndicesOptions indicesOptions;
|
|
25
|
+
IndicesOptions indicesOptions = INDICES_64_BIT;
|
|
28
26
|
|
|
29
27
|
/// Configuration for the coarse quantizer object
|
|
30
28
|
GpuIndexFlatConfig flatConfig;
|
|
@@ -75,10 +73,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
|
|
|
75
73
|
virtual void updateQuantizer() = 0;
|
|
76
74
|
|
|
77
75
|
/// Returns the number of inverted lists we're managing
|
|
78
|
-
idx_t getNumLists() const;
|
|
76
|
+
virtual idx_t getNumLists() const;
|
|
79
77
|
|
|
80
78
|
/// Returns the number of vectors present in a particular inverted list
|
|
81
|
-
idx_t getListLength(idx_t listId) const;
|
|
79
|
+
virtual idx_t getListLength(idx_t listId) const;
|
|
82
80
|
|
|
83
81
|
/// Return the encoded vector data contained in a particular inverted list,
|
|
84
82
|
/// for debugging purposes.
|
|
@@ -86,12 +84,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
|
|
|
86
84
|
/// GPU-side representation.
|
|
87
85
|
/// Otherwise, it is converted to the CPU format.
|
|
88
86
|
/// This is the CPU-compliant format, while the native GPU format may differ.
|
|
89
|
-
std::vector<uint8_t> getListVectorData(
|
|
90
|
-
|
|
87
|
+
virtual std::vector<uint8_t> getListVectorData(
|
|
88
|
+
idx_t listId,
|
|
89
|
+
bool gpuFormat = false) const;
|
|
91
90
|
|
|
92
91
|
/// Return the vector indices contained in a particular inverted list, for
|
|
93
92
|
/// debugging purposes.
|
|
94
|
-
std::vector<idx_t> getListIndices(idx_t listId) const;
|
|
93
|
+
virtual std::vector<idx_t> getListIndices(idx_t listId) const;
|
|
95
94
|
|
|
96
95
|
void search_preassigned(
|
|
97
96
|
idx_t n,
|
|
@@ -123,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
|
|
|
123
122
|
int getCurrentNProbe_(const SearchParameters* params) const;
|
|
124
123
|
void verifyIVFSettings_() const;
|
|
125
124
|
bool addImplRequiresIDs_() const override;
|
|
126
|
-
void trainQuantizer_(idx_t n, const float* x);
|
|
125
|
+
virtual void trainQuantizer_(idx_t n, const float* x);
|
|
127
126
|
|
|
128
127
|
/// Called from GpuIndex for add/add_with_ids
|
|
129
128
|
void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
|
|
@@ -8,6 +8,8 @@
|
|
|
8
8
|
#pragma once
|
|
9
9
|
|
|
10
10
|
#include <faiss/gpu/GpuIndexIVF.h>
|
|
11
|
+
#include <faiss/impl/ScalarQuantizer.h>
|
|
12
|
+
|
|
11
13
|
#include <memory>
|
|
12
14
|
|
|
13
15
|
namespace faiss {
|
|
@@ -21,11 +23,9 @@ class IVFFlat;
|
|
|
21
23
|
class GpuIndexFlat;
|
|
22
24
|
|
|
23
25
|
struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
|
|
24
|
-
inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
|
|
25
|
-
|
|
26
26
|
/// Use the alternative memory layout for the IVF lists
|
|
27
27
|
/// (currently the default)
|
|
28
|
-
bool interleavedLayout;
|
|
28
|
+
bool interleavedLayout = true;
|
|
29
29
|
};
|
|
30
30
|
|
|
31
31
|
/// Wrapper around the GPU implementation that looks like
|
|
@@ -87,6 +87,21 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
|
|
|
87
87
|
/// Trains the coarse quantizer based on the given vector data
|
|
88
88
|
void train(idx_t n, const float* x) override;
|
|
89
89
|
|
|
90
|
+
protected:
|
|
91
|
+
/// Initialize appropriate index
|
|
92
|
+
void setIndex_(
|
|
93
|
+
GpuResources* resources,
|
|
94
|
+
int dim,
|
|
95
|
+
int nlist,
|
|
96
|
+
faiss::MetricType metric,
|
|
97
|
+
float metricArg,
|
|
98
|
+
bool useResidual,
|
|
99
|
+
/// Optional ScalarQuantizer
|
|
100
|
+
faiss::ScalarQuantizer* scalarQ,
|
|
101
|
+
bool interleavedLayout,
|
|
102
|
+
IndicesOptions indicesOptions,
|
|
103
|
+
MemorySpace space);
|
|
104
|
+
|
|
90
105
|
protected:
|
|
91
106
|
/// Our configuration options
|
|
92
107
|
const GpuIndexIVFFlatConfig ivfFlatConfig_;
|