faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -7,11 +7,16 @@
|
|
|
7
7
|
|
|
8
8
|
#include <faiss/gpu/GpuCloner.h>
|
|
9
9
|
#include <faiss/impl/FaissAssert.h>
|
|
10
|
+
#include <memory>
|
|
10
11
|
#include <typeinfo>
|
|
11
12
|
|
|
12
13
|
#include <faiss/gpu/StandardGpuResources.h>
|
|
13
14
|
|
|
15
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
14
16
|
#include <faiss/IndexFlat.h>
|
|
17
|
+
#if defined USE_NVIDIA_RAFT
|
|
18
|
+
#include <faiss/IndexHNSW.h>
|
|
19
|
+
#endif
|
|
15
20
|
#include <faiss/IndexIVF.h>
|
|
16
21
|
#include <faiss/IndexIVFFlat.h>
|
|
17
22
|
#include <faiss/IndexIVFPQ.h>
|
|
@@ -21,6 +26,10 @@
|
|
|
21
26
|
#include <faiss/IndexShardsIVF.h>
|
|
22
27
|
#include <faiss/MetaIndexes.h>
|
|
23
28
|
#include <faiss/gpu/GpuIndex.h>
|
|
29
|
+
#include <faiss/gpu/GpuIndexBinaryFlat.h>
|
|
30
|
+
#if defined USE_NVIDIA_RAFT
|
|
31
|
+
#include <faiss/gpu/GpuIndexCagra.h>
|
|
32
|
+
#endif
|
|
24
33
|
#include <faiss/gpu/GpuIndexFlat.h>
|
|
25
34
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
|
26
35
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
|
@@ -82,7 +91,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
|
|
|
82
91
|
// objective is to make a single component out of them
|
|
83
92
|
// (inverse op of ToGpuClonerMultiple)
|
|
84
93
|
|
|
85
|
-
}
|
|
94
|
+
}
|
|
95
|
+
#if defined USE_NVIDIA_RAFT
|
|
96
|
+
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
|
|
97
|
+
IndexHNSWCagra* res = new IndexHNSWCagra();
|
|
98
|
+
icg->copyTo(res);
|
|
99
|
+
return res;
|
|
100
|
+
}
|
|
101
|
+
#endif
|
|
102
|
+
else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
|
|
86
103
|
int nshard = ish->count();
|
|
87
104
|
FAISS_ASSERT(nshard > 0);
|
|
88
105
|
Index* res = clone_Index(ish->at(0));
|
|
@@ -121,6 +138,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
121
138
|
GpuIndexFlatConfig config;
|
|
122
139
|
config.device = device;
|
|
123
140
|
config.useFloat16 = useFloat16;
|
|
141
|
+
config.use_raft = use_raft;
|
|
124
142
|
return new GpuIndexFlat(provider, ifl, config);
|
|
125
143
|
} else if (
|
|
126
144
|
dynamic_cast<const IndexScalarQuantizer*>(index) &&
|
|
@@ -129,6 +147,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
129
147
|
GpuIndexFlatConfig config;
|
|
130
148
|
config.device = device;
|
|
131
149
|
config.useFloat16 = true;
|
|
150
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
151
|
+
!use_raft, "this type of index is not implemented for RAFT");
|
|
132
152
|
GpuIndexFlat* gif = new GpuIndexFlat(
|
|
133
153
|
provider, index->d, index->metric_type, config);
|
|
134
154
|
// transfer data by blocks
|
|
@@ -146,6 +166,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
146
166
|
config.device = device;
|
|
147
167
|
config.indicesOptions = indicesOptions;
|
|
148
168
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
169
|
+
config.use_raft = use_raft;
|
|
170
|
+
config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
|
|
149
171
|
|
|
150
172
|
GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
|
|
151
173
|
provider, ifl->d, ifl->nlist, ifl->metric_type, config);
|
|
@@ -162,6 +184,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
162
184
|
config.device = device;
|
|
163
185
|
config.indicesOptions = indicesOptions;
|
|
164
186
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
187
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
188
|
+
!use_raft, "this type of index is not implemented for RAFT");
|
|
165
189
|
|
|
166
190
|
GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
|
|
167
191
|
provider,
|
|
@@ -194,6 +218,9 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
194
218
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
195
219
|
config.useFloat16LookupTables = useFloat16;
|
|
196
220
|
config.usePrecomputedTables = usePrecomputed;
|
|
221
|
+
config.use_raft = use_raft;
|
|
222
|
+
config.interleavedLayout = use_raft;
|
|
223
|
+
config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer;
|
|
197
224
|
|
|
198
225
|
GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
|
|
199
226
|
|
|
@@ -202,9 +229,25 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
|
|
|
202
229
|
}
|
|
203
230
|
|
|
204
231
|
return res;
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
|
|
232
|
+
}
|
|
233
|
+
#if defined USE_NVIDIA_RAFT
|
|
234
|
+
else if (auto icg = dynamic_cast<const faiss::IndexHNSWCagra*>(index)) {
|
|
235
|
+
GpuIndexCagraConfig config;
|
|
236
|
+
config.device = device;
|
|
237
|
+
GpuIndexCagra* res =
|
|
238
|
+
new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
|
|
239
|
+
res->copyFrom(icg);
|
|
240
|
+
return res;
|
|
241
|
+
}
|
|
242
|
+
#endif
|
|
243
|
+
else {
|
|
244
|
+
// use CPU cloner for IDMap and PreTransform
|
|
245
|
+
auto index_idmap = dynamic_cast<const IndexIDMap*>(index);
|
|
246
|
+
auto index_pt = dynamic_cast<const IndexPreTransform*>(index);
|
|
247
|
+
if (index_idmap || index_pt) {
|
|
248
|
+
return Cloner::clone_Index(index);
|
|
249
|
+
}
|
|
250
|
+
FAISS_THROW_MSG("This index type is not implemented on GPU.");
|
|
208
251
|
}
|
|
209
252
|
}
|
|
210
253
|
|
|
@@ -229,7 +272,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
|
|
|
229
272
|
: GpuMultipleClonerOptions(options) {
|
|
230
273
|
FAISS_THROW_IF_NOT(provider.size() == devices.size());
|
|
231
274
|
for (size_t i = 0; i < provider.size(); i++) {
|
|
232
|
-
sub_cloners.
|
|
275
|
+
sub_cloners.emplace_back(provider[i], devices[i], options);
|
|
233
276
|
}
|
|
234
277
|
}
|
|
235
278
|
|
|
@@ -298,8 +341,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
298
341
|
!dynamic_cast<const IndexFlat*>(quantizer)) {
|
|
299
342
|
// then we flatten the coarse quantizer so that everything remains
|
|
300
343
|
// on GPU
|
|
301
|
-
new_quantizer
|
|
302
|
-
|
|
344
|
+
new_quantizer = std::make_unique<IndexFlat>(
|
|
345
|
+
quantizer->d, quantizer->metric_type);
|
|
303
346
|
std::vector<float> centroids(quantizer->d * quantizer->ntotal);
|
|
304
347
|
quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
|
|
305
348
|
new_quantizer->add(quantizer->ntotal, centroids.data());
|
|
@@ -309,6 +352,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
309
352
|
|
|
310
353
|
std::vector<faiss::Index*> shards(n);
|
|
311
354
|
|
|
355
|
+
#pragma omp parallel for
|
|
312
356
|
for (idx_t i = 0; i < n; i++) {
|
|
313
357
|
// make a shallow copy
|
|
314
358
|
if (reserveVecs) {
|
|
@@ -321,7 +365,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
|
|
|
321
365
|
const_cast<Index*>(quantizer),
|
|
322
366
|
index_ivfpq->d,
|
|
323
367
|
index_ivfpq->nlist,
|
|
324
|
-
index_ivfpq->
|
|
368
|
+
index_ivfpq->pq.M,
|
|
325
369
|
index_ivfpq->pq.nbits);
|
|
326
370
|
idx2.metric_type = index_ivfpq->metric_type;
|
|
327
371
|
idx2.pq = index_ivfpq->pq;
|
|
@@ -473,5 +517,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
|
|
|
473
517
|
return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
|
|
474
518
|
}
|
|
475
519
|
|
|
520
|
+
/*********************************************
|
|
521
|
+
* Cloning binary indexes
|
|
522
|
+
*********************************************/
|
|
523
|
+
|
|
524
|
+
faiss::IndexBinary* index_binary_gpu_to_cpu(
|
|
525
|
+
const faiss::IndexBinary* gpu_index) {
|
|
526
|
+
if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
|
|
527
|
+
IndexBinaryFlat* ret = new IndexBinaryFlat();
|
|
528
|
+
ii->copyTo(ret);
|
|
529
|
+
return ret;
|
|
530
|
+
} else {
|
|
531
|
+
FAISS_THROW_MSG("cannot clone this type of index");
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu(
|
|
536
|
+
GpuResourcesProvider* provider,
|
|
537
|
+
int device,
|
|
538
|
+
const faiss::IndexBinary* index,
|
|
539
|
+
const GpuClonerOptions* options) {
|
|
540
|
+
if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
|
|
541
|
+
GpuIndexBinaryFlatConfig config;
|
|
542
|
+
config.device = device;
|
|
543
|
+
if (options) {
|
|
544
|
+
config.use_raft = options->use_raft;
|
|
545
|
+
}
|
|
546
|
+
return new GpuIndexBinaryFlat(provider, ii, config);
|
|
547
|
+
} else {
|
|
548
|
+
FAISS_THROW_MSG("cannot clone this type of index");
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
|
|
553
|
+
std::vector<GpuResourcesProvider*>& provider,
|
|
554
|
+
std::vector<int>& devices,
|
|
555
|
+
const faiss::IndexBinary* index,
|
|
556
|
+
const GpuMultipleClonerOptions* options) {
|
|
557
|
+
GpuMultipleClonerOptions defaults;
|
|
558
|
+
FAISS_THROW_IF_NOT(devices.size() == provider.size());
|
|
559
|
+
int n = devices.size();
|
|
560
|
+
if (n == 1) {
|
|
561
|
+
return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
|
|
562
|
+
}
|
|
563
|
+
if (!options) {
|
|
564
|
+
options = &defaults;
|
|
565
|
+
}
|
|
566
|
+
if (options->shard) {
|
|
567
|
+
auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
|
|
568
|
+
FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
|
|
569
|
+
IndexBinaryShards* ret = new IndexBinaryShards(true, true);
|
|
570
|
+
for (int i = 0; i < n; i++) {
|
|
571
|
+
IndexBinaryFlat fig(fi->d);
|
|
572
|
+
size_t i0 = i * fi->ntotal / n;
|
|
573
|
+
size_t i1 = (i + 1) * fi->ntotal / n;
|
|
574
|
+
fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
|
|
575
|
+
ret->addIndex(index_binary_cpu_to_gpu(
|
|
576
|
+
provider[i], devices[i], &fig, options));
|
|
577
|
+
}
|
|
578
|
+
ret->own_indices = true;
|
|
579
|
+
return ret;
|
|
580
|
+
} else { // replicas
|
|
581
|
+
IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
|
|
582
|
+
for (int i = 0; i < n; i++) {
|
|
583
|
+
ret->addIndex(index_binary_cpu_to_gpu(
|
|
584
|
+
provider[i], devices[i], index, options));
|
|
585
|
+
}
|
|
586
|
+
ret->own_indices = true;
|
|
587
|
+
return ret;
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
|
|
476
591
|
} // namespace gpu
|
|
477
592
|
} // namespace faiss
|
|
@@ -11,10 +11,12 @@
|
|
|
11
11
|
|
|
12
12
|
#include <faiss/Clustering.h>
|
|
13
13
|
#include <faiss/Index.h>
|
|
14
|
+
#include <faiss/IndexBinary.h>
|
|
14
15
|
#include <faiss/clone_index.h>
|
|
15
16
|
#include <faiss/gpu/GpuClonerOptions.h>
|
|
16
17
|
#include <faiss/gpu/GpuIndex.h>
|
|
17
18
|
#include <faiss/gpu/GpuIndicesOptions.h>
|
|
19
|
+
|
|
18
20
|
namespace faiss {
|
|
19
21
|
namespace gpu {
|
|
20
22
|
|
|
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
|
|
|
95
97
|
virtual ~GpuProgressiveDimIndexFactory() override;
|
|
96
98
|
};
|
|
97
99
|
|
|
100
|
+
/*********************************************
|
|
101
|
+
* Cloning binary indexes
|
|
102
|
+
*********************************************/
|
|
103
|
+
|
|
104
|
+
faiss::IndexBinary* index_binary_gpu_to_cpu(
|
|
105
|
+
const faiss::IndexBinary* gpu_index);
|
|
106
|
+
|
|
107
|
+
/// converts any CPU index that can be converted to GPU
|
|
108
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu(
|
|
109
|
+
GpuResourcesProvider* provider,
|
|
110
|
+
int device,
|
|
111
|
+
const faiss::IndexBinary* index,
|
|
112
|
+
const GpuClonerOptions* options = nullptr);
|
|
113
|
+
|
|
114
|
+
faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
|
|
115
|
+
std::vector<GpuResourcesProvider*>& provider,
|
|
116
|
+
std::vector<int>& devices,
|
|
117
|
+
const faiss::IndexBinary* index,
|
|
118
|
+
const GpuMultipleClonerOptions* options = nullptr);
|
|
119
|
+
|
|
98
120
|
} // namespace gpu
|
|
99
121
|
} // namespace faiss
|
|
@@ -36,6 +36,19 @@ struct GpuClonerOptions {
|
|
|
36
36
|
|
|
37
37
|
/// Set verbose options on the index
|
|
38
38
|
bool verbose = false;
|
|
39
|
+
|
|
40
|
+
/// use the RAFT implementation
|
|
41
|
+
#if defined USE_NVIDIA_RAFT
|
|
42
|
+
bool use_raft = true;
|
|
43
|
+
#else
|
|
44
|
+
bool use_raft = false;
|
|
45
|
+
#endif
|
|
46
|
+
|
|
47
|
+
/// This flag controls the CPU fallback logic for coarse quantizer
|
|
48
|
+
/// component of the index. When set to false (default), the cloner will
|
|
49
|
+
/// throw an exception for indices not implemented on GPU. When set to
|
|
50
|
+
/// true, it will fall back to a CPU implementation.
|
|
51
|
+
bool allowCpuCoarseQuantizer = false;
|
|
39
52
|
};
|
|
40
53
|
|
|
41
54
|
struct GpuMultipleClonerOptions : public GpuClonerOptions {
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/Index.h>
|
|
11
11
|
|
|
12
|
+
#pragma GCC visibility push(default)
|
|
12
13
|
namespace faiss {
|
|
13
14
|
namespace gpu {
|
|
14
15
|
|
|
@@ -28,44 +29,24 @@ enum class IndicesDataType {
|
|
|
28
29
|
|
|
29
30
|
/// Arguments to brute-force GPU k-nearest neighbor searching
|
|
30
31
|
struct GpuDistanceParams {
|
|
31
|
-
GpuDistanceParams()
|
|
32
|
-
: metric(faiss::MetricType::METRIC_L2),
|
|
33
|
-
metricArg(0),
|
|
34
|
-
k(0),
|
|
35
|
-
dims(0),
|
|
36
|
-
vectors(nullptr),
|
|
37
|
-
vectorType(DistanceDataType::F32),
|
|
38
|
-
vectorsRowMajor(true),
|
|
39
|
-
numVectors(0),
|
|
40
|
-
vectorNorms(nullptr),
|
|
41
|
-
queries(nullptr),
|
|
42
|
-
queryType(DistanceDataType::F32),
|
|
43
|
-
queriesRowMajor(true),
|
|
44
|
-
numQueries(0),
|
|
45
|
-
outDistances(nullptr),
|
|
46
|
-
ignoreOutDistances(false),
|
|
47
|
-
outIndicesType(IndicesDataType::I64),
|
|
48
|
-
outIndices(nullptr),
|
|
49
|
-
device(-1) {}
|
|
50
|
-
|
|
51
32
|
//
|
|
52
33
|
// Search parameters
|
|
53
34
|
//
|
|
54
35
|
|
|
55
36
|
/// Search parameter: distance metric
|
|
56
|
-
faiss::MetricType metric;
|
|
37
|
+
faiss::MetricType metric = METRIC_L2;
|
|
57
38
|
|
|
58
39
|
/// Search parameter: distance metric argument (if applicable)
|
|
59
40
|
/// For metric == METRIC_Lp, this is the p-value
|
|
60
|
-
float metricArg;
|
|
41
|
+
float metricArg = 0;
|
|
61
42
|
|
|
62
43
|
/// Search parameter: return k nearest neighbors
|
|
63
44
|
/// If the value provided is -1, then we report all pairwise distances
|
|
64
45
|
/// without top-k filtering
|
|
65
|
-
int k;
|
|
46
|
+
int k = 0;
|
|
66
47
|
|
|
67
48
|
/// Vector dimensionality
|
|
68
|
-
int dims;
|
|
49
|
+
int dims = 0;
|
|
69
50
|
|
|
70
51
|
//
|
|
71
52
|
// Vectors being queried
|
|
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
|
|
|
74
55
|
/// If vectorsRowMajor is true, this is
|
|
75
56
|
/// numVectors x dims, with dims innermost; otherwise,
|
|
76
57
|
/// dims x numVectors, with numVectors innermost
|
|
77
|
-
const void* vectors;
|
|
78
|
-
DistanceDataType vectorType;
|
|
79
|
-
bool vectorsRowMajor;
|
|
80
|
-
idx_t numVectors;
|
|
58
|
+
const void* vectors = nullptr;
|
|
59
|
+
DistanceDataType vectorType = DistanceDataType::F32;
|
|
60
|
+
bool vectorsRowMajor = true;
|
|
61
|
+
idx_t numVectors = 0;
|
|
81
62
|
|
|
82
63
|
/// Precomputed L2 norms for each vector in `vectors`, which can be
|
|
83
64
|
/// optionally provided in advance to speed computation for METRIC_L2
|
|
84
|
-
const float* vectorNorms;
|
|
65
|
+
const float* vectorNorms = nullptr;
|
|
85
66
|
|
|
86
67
|
//
|
|
87
68
|
// The query vectors (i.e., find k-nearest neighbors in `vectors` for each
|
|
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
|
|
|
91
72
|
/// If queriesRowMajor is true, this is
|
|
92
73
|
/// numQueries x dims, with dims innermost; otherwise,
|
|
93
74
|
/// dims x numQueries, with numQueries innermost
|
|
94
|
-
const void* queries;
|
|
95
|
-
DistanceDataType queryType;
|
|
96
|
-
bool queriesRowMajor;
|
|
97
|
-
idx_t numQueries;
|
|
75
|
+
const void* queries = nullptr;
|
|
76
|
+
DistanceDataType queryType = DistanceDataType::F32;
|
|
77
|
+
bool queriesRowMajor = true;
|
|
78
|
+
idx_t numQueries = 0;
|
|
98
79
|
|
|
99
80
|
//
|
|
100
81
|
// Output results
|
|
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
|
|
|
103
84
|
/// A region of memory size numQueries x k, with k
|
|
104
85
|
/// innermost (row major) if k > 0, or if k == -1, a region of memory of
|
|
105
86
|
/// size numQueries x numVectors
|
|
106
|
-
float* outDistances;
|
|
87
|
+
float* outDistances = nullptr;
|
|
107
88
|
|
|
108
89
|
/// Do we only care about the indices reported, rather than the output
|
|
109
90
|
/// distances? Not used if k == -1 (all pairwise distances)
|
|
110
|
-
bool ignoreOutDistances;
|
|
91
|
+
bool ignoreOutDistances = false;
|
|
111
92
|
|
|
112
93
|
/// A region of memory size numQueries x k, with k
|
|
113
94
|
/// innermost (row major). Not used if k == -1 (all pairwise distances)
|
|
114
|
-
IndicesDataType outIndicesType;
|
|
115
|
-
void* outIndices;
|
|
95
|
+
IndicesDataType outIndicesType = IndicesDataType::I64;
|
|
96
|
+
void* outIndices = nullptr;
|
|
116
97
|
|
|
117
98
|
//
|
|
118
99
|
// Execution information
|
|
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
|
|
|
123
104
|
/// (via cudaGetDevice/cudaSetDevice) is used
|
|
124
105
|
/// Otherwise, an integer 0 <= device < numDevices indicates the device for
|
|
125
106
|
/// execution
|
|
126
|
-
int device;
|
|
107
|
+
int device = -1;
|
|
108
|
+
|
|
109
|
+
/// Should the index dispatch down to RAFT?
|
|
110
|
+
/// TODO: change default to true if RAFT is enabled
|
|
111
|
+
bool use_raft = false;
|
|
127
112
|
};
|
|
128
113
|
|
|
114
|
+
/// A function that determines whether RAFT should be used based on various
|
|
115
|
+
/// conditions (such as unsupported architecture)
|
|
116
|
+
bool should_use_raft(GpuDistanceParams args);
|
|
117
|
+
|
|
129
118
|
/// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
|
|
130
119
|
/// neighbor searches on an externally-provided region of memory (e.g., from a
|
|
131
120
|
/// pytorch tensor).
|
|
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
|
|
|
140
129
|
/// nearest neighbors with respect to the given metric
|
|
141
130
|
void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
|
|
142
131
|
|
|
132
|
+
// bfKnn which takes two extra parameters to control the maximum GPU
|
|
133
|
+
// memory allowed for vectors and queries, the latter including the
|
|
134
|
+
// memory required for the results.
|
|
135
|
+
// If 0, the corresponding input must fit into GPU memory.
|
|
136
|
+
// If greater than 0, the function will use at most this much GPU
|
|
137
|
+
// memory (in bytes) for vectors and queries respectively.
|
|
138
|
+
// Vectors are broken up into chunks of size vectorsMemoryLimit,
|
|
139
|
+
// and queries are broken up into chunks of size queriesMemoryLimit.
|
|
140
|
+
// The tiles resulting from the product of the query and vector
|
|
141
|
+
// chunks are processed sequentially on the GPU.
|
|
142
|
+
// Only supported for row major matrices and k > 0. The input that
|
|
143
|
+
// needs sharding must reside on the CPU.
|
|
144
|
+
void bfKnn_tiling(
|
|
145
|
+
GpuResourcesProvider* resources,
|
|
146
|
+
const GpuDistanceParams& args,
|
|
147
|
+
size_t vectorsMemoryLimit,
|
|
148
|
+
size_t queriesMemoryLimit);
|
|
149
|
+
|
|
143
150
|
/// Deprecated legacy implementation
|
|
144
151
|
void bruteForceKnn(
|
|
145
152
|
GpuResourcesProvider* resources,
|
|
@@ -167,3 +174,4 @@ void bruteForceKnn(
|
|
|
167
174
|
|
|
168
175
|
} // namespace gpu
|
|
169
176
|
} // namespace faiss
|
|
177
|
+
#pragma GCC visibility pop
|
|
@@ -4,6 +4,21 @@
|
|
|
4
4
|
* This source code is licensed under the MIT license found in the
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
|
+
/*
|
|
8
|
+
* Copyright (c) 2023, NVIDIA CORPORATION.
|
|
9
|
+
*
|
|
10
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
11
|
+
* you may not use this file except in compliance with the License.
|
|
12
|
+
* You may obtain a copy of the License at
|
|
13
|
+
*
|
|
14
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
15
|
+
*
|
|
16
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
17
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
18
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
19
|
+
* See the License for the specific language governing permissions and
|
|
20
|
+
* limitations under the License.
|
|
21
|
+
*/
|
|
7
22
|
|
|
8
23
|
#pragma once
|
|
9
24
|
|
|
@@ -14,17 +29,26 @@ namespace faiss {
|
|
|
14
29
|
namespace gpu {
|
|
15
30
|
|
|
16
31
|
struct GpuIndexConfig {
|
|
17
|
-
inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
|
|
18
|
-
|
|
19
32
|
/// GPU device on which the index is resident
|
|
20
|
-
int device;
|
|
33
|
+
int device = 0;
|
|
21
34
|
|
|
22
35
|
/// What memory space to use for primary storage.
|
|
23
36
|
/// On Pascal and above (CC 6+) architectures, allows GPUs to use
|
|
24
37
|
/// more memory than is available on the GPU.
|
|
25
|
-
MemorySpace memorySpace;
|
|
38
|
+
MemorySpace memorySpace = MemorySpace::Device;
|
|
39
|
+
|
|
40
|
+
/// Should the index dispatch down to RAFT?
|
|
41
|
+
#if defined USE_NVIDIA_RAFT
|
|
42
|
+
bool use_raft = true;
|
|
43
|
+
#else
|
|
44
|
+
bool use_raft = false;
|
|
45
|
+
#endif
|
|
26
46
|
};
|
|
27
47
|
|
|
48
|
+
/// A centralized function that determines whether RAFT should
|
|
49
|
+
/// be used based on various conditions (such as unsupported architecture)
|
|
50
|
+
bool should_use_raft(GpuIndexConfig config_);
|
|
51
|
+
|
|
28
52
|
class GpuIndex : public faiss::Index {
|
|
29
53
|
public:
|
|
30
54
|
GpuIndex(
|
|
@@ -60,19 +84,14 @@ class GpuIndex : public faiss::Index {
|
|
|
60
84
|
|
|
61
85
|
/// `x` and `labels` can be resident on the CPU or any GPU; copies are
|
|
62
86
|
/// performed as needed
|
|
63
|
-
void assign(
|
|
64
|
-
|
|
65
|
-
const float* x,
|
|
66
|
-
idx_t* labels,
|
|
67
|
-
// faiss::Index has idx_t for k
|
|
68
|
-
idx_t k = 1) const override;
|
|
87
|
+
void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
|
|
88
|
+
const override;
|
|
69
89
|
|
|
70
90
|
/// `x`, `distances` and `labels` can be resident on the CPU or any
|
|
71
91
|
/// GPU; copies are performed as needed
|
|
72
92
|
void search(
|
|
73
93
|
idx_t n,
|
|
74
94
|
const float* x,
|
|
75
|
-
// faiss::Index has idx_t for k
|
|
76
95
|
idx_t k,
|
|
77
96
|
float* distances,
|
|
78
97
|
idx_t* labels,
|
|
@@ -83,7 +102,6 @@ class GpuIndex : public faiss::Index {
|
|
|
83
102
|
void search_and_reconstruct(
|
|
84
103
|
idx_t n,
|
|
85
104
|
const float* x,
|
|
86
|
-
// faiss::Index has idx_t for k
|
|
87
105
|
idx_t k,
|
|
88
106
|
float* distances,
|
|
89
107
|
idx_t* labels,
|