faiss 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3
@@ -0,0 +1,32 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include <faiss/Index.h>
|
11
|
+
|
12
|
+
namespace faiss {
|
13
|
+
namespace gpu {
|
14
|
+
|
15
|
+
/// A collection of various utility functions for index implementation
|
16
|
+
|
17
|
+
/// Returns the maximum k-selection value supported based on the CUDA SDK that
|
18
|
+
/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
|
19
|
+
/// non-CUDA files
|
20
|
+
int getMaxKSelection();
|
21
|
+
|
22
|
+
// Validate the k parameter for search
|
23
|
+
void validateKSelect(Index::idx_t k);
|
24
|
+
|
25
|
+
// Validate the nprobe parameter for search
|
26
|
+
void validateNProbe(Index::idx_t nprobe);
|
27
|
+
|
28
|
+
/// Validate the n (number of vectors) parameter for add, search, reconstruct
|
29
|
+
void validateNumVectors(Index::idx_t n);
|
30
|
+
|
31
|
+
} // namespace gpu
|
32
|
+
} // namespace faiss
|
@@ -8,6 +8,7 @@
|
|
8
8
|
#include <faiss/IndexBinaryFlat.h>
|
9
9
|
#include <faiss/gpu/GpuIndexBinaryFlat.h>
|
10
10
|
#include <faiss/gpu/StandardGpuResources.h>
|
11
|
+
#include <faiss/gpu/impl/IndexUtils.h>
|
11
12
|
#include <faiss/gpu/test/TestUtils.h>
|
12
13
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
13
14
|
#include <faiss/utils/utils.h>
|
@@ -8,10 +8,12 @@
|
|
8
8
|
#include <faiss/IndexFlat.h>
|
9
9
|
#include <faiss/gpu/GpuIndexFlat.h>
|
10
10
|
#include <faiss/gpu/StandardGpuResources.h>
|
11
|
+
#include <faiss/gpu/impl/IndexUtils.h>
|
11
12
|
#include <faiss/gpu/test/TestUtils.h>
|
12
13
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
13
14
|
#include <gtest/gtest.h>
|
14
15
|
#include <sstream>
|
16
|
+
#include <unordered_map>
|
15
17
|
#include <vector>
|
16
18
|
|
17
19
|
// FIXME: figure out a better way to test fp16
|
@@ -23,7 +25,6 @@ struct TestFlatOptions {
|
|
23
25
|
: metric(faiss::MetricType::METRIC_L2),
|
24
26
|
metricArg(0),
|
25
27
|
useFloat16(false),
|
26
|
-
useTransposed(false),
|
27
28
|
numVecsOverride(-1),
|
28
29
|
numQueriesOverride(-1),
|
29
30
|
kOverride(-1),
|
@@ -33,7 +34,6 @@ struct TestFlatOptions {
|
|
33
34
|
float metricArg;
|
34
35
|
|
35
36
|
bool useFloat16;
|
36
|
-
bool useTransposed;
|
37
37
|
int numVecsOverride;
|
38
38
|
int numQueriesOverride;
|
39
39
|
int kOverride;
|
@@ -73,7 +73,6 @@ void testFlat(const TestFlatOptions& opt) {
|
|
73
73
|
faiss::gpu::GpuIndexFlatConfig config;
|
74
74
|
config.device = device;
|
75
75
|
config.useFloat16 = opt.useFloat16;
|
76
|
-
config.storeTransposed = opt.useTransposed;
|
77
76
|
|
78
77
|
faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
|
79
78
|
gpuIndex.metric_arg = opt.metricArg;
|
@@ -85,8 +84,7 @@ void testFlat(const TestFlatOptions& opt) {
|
|
85
84
|
std::stringstream str;
|
86
85
|
str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
|
87
86
|
<< numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
|
88
|
-
<< " transposed " << opt.useTransposed << " numQuery " << numQuery
|
89
|
-
<< " k " << k;
|
87
|
+
<< " numQuery " << numQuery << " k " << k;
|
90
88
|
|
91
89
|
// To some extent, we depend upon the relative error for the test
|
92
90
|
// for float16
|
@@ -110,12 +108,8 @@ TEST(TestGpuIndexFlat, IP_Float32) {
|
|
110
108
|
TestFlatOptions opt;
|
111
109
|
opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
|
112
110
|
opt.useFloat16 = false;
|
113
|
-
opt.useTransposed = false;
|
114
111
|
|
115
112
|
testFlat(opt);
|
116
|
-
|
117
|
-
opt.useTransposed = true;
|
118
|
-
testFlat(opt);
|
119
113
|
}
|
120
114
|
}
|
121
115
|
|
@@ -123,11 +117,7 @@ TEST(TestGpuIndexFlat, L1_Float32) {
|
|
123
117
|
TestFlatOptions opt;
|
124
118
|
opt.metric = faiss::MetricType::METRIC_L1;
|
125
119
|
opt.useFloat16 = false;
|
126
|
-
opt.useTransposed = false;
|
127
|
-
|
128
|
-
testFlat(opt);
|
129
120
|
|
130
|
-
opt.useTransposed = true;
|
131
121
|
testFlat(opt);
|
132
122
|
}
|
133
123
|
|
@@ -136,12 +126,8 @@ TEST(TestGpuIndexFlat, Lp_Float32) {
|
|
136
126
|
opt.metric = faiss::MetricType::METRIC_Lp;
|
137
127
|
opt.metricArg = 5;
|
138
128
|
opt.useFloat16 = false;
|
139
|
-
opt.useTransposed = false;
|
140
129
|
|
141
130
|
testFlat(opt);
|
142
|
-
|
143
|
-
// Don't bother testing the transposed version, the L1 test should be good
|
144
|
-
// enough for that
|
145
131
|
}
|
146
132
|
|
147
133
|
TEST(TestGpuIndexFlat, L2_Float32) {
|
@@ -150,11 +136,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
|
|
150
136
|
opt.metric = faiss::MetricType::METRIC_L2;
|
151
137
|
|
152
138
|
opt.useFloat16 = false;
|
153
|
-
opt.useTransposed = false;
|
154
|
-
|
155
|
-
testFlat(opt);
|
156
139
|
|
157
|
-
opt.useTransposed = true;
|
158
140
|
testFlat(opt);
|
159
141
|
}
|
160
142
|
}
|
@@ -165,7 +147,6 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
|
|
165
147
|
TestFlatOptions opt;
|
166
148
|
opt.metric = faiss::MetricType::METRIC_L2;
|
167
149
|
opt.useFloat16 = false;
|
168
|
-
opt.useTransposed = false;
|
169
150
|
opt.kOverride = 1;
|
170
151
|
|
171
152
|
testFlat(opt);
|
@@ -177,12 +158,8 @@ TEST(TestGpuIndexFlat, IP_Float16) {
|
|
177
158
|
TestFlatOptions opt;
|
178
159
|
opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
|
179
160
|
opt.useFloat16 = true;
|
180
|
-
opt.useTransposed = false;
|
181
161
|
|
182
162
|
testFlat(opt);
|
183
|
-
|
184
|
-
opt.useTransposed = true;
|
185
|
-
testFlat(opt);
|
186
163
|
}
|
187
164
|
}
|
188
165
|
|
@@ -191,11 +168,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
|
|
191
168
|
TestFlatOptions opt;
|
192
169
|
opt.metric = faiss::MetricType::METRIC_L2;
|
193
170
|
opt.useFloat16 = true;
|
194
|
-
opt.useTransposed = false;
|
195
|
-
|
196
|
-
testFlat(opt);
|
197
171
|
|
198
|
-
opt.useTransposed = true;
|
199
172
|
testFlat(opt);
|
200
173
|
}
|
201
174
|
}
|
@@ -206,7 +179,6 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
|
|
206
179
|
TestFlatOptions opt;
|
207
180
|
opt.metric = faiss::MetricType::METRIC_L2;
|
208
181
|
opt.useFloat16 = true;
|
209
|
-
opt.useTransposed = false;
|
210
182
|
opt.kOverride = 1;
|
211
183
|
|
212
184
|
testFlat(opt);
|
@@ -219,7 +191,6 @@ TEST(TestGpuIndexFlat, L2_Tiling) {
|
|
219
191
|
TestFlatOptions opt;
|
220
192
|
opt.metric = faiss::MetricType::METRIC_L2;
|
221
193
|
opt.useFloat16 = false;
|
222
|
-
opt.useTransposed = false;
|
223
194
|
opt.numVecsOverride = 1000000;
|
224
195
|
|
225
196
|
// keep the rest of the problem reasonably small
|
@@ -238,7 +209,6 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
|
|
238
209
|
faiss::gpu::GpuIndexFlatConfig config;
|
239
210
|
config.device = 0;
|
240
211
|
config.useFloat16 = false;
|
241
|
-
config.storeTransposed = false;
|
242
212
|
|
243
213
|
int dim = 128;
|
244
214
|
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
|
@@ -267,40 +237,46 @@ TEST(TestGpuIndexFlat, CopyFrom) {
|
|
267
237
|
int numVecs = faiss::gpu::randVal(100, 200);
|
268
238
|
int dim = faiss::gpu::randVal(1, 1000);
|
269
239
|
|
270
|
-
faiss::IndexFlatL2 cpuIndex(dim);
|
271
|
-
|
272
240
|
std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
|
241
|
+
|
242
|
+
faiss::IndexFlatL2 cpuIndex(dim);
|
273
243
|
cpuIndex.add(numVecs, vecs.data());
|
274
244
|
|
275
245
|
faiss::gpu::StandardGpuResources res;
|
276
246
|
res.noTempMemory();
|
277
247
|
|
278
|
-
// Fill with garbage values
|
279
248
|
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
280
249
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
|
287
|
-
gpuIndex.copyFrom(&cpuIndex);
|
250
|
+
for (bool useFloat16 : {false, true}) {
|
251
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
252
|
+
config.device = device;
|
253
|
+
config.useFloat16 = useFloat16;
|
288
254
|
|
289
|
-
|
290
|
-
|
255
|
+
// Fill with garbage values
|
256
|
+
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
|
257
|
+
gpuIndex.copyFrom(&cpuIndex);
|
291
258
|
|
292
|
-
|
293
|
-
|
259
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
260
|
+
EXPECT_EQ(gpuIndex.ntotal, numVecs);
|
294
261
|
|
295
|
-
|
262
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
263
|
+
EXPECT_EQ(cpuIndex.d, dim);
|
296
264
|
|
297
|
-
|
298
|
-
|
265
|
+
std::vector<float> gpuVals(numVecs * dim);
|
266
|
+
gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
|
299
267
|
|
300
|
-
|
301
|
-
|
268
|
+
std::vector<float> cpuVals(numVecs * dim);
|
269
|
+
cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
|
302
270
|
|
303
|
-
|
271
|
+
// The CPU is the source of (float32) truth here, while the GPU index
|
272
|
+
// may be in float16 mode and thus was subject to rounding
|
273
|
+
if (useFloat16) {
|
274
|
+
EXPECT_EQ(gpuVals, faiss::gpu::roundToHalf(cpuVals));
|
275
|
+
} else {
|
276
|
+
// Should be exactly the same
|
277
|
+
EXPECT_EQ(gpuVals, cpuVals);
|
278
|
+
}
|
279
|
+
}
|
304
280
|
}
|
305
281
|
|
306
282
|
TEST(TestGpuIndexFlat, CopyTo) {
|
@@ -311,36 +287,36 @@ TEST(TestGpuIndexFlat, CopyTo) {
|
|
311
287
|
int dim = faiss::gpu::randVal(1, 1000);
|
312
288
|
|
313
289
|
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
314
|
-
|
315
|
-
faiss::gpu::GpuIndexFlatConfig config;
|
316
|
-
config.device = device;
|
317
|
-
config.useFloat16 = false;
|
318
|
-
config.storeTransposed = false;
|
319
|
-
|
320
|
-
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
|
321
|
-
|
322
290
|
std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
|
323
|
-
gpuIndex.add(numVecs, vecs.data());
|
324
291
|
|
325
|
-
|
326
|
-
|
327
|
-
|
292
|
+
for (bool useFloat16 : {false, true}) {
|
293
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
294
|
+
config.device = device;
|
295
|
+
config.useFloat16 = useFloat16;
|
296
|
+
|
297
|
+
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
|
298
|
+
gpuIndex.add(numVecs, vecs.data());
|
328
299
|
|
329
|
-
|
330
|
-
|
300
|
+
// Fill with garbage values
|
301
|
+
faiss::IndexFlatL2 cpuIndex(2000);
|
302
|
+
gpuIndex.copyTo(&cpuIndex);
|
331
303
|
|
332
|
-
|
333
|
-
|
304
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
305
|
+
EXPECT_EQ(gpuIndex.ntotal, numVecs);
|
334
306
|
|
335
|
-
|
307
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
308
|
+
EXPECT_EQ(cpuIndex.d, dim);
|
336
309
|
|
337
|
-
|
338
|
-
|
310
|
+
std::vector<float> gpuVals(numVecs * dim);
|
311
|
+
gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
|
339
312
|
|
340
|
-
|
341
|
-
|
313
|
+
std::vector<float> cpuVals(numVecs * dim);
|
314
|
+
cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
|
342
315
|
|
343
|
-
|
316
|
+
// The GPU is the source of truth here, so the float32 exact comparison
|
317
|
+
// even if the index uses float16 is ok
|
318
|
+
EXPECT_EQ(gpuVals, cpuVals);
|
319
|
+
}
|
344
320
|
}
|
345
321
|
|
346
322
|
TEST(TestGpuIndexFlat, UnifiedMemory) {
|
@@ -390,6 +366,266 @@ TEST(TestGpuIndexFlat, UnifiedMemory) {
|
|
390
366
|
0.015f);
|
391
367
|
}
|
392
368
|
|
369
|
+
TEST(TestGpuIndexFlat, LargeIndex) {
|
370
|
+
// Construct on a random device to test multi-device, if we have
|
371
|
+
// multiple devices
|
372
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
373
|
+
|
374
|
+
faiss::gpu::StandardGpuResources res;
|
375
|
+
res.noTempMemory();
|
376
|
+
|
377
|
+
// Skip this device if we do not have sufficient memory
|
378
|
+
constexpr size_t kMem = size_t(8) * 1024 * 1024 * 1024;
|
379
|
+
|
380
|
+
if (faiss::gpu::getFreeMemory(device) < kMem) {
|
381
|
+
std::cout << "TestGpuIndexFlat.LargeIndex: skipping due "
|
382
|
+
"to insufficient device memory\n";
|
383
|
+
return;
|
384
|
+
}
|
385
|
+
|
386
|
+
std::cout << "Running LargeIndex test\n";
|
387
|
+
|
388
|
+
size_t dim = 256; // each vec is sizeof(float) * 256 = 1 KiB in size
|
389
|
+
size_t nb = 5000000;
|
390
|
+
size_t nq = 10;
|
391
|
+
|
392
|
+
auto xb = faiss::gpu::randVecs(nb, dim);
|
393
|
+
|
394
|
+
int k = 10;
|
395
|
+
|
396
|
+
faiss::IndexFlatL2 cpuIndexL2(dim);
|
397
|
+
|
398
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
399
|
+
config.device = device;
|
400
|
+
faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
|
401
|
+
|
402
|
+
cpuIndexL2.add(nb, xb.data());
|
403
|
+
gpuIndexL2.add(nb, xb.data());
|
404
|
+
|
405
|
+
// To some extent, we depend upon the relative error for the test
|
406
|
+
// for float16
|
407
|
+
faiss::gpu::compareIndices(
|
408
|
+
cpuIndexL2,
|
409
|
+
gpuIndexL2,
|
410
|
+
nq,
|
411
|
+
dim,
|
412
|
+
k,
|
413
|
+
"LargeIndex",
|
414
|
+
kF32MaxRelErr,
|
415
|
+
0.1f,
|
416
|
+
0.015f);
|
417
|
+
}
|
418
|
+
|
419
|
+
TEST(TestGpuIndexFlat, Residual) {
|
420
|
+
// Construct on a random device to test multi-device, if we have
|
421
|
+
// multiple devices
|
422
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
423
|
+
|
424
|
+
faiss::gpu::StandardGpuResources res;
|
425
|
+
res.noTempMemory();
|
426
|
+
|
427
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
428
|
+
config.device = device;
|
429
|
+
|
430
|
+
int dim = 32;
|
431
|
+
faiss::IndexFlat cpuIndex(dim, faiss::MetricType::METRIC_L2);
|
432
|
+
faiss::gpu::GpuIndexFlat gpuIndex(
|
433
|
+
&res, dim, faiss::MetricType::METRIC_L2, config);
|
434
|
+
|
435
|
+
int numVecs = 100;
|
436
|
+
auto vecs = faiss::gpu::randVecs(numVecs, dim);
|
437
|
+
cpuIndex.add(numVecs, vecs.data());
|
438
|
+
gpuIndex.add(numVecs, vecs.data());
|
439
|
+
|
440
|
+
auto indexVecs = std::vector<faiss::Index::idx_t>{0, 2, 4, 6, 8};
|
441
|
+
auto queryVecs = faiss::gpu::randVecs(indexVecs.size(), dim);
|
442
|
+
|
443
|
+
auto residualsCpu = std::vector<float>(indexVecs.size() * dim);
|
444
|
+
auto residualsGpu = std::vector<float>(indexVecs.size() * dim);
|
445
|
+
|
446
|
+
cpuIndex.compute_residual_n(
|
447
|
+
indexVecs.size(),
|
448
|
+
queryVecs.data(),
|
449
|
+
residualsCpu.data(),
|
450
|
+
indexVecs.data());
|
451
|
+
gpuIndex.compute_residual_n(
|
452
|
+
indexVecs.size(),
|
453
|
+
queryVecs.data(),
|
454
|
+
residualsGpu.data(),
|
455
|
+
indexVecs.data());
|
456
|
+
|
457
|
+
// Should be exactly the same, as this is just a single float32 subtraction
|
458
|
+
EXPECT_EQ(residualsCpu, residualsGpu);
|
459
|
+
}
|
460
|
+
|
461
|
+
TEST(TestGpuIndexFlat, Reconstruct) {
|
462
|
+
// Construct on a random device to test multi-device, if we have
|
463
|
+
// multiple devices
|
464
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
465
|
+
|
466
|
+
faiss::gpu::StandardGpuResources res;
|
467
|
+
res.noTempMemory();
|
468
|
+
|
469
|
+
int dim = 32;
|
470
|
+
int numVecs = 100;
|
471
|
+
auto vecs = faiss::gpu::randVecs(numVecs, dim);
|
472
|
+
auto vecs16 = faiss::gpu::roundToHalf(vecs);
|
473
|
+
|
474
|
+
for (bool useFloat16 : {false, true}) {
|
475
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
476
|
+
config.device = device;
|
477
|
+
config.useFloat16 = useFloat16;
|
478
|
+
|
479
|
+
faiss::gpu::GpuIndexFlat gpuIndex(
|
480
|
+
&res, dim, faiss::MetricType::METRIC_L2, config);
|
481
|
+
|
482
|
+
gpuIndex.add(numVecs, vecs.data());
|
483
|
+
|
484
|
+
// Test reconstruct
|
485
|
+
{
|
486
|
+
auto reconstructVecs = std::vector<float>(dim);
|
487
|
+
gpuIndex.reconstruct(15, reconstructVecs.data());
|
488
|
+
|
489
|
+
auto& ref = useFloat16 ? vecs16 : vecs;
|
490
|
+
|
491
|
+
for (int i = 0; i < dim; ++i) {
|
492
|
+
EXPECT_EQ(reconstructVecs[i], ref[15 * dim + i]);
|
493
|
+
}
|
494
|
+
}
|
495
|
+
|
496
|
+
// Test reconstruct_n
|
497
|
+
if (false) {
|
498
|
+
auto reconstructVecs = std::vector<float>((numVecs - 1) * dim);
|
499
|
+
|
500
|
+
int startVec = 5;
|
501
|
+
int endVec = numVecs - 1;
|
502
|
+
int numReconstructVec = endVec - startVec + 1;
|
503
|
+
|
504
|
+
gpuIndex.reconstruct_n(
|
505
|
+
startVec, numReconstructVec, reconstructVecs.data());
|
506
|
+
|
507
|
+
auto& ref = useFloat16 ? vecs16 : vecs;
|
508
|
+
|
509
|
+
for (int i = 0; i < numReconstructVec; ++i) {
|
510
|
+
for (int j = 0; j < dim; ++j) {
|
511
|
+
EXPECT_EQ(
|
512
|
+
reconstructVecs[i * dim + j],
|
513
|
+
ref[(i + startVec) * dim + j]);
|
514
|
+
}
|
515
|
+
}
|
516
|
+
}
|
517
|
+
|
518
|
+
// Test reconstruct_batch
|
519
|
+
if (false) {
|
520
|
+
auto reconstructKeys = std::vector<faiss::Index::idx_t>{1, 3, 5};
|
521
|
+
auto reconstructVecs =
|
522
|
+
std::vector<float>(reconstructKeys.size() * dim);
|
523
|
+
|
524
|
+
gpuIndex.reconstruct_batch(
|
525
|
+
reconstructKeys.size(),
|
526
|
+
reconstructKeys.data(),
|
527
|
+
reconstructVecs.data());
|
528
|
+
|
529
|
+
auto& ref = useFloat16 ? vecs16 : vecs;
|
530
|
+
|
531
|
+
for (int i = 0; i < reconstructKeys.size(); ++i) {
|
532
|
+
for (int j = 0; j < dim; ++j) {
|
533
|
+
EXPECT_EQ(
|
534
|
+
reconstructVecs[i * dim + j],
|
535
|
+
ref[reconstructKeys[i] * dim + j]);
|
536
|
+
}
|
537
|
+
}
|
538
|
+
}
|
539
|
+
}
|
540
|
+
}
|
541
|
+
|
542
|
+
TEST(TestGpuIndexFlat, SearchAndReconstruct) {
|
543
|
+
// Construct on a random device to test multi-device, if we have
|
544
|
+
// multiple devices
|
545
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
546
|
+
|
547
|
+
faiss::gpu::StandardGpuResources res;
|
548
|
+
res.noTempMemory();
|
549
|
+
|
550
|
+
size_t dim = 32;
|
551
|
+
size_t nb = 5000;
|
552
|
+
size_t nq = 10;
|
553
|
+
int k = 10;
|
554
|
+
|
555
|
+
auto xb = faiss::gpu::randVecs(nb, dim);
|
556
|
+
auto xq = faiss::gpu::randVecs(nq, dim);
|
557
|
+
|
558
|
+
faiss::IndexFlatL2 cpuIndex(dim);
|
559
|
+
|
560
|
+
faiss::gpu::GpuIndexFlatConfig config;
|
561
|
+
config.device = device;
|
562
|
+
faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
|
563
|
+
|
564
|
+
cpuIndex.add(nb, xb.data());
|
565
|
+
gpuIndex.add(nb, xb.data());
|
566
|
+
|
567
|
+
std::vector<float> refDistance(nq * k, 0);
|
568
|
+
std::vector<faiss::Index::idx_t> refIndices(nq * k, -1);
|
569
|
+
std::vector<float> refReconstruct(nq * k * dim, 0);
|
570
|
+
cpuIndex.search_and_reconstruct(
|
571
|
+
nq,
|
572
|
+
xq.data(),
|
573
|
+
k,
|
574
|
+
refDistance.data(),
|
575
|
+
refIndices.data(),
|
576
|
+
refReconstruct.data());
|
577
|
+
|
578
|
+
std::vector<float> testDistance(nq * k, 0);
|
579
|
+
std::vector<faiss::Index::idx_t> testIndices(nq * k, -1);
|
580
|
+
std::vector<float> testReconstruct(nq * k * dim, 0);
|
581
|
+
gpuIndex.search_and_reconstruct(
|
582
|
+
nq,
|
583
|
+
xq.data(),
|
584
|
+
k,
|
585
|
+
testDistance.data(),
|
586
|
+
testIndices.data(),
|
587
|
+
testReconstruct.data());
|
588
|
+
|
589
|
+
// This handles the search results
|
590
|
+
faiss::gpu::compareLists(
|
591
|
+
refDistance.data(),
|
592
|
+
refIndices.data(),
|
593
|
+
testDistance.data(),
|
594
|
+
testIndices.data(),
|
595
|
+
nq,
|
596
|
+
k,
|
597
|
+
"SearchAndReconstruct",
|
598
|
+
true,
|
599
|
+
false,
|
600
|
+
true,
|
601
|
+
kF32MaxRelErr,
|
602
|
+
0.1f,
|
603
|
+
0.015f);
|
604
|
+
|
605
|
+
// As the search results may be slightly different (though compareLists
|
606
|
+
// above will ensure a decent number of matches), reconstruction should be
|
607
|
+
// the same for the vectors that do match
|
608
|
+
for (int i = 0; i < nq; ++i) {
|
609
|
+
std::unordered_map<faiss::Index::idx_t, int> refLocation;
|
610
|
+
|
611
|
+
for (int j = 0; j < k; ++j) {
|
612
|
+
refLocation.insert(std::make_pair(refIndices[i * k + j], j));
|
613
|
+
}
|
614
|
+
|
615
|
+
for (int j = 0; j < k; ++j) {
|
616
|
+
auto idx = testIndices[i * k + j];
|
617
|
+
auto it = refLocation.find(idx);
|
618
|
+
if (it != refLocation.end()) {
|
619
|
+
for (int d = 0; d < dim; ++d) {
|
620
|
+
EXPECT_EQ(
|
621
|
+
refReconstruct[(i * k + it->second) * dim + d],
|
622
|
+
testReconstruct[(i * k + j) * dim + d]);
|
623
|
+
}
|
624
|
+
}
|
625
|
+
}
|
626
|
+
}
|
627
|
+
}
|
628
|
+
|
393
629
|
int main(int argc, char** argv) {
|
394
630
|
testing::InitGoogleTest(&argc, argv);
|
395
631
|
|
@@ -5,6 +5,7 @@
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
6
6
|
*/
|
7
7
|
|
8
|
+
#include <cuda_fp16.h>
|
8
9
|
#include <faiss/gpu/test/TestUtils.h>
|
9
10
|
#include <faiss/utils/random.h>
|
10
11
|
#include <gtest/gtest.h>
|
@@ -74,6 +75,15 @@ std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
|
|
74
75
|
return v;
|
75
76
|
}
|
76
77
|
|
78
|
+
std::vector<float> roundToHalf(const std::vector<float>& v) {
|
79
|
+
auto out = std::vector<float>(v.size());
|
80
|
+
for (int i = 0; i < v.size(); ++i) {
|
81
|
+
out[i] = __half2float(__float2half(v[i]));
|
82
|
+
}
|
83
|
+
|
84
|
+
return out;
|
85
|
+
}
|
86
|
+
|
77
87
|
void compareIndices(
|
78
88
|
const std::vector<float>& queryVecs,
|
79
89
|
faiss::Index& refIndex,
|
@@ -60,6 +60,9 @@ std::vector<float> randVecs(size_t num, size_t dim);
|
|
60
60
|
/// Generates a collection of random bit vectors
|
61
61
|
std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim);
|
62
62
|
|
63
|
+
// returns to_fp32(to_fp16(v)); useful in comparing fp16 results on CPU
|
64
|
+
std::vector<float> roundToHalf(const std::vector<float>& v);
|
65
|
+
|
63
66
|
/// Compare two indices via query for similarity, with a user-specified set of
|
64
67
|
/// query vectors
|
65
68
|
void compareIndices(
|
@@ -12,10 +12,10 @@
|
|
12
12
|
|
13
13
|
#include <sys/time.h>
|
14
14
|
|
15
|
+
#include <faiss/gpu/GpuAutoTune.h>
|
16
|
+
#include <faiss/gpu/GpuCloner.h>
|
15
17
|
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
16
18
|
#include <faiss/gpu/StandardGpuResources.h>
|
17
|
-
|
18
|
-
#include <faiss/gpu/GpuAutoTune.h>
|
19
19
|
#include <faiss/index_io.h>
|
20
20
|
|
21
21
|
double elapsed() {
|
@@ -70,10 +70,11 @@ bool getTensorCoreSupport(int device);
|
|
70
70
|
/// Equivalent to getTensorCoreSupport(getCurrentDevice())
|
71
71
|
bool getTensorCoreSupportCurrentDevice();
|
72
72
|
|
73
|
-
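/// Returns the maximum k-selection value supported based on the CUDA SDK that
|
74
|
-
/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
|
75
|
-
/// non-CUDA files
|
76
|
-
int getMaxKSelection();
|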
73
|
+
/// Returns the amount of currently available memory on the given device
|
74
|
+
size_t getFreeMemory(int device);
|
75
|
+
|
76
|
+
/// Equivalent to getFreeMemory(getCurrentDevice())
|
77
|
+
size_t getFreeMemoryCurrentDevice();
|
77
78
|
|
78
79
|
/// RAII object to set the current device, and restore the previous
|
79
80
|
/// device upon destruction
|