faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,15 +5,14 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/IndexFlat.h>
|
|
10
9
|
#include <faiss/IndexIVFFlat.h>
|
|
11
10
|
#include <faiss/gpu/GpuIndexIVFFlat.h>
|
|
12
11
|
#include <faiss/gpu/StandardGpuResources.h>
|
|
13
|
-
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
14
12
|
#include <faiss/gpu/test/TestUtils.h>
|
|
15
|
-
#include <
|
|
13
|
+
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
16
14
|
#include <gtest/gtest.h>
|
|
15
|
+
#include <cmath>
|
|
17
16
|
#include <sstream>
|
|
18
17
|
#include <vector>
|
|
19
18
|
|
|
@@ -21,76 +20,157 @@
|
|
|
21
20
|
constexpr float kF16MaxRelErr = 0.3f;
|
|
22
21
|
constexpr float kF32MaxRelErr = 0.03f;
|
|
23
22
|
|
|
24
|
-
|
|
25
23
|
struct Options {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
int numQuery;
|
|
67
|
-
int k;
|
|
68
|
-
int device;
|
|
69
|
-
faiss::gpu::IndicesOptions indicesOpt;
|
|
24
|
+
Options() {
|
|
25
|
+
numAdd = 2 * faiss::gpu::randVal(2000, 5000);
|
|
26
|
+
dim = faiss::gpu::randVal(64, 200);
|
|
27
|
+
|
|
28
|
+
numCentroids = std::sqrt((float)numAdd / 2);
|
|
29
|
+
numTrain = numCentroids * 40;
|
|
30
|
+
nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
|
|
31
|
+
numQuery = faiss::gpu::randVal(32, 100);
|
|
32
|
+
|
|
33
|
+
// Due to the approximate nature of the query and of floating point
|
|
34
|
+
// differences between GPU and CPU, to stay within our error bounds,
|
|
35
|
+
// only use a small k
|
|
36
|
+
k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
|
|
37
|
+
indicesOpt = faiss::gpu::randSelect(
|
|
38
|
+
{faiss::gpu::INDICES_CPU,
|
|
39
|
+
faiss::gpu::INDICES_32_BIT,
|
|
40
|
+
faiss::gpu::INDICES_64_BIT});
|
|
41
|
+
|
|
42
|
+
device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
std::string toString() const {
|
|
46
|
+
std::stringstream str;
|
|
47
|
+
str << "IVFFlat device " << device << " numVecs " << numAdd << " dim "
|
|
48
|
+
<< dim << " numCentroids " << numCentroids << " nprobe " << nprobe
|
|
49
|
+
<< " numQuery " << numQuery << " k " << k << " indicesOpt "
|
|
50
|
+
<< indicesOpt;
|
|
51
|
+
|
|
52
|
+
return str.str();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
int numAdd;
|
|
56
|
+
int dim;
|
|
57
|
+
int numCentroids;
|
|
58
|
+
int numTrain;
|
|
59
|
+
int nprobe;
|
|
60
|
+
int numQuery;
|
|
61
|
+
int k;
|
|
62
|
+
int device;
|
|
63
|
+
faiss::gpu::IndicesOptions indicesOpt;
|
|
70
64
|
};
|
|
71
65
|
|
|
72
|
-
void queryTest(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
66
|
+
void queryTest(
|
|
67
|
+
faiss::MetricType metricType,
|
|
68
|
+
bool useFloat16CoarseQuantizer,
|
|
69
|
+
int dimOverride = -1) {
|
|
70
|
+
for (int tries = 0; tries < 2; ++tries) {
|
|
71
|
+
Options opt;
|
|
72
|
+
opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
|
|
73
|
+
|
|
74
|
+
std::vector<float> trainVecs =
|
|
75
|
+
faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
76
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
77
|
+
|
|
78
|
+
faiss::IndexFlatL2 quantizerL2(opt.dim);
|
|
79
|
+
faiss::IndexFlatIP quantizerIP(opt.dim);
|
|
80
|
+
faiss::Index* quantizer = metricType == faiss::METRIC_L2
|
|
81
|
+
? (faiss::Index*)&quantizerL2
|
|
82
|
+
: (faiss::Index*)&quantizerIP;
|
|
83
|
+
|
|
84
|
+
faiss::IndexIVFFlat cpuIndex(
|
|
85
|
+
quantizer, opt.dim, opt.numCentroids, metricType);
|
|
86
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
87
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
88
|
+
cpuIndex.nprobe = opt.nprobe;
|
|
89
|
+
|
|
90
|
+
faiss::gpu::StandardGpuResources res;
|
|
91
|
+
res.noTempMemory();
|
|
92
|
+
|
|
93
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
94
|
+
config.device = opt.device;
|
|
95
|
+
config.indicesOptions = opt.indicesOpt;
|
|
96
|
+
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
97
|
+
|
|
98
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
99
|
+
&res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
|
|
100
|
+
gpuIndex.copyFrom(&cpuIndex);
|
|
101
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
|
102
|
+
|
|
103
|
+
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
104
|
+
faiss::gpu::compareIndices(
|
|
105
|
+
cpuIndex,
|
|
106
|
+
gpuIndex,
|
|
107
|
+
opt.numQuery,
|
|
108
|
+
opt.dim,
|
|
109
|
+
opt.k,
|
|
110
|
+
opt.toString(),
|
|
111
|
+
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
112
|
+
// FIXME: the fp16 bounds are
|
|
113
|
+
// useless when math (the accumulator) is
|
|
114
|
+
// in fp16. Figure out another way to test
|
|
115
|
+
compFloat16 ? 0.70f : 0.1f,
|
|
116
|
+
compFloat16 ? 0.65f : 0.015f);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
78
119
|
|
|
120
|
+
void addTest(faiss::MetricType metricType, bool useFloat16CoarseQuantizer) {
|
|
121
|
+
for (int tries = 0; tries < 2; ++tries) {
|
|
122
|
+
Options opt;
|
|
123
|
+
|
|
124
|
+
std::vector<float> trainVecs =
|
|
125
|
+
faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
126
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
127
|
+
|
|
128
|
+
faiss::IndexFlatL2 quantizerL2(opt.dim);
|
|
129
|
+
faiss::IndexFlatIP quantizerIP(opt.dim);
|
|
130
|
+
faiss::Index* quantizer = metricType == faiss::METRIC_L2
|
|
131
|
+
? (faiss::Index*)&quantizerL2
|
|
132
|
+
: (faiss::Index*)&quantizerIP;
|
|
133
|
+
|
|
134
|
+
faiss::IndexIVFFlat cpuIndex(
|
|
135
|
+
quantizer, opt.dim, opt.numCentroids, metricType);
|
|
136
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
137
|
+
cpuIndex.nprobe = opt.nprobe;
|
|
138
|
+
|
|
139
|
+
faiss::gpu::StandardGpuResources res;
|
|
140
|
+
res.noTempMemory();
|
|
141
|
+
|
|
142
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
143
|
+
config.device = opt.device;
|
|
144
|
+
config.indicesOptions = opt.indicesOpt;
|
|
145
|
+
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
146
|
+
|
|
147
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
148
|
+
&res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
|
|
149
|
+
gpuIndex.copyFrom(&cpuIndex);
|
|
150
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
|
151
|
+
|
|
152
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
153
|
+
gpuIndex.add(opt.numAdd, addVecs.data());
|
|
154
|
+
|
|
155
|
+
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
156
|
+
faiss::gpu::compareIndices(
|
|
157
|
+
cpuIndex,
|
|
158
|
+
gpuIndex,
|
|
159
|
+
opt.numQuery,
|
|
160
|
+
opt.dim,
|
|
161
|
+
opt.k,
|
|
162
|
+
opt.toString(),
|
|
163
|
+
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
164
|
+
compFloat16 ? 0.70f : 0.1f,
|
|
165
|
+
compFloat16 ? 0.30f : 0.015f);
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
void copyToTest(bool useFloat16CoarseQuantizer) {
|
|
170
|
+
Options opt;
|
|
79
171
|
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
80
172
|
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
81
173
|
|
|
82
|
-
faiss::IndexFlatL2 quantizerL2(opt.dim);
|
|
83
|
-
faiss::IndexFlatIP quantizerIP(opt.dim);
|
|
84
|
-
faiss::Index* quantizer =
|
|
85
|
-
metricType == faiss::METRIC_L2 ?
|
|
86
|
-
(faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
|
|
87
|
-
|
|
88
|
-
faiss::IndexIVFFlat cpuIndex(quantizer,
|
|
89
|
-
opt.dim, opt.numCentroids, metricType);
|
|
90
|
-
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
91
|
-
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
92
|
-
cpuIndex.nprobe = opt.nprobe;
|
|
93
|
-
|
|
94
174
|
faiss::gpu::StandardGpuResources res;
|
|
95
175
|
res.noTempMemory();
|
|
96
176
|
|
|
@@ -99,47 +179,57 @@ void queryTest(faiss::MetricType metricType,
|
|
|
99
179
|
config.indicesOptions = opt.indicesOpt;
|
|
100
180
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
101
181
|
|
|
102
|
-
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
config);
|
|
107
|
-
gpuIndex.copyFrom(&cpuIndex);
|
|
182
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
183
|
+
&res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
|
|
184
|
+
gpuIndex.train(opt.numTrain, trainVecs.data());
|
|
185
|
+
gpuIndex.add(opt.numAdd, addVecs.data());
|
|
108
186
|
gpuIndex.setNumProbes(opt.nprobe);
|
|
109
187
|
|
|
188
|
+
// use garbage values to see if we overwrite then
|
|
189
|
+
faiss::IndexFlatL2 cpuQuantizer(1);
|
|
190
|
+
faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
|
|
191
|
+
cpuIndex.nprobe = 1;
|
|
192
|
+
|
|
193
|
+
gpuIndex.copyTo(&cpuIndex);
|
|
194
|
+
|
|
195
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
|
196
|
+
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
|
197
|
+
|
|
198
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
|
199
|
+
EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
|
|
200
|
+
EXPECT_EQ(cpuIndex.d, opt.dim);
|
|
201
|
+
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
|
202
|
+
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
|
203
|
+
|
|
204
|
+
testIVFEquality(cpuIndex, gpuIndex);
|
|
205
|
+
|
|
206
|
+
// Query both objects; results should be equivalent
|
|
110
207
|
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
111
|
-
faiss::gpu::compareIndices(
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
208
|
+
faiss::gpu::compareIndices(
|
|
209
|
+
cpuIndex,
|
|
210
|
+
gpuIndex,
|
|
211
|
+
opt.numQuery,
|
|
212
|
+
opt.dim,
|
|
213
|
+
opt.k,
|
|
214
|
+
opt.toString(),
|
|
215
|
+
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
216
|
+
compFloat16 ? 0.70f : 0.1f,
|
|
217
|
+
compFloat16 ? 0.30f : 0.015f);
|
|
120
218
|
}
|
|
121
219
|
|
|
122
|
-
void
|
|
123
|
-
bool useFloat16CoarseQuantizer) {
|
|
124
|
-
for (int tries = 0; tries < 2; ++tries) {
|
|
220
|
+
void copyFromTest(bool useFloat16CoarseQuantizer) {
|
|
125
221
|
Options opt;
|
|
126
|
-
|
|
127
222
|
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
128
223
|
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
129
224
|
|
|
130
|
-
faiss::IndexFlatL2
|
|
131
|
-
faiss::
|
|
132
|
-
|
|
133
|
-
metricType == faiss::METRIC_L2 ?
|
|
134
|
-
(faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
|
|
135
|
-
|
|
136
|
-
faiss::IndexIVFFlat cpuIndex(quantizer,
|
|
137
|
-
opt.dim,
|
|
138
|
-
opt.numCentroids,
|
|
139
|
-
metricType);
|
|
140
|
-
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
225
|
+
faiss::IndexFlatL2 cpuQuantizer(opt.dim);
|
|
226
|
+
faiss::IndexIVFFlat cpuIndex(
|
|
227
|
+
&cpuQuantizer, opt.dim, opt.numCentroids, faiss::METRIC_L2);
|
|
141
228
|
cpuIndex.nprobe = opt.nprobe;
|
|
229
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
230
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
142
231
|
|
|
232
|
+
// use garbage values to see if we overwrite then
|
|
143
233
|
faiss::gpu::StandardGpuResources res;
|
|
144
234
|
res.noTempMemory();
|
|
145
235
|
|
|
@@ -148,140 +238,49 @@ void addTest(faiss::MetricType metricType,
|
|
|
148
238
|
config.indicesOptions = opt.indicesOpt;
|
|
149
239
|
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
150
240
|
|
|
151
|
-
faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
cpuIndex.metric_type,
|
|
155
|
-
config);
|
|
241
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
|
|
242
|
+
gpuIndex.setNumProbes(1);
|
|
243
|
+
|
|
156
244
|
gpuIndex.copyFrom(&cpuIndex);
|
|
157
|
-
gpuIndex.setNumProbes(opt.nprobe);
|
|
158
245
|
|
|
159
|
-
cpuIndex.
|
|
160
|
-
gpuIndex.
|
|
246
|
+
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
|
247
|
+
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
|
161
248
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
compFloat16 ? 0.70f : 0.1f,
|
|
167
|
-
compFloat16 ? 0.30f : 0.015f);
|
|
168
|
-
}
|
|
169
|
-
}
|
|
249
|
+
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
|
250
|
+
EXPECT_EQ(cpuIndex.d, opt.dim);
|
|
251
|
+
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
|
252
|
+
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
|
170
253
|
|
|
171
|
-
|
|
172
|
-
Options opt;
|
|
173
|
-
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
174
|
-
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
175
|
-
|
|
176
|
-
faiss::gpu::StandardGpuResources res;
|
|
177
|
-
res.noTempMemory();
|
|
178
|
-
|
|
179
|
-
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
180
|
-
config.device = opt.device;
|
|
181
|
-
config.indicesOptions = opt.indicesOpt;
|
|
182
|
-
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
183
|
-
|
|
184
|
-
faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
|
|
185
|
-
opt.dim,
|
|
186
|
-
opt.numCentroids,
|
|
187
|
-
faiss::METRIC_L2,
|
|
188
|
-
config);
|
|
189
|
-
gpuIndex.train(opt.numTrain, trainVecs.data());
|
|
190
|
-
gpuIndex.add(opt.numAdd, addVecs.data());
|
|
191
|
-
gpuIndex.setNumProbes(opt.nprobe);
|
|
192
|
-
|
|
193
|
-
// use garbage values to see if we overwrite then
|
|
194
|
-
faiss::IndexFlatL2 cpuQuantizer(1);
|
|
195
|
-
faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
|
|
196
|
-
cpuIndex.nprobe = 1;
|
|
197
|
-
|
|
198
|
-
gpuIndex.copyTo(&cpuIndex);
|
|
199
|
-
|
|
200
|
-
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
|
201
|
-
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
|
202
|
-
|
|
203
|
-
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
|
204
|
-
EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
|
|
205
|
-
EXPECT_EQ(cpuIndex.d, opt.dim);
|
|
206
|
-
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
|
207
|
-
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
|
208
|
-
|
|
209
|
-
testIVFEquality(cpuIndex, gpuIndex);
|
|
210
|
-
|
|
211
|
-
// Query both objects; results should be equivalent
|
|
212
|
-
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
213
|
-
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
|
214
|
-
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
|
215
|
-
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
216
|
-
compFloat16 ? 0.70f : 0.1f,
|
|
217
|
-
compFloat16 ? 0.30f : 0.015f);
|
|
218
|
-
}
|
|
254
|
+
testIVFEquality(cpuIndex, gpuIndex);
|
|
219
255
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
233
|
-
|
|
234
|
-
// use garbage values to see if we overwrite then
|
|
235
|
-
faiss::gpu::StandardGpuResources res;
|
|
236
|
-
res.noTempMemory();
|
|
237
|
-
|
|
238
|
-
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
239
|
-
config.device = opt.device;
|
|
240
|
-
config.indicesOptions = opt.indicesOpt;
|
|
241
|
-
config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
|
|
242
|
-
|
|
243
|
-
faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
|
|
244
|
-
1,
|
|
245
|
-
1,
|
|
246
|
-
faiss::METRIC_L2,
|
|
247
|
-
config);
|
|
248
|
-
gpuIndex.setNumProbes(1);
|
|
249
|
-
|
|
250
|
-
gpuIndex.copyFrom(&cpuIndex);
|
|
251
|
-
|
|
252
|
-
EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
|
|
253
|
-
EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
|
|
254
|
-
|
|
255
|
-
EXPECT_EQ(cpuIndex.d, gpuIndex.d);
|
|
256
|
-
EXPECT_EQ(cpuIndex.d, opt.dim);
|
|
257
|
-
EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
|
|
258
|
-
EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
|
|
259
|
-
|
|
260
|
-
testIVFEquality(cpuIndex, gpuIndex);
|
|
261
|
-
|
|
262
|
-
// Query both objects; results should be equivalent
|
|
263
|
-
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
264
|
-
faiss::gpu::compareIndices(cpuIndex, gpuIndex,
|
|
265
|
-
opt.numQuery, opt.dim, opt.k, opt.toString(),
|
|
266
|
-
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
267
|
-
compFloat16 ? 0.70f : 0.1f,
|
|
268
|
-
compFloat16 ? 0.30f : 0.015f);
|
|
256
|
+
// Query both objects; results should be equivalent
|
|
257
|
+
bool compFloat16 = useFloat16CoarseQuantizer;
|
|
258
|
+
faiss::gpu::compareIndices(
|
|
259
|
+
cpuIndex,
|
|
260
|
+
gpuIndex,
|
|
261
|
+
opt.numQuery,
|
|
262
|
+
opt.dim,
|
|
263
|
+
opt.k,
|
|
264
|
+
opt.toString(),
|
|
265
|
+
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
266
|
+
compFloat16 ? 0.70f : 0.1f,
|
|
267
|
+
compFloat16 ? 0.30f : 0.015f);
|
|
269
268
|
}
|
|
270
269
|
|
|
271
270
|
TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
|
|
272
|
-
|
|
271
|
+
addTest(faiss::METRIC_L2, false);
|
|
273
272
|
}
|
|
274
273
|
|
|
275
274
|
TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
|
|
276
|
-
|
|
275
|
+
addTest(faiss::METRIC_INNER_PRODUCT, false);
|
|
277
276
|
}
|
|
278
277
|
|
|
279
278
|
TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
|
|
280
|
-
|
|
279
|
+
addTest(faiss::METRIC_L2, true);
|
|
281
280
|
}
|
|
282
281
|
|
|
283
282
|
TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
|
|
284
|
-
|
|
283
|
+
addTest(faiss::METRIC_INNER_PRODUCT, true);
|
|
285
284
|
}
|
|
286
285
|
|
|
287
286
|
//
|
|
@@ -289,21 +288,21 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
|
|
|
289
288
|
//
|
|
290
289
|
|
|
291
290
|
TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
|
|
292
|
-
|
|
291
|
+
queryTest(faiss::METRIC_L2, false);
|
|
293
292
|
}
|
|
294
293
|
|
|
295
294
|
TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
|
|
296
|
-
|
|
295
|
+
queryTest(faiss::METRIC_INNER_PRODUCT, false);
|
|
297
296
|
}
|
|
298
297
|
|
|
299
298
|
// float16 coarse quantizer
|
|
300
299
|
|
|
301
300
|
TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
|
|
302
|
-
|
|
301
|
+
queryTest(faiss::METRIC_L2, true);
|
|
303
302
|
}
|
|
304
303
|
|
|
305
304
|
TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
|
|
306
|
-
|
|
305
|
+
queryTest(faiss::METRIC_INNER_PRODUCT, true);
|
|
307
306
|
}
|
|
308
307
|
|
|
309
308
|
//
|
|
@@ -312,19 +311,19 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
|
|
|
312
311
|
//
|
|
313
312
|
|
|
314
313
|
TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
|
|
315
|
-
|
|
314
|
+
queryTest(faiss::METRIC_L2, false, 64);
|
|
316
315
|
}
|
|
317
316
|
|
|
318
317
|
TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
|
|
319
|
-
|
|
318
|
+
queryTest(faiss::METRIC_INNER_PRODUCT, false, 64);
|
|
320
319
|
}
|
|
321
320
|
|
|
322
321
|
TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
|
|
323
|
-
|
|
322
|
+
queryTest(faiss::METRIC_L2, false, 128);
|
|
324
323
|
}
|
|
325
324
|
|
|
326
325
|
TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
|
|
327
|
-
|
|
326
|
+
queryTest(faiss::METRIC_INNER_PRODUCT, false, 128);
|
|
328
327
|
}
|
|
329
328
|
|
|
330
329
|
//
|
|
@@ -332,71 +331,72 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
|
|
|
332
331
|
//
|
|
333
332
|
|
|
334
333
|
TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
|
|
335
|
-
|
|
334
|
+
copyToTest(false);
|
|
336
335
|
}
|
|
337
336
|
|
|
338
337
|
TEST(TestGpuIndexIVFFlat, Float32_32_CopyFrom) {
|
|
339
|
-
|
|
338
|
+
copyFromTest(false);
|
|
340
339
|
}
|
|
341
340
|
|
|
342
341
|
TEST(TestGpuIndexIVFFlat, Float32_negative) {
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
342
|
+
Options opt;
|
|
343
|
+
|
|
344
|
+
auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
345
|
+
auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
346
|
+
|
|
347
|
+
// Put all vecs on negative side
|
|
348
|
+
for (auto& f : trainVecs) {
|
|
349
|
+
f = std::abs(f) * -1.0f;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
for (auto& f : addVecs) {
|
|
353
|
+
f *= std::abs(f) * -1.0f;
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
faiss::IndexFlatIP quantizerIP(opt.dim);
|
|
357
|
+
faiss::Index* quantizer = (faiss::Index*)&quantizerIP;
|
|
358
|
+
|
|
359
|
+
faiss::IndexIVFFlat cpuIndex(
|
|
360
|
+
quantizer, opt.dim, opt.numCentroids, faiss::METRIC_INNER_PRODUCT);
|
|
361
|
+
cpuIndex.train(opt.numTrain, trainVecs.data());
|
|
362
|
+
cpuIndex.add(opt.numAdd, addVecs.data());
|
|
363
|
+
cpuIndex.nprobe = opt.nprobe;
|
|
364
|
+
|
|
365
|
+
faiss::gpu::StandardGpuResources res;
|
|
366
|
+
res.noTempMemory();
|
|
367
|
+
|
|
368
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
369
|
+
config.device = opt.device;
|
|
370
|
+
config.indicesOptions = opt.indicesOpt;
|
|
371
|
+
|
|
372
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
373
|
+
&res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
|
|
374
|
+
gpuIndex.copyFrom(&cpuIndex);
|
|
375
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
|
376
|
+
|
|
377
|
+
// Construct a positive test set
|
|
378
|
+
auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
|
|
379
|
+
|
|
380
|
+
// Put all vecs on positive size
|
|
381
|
+
for (auto& f : queryVecs) {
|
|
382
|
+
f = std::abs(f);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
bool compFloat16 = false;
|
|
386
|
+
faiss::gpu::compareIndices(
|
|
387
|
+
queryVecs,
|
|
388
|
+
cpuIndex,
|
|
389
|
+
gpuIndex,
|
|
390
|
+
opt.numQuery,
|
|
391
|
+
opt.dim,
|
|
392
|
+
opt.k,
|
|
393
|
+
opt.toString(),
|
|
394
|
+
compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
|
|
395
|
+
// FIXME: the fp16 bounds are
|
|
396
|
+
// useless when math (the accumulator) is
|
|
397
|
+
// in fp16. Figure out another way to test
|
|
398
|
+
compFloat16 ? 0.99f : 0.1f,
|
|
399
|
+
compFloat16 ? 0.65f : 0.015f);
|
|
400
400
|
}
|
|
401
401
|
|
|
402
402
|
//
|
|
@@ -404,152 +404,152 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
|
|
|
404
404
|
//
|
|
405
405
|
|
|
406
406
|
TEST(TestGpuIndexIVFFlat, QueryNaN) {
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
for (int k = 0; k < opt.k; ++k) {
|
|
445
|
-
EXPECT_EQ(indices[q * opt.k + k], -1);
|
|
446
|
-
EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
|
|
407
|
+
Options opt;
|
|
408
|
+
|
|
409
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
410
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
|
|
411
|
+
|
|
412
|
+
faiss::gpu::StandardGpuResources res;
|
|
413
|
+
res.noTempMemory();
|
|
414
|
+
|
|
415
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
416
|
+
config.device = opt.device;
|
|
417
|
+
config.indicesOptions = opt.indicesOpt;
|
|
418
|
+
config.flatConfig.useFloat16 = faiss::gpu::randBool();
|
|
419
|
+
|
|
420
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
421
|
+
&res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
|
|
422
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
|
423
|
+
|
|
424
|
+
gpuIndex.train(opt.numTrain, trainVecs.data());
|
|
425
|
+
gpuIndex.add(opt.numAdd, addVecs.data());
|
|
426
|
+
|
|
427
|
+
int numQuery = 10;
|
|
428
|
+
std::vector<float> nans(
|
|
429
|
+
numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
|
|
430
|
+
|
|
431
|
+
std::vector<float> distances(numQuery * opt.k, 0);
|
|
432
|
+
std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
|
|
433
|
+
|
|
434
|
+
gpuIndex.search(
|
|
435
|
+
numQuery, nans.data(), opt.k, distances.data(), indices.data());
|
|
436
|
+
|
|
437
|
+
for (int q = 0; q < numQuery; ++q) {
|
|
438
|
+
for (int k = 0; k < opt.k; ++k) {
|
|
439
|
+
EXPECT_EQ(indices[q * opt.k + k], -1);
|
|
440
|
+
EXPECT_EQ(
|
|
441
|
+
distances[q * opt.k + k],
|
|
442
|
+
std::numeric_limits<float>::max());
|
|
443
|
+
}
|
|
447
444
|
}
|
|
448
|
-
}
|
|
449
445
|
}
|
|
450
446
|
|
|
451
447
|
TEST(TestGpuIndexIVFFlat, AddNaN) {
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
448
|
+
Options opt;
|
|
449
|
+
|
|
450
|
+
faiss::gpu::StandardGpuResources res;
|
|
451
|
+
res.noTempMemory();
|
|
452
|
+
|
|
453
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
454
|
+
config.device = opt.device;
|
|
455
|
+
config.indicesOptions = opt.indicesOpt;
|
|
456
|
+
config.flatConfig.useFloat16 = faiss::gpu::randBool();
|
|
457
|
+
|
|
458
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
459
|
+
&res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
|
|
460
|
+
gpuIndex.setNumProbes(opt.nprobe);
|
|
461
|
+
|
|
462
|
+
int numNans = 10;
|
|
463
|
+
std::vector<float> nans(
|
|
464
|
+
numNans * opt.dim, std::numeric_limits<float>::quiet_NaN());
|
|
465
|
+
|
|
466
|
+
// Make one vector valid (not the first vector, in order to test offset
|
|
467
|
+
// issues), which should actually add
|
|
468
|
+
for (int i = 0; i < opt.dim; ++i) {
|
|
469
|
+
nans[opt.dim + i] = i;
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
|
|
473
|
+
gpuIndex.train(opt.numTrain, trainVecs.data());
|
|
474
|
+
|
|
475
|
+
// should not crash
|
|
476
|
+
EXPECT_EQ(gpuIndex.ntotal, 0);
|
|
477
|
+
gpuIndex.add(numNans, nans.data());
|
|
478
|
+
|
|
479
|
+
std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
|
|
480
|
+
std::vector<float> distance(opt.numQuery * opt.k, 0);
|
|
481
|
+
std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
|
|
482
|
+
|
|
483
|
+
// should not crash
|
|
484
|
+
gpuIndex.search(
|
|
485
|
+
opt.numQuery,
|
|
486
|
+
queryVecs.data(),
|
|
487
|
+
opt.k,
|
|
488
|
+
distance.data(),
|
|
489
|
+
indices.data());
|
|
493
490
|
}
|
|
494
491
|
|
|
495
492
|
TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
493
|
+
// Construct on a random device to test multi-device, if we have
|
|
494
|
+
// multiple devices
|
|
495
|
+
int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
|
|
496
|
+
|
|
497
|
+
if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
int dim = 128;
|
|
502
|
+
|
|
503
|
+
int numCentroids = 256;
|
|
504
|
+
// Unfortunately it would take forever to add 24 GB in IVFPQ data,
|
|
505
|
+
// so just perform a small test with data allocated in the unified
|
|
506
|
+
// memory address space
|
|
507
|
+
size_t numAdd = 10000;
|
|
508
|
+
size_t numTrain = numCentroids * 40;
|
|
509
|
+
int numQuery = 10;
|
|
510
|
+
int k = 10;
|
|
511
|
+
int nprobe = 8;
|
|
512
|
+
|
|
513
|
+
std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
|
|
514
|
+
std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
|
|
515
|
+
|
|
516
|
+
faiss::IndexFlatL2 quantizer(dim);
|
|
517
|
+
faiss::IndexIVFFlat cpuIndex(
|
|
518
|
+
&quantizer, dim, numCentroids, faiss::METRIC_L2);
|
|
519
|
+
|
|
520
|
+
cpuIndex.train(numTrain, trainVecs.data());
|
|
521
|
+
cpuIndex.add(numAdd, addVecs.data());
|
|
522
|
+
cpuIndex.nprobe = nprobe;
|
|
523
|
+
|
|
524
|
+
faiss::gpu::StandardGpuResources res;
|
|
525
|
+
res.noTempMemory();
|
|
526
|
+
|
|
527
|
+
faiss::gpu::GpuIndexIVFFlatConfig config;
|
|
528
|
+
config.device = device;
|
|
529
|
+
config.memorySpace = faiss::gpu::MemorySpace::Unified;
|
|
530
|
+
|
|
531
|
+
faiss::gpu::GpuIndexIVFFlat gpuIndex(
|
|
532
|
+
&res, dim, numCentroids, faiss::METRIC_L2, config);
|
|
533
|
+
gpuIndex.copyFrom(&cpuIndex);
|
|
534
|
+
gpuIndex.setNumProbes(nprobe);
|
|
535
|
+
|
|
536
|
+
faiss::gpu::compareIndices(
|
|
537
|
+
cpuIndex,
|
|
538
|
+
gpuIndex,
|
|
539
|
+
numQuery,
|
|
540
|
+
dim,
|
|
541
|
+
k,
|
|
542
|
+
"Unified Memory",
|
|
543
|
+
kF32MaxRelErr,
|
|
544
|
+
0.1f,
|
|
545
|
+
0.015f);
|
|
546
546
|
}
|
|
547
547
|
|
|
548
548
|
int main(int argc, char** argv) {
|
|
549
|
-
|
|
549
|
+
testing::InitGoogleTest(&argc, argv);
|
|
550
550
|
|
|
551
|
-
|
|
552
|
-
|
|
551
|
+
// just run with a fixed test seed
|
|
552
|
+
faiss::gpu::setTestSeed(100);
|
|
553
553
|
|
|
554
|
-
|
|
554
|
+
return RUN_ALL_TESTS();
|
|
555
555
|
}
|