faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -5,20 +5,20 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
8
|
#include <faiss/gpu/test/TestUtils.h>
|
|
10
9
|
#include <faiss/utils/random.h>
|
|
11
|
-
#include <cmath>
|
|
12
10
|
#include <gtest/gtest.h>
|
|
11
|
+
#include <time.h>
|
|
12
|
+
#include <cmath>
|
|
13
13
|
#include <set>
|
|
14
14
|
#include <sstream>
|
|
15
|
-
#include <time.h>
|
|
16
15
|
#include <unordered_map>
|
|
17
16
|
|
|
18
|
-
namespace faiss {
|
|
17
|
+
namespace faiss {
|
|
18
|
+
namespace gpu {
|
|
19
19
|
|
|
20
20
|
inline float relativeError(float a, float b) {
|
|
21
|
-
|
|
21
|
+
return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
|
|
22
22
|
}
|
|
23
23
|
|
|
24
24
|
// This seed is also used for the faiss float_rand API; in a test it
|
|
@@ -28,290 +28,326 @@ std::mt19937 rng(1);
|
|
|
28
28
|
std::uniform_int_distribution<> distrib;
|
|
29
29
|
|
|
30
30
|
void newTestSeed() {
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
struct timespec t;
|
|
32
|
+
clock_gettime(CLOCK_REALTIME, &t);
|
|
33
33
|
|
|
34
|
-
|
|
34
|
+
setTestSeed(t.tv_nsec);
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
void setTestSeed(long seed) {
|
|
38
|
-
|
|
38
|
+
printf("testing with random seed %ld\n", seed);
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
rng = std::mt19937(seed);
|
|
41
|
+
s_seed = seed;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
int randVal(int a, int b) {
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
EXPECT_GE(a, 0);
|
|
46
|
+
EXPECT_LE(a, b);
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
return a + (distrib(rng) % (b + 1 - a));
|
|
49
49
|
}
|
|
50
50
|
|
|
51
51
|
bool randBool() {
|
|
52
|
-
|
|
52
|
+
return randSelect<bool>({true, false});
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
std::vector<float> randVecs(size_t num, size_t dim) {
|
|
56
|
-
|
|
56
|
+
std::vector<float> v(num * dim);
|
|
57
57
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
58
|
+
faiss::float_rand(v.data(), v.size(), s_seed);
|
|
59
|
+
// unfortunately we generate separate sets of vectors, and don't
|
|
60
|
+
// want the same values
|
|
61
|
+
++s_seed;
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
return v;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
|
|
67
|
-
|
|
67
|
+
std::vector<unsigned char> v(num * (dim / 8));
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
69
|
+
faiss::byte_rand(v.data(), v.size(), s_seed);
|
|
70
|
+
// unfortunately we generate separate sets of vectors, and don't
|
|
71
|
+
// want the same values
|
|
72
|
+
++s_seed;
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
return v;
|
|
75
75
|
}
|
|
76
76
|
|
|
77
77
|
void compareIndices(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
78
|
+
const std::vector<float>& queryVecs,
|
|
79
|
+
faiss::Index& refIndex,
|
|
80
|
+
faiss::Index& testIndex,
|
|
81
|
+
int numQuery,
|
|
82
|
+
int /*dim*/,
|
|
83
|
+
int k,
|
|
84
|
+
const std::string& configMsg,
|
|
85
|
+
float maxRelativeError,
|
|
86
|
+
float pctMaxDiff1,
|
|
87
|
+
float pctMaxDiffN) {
|
|
88
|
+
// Compare
|
|
89
|
+
std::vector<float> refDistance(numQuery * k, 0);
|
|
90
|
+
std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
|
|
91
|
+
refIndex.search(
|
|
92
|
+
numQuery,
|
|
93
|
+
queryVecs.data(),
|
|
94
|
+
k,
|
|
95
|
+
refDistance.data(),
|
|
96
|
+
refIndices.data());
|
|
97
|
+
|
|
98
|
+
std::vector<float> testDistance(numQuery * k, 0);
|
|
99
|
+
std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
|
|
100
|
+
testIndex.search(
|
|
101
|
+
numQuery,
|
|
102
|
+
queryVecs.data(),
|
|
103
|
+
k,
|
|
104
|
+
testDistance.data(),
|
|
105
|
+
testIndices.data());
|
|
106
|
+
|
|
107
|
+
faiss::gpu::compareLists(
|
|
108
|
+
refDistance.data(),
|
|
109
|
+
refIndices.data(),
|
|
110
|
+
testDistance.data(),
|
|
111
|
+
testIndices.data(),
|
|
112
|
+
numQuery,
|
|
113
|
+
k,
|
|
114
|
+
configMsg,
|
|
115
|
+
true,
|
|
116
|
+
false,
|
|
117
|
+
true,
|
|
118
|
+
maxRelativeError,
|
|
119
|
+
pctMaxDiff1,
|
|
120
|
+
pctMaxDiffN);
|
|
107
121
|
}
|
|
108
122
|
|
|
109
|
-
void compareIndices(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
123
|
+
void compareIndices(
|
|
124
|
+
faiss::Index& refIndex,
|
|
125
|
+
faiss::Index& testIndex,
|
|
126
|
+
int numQuery,
|
|
127
|
+
int dim,
|
|
128
|
+
int k,
|
|
129
|
+
const std::string& configMsg,
|
|
130
|
+
float maxRelativeError,
|
|
131
|
+
float pctMaxDiff1,
|
|
132
|
+
float pctMaxDiffN) {
|
|
133
|
+
auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
|
|
134
|
+
|
|
135
|
+
compareIndices(
|
|
136
|
+
queryVecs,
|
|
137
|
+
refIndex,
|
|
138
|
+
testIndex,
|
|
139
|
+
numQuery,
|
|
140
|
+
dim,
|
|
141
|
+
k,
|
|
142
|
+
configMsg,
|
|
143
|
+
maxRelativeError,
|
|
144
|
+
pctMaxDiff1,
|
|
145
|
+
pctMaxDiffN);
|
|
126
146
|
}
|
|
127
147
|
|
|
128
148
|
template <typename T>
|
|
129
149
|
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
|
|
130
|
-
|
|
150
|
+
return p[i * dim2 + j];
|
|
131
151
|
}
|
|
132
152
|
|
|
133
|
-
void compareLists(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
// query -> {index -> result position}
|
|
151
|
-
std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
|
|
152
|
-
|
|
153
|
-
for (int query = 0; query < dim1; ++query) {
|
|
154
|
-
std::unordered_map<faiss::Index::idx_t, int> indices;
|
|
155
|
-
|
|
156
|
-
for (int result = 0; result < dim2; ++result) {
|
|
157
|
-
indices[lookup(refInd, query, result, dim1, dim2)] = result;
|
|
153
|
+
void compareLists(
|
|
154
|
+
const float* refDist,
|
|
155
|
+
const faiss::Index::idx_t* refInd,
|
|
156
|
+
const float* testDist,
|
|
157
|
+
const faiss::Index::idx_t* testInd,
|
|
158
|
+
int dim1,
|
|
159
|
+
int dim2,
|
|
160
|
+
const std::string& configMsg,
|
|
161
|
+
bool printBasicStats,
|
|
162
|
+
bool printDiffs,
|
|
163
|
+
bool assertOnErr,
|
|
164
|
+
float maxRelativeError,
|
|
165
|
+
float pctMaxDiff1,
|
|
166
|
+
float pctMaxDiffN) {
|
|
167
|
+
float maxAbsErr = 0.0f;
|
|
168
|
+
for (int i = 0; i < dim1 * dim2; ++i) {
|
|
169
|
+
maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
|
|
158
170
|
}
|
|
171
|
+
int numResults = dim1 * dim2;
|
|
159
172
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
// See how far off the indices are
|
|
164
|
-
// Keep track of the difference for each entry
|
|
165
|
-
std::vector<std::vector<int>> indexDiffs;
|
|
166
|
-
|
|
167
|
-
int diff1 = 0; // index differs by 1
|
|
168
|
-
int diffN = 0; // index differs by >1
|
|
169
|
-
int diffInf = 0; // index not found in the other
|
|
170
|
-
int nonUniqueIndices = 0;
|
|
171
|
-
|
|
172
|
-
double avgDiff = 0.0;
|
|
173
|
-
int maxDiff = 0;
|
|
174
|
-
float maxRelErr = 0.0f;
|
|
175
|
-
|
|
176
|
-
for (int query = 0; query < dim1; ++query) {
|
|
177
|
-
std::vector<int> diffs;
|
|
178
|
-
std::set<faiss::Index::idx_t> uniqueIndices;
|
|
179
|
-
|
|
180
|
-
auto& indices = refIndexMap[query];
|
|
181
|
-
|
|
182
|
-
for (int result = 0; result < dim2; ++result) {
|
|
183
|
-
auto t = lookup(testInd, query, result, dim1, dim2);
|
|
184
|
-
|
|
185
|
-
// All indices reported within a query should be unique; this is
|
|
186
|
-
// a serious error if is otherwise the case.
|
|
187
|
-
// If -1 is reported (no result due to IVF partitioning or not enough
|
|
188
|
-
// entries in the index), then duplicates are allowed, but both the
|
|
189
|
-
// reference and test must have -1 in the same position.
|
|
190
|
-
if (t == -1) {
|
|
191
|
-
EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
|
|
192
|
-
} else {
|
|
193
|
-
bool uniqueIndex = uniqueIndices.count(t) == 0;
|
|
194
|
-
if (assertOnErr) {
|
|
195
|
-
EXPECT_TRUE(uniqueIndex) << configMsg
|
|
196
|
-
<< " " << query
|
|
197
|
-
<< " " << result
|
|
198
|
-
<< " " << t;
|
|
199
|
-
}
|
|
173
|
+
// query -> {index -> result position}
|
|
174
|
+
std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
|
|
200
175
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
} else {
|
|
204
|
-
uniqueIndices.insert(t);
|
|
205
|
-
}
|
|
176
|
+
for (int query = 0; query < dim1; ++query) {
|
|
177
|
+
std::unordered_map<faiss::Index::idx_t, int> indices;
|
|
206
178
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
int diff = std::abs(result - it->second);
|
|
210
|
-
diffs.push_back(diff);
|
|
211
|
-
|
|
212
|
-
if (diff == 1) {
|
|
213
|
-
++diff1;
|
|
214
|
-
maxDiff = std::max(diff, maxDiff);
|
|
215
|
-
} else if (diff > 1) {
|
|
216
|
-
++diffN;
|
|
217
|
-
maxDiff = std::max(diff, maxDiff);
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
avgDiff += (double) diff;
|
|
221
|
-
} else {
|
|
222
|
-
++diffInf;
|
|
223
|
-
diffs.push_back(-1);
|
|
224
|
-
// don't count this for maxDiff
|
|
179
|
+
for (int result = 0; result < dim2; ++result) {
|
|
180
|
+
indices[lookup(refInd, query, result, dim1, dim2)] = result;
|
|
225
181
|
}
|
|
226
|
-
}
|
|
227
182
|
|
|
228
|
-
|
|
229
|
-
|
|
183
|
+
refIndexMap.emplace_back(std::move(indices));
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// See how far off the indices are
|
|
187
|
+
// Keep track of the difference for each entry
|
|
188
|
+
std::vector<std::vector<int>> indexDiffs;
|
|
230
189
|
|
|
231
|
-
|
|
190
|
+
int diff1 = 0; // index differs by 1
|
|
191
|
+
int diffN = 0; // index differs by >1
|
|
192
|
+
int diffInf = 0; // index not found in the other
|
|
193
|
+
int nonUniqueIndices = 0;
|
|
232
194
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
195
|
+
double avgDiff = 0.0;
|
|
196
|
+
int maxDiff = 0;
|
|
197
|
+
float maxRelErr = 0.0f;
|
|
198
|
+
|
|
199
|
+
for (int query = 0; query < dim1; ++query) {
|
|
200
|
+
std::vector<int> diffs;
|
|
201
|
+
std::set<faiss::Index::idx_t> uniqueIndices;
|
|
202
|
+
|
|
203
|
+
auto& indices = refIndexMap[query];
|
|
204
|
+
|
|
205
|
+
for (int result = 0; result < dim2; ++result) {
|
|
206
|
+
auto t = lookup(testInd, query, result, dim1, dim2);
|
|
207
|
+
|
|
208
|
+
// All indices reported within a query should be unique; this is
|
|
209
|
+
// a serious error if is otherwise the case.
|
|
210
|
+
// If -1 is reported (no result due to IVF partitioning or not
|
|
211
|
+
// enough entries in the index), then duplicates are allowed, but
|
|
212
|
+
// both the reference and test must have -1 in the same position.
|
|
213
|
+
if (t == -1) {
|
|
214
|
+
EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
|
|
215
|
+
} else {
|
|
216
|
+
bool uniqueIndex = uniqueIndices.count(t) == 0;
|
|
217
|
+
if (assertOnErr) {
|
|
218
|
+
EXPECT_TRUE(uniqueIndex) << configMsg << " " << query << " "
|
|
219
|
+
<< result << " " << t;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
if (!uniqueIndex) {
|
|
223
|
+
++nonUniqueIndices;
|
|
224
|
+
} else {
|
|
225
|
+
uniqueIndices.insert(t);
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
auto it = indices.find(t);
|
|
229
|
+
if (it != indices.end()) {
|
|
230
|
+
int diff = std::abs(result - it->second);
|
|
231
|
+
diffs.push_back(diff);
|
|
232
|
+
|
|
233
|
+
if (diff == 1) {
|
|
234
|
+
++diff1;
|
|
235
|
+
maxDiff = std::max(diff, maxDiff);
|
|
236
|
+
} else if (diff > 1) {
|
|
237
|
+
++diffN;
|
|
238
|
+
maxDiff = std::max(diff, maxDiff);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
avgDiff += (double)diff;
|
|
242
|
+
} else {
|
|
243
|
+
++diffInf;
|
|
244
|
+
diffs.push_back(-1);
|
|
245
|
+
// don't count this for maxDiff
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
auto refD = lookup(refDist, query, result, dim1, dim2);
|
|
250
|
+
auto testD = lookup(testDist, query, result, dim1, dim2);
|
|
251
|
+
|
|
252
|
+
float relErr = relativeError(refD, testD);
|
|
253
|
+
|
|
254
|
+
if (assertOnErr) {
|
|
255
|
+
EXPECT_LE(relErr, maxRelativeError)
|
|
256
|
+
<< configMsg << " (" << query << ", " << result
|
|
257
|
+
<< ") refD: " << refD << " testD: " << testD;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
maxRelErr = std::max(maxRelErr, relErr);
|
|
261
|
+
}
|
|
239
262
|
|
|
240
|
-
|
|
263
|
+
indexDiffs.emplace_back(std::move(diffs));
|
|
241
264
|
}
|
|
242
265
|
|
|
243
|
-
|
|
244
|
-
|
|
266
|
+
if (assertOnErr) {
|
|
267
|
+
EXPECT_LE(
|
|
268
|
+
(float)(diff1 + diffN + diffInf),
|
|
269
|
+
(float)numResults * pctMaxDiff1)
|
|
270
|
+
<< configMsg;
|
|
245
271
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
272
|
+
// Don't count diffInf because that could be diff1 as far as we
|
|
273
|
+
// know
|
|
274
|
+
EXPECT_LE((float)diffN, (float)numResults * pctMaxDiffN) << configMsg;
|
|
275
|
+
}
|
|
249
276
|
|
|
250
|
-
|
|
251
|
-
// know
|
|
252
|
-
EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
|
|
253
|
-
}
|
|
277
|
+
avgDiff /= (double)numResults;
|
|
254
278
|
|
|
255
|
-
|
|
279
|
+
if (printBasicStats) {
|
|
280
|
+
if (!configMsg.empty()) {
|
|
281
|
+
printf("Config\n"
|
|
282
|
+
"----------------------------\n"
|
|
283
|
+
"%s\n",
|
|
284
|
+
configMsg.c_str());
|
|
285
|
+
}
|
|
256
286
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
287
|
+
printf("Result error and differences\n"
|
|
288
|
+
"----------------------------\n"
|
|
289
|
+
"max abs diff %.7f rel diff %.7f\n"
|
|
290
|
+
"idx diff avg: %.5g max: %d\n"
|
|
291
|
+
"idx diff of 1: %d (%.3f%% of queries)\n"
|
|
292
|
+
"idx diff of >1: %d (%.3f%% of queries)\n"
|
|
293
|
+
"idx diff not found: %d (%.3f%% of queries)"
|
|
294
|
+
" [typically a last element inversion]\n"
|
|
295
|
+
"non-unique indices: %d (a serious error if >0)\n",
|
|
296
|
+
maxAbsErr,
|
|
297
|
+
maxRelErr,
|
|
298
|
+
avgDiff,
|
|
299
|
+
maxDiff,
|
|
300
|
+
diff1,
|
|
301
|
+
100.0f * (float)diff1 / (float)numResults,
|
|
302
|
+
diffN,
|
|
303
|
+
100.0f * (float)diffN / (float)numResults,
|
|
304
|
+
diffInf,
|
|
305
|
+
100.0f * (float)diffInf / (float)numResults,
|
|
306
|
+
nonUniqueIndices);
|
|
263
307
|
}
|
|
264
308
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
} else {
|
|
305
|
-
printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
|
|
306
|
-
"rel %.8f ref %a tst %a)\n",
|
|
307
|
-
query, result,
|
|
308
|
-
indexDiffs[query][result],
|
|
309
|
-
refI, testI, delta, relErr, refD, testD);
|
|
310
|
-
}
|
|
309
|
+
if (printDiffs) {
|
|
310
|
+
printf("differences:\n");
|
|
311
|
+
printf("==================\n");
|
|
312
|
+
for (int query = 0; query < dim1; ++query) {
|
|
313
|
+
for (int result = 0; result < dim2; ++result) {
|
|
314
|
+
long refI = lookup(refInd, query, result, dim1, dim2);
|
|
315
|
+
long testI = lookup(testInd, query, result, dim1, dim2);
|
|
316
|
+
|
|
317
|
+
if (refI != testI) {
|
|
318
|
+
float refD = lookup(refDist, query, result, dim1, dim2);
|
|
319
|
+
float testD = lookup(testDist, query, result, dim1, dim2);
|
|
320
|
+
|
|
321
|
+
float maxDist = std::max(refD, testD);
|
|
322
|
+
float delta = std::abs(refD - testD);
|
|
323
|
+
|
|
324
|
+
float relErr = delta / maxDist;
|
|
325
|
+
|
|
326
|
+
if (refD == testD) {
|
|
327
|
+
printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
|
|
328
|
+
query,
|
|
329
|
+
result,
|
|
330
|
+
indexDiffs[query][result],
|
|
331
|
+
refI,
|
|
332
|
+
testI);
|
|
333
|
+
} else {
|
|
334
|
+
printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
|
|
335
|
+
"rel %.8f ref %a tst %a)\n",
|
|
336
|
+
query,
|
|
337
|
+
result,
|
|
338
|
+
indexDiffs[query][result],
|
|
339
|
+
refI,
|
|
340
|
+
testI,
|
|
341
|
+
delta,
|
|
342
|
+
relErr,
|
|
343
|
+
refD,
|
|
344
|
+
testD);
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
}
|
|
311
348
|
}
|
|
312
|
-
}
|
|
313
349
|
}
|
|
314
|
-
}
|
|
315
350
|
}
|
|
316
351
|
|
|
317
|
-
}
|
|
352
|
+
} // namespace gpu
|
|
353
|
+
} // namespace faiss
|