faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#include <faiss/gpu/test/TestUtils.h>
|
|
10
|
+
#include <faiss/utils/random.h>
|
|
11
|
+
#include <cmath>
|
|
12
|
+
#include <gtest/gtest.h>
|
|
13
|
+
#include <set>
|
|
14
|
+
#include <sstream>
|
|
15
|
+
#include <time.h>
|
|
16
|
+
#include <unordered_map>
|
|
17
|
+
|
|
18
|
+
namespace faiss { namespace gpu {
|
|
19
|
+
|
|
20
|
+
/// Symmetric relative error between a and b:
///   |a - b| / (0.5 * (|a| + |b|))
///
/// Returns 0 when both inputs are zero. The raw formula would evaluate 0 / 0
/// there and yield NaN, which fails every `<=` tolerance comparison even
/// though the two values are identical.
///
/// Not marked `inline`: TestUtils.h declares this function without `inline`,
/// and a function declared inline in one translation unit must be declared
/// inline in all of them.
float relativeError(float a, float b) {
    const float scale = 0.5f * (std::abs(a) + std::abs(b));

    if (scale == 0.0f) {
        // a == b == 0: identical values, no error
        return 0.0f;
    }

    return std::abs(a - b) / scale;
}
|
|
23
|
+
|
|
24
|
+
// This seed is also used for the faiss float_rand API; in a test it
|
|
25
|
+
// is all within a single thread, so it is ok
|
|
26
|
+
// Overwritten by setTestSeed(); read and then incremented by randVecs() and
// randBinaryVecs() on every call.
long s_seed = 1;
|
|
27
|
+
|
|
28
|
+
/// Derives a seed from the nanosecond field of the current CLOCK_REALTIME
/// reading and installs it through setTestSeed().
void newTestSeed() {
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);

    const long nanos = now.tv_nsec;
    setTestSeed(nanos);
}
|
|
34
|
+
|
|
35
|
+
/// Prints `seed` to stdout, stores it in s_seed and seeds the drand48 family
/// of generators with it.
void setTestSeed(long seed) {
    printf("testing with random seed %ld\n", seed);

    // The two updates below are independent of each other
    s_seed = seed;
    srand48(seed);
}
|
|
41
|
+
|
|
42
|
+
/// Draws an integer from the closed interval [a, b] using lrand48().
/// `a >= 0` and `a <= b` are checked with non-fatal gtest expectations.
int randVal(int a, int b) {
    EXPECT_GE(a, 0);
    EXPECT_LE(a, b);

    // number of distinct values in [a, b]
    const int span = b + 1 - a;

    return a + (lrand48() % span);
}
|
|
48
|
+
|
|
49
|
+
bool randBool() {
|
|
50
|
+
return randSelect<bool>({true, false});
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/// Returns num * dim floats filled by faiss::float_rand() with the current
/// value of s_seed.
std::vector<float> randVecs(size_t num, size_t dim) {
    const size_t total = num * dim;
    std::vector<float> out(total);

    faiss::float_rand(out.data(), out.size(), s_seed);

    // Separate calls produce separate sets of vectors; bump the seed so the
    // next set does not repeat these values.
    s_seed += 1;

    return out;
}
|
|
63
|
+
|
|
64
|
+
/// Returns num * (dim / 8) bytes filled by faiss::byte_rand() with the current
/// value of s_seed. `dim / 8` is an integer division.
std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
    const size_t bytesPerVec = dim / 8;
    std::vector<unsigned char> out(num * bytesPerVec);

    faiss::byte_rand(out.data(), out.size(), s_seed);

    // Separate calls produce separate sets of vectors; bump the seed so the
    // next set does not repeat these values.
    s_seed += 1;

    return out;
}
|
|
74
|
+
|
|
75
|
+
void compareIndices(
|
|
76
|
+
const std::vector<float>& queryVecs,
|
|
77
|
+
faiss::Index& refIndex,
|
|
78
|
+
faiss::Index& testIndex,
|
|
79
|
+
int numQuery,
|
|
80
|
+
int /*dim*/,
|
|
81
|
+
int k,
|
|
82
|
+
const std::string& configMsg,
|
|
83
|
+
float maxRelativeError,
|
|
84
|
+
float pctMaxDiff1,
|
|
85
|
+
float pctMaxDiffN) {
|
|
86
|
+
// Compare
|
|
87
|
+
std::vector<float> refDistance(numQuery * k, 0);
|
|
88
|
+
std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
|
|
89
|
+
refIndex.search(numQuery, queryVecs.data(),
|
|
90
|
+
k, refDistance.data(), refIndices.data());
|
|
91
|
+
|
|
92
|
+
std::vector<float> testDistance(numQuery * k, 0);
|
|
93
|
+
std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
|
|
94
|
+
testIndex.search(numQuery, queryVecs.data(),
|
|
95
|
+
k, testDistance.data(), testIndices.data());
|
|
96
|
+
|
|
97
|
+
faiss::gpu::compareLists(refDistance.data(),
|
|
98
|
+
refIndices.data(),
|
|
99
|
+
testDistance.data(),
|
|
100
|
+
testIndices.data(),
|
|
101
|
+
numQuery, k,
|
|
102
|
+
configMsg,
|
|
103
|
+
true, false, true,
|
|
104
|
+
maxRelativeError, pctMaxDiff1, pctMaxDiffN);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/// Generates numQuery x dim query values with randVecs() and forwards them,
/// together with every other argument, to the overload that takes explicit
/// query vectors.
void compareIndices(faiss::Index& refIndex,
                    faiss::Index& testIndex,
                    int numQuery, int dim, int k,
                    const std::string& configMsg,
                    float maxRelativeError,
                    float pctMaxDiff1,
                    float pctMaxDiffN) {
    const std::vector<float> randomQueries =
        faiss::gpu::randVecs(numQuery, dim);

    compareIndices(randomQueries, refIndex, testIndex,
                   numQuery, dim, k,
                   configMsg,
                   maxRelativeError, pctMaxDiff1, pctMaxDiffN);
}
|
|
125
|
+
|
|
126
|
+
/// Reads element (i, j) of a row-major array whose rows hold `dim2` entries.
/// The first dimension is accepted but not used.
template <typename T>
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
    const int flat = i * dim2 + j;
    return p[flat];
}
|
|
130
|
+
|
|
131
|
+
void compareLists(const float* refDist,
|
|
132
|
+
const faiss::Index::idx_t* refInd,
|
|
133
|
+
const float* testDist,
|
|
134
|
+
const faiss::Index::idx_t* testInd,
|
|
135
|
+
int dim1, int dim2,
|
|
136
|
+
const std::string& configMsg,
|
|
137
|
+
bool printBasicStats, bool printDiffs, bool assertOnErr,
|
|
138
|
+
float maxRelativeError,
|
|
139
|
+
float pctMaxDiff1,
|
|
140
|
+
float pctMaxDiffN) {
|
|
141
|
+
|
|
142
|
+
float maxAbsErr = 0.0f;
|
|
143
|
+
for (int i = 0; i < dim1 * dim2; ++i) {
|
|
144
|
+
maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
|
|
145
|
+
}
|
|
146
|
+
int numResults = dim1 * dim2;
|
|
147
|
+
|
|
148
|
+
// query -> {index -> result position}
|
|
149
|
+
std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
|
|
150
|
+
|
|
151
|
+
for (int query = 0; query < dim1; ++query) {
|
|
152
|
+
std::unordered_map<faiss::Index::idx_t, int> indices;
|
|
153
|
+
|
|
154
|
+
for (int result = 0; result < dim2; ++result) {
|
|
155
|
+
indices[lookup(refInd, query, result, dim1, dim2)] = result;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
refIndexMap.emplace_back(std::move(indices));
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
// See how far off the indices are
|
|
162
|
+
// Keep track of the difference for each entry
|
|
163
|
+
std::vector<std::vector<int>> indexDiffs;
|
|
164
|
+
|
|
165
|
+
int diff1 = 0; // index differs by 1
|
|
166
|
+
int diffN = 0; // index differs by >1
|
|
167
|
+
int diffInf = 0; // index not found in the other
|
|
168
|
+
int nonUniqueIndices = 0;
|
|
169
|
+
|
|
170
|
+
double avgDiff = 0.0;
|
|
171
|
+
int maxDiff = 0;
|
|
172
|
+
float maxRelErr = 0.0f;
|
|
173
|
+
|
|
174
|
+
for (int query = 0; query < dim1; ++query) {
|
|
175
|
+
std::vector<int> diffs;
|
|
176
|
+
std::set<faiss::Index::idx_t> uniqueIndices;
|
|
177
|
+
|
|
178
|
+
auto& indices = refIndexMap[query];
|
|
179
|
+
|
|
180
|
+
for (int result = 0; result < dim2; ++result) {
|
|
181
|
+
auto t = lookup(testInd, query, result, dim1, dim2);
|
|
182
|
+
|
|
183
|
+
// All indices reported within a query should be unique; this is
|
|
184
|
+
// a serious error if is otherwise the case.
|
|
185
|
+
// If -1 is reported (no result due to IVF partitioning or not enough
|
|
186
|
+
// entries in the index), then duplicates are allowed, but both the
|
|
187
|
+
// reference and test must have -1 in the same position.
|
|
188
|
+
if (t == -1) {
|
|
189
|
+
EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
|
|
190
|
+
} else {
|
|
191
|
+
bool uniqueIndex = uniqueIndices.count(t) == 0;
|
|
192
|
+
if (assertOnErr) {
|
|
193
|
+
EXPECT_TRUE(uniqueIndex) << configMsg
|
|
194
|
+
<< " " << query
|
|
195
|
+
<< " " << result
|
|
196
|
+
<< " " << t;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
if (!uniqueIndex) {
|
|
200
|
+
++nonUniqueIndices;
|
|
201
|
+
} else {
|
|
202
|
+
uniqueIndices.insert(t);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
auto it = indices.find(t);
|
|
206
|
+
if (it != indices.end()) {
|
|
207
|
+
int diff = std::abs(result - it->second);
|
|
208
|
+
diffs.push_back(diff);
|
|
209
|
+
|
|
210
|
+
if (diff == 1) {
|
|
211
|
+
++diff1;
|
|
212
|
+
maxDiff = std::max(diff, maxDiff);
|
|
213
|
+
} else if (diff > 1) {
|
|
214
|
+
++diffN;
|
|
215
|
+
maxDiff = std::max(diff, maxDiff);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
avgDiff += (double) diff;
|
|
219
|
+
} else {
|
|
220
|
+
++diffInf;
|
|
221
|
+
diffs.push_back(-1);
|
|
222
|
+
// don't count this for maxDiff
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
auto refD = lookup(refDist, query, result, dim1, dim2);
|
|
227
|
+
auto testD = lookup(testDist, query, result, dim1, dim2);
|
|
228
|
+
|
|
229
|
+
float relErr = relativeError(refD, testD);
|
|
230
|
+
|
|
231
|
+
if (assertOnErr) {
|
|
232
|
+
EXPECT_LE(relErr, maxRelativeError) << configMsg
|
|
233
|
+
<< " (" << query << ", " << result
|
|
234
|
+
<< ") refD: " << refD
|
|
235
|
+
<< " testD: " << testD;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
maxRelErr = std::max(maxRelErr, relErr);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
indexDiffs.emplace_back(std::move(diffs));
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
if (assertOnErr) {
|
|
245
|
+
EXPECT_LE((float) (diff1 + diffN + diffInf),
|
|
246
|
+
(float) numResults * pctMaxDiff1) << configMsg;
|
|
247
|
+
|
|
248
|
+
// Don't count diffInf because that could be diff1 as far as we
|
|
249
|
+
// know
|
|
250
|
+
EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
avgDiff /= (double) numResults;
|
|
254
|
+
|
|
255
|
+
if (printBasicStats) {
|
|
256
|
+
if (!configMsg.empty()) {
|
|
257
|
+
printf("Config\n"
|
|
258
|
+
"----------------------------\n"
|
|
259
|
+
"%s\n",
|
|
260
|
+
configMsg.c_str());
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
printf("Result error and differences\n"
|
|
264
|
+
"----------------------------\n"
|
|
265
|
+
"max abs diff %.7f rel diff %.7f\n"
|
|
266
|
+
"idx diff avg: %.5g max: %d\n"
|
|
267
|
+
"idx diff of 1: %d (%.3f%% of queries)\n"
|
|
268
|
+
"idx diff of >1: %d (%.3f%% of queries)\n"
|
|
269
|
+
"idx diff not found: %d (%.3f%% of queries)"
|
|
270
|
+
" [typically a last element inversion]\n"
|
|
271
|
+
"non-unique indices: %d (a serious error if >0)\n",
|
|
272
|
+
maxAbsErr, maxRelErr,
|
|
273
|
+
avgDiff, maxDiff,
|
|
274
|
+
diff1, 100.0f * (float) diff1 / (float) numResults,
|
|
275
|
+
diffN, 100.0f * (float) diffN / (float) numResults,
|
|
276
|
+
diffInf, 100.0f * (float) diffInf / (float) numResults,
|
|
277
|
+
nonUniqueIndices);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
if (printDiffs) {
|
|
281
|
+
printf("differences:\n");
|
|
282
|
+
printf("==================\n");
|
|
283
|
+
for (int query = 0; query < dim1; ++query) {
|
|
284
|
+
for (int result = 0; result < dim2; ++result) {
|
|
285
|
+
long refI = lookup(refInd, query, result, dim1, dim2);
|
|
286
|
+
long testI = lookup(testInd, query, result, dim1, dim2);
|
|
287
|
+
|
|
288
|
+
if (refI != testI) {
|
|
289
|
+
float refD = lookup(refDist, query, result, dim1, dim2);
|
|
290
|
+
float testD = lookup(testDist, query, result, dim1, dim2);
|
|
291
|
+
|
|
292
|
+
float maxDist = std::max(refD, testD);
|
|
293
|
+
float delta = std::abs(refD - testD);
|
|
294
|
+
|
|
295
|
+
float relErr = delta / maxDist;
|
|
296
|
+
|
|
297
|
+
if (refD == testD) {
|
|
298
|
+
printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
|
|
299
|
+
query, result,
|
|
300
|
+
indexDiffs[query][result],
|
|
301
|
+
refI, testI);
|
|
302
|
+
} else {
|
|
303
|
+
printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
|
|
304
|
+
"rel %.8f ref %a tst %a)\n",
|
|
305
|
+
query, result,
|
|
306
|
+
indexDiffs[query][result],
|
|
307
|
+
refI, testI, delta, relErr, refD, testD);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
} }
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include <faiss/impl/FaissAssert.h>
|
|
12
|
+
#include <faiss/Index.h>
|
|
13
|
+
#include <initializer_list>
|
|
14
|
+
#include <memory>
|
|
15
|
+
#include <string>
|
|
16
|
+
#include <vector>
|
|
17
|
+
|
|
18
|
+
namespace faiss { namespace gpu {
|
|
19
|
+
|
|
20
|
+
/// Generates and displays a new seed for the test
|
|
21
|
+
void newTestSeed();
|
|
22
|
+
|
|
23
|
+
/// Uses an explicit seed for the test
|
|
24
|
+
void setTestSeed(long seed);
|
|
25
|
+
|
|
26
|
+
/// Returns the relative error in difference between a and b
|
|
27
|
+
/// (|a - b| / (0.5 * (|a| + |b|)))
|
|
28
|
+
float relativeError(float a, float b);
|
|
29
|
+
|
|
30
|
+
/// Generates a random integer in the range [a, b]
|
|
31
|
+
int randVal(int a, int b);
|
|
32
|
+
|
|
33
|
+
/// Generates a random bool
|
|
34
|
+
bool randBool();
|
|
35
|
+
|
|
36
|
+
/// Select a random value from the given list of values provided as an
|
|
37
|
+
/// initializer_list
|
|
38
|
+
template <typename T>
|
|
39
|
+
T randSelect(std::initializer_list<T> vals) {
|
|
40
|
+
FAISS_ASSERT(vals.size() > 0);
|
|
41
|
+
int sel = randVal(0, vals.size());
|
|
42
|
+
|
|
43
|
+
int i = 0;
|
|
44
|
+
for (auto v : vals) {
|
|
45
|
+
if (i++ == sel) {
|
|
46
|
+
return v;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// should not get here
|
|
51
|
+
return *vals.begin();
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/// Generates a collection of random vectors in the range [0, 1]
|
|
55
|
+
std::vector<float> randVecs(size_t num, size_t dim);
|
|
56
|
+
|
|
57
|
+
/// Generates a collection of random bit vectors
|
|
58
|
+
std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim);
|
|
59
|
+
|
|
60
|
+
/// Compare two indices via query for similarity, with a user-specified set of
|
|
61
|
+
/// query vectors
|
|
62
|
+
void compareIndices(const std::vector<float>& queryVecs,
|
|
63
|
+
faiss::Index& refIndex,
|
|
64
|
+
faiss::Index& testIndex,
|
|
65
|
+
int numQuery, int dim, int k,
|
|
66
|
+
const std::string& configMsg,
|
|
67
|
+
float maxRelativeError = 6e-5f,
|
|
68
|
+
float pctMaxDiff1 = 0.1f,
|
|
69
|
+
float pctMaxDiffN = 0.005f);
|
|
70
|
+
|
|
71
|
+
/// Compare two indices via query for similarity, generating random query
|
|
72
|
+
/// vectors
|
|
73
|
+
void compareIndices(faiss::Index& refIndex,
|
|
74
|
+
faiss::Index& testIndex,
|
|
75
|
+
int numQuery, int dim, int k,
|
|
76
|
+
const std::string& configMsg,
|
|
77
|
+
float maxRelativeError = 6e-5f,
|
|
78
|
+
float pctMaxDiff1 = 0.1f,
|
|
79
|
+
float pctMaxDiffN = 0.005f);
|
|
80
|
+
|
|
81
|
+
/// Display specific differences in the two (distance, index) lists
|
|
82
|
+
void compareLists(const float* refDist,
|
|
83
|
+
const faiss::Index::idx_t* refInd,
|
|
84
|
+
const float* testDist,
|
|
85
|
+
const faiss::Index::idx_t* testInd,
|
|
86
|
+
int dim1, int dim2,
|
|
87
|
+
const std::string& configMsg,
|
|
88
|
+
bool printBasicStats, bool printDiffs, bool assertOnErr,
|
|
89
|
+
float maxRelativeError = 6e-5f,
|
|
90
|
+
float pctMaxDiff1 = 0.1f,
|
|
91
|
+
float pctMaxDiffN = 0.005f);
|
|
92
|
+
|
|
93
|
+
} }
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Copyright 2004-present Facebook. All Rights Reserved
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
#include <cmath>
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
#include <cstdlib>
|
|
14
|
+
|
|
15
|
+
#include <sys/time.h>
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
#include <faiss/gpu/StandardGpuResources.h>
|
|
19
|
+
#include <faiss/gpu/GpuIndexIVFPQ.h>
|
|
20
|
+
|
|
21
|
+
#include <faiss/gpu/GpuAutoTune.h>
|
|
22
|
+
#include <faiss/index_io.h>
|
|
23
|
+
|
|
24
|
+
/// Current gettimeofday() reading expressed in seconds, including the
/// microsecond fraction.
double elapsed ()
{
    struct timeval now;
    gettimeofday (&now, nullptr);

    const double micros = now.tv_usec * 1e-6;
    return now.tv_sec + micros;
}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
int main ()
|
|
33
|
+
{
|
|
34
|
+
|
|
35
|
+
double t0 = elapsed();
|
|
36
|
+
|
|
37
|
+
// dimension of the vectors to index
|
|
38
|
+
int d = 128;
|
|
39
|
+
|
|
40
|
+
// size of the database we plan to index
|
|
41
|
+
size_t nb = 200 * 1000;
|
|
42
|
+
|
|
43
|
+
// make a set of nt training vectors in the unit cube
|
|
44
|
+
// (could be the database)
|
|
45
|
+
size_t nt = 100 * 1000;
|
|
46
|
+
|
|
47
|
+
int dev_no = 0;
|
|
48
|
+
/*
|
|
49
|
+
printf ("[%.3f s] Begin d=%d nb=%ld nt=%nt dev_no=%d\n",
|
|
50
|
+
elapsed() - t0, d, nb, nt, dev_no);
|
|
51
|
+
*/
|
|
52
|
+
// a reasonable number of centroids to index nb vectors
|
|
53
|
+
int ncentroids = int (4 * sqrt (nb));
|
|
54
|
+
|
|
55
|
+
faiss::gpu::StandardGpuResources resources;
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
// the coarse quantizer should not be dealloced before the index
|
|
59
|
+
// 4 = nb of bytes per code (d must be a multiple of this)
|
|
60
|
+
// 8 = nb of bits per sub-code (almost always 8)
|
|
61
|
+
faiss::gpu::GpuIndexIVFPQConfig config;
|
|
62
|
+
config.device = dev_no;
|
|
63
|
+
|
|
64
|
+
faiss::gpu::GpuIndexIVFPQ index (
|
|
65
|
+
&resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
|
|
66
|
+
|
|
67
|
+
{ // training
|
|
68
|
+
printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
|
|
69
|
+
elapsed() - t0, nt, d);
|
|
70
|
+
|
|
71
|
+
std::vector <float> trainvecs (nt * d);
|
|
72
|
+
for (size_t i = 0; i < nt * d; i++) {
|
|
73
|
+
trainvecs[i] = drand48();
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
printf ("[%.3f s] Training the index\n",
|
|
77
|
+
elapsed() - t0);
|
|
78
|
+
index.verbose = true;
|
|
79
|
+
|
|
80
|
+
index.train (nt, trainvecs.data());
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
{ // I/O demo
|
|
84
|
+
const char *outfilename = "/tmp/index_trained.faissindex";
|
|
85
|
+
printf ("[%.3f s] storing the pre-trained index to %s\n",
|
|
86
|
+
elapsed() - t0, outfilename);
|
|
87
|
+
|
|
88
|
+
faiss::Index * cpu_index = faiss::gpu::index_gpu_to_cpu (&index);
|
|
89
|
+
|
|
90
|
+
write_index (cpu_index, outfilename);
|
|
91
|
+
|
|
92
|
+
delete cpu_index;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
size_t nq;
|
|
96
|
+
std::vector<float> queries;
|
|
97
|
+
|
|
98
|
+
{ // populating the database
|
|
99
|
+
printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
|
|
100
|
+
elapsed() - t0, nb);
|
|
101
|
+
|
|
102
|
+
std::vector <float> database (nb * d);
|
|
103
|
+
for (size_t i = 0; i < nb * d; i++) {
|
|
104
|
+
database[i] = drand48();
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
printf ("[%.3f s] Adding the vectors to the index\n",
|
|
108
|
+
elapsed() - t0);
|
|
109
|
+
|
|
110
|
+
index.add (nb, database.data());
|
|
111
|
+
|
|
112
|
+
printf ("[%.3f s] done\n", elapsed() - t0);
|
|
113
|
+
|
|
114
|
+
// remember a few elements from the database as queries
|
|
115
|
+
int i0 = 1234;
|
|
116
|
+
int i1 = 1243;
|
|
117
|
+
|
|
118
|
+
nq = i1 - i0;
|
|
119
|
+
queries.resize (nq * d);
|
|
120
|
+
for (int i = i0; i < i1; i++) {
|
|
121
|
+
for (int j = 0; j < d; j++) {
|
|
122
|
+
queries [(i - i0) * d + j] = database [i * d + j];
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
{ // searching the database
|
|
129
|
+
int k = 5;
|
|
130
|
+
printf ("[%.3f s] Searching the %d nearest neighbors "
|
|
131
|
+
"of %ld vectors in the index\n",
|
|
132
|
+
elapsed() - t0, k, nq);
|
|
133
|
+
|
|
134
|
+
std::vector<faiss::Index::idx_t> nns (k * nq);
|
|
135
|
+
std::vector<float> dis (k * nq);
|
|
136
|
+
|
|
137
|
+
index.search (nq, queries.data(), k, dis.data(), nns.data());
|
|
138
|
+
|
|
139
|
+
printf ("[%.3f s] Query results (vector ids, then distances):\n",
|
|
140
|
+
elapsed() - t0);
|
|
141
|
+
|
|
142
|
+
for (int i = 0; i < nq; i++) {
|
|
143
|
+
printf ("query %2d: ", i);
|
|
144
|
+
for (int j = 0; j < k; j++) {
|
|
145
|
+
printf ("%7ld ", nns[j + i * k]);
|
|
146
|
+
}
|
|
147
|
+
printf ("\n dis: ");
|
|
148
|
+
for (int j = 0; j < k; j++) {
|
|
149
|
+
printf ("%7g ", dis[j + i * k]);
|
|
150
|
+
}
|
|
151
|
+
printf ("\n");
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
printf ("note that the nearest neighbor is not at "
|
|
155
|
+
"distance 0 due to quantization errors\n");
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return 0;
|
|
159
|
+
}
|