faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,315 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/gpu/test/TestUtils.h>
10
+ #include <faiss/utils/random.h>
11
+ #include <cmath>
12
+ #include <gtest/gtest.h>
13
+ #include <set>
14
+ #include <sstream>
15
+ #include <time.h>
16
+ #include <unordered_map>
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
/// Returns the symmetric relative error between a and b:
/// |a - b| / (0.5 * (|a| + |b|)).
///
/// Equal inputs (including the 0 == 0 case) yield 0 instead of the NaN that
/// the raw formula produces when the denominator is zero; a NaN here makes
/// every `relErr <= maxRelativeError` expectation fail on exact matches.
///
/// Not declared `inline`: the header declares this function as a normal
/// external function, so the definition must match that declaration.
float relativeError(float a, float b) {
  if (a == b) {
    return 0.0f;
  }

  return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
}
23
+
24
+ // Seed stored by setTestSeed() and passed to faiss::float_rand / faiss::byte_rand by the vector generators below;
25
+ // it is an unsynchronized global, which is ok because in a test it is all used within a single thread
26
+ long s_seed = 1;
27
+
28
+ void newTestSeed() {
29
+ struct timespec t;
30
+ clock_gettime(CLOCK_REALTIME, &t);
31
+
32
+ setTestSeed(t.tv_nsec);
33
+ }
34
+
35
+ void setTestSeed(long seed) {
36
+ printf("testing with random seed %ld\n", seed);
37
+
38
+ srand48(seed);
39
+ s_seed = seed;
40
+ }
41
+
42
+ int randVal(int a, int b) {
43
+ EXPECT_GE(a, 0);
44
+ EXPECT_LE(a, b);
45
+
46
+ return a + (lrand48() % (b + 1 - a));
47
+ }
48
+
49
+ bool randBool() {
50
+ return randSelect<bool>({true, false});
51
+ }
52
+
53
+ std::vector<float> randVecs(size_t num, size_t dim) {
54
+ std::vector<float> v(num * dim);
55
+
56
+ faiss::float_rand(v.data(), v.size(), s_seed);
57
+ // unfortunately we generate separate sets of vectors, and don't
58
+ // want the same values
59
+ ++s_seed;
60
+
61
+ return v;
62
+ }
63
+
64
+ std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
65
+ std::vector<unsigned char> v(num * (dim / 8));
66
+
67
+ faiss::byte_rand(v.data(), v.size(), s_seed);
68
+ // unfortunately we generate separate sets of vectors, and don't
69
+ // want the same values
70
+ ++s_seed;
71
+
72
+ return v;
73
+ }
74
+
75
+ void compareIndices(
76
+ const std::vector<float>& queryVecs,
77
+ faiss::Index& refIndex,
78
+ faiss::Index& testIndex,
79
+ int numQuery,
80
+ int /*dim*/,
81
+ int k,
82
+ const std::string& configMsg,
83
+ float maxRelativeError,
84
+ float pctMaxDiff1,
85
+ float pctMaxDiffN) {
86
+ // Compare
87
+ std::vector<float> refDistance(numQuery * k, 0);
88
+ std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
89
+ refIndex.search(numQuery, queryVecs.data(),
90
+ k, refDistance.data(), refIndices.data());
91
+
92
+ std::vector<float> testDistance(numQuery * k, 0);
93
+ std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
94
+ testIndex.search(numQuery, queryVecs.data(),
95
+ k, testDistance.data(), testIndices.data());
96
+
97
+ faiss::gpu::compareLists(refDistance.data(),
98
+ refIndices.data(),
99
+ testDistance.data(),
100
+ testIndices.data(),
101
+ numQuery, k,
102
+ configMsg,
103
+ true, false, true,
104
+ maxRelativeError, pctMaxDiff1, pctMaxDiffN);
105
+ }
106
+
107
+ void compareIndices(faiss::Index& refIndex,
108
+ faiss::Index& testIndex,
109
+ int numQuery, int dim, int k,
110
+ const std::string& configMsg,
111
+ float maxRelativeError,
112
+ float pctMaxDiff1,
113
+ float pctMaxDiffN) {
114
+ auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
115
+
116
+ compareIndices(queryVecs,
117
+ refIndex,
118
+ testIndex,
119
+ numQuery, dim, k,
120
+ configMsg,
121
+ maxRelativeError,
122
+ pctMaxDiff1,
123
+ pctMaxDiffN);
124
+ }
125
+
126
/// Reads element (i, j) of a row-major 2-d array whose rows hold dim2
/// elements each. The first dimension is accepted for symmetry but unused.
///
/// The flat offset is computed in a 64-bit type so that i * dim2 cannot
/// overflow `int` for large arrays.
template <typename T>
inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
  const long long offset = static_cast<long long>(i) * dim2 + j;
  return p[offset];
}
130
+
131
+ void compareLists(const float* refDist,
132
+ const faiss::Index::idx_t* refInd,
133
+ const float* testDist,
134
+ const faiss::Index::idx_t* testInd,
135
+ int dim1, int dim2,
136
+ const std::string& configMsg,
137
+ bool printBasicStats, bool printDiffs, bool assertOnErr,
138
+ float maxRelativeError,
139
+ float pctMaxDiff1,
140
+ float pctMaxDiffN) {
141
+
142
+ float maxAbsErr = 0.0f;
143
+ for (int i = 0; i < dim1 * dim2; ++i) {
144
+ maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
145
+ }
146
+ int numResults = dim1 * dim2;
147
+
148
+ // query -> {index -> result position}
149
+ std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
150
+
151
+ for (int query = 0; query < dim1; ++query) {
152
+ std::unordered_map<faiss::Index::idx_t, int> indices;
153
+
154
+ for (int result = 0; result < dim2; ++result) {
155
+ indices[lookup(refInd, query, result, dim1, dim2)] = result;
156
+ }
157
+
158
+ refIndexMap.emplace_back(std::move(indices));
159
+ }
160
+
161
+ // See how far off the indices are
162
+ // Keep track of the difference for each entry
163
+ std::vector<std::vector<int>> indexDiffs;
164
+
165
+ int diff1 = 0; // index differs by 1
166
+ int diffN = 0; // index differs by >1
167
+ int diffInf = 0; // index not found in the other
168
+ int nonUniqueIndices = 0;
169
+
170
+ double avgDiff = 0.0;
171
+ int maxDiff = 0;
172
+ float maxRelErr = 0.0f;
173
+
174
+ for (int query = 0; query < dim1; ++query) {
175
+ std::vector<int> diffs;
176
+ std::set<faiss::Index::idx_t> uniqueIndices;
177
+
178
+ auto& indices = refIndexMap[query];
179
+
180
+ for (int result = 0; result < dim2; ++result) {
181
+ auto t = lookup(testInd, query, result, dim1, dim2);
182
+
183
+ // All indices reported within a query should be unique; this is
184
+ // a serious error if is otherwise the case.
185
+ // If -1 is reported (no result due to IVF partitioning or not enough
186
+ // entries in the index), then duplicates are allowed, but both the
187
+ // reference and test must have -1 in the same position.
188
+ if (t == -1) {
189
+ EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
190
+ } else {
191
+ bool uniqueIndex = uniqueIndices.count(t) == 0;
192
+ if (assertOnErr) {
193
+ EXPECT_TRUE(uniqueIndex) << configMsg
194
+ << " " << query
195
+ << " " << result
196
+ << " " << t;
197
+ }
198
+
199
+ if (!uniqueIndex) {
200
+ ++nonUniqueIndices;
201
+ } else {
202
+ uniqueIndices.insert(t);
203
+ }
204
+
205
+ auto it = indices.find(t);
206
+ if (it != indices.end()) {
207
+ int diff = std::abs(result - it->second);
208
+ diffs.push_back(diff);
209
+
210
+ if (diff == 1) {
211
+ ++diff1;
212
+ maxDiff = std::max(diff, maxDiff);
213
+ } else if (diff > 1) {
214
+ ++diffN;
215
+ maxDiff = std::max(diff, maxDiff);
216
+ }
217
+
218
+ avgDiff += (double) diff;
219
+ } else {
220
+ ++diffInf;
221
+ diffs.push_back(-1);
222
+ // don't count this for maxDiff
223
+ }
224
+ }
225
+
226
+ auto refD = lookup(refDist, query, result, dim1, dim2);
227
+ auto testD = lookup(testDist, query, result, dim1, dim2);
228
+
229
+ float relErr = relativeError(refD, testD);
230
+
231
+ if (assertOnErr) {
232
+ EXPECT_LE(relErr, maxRelativeError) << configMsg
233
+ << " (" << query << ", " << result
234
+ << ") refD: " << refD
235
+ << " testD: " << testD;
236
+ }
237
+
238
+ maxRelErr = std::max(maxRelErr, relErr);
239
+ }
240
+
241
+ indexDiffs.emplace_back(std::move(diffs));
242
+ }
243
+
244
+ if (assertOnErr) {
245
+ EXPECT_LE((float) (diff1 + diffN + diffInf),
246
+ (float) numResults * pctMaxDiff1) << configMsg;
247
+
248
+ // Don't count diffInf because that could be diff1 as far as we
249
+ // know
250
+ EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
251
+ }
252
+
253
+ avgDiff /= (double) numResults;
254
+
255
+ if (printBasicStats) {
256
+ if (!configMsg.empty()) {
257
+ printf("Config\n"
258
+ "----------------------------\n"
259
+ "%s\n",
260
+ configMsg.c_str());
261
+ }
262
+
263
+ printf("Result error and differences\n"
264
+ "----------------------------\n"
265
+ "max abs diff %.7f rel diff %.7f\n"
266
+ "idx diff avg: %.5g max: %d\n"
267
+ "idx diff of 1: %d (%.3f%% of queries)\n"
268
+ "idx diff of >1: %d (%.3f%% of queries)\n"
269
+ "idx diff not found: %d (%.3f%% of queries)"
270
+ " [typically a last element inversion]\n"
271
+ "non-unique indices: %d (a serious error if >0)\n",
272
+ maxAbsErr, maxRelErr,
273
+ avgDiff, maxDiff,
274
+ diff1, 100.0f * (float) diff1 / (float) numResults,
275
+ diffN, 100.0f * (float) diffN / (float) numResults,
276
+ diffInf, 100.0f * (float) diffInf / (float) numResults,
277
+ nonUniqueIndices);
278
+ }
279
+
280
+ if (printDiffs) {
281
+ printf("differences:\n");
282
+ printf("==================\n");
283
+ for (int query = 0; query < dim1; ++query) {
284
+ for (int result = 0; result < dim2; ++result) {
285
+ long refI = lookup(refInd, query, result, dim1, dim2);
286
+ long testI = lookup(testInd, query, result, dim1, dim2);
287
+
288
+ if (refI != testI) {
289
+ float refD = lookup(refDist, query, result, dim1, dim2);
290
+ float testD = lookup(testDist, query, result, dim1, dim2);
291
+
292
+ float maxDist = std::max(refD, testD);
293
+ float delta = std::abs(refD - testD);
294
+
295
+ float relErr = delta / maxDist;
296
+
297
+ if (refD == testD) {
298
+ printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
299
+ query, result,
300
+ indexDiffs[query][result],
301
+ refI, testI);
302
+ } else {
303
+ printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
304
+ "rel %.8f ref %a tst %a)\n",
305
+ query, result,
306
+ indexDiffs[query][result],
307
+ refI, testI, delta, relErr, refD, testD);
308
+ }
309
+ }
310
+ }
311
+ }
312
+ }
313
+ }
314
+
315
+ } }
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/impl/FaissAssert.h>
12
+ #include <faiss/Index.h>
13
+ #include <initializer_list>
14
+ #include <memory>
15
+ #include <string>
16
+ #include <vector>
17
+
18
+ namespace faiss { namespace gpu {
19
+
20
+ /// Generates and displays a new seed for the test
21
+ void newTestSeed();
22
+
23
+ /// Uses an explicit seed for the test
24
+ void setTestSeed(long seed);
25
+
26
+ /// Returns the relative error in difference between a and b
27
+ /// (|a - b| / (0.5 * (|a| + |b|)))
28
+ float relativeError(float a, float b);
29
+
30
+ /// Generates a random integer in the range [a, b]
31
+ int randVal(int a, int b);
32
+
33
+ /// Generates a random bool
34
+ bool randBool();
35
+
36
+ /// Select a random value from the given list of values provided as an
37
+ /// initializer_list
38
+ template <typename T>
39
+ T randSelect(std::initializer_list<T> vals) {
40
+ FAISS_ASSERT(vals.size() > 0);
41
+ int sel = randVal(0, vals.size());
42
+
43
+ int i = 0;
44
+ for (auto v : vals) {
45
+ if (i++ == sel) {
46
+ return v;
47
+ }
48
+ }
49
+
50
+ // should not get here
51
+ return *vals.begin();
52
+ }
53
+
54
+ /// Generates a collection of random vectors in the range [0, 1]
55
+ std::vector<float> randVecs(size_t num, size_t dim);
56
+
57
+ /// Generates a collection of random bit vectors
58
+ std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim);
59
+
60
+ /// Compare two indices via query for similarity, with a user-specified set of
61
+ /// query vectors
62
+ void compareIndices(const std::vector<float>& queryVecs,
63
+ faiss::Index& refIndex,
64
+ faiss::Index& testIndex,
65
+ int numQuery, int dim, int k,
66
+ const std::string& configMsg,
67
+ float maxRelativeError = 6e-5f,
68
+ float pctMaxDiff1 = 0.1f,
69
+ float pctMaxDiffN = 0.005f);
70
+
71
+ /// Compare two indices via query for similarity, generating random query
72
+ /// vectors
73
+ void compareIndices(faiss::Index& refIndex,
74
+ faiss::Index& testIndex,
75
+ int numQuery, int dim, int k,
76
+ const std::string& configMsg,
77
+ float maxRelativeError = 6e-5f,
78
+ float pctMaxDiff1 = 0.1f,
79
+ float pctMaxDiffN = 0.005f);
80
+
81
+ /// Display specific differences in the two (distance, index) lists
82
+ void compareLists(const float* refDist,
83
+ const faiss::Index::idx_t* refInd,
84
+ const float* testDist,
85
+ const faiss::Index::idx_t* testInd,
86
+ int dim1, int dim2,
87
+ const std::string& configMsg,
88
+ bool printBasicStats, bool printDiffs, bool assertOnErr,
89
+ float maxRelativeError = 6e-5f,
90
+ float pctMaxDiff1 = 0.1f,
91
+ float pctMaxDiffN = 0.005f);
92
+
93
+ } }
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // Copyright 2004-present Facebook. All Rights Reserved
9
+
10
+
11
+ #include <cmath>
12
+ #include <cstdio>
13
+ #include <cstdlib>
14
+
15
+ #include <sys/time.h>
16
+
17
+
18
+ #include <faiss/gpu/StandardGpuResources.h>
19
+ #include <faiss/gpu/GpuIndexIVFPQ.h>
20
+
21
+ #include <faiss/gpu/GpuAutoTune.h>
22
+ #include <faiss/index_io.h>
23
+
24
/// Returns the current gettimeofday() reading as seconds, with the
/// microsecond field folded in as the fractional part.
double elapsed ()
{
    timeval now;
    gettimeofday (&now, nullptr);

    const double micros = now.tv_usec * 1e-6;
    return now.tv_sec + micros;
}
30
+
31
+
32
+ int main ()
33
+ {
34
+
35
+ double t0 = elapsed();
36
+
37
+ // dimension of the vectors to index
38
+ int d = 128;
39
+
40
+ // size of the database we plan to index
41
+ size_t nb = 200 * 1000;
42
+
43
+ // make a set of nt training vectors in the unit cube
44
+ // (could be the database)
45
+ size_t nt = 100 * 1000;
46
+
47
+ int dev_no = 0;
48
+ /*
49
+ printf ("[%.3f s] Begin d=%d nb=%ld nt=%nt dev_no=%d\n",
50
+ elapsed() - t0, d, nb, nt, dev_no);
51
+ */
52
+ // a reasonable number of centroids to index nb vectors
53
+ int ncentroids = int (4 * sqrt (nb));
54
+
55
+ faiss::gpu::StandardGpuResources resources;
56
+
57
+
58
+ // the coarse quantizer should not be dealloced before the index
59
+ // 4 = nb of bytes per code (d must be a multiple of this)
60
+ // 8 = nb of bits per sub-code (almost always 8)
61
+ faiss::gpu::GpuIndexIVFPQConfig config;
62
+ config.device = dev_no;
63
+
64
+ faiss::gpu::GpuIndexIVFPQ index (
65
+ &resources, d, ncentroids, 4, 8, faiss::METRIC_L2, config);
66
+
67
+ { // training
68
+ printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
69
+ elapsed() - t0, nt, d);
70
+
71
+ std::vector <float> trainvecs (nt * d);
72
+ for (size_t i = 0; i < nt * d; i++) {
73
+ trainvecs[i] = drand48();
74
+ }
75
+
76
+ printf ("[%.3f s] Training the index\n",
77
+ elapsed() - t0);
78
+ index.verbose = true;
79
+
80
+ index.train (nt, trainvecs.data());
81
+ }
82
+
83
+ { // I/O demo
84
+ const char *outfilename = "/tmp/index_trained.faissindex";
85
+ printf ("[%.3f s] storing the pre-trained index to %s\n",
86
+ elapsed() - t0, outfilename);
87
+
88
+ faiss::Index * cpu_index = faiss::gpu::index_gpu_to_cpu (&index);
89
+
90
+ write_index (cpu_index, outfilename);
91
+
92
+ delete cpu_index;
93
+ }
94
+
95
+ size_t nq;
96
+ std::vector<float> queries;
97
+
98
+ { // populating the database
99
+ printf ("[%.3f s] Building a dataset of %ld vectors to index\n",
100
+ elapsed() - t0, nb);
101
+
102
+ std::vector <float> database (nb * d);
103
+ for (size_t i = 0; i < nb * d; i++) {
104
+ database[i] = drand48();
105
+ }
106
+
107
+ printf ("[%.3f s] Adding the vectors to the index\n",
108
+ elapsed() - t0);
109
+
110
+ index.add (nb, database.data());
111
+
112
+ printf ("[%.3f s] done\n", elapsed() - t0);
113
+
114
+ // remember a few elements from the database as queries
115
+ int i0 = 1234;
116
+ int i1 = 1243;
117
+
118
+ nq = i1 - i0;
119
+ queries.resize (nq * d);
120
+ for (int i = i0; i < i1; i++) {
121
+ for (int j = 0; j < d; j++) {
122
+ queries [(i - i0) * d + j] = database [i * d + j];
123
+ }
124
+ }
125
+
126
+ }
127
+
128
+ { // searching the database
129
+ int k = 5;
130
+ printf ("[%.3f s] Searching the %d nearest neighbors "
131
+ "of %ld vectors in the index\n",
132
+ elapsed() - t0, k, nq);
133
+
134
+ std::vector<faiss::Index::idx_t> nns (k * nq);
135
+ std::vector<float> dis (k * nq);
136
+
137
+ index.search (nq, queries.data(), k, dis.data(), nns.data());
138
+
139
+ printf ("[%.3f s] Query results (vector ids, then distances):\n",
140
+ elapsed() - t0);
141
+
142
+ for (int i = 0; i < nq; i++) {
143
+ printf ("query %2d: ", i);
144
+ for (int j = 0; j < k; j++) {
145
+ printf ("%7ld ", nns[j + i * k]);
146
+ }
147
+ printf ("\n dis: ");
148
+ for (int j = 0; j < k; j++) {
149
+ printf ("%7g ", dis[j + i * k]);
150
+ }
151
+ printf ("\n");
152
+ }
153
+
154
+ printf ("note that the nearest neighbor is not at "
155
+ "distance 0 due to quantization errors\n");
156
+ }
157
+
158
+ return 0;
159
+ }