faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -5,20 +5,20 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/gpu/test/TestUtils.h>
10
9
  #include <faiss/utils/random.h>
11
- #include <cmath>
12
10
  #include <gtest/gtest.h>
11
+ #include <time.h>
12
+ #include <cmath>
13
13
  #include <set>
14
14
  #include <sstream>
15
- #include <time.h>
16
15
  #include <unordered_map>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  inline float relativeError(float a, float b) {
21
- return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
21
+ return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
22
22
  }
23
23
 
24
24
  // This seed is also used for the faiss float_rand API; in a test it
@@ -28,290 +28,326 @@ std::mt19937 rng(1);
28
28
  std::uniform_int_distribution<> distrib;
29
29
 
30
30
  void newTestSeed() {
31
- struct timespec t;
32
- clock_gettime(CLOCK_REALTIME, &t);
31
+ struct timespec t;
32
+ clock_gettime(CLOCK_REALTIME, &t);
33
33
 
34
- setTestSeed(t.tv_nsec);
34
+ setTestSeed(t.tv_nsec);
35
35
  }
36
36
 
37
37
  void setTestSeed(long seed) {
38
- printf("testing with random seed %ld\n", seed);
38
+ printf("testing with random seed %ld\n", seed);
39
39
 
40
- rng = std::mt19937(seed);
41
- s_seed = seed;
40
+ rng = std::mt19937(seed);
41
+ s_seed = seed;
42
42
  }
43
43
 
44
44
  int randVal(int a, int b) {
45
- EXPECT_GE(a, 0);
46
- EXPECT_LE(a, b);
45
+ EXPECT_GE(a, 0);
46
+ EXPECT_LE(a, b);
47
47
 
48
- return a + (distrib(rng) % (b + 1 - a));
48
+ return a + (distrib(rng) % (b + 1 - a));
49
49
  }
50
50
 
51
51
  bool randBool() {
52
- return randSelect<bool>({true, false});
52
+ return randSelect<bool>({true, false});
53
53
  }
54
54
 
55
55
  std::vector<float> randVecs(size_t num, size_t dim) {
56
- std::vector<float> v(num * dim);
56
+ std::vector<float> v(num * dim);
57
57
 
58
- faiss::float_rand(v.data(), v.size(), s_seed);
59
- // unfortunately we generate separate sets of vectors, and don't
60
- // want the same values
61
- ++s_seed;
58
+ faiss::float_rand(v.data(), v.size(), s_seed);
59
+ // unfortunately we generate separate sets of vectors, and don't
60
+ // want the same values
61
+ ++s_seed;
62
62
 
63
- return v;
63
+ return v;
64
64
  }
65
65
 
66
66
  std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
67
- std::vector<unsigned char> v(num * (dim / 8));
67
+ std::vector<unsigned char> v(num * (dim / 8));
68
68
 
69
- faiss::byte_rand(v.data(), v.size(), s_seed);
70
- // unfortunately we generate separate sets of vectors, and don't
71
- // want the same values
72
- ++s_seed;
69
+ faiss::byte_rand(v.data(), v.size(), s_seed);
70
+ // unfortunately we generate separate sets of vectors, and don't
71
+ // want the same values
72
+ ++s_seed;
73
73
 
74
- return v;
74
+ return v;
75
75
  }
76
76
 
77
77
  void compareIndices(
78
- const std::vector<float>& queryVecs,
79
- faiss::Index& refIndex,
80
- faiss::Index& testIndex,
81
- int numQuery,
82
- int /*dim*/,
83
- int k,
84
- const std::string& configMsg,
85
- float maxRelativeError,
86
- float pctMaxDiff1,
87
- float pctMaxDiffN) {
88
- // Compare
89
- std::vector<float> refDistance(numQuery * k, 0);
90
- std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
91
- refIndex.search(numQuery, queryVecs.data(),
92
- k, refDistance.data(), refIndices.data());
93
-
94
- std::vector<float> testDistance(numQuery * k, 0);
95
- std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
96
- testIndex.search(numQuery, queryVecs.data(),
97
- k, testDistance.data(), testIndices.data());
98
-
99
- faiss::gpu::compareLists(refDistance.data(),
100
- refIndices.data(),
101
- testDistance.data(),
102
- testIndices.data(),
103
- numQuery, k,
104
- configMsg,
105
- true, false, true,
106
- maxRelativeError, pctMaxDiff1, pctMaxDiffN);
78
+ const std::vector<float>& queryVecs,
79
+ faiss::Index& refIndex,
80
+ faiss::Index& testIndex,
81
+ int numQuery,
82
+ int /*dim*/,
83
+ int k,
84
+ const std::string& configMsg,
85
+ float maxRelativeError,
86
+ float pctMaxDiff1,
87
+ float pctMaxDiffN) {
88
+ // Compare
89
+ std::vector<float> refDistance(numQuery * k, 0);
90
+ std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
91
+ refIndex.search(
92
+ numQuery,
93
+ queryVecs.data(),
94
+ k,
95
+ refDistance.data(),
96
+ refIndices.data());
97
+
98
+ std::vector<float> testDistance(numQuery * k, 0);
99
+ std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
100
+ testIndex.search(
101
+ numQuery,
102
+ queryVecs.data(),
103
+ k,
104
+ testDistance.data(),
105
+ testIndices.data());
106
+
107
+ faiss::gpu::compareLists(
108
+ refDistance.data(),
109
+ refIndices.data(),
110
+ testDistance.data(),
111
+ testIndices.data(),
112
+ numQuery,
113
+ k,
114
+ configMsg,
115
+ true,
116
+ false,
117
+ true,
118
+ maxRelativeError,
119
+ pctMaxDiff1,
120
+ pctMaxDiffN);
107
121
  }
108
122
 
109
- void compareIndices(faiss::Index& refIndex,
110
- faiss::Index& testIndex,
111
- int numQuery, int dim, int k,
112
- const std::string& configMsg,
113
- float maxRelativeError,
114
- float pctMaxDiff1,
115
- float pctMaxDiffN) {
116
- auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
117
-
118
- compareIndices(queryVecs,
119
- refIndex,
120
- testIndex,
121
- numQuery, dim, k,
122
- configMsg,
123
- maxRelativeError,
124
- pctMaxDiff1,
125
- pctMaxDiffN);
123
+ void compareIndices(
124
+ faiss::Index& refIndex,
125
+ faiss::Index& testIndex,
126
+ int numQuery,
127
+ int dim,
128
+ int k,
129
+ const std::string& configMsg,
130
+ float maxRelativeError,
131
+ float pctMaxDiff1,
132
+ float pctMaxDiffN) {
133
+ auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
134
+
135
+ compareIndices(
136
+ queryVecs,
137
+ refIndex,
138
+ testIndex,
139
+ numQuery,
140
+ dim,
141
+ k,
142
+ configMsg,
143
+ maxRelativeError,
144
+ pctMaxDiff1,
145
+ pctMaxDiffN);
126
146
  }
127
147
 
128
148
  template <typename T>
129
149
  inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
130
- return p[i * dim2 + j];
150
+ return p[i * dim2 + j];
131
151
  }
132
152
 
133
- void compareLists(const float* refDist,
134
- const faiss::Index::idx_t* refInd,
135
- const float* testDist,
136
- const faiss::Index::idx_t* testInd,
137
- int dim1, int dim2,
138
- const std::string& configMsg,
139
- bool printBasicStats, bool printDiffs, bool assertOnErr,
140
- float maxRelativeError,
141
- float pctMaxDiff1,
142
- float pctMaxDiffN) {
143
-
144
- float maxAbsErr = 0.0f;
145
- for (int i = 0; i < dim1 * dim2; ++i) {
146
- maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
147
- }
148
- int numResults = dim1 * dim2;
149
-
150
- // query -> {index -> result position}
151
- std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
152
-
153
- for (int query = 0; query < dim1; ++query) {
154
- std::unordered_map<faiss::Index::idx_t, int> indices;
155
-
156
- for (int result = 0; result < dim2; ++result) {
157
- indices[lookup(refInd, query, result, dim1, dim2)] = result;
153
+ void compareLists(
154
+ const float* refDist,
155
+ const faiss::Index::idx_t* refInd,
156
+ const float* testDist,
157
+ const faiss::Index::idx_t* testInd,
158
+ int dim1,
159
+ int dim2,
160
+ const std::string& configMsg,
161
+ bool printBasicStats,
162
+ bool printDiffs,
163
+ bool assertOnErr,
164
+ float maxRelativeError,
165
+ float pctMaxDiff1,
166
+ float pctMaxDiffN) {
167
+ float maxAbsErr = 0.0f;
168
+ for (int i = 0; i < dim1 * dim2; ++i) {
169
+ maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
158
170
  }
171
+ int numResults = dim1 * dim2;
159
172
 
160
- refIndexMap.emplace_back(std::move(indices));
161
- }
162
-
163
- // See how far off the indices are
164
- // Keep track of the difference for each entry
165
- std::vector<std::vector<int>> indexDiffs;
166
-
167
- int diff1 = 0; // index differs by 1
168
- int diffN = 0; // index differs by >1
169
- int diffInf = 0; // index not found in the other
170
- int nonUniqueIndices = 0;
171
-
172
- double avgDiff = 0.0;
173
- int maxDiff = 0;
174
- float maxRelErr = 0.0f;
175
-
176
- for (int query = 0; query < dim1; ++query) {
177
- std::vector<int> diffs;
178
- std::set<faiss::Index::idx_t> uniqueIndices;
179
-
180
- auto& indices = refIndexMap[query];
181
-
182
- for (int result = 0; result < dim2; ++result) {
183
- auto t = lookup(testInd, query, result, dim1, dim2);
184
-
185
- // All indices reported within a query should be unique; this is
186
- // a serious error if is otherwise the case.
187
- // If -1 is reported (no result due to IVF partitioning or not enough
188
- // entries in the index), then duplicates are allowed, but both the
189
- // reference and test must have -1 in the same position.
190
- if (t == -1) {
191
- EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
192
- } else {
193
- bool uniqueIndex = uniqueIndices.count(t) == 0;
194
- if (assertOnErr) {
195
- EXPECT_TRUE(uniqueIndex) << configMsg
196
- << " " << query
197
- << " " << result
198
- << " " << t;
199
- }
173
+ // query -> {index -> result position}
174
+ std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
200
175
 
201
- if (!uniqueIndex) {
202
- ++nonUniqueIndices;
203
- } else {
204
- uniqueIndices.insert(t);
205
- }
176
+ for (int query = 0; query < dim1; ++query) {
177
+ std::unordered_map<faiss::Index::idx_t, int> indices;
206
178
 
207
- auto it = indices.find(t);
208
- if (it != indices.end()) {
209
- int diff = std::abs(result - it->second);
210
- diffs.push_back(diff);
211
-
212
- if (diff == 1) {
213
- ++diff1;
214
- maxDiff = std::max(diff, maxDiff);
215
- } else if (diff > 1) {
216
- ++diffN;
217
- maxDiff = std::max(diff, maxDiff);
218
- }
219
-
220
- avgDiff += (double) diff;
221
- } else {
222
- ++diffInf;
223
- diffs.push_back(-1);
224
- // don't count this for maxDiff
179
+ for (int result = 0; result < dim2; ++result) {
180
+ indices[lookup(refInd, query, result, dim1, dim2)] = result;
225
181
  }
226
- }
227
182
 
228
- auto refD = lookup(refDist, query, result, dim1, dim2);
229
- auto testD = lookup(testDist, query, result, dim1, dim2);
183
+ refIndexMap.emplace_back(std::move(indices));
184
+ }
185
+
186
+ // See how far off the indices are
187
+ // Keep track of the difference for each entry
188
+ std::vector<std::vector<int>> indexDiffs;
230
189
 
231
- float relErr = relativeError(refD, testD);
190
+ int diff1 = 0; // index differs by 1
191
+ int diffN = 0; // index differs by >1
192
+ int diffInf = 0; // index not found in the other
193
+ int nonUniqueIndices = 0;
232
194
 
233
- if (assertOnErr) {
234
- EXPECT_LE(relErr, maxRelativeError) << configMsg
235
- << " (" << query << ", " << result
236
- << ") refD: " << refD
237
- << " testD: " << testD;
238
- }
195
+ double avgDiff = 0.0;
196
+ int maxDiff = 0;
197
+ float maxRelErr = 0.0f;
198
+
199
+ for (int query = 0; query < dim1; ++query) {
200
+ std::vector<int> diffs;
201
+ std::set<faiss::Index::idx_t> uniqueIndices;
202
+
203
+ auto& indices = refIndexMap[query];
204
+
205
+ for (int result = 0; result < dim2; ++result) {
206
+ auto t = lookup(testInd, query, result, dim1, dim2);
207
+
208
+ // All indices reported within a query should be unique; this is
209
+ // a serious error if is otherwise the case.
210
+ // If -1 is reported (no result due to IVF partitioning or not
211
+ // enough entries in the index), then duplicates are allowed, but
212
+ // both the reference and test must have -1 in the same position.
213
+ if (t == -1) {
214
+ EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
215
+ } else {
216
+ bool uniqueIndex = uniqueIndices.count(t) == 0;
217
+ if (assertOnErr) {
218
+ EXPECT_TRUE(uniqueIndex) << configMsg << " " << query << " "
219
+ << result << " " << t;
220
+ }
221
+
222
+ if (!uniqueIndex) {
223
+ ++nonUniqueIndices;
224
+ } else {
225
+ uniqueIndices.insert(t);
226
+ }
227
+
228
+ auto it = indices.find(t);
229
+ if (it != indices.end()) {
230
+ int diff = std::abs(result - it->second);
231
+ diffs.push_back(diff);
232
+
233
+ if (diff == 1) {
234
+ ++diff1;
235
+ maxDiff = std::max(diff, maxDiff);
236
+ } else if (diff > 1) {
237
+ ++diffN;
238
+ maxDiff = std::max(diff, maxDiff);
239
+ }
240
+
241
+ avgDiff += (double)diff;
242
+ } else {
243
+ ++diffInf;
244
+ diffs.push_back(-1);
245
+ // don't count this for maxDiff
246
+ }
247
+ }
248
+
249
+ auto refD = lookup(refDist, query, result, dim1, dim2);
250
+ auto testD = lookup(testDist, query, result, dim1, dim2);
251
+
252
+ float relErr = relativeError(refD, testD);
253
+
254
+ if (assertOnErr) {
255
+ EXPECT_LE(relErr, maxRelativeError)
256
+ << configMsg << " (" << query << ", " << result
257
+ << ") refD: " << refD << " testD: " << testD;
258
+ }
259
+
260
+ maxRelErr = std::max(maxRelErr, relErr);
261
+ }
239
262
 
240
- maxRelErr = std::max(maxRelErr, relErr);
263
+ indexDiffs.emplace_back(std::move(diffs));
241
264
  }
242
265
 
243
- indexDiffs.emplace_back(std::move(diffs));
244
- }
266
+ if (assertOnErr) {
267
+ EXPECT_LE(
268
+ (float)(diff1 + diffN + diffInf),
269
+ (float)numResults * pctMaxDiff1)
270
+ << configMsg;
245
271
 
246
- if (assertOnErr) {
247
- EXPECT_LE((float) (diff1 + diffN + diffInf),
248
- (float) numResults * pctMaxDiff1) << configMsg;
272
+ // Don't count diffInf because that could be diff1 as far as we
273
+ // know
274
+ EXPECT_LE((float)diffN, (float)numResults * pctMaxDiffN) << configMsg;
275
+ }
249
276
 
250
- // Don't count diffInf because that could be diff1 as far as we
251
- // know
252
- EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
253
- }
277
+ avgDiff /= (double)numResults;
254
278
 
255
- avgDiff /= (double) numResults;
279
+ if (printBasicStats) {
280
+ if (!configMsg.empty()) {
281
+ printf("Config\n"
282
+ "----------------------------\n"
283
+ "%s\n",
284
+ configMsg.c_str());
285
+ }
256
286
 
257
- if (printBasicStats) {
258
- if (!configMsg.empty()) {
259
- printf("Config\n"
260
- "----------------------------\n"
261
- "%s\n",
262
- configMsg.c_str());
287
+ printf("Result error and differences\n"
288
+ "----------------------------\n"
289
+ "max abs diff %.7f rel diff %.7f\n"
290
+ "idx diff avg: %.5g max: %d\n"
291
+ "idx diff of 1: %d (%.3f%% of queries)\n"
292
+ "idx diff of >1: %d (%.3f%% of queries)\n"
293
+ "idx diff not found: %d (%.3f%% of queries)"
294
+ " [typically a last element inversion]\n"
295
+ "non-unique indices: %d (a serious error if >0)\n",
296
+ maxAbsErr,
297
+ maxRelErr,
298
+ avgDiff,
299
+ maxDiff,
300
+ diff1,
301
+ 100.0f * (float)diff1 / (float)numResults,
302
+ diffN,
303
+ 100.0f * (float)diffN / (float)numResults,
304
+ diffInf,
305
+ 100.0f * (float)diffInf / (float)numResults,
306
+ nonUniqueIndices);
263
307
  }
264
308
 
265
- printf("Result error and differences\n"
266
- "----------------------------\n"
267
- "max abs diff %.7f rel diff %.7f\n"
268
- "idx diff avg: %.5g max: %d\n"
269
- "idx diff of 1: %d (%.3f%% of queries)\n"
270
- "idx diff of >1: %d (%.3f%% of queries)\n"
271
- "idx diff not found: %d (%.3f%% of queries)"
272
- " [typically a last element inversion]\n"
273
- "non-unique indices: %d (a serious error if >0)\n",
274
- maxAbsErr, maxRelErr,
275
- avgDiff, maxDiff,
276
- diff1, 100.0f * (float) diff1 / (float) numResults,
277
- diffN, 100.0f * (float) diffN / (float) numResults,
278
- diffInf, 100.0f * (float) diffInf / (float) numResults,
279
- nonUniqueIndices);
280
- }
281
-
282
- if (printDiffs) {
283
- printf("differences:\n");
284
- printf("==================\n");
285
- for (int query = 0; query < dim1; ++query) {
286
- for (int result = 0; result < dim2; ++result) {
287
- long refI = lookup(refInd, query, result, dim1, dim2);
288
- long testI = lookup(testInd, query, result, dim1, dim2);
289
-
290
- if (refI != testI) {
291
- float refD = lookup(refDist, query, result, dim1, dim2);
292
- float testD = lookup(testDist, query, result, dim1, dim2);
293
-
294
- float maxDist = std::max(refD, testD);
295
- float delta = std::abs(refD - testD);
296
-
297
- float relErr = delta / maxDist;
298
-
299
- if (refD == testD) {
300
- printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
301
- query, result,
302
- indexDiffs[query][result],
303
- refI, testI);
304
- } else {
305
- printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
306
- "rel %.8f ref %a tst %a)\n",
307
- query, result,
308
- indexDiffs[query][result],
309
- refI, testI, delta, relErr, refD, testD);
310
- }
309
+ if (printDiffs) {
310
+ printf("differences:\n");
311
+ printf("==================\n");
312
+ for (int query = 0; query < dim1; ++query) {
313
+ for (int result = 0; result < dim2; ++result) {
314
+ long refI = lookup(refInd, query, result, dim1, dim2);
315
+ long testI = lookup(testInd, query, result, dim1, dim2);
316
+
317
+ if (refI != testI) {
318
+ float refD = lookup(refDist, query, result, dim1, dim2);
319
+ float testD = lookup(testDist, query, result, dim1, dim2);
320
+
321
+ float maxDist = std::max(refD, testD);
322
+ float delta = std::abs(refD - testD);
323
+
324
+ float relErr = delta / maxDist;
325
+
326
+ if (refD == testD) {
327
+ printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
328
+ query,
329
+ result,
330
+ indexDiffs[query][result],
331
+ refI,
332
+ testI);
333
+ } else {
334
+ printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
335
+ "rel %.8f ref %a tst %a)\n",
336
+ query,
337
+ result,
338
+ indexDiffs[query][result],
339
+ refI,
340
+ testI,
341
+ delta,
342
+ relErr,
343
+ refD,
344
+ testD);
345
+ }
346
+ }
347
+ }
311
348
  }
312
- }
313
349
  }
314
- }
315
350
  }
316
351
 
317
- } }
352
+ } // namespace gpu
353
+ } // namespace faiss