faiss 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -5,20 +5,20 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/gpu/test/TestUtils.h>
10
9
  #include <faiss/utils/random.h>
11
- #include <cmath>
12
10
  #include <gtest/gtest.h>
11
+ #include <time.h>
12
+ #include <cmath>
13
13
  #include <set>
14
14
  #include <sstream>
15
- #include <time.h>
16
15
  #include <unordered_map>
17
16
 
18
- namespace faiss { namespace gpu {
17
+ namespace faiss {
18
+ namespace gpu {
19
19
 
20
20
  inline float relativeError(float a, float b) {
21
- return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
21
+ return std::abs(a - b) / (0.5f * (std::abs(a) + std::abs(b)));
22
22
  }
23
23
 
24
24
  // This seed is also used for the faiss float_rand API; in a test it
@@ -28,290 +28,326 @@ std::mt19937 rng(1);
28
28
  std::uniform_int_distribution<> distrib;
29
29
 
30
30
  void newTestSeed() {
31
- struct timespec t;
32
- clock_gettime(CLOCK_REALTIME, &t);
31
+ struct timespec t;
32
+ clock_gettime(CLOCK_REALTIME, &t);
33
33
 
34
- setTestSeed(t.tv_nsec);
34
+ setTestSeed(t.tv_nsec);
35
35
  }
36
36
 
37
37
  void setTestSeed(long seed) {
38
- printf("testing with random seed %ld\n", seed);
38
+ printf("testing with random seed %ld\n", seed);
39
39
 
40
- rng = std::mt19937(seed);
41
- s_seed = seed;
40
+ rng = std::mt19937(seed);
41
+ s_seed = seed;
42
42
  }
43
43
 
44
44
  int randVal(int a, int b) {
45
- EXPECT_GE(a, 0);
46
- EXPECT_LE(a, b);
45
+ EXPECT_GE(a, 0);
46
+ EXPECT_LE(a, b);
47
47
 
48
- return a + (distrib(rng) % (b + 1 - a));
48
+ return a + (distrib(rng) % (b + 1 - a));
49
49
  }
50
50
 
51
51
  bool randBool() {
52
- return randSelect<bool>({true, false});
52
+ return randSelect<bool>({true, false});
53
53
  }
54
54
 
55
55
  std::vector<float> randVecs(size_t num, size_t dim) {
56
- std::vector<float> v(num * dim);
56
+ std::vector<float> v(num * dim);
57
57
 
58
- faiss::float_rand(v.data(), v.size(), s_seed);
59
- // unfortunately we generate separate sets of vectors, and don't
60
- // want the same values
61
- ++s_seed;
58
+ faiss::float_rand(v.data(), v.size(), s_seed);
59
+ // unfortunately we generate separate sets of vectors, and don't
60
+ // want the same values
61
+ ++s_seed;
62
62
 
63
- return v;
63
+ return v;
64
64
  }
65
65
 
66
66
  std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
67
- std::vector<unsigned char> v(num * (dim / 8));
67
+ std::vector<unsigned char> v(num * (dim / 8));
68
68
 
69
- faiss::byte_rand(v.data(), v.size(), s_seed);
70
- // unfortunately we generate separate sets of vectors, and don't
71
- // want the same values
72
- ++s_seed;
69
+ faiss::byte_rand(v.data(), v.size(), s_seed);
70
+ // unfortunately we generate separate sets of vectors, and don't
71
+ // want the same values
72
+ ++s_seed;
73
73
 
74
- return v;
74
+ return v;
75
75
  }
76
76
 
77
77
  void compareIndices(
78
- const std::vector<float>& queryVecs,
79
- faiss::Index& refIndex,
80
- faiss::Index& testIndex,
81
- int numQuery,
82
- int /*dim*/,
83
- int k,
84
- const std::string& configMsg,
85
- float maxRelativeError,
86
- float pctMaxDiff1,
87
- float pctMaxDiffN) {
88
- // Compare
89
- std::vector<float> refDistance(numQuery * k, 0);
90
- std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
91
- refIndex.search(numQuery, queryVecs.data(),
92
- k, refDistance.data(), refIndices.data());
93
-
94
- std::vector<float> testDistance(numQuery * k, 0);
95
- std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
96
- testIndex.search(numQuery, queryVecs.data(),
97
- k, testDistance.data(), testIndices.data());
98
-
99
- faiss::gpu::compareLists(refDistance.data(),
100
- refIndices.data(),
101
- testDistance.data(),
102
- testIndices.data(),
103
- numQuery, k,
104
- configMsg,
105
- true, false, true,
106
- maxRelativeError, pctMaxDiff1, pctMaxDiffN);
78
+ const std::vector<float>& queryVecs,
79
+ faiss::Index& refIndex,
80
+ faiss::Index& testIndex,
81
+ int numQuery,
82
+ int /*dim*/,
83
+ int k,
84
+ const std::string& configMsg,
85
+ float maxRelativeError,
86
+ float pctMaxDiff1,
87
+ float pctMaxDiffN) {
88
+ // Compare
89
+ std::vector<float> refDistance(numQuery * k, 0);
90
+ std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
91
+ refIndex.search(
92
+ numQuery,
93
+ queryVecs.data(),
94
+ k,
95
+ refDistance.data(),
96
+ refIndices.data());
97
+
98
+ std::vector<float> testDistance(numQuery * k, 0);
99
+ std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
100
+ testIndex.search(
101
+ numQuery,
102
+ queryVecs.data(),
103
+ k,
104
+ testDistance.data(),
105
+ testIndices.data());
106
+
107
+ faiss::gpu::compareLists(
108
+ refDistance.data(),
109
+ refIndices.data(),
110
+ testDistance.data(),
111
+ testIndices.data(),
112
+ numQuery,
113
+ k,
114
+ configMsg,
115
+ true,
116
+ false,
117
+ true,
118
+ maxRelativeError,
119
+ pctMaxDiff1,
120
+ pctMaxDiffN);
107
121
  }
108
122
 
109
- void compareIndices(faiss::Index& refIndex,
110
- faiss::Index& testIndex,
111
- int numQuery, int dim, int k,
112
- const std::string& configMsg,
113
- float maxRelativeError,
114
- float pctMaxDiff1,
115
- float pctMaxDiffN) {
116
- auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
117
-
118
- compareIndices(queryVecs,
119
- refIndex,
120
- testIndex,
121
- numQuery, dim, k,
122
- configMsg,
123
- maxRelativeError,
124
- pctMaxDiff1,
125
- pctMaxDiffN);
123
+ void compareIndices(
124
+ faiss::Index& refIndex,
125
+ faiss::Index& testIndex,
126
+ int numQuery,
127
+ int dim,
128
+ int k,
129
+ const std::string& configMsg,
130
+ float maxRelativeError,
131
+ float pctMaxDiff1,
132
+ float pctMaxDiffN) {
133
+ auto queryVecs = faiss::gpu::randVecs(numQuery, dim);
134
+
135
+ compareIndices(
136
+ queryVecs,
137
+ refIndex,
138
+ testIndex,
139
+ numQuery,
140
+ dim,
141
+ k,
142
+ configMsg,
143
+ maxRelativeError,
144
+ pctMaxDiff1,
145
+ pctMaxDiffN);
126
146
  }
127
147
 
128
148
  template <typename T>
129
149
  inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
130
- return p[i * dim2 + j];
150
+ return p[i * dim2 + j];
131
151
  }
132
152
 
133
- void compareLists(const float* refDist,
134
- const faiss::Index::idx_t* refInd,
135
- const float* testDist,
136
- const faiss::Index::idx_t* testInd,
137
- int dim1, int dim2,
138
- const std::string& configMsg,
139
- bool printBasicStats, bool printDiffs, bool assertOnErr,
140
- float maxRelativeError,
141
- float pctMaxDiff1,
142
- float pctMaxDiffN) {
143
-
144
- float maxAbsErr = 0.0f;
145
- for (int i = 0; i < dim1 * dim2; ++i) {
146
- maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
147
- }
148
- int numResults = dim1 * dim2;
149
-
150
- // query -> {index -> result position}
151
- std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
152
-
153
- for (int query = 0; query < dim1; ++query) {
154
- std::unordered_map<faiss::Index::idx_t, int> indices;
155
-
156
- for (int result = 0; result < dim2; ++result) {
157
- indices[lookup(refInd, query, result, dim1, dim2)] = result;
153
+ void compareLists(
154
+ const float* refDist,
155
+ const faiss::Index::idx_t* refInd,
156
+ const float* testDist,
157
+ const faiss::Index::idx_t* testInd,
158
+ int dim1,
159
+ int dim2,
160
+ const std::string& configMsg,
161
+ bool printBasicStats,
162
+ bool printDiffs,
163
+ bool assertOnErr,
164
+ float maxRelativeError,
165
+ float pctMaxDiff1,
166
+ float pctMaxDiffN) {
167
+ float maxAbsErr = 0.0f;
168
+ for (int i = 0; i < dim1 * dim2; ++i) {
169
+ maxAbsErr = std::max(maxAbsErr, std::abs(refDist[i] - testDist[i]));
158
170
  }
171
+ int numResults = dim1 * dim2;
159
172
 
160
- refIndexMap.emplace_back(std::move(indices));
161
- }
162
-
163
- // See how far off the indices are
164
- // Keep track of the difference for each entry
165
- std::vector<std::vector<int>> indexDiffs;
166
-
167
- int diff1 = 0; // index differs by 1
168
- int diffN = 0; // index differs by >1
169
- int diffInf = 0; // index not found in the other
170
- int nonUniqueIndices = 0;
171
-
172
- double avgDiff = 0.0;
173
- int maxDiff = 0;
174
- float maxRelErr = 0.0f;
175
-
176
- for (int query = 0; query < dim1; ++query) {
177
- std::vector<int> diffs;
178
- std::set<faiss::Index::idx_t> uniqueIndices;
179
-
180
- auto& indices = refIndexMap[query];
181
-
182
- for (int result = 0; result < dim2; ++result) {
183
- auto t = lookup(testInd, query, result, dim1, dim2);
184
-
185
- // All indices reported within a query should be unique; this is
186
- // a serious error if is otherwise the case.
187
- // If -1 is reported (no result due to IVF partitioning or not enough
188
- // entries in the index), then duplicates are allowed, but both the
189
- // reference and test must have -1 in the same position.
190
- if (t == -1) {
191
- EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
192
- } else {
193
- bool uniqueIndex = uniqueIndices.count(t) == 0;
194
- if (assertOnErr) {
195
- EXPECT_TRUE(uniqueIndex) << configMsg
196
- << " " << query
197
- << " " << result
198
- << " " << t;
199
- }
173
+ // query -> {index -> result position}
174
+ std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
200
175
 
201
- if (!uniqueIndex) {
202
- ++nonUniqueIndices;
203
- } else {
204
- uniqueIndices.insert(t);
205
- }
176
+ for (int query = 0; query < dim1; ++query) {
177
+ std::unordered_map<faiss::Index::idx_t, int> indices;
206
178
 
207
- auto it = indices.find(t);
208
- if (it != indices.end()) {
209
- int diff = std::abs(result - it->second);
210
- diffs.push_back(diff);
211
-
212
- if (diff == 1) {
213
- ++diff1;
214
- maxDiff = std::max(diff, maxDiff);
215
- } else if (diff > 1) {
216
- ++diffN;
217
- maxDiff = std::max(diff, maxDiff);
218
- }
219
-
220
- avgDiff += (double) diff;
221
- } else {
222
- ++diffInf;
223
- diffs.push_back(-1);
224
- // don't count this for maxDiff
179
+ for (int result = 0; result < dim2; ++result) {
180
+ indices[lookup(refInd, query, result, dim1, dim2)] = result;
225
181
  }
226
- }
227
182
 
228
- auto refD = lookup(refDist, query, result, dim1, dim2);
229
- auto testD = lookup(testDist, query, result, dim1, dim2);
183
+ refIndexMap.emplace_back(std::move(indices));
184
+ }
185
+
186
+ // See how far off the indices are
187
+ // Keep track of the difference for each entry
188
+ std::vector<std::vector<int>> indexDiffs;
230
189
 
231
- float relErr = relativeError(refD, testD);
190
+ int diff1 = 0; // index differs by 1
191
+ int diffN = 0; // index differs by >1
192
+ int diffInf = 0; // index not found in the other
193
+ int nonUniqueIndices = 0;
232
194
 
233
- if (assertOnErr) {
234
- EXPECT_LE(relErr, maxRelativeError) << configMsg
235
- << " (" << query << ", " << result
236
- << ") refD: " << refD
237
- << " testD: " << testD;
238
- }
195
+ double avgDiff = 0.0;
196
+ int maxDiff = 0;
197
+ float maxRelErr = 0.0f;
198
+
199
+ for (int query = 0; query < dim1; ++query) {
200
+ std::vector<int> diffs;
201
+ std::set<faiss::Index::idx_t> uniqueIndices;
202
+
203
+ auto& indices = refIndexMap[query];
204
+
205
+ for (int result = 0; result < dim2; ++result) {
206
+ auto t = lookup(testInd, query, result, dim1, dim2);
207
+
208
+ // All indices reported within a query should be unique; this is
209
+ // a serious error if is otherwise the case.
210
+ // If -1 is reported (no result due to IVF partitioning or not
211
+ // enough entries in the index), then duplicates are allowed, but
212
+ // both the reference and test must have -1 in the same position.
213
+ if (t == -1) {
214
+ EXPECT_EQ(lookup(refInd, query, result, dim1, dim2), t);
215
+ } else {
216
+ bool uniqueIndex = uniqueIndices.count(t) == 0;
217
+ if (assertOnErr) {
218
+ EXPECT_TRUE(uniqueIndex) << configMsg << " " << query << " "
219
+ << result << " " << t;
220
+ }
221
+
222
+ if (!uniqueIndex) {
223
+ ++nonUniqueIndices;
224
+ } else {
225
+ uniqueIndices.insert(t);
226
+ }
227
+
228
+ auto it = indices.find(t);
229
+ if (it != indices.end()) {
230
+ int diff = std::abs(result - it->second);
231
+ diffs.push_back(diff);
232
+
233
+ if (diff == 1) {
234
+ ++diff1;
235
+ maxDiff = std::max(diff, maxDiff);
236
+ } else if (diff > 1) {
237
+ ++diffN;
238
+ maxDiff = std::max(diff, maxDiff);
239
+ }
240
+
241
+ avgDiff += (double)diff;
242
+ } else {
243
+ ++diffInf;
244
+ diffs.push_back(-1);
245
+ // don't count this for maxDiff
246
+ }
247
+ }
248
+
249
+ auto refD = lookup(refDist, query, result, dim1, dim2);
250
+ auto testD = lookup(testDist, query, result, dim1, dim2);
251
+
252
+ float relErr = relativeError(refD, testD);
253
+
254
+ if (assertOnErr) {
255
+ EXPECT_LE(relErr, maxRelativeError)
256
+ << configMsg << " (" << query << ", " << result
257
+ << ") refD: " << refD << " testD: " << testD;
258
+ }
259
+
260
+ maxRelErr = std::max(maxRelErr, relErr);
261
+ }
239
262
 
240
- maxRelErr = std::max(maxRelErr, relErr);
263
+ indexDiffs.emplace_back(std::move(diffs));
241
264
  }
242
265
 
243
- indexDiffs.emplace_back(std::move(diffs));
244
- }
266
+ if (assertOnErr) {
267
+ EXPECT_LE(
268
+ (float)(diff1 + diffN + diffInf),
269
+ (float)numResults * pctMaxDiff1)
270
+ << configMsg;
245
271
 
246
- if (assertOnErr) {
247
- EXPECT_LE((float) (diff1 + diffN + diffInf),
248
- (float) numResults * pctMaxDiff1) << configMsg;
272
+ // Don't count diffInf because that could be diff1 as far as we
273
+ // know
274
+ EXPECT_LE((float)diffN, (float)numResults * pctMaxDiffN) << configMsg;
275
+ }
249
276
 
250
- // Don't count diffInf because that could be diff1 as far as we
251
- // know
252
- EXPECT_LE((float) diffN, (float) numResults * pctMaxDiffN) << configMsg;
253
- }
277
+ avgDiff /= (double)numResults;
254
278
 
255
- avgDiff /= (double) numResults;
279
+ if (printBasicStats) {
280
+ if (!configMsg.empty()) {
281
+ printf("Config\n"
282
+ "----------------------------\n"
283
+ "%s\n",
284
+ configMsg.c_str());
285
+ }
256
286
 
257
- if (printBasicStats) {
258
- if (!configMsg.empty()) {
259
- printf("Config\n"
260
- "----------------------------\n"
261
- "%s\n",
262
- configMsg.c_str());
287
+ printf("Result error and differences\n"
288
+ "----------------------------\n"
289
+ "max abs diff %.7f rel diff %.7f\n"
290
+ "idx diff avg: %.5g max: %d\n"
291
+ "idx diff of 1: %d (%.3f%% of queries)\n"
292
+ "idx diff of >1: %d (%.3f%% of queries)\n"
293
+ "idx diff not found: %d (%.3f%% of queries)"
294
+ " [typically a last element inversion]\n"
295
+ "non-unique indices: %d (a serious error if >0)\n",
296
+ maxAbsErr,
297
+ maxRelErr,
298
+ avgDiff,
299
+ maxDiff,
300
+ diff1,
301
+ 100.0f * (float)diff1 / (float)numResults,
302
+ diffN,
303
+ 100.0f * (float)diffN / (float)numResults,
304
+ diffInf,
305
+ 100.0f * (float)diffInf / (float)numResults,
306
+ nonUniqueIndices);
263
307
  }
264
308
 
265
- printf("Result error and differences\n"
266
- "----------------------------\n"
267
- "max abs diff %.7f rel diff %.7f\n"
268
- "idx diff avg: %.5g max: %d\n"
269
- "idx diff of 1: %d (%.3f%% of queries)\n"
270
- "idx diff of >1: %d (%.3f%% of queries)\n"
271
- "idx diff not found: %d (%.3f%% of queries)"
272
- " [typically a last element inversion]\n"
273
- "non-unique indices: %d (a serious error if >0)\n",
274
- maxAbsErr, maxRelErr,
275
- avgDiff, maxDiff,
276
- diff1, 100.0f * (float) diff1 / (float) numResults,
277
- diffN, 100.0f * (float) diffN / (float) numResults,
278
- diffInf, 100.0f * (float) diffInf / (float) numResults,
279
- nonUniqueIndices);
280
- }
281
-
282
- if (printDiffs) {
283
- printf("differences:\n");
284
- printf("==================\n");
285
- for (int query = 0; query < dim1; ++query) {
286
- for (int result = 0; result < dim2; ++result) {
287
- long refI = lookup(refInd, query, result, dim1, dim2);
288
- long testI = lookup(testInd, query, result, dim1, dim2);
289
-
290
- if (refI != testI) {
291
- float refD = lookup(refDist, query, result, dim1, dim2);
292
- float testD = lookup(testDist, query, result, dim1, dim2);
293
-
294
- float maxDist = std::max(refD, testD);
295
- float delta = std::abs(refD - testD);
296
-
297
- float relErr = delta / maxDist;
298
-
299
- if (refD == testD) {
300
- printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
301
- query, result,
302
- indexDiffs[query][result],
303
- refI, testI);
304
- } else {
305
- printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
306
- "rel %.8f ref %a tst %a)\n",
307
- query, result,
308
- indexDiffs[query][result],
309
- refI, testI, delta, relErr, refD, testD);
310
- }
309
+ if (printDiffs) {
310
+ printf("differences:\n");
311
+ printf("==================\n");
312
+ for (int query = 0; query < dim1; ++query) {
313
+ for (int result = 0; result < dim2; ++result) {
314
+ long refI = lookup(refInd, query, result, dim1, dim2);
315
+ long testI = lookup(testInd, query, result, dim1, dim2);
316
+
317
+ if (refI != testI) {
318
+ float refD = lookup(refDist, query, result, dim1, dim2);
319
+ float testD = lookup(testDist, query, result, dim1, dim2);
320
+
321
+ float maxDist = std::max(refD, testD);
322
+ float delta = std::abs(refD - testD);
323
+
324
+ float relErr = delta / maxDist;
325
+
326
+ if (refD == testD) {
327
+ printf("(%d, %d [%d]) (ref %ld tst %ld dist ==)\n",
328
+ query,
329
+ result,
330
+ indexDiffs[query][result],
331
+ refI,
332
+ testI);
333
+ } else {
334
+ printf("(%d, %d [%d]) (ref %ld tst %ld abs %.8f "
335
+ "rel %.8f ref %a tst %a)\n",
336
+ query,
337
+ result,
338
+ indexDiffs[query][result],
339
+ refI,
340
+ testI,
341
+ delta,
342
+ relErr,
343
+ refD,
344
+ testD);
345
+ }
346
+ }
347
+ }
311
348
  }
312
- }
313
349
  }
314
- }
315
350
  }
316
351
 
317
- } }
352
+ } // namespace gpu
353
+ } // namespace faiss