faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -5,126 +5,120 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/IndexBinaryFlat.h>
10
9
  #include <faiss/gpu/GpuIndexBinaryFlat.h>
11
10
  #include <faiss/gpu/StandardGpuResources.h>
12
- #include <faiss/gpu/utils/DeviceUtils.h>
13
11
  #include <faiss/gpu/test/TestUtils.h>
12
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
13
  #include <faiss/utils/utils.h>
15
14
  #include <gtest/gtest.h>
16
15
  #include <sstream>
17
16
  #include <vector>
18
17
 
19
- void compareBinaryDist(const std::vector<int>& cpuDist,
20
- const std::vector<faiss::IndexBinary::idx_t>& cpuLabels,
21
- const std::vector<int>& gpuDist,
22
- const std::vector<faiss::IndexBinary::idx_t>& gpuLabels,
23
- int numQuery,
24
- int k) {
25
- for (int i = 0; i < numQuery; ++i) {
26
- // The index order can be permuted within a group that has the same
27
- // distance, since this is based on the order in which the algorithm
28
- // encounters the values. The last set of equivalent distances seen in the
29
- // min-k might be truncated, so we can't check that set, but all others we
30
- // can check.
31
- std::set<faiss::IndexBinary::idx_t> cpuLabelSet;
32
- std::set<faiss::IndexBinary::idx_t> gpuLabelSet;
33
-
34
- int curDist = -1;
35
-
36
- for (int j = 0; j < k; ++j) {
37
- int idx = i * k + j;
38
-
39
- if (curDist == -1) {
40
- curDist = cpuDist[idx];
41
- }
42
-
43
- if (curDist != cpuDist[idx]) {
44
- // Distances must be monotonically increasing
45
- EXPECT_LT(curDist, cpuDist[idx]);
46
-
47
- // This is a new set of distances
48
- EXPECT_EQ(cpuLabelSet, gpuLabelSet);
49
- curDist = cpuDist[idx];
50
- cpuLabelSet.clear();
51
- gpuLabelSet.clear();
52
- }
53
-
54
- cpuLabelSet.insert(cpuLabels[idx]);
55
- gpuLabelSet.insert(gpuLabels[idx]);
56
-
57
- // Because the distances are reproducible, they must be exactly the same
58
- EXPECT_EQ(cpuDist[idx], gpuDist[idx]);
18
+ void compareBinaryDist(
19
+ const std::vector<int>& cpuDist,
20
+ const std::vector<faiss::IndexBinary::idx_t>& cpuLabels,
21
+ const std::vector<int>& gpuDist,
22
+ const std::vector<faiss::IndexBinary::idx_t>& gpuLabels,
23
+ int numQuery,
24
+ int k) {
25
+ for (int i = 0; i < numQuery; ++i) {
26
+ // The index order can be permuted within a group that has the same
27
+ // distance, since this is based on the order in which the algorithm
28
+ // encounters the values. The last set of equivalent distances seen in
29
+ // the min-k might be truncated, so we can't check that set, but all
30
+ // others we can check.
31
+ std::set<faiss::IndexBinary::idx_t> cpuLabelSet;
32
+ std::set<faiss::IndexBinary::idx_t> gpuLabelSet;
33
+
34
+ int curDist = -1;
35
+
36
+ for (int j = 0; j < k; ++j) {
37
+ int idx = i * k + j;
38
+
39
+ if (curDist == -1) {
40
+ curDist = cpuDist[idx];
41
+ }
42
+
43
+ if (curDist != cpuDist[idx]) {
44
+ // Distances must be monotonically increasing
45
+ EXPECT_LT(curDist, cpuDist[idx]);
46
+
47
+ // This is a new set of distances
48
+ EXPECT_EQ(cpuLabelSet, gpuLabelSet);
49
+ curDist = cpuDist[idx];
50
+ cpuLabelSet.clear();
51
+ gpuLabelSet.clear();
52
+ }
53
+
54
+ cpuLabelSet.insert(cpuLabels[idx]);
55
+ gpuLabelSet.insert(gpuLabels[idx]);
56
+
57
+ // Because the distances are reproducible, they must be exactly the
58
+ // same
59
+ EXPECT_EQ(cpuDist[idx], gpuDist[idx]);
60
+ }
59
61
  }
60
- }
61
62
  }
62
63
 
63
64
  template <int DimMultiple>
64
65
  void testGpuIndexBinaryFlat(int kOverride = -1) {
65
- faiss::gpu::StandardGpuResources res;
66
- res.noTempMemory();
66
+ faiss::gpu::StandardGpuResources res;
67
+ res.noTempMemory();
67
68
 
68
- faiss::gpu::GpuIndexBinaryFlatConfig config;
69
- config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69
+ faiss::gpu::GpuIndexBinaryFlatConfig config;
70
+ config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
70
71
 
71
- // multiples of 8 and multiples of 32 use different implementations
72
- int dims = faiss::gpu::randVal(1, 20) * DimMultiple;
73
- faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res, dims, config);
72
+ // multiples of 8 and multiples of 32 use different implementations
73
+ int dims = faiss::gpu::randVal(1, 20) * DimMultiple;
74
+ faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res, dims, config);
74
75
 
75
- faiss::IndexBinaryFlat cpuIndex(dims);
76
+ faiss::IndexBinaryFlat cpuIndex(dims);
76
77
 
77
- int k = kOverride > 0 ?
78
- kOverride : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection());
79
- int numVecs = faiss::gpu::randVal(k + 1, 20000);
80
- int numQuery = faiss::gpu::randVal(1, 1000);
78
+ int k = kOverride > 0
79
+ ? kOverride
80
+ : faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection());
81
+ int numVecs = faiss::gpu::randVal(k + 1, 20000);
82
+ int numQuery = faiss::gpu::randVal(1, 1000);
81
83
 
82
- auto data = faiss::gpu::randBinaryVecs(numVecs, dims);
83
- gpuIndex.add(numVecs, data.data());
84
- cpuIndex.add(numVecs, data.data());
84
+ auto data = faiss::gpu::randBinaryVecs(numVecs, dims);
85
+ gpuIndex.add(numVecs, data.data());
86
+ cpuIndex.add(numVecs, data.data());
85
87
 
86
- auto query = faiss::gpu::randBinaryVecs(numQuery, dims);
88
+ auto query = faiss::gpu::randBinaryVecs(numQuery, dims);
87
89
 
88
- std::vector<int> cpuDist(numQuery * k);
89
- std::vector<faiss::IndexBinary::idx_t> cpuLabels(numQuery * k);
90
+ std::vector<int> cpuDist(numQuery * k);
91
+ std::vector<faiss::IndexBinary::idx_t> cpuLabels(numQuery * k);
90
92
 
91
- cpuIndex.search(numQuery,
92
- query.data(),
93
- k,
94
- cpuDist.data(),
95
- cpuLabels.data());
93
+ cpuIndex.search(
94
+ numQuery, query.data(), k, cpuDist.data(), cpuLabels.data());
96
95
 
97
- std::vector<int> gpuDist(numQuery * k);
98
- std::vector<faiss::IndexBinary::idx_t> gpuLabels(numQuery * k);
96
+ std::vector<int> gpuDist(numQuery * k);
97
+ std::vector<faiss::IndexBinary::idx_t> gpuLabels(numQuery * k);
99
98
 
100
- gpuIndex.search(numQuery,
101
- query.data(),
102
- k,
103
- gpuDist.data(),
104
- gpuLabels.data());
99
+ gpuIndex.search(
100
+ numQuery, query.data(), k, gpuDist.data(), gpuLabels.data());
105
101
 
106
- compareBinaryDist(cpuDist, cpuLabels,
107
- gpuDist, gpuLabels,
108
- numQuery, k);
102
+ compareBinaryDist(cpuDist, cpuLabels, gpuDist, gpuLabels, numQuery, k);
109
103
  }
110
104
 
111
105
  TEST(TestGpuIndexBinaryFlat, Test8) {
112
- for (int tries = 0; tries < 4; ++tries) {
113
- testGpuIndexBinaryFlat<8>();
114
- }
106
+ for (int tries = 0; tries < 4; ++tries) {
107
+ testGpuIndexBinaryFlat<8>();
108
+ }
115
109
  }
116
110
 
117
111
  TEST(TestGpuIndexBinaryFlat, Test32) {
118
- for (int tries = 0; tries < 4; ++tries) {
119
- testGpuIndexBinaryFlat<32>();
120
- }
112
+ for (int tries = 0; tries < 4; ++tries) {
113
+ testGpuIndexBinaryFlat<32>();
114
+ }
121
115
  }
122
116
 
123
117
  int main(int argc, char** argv) {
124
- testing::InitGoogleTest(&argc, argv);
118
+ testing::InitGoogleTest(&argc, argv);
125
119
 
126
- // just run with a fixed test seed
127
- faiss::gpu::setTestSeed(100);
120
+ // just run with a fixed test seed
121
+ faiss::gpu::setTestSeed(100);
128
122
 
129
- return RUN_ALL_TESTS();
123
+ return RUN_ALL_TESTS();
130
124
  }
@@ -5,12 +5,11 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/IndexFlat.h>
10
9
  #include <faiss/gpu/GpuIndexFlat.h>
11
10
  #include <faiss/gpu/StandardGpuResources.h>
12
- #include <faiss/gpu/utils/DeviceUtils.h>
13
11
  #include <faiss/gpu/test/TestUtils.h>
12
+ #include <faiss/gpu/utils/DeviceUtils.h>
14
13
  #include <gtest/gtest.h>
15
14
  #include <sstream>
16
15
  #include <vector>
@@ -20,93 +19,109 @@ constexpr float kF16MaxRelErr = 0.07f;
20
19
  constexpr float kF32MaxRelErr = 6e-3f;
21
20
 
22
21
  struct TestFlatOptions {
23
- TestFlatOptions()
24
- : metric(faiss::MetricType::METRIC_L2),
25
- metricArg(0),
26
- useFloat16(false),
27
- useTransposed(false),
28
- numVecsOverride(-1),
29
- numQueriesOverride(-1),
30
- kOverride(-1),
31
- dimOverride(-1) {
32
- }
33
-
34
- faiss::MetricType metric;
35
- float metricArg;
36
-
37
- bool useFloat16;
38
- bool useTransposed;
39
- int numVecsOverride;
40
- int numQueriesOverride;
41
- int kOverride;
42
- int dimOverride;
22
+ TestFlatOptions()
23
+ : metric(faiss::MetricType::METRIC_L2),
24
+ metricArg(0),
25
+ useFloat16(false),
26
+ useTransposed(false),
27
+ numVecsOverride(-1),
28
+ numQueriesOverride(-1),
29
+ kOverride(-1),
30
+ dimOverride(-1) {}
31
+
32
+ faiss::MetricType metric;
33
+ float metricArg;
34
+
35
+ bool useFloat16;
36
+ bool useTransposed;
37
+ int numVecsOverride;
38
+ int numQueriesOverride;
39
+ int kOverride;
40
+ int dimOverride;
43
41
  };
44
42
 
45
43
  void testFlat(const TestFlatOptions& opt) {
46
- int numVecs = opt.numVecsOverride > 0 ?
47
- opt.numVecsOverride : faiss::gpu::randVal(1000, 5000);
48
- int dim = opt.dimOverride > 0 ?
49
- opt.dimOverride : faiss::gpu::randVal(50, 800);
50
- int numQuery = opt.numQueriesOverride > 0 ?
51
- opt.numQueriesOverride : faiss::gpu::randVal(1, 512);
52
-
53
- // Due to loss of precision in a float16 accumulator, for large k,
54
- // the number of differences is pretty huge. Restrict ourselves to a
55
- // fairly small `k` for float16
56
- int k = opt.useFloat16 ?
57
- std::min(faiss::gpu::randVal(1, 50), numVecs) :
58
- std::min(faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()), numVecs);
59
- if (opt.kOverride > 0) {
60
- k = opt.kOverride;
61
- }
62
-
63
- faiss::IndexFlat cpuIndex(dim, opt.metric);
64
- cpuIndex.metric_arg = opt.metricArg;
65
-
66
- // Construct on a random device to test multi-device, if we have
67
- // multiple devices
68
- int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69
-
70
- faiss::gpu::StandardGpuResources res;
71
- res.noTempMemory();
72
-
73
- faiss::gpu::GpuIndexFlatConfig config;
74
- config.device = device;
75
- config.useFloat16 = opt.useFloat16;
76
- config.storeTransposed = opt.useTransposed;
77
-
78
- faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
79
- gpuIndex.metric_arg = opt.metricArg;
80
-
81
- std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
82
- cpuIndex.add(numVecs, vecs.data());
83
- gpuIndex.add(numVecs, vecs.data());
84
-
85
- std::stringstream str;
86
- str << "metric " << opt.metric
87
- << " marg " << opt.metricArg
88
- << " numVecs " << numVecs
89
- << " dim " << dim
90
- << " useFloat16 " << opt.useFloat16
91
- << " transposed " << opt.useTransposed
92
- << " numQuery " << numQuery
93
- << " k " << k;
94
-
95
- // To some extent, we depend upon the relative error for the test
96
- // for float16
97
- faiss::gpu::compareIndices(cpuIndex, gpuIndex, numQuery, dim, k, str.str(),
98
- opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
99
- // FIXME: the fp16 bounds are
100
- // useless when math (the accumulator) is
101
- // in fp16. Figure out another way to test
102
- opt.useFloat16 ? 0.99f : 0.1f,
103
- opt.useFloat16 ? 0.65f : 0.015f);
44
+ int numVecs = opt.numVecsOverride > 0 ? opt.numVecsOverride
45
+ : faiss::gpu::randVal(1000, 5000);
46
+ int dim = opt.dimOverride > 0 ? opt.dimOverride
47
+ : faiss::gpu::randVal(50, 800);
48
+ int numQuery = opt.numQueriesOverride > 0 ? opt.numQueriesOverride
49
+ : faiss::gpu::randVal(1, 512);
50
+
51
+ // Due to loss of precision in a float16 accumulator, for large k,
52
+ // the number of differences is pretty huge. Restrict ourselves to a
53
+ // fairly small `k` for float16
54
+ int k = opt.useFloat16
55
+ ? std::min(faiss::gpu::randVal(1, 50), numVecs)
56
+ : std::min(
57
+ faiss::gpu::randVal(1, faiss::gpu::getMaxKSelection()),
58
+ numVecs);
59
+ if (opt.kOverride > 0) {
60
+ k = opt.kOverride;
61
+ }
62
+
63
+ faiss::IndexFlat cpuIndex(dim, opt.metric);
64
+ cpuIndex.metric_arg = opt.metricArg;
65
+
66
+ // Construct on a random device to test multi-device, if we have
67
+ // multiple devices
68
+ int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69
+
70
+ faiss::gpu::StandardGpuResources res;
71
+ res.noTempMemory();
72
+
73
+ faiss::gpu::GpuIndexFlatConfig config;
74
+ config.device = device;
75
+ config.useFloat16 = opt.useFloat16;
76
+ config.storeTransposed = opt.useTransposed;
77
+
78
+ faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
79
+ gpuIndex.metric_arg = opt.metricArg;
80
+
81
+ std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
82
+ cpuIndex.add(numVecs, vecs.data());
83
+ gpuIndex.add(numVecs, vecs.data());
84
+
85
+ std::stringstream str;
86
+ str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
87
+ << numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
88
+ << " transposed " << opt.useTransposed << " numQuery " << numQuery
89
+ << " k " << k;
90
+
91
+ // To some extent, we depend upon the relative error for the test
92
+ // for float16
93
+ faiss::gpu::compareIndices(
94
+ cpuIndex,
95
+ gpuIndex,
96
+ numQuery,
97
+ dim,
98
+ k,
99
+ str.str(),
100
+ opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
101
+ // FIXME: the fp16 bounds are
102
+ // useless when math (the accumulator) is
103
+ // in fp16. Figure out another way to test
104
+ opt.useFloat16 ? 0.99f : 0.1f,
105
+ opt.useFloat16 ? 0.65f : 0.015f);
104
106
  }
105
107
 
106
108
  TEST(TestGpuIndexFlat, IP_Float32) {
107
- for (int tries = 0; tries < 3; ++tries) {
109
+ for (int tries = 0; tries < 3; ++tries) {
110
+ TestFlatOptions opt;
111
+ opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
112
+ opt.useFloat16 = false;
113
+ opt.useTransposed = false;
114
+
115
+ testFlat(opt);
116
+
117
+ opt.useTransposed = true;
118
+ testFlat(opt);
119
+ }
120
+ }
121
+
122
+ TEST(TestGpuIndexFlat, L1_Float32) {
108
123
  TestFlatOptions opt;
109
- opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
124
+ opt.metric = faiss::MetricType::METRIC_L1;
110
125
  opt.useFloat16 = false;
111
126
  opt.useTransposed = false;
112
127
 
@@ -114,280 +129,272 @@ TEST(TestGpuIndexFlat, IP_Float32) {
114
129
 
115
130
  opt.useTransposed = true;
116
131
  testFlat(opt);
117
- }
118
- }
119
-
120
- TEST(TestGpuIndexFlat, L1_Float32) {
121
- TestFlatOptions opt;
122
- opt.metric = faiss::MetricType::METRIC_L1;
123
- opt.useFloat16 = false;
124
- opt.useTransposed = false;
125
-
126
- testFlat(opt);
127
-
128
- opt.useTransposed = true;
129
- testFlat(opt);
130
132
  }
131
133
 
132
134
  TEST(TestGpuIndexFlat, Lp_Float32) {
133
- TestFlatOptions opt;
134
- opt.metric = faiss::MetricType::METRIC_Lp;
135
- opt.metricArg = 5;
136
- opt.useFloat16 = false;
137
- opt.useTransposed = false;
135
+ TestFlatOptions opt;
136
+ opt.metric = faiss::MetricType::METRIC_Lp;
137
+ opt.metricArg = 5;
138
+ opt.useFloat16 = false;
139
+ opt.useTransposed = false;
138
140
 
139
- testFlat(opt);
141
+ testFlat(opt);
140
142
 
141
- // Don't bother testing the transposed version, the L1 test should be good
142
- // enough for that
143
+ // Don't bother testing the transposed version, the L1 test should be good
144
+ // enough for that
143
145
  }
144
146
 
145
147
  TEST(TestGpuIndexFlat, L2_Float32) {
146
- for (int tries = 0; tries < 3; ++tries) {
147
- TestFlatOptions opt;
148
- opt.metric = faiss::MetricType::METRIC_L2;
148
+ for (int tries = 0; tries < 3; ++tries) {
149
+ TestFlatOptions opt;
150
+ opt.metric = faiss::MetricType::METRIC_L2;
149
151
 
150
- opt.useFloat16 = false;
151
- opt.useTransposed = false;
152
+ opt.useFloat16 = false;
153
+ opt.useTransposed = false;
152
154
 
153
- testFlat(opt);
155
+ testFlat(opt);
154
156
 
155
- opt.useTransposed = true;
156
- testFlat(opt);
157
- }
157
+ opt.useTransposed = true;
158
+ testFlat(opt);
159
+ }
158
160
  }
159
161
 
160
162
  // test specialized k == 1 codepath
161
163
  TEST(TestGpuIndexFlat, L2_Float32_K1) {
162
- for (int tries = 0; tries < 3; ++tries) {
163
- TestFlatOptions opt;
164
- opt.metric = faiss::MetricType::METRIC_L2;
165
- opt.useFloat16 = false;
166
- opt.useTransposed = false;
167
- opt.kOverride = 1;
168
-
169
- testFlat(opt);
170
- }
164
+ for (int tries = 0; tries < 3; ++tries) {
165
+ TestFlatOptions opt;
166
+ opt.metric = faiss::MetricType::METRIC_L2;
167
+ opt.useFloat16 = false;
168
+ opt.useTransposed = false;
169
+ opt.kOverride = 1;
170
+
171
+ testFlat(opt);
172
+ }
171
173
  }
172
174
 
173
175
  TEST(TestGpuIndexFlat, IP_Float16) {
174
- for (int tries = 0; tries < 3; ++tries) {
175
- TestFlatOptions opt;
176
- opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
177
- opt.useFloat16 = true;
178
- opt.useTransposed = false;
176
+ for (int tries = 0; tries < 3; ++tries) {
177
+ TestFlatOptions opt;
178
+ opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
179
+ opt.useFloat16 = true;
180
+ opt.useTransposed = false;
179
181
 
180
- testFlat(opt);
182
+ testFlat(opt);
181
183
 
182
- opt.useTransposed = true;
183
- testFlat(opt);
184
- }
184
+ opt.useTransposed = true;
185
+ testFlat(opt);
186
+ }
185
187
  }
186
188
 
187
189
  TEST(TestGpuIndexFlat, L2_Float16) {
188
- for (int tries = 0; tries < 3; ++tries) {
189
- TestFlatOptions opt;
190
- opt.metric = faiss::MetricType::METRIC_L2;
191
- opt.useFloat16 = true;
192
- opt.useTransposed = false;
190
+ for (int tries = 0; tries < 3; ++tries) {
191
+ TestFlatOptions opt;
192
+ opt.metric = faiss::MetricType::METRIC_L2;
193
+ opt.useFloat16 = true;
194
+ opt.useTransposed = false;
193
195
 
194
- testFlat(opt);
196
+ testFlat(opt);
195
197
 
196
- opt.useTransposed = true;
197
- testFlat(opt);
198
- }
198
+ opt.useTransposed = true;
199
+ testFlat(opt);
200
+ }
199
201
  }
200
202
 
201
203
  // test specialized k == 1 codepath
202
204
  TEST(TestGpuIndexFlat, L2_Float16_K1) {
203
- for (int tries = 0; tries < 3; ++tries) {
204
- TestFlatOptions opt;
205
- opt.metric = faiss::MetricType::METRIC_L2;
206
- opt.useFloat16 = true;
207
- opt.useTransposed = false;
208
- opt.kOverride = 1;
209
-
210
- testFlat(opt);
211
- }
205
+ for (int tries = 0; tries < 3; ++tries) {
206
+ TestFlatOptions opt;
207
+ opt.metric = faiss::MetricType::METRIC_L2;
208
+ opt.useFloat16 = true;
209
+ opt.useTransposed = false;
210
+ opt.kOverride = 1;
211
+
212
+ testFlat(opt);
213
+ }
212
214
  }
213
215
 
214
216
  // test tiling along a huge vector set
215
217
  TEST(TestGpuIndexFlat, L2_Tiling) {
216
- for (int tries = 0; tries < 2; ++tries) {
217
- TestFlatOptions opt;
218
- opt.metric = faiss::MetricType::METRIC_L2;
219
- opt.useFloat16 = false;
220
- opt.useTransposed = false;
221
- opt.numVecsOverride = 1000000;
222
-
223
- // keep the rest of the problem reasonably small
224
- opt.numQueriesOverride = 4;
225
- opt.dimOverride = 64;
226
- opt.kOverride = 64;
227
-
228
- testFlat(opt);
229
- }
218
+ for (int tries = 0; tries < 2; ++tries) {
219
+ TestFlatOptions opt;
220
+ opt.metric = faiss::MetricType::METRIC_L2;
221
+ opt.useFloat16 = false;
222
+ opt.useTransposed = false;
223
+ opt.numVecsOverride = 1000000;
224
+
225
+ // keep the rest of the problem reasonably small
226
+ opt.numQueriesOverride = 4;
227
+ opt.dimOverride = 64;
228
+ opt.kOverride = 64;
229
+
230
+ testFlat(opt);
231
+ }
230
232
  }
231
233
 
232
234
  TEST(TestGpuIndexFlat, QueryEmpty) {
233
- faiss::gpu::StandardGpuResources res;
234
- res.noTempMemory();
235
+ faiss::gpu::StandardGpuResources res;
236
+ res.noTempMemory();
235
237
 
236
- faiss::gpu::GpuIndexFlatConfig config;
237
- config.device = 0;
238
- config.useFloat16 = false;
239
- config.storeTransposed = false;
238
+ faiss::gpu::GpuIndexFlatConfig config;
239
+ config.device = 0;
240
+ config.useFloat16 = false;
241
+ config.storeTransposed = false;
240
242
 
241
- int dim = 128;
242
- faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
243
+ int dim = 128;
244
+ faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
243
245
 
244
- // Querying an empty index should not blow up, and just return
245
- // (FLT_MAX, -1)
246
- int numQuery = 10;
247
- int k = 50;
248
- std::vector<float> queries(numQuery * dim, 1.0f);
246
+ // Querying an empty index should not blow up, and just return
247
+ // (FLT_MAX, -1)
248
+ int numQuery = 10;
249
+ int k = 50;
250
+ std::vector<float> queries(numQuery * dim, 1.0f);
249
251
 
250
- std::vector<float> dist(numQuery * k, 0);
251
- std::vector<faiss::Index::idx_t> ind(numQuery * k);
252
+ std::vector<float> dist(numQuery * k, 0);
253
+ std::vector<faiss::Index::idx_t> ind(numQuery * k);
252
254
 
253
- gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
255
+ gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
254
256
 
255
- for (auto d : dist) {
256
- EXPECT_EQ(d, std::numeric_limits<float>::max());
257
- }
257
+ for (auto d : dist) {
258
+ EXPECT_EQ(d, std::numeric_limits<float>::max());
259
+ }
258
260
 
259
- for (auto i : ind) {
260
- EXPECT_EQ(i, -1);
261
- }
261
+ for (auto i : ind) {
262
+ EXPECT_EQ(i, -1);
263
+ }
262
264
  }
263
265
 
264
266
  TEST(TestGpuIndexFlat, CopyFrom) {
265
- int numVecs = faiss::gpu::randVal(100, 200);
266
- int dim = faiss::gpu::randVal(1, 1000);
267
+ int numVecs = faiss::gpu::randVal(100, 200);
268
+ int dim = faiss::gpu::randVal(1, 1000);
267
269
 
268
- faiss::IndexFlatL2 cpuIndex(dim);
270
+ faiss::IndexFlatL2 cpuIndex(dim);
269
271
 
270
- std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
271
- cpuIndex.add(numVecs, vecs.data());
272
+ std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
273
+ cpuIndex.add(numVecs, vecs.data());
272
274
 
273
- faiss::gpu::StandardGpuResources res;
274
- res.noTempMemory();
275
+ faiss::gpu::StandardGpuResources res;
276
+ res.noTempMemory();
275
277
 
276
- // Fill with garbage values
277
- int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
278
+ // Fill with garbage values
279
+ int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
278
280
 
279
- faiss::gpu::GpuIndexFlatConfig config;
280
- config.device = device;
281
- config.useFloat16 = false;
282
- config.storeTransposed = false;
281
+ faiss::gpu::GpuIndexFlatConfig config;
282
+ config.device = device;
283
+ config.useFloat16 = false;
284
+ config.storeTransposed = false;
283
285
 
284
- faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
285
- gpuIndex.copyFrom(&cpuIndex);
286
+ faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
287
+ gpuIndex.copyFrom(&cpuIndex);
286
288
 
287
- EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
288
- EXPECT_EQ(gpuIndex.ntotal, numVecs);
289
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
290
+ EXPECT_EQ(gpuIndex.ntotal, numVecs);
289
291
 
290
- EXPECT_EQ(cpuIndex.d, gpuIndex.d);
291
- EXPECT_EQ(cpuIndex.d, dim);
292
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
293
+ EXPECT_EQ(cpuIndex.d, dim);
292
294
 
293
- int idx = faiss::gpu::randVal(0, numVecs - 1);
295
+ int idx = faiss::gpu::randVal(0, numVecs - 1);
294
296
 
295
- std::vector<float> gpuVals(dim);
296
- gpuIndex.reconstruct(idx, gpuVals.data());
297
+ std::vector<float> gpuVals(dim);
298
+ gpuIndex.reconstruct(idx, gpuVals.data());
297
299
 
298
- std::vector<float> cpuVals(dim);
299
- cpuIndex.reconstruct(idx, cpuVals.data());
300
+ std::vector<float> cpuVals(dim);
301
+ cpuIndex.reconstruct(idx, cpuVals.data());
300
302
 
301
- EXPECT_EQ(gpuVals, cpuVals);
303
+ EXPECT_EQ(gpuVals, cpuVals);
302
304
  }
303
305
 
304
306
  TEST(TestGpuIndexFlat, CopyTo) {
305
- faiss::gpu::StandardGpuResources res;
306
- res.noTempMemory();
307
+ faiss::gpu::StandardGpuResources res;
308
+ res.noTempMemory();
307
309
 
308
- int numVecs = faiss::gpu::randVal(100, 200);
309
- int dim = faiss::gpu::randVal(1, 1000);
310
+ int numVecs = faiss::gpu::randVal(100, 200);
311
+ int dim = faiss::gpu::randVal(1, 1000);
310
312
 
311
- int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
313
+ int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
312
314
 
313
- faiss::gpu::GpuIndexFlatConfig config;
314
- config.device = device;
315
- config.useFloat16 = false;
316
- config.storeTransposed = false;
315
+ faiss::gpu::GpuIndexFlatConfig config;
316
+ config.device = device;
317
+ config.useFloat16 = false;
318
+ config.storeTransposed = false;
317
319
 
318
- faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
320
+ faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
319
321
 
320
- std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
321
- gpuIndex.add(numVecs, vecs.data());
322
+ std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
323
+ gpuIndex.add(numVecs, vecs.data());
322
324
 
323
- // Fill with garbage values
324
- faiss::IndexFlatL2 cpuIndex(2000);
325
- gpuIndex.copyTo(&cpuIndex);
325
+ // Fill with garbage values
326
+ faiss::IndexFlatL2 cpuIndex(2000);
327
+ gpuIndex.copyTo(&cpuIndex);
326
328
 
327
- EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
328
- EXPECT_EQ(gpuIndex.ntotal, numVecs);
329
+ EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
330
+ EXPECT_EQ(gpuIndex.ntotal, numVecs);
329
331
 
330
- EXPECT_EQ(cpuIndex.d, gpuIndex.d);
331
- EXPECT_EQ(cpuIndex.d, dim);
332
+ EXPECT_EQ(cpuIndex.d, gpuIndex.d);
333
+ EXPECT_EQ(cpuIndex.d, dim);
332
334
 
333
- int idx = faiss::gpu::randVal(0, numVecs - 1);
335
+ int idx = faiss::gpu::randVal(0, numVecs - 1);
334
336
 
335
- std::vector<float> gpuVals(dim);
336
- gpuIndex.reconstruct(idx, gpuVals.data());
337
+ std::vector<float> gpuVals(dim);
338
+ gpuIndex.reconstruct(idx, gpuVals.data());
337
339
 
338
- std::vector<float> cpuVals(dim);
339
- cpuIndex.reconstruct(idx, cpuVals.data());
340
+ std::vector<float> cpuVals(dim);
341
+ cpuIndex.reconstruct(idx, cpuVals.data());
340
342
 
341
- EXPECT_EQ(gpuVals, cpuVals);
343
+ EXPECT_EQ(gpuVals, cpuVals);
342
344
  }
343
345
 
344
346
  TEST(TestGpuIndexFlat, UnifiedMemory) {
345
- // Construct on a random device to test multi-device, if we have
346
- // multiple devices
347
- int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
348
-
349
- if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
350
- return;
351
- }
352
-
353
- int dim = 256;
354
-
355
- // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
356
- // kernel indexing, so we can't test unified memory for memory
357
- // oversubscription.
358
- size_t numVecs = 50000;
359
- int numQuery = 10;
360
- int k = 10;
361
-
362
- faiss::IndexFlatL2 cpuIndexL2(dim);
363
-
364
- faiss::gpu::StandardGpuResources res;
365
- res.noTempMemory();
366
-
367
- faiss::gpu::GpuIndexFlatConfig config;
368
- config.device = device;
369
- config.memorySpace = faiss::gpu::MemorySpace::Unified;
370
-
371
- faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
372
-
373
- std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
374
- cpuIndexL2.add(numVecs, vecs.data());
375
- gpuIndexL2.add(numVecs, vecs.data());
376
-
377
- // To some extent, we depend upon the relative error for the test
378
- // for float16
379
- faiss::gpu::compareIndices(cpuIndexL2, gpuIndexL2,
380
- numQuery, dim, k, "Unified Memory",
381
- kF32MaxRelErr,
382
- 0.1f,
383
- 0.015f);
347
+ // Construct on a random device to test multi-device, if we have
348
+ // multiple devices
349
+ int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
350
+
351
+ if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
352
+ return;
353
+ }
354
+
355
+ int dim = 256;
356
+
357
+ // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
358
+ // kernel indexing, so we can't test unified memory for memory
359
+ // oversubscription.
360
+ size_t numVecs = 50000;
361
+ int numQuery = 10;
362
+ int k = 10;
363
+
364
+ faiss::IndexFlatL2 cpuIndexL2(dim);
365
+
366
+ faiss::gpu::StandardGpuResources res;
367
+ res.noTempMemory();
368
+
369
+ faiss::gpu::GpuIndexFlatConfig config;
370
+ config.device = device;
371
+ config.memorySpace = faiss::gpu::MemorySpace::Unified;
372
+
373
+ faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
374
+
375
+ std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
376
+ cpuIndexL2.add(numVecs, vecs.data());
377
+ gpuIndexL2.add(numVecs, vecs.data());
378
+
379
+ // To some extent, we depend upon the relative error for the test
380
+ // for float16
381
+ faiss::gpu::compareIndices(
382
+ cpuIndexL2,
383
+ gpuIndexL2,
384
+ numQuery,
385
+ dim,
386
+ k,
387
+ "Unified Memory",
388
+ kF32MaxRelErr,
389
+ 0.1f,
390
+ 0.015f);
384
391
  }
385
392
 
386
393
  int main(int argc, char** argv) {
387
- testing::InitGoogleTest(&argc, argv);
394
+ testing::InitGoogleTest(&argc, argv);
388
395
 
389
- // just run with a fixed test seed
390
- faiss::gpu::setTestSeed(100);
396
+ // just run with a fixed test seed
397
+ faiss::gpu::setTestSeed(100);
391
398
 
392
- return RUN_ALL_TESTS();
399
+ return RUN_ALL_TESTS();
393
400
  }