faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
-
10
8
  #include <cuda_profiler_api.h>
11
9
  #include <faiss/IndexFlat.h>
12
10
  #include <faiss/IndexIVFPQ.h>
@@ -32,108 +30,131 @@ DEFINE_bool(per_batch_time, false, "print per-batch times");
32
30
  DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
33
31
 
34
32
  int main(int argc, char** argv) {
35
- gflags::ParseCommandLineFlags(&argc, &argv, true);
36
-
37
- cudaProfilerStop();
38
-
39
- int dim = FLAGS_dim;
40
- int numCentroids = FLAGS_centroids;
41
- int bytesPerVec = FLAGS_bytes_per_vec;
42
- int bitsPerCode = FLAGS_bits_per_code;
33
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
43
34
 
44
- faiss::gpu::StandardGpuResources res;
35
+ cudaProfilerStop();
45
36
 
46
- // IndexIVFPQ will complain, but just give us enough to get through this
47
- int numTrain = 4 * numCentroids;
48
- std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
37
+ int dim = FLAGS_dim;
38
+ int numCentroids = FLAGS_centroids;
39
+ int bytesPerVec = FLAGS_bytes_per_vec;
40
+ int bitsPerCode = FLAGS_bits_per_code;
49
41
 
50
- faiss::IndexFlatL2 coarseQuantizer(dim);
51
- faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
52
- bytesPerVec, bitsPerCode);
53
- if (FLAGS_time_cpu) {
54
- cpuIndex.train(numTrain, trainVecs.data());
55
- }
42
+ faiss::gpu::StandardGpuResources res;
56
43
 
57
- faiss::gpu::GpuIndexIVFPQConfig config;
58
- config.device = 0;
59
- config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
44
+ // IndexIVFPQ will complain, but just give us enough to get through this
45
+ int numTrain = 4 * numCentroids;
46
+ std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
60
47
 
61
- faiss::gpu::GpuIndexIVFPQ gpuIndex(
62
- &res, dim, numCentroids, bytesPerVec, bitsPerCode,
63
- faiss::METRIC_L2, config);
64
-
65
- if (FLAGS_time_gpu) {
66
- gpuIndex.train(numTrain, trainVecs.data());
67
- if (FLAGS_reserve_memory) {
68
- size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
69
- gpuIndex.reserveMemory(numVecs);
48
+ faiss::IndexFlatL2 coarseQuantizer(dim);
49
+ faiss::IndexIVFPQ cpuIndex(
50
+ &coarseQuantizer, dim, numCentroids, bytesPerVec, bitsPerCode);
51
+ if (FLAGS_time_cpu) {
52
+ cpuIndex.train(numTrain, trainVecs.data());
70
53
  }
71
- }
72
54
 
73
- cudaDeviceSynchronize();
74
- CUDA_VERIFY(cudaProfilerStart());
55
+ faiss::gpu::GpuIndexIVFPQConfig config;
56
+ config.device = 0;
57
+ config.indicesOptions = (faiss::gpu::IndicesOptions)FLAGS_index;
75
58
 
76
- float totalGpuTime = 0.0f;
77
- float totalCpuTime = 0.0f;
59
+ faiss::gpu::GpuIndexIVFPQ gpuIndex(
60
+ &res,
61
+ dim,
62
+ numCentroids,
63
+ bytesPerVec,
64
+ bitsPerCode,
65
+ faiss::METRIC_L2,
66
+ config);
78
67
 
79
- for (int i = 0; i < FLAGS_batches; ++i) {
80
- if (!FLAGS_per_batch_time) {
81
- if (i % 10 == 0) {
82
- printf("Adding batch %d\n", i + 1);
83
- }
68
+ if (FLAGS_time_gpu) {
69
+ gpuIndex.train(numTrain, trainVecs.data());
70
+ if (FLAGS_reserve_memory) {
71
+ size_t numVecs = (size_t)FLAGS_batches * (size_t)FLAGS_batch_size;
72
+ gpuIndex.reserveMemory(numVecs);
73
+ }
84
74
  }
85
75
 
86
- auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
76
+ cudaDeviceSynchronize();
77
+ CUDA_VERIFY(cudaProfilerStart());
78
+
79
+ float totalGpuTime = 0.0f;
80
+ float totalCpuTime = 0.0f;
81
+
82
+ for (int i = 0; i < FLAGS_batches; ++i) {
83
+ if (!FLAGS_per_batch_time) {
84
+ if (i % 10 == 0) {
85
+ printf("Adding batch %d\n", i + 1);
86
+ }
87
+ }
88
+
89
+ auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
90
+
91
+ if (FLAGS_time_gpu) {
92
+ faiss::gpu::CpuTimer timer;
93
+ gpuIndex.add(FLAGS_batch_size, addVecs.data());
94
+ CUDA_VERIFY(cudaDeviceSynchronize());
95
+ auto time = timer.elapsedMilliseconds();
96
+
97
+ totalGpuTime += time;
98
+
99
+ if (FLAGS_per_batch_time) {
100
+ printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
101
+ i + 1,
102
+ FLAGS_batch_size,
103
+ time,
104
+ time / (float)FLAGS_batch_size);
105
+ }
106
+ }
107
+
108
+ if (FLAGS_time_cpu) {
109
+ faiss::gpu::CpuTimer timer;
110
+ cpuIndex.add(FLAGS_batch_size, addVecs.data());
111
+ auto time = timer.elapsedMilliseconds();
112
+
113
+ totalCpuTime += time;
114
+
115
+ if (FLAGS_per_batch_time) {
116
+ printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
117
+ i + 1,
118
+ FLAGS_batch_size,
119
+ time,
120
+ time / (float)FLAGS_batch_size);
121
+ }
122
+ }
123
+ }
87
124
 
88
- if (FLAGS_time_gpu) {
89
- faiss::gpu::CpuTimer timer;
90
- gpuIndex.add(FLAGS_batch_size, addVecs.data());
91
- CUDA_VERIFY(cudaDeviceSynchronize());
92
- auto time = timer.elapsedMilliseconds();
125
+ CUDA_VERIFY(cudaProfilerStop());
93
126
 
94
- totalGpuTime += time;
127
+ int total = FLAGS_batch_size * FLAGS_batches;
95
128
 
96
- if (FLAGS_per_batch_time) {
97
- printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
98
- i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
99
- }
129
+ if (FLAGS_time_gpu) {
130
+ printf("%d dim, %d centroids, %d x %d encoding\n"
131
+ "GPU time to add %d vectors (%d batches, %d per batch): "
132
+ "%.3f ms (%.3f us per)\n",
133
+ dim,
134
+ numCentroids,
135
+ bytesPerVec,
136
+ bitsPerCode,
137
+ total,
138
+ FLAGS_batches,
139
+ FLAGS_batch_size,
140
+ totalGpuTime,
141
+ totalGpuTime * 1000.0f / (float)total);
100
142
  }
101
143
 
102
144
  if (FLAGS_time_cpu) {
103
- faiss::gpu::CpuTimer timer;
104
- cpuIndex.add(FLAGS_batch_size, addVecs.data());
105
- auto time = timer.elapsedMilliseconds();
106
-
107
- totalCpuTime += time;
108
-
109
- if (FLAGS_per_batch_time) {
110
- printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
111
- i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
112
- }
145
+ printf("%d dim, %d centroids, %d x %d encoding\n"
146
+ "CPU time to add %d vectors (%d batches, %d per batch): "
147
+ "%.3f ms (%.3f us per)\n",
148
+ dim,
149
+ numCentroids,
150
+ bytesPerVec,
151
+ bitsPerCode,
152
+ total,
153
+ FLAGS_batches,
154
+ FLAGS_batch_size,
155
+ totalCpuTime,
156
+ totalCpuTime * 1000.0f / (float)total);
113
157
  }
114
- }
115
-
116
- CUDA_VERIFY(cudaProfilerStop());
117
-
118
- int total = FLAGS_batch_size * FLAGS_batches;
119
-
120
- if (FLAGS_time_gpu) {
121
- printf("%d dim, %d centroids, %d x %d encoding\n"
122
- "GPU time to add %d vectors (%d batches, %d per batch): "
123
- "%.3f ms (%.3f us per)\n",
124
- dim, numCentroids, bytesPerVec, bitsPerCode,
125
- total, FLAGS_batches, FLAGS_batch_size,
126
- totalGpuTime, totalGpuTime * 1000.0f / (float) total);
127
- }
128
-
129
- if (FLAGS_time_cpu) {
130
- printf("%d dim, %d centroids, %d x %d encoding\n"
131
- "CPU time to add %d vectors (%d batches, %d per batch): "
132
- "%.3f ms (%.3f us per)\n",
133
- dim, numCentroids, bytesPerVec, bitsPerCode,
134
- total, FLAGS_batches, FLAGS_batch_size,
135
- totalCpuTime, totalCpuTime * 1000.0f / (float) total);
136
- }
137
-
138
- return 0;
158
+
159
+ return 0;
139
160
  }
@@ -5,14 +5,13 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
8
+ #include <faiss/IndexFlat.h>
9
9
  #include <faiss/IndexIVFFlat.h>
10
10
  #include <faiss/IndexIVFPQ.h>
11
- #include <faiss/IndexFlat.h>
12
- #include <faiss/index_io.h>
13
11
  #include <faiss/gpu/test/TestUtils.h>
14
- #include <vector>
12
+ #include <faiss/index_io.h>
15
13
  #include <gflags/gflags.h>
14
+ #include <vector>
16
15
 
17
16
  // For IVFPQ:
18
17
  DEFINE_bool(ivfpq, false, "use IVFPQ encoding");
@@ -32,71 +31,83 @@ DEFINE_int32(num_train, -1, "number of database vecs to train on");
32
31
 
33
32
  template <typename T>
34
33
  void fillAndSave(T& index, int numTrain, int num, int dim) {
35
- auto trainVecs = faiss::gpu::randVecs(numTrain, dim);
36
- index.train(numTrain, trainVecs.data());
34
+ auto trainVecs = faiss::gpu::randVecs(numTrain, dim);
35
+ index.train(numTrain, trainVecs.data());
37
36
 
38
- constexpr int kAddChunk = 1000000;
37
+ constexpr int kAddChunk = 1000000;
39
38
 
40
- for (int i = 0; i < num; i += kAddChunk) {
41
- int numRemaining = (num - i) < kAddChunk ? (num - i) : kAddChunk;
42
- auto vecs = faiss::gpu::randVecs(numRemaining, dim);
39
+ for (int i = 0; i < num; i += kAddChunk) {
40
+ int numRemaining = (num - i) < kAddChunk ? (num - i) : kAddChunk;
41
+ auto vecs = faiss::gpu::randVecs(numRemaining, dim);
43
42
 
44
- printf("adding at %d: %d\n", i, numRemaining);
45
- index.add(numRemaining, vecs.data());
46
- }
43
+ printf("adding at %d: %d\n", i, numRemaining);
44
+ index.add(numRemaining, vecs.data());
45
+ }
47
46
 
48
- faiss::write_index(&index, FLAGS_out.c_str());
47
+ faiss::write_index(&index, FLAGS_out.c_str());
49
48
  }
50
49
 
51
50
  int main(int argc, char** argv) {
52
- gflags::ParseCommandLineFlags(&argc, &argv, true);
53
-
54
- // Either ivfpq or ivfflat must be set
55
- if ((FLAGS_ivfpq && FLAGS_ivfflat) ||
56
- (!FLAGS_ivfpq && !FLAGS_ivfflat)) {
57
- printf("must specify either ivfpq or ivfflat\n");
58
- return 1;
59
- }
60
-
61
- auto dim = FLAGS_dim;
62
- auto numCentroids = FLAGS_num_coarse;
63
- auto num = FLAGS_num;
64
- auto numTrain = FLAGS_num_train;
65
- numTrain = numTrain == -1 ? std::max((num / 4), 1) : numTrain;
66
- numTrain = std::min(num, numTrain);
67
-
68
- if (FLAGS_ivfpq) {
69
- faiss::IndexFlatL2 quantizer(dim);
70
- faiss::IndexIVFPQ index(&quantizer, dim, numCentroids,
71
- FLAGS_codes, FLAGS_bits_per_code);
72
- index.verbose = true;
73
-
74
- printf("IVFPQ: codes %d bits per code %d\n",
75
- FLAGS_codes, FLAGS_bits_per_code);
76
- printf("Lists: %d\n", numCentroids);
77
- printf("Database: dim %d num vecs %d trained on %d\n", dim, num, numTrain);
78
- printf("output file: %s\n", FLAGS_out.c_str());
79
-
80
- fillAndSave(index, numTrain, num, dim);
81
- } else if (FLAGS_ivfflat) {
82
- faiss::IndexFlatL2 quantizerL2(dim);
83
- faiss::IndexFlatIP quantizerIP(dim);
84
-
85
- faiss::IndexFlat* quantizer = FLAGS_l2 ?
86
- (faiss::IndexFlat*) &quantizerL2 :
87
- (faiss::IndexFlat*) &quantizerIP;
88
-
89
- faiss::IndexIVFFlat index(quantizer, dim, numCentroids,
90
- FLAGS_l2 ? faiss::METRIC_L2 :
91
- faiss::METRIC_INNER_PRODUCT);
92
-
93
- printf("IVFFlat: metric %s\n", FLAGS_l2 ? "L2" : "IP");
94
- printf("Lists: %d\n", numCentroids);
95
- printf("Database: dim %d num vecs %d trained on %d\n", dim, num, numTrain);
96
- printf("output file: %s\n", FLAGS_out.c_str());
97
-
98
- fillAndSave(index, numTrain, num, dim);
99
- }
100
-
101
- return 0;
51
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
52
+
53
+ // Either ivfpq or ivfflat must be set
54
+ if ((FLAGS_ivfpq && FLAGS_ivfflat) || (!FLAGS_ivfpq && !FLAGS_ivfflat)) {
55
+ printf("must specify either ivfpq or ivfflat\n");
56
+ return 1;
57
+ }
58
+
59
+ auto dim = FLAGS_dim;
60
+ auto numCentroids = FLAGS_num_coarse;
61
+ auto num = FLAGS_num;
62
+ auto numTrain = FLAGS_num_train;
63
+ numTrain = numTrain == -1 ? std::max((num / 4), 1) : numTrain;
64
+ numTrain = std::min(num, numTrain);
65
+
66
+ if (FLAGS_ivfpq) {
67
+ faiss::IndexFlatL2 quantizer(dim);
68
+ faiss::IndexIVFPQ index(
69
+ &quantizer,
70
+ dim,
71
+ numCentroids,
72
+ FLAGS_codes,
73
+ FLAGS_bits_per_code);
74
+ index.verbose = true;
75
+
76
+ printf("IVFPQ: codes %d bits per code %d\n",
77
+ FLAGS_codes,
78
+ FLAGS_bits_per_code);
79
+ printf("Lists: %d\n", numCentroids);
80
+ printf("Database: dim %d num vecs %d trained on %d\n",
81
+ dim,
82
+ num,
83
+ numTrain);
84
+ printf("output file: %s\n", FLAGS_out.c_str());
85
+
86
+ fillAndSave(index, numTrain, num, dim);
87
+ } else if (FLAGS_ivfflat) {
88
+ faiss::IndexFlatL2 quantizerL2(dim);
89
+ faiss::IndexFlatIP quantizerIP(dim);
90
+
91
+ faiss::IndexFlat* quantizer = FLAGS_l2
92
+ ? (faiss::IndexFlat*)&quantizerL2
93
+ : (faiss::IndexFlat*)&quantizerIP;
94
+
95
+ faiss::IndexIVFFlat index(
96
+ quantizer,
97
+ dim,
98
+ numCentroids,
99
+ FLAGS_l2 ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT);
100
+
101
+ printf("IVFFlat: metric %s\n", FLAGS_l2 ? "L2" : "IP");
102
+ printf("Lists: %d\n", numCentroids);
103
+ printf("Database: dim %d num vecs %d trained on %d\n",
104
+ dim,
105
+ num,
106
+ numTrain);
107
+ printf("output file: %s\n", FLAGS_out.c_str());
108
+
109
+ fillAndSave(index, numTrain, num, dim);
110
+ }
111
+
112
+ return 0;
102
113
  }
@@ -5,270 +5,285 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
8
  #include <faiss/gpu/impl/InterleavedCodes.h>
10
- #include <faiss/gpu/utils/StaticUtils.h>
11
9
  #include <faiss/gpu/test/TestUtils.h>
12
- #include <cmath>
10
+ #include <faiss/gpu/utils/StaticUtils.h>
13
11
  #include <gtest/gtest.h>
12
+ #include <cmath>
14
13
  #include <random>
15
14
  #include <sstream>
16
15
  #include <vector>
17
16
 
18
17
  TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
19
- using namespace faiss::gpu;
18
+ using namespace faiss::gpu;
20
19
 
21
- // We are fine using non-fixed seeds here, the results should be fully
22
- // deterministic
23
- auto seed = std::random_device()();
24
- std::mt19937 gen(seed);
25
- std::uniform_int_distribution<uint8_t> dist;
20
+ // We are fine using non-fixed seeds here, the results should be fully
21
+ // deterministic
22
+ auto seed = std::random_device()();
23
+ std::mt19937 gen(seed);
24
+ std::uniform_int_distribution<uint8_t> dist;
26
25
 
27
- std::cout << "seed " << seed << "\n";
26
+ std::cout << "seed " << seed << "\n";
28
27
 
29
- for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
30
- for (auto dims : {1, 7, 8, 31, 32}) {
31
- for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
32
- std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
28
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
29
+ for (auto dims : {1, 7, 8, 31, 32}) {
30
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
31
+ std::cout << bitsPerCode << " " << dims << " " << numVecs
32
+ << "\n";
33
33
 
34
- int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
- std::vector<uint8_t> data(numVecs * srcVecSize);
34
+ int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
35
+ std::vector<uint8_t> data(numVecs * srcVecSize);
36
36
 
37
- for (auto& v : data) {
38
- v = dist(gen);
39
- }
37
+ for (auto& v : data) {
38
+ v = dist(gen);
39
+ }
40
40
 
41
- // currently unimplemented
42
- EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
41
+ // currently unimplemented
42
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
43
43
 
44
- // Due to bit packing, mask out bits that should be zero based on
45
- // dimensions we shouldn't have present
46
- int vectorSizeBits = dims * bitsPerCode;
47
- int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
- int remainder = vectorSizeBits % 8;
44
+ // Due to bit packing, mask out bits that should be zero based
45
+ // on dimensions we shouldn't have present
46
+ int vectorSizeBits = dims * bitsPerCode;
47
+ int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
48
+ int remainder = vectorSizeBits % 8;
49
49
 
50
- if (remainder > 0) {
51
- uint8_t mask = 0xff >> (8 - remainder);
50
+ if (remainder > 0) {
51
+ uint8_t mask = 0xff >> (8 - remainder);
52
52
 
53
- for (int i = 0; i < numVecs; ++i) {
54
- int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
- data[lastVecByte] &= mask;
56
- }
57
- }
53
+ for (int i = 0; i < numVecs; ++i) {
54
+ int lastVecByte = (i + 1) * vectorSizeBytes - 1;
55
+ data[lastVecByte] &= mask;
56
+ }
57
+ }
58
58
 
59
- auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
60
- auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
59
+ auto up =
60
+ unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
61
+ auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
61
62
 
62
- EXPECT_EQ(data, p);
63
- }
63
+ EXPECT_EQ(data, p);
64
+ }
65
+ }
64
66
  }
65
- }
66
67
  }
67
68
 
68
69
  TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
69
- using namespace faiss::gpu;
70
+ using namespace faiss::gpu;
70
71
 
71
- // We are fine using non-fixed seeds here, the results should be fully
72
- // deterministic
73
- std::random_device rd;
74
- std::mt19937 gen(rd());
75
- std::uniform_int_distribution<uint8_t> dist;
72
+ // We are fine using non-fixed seeds here, the results should be fully
73
+ // deterministic
74
+ std::random_device rd;
75
+ std::mt19937 gen(rd());
76
+ std::uniform_int_distribution<uint8_t> dist;
76
77
 
77
- for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
78
- for (auto dims : {1, 7, 8, 31, 32}) {
79
- for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
80
- std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
78
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
79
+ for (auto dims : {1, 7, 8, 31, 32}) {
80
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
81
+ std::cout << bitsPerCode << " " << dims << " " << numVecs
82
+ << "\n";
81
83
 
82
- std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
84
+ std::vector<uint8_t> data(
85
+ numVecs * dims * utils::divUp(bitsPerCode, 8));
83
86
 
84
- // currently unimplemented
85
- EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
87
+ // currently unimplemented
88
+ EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
86
89
 
87
- // Mask out high bits we shouldn't have based on code size
88
- uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
90
+ // Mask out high bits we shouldn't have based on code size
91
+ uint8_t mask =
92
+ bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
89
93
 
90
- for (auto& v : data) {
91
- v = dist(gen) & mask;
92
- }
94
+ for (auto& v : data) {
95
+ v = dist(gen) & mask;
96
+ }
93
97
 
94
- auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
95
- auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
98
+ auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
99
+ auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
96
100
 
97
- EXPECT_EQ(data, up);
98
- }
101
+ EXPECT_EQ(data, up);
102
+ }
103
+ }
99
104
  }
100
- }
101
105
  }
102
106
 
103
107
  TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
104
- using namespace faiss::gpu;
105
-
106
- // We are fine using non-fixed seeds here, the results should be fully
107
- // deterministic
108
- std::random_device rd;
109
- std::mt19937 gen(rd());
110
- std::uniform_int_distribution<uint8_t> dist;
111
-
112
- for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
113
- for (auto dims : {1, 7, 8, 31, 32}) {
114
- for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
115
- std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
116
-
117
- int blocks = utils::divUp(numVecs, 32);
118
- int bytesPerDimBlock = 32 * bitsPerCode / 8;
119
- int bytesPerBlock = bytesPerDimBlock * dims;
120
- int size = blocks * bytesPerBlock;
121
-
122
- std::vector<uint8_t> data(size);
123
-
124
- if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
125
- int bytesPerCode = bitsPerCode / 8;
126
-
127
- for (int i = 0; i < blocks; ++i) {
128
- for (int j = 0; j < dims; ++j) {
129
- for (int k = 0; k < 32; ++k) {
130
- for (int l = 0; l < bytesPerCode; ++l) {
131
- int vec = i * 32 + k;
132
- if (vec < numVecs) {
133
- data[i * bytesPerBlock +
134
- j * bytesPerDimBlock +
135
- k * bytesPerCode + l] = dist(gen);
136
- }
137
- }
138
- }
139
- }
140
- }
141
- } else if (bitsPerCode < 8) {
142
- for (int i = 0; i < blocks; ++i) {
143
- for (int j = 0; j < dims; ++j) {
144
- for (int k = 0; k < bytesPerDimBlock; ++k) {
145
- int loVec = i * 32 + (k * 8) / bitsPerCode;
146
- int hiVec = loVec + 1;
147
- int hiVec2 = hiVec + 1;
148
-
149
- uint8_t lo = loVec < numVecs ?
150
- dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
151
- uint8_t hi = hiVec < numVecs ?
152
- dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
153
- uint8_t hi2 = hiVec2 < numVecs ?
154
- dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
155
-
156
- uint8_t v = 0;
157
- if (bitsPerCode == 4) {
158
- v = lo | (hi << 4);
159
- } else if (bitsPerCode == 5) {
160
- switch (k % 5) {
161
- case 0:
162
- // 5 msbs of lower as vOut lsbs
163
- // 3 lsbs of upper as vOut msbs
164
- v = (lo & 0x1f) | (hi << 5);
165
- break;
166
- case 1:
167
- // 2 msbs of lower as vOut lsbs
168
- // 5 lsbs of upper as vOut msbs
169
- // 1 lsbs of upper2 as vOut msb
170
- v = (lo >> 3) | (hi << 2) | (hi2 << 7);
171
- break;
172
- case 2:
173
- // 4 msbs of lower as vOut lsbs
174
- // 4 lsbs of upper as vOut msbs
175
- v = (lo >> 1) | (hi << 4);
176
- break;
177
- case 3:
178
- // 1 msbs of lower as vOut lsbs
179
- // 5 lsbs of upper as vOut msbs
180
- // 2 lsbs of upper2 as vOut msb
181
- v = (lo >> 4) | (hi << 1) | (hi2 << 6);
182
- break;
183
- case 4:
184
- // 3 msbs of lower as vOut lsbs
185
- // 5 lsbs of upper as vOut msbs
186
- v = (lo >> 2) | (hi << 3);
187
- break;
188
- }
189
- } else if (bitsPerCode == 6) {
190
- switch (k % 3) {
191
- case 0:
192
- // 6 msbs of lower as vOut lsbs
193
- // 2 lsbs of upper as vOut msbs
194
- v = (lo & 0x3f) | (hi << 6);
195
- break;
196
- case 1:
197
- // 4 msbs of lower as vOut lsbs
198
- // 4 lsbs of upper as vOut msbs
199
- v = (lo >> 2) | (hi << 4);
200
- break;
201
- case 2:
202
- // 2 msbs of lower as vOut lsbs
203
- // 6 lsbs of upper as vOut msbs
204
- v = (lo >> 4) | (hi << 2);
205
- break;
206
- }
108
+ using namespace faiss::gpu;
109
+
110
+ // We are fine using non-fixed seeds here, the results should be fully
111
+ // deterministic
112
+ std::random_device rd;
113
+ std::mt19937 gen(rd());
114
+ std::uniform_int_distribution<uint8_t> dist;
115
+
116
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
117
+ for (auto dims : {1, 7, 8, 31, 32}) {
118
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
119
+ std::cout << bitsPerCode << " " << dims << " " << numVecs
120
+ << "\n";
121
+
122
+ int blocks = utils::divUp(numVecs, 32);
123
+ int bytesPerDimBlock = 32 * bitsPerCode / 8;
124
+ int bytesPerBlock = bytesPerDimBlock * dims;
125
+ int size = blocks * bytesPerBlock;
126
+
127
+ std::vector<uint8_t> data(size);
128
+
129
+ if (bitsPerCode == 8 || bitsPerCode == 16 ||
130
+ bitsPerCode == 32) {
131
+ int bytesPerCode = bitsPerCode / 8;
132
+
133
+ for (int i = 0; i < blocks; ++i) {
134
+ for (int j = 0; j < dims; ++j) {
135
+ for (int k = 0; k < 32; ++k) {
136
+ for (int l = 0; l < bytesPerCode; ++l) {
137
+ int vec = i * 32 + k;
138
+ if (vec < numVecs) {
139
+ data[i * bytesPerBlock +
140
+ j * bytesPerDimBlock +
141
+ k * bytesPerCode + l] = dist(gen);
142
+ }
143
+ }
144
+ }
145
+ }
146
+ }
147
+ } else if (bitsPerCode < 8) {
148
+ for (int i = 0; i < blocks; ++i) {
149
+ for (int j = 0; j < dims; ++j) {
150
+ for (int k = 0; k < bytesPerDimBlock; ++k) {
151
+ int loVec = i * 32 + (k * 8) / bitsPerCode;
152
+ int hiVec = loVec + 1;
153
+ int hiVec2 = hiVec + 1;
154
+
155
+ uint8_t lo = loVec < numVecs ? dist(gen) &
156
+ (0xff >> (8 - bitsPerCode))
157
+ : 0;
158
+ uint8_t hi = hiVec < numVecs ? dist(gen) &
159
+ (0xff >> (8 - bitsPerCode))
160
+ : 0;
161
+ uint8_t hi2 = hiVec2 < numVecs ? dist(gen) &
162
+ (0xff >> (8 - bitsPerCode))
163
+ : 0;
164
+
165
+ uint8_t v = 0;
166
+ if (bitsPerCode == 4) {
167
+ v = lo | (hi << 4);
168
+ } else if (bitsPerCode == 5) {
169
+ switch (k % 5) {
170
+ case 0:
171
+ // 5 msbs of lower as vOut lsbs
172
+ // 3 lsbs of upper as vOut msbs
173
+ v = (lo & 0x1f) | (hi << 5);
174
+ break;
175
+ case 1:
176
+ // 2 msbs of lower as vOut lsbs
177
+ // 5 lsbs of upper as vOut msbs
178
+ // 1 lsbs of upper2 as vOut msb
179
+ v = (lo >> 3) | (hi << 2) |
180
+ (hi2 << 7);
181
+ break;
182
+ case 2:
183
+ // 4 msbs of lower as vOut lsbs
184
+ // 4 lsbs of upper as vOut msbs
185
+ v = (lo >> 1) | (hi << 4);
186
+ break;
187
+ case 3:
188
+ // 1 msbs of lower as vOut lsbs
189
+ // 5 lsbs of upper as vOut msbs
190
+ // 2 lsbs of upper2 as vOut msb
191
+ v = (lo >> 4) | (hi << 1) |
192
+ (hi2 << 6);
193
+ break;
194
+ case 4:
195
+ // 3 msbs of lower as vOut lsbs
196
+ // 5 lsbs of upper as vOut msbs
197
+ v = (lo >> 2) | (hi << 3);
198
+ break;
199
+ }
200
+ } else if (bitsPerCode == 6) {
201
+ switch (k % 3) {
202
+ case 0:
203
+ // 6 msbs of lower as vOut lsbs
204
+ // 2 lsbs of upper as vOut msbs
205
+ v = (lo & 0x3f) | (hi << 6);
206
+ break;
207
+ case 1:
208
+ // 4 msbs of lower as vOut lsbs
209
+ // 4 lsbs of upper as vOut msbs
210
+ v = (lo >> 2) | (hi << 4);
211
+ break;
212
+ case 2:
213
+ // 2 msbs of lower as vOut lsbs
214
+ // 6 lsbs of upper as vOut msbs
215
+ v = (lo >> 4) | (hi << 2);
216
+ break;
217
+ }
218
+ } else {
219
+ // unimplemented
220
+ EXPECT_TRUE(false);
221
+ }
222
+
223
+ data[i * bytesPerBlock + j * bytesPerDimBlock +
224
+ k] = v;
225
+ }
226
+ }
227
+ }
207
228
  } else {
208
- // unimplemented
209
- EXPECT_TRUE(false);
229
+ // unimplemented
230
+ EXPECT_TRUE(false);
210
231
  }
211
232
 
212
- data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
213
- }
233
+ auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
234
+ auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
235
+
236
+ EXPECT_EQ(data, p);
214
237
  }
215
- }
216
- } else {
217
- // unimplemented
218
- EXPECT_TRUE(false);
219
238
  }
220
-
221
- auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
222
- auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
223
-
224
- EXPECT_EQ(data, p);
225
- }
226
239
  }
227
- }
228
240
  }
229
241
 
230
242
  TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
231
- using namespace faiss::gpu;
232
-
233
- // We are fine using non-fixed seeds here, the results should be fully
234
- // deterministic
235
- std::random_device rd;
236
- std::mt19937 gen(rd());
237
- std::uniform_int_distribution<uint8_t> dist;
238
-
239
- for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
240
- for (auto dims : {1, 7, 8, 31, 32}) {
241
- for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
242
- std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
243
-
244
- std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
245
-
246
- if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
247
- for (auto& v : data) {
248
- v = dist(gen);
249
- }
250
- } else if (bitsPerCode < 8) {
251
- uint8_t mask = 0xff >> (8 - bitsPerCode);
252
-
253
- for (auto& v : data) {
254
- v = dist(gen) & mask;
255
- }
256
- } else {
257
- // unimplemented
258
- EXPECT_TRUE(false);
259
- }
243
+ using namespace faiss::gpu;
244
+
245
+ // We are fine using non-fixed seeds here, the results should be fully
246
+ // deterministic
247
+ std::random_device rd;
248
+ std::mt19937 gen(rd());
249
+ std::uniform_int_distribution<uint8_t> dist;
250
+
251
+ for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
252
+ for (auto dims : {1, 7, 8, 31, 32}) {
253
+ for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
254
+ std::cout << bitsPerCode << " " << dims << " " << numVecs
255
+ << "\n";
256
+
257
+ std::vector<uint8_t> data(
258
+ numVecs * dims * utils::divUp(bitsPerCode, 8));
259
+
260
+ if (bitsPerCode == 8 || bitsPerCode == 16 ||
261
+ bitsPerCode == 32) {
262
+ for (auto& v : data) {
263
+ v = dist(gen);
264
+ }
265
+ } else if (bitsPerCode < 8) {
266
+ uint8_t mask = 0xff >> (8 - bitsPerCode);
267
+
268
+ for (auto& v : data) {
269
+ v = dist(gen) & mask;
270
+ }
271
+ } else {
272
+ // unimplemented
273
+ EXPECT_TRUE(false);
274
+ }
260
275
 
261
- auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
262
- auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
276
+ auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
277
+ auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
263
278
 
264
- EXPECT_EQ(data, up);
265
- }
279
+ EXPECT_EQ(data, up);
280
+ }
281
+ }
266
282
  }
267
- }
268
283
  }
269
284
 
270
285
  int main(int argc, char** argv) {
271
- testing::InitGoogleTest(&argc, argv);
286
+ testing::InitGoogleTest(&argc, argv);
272
287
 
273
- return RUN_ALL_TESTS();
288
+ return RUN_ALL_TESTS();
274
289
  }