RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp CHANGED Viewed

@@ -5,8 +5,6 @@
  * LICENSE file in the root directory of this source tree.
  */
 #include <cuda_profiler_api.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexIVFPQ.h>
@@ -32,108 +30,131 @@ DEFINE_bool(per_batch_time, false, "print per-batch times");
 DEFINE_bool(reserve_memory, false, "whether or not to pre-reserve memory");
 int main(int argc, char** argv) {
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-  cudaProfilerStop();
-  int dim = FLAGS_dim;
-  int numCentroids = FLAGS_centroids;
-  int bytesPerVec = FLAGS_bytes_per_vec;
-  int bitsPerCode = FLAGS_bits_per_code;
+    gflags::ParseCommandLineFlags(&argc, &argv, true);
-  faiss::gpu::StandardGpuResources res;
+    cudaProfilerStop();
-  // IndexIVFPQ will complain, but just give us enough to get through this
-  int numTrain = 4 * numCentroids;
-  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
+    int dim = FLAGS_dim;
+    int numCentroids = FLAGS_centroids;
+    int bytesPerVec = FLAGS_bytes_per_vec;
+    int bitsPerCode = FLAGS_bits_per_code;
-  faiss::IndexFlatL2 coarseQuantizer(dim);
-  faiss::IndexIVFPQ cpuIndex(&coarseQuantizer, dim, numCentroids,
-                             bytesPerVec, bitsPerCode);
-  if (FLAGS_time_cpu) {
-    cpuIndex.train(numTrain, trainVecs.data());
-  }
+    faiss::gpu::StandardGpuResources res;
-  faiss::gpu::GpuIndexIVFPQConfig config;
-  config.device = 0;
-  config.indicesOptions = (faiss::gpu::IndicesOptions) FLAGS_index;
+    // IndexIVFPQ will complain, but just give us enough to get through this
+    int numTrain = 4 * numCentroids;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
-  faiss::gpu::GpuIndexIVFPQ gpuIndex(
-    &res, dim, numCentroids, bytesPerVec, bitsPerCode,
-    faiss::METRIC_L2, config);
-  if (FLAGS_time_gpu) {
-    gpuIndex.train(numTrain, trainVecs.data());
-    if (FLAGS_reserve_memory) {
-      size_t numVecs = (size_t) FLAGS_batches * (size_t) FLAGS_batch_size;
-      gpuIndex.reserveMemory(numVecs);
+    faiss::IndexFlatL2 coarseQuantizer(dim);
+    faiss::IndexIVFPQ cpuIndex(
+            &coarseQuantizer, dim, numCentroids, bytesPerVec, bitsPerCode);
+    if (FLAGS_time_cpu) {
+        cpuIndex.train(numTrain, trainVecs.data());
     }
-  }
-  cudaDeviceSynchronize();
-  CUDA_VERIFY(cudaProfilerStart());
+    faiss::gpu::GpuIndexIVFPQConfig config;
+    config.device = 0;
+    config.indicesOptions = (faiss::gpu::IndicesOptions)FLAGS_index;
-  float totalGpuTime = 0.0f;
-  float totalCpuTime = 0.0f;
+    faiss::gpu::GpuIndexIVFPQ gpuIndex(
+            &res,
+            dim,
+            numCentroids,
+            bytesPerVec,
+            bitsPerCode,
+            faiss::METRIC_L2,
+            config);
-  for (int i = 0; i < FLAGS_batches; ++i) {
-    if (!FLAGS_per_batch_time) {
-      if (i % 10 == 0) {
-        printf("Adding batch %d\n", i + 1);
-      }
+    if (FLAGS_time_gpu) {
+        gpuIndex.train(numTrain, trainVecs.data());
+        if (FLAGS_reserve_memory) {
+            size_t numVecs = (size_t)FLAGS_batches * (size_t)FLAGS_batch_size;
+            gpuIndex.reserveMemory(numVecs);
+        }
     }
-    auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
+    cudaDeviceSynchronize();
+    CUDA_VERIFY(cudaProfilerStart());
+    float totalGpuTime = 0.0f;
+    float totalCpuTime = 0.0f;
+    for (int i = 0; i < FLAGS_batches; ++i) {
+        if (!FLAGS_per_batch_time) {
+            if (i % 10 == 0) {
+                printf("Adding batch %d\n", i + 1);
+            }
+        }
+        auto addVecs = faiss::gpu::randVecs(FLAGS_batch_size, dim);
+        if (FLAGS_time_gpu) {
+            faiss::gpu::CpuTimer timer;
+            gpuIndex.add(FLAGS_batch_size, addVecs.data());
+            CUDA_VERIFY(cudaDeviceSynchronize());
+            auto time = timer.elapsedMilliseconds();
+            totalGpuTime += time;
+            if (FLAGS_per_batch_time) {
+                printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
+                       i + 1,
+                       FLAGS_batch_size,
+                       time,
+                       time / (float)FLAGS_batch_size);
+            }
+        }
+        if (FLAGS_time_cpu) {
+            faiss::gpu::CpuTimer timer;
+            cpuIndex.add(FLAGS_batch_size, addVecs.data());
+            auto time = timer.elapsedMilliseconds();
+            totalCpuTime += time;
+            if (FLAGS_per_batch_time) {
+                printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
+                       i + 1,
+                       FLAGS_batch_size,
+                       time,
+                       time / (float)FLAGS_batch_size);
+            }
+        }
+    }
-    if (FLAGS_time_gpu) {
-      faiss::gpu::CpuTimer timer;
-      gpuIndex.add(FLAGS_batch_size, addVecs.data());
-      CUDA_VERIFY(cudaDeviceSynchronize());
-      auto time = timer.elapsedMilliseconds();
+    CUDA_VERIFY(cudaProfilerStop());
-      totalGpuTime += time;
+    int total = FLAGS_batch_size * FLAGS_batches;
-      if (FLAGS_per_batch_time) {
-      printf("Batch %d | GPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
-             i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
-      }
+    if (FLAGS_time_gpu) {
+        printf("%d dim, %d centroids, %d x %d encoding\n"
+               "GPU time to add %d vectors (%d batches, %d per batch): "
+               "%.3f ms (%.3f us per)\n",
+               dim,
+               numCentroids,
+               bytesPerVec,
+               bitsPerCode,
+               total,
+               FLAGS_batches,
+               FLAGS_batch_size,
+               totalGpuTime,
+               totalGpuTime * 1000.0f / (float)total);
     }
     if (FLAGS_time_cpu) {
-      faiss::gpu::CpuTimer timer;
-      cpuIndex.add(FLAGS_batch_size, addVecs.data());
-      auto time = timer.elapsedMilliseconds();
-      totalCpuTime += time;
-      if (FLAGS_per_batch_time) {
-        printf("Batch %d | CPU time to add %d vecs: %.3f ms (%.5f ms per)\n",
-               i + 1, FLAGS_batch_size, time, time / (float) FLAGS_batch_size);
-      }
+        printf("%d dim, %d centroids, %d x %d encoding\n"
+               "CPU time to add %d vectors (%d batches, %d per batch): "
+               "%.3f ms (%.3f us per)\n",
+               dim,
+               numCentroids,
+               bytesPerVec,
+               bitsPerCode,
+               total,
+               FLAGS_batches,
+               FLAGS_batch_size,
+               totalCpuTime,
+               totalCpuTime * 1000.0f / (float)total);
     }
-  }
-  CUDA_VERIFY(cudaProfilerStop());
-  int total = FLAGS_batch_size * FLAGS_batches;
-  if (FLAGS_time_gpu) {
-    printf("%d dim, %d centroids, %d x %d encoding\n"
-           "GPU time to add %d vectors (%d batches, %d per batch): "
-           "%.3f ms (%.3f us per)\n",
-           dim, numCentroids, bytesPerVec, bitsPerCode,
-           total, FLAGS_batches, FLAGS_batch_size,
-           totalGpuTime, totalGpuTime * 1000.0f / (float) total);
-  }
-  if (FLAGS_time_cpu) {
-    printf("%d dim, %d centroids, %d x %d encoding\n"
-           "CPU time to add %d vectors (%d batches, %d per batch): "
-           "%.3f ms (%.3f us per)\n",
-           dim, numCentroids, bytesPerVec, bitsPerCode,
-           total, FLAGS_batches, FLAGS_batch_size,
-           totalCpuTime, totalCpuTime * 1000.0f / (float) total);
-  }
-  return 0;
+    return 0;
 }

data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp CHANGED Viewed

@@ -5,14 +5,13 @@
  * LICENSE file in the root directory of this source tree.
  */
+#include <faiss/IndexFlat.h>
 #include <faiss/IndexIVFFlat.h>
 #include <faiss/IndexIVFPQ.h>
-#include <faiss/IndexFlat.h>
-#include <faiss/index_io.h>
 #include <faiss/gpu/test/TestUtils.h>
-#include <vector>
+#include <faiss/index_io.h>
 #include <gflags/gflags.h>
+#include <vector>
 // For IVFPQ:
 DEFINE_bool(ivfpq, false, "use IVFPQ encoding");
@@ -32,71 +31,83 @@ DEFINE_int32(num_train, -1, "number of database vecs to train on");
 template <typename T>
 void fillAndSave(T& index, int numTrain, int num, int dim) {
-  auto trainVecs = faiss::gpu::randVecs(numTrain, dim);
-  index.train(numTrain, trainVecs.data());
+    auto trainVecs = faiss::gpu::randVecs(numTrain, dim);
+    index.train(numTrain, trainVecs.data());
-  constexpr int kAddChunk = 1000000;
+    constexpr int kAddChunk = 1000000;
-  for (int i = 0; i < num; i += kAddChunk) {
-    int numRemaining = (num - i) < kAddChunk ? (num - i) : kAddChunk;
-    auto vecs = faiss::gpu::randVecs(numRemaining, dim);
+    for (int i = 0; i < num; i += kAddChunk) {
+        int numRemaining = (num - i) < kAddChunk ? (num - i) : kAddChunk;
+        auto vecs = faiss::gpu::randVecs(numRemaining, dim);
-    printf("adding at %d: %d\n", i, numRemaining);
-    index.add(numRemaining, vecs.data());
-  }
+        printf("adding at %d: %d\n", i, numRemaining);
+        index.add(numRemaining, vecs.data());
+    }
-  faiss::write_index(&index, FLAGS_out.c_str());
+    faiss::write_index(&index, FLAGS_out.c_str());
 }
 int main(int argc, char** argv) {
-  gflags::ParseCommandLineFlags(&argc, &argv, true);
-  // Either ivfpq or ivfflat must be set
-  if ((FLAGS_ivfpq && FLAGS_ivfflat) ||
-      (!FLAGS_ivfpq && !FLAGS_ivfflat)) {
-    printf("must specify either ivfpq or ivfflat\n");
-    return 1;
-  }
-  auto dim = FLAGS_dim;
-  auto numCentroids = FLAGS_num_coarse;
-  auto num = FLAGS_num;
-  auto numTrain = FLAGS_num_train;
-  numTrain = numTrain == -1 ? std::max((num / 4), 1) : numTrain;
-  numTrain = std::min(num, numTrain);
-  if (FLAGS_ivfpq) {
-    faiss::IndexFlatL2 quantizer(dim);
-    faiss::IndexIVFPQ index(&quantizer, dim, numCentroids,
-                            FLAGS_codes, FLAGS_bits_per_code);
-    index.verbose = true;
-    printf("IVFPQ: codes %d bits per code %d\n",
-           FLAGS_codes, FLAGS_bits_per_code);
-    printf("Lists: %d\n", numCentroids);
-    printf("Database: dim %d num vecs %d trained on %d\n", dim, num, numTrain);
-    printf("output file: %s\n", FLAGS_out.c_str());
-    fillAndSave(index, numTrain, num, dim);
-  } else if (FLAGS_ivfflat) {
-    faiss::IndexFlatL2 quantizerL2(dim);
-    faiss::IndexFlatIP quantizerIP(dim);
-    faiss::IndexFlat* quantizer = FLAGS_l2 ?
-      (faiss::IndexFlat*) &quantizerL2 :
-      (faiss::IndexFlat*) &quantizerIP;
-    faiss::IndexIVFFlat index(quantizer, dim, numCentroids,
-                              FLAGS_l2 ? faiss::METRIC_L2 :
-                              faiss::METRIC_INNER_PRODUCT);
-    printf("IVFFlat: metric %s\n", FLAGS_l2 ? "L2" : "IP");
-    printf("Lists: %d\n", numCentroids);
-    printf("Database: dim %d num vecs %d trained on %d\n", dim, num, numTrain);
-    printf("output file: %s\n", FLAGS_out.c_str());
-    fillAndSave(index, numTrain, num, dim);
-  }
-  return 0;
+    gflags::ParseCommandLineFlags(&argc, &argv, true);
+    // Either ivfpq or ivfflat must be set
+    if ((FLAGS_ivfpq && FLAGS_ivfflat) || (!FLAGS_ivfpq && !FLAGS_ivfflat)) {
+        printf("must specify either ivfpq or ivfflat\n");
+        return 1;
+    }
+    auto dim = FLAGS_dim;
+    auto numCentroids = FLAGS_num_coarse;
+    auto num = FLAGS_num;
+    auto numTrain = FLAGS_num_train;
+    numTrain = numTrain == -1 ? std::max((num / 4), 1) : numTrain;
+    numTrain = std::min(num, numTrain);
+    if (FLAGS_ivfpq) {
+        faiss::IndexFlatL2 quantizer(dim);
+        faiss::IndexIVFPQ index(
+                &quantizer,
+                dim,
+                numCentroids,
+                FLAGS_codes,
+                FLAGS_bits_per_code);
+        index.verbose = true;
+        printf("IVFPQ: codes %d bits per code %d\n",
+               FLAGS_codes,
+               FLAGS_bits_per_code);
+        printf("Lists: %d\n", numCentroids);
+        printf("Database: dim %d num vecs %d trained on %d\n",
+               dim,
+               num,
+               numTrain);
+        printf("output file: %s\n", FLAGS_out.c_str());
+        fillAndSave(index, numTrain, num, dim);
+    } else if (FLAGS_ivfflat) {
+        faiss::IndexFlatL2 quantizerL2(dim);
+        faiss::IndexFlatIP quantizerIP(dim);
+        faiss::IndexFlat* quantizer = FLAGS_l2
+                ? (faiss::IndexFlat*)&quantizerL2
+                : (faiss::IndexFlat*)&quantizerIP;
+        faiss::IndexIVFFlat index(
+                quantizer,
+                dim,
+                numCentroids,
+                FLAGS_l2 ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT);
+        printf("IVFFlat: metric %s\n", FLAGS_l2 ? "L2" : "IP");
+        printf("Lists: %d\n", numCentroids);
+        printf("Database: dim %d num vecs %d trained on %d\n",
+               dim,
+               num,
+               numTrain);
+        printf("output file: %s\n", FLAGS_out.c_str());
+        fillAndSave(index, numTrain, num, dim);
+    }
+    return 0;
 }

data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp CHANGED Viewed

@@ -5,270 +5,285 @@
  * LICENSE file in the root directory of this source tree.
  */
 #include <faiss/gpu/impl/InterleavedCodes.h>
-#include <faiss/gpu/utils/StaticUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
-#include <cmath>
+#include <faiss/gpu/utils/StaticUtils.h>
 #include <gtest/gtest.h>
+#include <cmath>
 #include <random>
 #include <sstream>
 #include <vector>
 TEST(TestCodePacking, NonInterleavedCodes_UnpackPack) {
-  using namespace faiss::gpu;
+    using namespace faiss::gpu;
-  // We are fine using non-fixed seeds here, the results should be fully
-  // deterministic
-  auto seed = std::random_device()();
-  std::mt19937 gen(seed);
-  std::uniform_int_distribution<uint8_t> dist;
+    // We are fine using non-fixed seeds here, the results should be fully
+    // deterministic
+    auto seed = std::random_device()();
+    std::mt19937 gen(seed);
+    std::uniform_int_distribution<uint8_t> dist;
-  std::cout << "seed " << seed << "\n";
+    std::cout << "seed " << seed << "\n";
-  for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
-    for (auto dims : {1, 7, 8, 31, 32}) {
-      for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
-        std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
+    for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
+        for (auto dims : {1, 7, 8, 31, 32}) {
+            for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
+                std::cout << bitsPerCode << " " << dims << " " << numVecs
+                          << "\n";
-        int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
-        std::vector<uint8_t> data(numVecs * srcVecSize);
+                int srcVecSize = utils::divUp(dims * bitsPerCode, 8);
+                std::vector<uint8_t> data(numVecs * srcVecSize);
-        for (auto& v : data) {
-          v = dist(gen);
-        }
+                for (auto& v : data) {
+                    v = dist(gen);
+                }
-        // currently unimplemented
-        EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
+                // currently unimplemented
+                EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
-        // Due to bit packing, mask out bits that should be zero based on
-        // dimensions we shouldn't have present
-        int vectorSizeBits = dims * bitsPerCode;
-        int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
-        int remainder = vectorSizeBits % 8;
+                // Due to bit packing, mask out bits that should be zero based
+                // on dimensions we shouldn't have present
+                int vectorSizeBits = dims * bitsPerCode;
+                int vectorSizeBytes = utils::divUp(vectorSizeBits, 8);
+                int remainder = vectorSizeBits % 8;
-        if (remainder > 0) {
-          uint8_t mask = 0xff >> (8 - remainder);
+                if (remainder > 0) {
+                    uint8_t mask = 0xff >> (8 - remainder);
-          for (int i = 0; i < numVecs; ++i) {
-            int lastVecByte = (i + 1) * vectorSizeBytes - 1;
-            data[lastVecByte] &= mask;
-          }
-        }
+                    for (int i = 0; i < numVecs; ++i) {
+                        int lastVecByte = (i + 1) * vectorSizeBytes - 1;
+                        data[lastVecByte] &= mask;
+                    }
+                }
-        auto up = unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
-        auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
+                auto up =
+                        unpackNonInterleaved(data, numVecs, dims, bitsPerCode);
+                auto p = packNonInterleaved(up, numVecs, dims, bitsPerCode);
-        EXPECT_EQ(data, p);
-      }
+                EXPECT_EQ(data, p);
+            }
+        }
     }
-  }
 }
 TEST(TestCodePacking, NonInterleavedCodes_PackUnpack) {
-  using namespace faiss::gpu;
+    using namespace faiss::gpu;
-  // We are fine using non-fixed seeds here, the results should be fully
-  // deterministic
-  std::random_device rd;
-  std::mt19937 gen(rd());
-  std::uniform_int_distribution<uint8_t> dist;
+    // We are fine using non-fixed seeds here, the results should be fully
+    // deterministic
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<uint8_t> dist;
-  for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
-    for (auto dims : {1, 7, 8, 31, 32}) {
-      for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
-        std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
+    for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
+        for (auto dims : {1, 7, 8, 31, 32}) {
+            for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
+                std::cout << bitsPerCode << " " << dims << " " << numVecs
+                          << "\n";
-        std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
+                std::vector<uint8_t> data(
+                        numVecs * dims * utils::divUp(bitsPerCode, 8));
-        // currently unimplemented
-        EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
+                // currently unimplemented
+                EXPECT_FALSE(bitsPerCode > 8 && bitsPerCode % 8 != 0);
-        // Mask out high bits we shouldn't have based on code size
-        uint8_t mask = bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
+                // Mask out high bits we shouldn't have based on code size
+                uint8_t mask =
+                        bitsPerCode < 8 ? (0xff >> (8 - bitsPerCode)) : 0xff;
-        for (auto& v : data) {
-          v = dist(gen) & mask;
-        }
+                for (auto& v : data) {
+                    v = dist(gen) & mask;
+                }
-        auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
-        auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
+                auto p = packNonInterleaved(data, numVecs, dims, bitsPerCode);
+                auto up = unpackNonInterleaved(p, numVecs, dims, bitsPerCode);
-        EXPECT_EQ(data, up);
-      }
+                EXPECT_EQ(data, up);
+            }
+        }
     }
-  }
 }
 TEST(TestCodePacking, InterleavedCodes_UnpackPack) {
-  using namespace faiss::gpu;
-  // We are fine using non-fixed seeds here, the results should be fully
-  // deterministic
-  std::random_device rd;
-  std::mt19937 gen(rd());
-  std::uniform_int_distribution<uint8_t> dist;
-  for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
-    for (auto dims : {1, 7, 8, 31, 32}) {
-      for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
-        std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
-        int blocks = utils::divUp(numVecs, 32);
-        int bytesPerDimBlock = 32 * bitsPerCode / 8;
-        int bytesPerBlock = bytesPerDimBlock * dims;
-        int size = blocks * bytesPerBlock;
-        std::vector<uint8_t> data(size);
-        if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
-          int bytesPerCode = bitsPerCode / 8;
-          for (int i = 0; i < blocks; ++i) {
-            for (int j = 0; j < dims; ++j) {
-              for (int k = 0; k < 32; ++k) {
-                for (int l = 0; l < bytesPerCode; ++l) {
-                  int vec = i * 32 + k;
-                  if (vec < numVecs) {
-                    data[i * bytesPerBlock +
-                         j * bytesPerDimBlock +
-                         k * bytesPerCode + l] = dist(gen);
-                  }
-                }
-              }
-            }
-          }
-        } else if (bitsPerCode < 8) {
-          for (int i = 0; i < blocks; ++i) {
-            for (int j = 0; j < dims; ++j) {
-              for (int k = 0; k < bytesPerDimBlock; ++k) {
-                int loVec = i * 32 + (k * 8) / bitsPerCode;
-                int hiVec = loVec + 1;
-                int hiVec2 = hiVec + 1;
-                uint8_t lo = loVec < numVecs ?
-                  dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
-                uint8_t hi = hiVec < numVecs ?
-                  dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
-                uint8_t hi2 = hiVec2 < numVecs ?
-                  dist(gen) & (0xff >> (8 - bitsPerCode)) : 0;
-                uint8_t v = 0;
-                if (bitsPerCode == 4) {
-                  v = lo | (hi << 4);
-                } else if (bitsPerCode == 5) {
-                  switch (k % 5) {
-                    case 0:
-                      // 5 msbs of lower as vOut lsbs
-                      // 3 lsbs of upper as vOut msbs
-                      v = (lo & 0x1f) | (hi << 5);
-                      break;
-                    case 1:
-                      // 2 msbs of lower as vOut lsbs
-                      // 5 lsbs of upper as vOut msbs
-                      // 1 lsbs of upper2 as vOut msb
-                      v = (lo >> 3) | (hi << 2) | (hi2 << 7);
-                      break;
-                    case 2:
-                      // 4 msbs of lower as vOut lsbs
-                      // 4 lsbs of upper as vOut msbs
-                      v = (lo >> 1) | (hi << 4);
-                      break;
-                    case 3:
-                      // 1 msbs of lower as vOut lsbs
-                      // 5 lsbs of upper as vOut msbs
-                      // 2 lsbs of upper2 as vOut msb
-                      v = (lo >> 4) | (hi << 1) | (hi2 << 6);
-                      break;
-                    case 4:
-                      // 3 msbs of lower as vOut lsbs
-                      // 5 lsbs of upper as vOut msbs
-                      v = (lo >> 2) | (hi << 3);
-                      break;
-                  }
-                } else if (bitsPerCode == 6) {
-                  switch (k % 3) {
-                    case 0:
-                      // 6 msbs of lower as vOut lsbs
-                      // 2 lsbs of upper as vOut msbs
-                      v = (lo & 0x3f) | (hi << 6);
-                      break;
-                    case 1:
-                      // 4 msbs of lower as vOut lsbs
-                      // 4 lsbs of upper as vOut msbs
-                      v = (lo >> 2) | (hi << 4);
-                      break;
-                    case 2:
-                      // 2 msbs of lower as vOut lsbs
-                      // 6 lsbs of upper as vOut msbs
-                      v = (lo >> 4) | (hi << 2);
-                      break;
-                  }
+    using namespace faiss::gpu;
+    // We are fine using non-fixed seeds here, the results should be fully
+    // deterministic
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<uint8_t> dist;
+    for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
+        for (auto dims : {1, 7, 8, 31, 32}) {
+            for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
+                std::cout << bitsPerCode << " " << dims << " " << numVecs
+                          << "\n";
+                int blocks = utils::divUp(numVecs, 32);
+                int bytesPerDimBlock = 32 * bitsPerCode / 8;
+                int bytesPerBlock = bytesPerDimBlock * dims;
+                int size = blocks * bytesPerBlock;
+                std::vector<uint8_t> data(size);
+                if (bitsPerCode == 8 || bitsPerCode == 16 ||
+                    bitsPerCode == 32) {
+                    int bytesPerCode = bitsPerCode / 8;
+                    for (int i = 0; i < blocks; ++i) {
+                        for (int j = 0; j < dims; ++j) {
+                            for (int k = 0; k < 32; ++k) {
+                                for (int l = 0; l < bytesPerCode; ++l) {
+                                    int vec = i * 32 + k;
+                                    if (vec < numVecs) {
+                                        data[i * bytesPerBlock +
+                                             j * bytesPerDimBlock +
+                                             k * bytesPerCode + l] = dist(gen);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else if (bitsPerCode < 8) {
+                    for (int i = 0; i < blocks; ++i) {
+                        for (int j = 0; j < dims; ++j) {
+                            for (int k = 0; k < bytesPerDimBlock; ++k) {
+                                int loVec = i * 32 + (k * 8) / bitsPerCode;
+                                int hiVec = loVec + 1;
+                                int hiVec2 = hiVec + 1;
+                                uint8_t lo = loVec < numVecs ? dist(gen) &
+                                                (0xff >> (8 - bitsPerCode))
+                                                             : 0;
+                                uint8_t hi = hiVec < numVecs ? dist(gen) &
+                                                (0xff >> (8 - bitsPerCode))
+                                                             : 0;
+                                uint8_t hi2 = hiVec2 < numVecs ? dist(gen) &
+                                                (0xff >> (8 - bitsPerCode))
+                                                               : 0;
+                                uint8_t v = 0;
+                                if (bitsPerCode == 4) {
+                                    v = lo | (hi << 4);
+                                } else if (bitsPerCode == 5) {
+                                    switch (k % 5) {
+                                        case 0:
+                                            // 5 msbs of lower as vOut lsbs
+                                            // 3 lsbs of upper as vOut msbs
+                                            v = (lo & 0x1f) | (hi << 5);
+                                            break;
+                                        case 1:
+                                            // 2 msbs of lower as vOut lsbs
+                                            // 5 lsbs of upper as vOut msbs
+                                            // 1 lsbs of upper2 as vOut msb
+                                            v = (lo >> 3) | (hi << 2) |
+                                                    (hi2 << 7);
+                                            break;
+                                        case 2:
+                                            // 4 msbs of lower as vOut lsbs
+                                            // 4 lsbs of upper as vOut msbs
+                                            v = (lo >> 1) | (hi << 4);
+                                            break;
+                                        case 3:
+                                            // 1 msbs of lower as vOut lsbs
+                                            // 5 lsbs of upper as vOut msbs
+                                            // 2 lsbs of upper2 as vOut msb
+                                            v = (lo >> 4) | (hi << 1) |
+                                                    (hi2 << 6);
+                                            break;
+                                        case 4:
+                                            // 3 msbs of lower as vOut lsbs
+                                            // 5 lsbs of upper as vOut msbs
+                                            v = (lo >> 2) | (hi << 3);
+                                            break;
+                                    }
+                                } else if (bitsPerCode == 6) {
+                                    switch (k % 3) {
+                                        case 0:
+                                            // 6 msbs of lower as vOut lsbs
+                                            // 2 lsbs of upper as vOut msbs
+                                            v = (lo & 0x3f) | (hi << 6);
+                                            break;
+                                        case 1:
+                                            // 4 msbs of lower as vOut lsbs
+                                            // 4 lsbs of upper as vOut msbs
+                                            v = (lo >> 2) | (hi << 4);
+                                            break;
+                                        case 2:
+                                            // 2 msbs of lower as vOut lsbs
+                                            // 6 lsbs of upper as vOut msbs
+                                            v = (lo >> 4) | (hi << 2);
+                                            break;
+                                    }
+                                } else {
+                                    // unimplemented
+                                    EXPECT_TRUE(false);
+                                }
+                                data[i * bytesPerBlock + j * bytesPerDimBlock +
+                                     k] = v;
+                            }
+                        }
+                    }
                 } else {
-                  // unimplemented
-                  EXPECT_TRUE(false);
+                    // unimplemented
+                    EXPECT_TRUE(false);
                 }
-                data[i * bytesPerBlock + j * bytesPerDimBlock + k] = v;
-              }
+                auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
+                auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
+                EXPECT_EQ(data, p);
             }
-          }
-        } else {
-          // unimplemented
-          EXPECT_TRUE(false);
         }
-        auto up = unpackInterleaved(data, numVecs, dims, bitsPerCode);
-        auto p = packInterleaved(up, numVecs, dims, bitsPerCode);
-        EXPECT_EQ(data, p);
-      }
     }
-  }
 }
 TEST(TestCodePacking, InterleavedCodes_PackUnpack) {
-  using namespace faiss::gpu;
-  // We are fine using non-fixed seeds here, the results should be fully
-  // deterministic
-  std::random_device rd;
-  std::mt19937 gen(rd());
-  std::uniform_int_distribution<uint8_t> dist;
-  for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
-    for (auto dims : {1, 7, 8, 31, 32}) {
-      for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
-        std::cout << bitsPerCode << " " << dims << " " << numVecs << "\n";
-        std::vector<uint8_t> data(numVecs * dims * utils::divUp(bitsPerCode, 8));
-        if (bitsPerCode == 8 || bitsPerCode == 16 || bitsPerCode == 32) {
-          for (auto& v : data) {
-            v = dist(gen);
-          }
-        } else if (bitsPerCode < 8) {
-          uint8_t mask = 0xff >> (8 - bitsPerCode);
-          for (auto& v : data) {
-            v = dist(gen) & mask;
-          }
-        } else {
-          // unimplemented
-          EXPECT_TRUE(false);
-        }
+    using namespace faiss::gpu;
+    // We are fine using non-fixed seeds here, the results should be fully
+    // deterministic
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_int_distribution<uint8_t> dist;
+    for (auto bitsPerCode : {4, 5, 6, 8, 16, 32}) {
+        for (auto dims : {1, 7, 8, 31, 32}) {
+            for (auto numVecs : {1, 3, 4, 5, 6, 8, 31, 32, 33, 65}) {
+                std::cout << bitsPerCode << " " << dims << " " << numVecs
+                          << "\n";
+                std::vector<uint8_t> data(
+                        numVecs * dims * utils::divUp(bitsPerCode, 8));
+                if (bitsPerCode == 8 || bitsPerCode == 16 ||
+                    bitsPerCode == 32) {
+                    for (auto& v : data) {
+                        v = dist(gen);
+                    }
+                } else if (bitsPerCode < 8) {
+                    uint8_t mask = 0xff >> (8 - bitsPerCode);
+                    for (auto& v : data) {
+                        v = dist(gen) & mask;
+                    }
+                } else {
+                    // unimplemented
+                    EXPECT_TRUE(false);
+                }
-        auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
-        auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
+                auto p = packInterleaved(data, numVecs, dims, bitsPerCode);
+                auto up = unpackInterleaved(p, numVecs, dims, bitsPerCode);
-        EXPECT_EQ(data, up);
-      }
+                EXPECT_EQ(data, up);
+            }
+        }
     }
-  }
 }
 int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
+    testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
+    return RUN_ALL_TESTS();
 }