RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

Files changed (202) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp CHANGED Viewed

@@ -5,15 +5,14 @@
  * LICENSE file in the root directory of this source tree.
  */
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/StandardGpuResources.h>
-#include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
-#include <cmath>
+#include <faiss/gpu/utils/DeviceUtils.h>
 #include <gtest/gtest.h>
+#include <cmath>
 #include <sstream>
 #include <vector>
@@ -21,76 +20,157 @@
 constexpr float kF16MaxRelErr = 0.3f;
 constexpr float kF32MaxRelErr = 0.03f;
 struct Options {
-  Options() {
-    numAdd = 2 * faiss::gpu::randVal(2000, 5000);
-    dim = faiss::gpu::randVal(64, 200);
-    numCentroids = std::sqrt((float) numAdd / 2);
-    numTrain = numCentroids * 40;
-    nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
-    numQuery = faiss::gpu::randVal(32, 100);
-    // Due to the approximate nature of the query and of floating point
-    // differences between GPU and CPU, to stay within our error bounds, only
-    // use a small k
-    k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
-    indicesOpt = faiss::gpu::randSelect({
-        faiss::gpu::INDICES_CPU,
-          faiss::gpu::INDICES_32_BIT,
-          faiss::gpu::INDICES_64_BIT});
-    device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-  }
-  std::string toString() const {
-    std::stringstream str;
-    str << "IVFFlat device " << device
-        << " numVecs " << numAdd
-        << " dim " << dim
-        << " numCentroids " << numCentroids
-        << " nprobe " << nprobe
-        << " numQuery " << numQuery
-        << " k " << k
-        << " indicesOpt " << indicesOpt;
-    return str.str();
-  }
-  int numAdd;
-  int dim;
-  int numCentroids;
-  int numTrain;
-  int nprobe;
-  int numQuery;
-  int k;
-  int device;
-  faiss::gpu::IndicesOptions indicesOpt;
+    Options() {
+        numAdd = 2 * faiss::gpu::randVal(2000, 5000);
+        dim = faiss::gpu::randVal(64, 200);
+        numCentroids = std::sqrt((float)numAdd / 2);
+        numTrain = numCentroids * 40;
+        nprobe = faiss::gpu::randVal(std::min(10, numCentroids), numCentroids);
+        numQuery = faiss::gpu::randVal(32, 100);
+        // Due to the approximate nature of the query and of floating point
+        // differences between GPU and CPU, to stay within our error bounds,
+        // only use a small k
+        k = std::min(faiss::gpu::randVal(10, 30), numAdd / 40);
+        indicesOpt = faiss::gpu::randSelect(
+                {faiss::gpu::INDICES_CPU,
+                 faiss::gpu::INDICES_32_BIT,
+                 faiss::gpu::INDICES_64_BIT});
+        device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    }
+    std::string toString() const {
+        std::stringstream str;
+        str << "IVFFlat device " << device << " numVecs " << numAdd << " dim "
+            << dim << " numCentroids " << numCentroids << " nprobe " << nprobe
+            << " numQuery " << numQuery << " k " << k << " indicesOpt "
+            << indicesOpt;
+        return str.str();
+    }
+    int numAdd;
+    int dim;
+    int numCentroids;
+    int numTrain;
+    int nprobe;
+    int numQuery;
+    int k;
+    int device;
+    faiss::gpu::IndicesOptions indicesOpt;
 };
-void queryTest(faiss::MetricType metricType,
-               bool useFloat16CoarseQuantizer,
-               int dimOverride = -1) {
-  for (int tries = 0; tries < 2; ++tries) {
-    Options opt;
-    opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
+void queryTest(
+        faiss::MetricType metricType,
+        bool useFloat16CoarseQuantizer,
+        int dimOverride = -1) {
+    for (int tries = 0; tries < 2; ++tries) {
+        Options opt;
+        opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
+        std::vector<float> trainVecs =
+                faiss::gpu::randVecs(opt.numTrain, opt.dim);
+        std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+        faiss::IndexFlatL2 quantizerL2(opt.dim);
+        faiss::IndexFlatIP quantizerIP(opt.dim);
+        faiss::Index* quantizer = metricType == faiss::METRIC_L2
+                ? (faiss::Index*)&quantizerL2
+                : (faiss::Index*)&quantizerIP;
+        faiss::IndexIVFFlat cpuIndex(
+                quantizer, opt.dim, opt.numCentroids, metricType);
+        cpuIndex.train(opt.numTrain, trainVecs.data());
+        cpuIndex.add(opt.numAdd, addVecs.data());
+        cpuIndex.nprobe = opt.nprobe;
+        faiss::gpu::StandardGpuResources res;
+        res.noTempMemory();
+        faiss::gpu::GpuIndexIVFFlatConfig config;
+        config.device = opt.device;
+        config.indicesOptions = opt.indicesOpt;
+        config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        faiss::gpu::GpuIndexIVFFlat gpuIndex(
+                &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
+        gpuIndex.copyFrom(&cpuIndex);
+        gpuIndex.setNumProbes(opt.nprobe);
+        bool compFloat16 = useFloat16CoarseQuantizer;
+        faiss::gpu::compareIndices(
+                cpuIndex,
+                gpuIndex,
+                opt.numQuery,
+                opt.dim,
+                opt.k,
+                opt.toString(),
+                compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
+                // FIXME: the fp16 bounds are
+                // useless when math (the accumulator) is
+                // in fp16. Figure out another way to test
+                compFloat16 ? 0.70f : 0.1f,
+                compFloat16 ? 0.65f : 0.015f);
+    }
+}
+void addTest(faiss::MetricType metricType, bool useFloat16CoarseQuantizer) {
+    for (int tries = 0; tries < 2; ++tries) {
+        Options opt;
+        std::vector<float> trainVecs =
+                faiss::gpu::randVecs(opt.numTrain, opt.dim);
+        std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+        faiss::IndexFlatL2 quantizerL2(opt.dim);
+        faiss::IndexFlatIP quantizerIP(opt.dim);
+        faiss::Index* quantizer = metricType == faiss::METRIC_L2
+                ? (faiss::Index*)&quantizerL2
+                : (faiss::Index*)&quantizerIP;
+        faiss::IndexIVFFlat cpuIndex(
+                quantizer, opt.dim, opt.numCentroids, metricType);
+        cpuIndex.train(opt.numTrain, trainVecs.data());
+        cpuIndex.nprobe = opt.nprobe;
+        faiss::gpu::StandardGpuResources res;
+        res.noTempMemory();
+        faiss::gpu::GpuIndexIVFFlatConfig config;
+        config.device = opt.device;
+        config.indicesOptions = opt.indicesOpt;
+        config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        faiss::gpu::GpuIndexIVFFlat gpuIndex(
+                &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
+        gpuIndex.copyFrom(&cpuIndex);
+        gpuIndex.setNumProbes(opt.nprobe);
+        cpuIndex.add(opt.numAdd, addVecs.data());
+        gpuIndex.add(opt.numAdd, addVecs.data());
+        bool compFloat16 = useFloat16CoarseQuantizer;
+        faiss::gpu::compareIndices(
+                cpuIndex,
+                gpuIndex,
+                opt.numQuery,
+                opt.dim,
+                opt.k,
+                opt.toString(),
+                compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
+                compFloat16 ? 0.70f : 0.1f,
+                compFloat16 ? 0.30f : 0.015f);
+    }
+}
+void copyToTest(bool useFloat16CoarseQuantizer) {
+    Options opt;
     std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
     std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-    faiss::IndexFlatL2 quantizerL2(opt.dim);
-    faiss::IndexFlatIP quantizerIP(opt.dim);
-    faiss::Index* quantizer =
-      metricType == faiss::METRIC_L2 ?
-      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
-    faiss::IndexIVFFlat cpuIndex(quantizer,
-                                 opt.dim, opt.numCentroids, metricType);
-    cpuIndex.train(opt.numTrain, trainVecs.data());
-    cpuIndex.add(opt.numAdd, addVecs.data());
-    cpuIndex.nprobe = opt.nprobe;
     faiss::gpu::StandardGpuResources res;
     res.noTempMemory();
@@ -99,47 +179,57 @@ void queryTest(faiss::MetricType metricType,
     config.indicesOptions = opt.indicesOpt;
     config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                         cpuIndex.d,
-                                         cpuIndex.nlist,
-                                         cpuIndex.metric_type,
-                                         config);
-    gpuIndex.copyFrom(&cpuIndex);
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.train(opt.numTrain, trainVecs.data());
+    gpuIndex.add(opt.numAdd, addVecs.data());
     gpuIndex.setNumProbes(opt.nprobe);
+    // use garbage values to see if we overwrite them
+    faiss::IndexFlatL2 cpuQuantizer(1);
+    faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
+    cpuIndex.nprobe = 1;
+    gpuIndex.copyTo(&cpuIndex);
+    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+    EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
+    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+    EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
+    EXPECT_EQ(cpuIndex.d, opt.dim);
+    EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    testIVFEquality(cpuIndex, gpuIndex);
+    // Query both objects; results should be equivalent
     bool compFloat16 = useFloat16CoarseQuantizer;
-    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
-                               opt.numQuery, opt.dim, opt.k, opt.toString(),
-                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
-                               // FIXME: the fp16 bounds are
-                               // useless when math (the accumulator) is
-                               // in fp16. Figure out another way to test
-                               compFloat16 ? 0.70f : 0.1f,
-                               compFloat16 ? 0.65f : 0.015f);
-  }
+    faiss::gpu::compareIndices(
+            cpuIndex,
+            gpuIndex,
+            opt.numQuery,
+            opt.dim,
+            opt.k,
+            opt.toString(),
+            compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
+            compFloat16 ? 0.70f : 0.1f,
+            compFloat16 ? 0.30f : 0.015f);
 }
-void addTest(faiss::MetricType metricType,
-             bool useFloat16CoarseQuantizer) {
-  for (int tries = 0; tries < 2; ++tries) {
+void copyFromTest(bool useFloat16CoarseQuantizer) {
     Options opt;
     std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
     std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-    faiss::IndexFlatL2 quantizerL2(opt.dim);
-    faiss::IndexFlatIP quantizerIP(opt.dim);
-    faiss::Index* quantizer =
-      metricType == faiss::METRIC_L2 ?
-      (faiss::Index*) &quantizerL2 : (faiss::Index*) &quantizerIP;
-    faiss::IndexIVFFlat cpuIndex(quantizer,
-                                 opt.dim,
-                                 opt.numCentroids,
-                                 metricType);
-    cpuIndex.train(opt.numTrain, trainVecs.data());
+    faiss::IndexFlatL2 cpuQuantizer(opt.dim);
+    faiss::IndexIVFFlat cpuIndex(
+            &cpuQuantizer, opt.dim, opt.numCentroids, faiss::METRIC_L2);
     cpuIndex.nprobe = opt.nprobe;
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+    // use garbage values to see if we overwrite them
     faiss::gpu::StandardGpuResources res;
     res.noTempMemory();
@@ -148,140 +238,49 @@ void addTest(faiss::MetricType metricType,
     config.indicesOptions = opt.indicesOpt;
     config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                         cpuIndex.d,
-                                         cpuIndex.nlist,
-                                         cpuIndex.metric_type,
-                                         config);
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
+    gpuIndex.setNumProbes(1);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(opt.nprobe);
-    cpuIndex.add(opt.numAdd, addVecs.data());
-    gpuIndex.add(opt.numAdd, addVecs.data());
+    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+    EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
-    bool compFloat16 = useFloat16CoarseQuantizer;
-    faiss::gpu::compareIndices(cpuIndex, gpuIndex,
-                               opt.numQuery, opt.dim, opt.k, opt.toString(),
-                               compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
-                               compFloat16 ? 0.70f : 0.1f,
-                               compFloat16 ? 0.30f : 0.015f);
-  }
-}
+    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+    EXPECT_EQ(cpuIndex.d, opt.dim);
+    EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
-void copyToTest(bool useFloat16CoarseQuantizer) {
-  Options opt;
-  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
-  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = opt.device;
-  config.indicesOptions = opt.indicesOpt;
-  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       opt.dim,
-                                       opt.numCentroids,
-                                       faiss::METRIC_L2,
-                                       config);
-  gpuIndex.train(opt.numTrain, trainVecs.data());
-  gpuIndex.add(opt.numAdd, addVecs.data());
-  gpuIndex.setNumProbes(opt.nprobe);
-  // use garbage values to see if we overwrite them
-  faiss::IndexFlatL2 cpuQuantizer(1);
-  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer, 1, 1, faiss::METRIC_L2);
-  cpuIndex.nprobe = 1;
-  gpuIndex.copyTo(&cpuIndex);
-  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
-  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-  EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
-  EXPECT_EQ(cpuIndex.d, opt.dim);
-  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
-  testIVFEquality(cpuIndex, gpuIndex);
-  // Query both objects; results should be equivalent
-  bool compFloat16 = useFloat16CoarseQuantizer;
-  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
-                             opt.numQuery, opt.dim, opt.k, opt.toString(),
-                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
-                             compFloat16 ? 0.70f : 0.1f,
-                             compFloat16 ? 0.30f : 0.015f);
-}
+    testIVFEquality(cpuIndex, gpuIndex);
-void copyFromTest(bool useFloat16CoarseQuantizer) {
-  Options opt;
-  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
-  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-  faiss::IndexFlatL2 cpuQuantizer(opt.dim);
-  faiss::IndexIVFFlat cpuIndex(&cpuQuantizer,
-                               opt.dim,
-                               opt.numCentroids,
-                               faiss::METRIC_L2);
-  cpuIndex.nprobe = opt.nprobe;
-  cpuIndex.train(opt.numTrain, trainVecs.data());
-  cpuIndex.add(opt.numAdd, addVecs.data());
-  // use garbage values to see if we overwrite them
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = opt.device;
-  config.indicesOptions = opt.indicesOpt;
-  config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       1,
-                                       1,
-                                       faiss::METRIC_L2,
-                                       config);
-  gpuIndex.setNumProbes(1);
-  gpuIndex.copyFrom(&cpuIndex);
-  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-  EXPECT_EQ(gpuIndex.ntotal, opt.numAdd);
-  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-  EXPECT_EQ(cpuIndex.d, opt.dim);
-  EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-  EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
-  testIVFEquality(cpuIndex, gpuIndex);
-  // Query both objects; results should be equivalent
-  bool compFloat16 = useFloat16CoarseQuantizer;
-  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
-                             opt.numQuery, opt.dim, opt.k, opt.toString(),
-                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
-                             compFloat16 ? 0.70f : 0.1f,
-                             compFloat16 ? 0.30f : 0.015f);
+    // Query both objects; results should be equivalent
+    bool compFloat16 = useFloat16CoarseQuantizer;
+    faiss::gpu::compareIndices(
+            cpuIndex,
+            gpuIndex,
+            opt.numQuery,
+            opt.dim,
+            opt.k,
+            opt.toString(),
+            compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
+            compFloat16 ? 0.70f : 0.1f,
+            compFloat16 ? 0.30f : 0.015f);
 }
 TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) {
-  addTest(faiss::METRIC_L2, false);
+    addTest(faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) {
-  addTest(faiss::METRIC_INNER_PRODUCT, false);
+    addTest(faiss::METRIC_INNER_PRODUCT, false);
 }
 TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) {
-  addTest(faiss::METRIC_L2, true);
+    addTest(faiss::METRIC_L2, true);
 }
 TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
-  addTest(faiss::METRIC_INNER_PRODUCT, true);
+    addTest(faiss::METRIC_INNER_PRODUCT, true);
 }
 //
@@ -289,21 +288,21 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
-  queryTest(faiss::METRIC_L2, false);
+    queryTest(faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
-  queryTest(faiss::METRIC_INNER_PRODUCT, false);
+    queryTest(faiss::METRIC_INNER_PRODUCT, false);
 }
 // float16 coarse quantizer
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
-  queryTest(faiss::METRIC_L2, true);
+    queryTest(faiss::METRIC_L2, true);
 }
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
-  queryTest(faiss::METRIC_INNER_PRODUCT, true);
+    queryTest(faiss::METRIC_INNER_PRODUCT, true);
 }
 //
@@ -312,19 +311,19 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
-  queryTest(faiss::METRIC_L2, false, 64);
+    queryTest(faiss::METRIC_L2, false, 64);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
-  queryTest(faiss::METRIC_INNER_PRODUCT, false, 64);
+    queryTest(faiss::METRIC_INNER_PRODUCT, false, 64);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
-  queryTest(faiss::METRIC_L2, false, 128);
+    queryTest(faiss::METRIC_L2, false, 128);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
-  queryTest(faiss::METRIC_INNER_PRODUCT, false, 128);
+    queryTest(faiss::METRIC_INNER_PRODUCT, false, 128);
 }
 //
@@ -332,71 +331,72 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) {
-  copyToTest(false);
+    copyToTest(false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_32_CopyFrom) {
-  copyFromTest(false);
+    copyFromTest(false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_negative) {
-  Options opt;
-  auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
-  auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-  // Put all vecs on negative side
-  for (auto& f : trainVecs) {
-    f = std::abs(f) * -1.0f;
-  }
-  for (auto& f : addVecs) {
-    f *= std::abs(f) * -1.0f;
-  }
-  faiss::IndexFlatIP quantizerIP(opt.dim);
-  faiss::Index* quantizer = (faiss::Index*) &quantizerIP;
-  faiss::IndexIVFFlat cpuIndex(quantizer,
-                               opt.dim, opt.numCentroids,
-                               faiss::METRIC_INNER_PRODUCT);
-  cpuIndex.train(opt.numTrain, trainVecs.data());
-  cpuIndex.add(opt.numAdd, addVecs.data());
-  cpuIndex.nprobe = opt.nprobe;
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = opt.device;
-  config.indicesOptions = opt.indicesOpt;
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       cpuIndex.d,
-                                       cpuIndex.nlist,
-                                       cpuIndex.metric_type,
-                                       config);
-  gpuIndex.copyFrom(&cpuIndex);
-  gpuIndex.setNumProbes(opt.nprobe);
-  // Construct a positive test set
-  auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
-  // Put all vecs on positive size
-  for (auto& f : queryVecs) {
-    f = std::abs(f);
-  }
-  bool compFloat16 = false;
-  faiss::gpu::compareIndices(queryVecs,
-                             cpuIndex, gpuIndex,
-                             opt.numQuery, opt.dim, opt.k, opt.toString(),
-                             compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
-                             // FIXME: the fp16 bounds are
-                             // useless when math (the accumulator) is
-                             // in fp16. Figure out another way to test
-                             compFloat16 ? 0.99f : 0.1f,
-                             compFloat16 ? 0.65f : 0.015f);
+    Options opt;
+    auto trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    auto addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+    // Put all vecs on negative side
+    for (auto& f : trainVecs) {
+        f = std::abs(f) * -1.0f;
+    }
+    for (auto& f : addVecs) {
+        f *= std::abs(f) * -1.0f;
+    }
+    faiss::IndexFlatIP quantizerIP(opt.dim);
+    faiss::Index* quantizer = (faiss::Index*)&quantizerIP;
+    faiss::IndexIVFFlat cpuIndex(
+            quantizer, opt.dim, opt.numCentroids, faiss::METRIC_INNER_PRODUCT);
+    cpuIndex.train(opt.numTrain, trainVecs.data());
+    cpuIndex.add(opt.numAdd, addVecs.data());
+    cpuIndex.nprobe = opt.nprobe;
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = opt.device;
+    config.indicesOptions = opt.indicesOpt;
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
+    gpuIndex.copyFrom(&cpuIndex);
+    gpuIndex.setNumProbes(opt.nprobe);
+    // Construct a positive test set
+    auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
+    // Put all vecs on positive size
+    for (auto& f : queryVecs) {
+        f = std::abs(f);
+    }
+    bool compFloat16 = false;
+    faiss::gpu::compareIndices(
+            queryVecs,
+            cpuIndex,
+            gpuIndex,
+            opt.numQuery,
+            opt.dim,
+            opt.k,
+            opt.toString(),
+            compFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
+            // FIXME: the fp16 bounds are
+            // useless when math (the accumulator) is
+            // in fp16. Figure out another way to test
+            compFloat16 ? 0.99f : 0.1f,
+            compFloat16 ? 0.65f : 0.015f);
 }
 //
@@ -404,152 +404,152 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
 //
 TEST(TestGpuIndexIVFFlat, QueryNaN) {
-  Options opt;
-  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
-  std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = opt.device;
-  config.indicesOptions = opt.indicesOpt;
-  config.flatConfig.useFloat16 = faiss::gpu::randBool();
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       opt.dim,
-                                       opt.numCentroids,
-                                       faiss::METRIC_L2,
-                                       config);
-  gpuIndex.setNumProbes(opt.nprobe);
-  gpuIndex.train(opt.numTrain, trainVecs.data());
-  gpuIndex.add(opt.numAdd, addVecs.data());
-  int numQuery = 10;
-  std::vector<float> nans(numQuery * opt.dim,
-                          std::numeric_limits<float>::quiet_NaN());
-  std::vector<float> distances(numQuery * opt.k, 0);
-  std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
-  gpuIndex.search(numQuery,
-                  nans.data(),
-                  opt.k,
-                  distances.data(),
-                  indices.data());
-  for (int q = 0; q < numQuery; ++q) {
-    for (int k = 0; k < opt.k; ++k) {
-      EXPECT_EQ(indices[q * opt.k + k], -1);
-      EXPECT_EQ(distances[q * opt.k + k], std::numeric_limits<float>::max());
+    Options opt;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = opt.device;
+    config.indicesOptions = opt.indicesOpt;
+    config.flatConfig.useFloat16 = faiss::gpu::randBool();
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.train(opt.numTrain, trainVecs.data());
+    gpuIndex.add(opt.numAdd, addVecs.data());
+    int numQuery = 10;
+    std::vector<float> nans(
+            numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
+    std::vector<float> distances(numQuery * opt.k, 0);
+    std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
+    gpuIndex.search(
+            numQuery, nans.data(), opt.k, distances.data(), indices.data());
+    for (int q = 0; q < numQuery; ++q) {
+        for (int k = 0; k < opt.k; ++k) {
+            EXPECT_EQ(indices[q * opt.k + k], -1);
+            EXPECT_EQ(
+                    distances[q * opt.k + k],
+                    std::numeric_limits<float>::max());
+        }
     }
-  }
 }
 TEST(TestGpuIndexIVFFlat, AddNaN) {
-  Options opt;
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = opt.device;
-  config.indicesOptions = opt.indicesOpt;
-  config.flatConfig.useFloat16 = faiss::gpu::randBool();
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       opt.dim,
-                                       opt.numCentroids,
-                                       faiss::METRIC_L2,
-                                       config);
-  gpuIndex.setNumProbes(opt.nprobe);
-  int numNans = 10;
-  std::vector<float> nans(numNans * opt.dim,
-                          std::numeric_limits<float>::quiet_NaN());
-  // Make one vector valid (not the first vector, in order to test offset
-  // issues), which should actually add
-  for (int i = 0; i < opt.dim; ++i) {
-    nans[opt.dim + i] = i;
-  }
-  std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
-  gpuIndex.train(opt.numTrain, trainVecs.data());
-  // should not crash
-  EXPECT_EQ(gpuIndex.ntotal, 0);
-  gpuIndex.add(numNans, nans.data());
-  std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
-  std::vector<float> distance(opt.numQuery * opt.k, 0);
-  std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
-  // should not crash
-  gpuIndex.search(opt.numQuery, queryVecs.data(), opt.k,
-                  distance.data(), indices.data());
+    Options opt;
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = opt.device;
+    config.indicesOptions = opt.indicesOpt;
+    config.flatConfig.useFloat16 = faiss::gpu::randBool();
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.setNumProbes(opt.nprobe);
+    int numNans = 10;
+    std::vector<float> nans(
+            numNans * opt.dim, std::numeric_limits<float>::quiet_NaN());
+    // Make one vector valid (not the first vector, in order to test offset
+    // issues), which should actually add
+    for (int i = 0; i < opt.dim; ++i) {
+        nans[opt.dim + i] = i;
+    }
+    std::vector<float> trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim);
+    gpuIndex.train(opt.numTrain, trainVecs.data());
+    // should not crash
+    EXPECT_EQ(gpuIndex.ntotal, 0);
+    gpuIndex.add(numNans, nans.data());
+    std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
+    std::vector<float> distance(opt.numQuery * opt.k, 0);
+    std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
+    // should not crash
+    gpuIndex.search(
+            opt.numQuery,
+            queryVecs.data(),
+            opt.k,
+            distance.data(),
+            indices.data());
 }
 TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
-  // Construct on a random device to test multi-device, if we have
-  // multiple devices
-  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
-    return;
-  }
-  int dim = 128;
-  int numCentroids = 256;
-  // Unfortunately it would take forever to add 24 GB in IVFPQ data,
-  // so just perform a small test with data allocated in the unified
-  // memory address space
-  size_t numAdd = 10000;
-  size_t numTrain = numCentroids * 40;
-  int numQuery = 10;
-  int k = 10;
-  int nprobe = 8;
-  std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
-  std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
-  faiss::IndexFlatL2 quantizer(dim);
-  faiss::IndexIVFFlat cpuIndex(&quantizer, dim, numCentroids, faiss::METRIC_L2);
-  cpuIndex.train(numTrain, trainVecs.data());
-  cpuIndex.add(numAdd, addVecs.data());
-  cpuIndex.nprobe = nprobe;
-  faiss::gpu::StandardGpuResources res;
-  res.noTempMemory();
-  faiss::gpu::GpuIndexIVFFlatConfig config;
-  config.device = device;
-  config.memorySpace = faiss::gpu::MemorySpace::Unified;
-  faiss::gpu::GpuIndexIVFFlat gpuIndex(&res,
-                                       dim,
-                                       numCentroids,
-                                       faiss::METRIC_L2,
-                                       config);
-  gpuIndex.copyFrom(&cpuIndex);
-  gpuIndex.setNumProbes(nprobe);
-  faiss::gpu::compareIndices(cpuIndex, gpuIndex,
-                             numQuery, dim, k, "Unified Memory",
-                             kF32MaxRelErr,
-                             0.1f,
-                             0.015f);
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
+        return;
+    }
+    int dim = 128;
+    int numCentroids = 256;
+    // Unfortunately it would take forever to add 24 GB in IVFPQ data,
+    // so just perform a small test with data allocated in the unified
+    // memory address space
+    size_t numAdd = 10000;
+    size_t numTrain = numCentroids * 40;
+    int numQuery = 10;
+    int k = 10;
+    int nprobe = 8;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
+    faiss::IndexFlatL2 quantizer(dim);
+    faiss::IndexIVFFlat cpuIndex(
+            &quantizer, dim, numCentroids, faiss::METRIC_L2);
+    cpuIndex.train(numTrain, trainVecs.data());
+    cpuIndex.add(numAdd, addVecs.data());
+    cpuIndex.nprobe = nprobe;
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = device;
+    config.memorySpace = faiss::gpu::MemorySpace::Unified;
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, dim, numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.copyFrom(&cpuIndex);
+    gpuIndex.setNumProbes(nprobe);
+    faiss::gpu::compareIndices(
+            cpuIndex,
+            gpuIndex,
+            numQuery,
+            dim,
+            k,
+            "Unified Memory",
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
 }
 int main(int argc, char** argv) {
-  testing::InitGoogleTest(&argc, argv);
+    testing::InitGoogleTest(&argc, argv);
-  // just run with a fixed test seed
-  faiss::gpu::setTestSeed(100);
+    // just run with a fixed test seed
+    faiss::gpu::setTestSeed(100);
-  return RUN_ALL_TESTS();
+    return RUN_ALL_TESTS();
 }