RubyGems - faiss - Versions diffs - 0.2.4 → 0.2.6 - Mend

faiss 0.2.4 → 0.2.6

Files changed (178) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +10 -0
data/LICENSE.txt +1 -1
data/README.md +23 -21
data/ext/faiss/extconf.rb +11 -0
data/ext/faiss/index.cpp +17 -4
data/ext/faiss/index_binary.cpp +6 -6
data/ext/faiss/product_quantizer.cpp +4 -4
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +13 -0
data/vendor/faiss/faiss/IVFlib.cpp +101 -2
data/vendor/faiss/faiss/IVFlib.h +26 -2
data/vendor/faiss/faiss/Index.cpp +36 -3
data/vendor/faiss/faiss/Index.h +43 -6
data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
data/vendor/faiss/faiss/Index2Layer.h +6 -1
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
data/vendor/faiss/faiss/IndexBinary.h +18 -3
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
data/vendor/faiss/faiss/IndexFastScan.h +145 -0
data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
data/vendor/faiss/faiss/IndexFlat.h +7 -4
data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
data/vendor/faiss/faiss/IndexHNSW.h +4 -2
data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
data/vendor/faiss/faiss/IndexIDMap.h +107 -0
data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
data/vendor/faiss/faiss/IndexIVF.h +35 -16
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
data/vendor/faiss/faiss/IndexLSH.h +2 -1
data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
data/vendor/faiss/faiss/IndexLattice.h +3 -1
data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
data/vendor/faiss/faiss/IndexNSG.h +25 -1
data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
data/vendor/faiss/faiss/IndexPQ.h +19 -5
data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
data/vendor/faiss/faiss/IndexRefine.h +4 -2
data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
data/vendor/faiss/faiss/IndexReplicas.h +2 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
data/vendor/faiss/faiss/IndexShards.cpp +4 -1
data/vendor/faiss/faiss/IndexShards.h +2 -1
data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
data/vendor/faiss/faiss/MetaIndexes.h +3 -81
data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
data/vendor/faiss/faiss/VectorTransform.h +22 -4
data/vendor/faiss/faiss/clone_index.cpp +23 -1
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
data/vendor/faiss/faiss/impl/HNSW.h +19 -16
data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
data/vendor/faiss/faiss/index_factory.cpp +196 -7
data/vendor/faiss/faiss/index_io.h +5 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
data/vendor/faiss/faiss/utils/Heap.h +31 -15
data/vendor/faiss/faiss/utils/distances.cpp +380 -56
data/vendor/faiss/faiss/utils/distances.h +113 -15
data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
data/vendor/faiss/faiss/utils/fp16.h +11 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
data/vendor/faiss/faiss/utils/random.cpp +53 -0
data/vendor/faiss/faiss/utils/random.h +5 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
metadata +37 -3

data/vendor/faiss/faiss/gpu/impl/IndexUtils.h ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/Index.h>
+namespace faiss {
+namespace gpu {
+/// A collection of various utility functions for index implementation
+/// Returns the maximum k-selection value supported based on the CUDA SDK that
+/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
+/// non-CUDA files
+int getMaxKSelection();
+// Validate the k parameter for search
+void validateKSelect(Index::idx_t k);
+// Validate the nprobe parameter for search
+void validateNProbe(Index::idx_t nprobe);
+/// Validate the n (number of vectors) parameter for add, search, reconstruct
+void validateNumVectors(Index::idx_t n);
+} // namespace gpu
+} // namespace faiss

data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp CHANGED Viewed

@@ -8,6 +8,7 @@
 #include <faiss/IndexBinaryFlat.h>
 #include <faiss/gpu/GpuIndexBinaryFlat.h>
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/utils/utils.h>

data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp CHANGED Viewed

@@ -8,10 +8,12 @@
 #include <faiss/IndexFlat.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <gtest/gtest.h>
 #include <sstream>
+#include <unordered_map>
 #include <vector>
 // FIXME: figure out a better way to test fp16
@@ -23,7 +25,6 @@ struct TestFlatOptions {
             : metric(faiss::MetricType::METRIC_L2),
               metricArg(0),
               useFloat16(false),
-              useTransposed(false),
               numVecsOverride(-1),
               numQueriesOverride(-1),
               kOverride(-1),
@@ -33,7 +34,6 @@ struct TestFlatOptions {
     float metricArg;
     bool useFloat16;
-    bool useTransposed;
     int numVecsOverride;
     int numQueriesOverride;
     int kOverride;
@@ -73,7 +73,6 @@ void testFlat(const TestFlatOptions& opt) {
     faiss::gpu::GpuIndexFlatConfig config;
     config.device = device;
     config.useFloat16 = opt.useFloat16;
-    config.storeTransposed = opt.useTransposed;
     faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
     gpuIndex.metric_arg = opt.metricArg;
@@ -85,8 +84,7 @@ void testFlat(const TestFlatOptions& opt) {
     std::stringstream str;
     str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
         << numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
-        << " transposed " << opt.useTransposed << " numQuery " << numQuery
-        << " k " << k;
+        << " numQuery " << numQuery << " k " << k;
     // To some extent, we depend upon the relative error for the test
     // for float16
@@ -110,12 +108,8 @@ TEST(TestGpuIndexFlat, IP_Float32) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         testFlat(opt);
-        opt.useTransposed = true;
-        testFlat(opt);
     }
 }
@@ -123,11 +117,7 @@ TEST(TestGpuIndexFlat, L1_Float32) {
     TestFlatOptions opt;
     opt.metric = faiss::MetricType::METRIC_L1;
     opt.useFloat16 = false;
-    opt.useTransposed = false;
-    testFlat(opt);
-    opt.useTransposed = true;
     testFlat(opt);
 }
@@ -136,12 +126,8 @@ TEST(TestGpuIndexFlat, Lp_Float32) {
     opt.metric = faiss::MetricType::METRIC_Lp;
     opt.metricArg = 5;
     opt.useFloat16 = false;
-    opt.useTransposed = false;
     testFlat(opt);
-    // Don't bother testing the transposed version, the L1 test should be good
-    // enough for that
 }
 TEST(TestGpuIndexFlat, L2_Float32) {
@@ -150,11 +136,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
-        testFlat(opt);
-        opt.useTransposed = true;
         testFlat(opt);
     }
 }
@@ -165,7 +147,6 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         opt.kOverride = 1;
         testFlat(opt);
@@ -177,12 +158,8 @@ TEST(TestGpuIndexFlat, IP_Float16) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
         testFlat(opt);
-        opt.useTransposed = true;
-        testFlat(opt);
     }
 }
@@ -191,11 +168,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
-        testFlat(opt);
-        opt.useTransposed = true;
         testFlat(opt);
     }
 }
@@ -206,7 +179,6 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
         opt.kOverride = 1;
         testFlat(opt);
@@ -219,7 +191,6 @@ TEST(TestGpuIndexFlat, L2_Tiling) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         opt.numVecsOverride = 1000000;
         // keep the rest of the problem reasonably small
@@ -238,7 +209,6 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
     faiss::gpu::GpuIndexFlatConfig config;
     config.device = 0;
     config.useFloat16 = false;
-    config.storeTransposed = false;
     int dim = 128;
     faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
@@ -267,40 +237,46 @@ TEST(TestGpuIndexFlat, CopyFrom) {
     int numVecs = faiss::gpu::randVal(100, 200);
     int dim = faiss::gpu::randVal(1, 1000);
-    faiss::IndexFlatL2 cpuIndex(dim);
     std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
+    faiss::IndexFlatL2 cpuIndex(dim);
     cpuIndex.add(numVecs, vecs.data());
     faiss::gpu::StandardGpuResources res;
     res.noTempMemory();
-    // Fill with garbage values
     int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-    faiss::gpu::GpuIndexFlatConfig config;
-    config.device = device;
-    config.useFloat16 = false;
-    config.storeTransposed = false;
-    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
-    gpuIndex.copyFrom(&cpuIndex);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
-    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-    EXPECT_EQ(gpuIndex.ntotal, numVecs);
+        // Fill with garbage values
+        faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
+        gpuIndex.copyFrom(&cpuIndex);
-    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-    EXPECT_EQ(cpuIndex.d, dim);
+        EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+        EXPECT_EQ(gpuIndex.ntotal, numVecs);
-    int idx = faiss::gpu::randVal(0, numVecs - 1);
+        EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+        EXPECT_EQ(cpuIndex.d, dim);
-    std::vector<float> gpuVals(dim);
-    gpuIndex.reconstruct(idx, gpuVals.data());
+        std::vector<float> gpuVals(numVecs * dim);
+        gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
-    std::vector<float> cpuVals(dim);
-    cpuIndex.reconstruct(idx, cpuVals.data());
+        std::vector<float> cpuVals(numVecs * dim);
+        cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
-    EXPECT_EQ(gpuVals, cpuVals);
+        // The CPU is the source of (float32) truth here, while the GPU index
+        // may be in float16 mode and thus was subject to rounding
+        if (useFloat16) {
+            EXPECT_EQ(gpuVals, faiss::gpu::roundToHalf(cpuVals));
+        } else {
+            // Should be exactly the same
+            EXPECT_EQ(gpuVals, cpuVals);
+        }
+    }
 }
 TEST(TestGpuIndexFlat, CopyTo) {
@@ -311,36 +287,36 @@ TEST(TestGpuIndexFlat, CopyTo) {
     int dim = faiss::gpu::randVal(1, 1000);
     int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-    faiss::gpu::GpuIndexFlatConfig config;
-    config.device = device;
-    config.useFloat16 = false;
-    config.storeTransposed = false;
-    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
     std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-    gpuIndex.add(numVecs, vecs.data());
-    // Fill with garbage values
-    faiss::IndexFlatL2 cpuIndex(2000);
-    gpuIndex.copyTo(&cpuIndex);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
+        faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
+        gpuIndex.add(numVecs, vecs.data());
-    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-    EXPECT_EQ(gpuIndex.ntotal, numVecs);
+        // Fill with garbage values
+        faiss::IndexFlatL2 cpuIndex(2000);
+        gpuIndex.copyTo(&cpuIndex);
-    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-    EXPECT_EQ(cpuIndex.d, dim);
+        EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+        EXPECT_EQ(gpuIndex.ntotal, numVecs);
-    int idx = faiss::gpu::randVal(0, numVecs - 1);
+        EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+        EXPECT_EQ(cpuIndex.d, dim);
-    std::vector<float> gpuVals(dim);
-    gpuIndex.reconstruct(idx, gpuVals.data());
+        std::vector<float> gpuVals(numVecs * dim);
+        gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
-    std::vector<float> cpuVals(dim);
-    cpuIndex.reconstruct(idx, cpuVals.data());
+        std::vector<float> cpuVals(numVecs * dim);
+        cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
-    EXPECT_EQ(gpuVals, cpuVals);
+        // The GPU is the source of truth here, so the float32 exact comparison
+        // even if the index uses float16 is ok
+        EXPECT_EQ(gpuVals, cpuVals);
+    }
 }
 TEST(TestGpuIndexFlat, UnifiedMemory) {
@@ -390,6 +366,266 @@ TEST(TestGpuIndexFlat, UnifiedMemory) {
             0.015f);
 }
+TEST(TestGpuIndexFlat, LargeIndex) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    // Skip this device if we do not have sufficient memory
+    constexpr size_t kMem = size_t(8) * 1024 * 1024 * 1024;
+    if (faiss::gpu::getFreeMemory(device) < kMem) {
+        std::cout << "TestGpuIndexFlat.LargeIndex: skipping due "
+                     "to insufficient device memory\n";
+        return;
+    }
+    std::cout << "Running LargeIndex test\n";
+    size_t dim = 256; // each vec is sizeof(float) * 256 = 1 KiB in size
+    size_t nb = 5000000;
+    size_t nq = 10;
+    auto xb = faiss::gpu::randVecs(nb, dim);
+    int k = 10;
+    faiss::IndexFlatL2 cpuIndexL2(dim);
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
+    cpuIndexL2.add(nb, xb.data());
+    gpuIndexL2.add(nb, xb.data());
+    // To some extent, we depend upon the relative error for the test
+    // for float16
+    faiss::gpu::compareIndices(
+            cpuIndexL2,
+            gpuIndexL2,
+            nq,
+            dim,
+            k,
+            "LargeIndex",
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+}
+TEST(TestGpuIndexFlat, Residual) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    int dim = 32;
+    faiss::IndexFlat cpuIndex(dim, faiss::MetricType::METRIC_L2);
+    faiss::gpu::GpuIndexFlat gpuIndex(
+            &res, dim, faiss::MetricType::METRIC_L2, config);
+    int numVecs = 100;
+    auto vecs = faiss::gpu::randVecs(numVecs, dim);
+    cpuIndex.add(numVecs, vecs.data());
+    gpuIndex.add(numVecs, vecs.data());
+    auto indexVecs = std::vector<faiss::Index::idx_t>{0, 2, 4, 6, 8};
+    auto queryVecs = faiss::gpu::randVecs(indexVecs.size(), dim);
+    auto residualsCpu = std::vector<float>(indexVecs.size() * dim);
+    auto residualsGpu = std::vector<float>(indexVecs.size() * dim);
+    cpuIndex.compute_residual_n(
+            indexVecs.size(),
+            queryVecs.data(),
+            residualsCpu.data(),
+            indexVecs.data());
+    gpuIndex.compute_residual_n(
+            indexVecs.size(),
+            queryVecs.data(),
+            residualsGpu.data(),
+            indexVecs.data());
+    // Should be exactly the same, as this is just a single float32 subtraction
+    EXPECT_EQ(residualsCpu, residualsGpu);
+}
+TEST(TestGpuIndexFlat, Reconstruct) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    int dim = 32;
+    int numVecs = 100;
+    auto vecs = faiss::gpu::randVecs(numVecs, dim);
+    auto vecs16 = faiss::gpu::roundToHalf(vecs);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
+        faiss::gpu::GpuIndexFlat gpuIndex(
+                &res, dim, faiss::MetricType::METRIC_L2, config);
+        gpuIndex.add(numVecs, vecs.data());
+        // Test reconstruct
+        {
+            auto reconstructVecs = std::vector<float>(dim);
+            gpuIndex.reconstruct(15, reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < dim; ++i) {
+                EXPECT_EQ(reconstructVecs[i], ref[15 * dim + i]);
+            }
+        }
+        // Test reconstruct_n
+        if (false) {
+            auto reconstructVecs = std::vector<float>((numVecs - 1) * dim);
+            int startVec = 5;
+            int endVec = numVecs - 1;
+            int numReconstructVec = endVec - startVec + 1;
+            gpuIndex.reconstruct_n(
+                    startVec, numReconstructVec, reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < numReconstructVec; ++i) {
+                for (int j = 0; j < dim; ++j) {
+                    EXPECT_EQ(
+                            reconstructVecs[i * dim + j],
+                            ref[(i + startVec) * dim + j]);
+                }
+            }
+        }
+        // Test reconstruct_batch
+        if (false) {
+            auto reconstructKeys = std::vector<faiss::Index::idx_t>{1, 3, 5};
+            auto reconstructVecs =
+                    std::vector<float>(reconstructKeys.size() * dim);
+            gpuIndex.reconstruct_batch(
+                    reconstructKeys.size(),
+                    reconstructKeys.data(),
+                    reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < reconstructKeys.size(); ++i) {
+                for (int j = 0; j < dim; ++j) {
+                    EXPECT_EQ(
+                            reconstructVecs[i * dim + j],
+                            ref[reconstructKeys[i] * dim + j]);
+                }
+            }
+        }
+    }
+}
+TEST(TestGpuIndexFlat, SearchAndReconstruct) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    size_t dim = 32;
+    size_t nb = 5000;
+    size_t nq = 10;
+    int k = 10;
+    auto xb = faiss::gpu::randVecs(nb, dim);
+    auto xq = faiss::gpu::randVecs(nq, dim);
+    faiss::IndexFlatL2 cpuIndex(dim);
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
+    cpuIndex.add(nb, xb.data());
+    gpuIndex.add(nb, xb.data());
+    std::vector<float> refDistance(nq * k, 0);
+    std::vector<faiss::Index::idx_t> refIndices(nq * k, -1);
+    std::vector<float> refReconstruct(nq * k * dim, 0);
+    cpuIndex.search_and_reconstruct(
+            nq,
+            xq.data(),
+            k,
+            refDistance.data(),
+            refIndices.data(),
+            refReconstruct.data());
+    std::vector<float> testDistance(nq * k, 0);
+    std::vector<faiss::Index::idx_t> testIndices(nq * k, -1);
+    std::vector<float> testReconstruct(nq * k * dim, 0);
+    gpuIndex.search_and_reconstruct(
+            nq,
+            xq.data(),
+            k,
+            testDistance.data(),
+            testIndices.data(),
+            testReconstruct.data());
+    // This handles the search results
+    faiss::gpu::compareLists(
+            refDistance.data(),
+            refIndices.data(),
+            testDistance.data(),
+            testIndices.data(),
+            nq,
+            k,
+            "SearchAndReconstruct",
+            true,
+            false,
+            true,
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+    // As the search results may be slightly different (though compareLists
+    // above will ensure a decent number of matches), reconstruction should be
+    // the same for the vectors that do match
+    for (int i = 0; i < nq; ++i) {
+        std::unordered_map<faiss::Index::idx_t, int> refLocation;
+        for (int j = 0; j < k; ++j) {
+            refLocation.insert(std::make_pair(refIndices[i * k + j], j));
+        }
+        for (int j = 0; j < k; ++j) {
+            auto idx = testIndices[i * k + j];
+            auto it = refLocation.find(idx);
+            if (it != refLocation.end()) {
+                for (int d = 0; d < dim; ++d) {
+                    EXPECT_EQ(
+                            refReconstruct[(i * k + it->second) * dim + d],
+                            testReconstruct[(i * k + j) * dim + d]);
+                }
+            }
+        }
+    }
+}
 int main(int argc, char** argv) {
     testing::InitGoogleTest(&argc, argv);

data/vendor/faiss/faiss/gpu/test/TestUtils.cpp CHANGED Viewed

@@ -5,6 +5,7 @@
  * LICENSE file in the root directory of this source tree.
  */
+#include <cuda_fp16.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/utils/random.h>
 #include <gtest/gtest.h>
@@ -74,6 +75,15 @@ std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
     return v;
 }
+std::vector<float> roundToHalf(const std::vector<float>& v) {
+    auto out = std::vector<float>(v.size());
+    for (int i = 0; i < v.size(); ++i) {
+        out[i] = __half2float(__float2half(v[i]));
+    }
+    return out;
+}
 void compareIndices(
         const std::vector<float>& queryVecs,
         faiss::Index& refIndex,

data/vendor/faiss/faiss/gpu/test/TestUtils.h CHANGED Viewed

@@ -60,6 +60,9 @@ std::vector<float> randVecs(size_t num, size_t dim);
 /// Generates a collection of random bit vectors
 std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim);
+// returns to_fp32(to_fp16(v)); useful in comparing fp16 results on CPU
+std::vector<float> roundToHalf(const std::vector<float>& v);
 /// Compare two indices via query for similarity, with a user-specified set of
 /// query vectors
 void compareIndices(

data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp CHANGED Viewed

@@ -12,10 +12,10 @@
 #include <sys/time.h>
+#include <faiss/gpu/GpuAutoTune.h>
+#include <faiss/gpu/GpuCloner.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 #include <faiss/gpu/StandardGpuResources.h>
-#include <faiss/gpu/GpuAutoTune.h>
 #include <faiss/index_io.h>
 double elapsed() {

data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h CHANGED Viewed

@@ -70,10 +70,11 @@ bool getTensorCoreSupport(int device);
 /// Equivalent to getTensorCoreSupport(getCurrentDevice())
 bool getTensorCoreSupportCurrentDevice();
-/// Returns the maximum k-selection value supported based on the CUDA SDK that
-/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
-/// non-CUDA files
-int getMaxKSelection();
+/// Returns the amount of currently available memory on the given device
+size_t getFreeMemory(int device);
+/// Equivalent to getFreeMemory(getCurrentDevice())
+size_t getFreeMemoryCurrentDevice();
 /// RAII object to set the current device, and restore the previous
 /// device upon destruction