RubyGems - faiss - Versions diffs - 0.2.4 → 0.2.5 - Mend

faiss 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177)

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +23 -21
data/ext/faiss/extconf.rb +11 -0
data/ext/faiss/index.cpp +4 -4
data/ext/faiss/index_binary.cpp +6 -6
data/ext/faiss/product_quantizer.cpp +4 -4
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +13 -0
data/vendor/faiss/faiss/IVFlib.cpp +101 -2
data/vendor/faiss/faiss/IVFlib.h +26 -2
data/vendor/faiss/faiss/Index.cpp +36 -3
data/vendor/faiss/faiss/Index.h +43 -6
data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
data/vendor/faiss/faiss/Index2Layer.h +6 -1
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
data/vendor/faiss/faiss/IndexBinary.h +18 -3
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
data/vendor/faiss/faiss/IndexFastScan.h +145 -0
data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
data/vendor/faiss/faiss/IndexFlat.h +7 -4
data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
data/vendor/faiss/faiss/IndexHNSW.h +4 -2
data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
data/vendor/faiss/faiss/IndexIDMap.h +107 -0
data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
data/vendor/faiss/faiss/IndexIVF.h +35 -16
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
data/vendor/faiss/faiss/IndexLSH.h +2 -1
data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
data/vendor/faiss/faiss/IndexLattice.h +3 -1
data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
data/vendor/faiss/faiss/IndexNSG.h +25 -1
data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
data/vendor/faiss/faiss/IndexPQ.h +19 -5
data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
data/vendor/faiss/faiss/IndexRefine.h +4 -2
data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
data/vendor/faiss/faiss/IndexReplicas.h +2 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
data/vendor/faiss/faiss/IndexShards.cpp +4 -1
data/vendor/faiss/faiss/IndexShards.h +2 -1
data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
data/vendor/faiss/faiss/MetaIndexes.h +3 -81
data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
data/vendor/faiss/faiss/VectorTransform.h +22 -4
data/vendor/faiss/faiss/clone_index.cpp +23 -1
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
data/vendor/faiss/faiss/impl/HNSW.h +19 -16
data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
data/vendor/faiss/faiss/index_factory.cpp +196 -7
data/vendor/faiss/faiss/index_io.h +5 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
data/vendor/faiss/faiss/utils/Heap.h +31 -15
data/vendor/faiss/faiss/utils/distances.cpp +380 -56
data/vendor/faiss/faiss/utils/distances.h +113 -15
data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
data/vendor/faiss/faiss/utils/fp16.h +11 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
data/vendor/faiss/faiss/utils/random.cpp +53 -0
data/vendor/faiss/faiss/utils/random.h +5 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
metadata +37 -3

data/vendor/faiss/faiss/gpu/impl/IndexUtils.h ADDED

@@ -0,0 +1,32 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <faiss/Index.h>
+namespace faiss {
+namespace gpu {
+/// A collection of various utility functions for index implementation
+/// Returns the maximum k-selection value supported based on the CUDA SDK that
+/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
+/// non-CUDA files
+int getMaxKSelection();
+// Validate the k parameter for search
+void validateKSelect(Index::idx_t k);
+// Validate the nprobe parameter for search
+void validateNProbe(Index::idx_t nprobe);
+/// Validate the n (number of vectors) parameter for add, search, reconstruct
+void validateNumVectors(Index::idx_t n);
+} // namespace gpu
+} // namespace faiss

data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp CHANGED

@@ -8,6 +8,7 @@
 #include <faiss/IndexBinaryFlat.h>
 #include <faiss/gpu/GpuIndexBinaryFlat.h>
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <faiss/utils/utils.h>

data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp CHANGED

@@ -8,10 +8,12 @@
 #include <faiss/IndexFlat.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/StandardGpuResources.h>
+#include <faiss/gpu/impl/IndexUtils.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/gpu/utils/DeviceUtils.h>
 #include <gtest/gtest.h>
 #include <sstream>
+#include <unordered_map>
 #include <vector>
 // FIXME: figure out a better way to test fp16
@@ -23,7 +25,6 @@ struct TestFlatOptions {
             : metric(faiss::MetricType::METRIC_L2),
               metricArg(0),
               useFloat16(false),
-              useTransposed(false),
               numVecsOverride(-1),
               numQueriesOverride(-1),
               kOverride(-1),
@@ -33,7 +34,6 @@ struct TestFlatOptions {
     float metricArg;
     bool useFloat16;
-    bool useTransposed;
     int numVecsOverride;
     int numQueriesOverride;
     int kOverride;
@@ -73,7 +73,6 @@ void testFlat(const TestFlatOptions& opt) {
     faiss::gpu::GpuIndexFlatConfig config;
     config.device = device;
     config.useFloat16 = opt.useFloat16;
-    config.storeTransposed = opt.useTransposed;
     faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config);
     gpuIndex.metric_arg = opt.metricArg;
@@ -85,8 +84,7 @@ void testFlat(const TestFlatOptions& opt) {
     std::stringstream str;
     str << "metric " << opt.metric << " marg " << opt.metricArg << " numVecs "
         << numVecs << " dim " << dim << " useFloat16 " << opt.useFloat16
-        << " transposed " << opt.useTransposed << " numQuery " << numQuery
-        << " k " << k;
+        << " numQuery " << numQuery << " k " << k;
     // To some extent, we depend upon the relative error for the test
     // for float16
@@ -110,12 +108,8 @@ TEST(TestGpuIndexFlat, IP_Float32) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         testFlat(opt);
-        opt.useTransposed = true;
-        testFlat(opt);
     }
 }
@@ -123,11 +117,7 @@ TEST(TestGpuIndexFlat, L1_Float32) {
     TestFlatOptions opt;
     opt.metric = faiss::MetricType::METRIC_L1;
     opt.useFloat16 = false;
-    opt.useTransposed = false;
-    testFlat(opt);
-    opt.useTransposed = true;
     testFlat(opt);
 }
@@ -136,12 +126,8 @@ TEST(TestGpuIndexFlat, Lp_Float32) {
     opt.metric = faiss::MetricType::METRIC_Lp;
     opt.metricArg = 5;
     opt.useFloat16 = false;
-    opt.useTransposed = false;
     testFlat(opt);
-    // Don't bother testing the transposed version, the L1 test should be good
-    // enough for that
 }
 TEST(TestGpuIndexFlat, L2_Float32) {
@@ -150,11 +136,7 @@ TEST(TestGpuIndexFlat, L2_Float32) {
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
-        testFlat(opt);
-        opt.useTransposed = true;
         testFlat(opt);
     }
 }
@@ -165,7 +147,6 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         opt.kOverride = 1;
         testFlat(opt);
@@ -177,12 +158,8 @@ TEST(TestGpuIndexFlat, IP_Float16) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_INNER_PRODUCT;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
         testFlat(opt);
-        opt.useTransposed = true;
-        testFlat(opt);
     }
 }
@@ -191,11 +168,7 @@ TEST(TestGpuIndexFlat, L2_Float16) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
-        testFlat(opt);
-        opt.useTransposed = true;
         testFlat(opt);
     }
 }
@@ -206,7 +179,6 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = true;
-        opt.useTransposed = false;
         opt.kOverride = 1;
         testFlat(opt);
@@ -219,7 +191,6 @@ TEST(TestGpuIndexFlat, L2_Tiling) {
         TestFlatOptions opt;
         opt.metric = faiss::MetricType::METRIC_L2;
         opt.useFloat16 = false;
-        opt.useTransposed = false;
         opt.numVecsOverride = 1000000;
         // keep the rest of the problem reasonably small
@@ -238,7 +209,6 @@ TEST(TestGpuIndexFlat, QueryEmpty) {
     faiss::gpu::GpuIndexFlatConfig config;
     config.device = 0;
     config.useFloat16 = false;
-    config.storeTransposed = false;
     int dim = 128;
     faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
@@ -267,40 +237,46 @@ TEST(TestGpuIndexFlat, CopyFrom) {
     int numVecs = faiss::gpu::randVal(100, 200);
     int dim = faiss::gpu::randVal(1, 1000);
-    faiss::IndexFlatL2 cpuIndex(dim);
     std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
+    faiss::IndexFlatL2 cpuIndex(dim);
     cpuIndex.add(numVecs, vecs.data());
     faiss::gpu::StandardGpuResources res;
     res.noTempMemory();
-    // Fill with garbage values
     int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-    faiss::gpu::GpuIndexFlatConfig config;
-    config.device = device;
-    config.useFloat16 = false;
-    config.storeTransposed = false;
-    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
-    gpuIndex.copyFrom(&cpuIndex);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
-    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-    EXPECT_EQ(gpuIndex.ntotal, numVecs);
+        // Fill with garbage values
+        faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
+        gpuIndex.copyFrom(&cpuIndex);
-    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-    EXPECT_EQ(cpuIndex.d, dim);
+        EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+        EXPECT_EQ(gpuIndex.ntotal, numVecs);
-    int idx = faiss::gpu::randVal(0, numVecs - 1);
+        EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+        EXPECT_EQ(cpuIndex.d, dim);
-    std::vector<float> gpuVals(dim);
-    gpuIndex.reconstruct(idx, gpuVals.data());
+        std::vector<float> gpuVals(numVecs * dim);
+        gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
-    std::vector<float> cpuVals(dim);
-    cpuIndex.reconstruct(idx, cpuVals.data());
+        std::vector<float> cpuVals(numVecs * dim);
+        cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
-    EXPECT_EQ(gpuVals, cpuVals);
+        // The CPU is the source of (float32) truth here, while the GPU index
+        // may be in float16 mode and thus was subject to rounding
+        if (useFloat16) {
+            EXPECT_EQ(gpuVals, faiss::gpu::roundToHalf(cpuVals));
+        } else {
+            // Should be exactly the same
+            EXPECT_EQ(gpuVals, cpuVals);
+        }
+    }
 }
 TEST(TestGpuIndexFlat, CopyTo) {
@@ -311,36 +287,36 @@ TEST(TestGpuIndexFlat, CopyTo) {
     int dim = faiss::gpu::randVal(1, 1000);
     int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
-    faiss::gpu::GpuIndexFlatConfig config;
-    config.device = device;
-    config.useFloat16 = false;
-    config.storeTransposed = false;
-    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
     std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
-    gpuIndex.add(numVecs, vecs.data());
-    // Fill with garbage values
-    faiss::IndexFlatL2 cpuIndex(2000);
-    gpuIndex.copyTo(&cpuIndex);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
+        faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
+        gpuIndex.add(numVecs, vecs.data());
-    EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
-    EXPECT_EQ(gpuIndex.ntotal, numVecs);
+        // Fill with garbage values
+        faiss::IndexFlatL2 cpuIndex(2000);
+        gpuIndex.copyTo(&cpuIndex);
-    EXPECT_EQ(cpuIndex.d, gpuIndex.d);
-    EXPECT_EQ(cpuIndex.d, dim);
+        EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
+        EXPECT_EQ(gpuIndex.ntotal, numVecs);
-    int idx = faiss::gpu::randVal(0, numVecs - 1);
+        EXPECT_EQ(cpuIndex.d, gpuIndex.d);
+        EXPECT_EQ(cpuIndex.d, dim);
-    std::vector<float> gpuVals(dim);
-    gpuIndex.reconstruct(idx, gpuVals.data());
+        std::vector<float> gpuVals(numVecs * dim);
+        gpuIndex.reconstruct_n(0, gpuIndex.ntotal, gpuVals.data());
-    std::vector<float> cpuVals(dim);
-    cpuIndex.reconstruct(idx, cpuVals.data());
+        std::vector<float> cpuVals(numVecs * dim);
+        cpuIndex.reconstruct_n(0, gpuIndex.ntotal, cpuVals.data());
-    EXPECT_EQ(gpuVals, cpuVals);
+        // The GPU is the source of truth here, so the float32 exact comparison
+        // even if the index uses float16 is ok
+        EXPECT_EQ(gpuVals, cpuVals);
+    }
 }
 TEST(TestGpuIndexFlat, UnifiedMemory) {
@@ -390,6 +366,266 @@ TEST(TestGpuIndexFlat, UnifiedMemory) {
             0.015f);
 }
+TEST(TestGpuIndexFlat, LargeIndex) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    // Skip this device if we do not have sufficient memory
+    constexpr size_t kMem = size_t(8) * 1024 * 1024 * 1024;
+    if (faiss::gpu::getFreeMemory(device) < kMem) {
+        std::cout << "TestGpuIndexFlat.LargeIndex: skipping due "
+                     "to insufficient device memory\n";
+        return;
+    }
+    std::cout << "Running LargeIndex test\n";
+    size_t dim = 256; // each vec is sizeof(float) * 256 = 1 KiB in size
+    size_t nb = 5000000;
+    size_t nq = 10;
+    auto xb = faiss::gpu::randVecs(nb, dim);
+    int k = 10;
+    faiss::IndexFlatL2 cpuIndexL2(dim);
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
+    cpuIndexL2.add(nb, xb.data());
+    gpuIndexL2.add(nb, xb.data());
+    // To some extent, we depend upon the relative error for the test
+    // for float16
+    faiss::gpu::compareIndices(
+            cpuIndexL2,
+            gpuIndexL2,
+            nq,
+            dim,
+            k,
+            "LargeIndex",
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+}
+TEST(TestGpuIndexFlat, Residual) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    int dim = 32;
+    faiss::IndexFlat cpuIndex(dim, faiss::MetricType::METRIC_L2);
+    faiss::gpu::GpuIndexFlat gpuIndex(
+            &res, dim, faiss::MetricType::METRIC_L2, config);
+    int numVecs = 100;
+    auto vecs = faiss::gpu::randVecs(numVecs, dim);
+    cpuIndex.add(numVecs, vecs.data());
+    gpuIndex.add(numVecs, vecs.data());
+    auto indexVecs = std::vector<faiss::Index::idx_t>{0, 2, 4, 6, 8};
+    auto queryVecs = faiss::gpu::randVecs(indexVecs.size(), dim);
+    auto residualsCpu = std::vector<float>(indexVecs.size() * dim);
+    auto residualsGpu = std::vector<float>(indexVecs.size() * dim);
+    cpuIndex.compute_residual_n(
+            indexVecs.size(),
+            queryVecs.data(),
+            residualsCpu.data(),
+            indexVecs.data());
+    gpuIndex.compute_residual_n(
+            indexVecs.size(),
+            queryVecs.data(),
+            residualsGpu.data(),
+            indexVecs.data());
+    // Should be exactly the same, as this is just a single float32 subtraction
+    EXPECT_EQ(residualsCpu, residualsGpu);
+}
+TEST(TestGpuIndexFlat, Reconstruct) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    int dim = 32;
+    int numVecs = 100;
+    auto vecs = faiss::gpu::randVecs(numVecs, dim);
+    auto vecs16 = faiss::gpu::roundToHalf(vecs);
+    for (bool useFloat16 : {false, true}) {
+        faiss::gpu::GpuIndexFlatConfig config;
+        config.device = device;
+        config.useFloat16 = useFloat16;
+        faiss::gpu::GpuIndexFlat gpuIndex(
+                &res, dim, faiss::MetricType::METRIC_L2, config);
+        gpuIndex.add(numVecs, vecs.data());
+        // Test reconstruct
+        {
+            auto reconstructVecs = std::vector<float>(dim);
+            gpuIndex.reconstruct(15, reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < dim; ++i) {
+                EXPECT_EQ(reconstructVecs[i], ref[15 * dim + i]);
+            }
+        }
+        // Test reconstruct_n
+        if (false) {
+            auto reconstructVecs = std::vector<float>((numVecs - 1) * dim);
+            int startVec = 5;
+            int endVec = numVecs - 1;
+            int numReconstructVec = endVec - startVec + 1;
+            gpuIndex.reconstruct_n(
+                    startVec, numReconstructVec, reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < numReconstructVec; ++i) {
+                for (int j = 0; j < dim; ++j) {
+                    EXPECT_EQ(
+                            reconstructVecs[i * dim + j],
+                            ref[(i + startVec) * dim + j]);
+                }
+            }
+        }
+        // Test reconstruct_batch
+        if (false) {
+            auto reconstructKeys = std::vector<faiss::Index::idx_t>{1, 3, 5};
+            auto reconstructVecs =
+                    std::vector<float>(reconstructKeys.size() * dim);
+            gpuIndex.reconstruct_batch(
+                    reconstructKeys.size(),
+                    reconstructKeys.data(),
+                    reconstructVecs.data());
+            auto& ref = useFloat16 ? vecs16 : vecs;
+            for (int i = 0; i < reconstructKeys.size(); ++i) {
+                for (int j = 0; j < dim; ++j) {
+                    EXPECT_EQ(
+                            reconstructVecs[i * dim + j],
+                            ref[reconstructKeys[i] * dim + j]);
+                }
+            }
+        }
+    }
+}
+TEST(TestGpuIndexFlat, SearchAndReconstruct) {
+    // Construct on a random device to test multi-device, if we have
+    // multiple devices
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    size_t dim = 32;
+    size_t nb = 5000;
+    size_t nq = 10;
+    int k = 10;
+    auto xb = faiss::gpu::randVecs(nb, dim);
+    auto xq = faiss::gpu::randVecs(nq, dim);
+    faiss::IndexFlatL2 cpuIndex(dim);
+    faiss::gpu::GpuIndexFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
+    cpuIndex.add(nb, xb.data());
+    gpuIndex.add(nb, xb.data());
+    std::vector<float> refDistance(nq * k, 0);
+    std::vector<faiss::Index::idx_t> refIndices(nq * k, -1);
+    std::vector<float> refReconstruct(nq * k * dim, 0);
+    cpuIndex.search_and_reconstruct(
+            nq,
+            xq.data(),
+            k,
+            refDistance.data(),
+            refIndices.data(),
+            refReconstruct.data());
+    std::vector<float> testDistance(nq * k, 0);
+    std::vector<faiss::Index::idx_t> testIndices(nq * k, -1);
+    std::vector<float> testReconstruct(nq * k * dim, 0);
+    gpuIndex.search_and_reconstruct(
+            nq,
+            xq.data(),
+            k,
+            testDistance.data(),
+            testIndices.data(),
+            testReconstruct.data());
+    // This handles the search results
+    faiss::gpu::compareLists(
+            refDistance.data(),
+            refIndices.data(),
+            testDistance.data(),
+            testIndices.data(),
+            nq,
+            k,
+            "SearchAndReconstruct",
+            true,
+            false,
+            true,
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+    // As the search results may be slightly different (though compareLists
+    // above will ensure a decent number of matches), reconstruction should be
+    // the same for the vectors that do match
+    for (int i = 0; i < nq; ++i) {
+        std::unordered_map<faiss::Index::idx_t, int> refLocation;
+        for (int j = 0; j < k; ++j) {
+            refLocation.insert(std::make_pair(refIndices[i * k + j], j));
+        }
+        for (int j = 0; j < k; ++j) {
+            auto idx = testIndices[i * k + j];
+            auto it = refLocation.find(idx);
+            if (it != refLocation.end()) {
+                for (int d = 0; d < dim; ++d) {
+                    EXPECT_EQ(
+                            refReconstruct[(i * k + it->second) * dim + d],
+                            testReconstruct[(i * k + j) * dim + d]);
+                }
+            }
+        }
+    }
+}
 int main(int argc, char** argv) {
     testing::InitGoogleTest(&argc, argv);

data/vendor/faiss/faiss/gpu/test/TestUtils.cpp CHANGED

@@ -5,6 +5,7 @@
  * LICENSE file in the root directory of this source tree.
  */
+#include <cuda_fp16.h>
 #include <faiss/gpu/test/TestUtils.h>
 #include <faiss/utils/random.h>
 #include <gtest/gtest.h>
@@ -74,6 +75,15 @@ std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim) {
     return v;
 }
+std::vector<float> roundToHalf(const std::vector<float>& v) {
+    auto out = std::vector<float>(v.size());
+    for (int i = 0; i < v.size(); ++i) {
+        out[i] = __half2float(__float2half(v[i]));
+    }
+    return out;
+}
 void compareIndices(
         const std::vector<float>& queryVecs,
         faiss::Index& refIndex,

data/vendor/faiss/faiss/gpu/test/TestUtils.h CHANGED

@@ -60,6 +60,9 @@ std::vector<float> randVecs(size_t num, size_t dim);
 /// Generates a collection of random bit vectors
 std::vector<unsigned char> randBinaryVecs(size_t num, size_t dim);
+// returns to_fp32(to_fp16(v)); useful in comparing fp16 results on CPU
+std::vector<float> roundToHalf(const std::vector<float>& v);
 /// Compare two indices via query for similarity, with a user-specified set of
 /// query vectors
 void compareIndices(

data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp CHANGED

@@ -12,10 +12,10 @@
 #include <sys/time.h>
+#include <faiss/gpu/GpuAutoTune.h>
+#include <faiss/gpu/GpuCloner.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
 #include <faiss/gpu/StandardGpuResources.h>
-#include <faiss/gpu/GpuAutoTune.h>
 #include <faiss/index_io.h>
 double elapsed() {

data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h CHANGED

@@ -70,10 +70,11 @@ bool getTensorCoreSupport(int device);
 /// Equivalent to getTensorCoreSupport(getCurrentDevice())
 bool getTensorCoreSupportCurrentDevice();
-/// Returns the maximum k-selection value supported based on the CUDA SDK that
-/// we were compiled with. .cu files can use DeviceDefs.cuh, but this is for
-/// non-CUDA files
-int getMaxKSelection();
+/// Returns the amount of currently available memory on the given device
+size_t getFreeMemory(int device);
+/// Equivalent to getFreeMemory(getCurrentDevice())
+size_t getFreeMemoryCurrentDevice();
 /// RAII object to set the current device, and restore the previous
 /// device upon destruction