RubyGems - faiss - Versions diffs - 0.2.6 → 0.2.7 - Mend

faiss 0.2.6 → 0.2.7

Files changed (189)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/ext/faiss/extconf.rb +1 -1
data/lib/faiss/version.rb +1 -1
data/lib/faiss.rb +2 -2
data/vendor/faiss/faiss/AutoTune.cpp +15 -4
data/vendor/faiss/faiss/AutoTune.h +0 -1
data/vendor/faiss/faiss/Clustering.cpp +1 -5
data/vendor/faiss/faiss/Clustering.h +0 -2
data/vendor/faiss/faiss/IVFlib.h +0 -2
data/vendor/faiss/faiss/Index.h +1 -2
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
data/vendor/faiss/faiss/IndexBinary.h +0 -1
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
data/vendor/faiss/faiss/IndexFastScan.h +5 -1
data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
data/vendor/faiss/faiss/IndexFlat.h +1 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
data/vendor/faiss/faiss/IndexHNSW.h +0 -1
data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
data/vendor/faiss/faiss/IndexIDMap.h +0 -2
data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
data/vendor/faiss/faiss/IndexIVF.h +121 -61
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
data/vendor/faiss/faiss/IndexReplicas.h +0 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
data/vendor/faiss/faiss/IndexShards.cpp +26 -109
data/vendor/faiss/faiss/IndexShards.h +2 -3
data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
data/vendor/faiss/faiss/MetaIndexes.h +29 -0
data/vendor/faiss/faiss/MetricType.h +14 -0
data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
data/vendor/faiss/faiss/VectorTransform.h +1 -3
data/vendor/faiss/faiss/clone_index.cpp +232 -18
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
data/vendor/faiss/faiss/impl/HNSW.h +6 -9
data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
data/vendor/faiss/faiss/impl/NSG.h +4 -7
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
data/vendor/faiss/faiss/index_factory.cpp +8 -10
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
data/vendor/faiss/faiss/utils/Heap.h +35 -1
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
data/vendor/faiss/faiss/utils/distances.cpp +61 -7
data/vendor/faiss/faiss/utils/distances.h +11 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
data/vendor/faiss/faiss/utils/fp16.h +7 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
data/vendor/faiss/faiss/utils/hamming.h +21 -10
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
data/vendor/faiss/faiss/utils/sorting.h +71 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
data/vendor/faiss/faiss/utils/utils.cpp +4 -176
data/vendor/faiss/faiss/utils/utils.h +2 -9
metadata +29 -3
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp CHANGED Viewed

@@ -64,13 +64,10 @@ struct Options {
 };
 void queryTest(
+        Options opt,
         faiss::MetricType metricType,
-        bool useFloat16CoarseQuantizer,
-        int dimOverride = -1) {
+        bool useFloat16CoarseQuantizer) {
     for (int tries = 0; tries < 2; ++tries) {
-        Options opt;
-        opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
         std::vector<float> trainVecs =
                 faiss::gpu::randVecs(opt.numTrain, opt.dim);
         std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
@@ -98,7 +95,7 @@ void queryTest(
         faiss::gpu::GpuIndexIVFFlat gpuIndex(
                 &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
         gpuIndex.copyFrom(&cpuIndex);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         bool compFloat16 = useFloat16CoarseQuantizer;
         faiss::gpu::compareIndices(
@@ -147,7 +144,7 @@ void addTest(faiss::MetricType metricType, bool useFloat16CoarseQuantizer) {
         faiss::gpu::GpuIndexIVFFlat gpuIndex(
                 &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
         gpuIndex.copyFrom(&cpuIndex);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         cpuIndex.add(opt.numAdd, addVecs.data());
         gpuIndex.add(opt.numAdd, addVecs.data());
@@ -183,7 +180,7 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // use garbage values to see if we overwrite them
     faiss::IndexFlatL2 cpuQuantizer(1);
@@ -199,7 +196,7 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
     EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -239,7 +236,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
     config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
     faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -249,7 +246,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -288,21 +285,28 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
-    queryTest(faiss::METRIC_L2, false);
+    queryTest(Options(), faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false);
+    queryTest(Options(), faiss::METRIC_INNER_PRODUCT, false);
+}
+TEST(TestGpuIndexIVFFlat, LargeBatch) {
+    Options opt;
+    opt.dim = 3;
+    opt.numQuery = 100000;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 // float16 coarse quantizer
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
-    queryTest(faiss::METRIC_L2, true);
+    queryTest(Options(), faiss::METRIC_L2, true);
 }
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, true);
+    queryTest(Options(), faiss::METRIC_INNER_PRODUCT, true);
 }
 //
@@ -311,19 +315,27 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
-    queryTest(faiss::METRIC_L2, false, 64);
+    Options opt;
+    opt.dim = 64;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false, 64);
+    Options opt;
+    opt.dim = 64;
+    queryTest(opt, faiss::METRIC_INNER_PRODUCT, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
-    queryTest(faiss::METRIC_L2, false, 128);
+    Options opt;
+    opt.dim = 128;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false, 128);
+    Options opt;
+    opt.dim = 128;
+    queryTest(opt, faiss::METRIC_INNER_PRODUCT, false);
 }
 //
@@ -372,7 +384,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // Construct a positive test set
     auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
@@ -419,7 +431,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
@@ -429,7 +441,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) {
             numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
     std::vector<float> distances(numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(numQuery * opt.k, 0);
     gpuIndex.search(
             numQuery, nans.data(), opt.k, distances.data(), indices.data());
@@ -457,7 +469,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     int numNans = 10;
     std::vector<float> nans(
@@ -478,7 +490,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
     std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
     std::vector<float> distance(opt.numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(opt.numQuery * opt.k, 0);
     // should not crash
     gpuIndex.search(
@@ -531,7 +543,65 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, dim, numCentroids, faiss::METRIC_L2, config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(nprobe);
+    gpuIndex.nprobe = nprobe;
+    faiss::gpu::compareIndices(
+            cpuIndex,
+            gpuIndex,
+            numQuery,
+            dim,
+            k,
+            "Unified Memory",
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+}
+TEST(TestGpuIndexIVFFlat, LongIVFList) {
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    // Skip this device if we do not have sufficient memory
+    constexpr size_t kMem = size_t(24) * 1024 * 1024 * 1024;
+    if (faiss::gpu::getFreeMemory(device) < kMem) {
+        std::cout << "TestGpuIndexIVFFlat.LongIVFList: skipping due "
+                     "to insufficient device memory\n";
+        return;
+    }
+    std::cout << "Running LongIVFList test\n";
+    // Test functionality where a single IVF list has more than 2B code values
+    int dim = 64;
+    int numCentroids = 1;
+    size_t numAdd = (size_t(1024) * 1024 * 1024 * 2 + 100000) / dim;
+    size_t numTrain = 100;
+    int numQuery = 5;
+    int k = 10;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
+    faiss::IndexFlatL2 quantizer(dim);
+    faiss::IndexIVFFlat cpuIndex(
+            &quantizer, dim, numCentroids, faiss::METRIC_L2);
+    cpuIndex.train(numTrain, trainVecs.data());
+    cpuIndex.add(numAdd, addVecs.data());
+    cpuIndex.nprobe = 1;
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, dim, numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.train(numTrain, trainVecs.data());
+    gpuIndex.add(numAdd, addVecs.data());
+    gpuIndex.nprobe = 1;
     faiss::gpu::compareIndices(
             cpuIndex,

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp CHANGED Viewed

@@ -137,7 +137,58 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
+        faiss::gpu::compareIndices(
+                cpuIndex,
+                gpuIndex,
+                opt.numQuery,
+                opt.dim,
+                opt.k,
+                opt.toString(),
+                opt.getCompareEpsilon(),
+                opt.getPctMaxDiff1(),
+                opt.getPctMaxDiffN());
+    }
+}
+// Large batch sizes (>= 65536) should also work
+TEST(TestGpuIndexIVFPQ, LargeBatch) {
+    for (bool usePrecomputed : {false, true}) {
+        Options opt;
+        // override for large sizes
+        opt.dim = 4;
+        opt.numQuery = 100000;
+        opt.codes = 2;
+        std::vector<float> trainVecs =
+                faiss::gpu::randVecs(opt.numTrain, opt.dim);
+        std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+        faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+        faiss::IndexIVFPQ cpuIndex(
+                &coarseQuantizer,
+                opt.dim,
+                opt.numCentroids,
+                opt.codes,
+                opt.bitsPerCode);
+        cpuIndex.nprobe = opt.nprobe;
+        cpuIndex.train(opt.numTrain, trainVecs.data());
+        cpuIndex.add(opt.numAdd, addVecs.data());
+        // Use the default temporary memory management to test the memory
+        // manager
+        faiss::gpu::StandardGpuResources res;
+        faiss::gpu::GpuIndexIVFPQConfig config;
+        config.device = opt.device;
+        config.usePrecomputedTables = usePrecomputed;
+        config.indicesOptions = opt.indicesOpt;
+        config.useFloat16LookupTables = false;
+        faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -189,7 +240,7 @@ void testMMCodeDistance(faiss::MetricType mt) {
         config.flatConfig.useFloat16 = (tries % 2 == 1);
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -238,7 +289,7 @@ void testMMCodeDistance(faiss::MetricType mt) {
         config.useFloat16LookupTables = (dimPerSubQ == 7);
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -293,7 +344,7 @@ TEST(TestGpuIndexIVFPQ, Query_IP) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -335,7 +386,7 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
     config.useFloat16LookupTables = opt.useFloat16;
     faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.add(opt.numAdd, addVecs.data());
     cpuIndex.add(opt.numAdd, addVecs.data());
@@ -381,7 +432,7 @@ TEST(TestGpuIndexIVFPQ, Add_L2) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.add(opt.numAdd, addVecs.data());
         cpuIndex.add(opt.numAdd, addVecs.data());
@@ -429,7 +480,7 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.add(opt.numAdd, addVecs.data());
         cpuIndex.add(opt.numAdd, addVecs.data());
@@ -472,7 +523,7 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
                 opt.bitsPerCode,
                 faiss::METRIC_L2,
                 config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.train(opt.numTrain, trainVecs.data());
         gpuIndex.add(opt.numAdd, addVecs.data());
@@ -488,7 +539,7 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
         EXPECT_EQ(cpuIndex.d, gpuIndex.d);
         EXPECT_EQ(cpuIndex.d, opt.dim);
         EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-        EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+        EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
         EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
         EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
         EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
@@ -538,7 +589,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
     // Use garbage values to see if we overwrite them
     faiss::gpu::GpuIndexIVFPQ gpuIndex(
             &res, 1, 1, 1, 8, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -549,7 +600,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
     EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
     EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
@@ -594,7 +645,7 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
             faiss::METRIC_L2,
             config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
@@ -604,7 +655,7 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
             numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
     std::vector<float> distances(numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(numQuery * opt.k, 0);
     gpuIndex.search(
             numQuery, nans.data(), opt.k, distances.data(), indices.data());
@@ -640,7 +691,7 @@ TEST(TestGpuIndexIVFPQ, AddNaN) {
             faiss::METRIC_L2,
             config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     int numNans = 10;
     std::vector<float> nans(
@@ -660,7 +711,7 @@ TEST(TestGpuIndexIVFPQ, AddNaN) {
     std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
     std::vector<float> distance(opt.numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(opt.numQuery * opt.k, 0);
     // should not crash
     gpuIndex.search(
@@ -721,7 +772,7 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) {
             faiss::METRIC_L2,
             config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(nprobe);
+    gpuIndex.nprobe = nprobe;
     faiss::gpu::compareIndices(
             cpuIndex,

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp CHANGED Viewed

@@ -79,7 +79,7 @@ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
             &res, opt.dim, opt.numCentroids, qtype, METRIC_L2, true, config);
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // use garbage values to see if we overwrite them
     IndexFlatL2 cpuQuantizer(1);
@@ -100,7 +100,7 @@ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
     EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -172,7 +172,7 @@ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
             METRIC_L2,
             false,
             config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -182,7 +182,7 @@ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);

data/vendor/faiss/faiss/gpu/test/TestUtils.cpp CHANGED Viewed

@@ -97,7 +97,7 @@ void compareIndices(
         float pctMaxDiffN) {
     // Compare
     std::vector<float> refDistance(numQuery * k, 0);
-    std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
+    std::vector<faiss::idx_t> refIndices(numQuery * k, -1);
     refIndex.search(
             numQuery,
             queryVecs.data(),
@@ -106,7 +106,7 @@ void compareIndices(
             refIndices.data());
     std::vector<float> testDistance(numQuery * k, 0);
-    std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
+    std::vector<faiss::idx_t> testIndices(numQuery * k, -1);
     testIndex.search(
             numQuery,
             queryVecs.data(),
@@ -162,9 +162,9 @@ inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
 void compareLists(
         const float* refDist,
-        const faiss::Index::idx_t* refInd,
+        const faiss::idx_t* refInd,
         const float* testDist,
-        const faiss::Index::idx_t* testInd,
+        const faiss::idx_t* testInd,
         int dim1,
         int dim2,
         const std::string& configMsg,
@@ -181,10 +181,10 @@ void compareLists(
     int numResults = dim1 * dim2;
     // query -> {index -> result position}
-    std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
+    std::vector<std::unordered_map<faiss::idx_t, int>> refIndexMap;
     for (int query = 0; query < dim1; ++query) {
-        std::unordered_map<faiss::Index::idx_t, int> indices;
+        std::unordered_map<faiss::idx_t, int> indices;
         for (int result = 0; result < dim2; ++result) {
             indices[lookup(refInd, query, result, dim1, dim2)] = result;
@@ -208,7 +208,7 @@ void compareLists(
     for (int query = 0; query < dim1; ++query) {
         std::vector<int> diffs;
-        std::set<faiss::Index::idx_t> uniqueIndices;
+        std::set<faiss::idx_t> uniqueIndices;
         auto& indices = refIndexMap[query];

data/vendor/faiss/faiss/gpu/test/TestUtils.h CHANGED Viewed

@@ -93,9 +93,9 @@ void compareIndices(
 /// Display specific differences in the two (distance, index) lists
 void compareLists(
         const float* refDist,
-        const faiss::Index::idx_t* refInd,
+        const faiss::idx_t* refInd,
         const float* testDist,
-        const faiss::Index::idx_t* testInd,
+        const faiss::idx_t* testInd,
         int dim1,
         int dim2,
         const std::string& configMsg,
@@ -130,13 +130,13 @@ void testIVFEquality(A& cpuIndex, B& gpuIndex) {
         EXPECT_EQ(cpuCodes, gpuCodes);
         // Index equality
-        std::vector<Index::idx_t> cpuIndices(cpuLists->list_size(i));
+        std::vector<idx_t> cpuIndices(cpuLists->list_size(i));
         auto si = faiss::InvertedLists::ScopedIds(cpuLists, i);
         std::memcpy(
                 cpuIndices.data(),
                 si.get(),
-                cpuLists->list_size(i) * sizeof(faiss::Index::idx_t));
+                cpuLists->list_size(i) * sizeof(faiss::idx_t));
         EXPECT_EQ(cpuIndices, gpuIndex.getListIndices(i));
     }
 }

data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp CHANGED Viewed

@@ -130,7 +130,7 @@ int main() {
                k,
                nq);
-        std::vector<faiss::Index::idx_t> nns(k * nq);
+        std::vector<faiss::idx_t> nns(k * nq);
         std::vector<float> dis(k * nq);
         index.search(nq, queries.data(), k, dis.data(), nns.data());

data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h CHANGED Viewed

@@ -47,6 +47,12 @@ int getMaxThreads(int device);
 /// Equivalent to getMaxThreads(getCurrentDevice())
 int getMaxThreadsCurrentDevice();
+/// Returns the maximum grid size for the given GPU device
+dim3 getMaxGrid(int device);
+/// Equivalent to getMaxGrid(getCurrentDevice())
+dim3 getMaxGridCurrentDevice();
 /// Returns the maximum smem available for the given GPU device
 size_t getMaxSharedMemPerBlock(int device);

data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp CHANGED Viewed

@@ -54,14 +54,7 @@ AdditiveQuantizer::AdditiveQuantizer(
         : Quantizer(d),
           M(nbits.size()),
           nbits(nbits),
-          verbose(false),
-          is_trained(false),
-          max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
           search_type(search_type) {
-    norm_max = norm_min = NAN;
-    tot_bits = 0;
-    total_codebook_size = 0;
-    only_8bit = false;
     set_derived_values();
 }

data/vendor/faiss/faiss/impl/AdditiveQuantizer.h CHANGED Viewed

@@ -7,6 +7,7 @@
 #pragma once
+#include <cmath>
 #include <cstdint>
 #include <vector>
@@ -29,13 +30,13 @@ struct AdditiveQuantizer : Quantizer {
     // derived values
     std::vector<uint64_t> codebook_offsets;
-    size_t tot_bits;            ///< total number of bits (indexes + norms)
-    size_t norm_bits;           ///< bits allocated for the norms
-    size_t total_codebook_size; ///< size of the codebook in vectors
-    bool only_8bit;             ///< are all nbits = 8 (use faster decoder)
+    size_t tot_bits = 0;            ///< total number of bits (indexes + norms)
+    size_t norm_bits = 0;           ///< bits allocated for the norms
+    size_t total_codebook_size = 0; ///< size of the codebook in vectors
+    bool only_8bit = false;         ///< are all nbits = 8 (use faster decoder)
-    bool verbose;    ///< verbose during training?
-    bool is_trained; ///< is trained or not
+    bool verbose = false;    ///< verbose during training?
+    bool is_trained = false; ///< is trained or not
     IndexFlat1D qnorm;            ///< store and search norms
     std::vector<float> norm_tabs; ///< store norms of codebook entries for 4-bit
@@ -43,7 +44,7 @@ struct AdditiveQuantizer : Quantizer {
     /// norms and distance matrixes with beam search can get large, so use this
     /// to control for the amount of memory that can be allocated
-    size_t max_mem_distances;
+    size_t max_mem_distances = 5 * (size_t(1) << 30);
     /// encode a norm into norm_bits bits
     uint64_t encode_norm(float norm) const;
@@ -145,7 +146,7 @@ struct AdditiveQuantizer : Quantizer {
     Search_type_t search_type;
     /// min/max for quantization of norms
-    float norm_min, norm_max;
+    float norm_min = NAN, norm_max = NAN;
     template <bool is_IP, Search_type_t effective_search_type>
     float compute_1_distance_LUT(const uint8_t* codes, const float* LUT) const;
@@ -157,7 +158,6 @@ struct AdditiveQuantizer : Quantizer {
      * Support for exhaustive distance computations with all the centroids.
      * Hence, the number of these centroids should not be too large.
      ****************************************************************************/
-    using idx_t = Index::idx_t;
     /// decoding function for a code in a 64-bit word
     void decode_64bit(idx_t n, float* x) const;

data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp CHANGED Viewed

@@ -20,7 +20,7 @@ namespace faiss {
  * RangeSearchResult
  ***********************************************************************/
-RangeSearchResult::RangeSearchResult(idx_t nq, bool alloc_lims) : nq(nq) {
+RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
     if (alloc_lims) {
         lims = new size_t[nq + 1];
         memset(lims, 0, sizeof(*lims) * (nq + 1));