RubyGems - faiss - Versions diffs - 0.2.5 → 0.2.7 - Mend

faiss 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (191) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/LICENSE.txt +1 -1
data/ext/faiss/extconf.rb +1 -1
data/ext/faiss/index.cpp +13 -0
data/lib/faiss/version.rb +1 -1
data/lib/faiss.rb +2 -2
data/vendor/faiss/faiss/AutoTune.cpp +15 -4
data/vendor/faiss/faiss/AutoTune.h +0 -1
data/vendor/faiss/faiss/Clustering.cpp +1 -5
data/vendor/faiss/faiss/Clustering.h +0 -2
data/vendor/faiss/faiss/IVFlib.h +0 -2
data/vendor/faiss/faiss/Index.h +1 -2
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
data/vendor/faiss/faiss/IndexBinary.h +0 -1
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
data/vendor/faiss/faiss/IndexFastScan.h +5 -1
data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
data/vendor/faiss/faiss/IndexFlat.h +1 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
data/vendor/faiss/faiss/IndexHNSW.h +0 -1
data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
data/vendor/faiss/faiss/IndexIDMap.h +0 -2
data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
data/vendor/faiss/faiss/IndexIVF.h +121 -61
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
data/vendor/faiss/faiss/IndexReplicas.h +0 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
data/vendor/faiss/faiss/IndexShards.cpp +26 -109
data/vendor/faiss/faiss/IndexShards.h +2 -3
data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
data/vendor/faiss/faiss/MetaIndexes.h +29 -0
data/vendor/faiss/faiss/MetricType.h +14 -0
data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
data/vendor/faiss/faiss/VectorTransform.h +1 -3
data/vendor/faiss/faiss/clone_index.cpp +232 -18
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
data/vendor/faiss/faiss/impl/HNSW.h +6 -9
data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
data/vendor/faiss/faiss/impl/NSG.h +4 -7
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
data/vendor/faiss/faiss/index_factory.cpp +8 -10
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
data/vendor/faiss/faiss/utils/Heap.h +35 -1
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
data/vendor/faiss/faiss/utils/distances.cpp +61 -7
data/vendor/faiss/faiss/utils/distances.h +11 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
data/vendor/faiss/faiss/utils/fp16.h +7 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
data/vendor/faiss/faiss/utils/hamming.h +21 -10
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
data/vendor/faiss/faiss/utils/sorting.h +71 -0
data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
data/vendor/faiss/faiss/utils/utils.cpp +4 -176
data/vendor/faiss/faiss/utils/utils.h +2 -9
metadata +30 -4
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp CHANGED Viewed

@@ -64,13 +64,10 @@ struct Options {
 };
 void queryTest(
+        Options opt,
         faiss::MetricType metricType,
-        bool useFloat16CoarseQuantizer,
-        int dimOverride = -1) {
+        bool useFloat16CoarseQuantizer) {
     for (int tries = 0; tries < 2; ++tries) {
-        Options opt;
-        opt.dim = dimOverride != -1 ? dimOverride : opt.dim;
         std::vector<float> trainVecs =
                 faiss::gpu::randVecs(opt.numTrain, opt.dim);
         std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
@@ -98,7 +95,7 @@ void queryTest(
         faiss::gpu::GpuIndexIVFFlat gpuIndex(
                 &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
         gpuIndex.copyFrom(&cpuIndex);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         bool compFloat16 = useFloat16CoarseQuantizer;
         faiss::gpu::compareIndices(
@@ -147,7 +144,7 @@ void addTest(faiss::MetricType metricType, bool useFloat16CoarseQuantizer) {
         faiss::gpu::GpuIndexIVFFlat gpuIndex(
                 &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
         gpuIndex.copyFrom(&cpuIndex);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         cpuIndex.add(opt.numAdd, addVecs.data());
         gpuIndex.add(opt.numAdd, addVecs.data());
@@ -183,7 +180,7 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // use garbage values to see if we overwrite then
     faiss::IndexFlatL2 cpuQuantizer(1);
@@ -199,7 +196,7 @@ void copyToTest(bool useFloat16CoarseQuantizer) {
     EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -239,7 +236,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
     config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
     faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -249,7 +246,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -288,21 +285,28 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2) {
-    queryTest(faiss::METRIC_L2, false);
+    queryTest(Options(), faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false);
+    queryTest(Options(), faiss::METRIC_INNER_PRODUCT, false);
+}
+TEST(TestGpuIndexIVFFlat, LargeBatch) {
+    Options opt;
+    opt.dim = 3;
+    opt.numQuery = 100000;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 // float16 coarse quantizer
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) {
-    queryTest(faiss::METRIC_L2, true);
+    queryTest(Options(), faiss::METRIC_L2, true);
 }
 TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, true);
+    queryTest(Options(), faiss::METRIC_INNER_PRODUCT, true);
 }
 //
@@ -311,19 +315,27 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) {
 //
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) {
-    queryTest(faiss::METRIC_L2, false, 64);
+    Options opt;
+    opt.dim = 64;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false, 64);
+    Options opt;
+    opt.dim = 64;
+    queryTest(opt, faiss::METRIC_INNER_PRODUCT, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) {
-    queryTest(faiss::METRIC_L2, false, 128);
+    Options opt;
+    opt.dim = 128;
+    queryTest(opt, faiss::METRIC_L2, false);
 }
 TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) {
-    queryTest(faiss::METRIC_INNER_PRODUCT, false, 128);
+    Options opt;
+    opt.dim = 128;
+    queryTest(opt, faiss::METRIC_INNER_PRODUCT, false);
 }
 //
@@ -372,7 +384,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // Construct a positive test set
     auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
@@ -419,7 +431,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
@@ -429,7 +441,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) {
             numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
     std::vector<float> distances(numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(numQuery * opt.k, 0);
     gpuIndex.search(
             numQuery, nans.data(), opt.k, distances.data(), indices.data());
@@ -457,7 +469,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     int numNans = 10;
     std::vector<float> nans(
@@ -478,7 +490,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) {
     std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
     std::vector<float> distance(opt.numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(opt.numQuery * opt.k, 0);
     // should not crash
     gpuIndex.search(
@@ -531,7 +543,65 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) {
     faiss::gpu::GpuIndexIVFFlat gpuIndex(
             &res, dim, numCentroids, faiss::METRIC_L2, config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(nprobe);
+    gpuIndex.nprobe = nprobe;
+    faiss::gpu::compareIndices(
+            cpuIndex,
+            gpuIndex,
+            numQuery,
+            dim,
+            k,
+            "Unified Memory",
+            kF32MaxRelErr,
+            0.1f,
+            0.015f);
+}
+TEST(TestGpuIndexIVFFlat, LongIVFList) {
+    int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
+    // Skip this device if we do not have sufficient memory
+    constexpr size_t kMem = size_t(24) * 1024 * 1024 * 1024;
+    if (faiss::gpu::getFreeMemory(device) < kMem) {
+        std::cout << "TestGpuIndexIVFFlat.LongIVFList: skipping due "
+                     "to insufficient device memory\n";
+        return;
+    }
+    std::cout << "Running LongIVFList test\n";
+    // Test functionality where a single IVF list has more than 2B code values
+    int dim = 64;
+    int numCentroids = 1;
+    size_t numAdd = (size_t(1024) * 1024 * 1024 * 2 + 100000) / dim;
+    size_t numTrain = 100;
+    int numQuery = 5;
+    int k = 10;
+    std::vector<float> trainVecs = faiss::gpu::randVecs(numTrain, dim);
+    std::vector<float> addVecs = faiss::gpu::randVecs(numAdd, dim);
+    faiss::IndexFlatL2 quantizer(dim);
+    faiss::IndexIVFFlat cpuIndex(
+            &quantizer, dim, numCentroids, faiss::METRIC_L2);
+    cpuIndex.train(numTrain, trainVecs.data());
+    cpuIndex.add(numAdd, addVecs.data());
+    cpuIndex.nprobe = 1;
+    faiss::gpu::StandardGpuResources res;
+    res.noTempMemory();
+    faiss::gpu::GpuIndexIVFFlatConfig config;
+    config.device = device;
+    faiss::gpu::GpuIndexIVFFlat gpuIndex(
+            &res, dim, numCentroids, faiss::METRIC_L2, config);
+    gpuIndex.train(numTrain, trainVecs.data());
+    gpuIndex.add(numAdd, addVecs.data());
+    gpuIndex.nprobe = 1;
     faiss::gpu::compareIndices(
             cpuIndex,

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp CHANGED Viewed

@@ -137,7 +137,58 @@ TEST(TestGpuIndexIVFPQ, Query_L2) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
+        faiss::gpu::compareIndices(
+                cpuIndex,
+                gpuIndex,
+                opt.numQuery,
+                opt.dim,
+                opt.k,
+                opt.toString(),
+                opt.getCompareEpsilon(),
+                opt.getPctMaxDiff1(),
+                opt.getPctMaxDiffN());
+    }
+}
+// Large batch sizes (>= 65536) should also work
+TEST(TestGpuIndexIVFPQ, LargeBatch) {
+    for (bool usePrecomputed : {false, true}) {
+        Options opt;
+        // override for large sizes
+        opt.dim = 4;
+        opt.numQuery = 100000;
+        opt.codes = 2;
+        std::vector<float> trainVecs =
+                faiss::gpu::randVecs(opt.numTrain, opt.dim);
+        std::vector<float> addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim);
+        faiss::IndexFlatL2 coarseQuantizer(opt.dim);
+        faiss::IndexIVFPQ cpuIndex(
+                &coarseQuantizer,
+                opt.dim,
+                opt.numCentroids,
+                opt.codes,
+                opt.bitsPerCode);
+        cpuIndex.nprobe = opt.nprobe;
+        cpuIndex.train(opt.numTrain, trainVecs.data());
+        cpuIndex.add(opt.numAdd, addVecs.data());
+        // Use the default temporary memory management to test the memory
+        // manager
+        faiss::gpu::StandardGpuResources res;
+        faiss::gpu::GpuIndexIVFPQConfig config;
+        config.device = opt.device;
+        config.usePrecomputedTables = usePrecomputed;
+        config.indicesOptions = opt.indicesOpt;
+        config.useFloat16LookupTables = false;
+        faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -189,7 +240,7 @@ void testMMCodeDistance(faiss::MetricType mt) {
         config.flatConfig.useFloat16 = (tries % 2 == 1);
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -238,7 +289,7 @@ void testMMCodeDistance(faiss::MetricType mt) {
         config.useFloat16LookupTables = (dimPerSubQ == 7);
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -293,7 +344,7 @@ TEST(TestGpuIndexIVFPQ, Query_IP) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         faiss::gpu::compareIndices(
                 cpuIndex,
@@ -335,7 +386,7 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) {
     config.useFloat16LookupTables = opt.useFloat16;
     faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.add(opt.numAdd, addVecs.data());
     cpuIndex.add(opt.numAdd, addVecs.data());
@@ -381,7 +432,7 @@ TEST(TestGpuIndexIVFPQ, Add_L2) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.add(opt.numAdd, addVecs.data());
         cpuIndex.add(opt.numAdd, addVecs.data());
@@ -429,7 +480,7 @@ TEST(TestGpuIndexIVFPQ, Add_IP) {
         config.useFloat16LookupTables = opt.useFloat16;
         faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.add(opt.numAdd, addVecs.data());
         cpuIndex.add(opt.numAdd, addVecs.data());
@@ -472,7 +523,7 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
                 opt.bitsPerCode,
                 faiss::METRIC_L2,
                 config);
-        gpuIndex.setNumProbes(opt.nprobe);
+        gpuIndex.nprobe = opt.nprobe;
         gpuIndex.train(opt.numTrain, trainVecs.data());
         gpuIndex.add(opt.numAdd, addVecs.data());
@@ -488,7 +539,7 @@ TEST(TestGpuIndexIVFPQ, CopyTo) {
         EXPECT_EQ(cpuIndex.d, gpuIndex.d);
         EXPECT_EQ(cpuIndex.d, opt.dim);
         EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-        EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+        EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
         EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
         EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
         EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
@@ -538,7 +589,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
     // Use garbage values to see if we overwrite them
     faiss::gpu::GpuIndexIVFPQ gpuIndex(
             &res, 1, 1, 1, 8, faiss::METRIC_L2, config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -549,7 +600,7 @@ TEST(TestGpuIndexIVFPQ, CopyFrom) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     EXPECT_EQ(cpuIndex.pq.M, gpuIndex.getNumSubQuantizers());
     EXPECT_EQ(gpuIndex.getNumSubQuantizers(), opt.codes);
     EXPECT_EQ(cpuIndex.pq.nbits, gpuIndex.getBitsPerCode());
@@ -594,7 +645,7 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
             faiss::METRIC_L2,
             config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
@@ -604,7 +655,7 @@ TEST(TestGpuIndexIVFPQ, QueryNaN) {
             numQuery * opt.dim, std::numeric_limits<float>::quiet_NaN());
     std::vector<float> distances(numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(numQuery * opt.k, 0);
     gpuIndex.search(
             numQuery, nans.data(), opt.k, distances.data(), indices.data());
@@ -640,7 +691,7 @@ TEST(TestGpuIndexIVFPQ, AddNaN) {
             faiss::METRIC_L2,
             config);
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     int numNans = 10;
     std::vector<float> nans(
@@ -660,7 +711,7 @@ TEST(TestGpuIndexIVFPQ, AddNaN) {
     std::vector<float> queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
     std::vector<float> distance(opt.numQuery * opt.k, 0);
-    std::vector<faiss::Index::idx_t> indices(opt.numQuery * opt.k, 0);
+    std::vector<faiss::idx_t> indices(opt.numQuery * opt.k, 0);
     // should not crash
     gpuIndex.search(
@@ -721,7 +772,7 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) {
             faiss::METRIC_L2,
             config);
     gpuIndex.copyFrom(&cpuIndex);
-    gpuIndex.setNumProbes(nprobe);
+    gpuIndex.nprobe = nprobe;
     faiss::gpu::compareIndices(
             cpuIndex,

data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp CHANGED Viewed

@@ -79,7 +79,7 @@ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
             &res, opt.dim, opt.numCentroids, qtype, METRIC_L2, true, config);
     gpuIndex.train(opt.numTrain, trainVecs.data());
     gpuIndex.add(opt.numAdd, addVecs.data());
-    gpuIndex.setNumProbes(opt.nprobe);
+    gpuIndex.nprobe = opt.nprobe;
     // use garbage values to see if we overwrite then
     IndexFlatL2 cpuQuantizer(1);
@@ -100,7 +100,7 @@ void runCopyToTest(faiss::ScalarQuantizer::QuantizerType qtype) {
     EXPECT_EQ(cpuIndex.quantizer->d, gpuIndex.quantizer->d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);
@@ -172,7 +172,7 @@ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
             METRIC_L2,
             false,
             config);
-    gpuIndex.setNumProbes(1);
+    gpuIndex.nprobe = 1;
     gpuIndex.copyFrom(&cpuIndex);
@@ -182,7 +182,7 @@ void runCopyFromTest(faiss::ScalarQuantizer::QuantizerType qtype) {
     EXPECT_EQ(cpuIndex.d, gpuIndex.d);
     EXPECT_EQ(cpuIndex.d, opt.dim);
     EXPECT_EQ(cpuIndex.nlist, gpuIndex.getNumLists());
-    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.getNumProbes());
+    EXPECT_EQ(cpuIndex.nprobe, gpuIndex.nprobe);
     testIVFEquality(cpuIndex, gpuIndex);

data/vendor/faiss/faiss/gpu/test/TestUtils.cpp CHANGED Viewed

@@ -97,7 +97,7 @@ void compareIndices(
         float pctMaxDiffN) {
     // Compare
     std::vector<float> refDistance(numQuery * k, 0);
-    std::vector<faiss::Index::idx_t> refIndices(numQuery * k, -1);
+    std::vector<faiss::idx_t> refIndices(numQuery * k, -1);
     refIndex.search(
             numQuery,
             queryVecs.data(),
@@ -106,7 +106,7 @@ void compareIndices(
             refIndices.data());
     std::vector<float> testDistance(numQuery * k, 0);
-    std::vector<faiss::Index::idx_t> testIndices(numQuery * k, -1);
+    std::vector<faiss::idx_t> testIndices(numQuery * k, -1);
     testIndex.search(
             numQuery,
             queryVecs.data(),
@@ -162,9 +162,9 @@ inline T lookup(const T* p, int i, int j, int /*dim1*/, int dim2) {
 void compareLists(
         const float* refDist,
-        const faiss::Index::idx_t* refInd,
+        const faiss::idx_t* refInd,
         const float* testDist,
-        const faiss::Index::idx_t* testInd,
+        const faiss::idx_t* testInd,
         int dim1,
         int dim2,
         const std::string& configMsg,
@@ -181,10 +181,10 @@ void compareLists(
     int numResults = dim1 * dim2;
     // query -> {index -> result position}
-    std::vector<std::unordered_map<faiss::Index::idx_t, int>> refIndexMap;
+    std::vector<std::unordered_map<faiss::idx_t, int>> refIndexMap;
     for (int query = 0; query < dim1; ++query) {
-        std::unordered_map<faiss::Index::idx_t, int> indices;
+        std::unordered_map<faiss::idx_t, int> indices;
         for (int result = 0; result < dim2; ++result) {
             indices[lookup(refInd, query, result, dim1, dim2)] = result;
@@ -208,7 +208,7 @@ void compareLists(
     for (int query = 0; query < dim1; ++query) {
         std::vector<int> diffs;
-        std::set<faiss::Index::idx_t> uniqueIndices;
+        std::set<faiss::idx_t> uniqueIndices;
         auto& indices = refIndexMap[query];

data/vendor/faiss/faiss/gpu/test/TestUtils.h CHANGED Viewed

@@ -93,9 +93,9 @@ void compareIndices(
 /// Display specific differences in the two (distance, index) lists
 void compareLists(
         const float* refDist,
-        const faiss::Index::idx_t* refInd,
+        const faiss::idx_t* refInd,
         const float* testDist,
-        const faiss::Index::idx_t* testInd,
+        const faiss::idx_t* testInd,
         int dim1,
         int dim2,
         const std::string& configMsg,
@@ -130,13 +130,13 @@ void testIVFEquality(A& cpuIndex, B& gpuIndex) {
         EXPECT_EQ(cpuCodes, gpuCodes);
         // Index equality
-        std::vector<Index::idx_t> cpuIndices(cpuLists->list_size(i));
+        std::vector<idx_t> cpuIndices(cpuLists->list_size(i));
         auto si = faiss::InvertedLists::ScopedIds(cpuLists, i);
         std::memcpy(
                 cpuIndices.data(),
                 si.get(),
-                cpuLists->list_size(i) * sizeof(faiss::Index::idx_t));
+                cpuLists->list_size(i) * sizeof(faiss::idx_t));
         EXPECT_EQ(cpuIndices, gpuIndex.getListIndices(i));
     }
 }

data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp CHANGED Viewed

@@ -130,7 +130,7 @@ int main() {
                k,
                nq);
-        std::vector<faiss::Index::idx_t> nns(k * nq);
+        std::vector<faiss::idx_t> nns(k * nq);
         std::vector<float> dis(k * nq);
         index.search(nq, queries.data(), k, dis.data(), nns.data());

data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h CHANGED Viewed

@@ -47,6 +47,12 @@ int getMaxThreads(int device);
 /// Equivalent to getMaxThreads(getCurrentDevice())
 int getMaxThreadsCurrentDevice();
+/// Returns the maximum grid size for the given GPU device
+dim3 getMaxGrid(int device);
+/// Equivalent to getMaxGrid(getCurrentDevice())
+dim3 getMaxGridCurrentDevice();
 /// Returns the maximum smem available for the given GPU device
 size_t getMaxSharedMemPerBlock(int device);

data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp CHANGED Viewed

@@ -54,14 +54,7 @@ AdditiveQuantizer::AdditiveQuantizer(
         : Quantizer(d),
           M(nbits.size()),
           nbits(nbits),
-          verbose(false),
-          is_trained(false),
-          max_mem_distances(5 * (size_t(1) << 30)), // 5 GiB
           search_type(search_type) {
-    norm_max = norm_min = NAN;
-    tot_bits = 0;
-    total_codebook_size = 0;
-    only_8bit = false;
     set_derived_values();
 }

data/vendor/faiss/faiss/impl/AdditiveQuantizer.h CHANGED Viewed

@@ -7,6 +7,7 @@
 #pragma once
+#include <cmath>
 #include <cstdint>
 #include <vector>
@@ -29,13 +30,13 @@ struct AdditiveQuantizer : Quantizer {
     // derived values
     std::vector<uint64_t> codebook_offsets;
-    size_t tot_bits;            ///< total number of bits (indexes + norms)
-    size_t norm_bits;           ///< bits allocated for the norms
-    size_t total_codebook_size; ///< size of the codebook in vectors
-    bool only_8bit;             ///< are all nbits = 8 (use faster decoder)
+    size_t tot_bits = 0;            ///< total number of bits (indexes + norms)
+    size_t norm_bits = 0;           ///< bits allocated for the norms
+    size_t total_codebook_size = 0; ///< size of the codebook in vectors
+    bool only_8bit = false;         ///< are all nbits = 8 (use faster decoder)
-    bool verbose;    ///< verbose during training?
-    bool is_trained; ///< is trained or not
+    bool verbose = false;    ///< verbose during training?
+    bool is_trained = false; ///< is trained or not
     IndexFlat1D qnorm;            ///< store and search norms
     std::vector<float> norm_tabs; ///< store norms of codebook entries for 4-bit
@@ -43,7 +44,7 @@ struct AdditiveQuantizer : Quantizer {
     /// norms and distance matrixes with beam search can get large, so use this
     /// to control for the amount of memory that can be allocated
-    size_t max_mem_distances;
+    size_t max_mem_distances = 5 * (size_t(1) << 30);
     /// encode a norm into norm_bits bits
     uint64_t encode_norm(float norm) const;
@@ -145,7 +146,7 @@ struct AdditiveQuantizer : Quantizer {
     Search_type_t search_type;
     /// min/max for quantization of norms
-    float norm_min, norm_max;
+    float norm_min = NAN, norm_max = NAN;
     template <bool is_IP, Search_type_t effective_search_type>
     float compute_1_distance_LUT(const uint8_t* codes, const float* LUT) const;
@@ -157,7 +158,6 @@ struct AdditiveQuantizer : Quantizer {
      * Support for exhaustive distance computations with all the centroids.
      * Hence, the number of these centroids should not be too large.
      ****************************************************************************/
-    using idx_t = Index::idx_t;
     /// decoding function for a code in a 64-bit word
     void decode_64bit(idx_t n, float* x) const;

data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp CHANGED Viewed

@@ -20,7 +20,7 @@ namespace faiss {
  * RangeSearchResult
  ***********************************************************************/
-RangeSearchResult::RangeSearchResult(idx_t nq, bool alloc_lims) : nq(nq) {
+RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
     if (alloc_lims) {
         lims = new size_t[nq + 1];
         memset(lims, 0, sizeof(*lims) * (nq + 1));