faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef LEVEL2_NEON_INL_H
3
9
  #define LEVEL2_NEON_INL_H
4
10
 
@@ -1940,9 +1946,15 @@ struct Index2LevelDecoderImpl<
1940
1946
  } // namespace
1941
1947
 
1942
1948
  // Suitable for IVF256,PQ[1]x8
1949
+ // Suitable for IVF256,PQ[1]x10 (such as IVF256,PQ16x10np)
1950
+ // Suitable for IVF256,PQ[1]x12 (such as IVF256,PQ16x12np)
1951
+ // Suitable for IVF256,PQ[1]x16 (such as IVF256,PQ16x16np)
1943
1952
  // Suitable for Residual[1]x8,PQ[2]x8
1944
- // Suitable for IVF[9-16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
1945
- // Suitable for Residual1x[9-16 bit],PQ[1]x8 (such as Residual1x9,PQ8)
1953
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
1954
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x10 (such as IVF1024,PQ16x10np)
1955
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x12 (such as IVF1024,PQ16x12np)
1956
+ // Suitable for IVF[2^9-2^16 bit],PQ[1]x16 (such as IVF1024,PQ16x16np)
1957
+ // Suitable for Residual[1]x[9-16 bit],PQ[2]x[3] (such as Residual2x9,PQ8)
1946
1958
  template <
1947
1959
  intptr_t DIM,
1948
1960
  intptr_t COARSE_SIZE,
@@ -1951,11 +1963,13 @@ template <
1951
1963
  intptr_t FINE_BITS = 8>
1952
1964
  struct Index2LevelDecoder {
1953
1965
  static_assert(
1954
- COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 16,
1955
- "Only 8, 10 or 16 bits are currently supported for COARSE_BITS");
1966
+ COARSE_BITS == 8 || COARSE_BITS == 10 || COARSE_BITS == 12 ||
1967
+ COARSE_BITS == 16,
1968
+ "Only 8, 10, 12 or 16 bits are currently supported for COARSE_BITS");
1956
1969
  static_assert(
1957
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1958
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1970
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1971
+ FINE_BITS == 16,
1972
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1959
1973
 
1960
1974
  static constexpr intptr_t dim = DIM;
1961
1975
  static constexpr intptr_t coarseSize = COARSE_SIZE;
@@ -1,3 +1,10 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include <cstddef>
@@ -1,3 +1,10 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include <cstddef>
@@ -1,4 +1,9 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
2
7
 
3
8
  #ifndef PQ_AVX2_INL_H
4
9
  #define PQ_AVX2_INL_H
@@ -1488,12 +1493,14 @@ struct IndexPQDecoderImpl<
1488
1493
 
1489
1494
  // Suitable for PQ[1]x8
1490
1495
  // Suitable for PQ[1]x10
1496
+ // Suitable for PQ[1]x12
1491
1497
  // Suitable for PQ[1]x16
1492
1498
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
1493
1499
  struct IndexPQDecoder {
1494
1500
  static_assert(
1495
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1496
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1501
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1502
+ FINE_BITS == 16,
1503
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1497
1504
 
1498
1505
  static constexpr intptr_t dim = DIM;
1499
1506
  static constexpr intptr_t fineSize = FINE_SIZE;
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef PQ_INL_H
3
9
  #define PQ_INL_H
4
10
 
@@ -1,4 +1,10 @@
1
- // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
2
8
  #ifndef PQ_NEON_INL_H
3
9
  #define PQ_NEON_INL_H
4
10
 
@@ -1322,12 +1328,14 @@ struct IndexPQDecoderImpl<
1322
1328
 
1323
1329
  // Suitable for PQ[1]x8
1324
1330
  // Suitable for PQ[1]x10
1331
+ // Suitable for PQ[1]x12
1325
1332
  // Suitable for PQ[1]x16
1326
1333
  template <intptr_t DIM, intptr_t FINE_SIZE, intptr_t FINE_BITS = 8>
1327
1334
  struct IndexPQDecoder {
1328
1335
  static_assert(
1329
- FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 16,
1330
- "Only 8, 10 or 16 bits are currently supported for FINE_BITS");
1336
+ FINE_BITS == 8 || FINE_BITS == 10 || FINE_BITS == 12 ||
1337
+ FINE_BITS == 16,
1338
+ "Only 8, 10, 12 or 16 bits are currently supported for FINE_BITS");
1331
1339
 
1332
1340
  static constexpr intptr_t dim = DIM;
1333
1341
  static constexpr intptr_t fineSize = FINE_SIZE;
@@ -11,6 +11,8 @@
11
11
  #include <faiss/IndexPreTransform.h>
12
12
  #include <faiss/IndexReplicas.h>
13
13
  #include <faiss/IndexShards.h>
14
+ #include <faiss/IndexShardsIVF.h>
15
+
14
16
  #include <faiss/gpu/GpuIndex.h>
15
17
  #include <faiss/gpu/GpuIndexFlat.h>
16
18
  #include <faiss/gpu/GpuIndexIVFFlat.h>
@@ -33,7 +35,12 @@ using namespace ::faiss;
33
35
 
34
36
  void GpuParameterSpace::initialize(const Index* index) {
35
37
  if (DC(IndexPreTransform)) {
36
- index = ix->index;
38
+ initialize(ix->index);
39
+ return;
40
+ }
41
+ if (DC(IndexShardsIVF)) {
42
+ ParameterSpace::initialize(index);
43
+ return;
37
44
  }
38
45
  if (DC(IndexReplicas)) {
39
46
  if (ix->count() == 0)
@@ -53,6 +60,14 @@ void GpuParameterSpace::initialize(const Index* index) {
53
60
  break;
54
61
  pr.values.push_back(nprobe);
55
62
  }
63
+
64
+ ParameterSpace ivf_pspace;
65
+ ivf_pspace.initialize(ix->quantizer);
66
+
67
+ for (const ParameterRange& p : ivf_pspace.parameter_ranges) {
68
+ ParameterRange& pr = add_range("quantizer_" + p.name);
69
+ pr.values = p.values;
70
+ }
56
71
  }
57
72
  // not sure we should call the parent initializer
58
73
  }
@@ -72,7 +87,7 @@ void GpuParameterSpace::set_index_parameter(
72
87
  }
73
88
  if (name == "nprobe") {
74
89
  if (DC(GpuIndexIVF)) {
75
- ix->setNumProbes(int(val));
90
+ ix->nprobe = size_t(val);
76
91
  return;
77
92
  }
78
93
  }
@@ -83,6 +98,14 @@ void GpuParameterSpace::set_index_parameter(
83
98
  }
84
99
  }
85
100
 
101
+ if (name.find("quantizer_") == 0) {
102
+ if (DC(GpuIndexIVF)) {
103
+ std::string sub_name = name.substr(strlen("quantizer_"));
104
+ set_index_parameter(ix->quantizer, sub_name, val);
105
+ return;
106
+ }
107
+ }
108
+
86
109
  // maybe normal index parameters apply?
87
110
  ParameterSpace::set_index_parameter(index, name, val);
88
111
  }
@@ -18,6 +18,7 @@
18
18
  #include <faiss/IndexPreTransform.h>
19
19
  #include <faiss/IndexReplicas.h>
20
20
  #include <faiss/IndexScalarQuantizer.h>
21
+ #include <faiss/IndexShardsIVF.h>
21
22
  #include <faiss/MetaIndexes.h>
22
23
  #include <faiss/gpu/GpuIndex.h>
23
24
  #include <faiss/gpu/GpuIndexFlat.h>
@@ -116,7 +117,6 @@ ToGpuCloner::ToGpuCloner(
116
117
  : GpuClonerOptions(options), provider(prov), device(device) {}
117
118
 
118
119
  Index* ToGpuCloner::clone_Index(const Index* index) {
119
- using idx_t = Index::idx_t;
120
120
  if (auto ifl = dynamic_cast<const IndexFlat*>(index)) {
121
121
  GpuIndexFlatConfig config;
122
122
  config.device = device;
@@ -227,8 +227,8 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
227
227
  std::vector<int>& devices,
228
228
  const GpuMultipleClonerOptions& options)
229
229
  : GpuMultipleClonerOptions(options) {
230
- FAISS_ASSERT(provider.size() == devices.size());
231
- for (int i = 0; i < provider.size(); i++) {
230
+ FAISS_THROW_IF_NOT(provider.size() == devices.size());
231
+ for (size_t i = 0; i < provider.size(); i++) {
232
232
  sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
233
233
  }
234
234
  }
@@ -241,28 +241,43 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
241
241
  void ToGpuClonerMultiple::copy_ivf_shard(
242
242
  const IndexIVF* index_ivf,
243
243
  IndexIVF* idx2,
244
- long n,
245
- long i) {
244
+ idx_t n,
245
+ idx_t i) {
246
246
  if (shard_type == 2) {
247
- long i0 = i * index_ivf->ntotal / n;
248
- long i1 = (i + 1) * index_ivf->ntotal / n;
247
+ idx_t i0 = i * index_ivf->ntotal / n;
248
+ idx_t i1 = (i + 1) * index_ivf->ntotal / n;
249
249
 
250
250
  if (verbose)
251
251
  printf("IndexShards shard %ld indices %ld:%ld\n", i, i0, i1);
252
- index_ivf->copy_subset_to(*idx2, 2, i0, i1);
252
+ index_ivf->copy_subset_to(
253
+ *idx2, InvertedLists::SUBSET_TYPE_ID_RANGE, i0, i1);
253
254
  FAISS_ASSERT(idx2->ntotal == i1 - i0);
254
255
  } else if (shard_type == 1) {
255
256
  if (verbose)
256
257
  printf("IndexShards shard %ld select modulo %ld = %ld\n", i, n, i);
257
- index_ivf->copy_subset_to(*idx2, 1, n, i);
258
+ index_ivf->copy_subset_to(
259
+ *idx2, InvertedLists::SUBSET_TYPE_ID_MOD, n, i);
260
+ } else if (shard_type == 4) {
261
+ idx_t i0 = i * index_ivf->nlist / n;
262
+ idx_t i1 = (i + 1) * index_ivf->nlist / n;
263
+ if (verbose) {
264
+ printf("IndexShards %ld/%ld select lists %d:%d\n",
265
+ i,
266
+ n,
267
+ int(i0),
268
+ int(i1));
269
+ }
270
+ index_ivf->copy_subset_to(
271
+ *idx2, InvertedLists::SUBSET_TYPE_INVLIST, i0, i1);
258
272
  } else {
259
273
  FAISS_THROW_FMT("shard_type %d not implemented", shard_type);
260
274
  }
261
275
  }
262
276
 
263
277
  Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
264
- long n = sub_cloners.size();
278
+ idx_t n = sub_cloners.size();
265
279
 
280
+ auto index_ivf = dynamic_cast<const faiss::IndexIVF*>(index);
266
281
  auto index_ivfpq = dynamic_cast<const faiss::IndexIVFPQ*>(index);
267
282
  auto index_ivfflat = dynamic_cast<const faiss::IndexIVFFlat*>(index);
268
283
  auto index_ivfsq =
@@ -274,16 +289,36 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
274
289
  "IndexIVFFlat, IndexIVFScalarQuantizer, "
275
290
  "IndexFlat and IndexIVFPQ");
276
291
 
292
+ // decide what coarse quantizer the sub-indexes are going to have
293
+ const Index* quantizer = nullptr;
294
+ std::unique_ptr<Index> new_quantizer;
295
+ if (index_ivf) {
296
+ quantizer = index_ivf->quantizer;
297
+ if (common_ivf_quantizer &&
298
+ !dynamic_cast<const IndexFlat*>(quantizer)) {
299
+ // then we flatten the coarse quantizer so that everything remains
300
+ // on GPU
301
+ new_quantizer.reset(
302
+ new IndexFlat(quantizer->d, quantizer->metric_type));
303
+ std::vector<float> centroids(quantizer->d * quantizer->ntotal);
304
+ quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
305
+ new_quantizer->add(quantizer->ntotal, centroids.data());
306
+ quantizer = new_quantizer.get();
307
+ }
308
+ }
309
+
277
310
  std::vector<faiss::Index*> shards(n);
278
311
 
279
- for (long i = 0; i < n; i++) {
312
+ for (idx_t i = 0; i < n; i++) {
280
313
  // make a shallow copy
281
- if (reserveVecs)
314
+ if (reserveVecs) {
282
315
  sub_cloners[i].reserveVecs = (reserveVecs + n - 1) / n;
283
-
316
+ }
317
+ // note: const_casts here are harmless because the indexes build here
318
+ // are short-lived, translated immediately to GPU indexes.
284
319
  if (index_ivfpq) {
285
320
  faiss::IndexIVFPQ idx2(
286
- index_ivfpq->quantizer,
321
+ const_cast<Index*>(quantizer),
287
322
  index_ivfpq->d,
288
323
  index_ivfpq->nlist,
289
324
  index_ivfpq->code_size,
@@ -297,7 +332,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
297
332
  shards[i] = sub_cloners[i].clone_Index(&idx2);
298
333
  } else if (index_ivfflat) {
299
334
  faiss::IndexIVFFlat idx2(
300
- index_ivfflat->quantizer,
335
+ const_cast<Index*>(quantizer),
301
336
  index->d,
302
337
  index_ivfflat->nlist,
303
338
  index_ivfflat->metric_type);
@@ -307,7 +342,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
307
342
  shards[i] = sub_cloners[i].clone_Index(&idx2);
308
343
  } else if (index_ivfsq) {
309
344
  faiss::IndexIVFScalarQuantizer idx2(
310
- index_ivfsq->quantizer,
345
+ const_cast<Index*>(quantizer),
311
346
  index->d,
312
347
  index_ivfsq->nlist,
313
348
  index_ivfsq->sq.qtype,
@@ -323,40 +358,52 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
323
358
  faiss::IndexFlat idx2(index->d, index->metric_type);
324
359
  shards[i] = sub_cloners[i].clone_Index(&idx2);
325
360
  if (index->ntotal > 0) {
326
- long i0 = index->ntotal * i / n;
327
- long i1 = index->ntotal * (i + 1) / n;
361
+ idx_t i0 = index->ntotal * i / n;
362
+ idx_t i1 = index->ntotal * (i + 1) / n;
328
363
  shards[i]->add(i1 - i0, index_flat->get_xb() + i0 * index->d);
329
364
  }
330
365
  }
331
366
  }
332
367
 
333
368
  bool successive_ids = index_flat != nullptr;
334
- faiss::IndexShards* res =
335
- new faiss::IndexShards(index->d, true, successive_ids);
369
+ faiss::IndexShards* res;
370
+ if (common_ivf_quantizer && index_ivf) {
371
+ this->shard = false;
372
+ Index* common_quantizer = clone_Index(index_ivf->quantizer);
373
+ this->shard = true;
374
+ IndexShardsIVF* idx = new faiss::IndexShardsIVF(
375
+ common_quantizer, index_ivf->nlist, true, false);
376
+ idx->own_fields = true;
377
+ idx->own_indices = true;
378
+ res = idx;
379
+ } else {
380
+ res = new faiss::IndexShards(index->d, true, successive_ids);
381
+ res->own_indices = true;
382
+ }
336
383
 
337
384
  for (int i = 0; i < n; i++) {
338
385
  res->add_shard(shards[i]);
339
386
  }
340
- res->own_fields = true;
341
387
  FAISS_ASSERT(index->ntotal == res->ntotal);
342
388
  return res;
343
389
  }
344
390
 
345
391
  Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
346
- long n = sub_cloners.size();
347
- if (n == 1)
392
+ idx_t n = sub_cloners.size();
393
+ if (n == 1) {
348
394
  return sub_cloners[0].clone_Index(index);
395
+ }
349
396
 
350
397
  if (dynamic_cast<const IndexFlat*>(index) ||
351
- dynamic_cast<const faiss::IndexIVFFlat*>(index) ||
352
- dynamic_cast<const faiss::IndexIVFScalarQuantizer*>(index) ||
353
- dynamic_cast<const faiss::IndexIVFPQ*>(index)) {
398
+ dynamic_cast<const IndexIVFFlat*>(index) ||
399
+ dynamic_cast<const IndexIVFScalarQuantizer*>(index) ||
400
+ dynamic_cast<const IndexIVFPQ*>(index)) {
354
401
  if (!shard) {
355
402
  IndexReplicas* res = new IndexReplicas();
356
403
  for (auto& sub_cloner : sub_cloners) {
357
404
  res->addIndex(sub_cloner.clone_Index(index));
358
405
  }
359
- res->own_fields = true;
406
+ res->own_indices = true;
360
407
  return res;
361
408
  } else {
362
409
  return clone_Index_to_shards(index);
@@ -373,8 +420,8 @@ Index* ToGpuClonerMultiple::clone_Index(const Index* index) {
373
420
  for (int m = 0; m < pq.M; m++) {
374
421
  // which GPU(s) will be assigned to this sub-quantizer
375
422
 
376
- long i0 = m * n / pq.M;
377
- long i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
423
+ idx_t i0 = m * n / pq.M;
424
+ idx_t i1 = pq.M <= n ? (m + 1) * n / pq.M : i0 + 1;
378
425
  std::vector<ToGpuCloner> sub_cloners_2;
379
426
  sub_cloners_2.insert(
380
427
  sub_cloners_2.begin(),
@@ -55,8 +55,8 @@ struct ToGpuClonerMultiple : faiss::Cloner, GpuMultipleClonerOptions {
55
55
  void copy_ivf_shard(
56
56
  const IndexIVF* index_ivf,
57
57
  IndexIVF* idx2,
58
- long n,
59
- long i);
58
+ idx_t n,
59
+ idx_t i);
60
60
 
61
61
  Index* clone_Index_to_shards(const Index* index);
62
62
 
@@ -14,41 +14,42 @@ namespace gpu {
14
14
 
15
15
  /// set some options on how to copy to GPU
16
16
  struct GpuClonerOptions {
17
- GpuClonerOptions();
18
-
19
17
  /// how should indices be stored on index types that support indices
20
18
  /// (anything but GpuIndexFlat*)?
21
- IndicesOptions indicesOptions;
19
+ IndicesOptions indicesOptions = INDICES_64_BIT;
22
20
 
23
21
  /// is the coarse quantizer in float16?
24
- bool useFloat16CoarseQuantizer;
22
+ bool useFloat16CoarseQuantizer = false;
25
23
 
26
24
  /// for GpuIndexIVFFlat, is storage in float16?
27
25
  /// for GpuIndexIVFPQ, are intermediate calculations in float16?
28
- bool useFloat16;
26
+ bool useFloat16 = false;
29
27
 
30
28
  /// use precomputed tables?
31
- bool usePrecomputed;
29
+ bool usePrecomputed = false;
32
30
 
33
31
  /// reserve vectors in the invfiles?
34
- long reserveVecs;
32
+ long reserveVecs = 0;
35
33
 
36
34
  /// For GpuIndexFlat, store data in transposed layout?
37
- bool storeTransposed;
35
+ bool storeTransposed = false;
38
36
 
39
37
  /// Set verbose options on the index
40
- bool verbose;
38
+ bool verbose = false;
41
39
  };
42
40
 
43
41
  struct GpuMultipleClonerOptions : public GpuClonerOptions {
44
- GpuMultipleClonerOptions();
45
-
46
42
  /// Whether to shard the index across GPUs, versus replication
47
43
  /// across GPUs
48
- bool shard;
44
+ bool shard = false;
49
45
 
50
46
  /// IndexIVF::copy_subset_to subset type
51
- int shard_type;
47
+ int shard_type = 1;
48
+
49
+ /// set to true if an IndexIVF is to be dispatched to multiple GPUs with a
50
+ /// single common IVF quantizer, ie. only the inverted lists are sharded on
51
+ /// the sub-indexes (uses an IndexShardsIVF)
52
+ bool common_ivf_quantizer = false;
52
53
  };
53
54
 
54
55
  } // namespace gpu
@@ -45,7 +45,8 @@ struct GpuDistanceParams {
45
45
  outDistances(nullptr),
46
46
  ignoreOutDistances(false),
47
47
  outIndicesType(IndicesDataType::I64),
48
- outIndices(nullptr) {}
48
+ outIndices(nullptr),
49
+ device(-1) {}
49
50
 
50
51
  //
51
52
  // Search parameters
@@ -76,7 +77,7 @@ struct GpuDistanceParams {
76
77
  const void* vectors;
77
78
  DistanceDataType vectorType;
78
79
  bool vectorsRowMajor;
79
- int numVectors;
80
+ idx_t numVectors;
80
81
 
81
82
  /// Precomputed L2 norms for each vector in `vectors`, which can be
82
83
  /// optionally provided in advance to speed computation for METRIC_L2
@@ -93,7 +94,7 @@ struct GpuDistanceParams {
93
94
  const void* queries;
94
95
  DistanceDataType queryType;
95
96
  bool queriesRowMajor;
96
- int numQueries;
97
+ idx_t numQueries;
97
98
 
98
99
  //
99
100
  // Output results
@@ -112,6 +113,17 @@ struct GpuDistanceParams {
112
113
  /// innermost (row major). Not used if k == -1 (all pairwise distances)
113
114
  IndicesDataType outIndicesType;
114
115
  void* outIndices;
116
+
117
+ //
118
+ // Execution information
119
+ //
120
+
121
+ /// On which GPU device should the search run?
122
+ /// -1 indicates that the current CUDA thread-local device
123
+ /// (via cudaGetDevice/cudaSetDevice) is used
124
+ /// Otherwise, an integer 0 <= device < numDevices indicates the device for
125
+ /// execution
126
+ int device;
115
127
  };
116
128
 
117
129
  /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
@@ -137,13 +149,13 @@ void bruteForceKnn(
137
149
  // dims x numVectors, with numVectors innermost
138
150
  const float* vectors,
139
151
  bool vectorsRowMajor,
140
- int numVectors,
152
+ idx_t numVectors,
141
153
  // If queriesRowMajor is true, this is
142
154
  // numQueries x dims, with dims innermost; otherwise,
143
155
  // dims x numQueries, with numQueries innermost
144
156
  const float* queries,
145
157
  bool queriesRowMajor,
146
- int numQueries,
158
+ idx_t numQueries,
147
159
  int dims,
148
160
  int k,
149
161
  // A region of memory size numQueries x k, with k
@@ -151,7 +163,7 @@ void bruteForceKnn(
151
163
  float* outDistances,
152
164
  // A region of memory size numQueries x k, with k
153
165
  // innermost (row major)
154
- Index::idx_t* outIndices);
166
+ idx_t* outIndices);
155
167
 
156
168
  } // namespace gpu
157
169
  } // namespace faiss
@@ -51,30 +51,31 @@ class GpuIndex : public faiss::Index {
51
51
  /// `x` can be resident on the CPU or any GPU; copies are performed
52
52
  /// as needed
53
53
  /// Handles paged adds if the add set is too large; calls addInternal_
54
- void add(Index::idx_t, const float* x) override;
54
+ void add(idx_t, const float* x) override;
55
55
 
56
56
  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
57
57
  /// performed as needed
58
58
  /// Handles paged adds if the add set is too large; calls addInternal_
59
- void add_with_ids(Index::idx_t n, const float* x, const Index::idx_t* ids)
60
- override;
59
+ void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
61
60
 
62
61
  /// `x` and `labels` can be resident on the CPU or any GPU; copies are
63
62
  /// performed as needed
64
63
  void assign(
65
- Index::idx_t n,
64
+ idx_t n,
66
65
  const float* x,
67
- Index::idx_t* labels,
68
- Index::idx_t k = 1) const override;
66
+ idx_t* labels,
67
+ // faiss::Index has idx_t for k
68
+ idx_t k = 1) const override;
69
69
 
70
70
  /// `x`, `distances` and `labels` can be resident on the CPU or any
71
71
  /// GPU; copies are performed as needed
72
72
  void search(
73
- Index::idx_t n,
73
+ idx_t n,
74
74
  const float* x,
75
- Index::idx_t k,
75
+ // faiss::Index has idx_t for k
76
+ idx_t k,
76
77
  float* distances,
77
- Index::idx_t* labels,
78
+ idx_t* labels,
78
79
  const SearchParameters* params = nullptr) const override;
79
80
 
80
81
  /// `x`, `distances` and `labels` and `recons` can be resident on the CPU or
@@ -82,6 +83,7 @@ class GpuIndex : public faiss::Index {
82
83
  void search_and_reconstruct(
83
84
  idx_t n,
84
85
  const float* x,
86
+ // faiss::Index has idx_t for k
85
87
  idx_t k,
86
88
  float* distances,
87
89
  idx_t* labels,
@@ -90,16 +92,16 @@ class GpuIndex : public faiss::Index {
90
92
 
91
93
  /// Overridden to force GPU indices to provide their own GPU-friendly
92
94
  /// implementation
93
- void compute_residual(const float* x, float* residual, Index::idx_t key)
95
+ void compute_residual(const float* x, float* residual, idx_t key)
94
96
  const override;
95
97
 
96
98
  /// Overridden to force GPU indices to provide their own GPU-friendly
97
99
  /// implementation
98
100
  void compute_residual_n(
99
- Index::idx_t n,
101
+ idx_t n,
100
102
  const float* xs,
101
103
  float* residuals,
102
- const Index::idx_t* keys) const override;
104
+ const idx_t* keys) const override;
103
105
 
104
106
  protected:
105
107
  /// Copy what we need from the CPU equivalent
@@ -114,43 +116,43 @@ class GpuIndex : public faiss::Index {
114
116
 
115
117
  /// Overridden to actually perform the add
116
118
  /// All data is guaranteed to be resident on our device
117
- virtual void addImpl_(int n, const float* x, const Index::idx_t* ids) = 0;
119
+ virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
118
120
 
119
121
  /// Overridden to actually perform the search
120
122
  /// All data is guaranteed to be resident on our device
121
123
  virtual void searchImpl_(
122
- int n,
124
+ idx_t n,
123
125
  const float* x,
124
126
  int k,
125
127
  float* distances,
126
- Index::idx_t* labels,
128
+ idx_t* labels,
127
129
  const SearchParameters* params) const = 0;
128
130
 
129
131
  private:
130
132
  /// Handles paged adds if the add set is too large, passes to
131
133
  /// addImpl_ to actually perform the add for the current page
132
- void addPaged_(int n, const float* x, const Index::idx_t* ids);
134
+ void addPaged_(idx_t n, const float* x, const idx_t* ids);
133
135
 
134
136
  /// Calls addImpl_ for a single page of GPU-resident data
135
- void addPage_(int n, const float* x, const Index::idx_t* ids);
137
+ void addPage_(idx_t n, const float* x, const idx_t* ids);
136
138
 
137
139
  /// Calls searchImpl_ for a single page of GPU-resident data
138
140
  void searchNonPaged_(
139
- int n,
141
+ idx_t n,
140
142
  const float* x,
141
143
  int k,
142
144
  float* outDistancesData,
143
- Index::idx_t* outIndicesData,
145
+ idx_t* outIndicesData,
144
146
  const SearchParameters* params) const;
145
147
 
146
148
  /// Calls searchImpl_ for a single page of GPU-resident data,
147
149
  /// handling paging of the data and copies from the CPU
148
150
  void searchFromCpuPaged_(
149
- int n,
151
+ idx_t n,
150
152
  const float* x,
151
153
  int k,
152
154
  float* outDistancesData,
153
- Index::idx_t* outIndicesData,
155
+ idx_t* outIndicesData,
154
156
  const SearchParameters* params) const;
155
157
 
156
158
  protected: