faiss 0.3.0 → 0.3.1

Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +0 -1
  12. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  13. data/vendor/faiss/faiss/Clustering.h +31 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +20 -5
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  27. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  28. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  29. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  30. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  31. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  33. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  35. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  36. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  37. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  42. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  43. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  46. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  48. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  49. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  50. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  52. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  53. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  56. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  57. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  58. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  59. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  60. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  61. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  62. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  64. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  65. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  66. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  67. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  69. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  70. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  71. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  72. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  73. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  74. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  75. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  76. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  77. data/vendor/faiss/faiss/clone_index.h +3 -0
  78. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  79. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  82. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  83. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  88. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  90. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  92. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  93. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  97. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  98. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  99. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  101. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  103. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  104. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  105. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  106. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  107. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  108. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  109. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  110. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  111. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  112. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  113. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  115. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  118. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  119. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  121. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  124. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  125. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  126. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  127. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  128. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  129. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  133. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  135. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  136. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  137. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  138. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  139. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  140. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  141. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  142. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  143. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  144. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  145. data/vendor/faiss/faiss/utils/distances.h +81 -4
  146. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  148. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  150. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  152. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  153. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  154. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  155. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  156. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  157. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  158. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  159. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  160. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  161. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  162. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  163. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  164. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  165. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  166. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  167. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  168. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  169. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  170. data/vendor/faiss/faiss/utils/utils.h +57 -20
  171. metadata +10 -3
data/vendor/faiss/faiss/VectorTransform.h

@@ -230,18 +230,18 @@ struct ProductQuantizer;
  *
  */
 struct OPQMatrix : LinearTransform {
-    int M;          ///< nb of subquantizers
-    int niter;      ///< Number of outer training iterations
-    int niter_pq;   ///< Number of training iterations for the PQ
-    int niter_pq_0; ///< same, for the first outer iteration
+    int M;               ///< nb of subquantizers
+    int niter = 50;      ///< Number of outer training iterations
+    int niter_pq = 4;    ///< Number of training iterations for the PQ
+    int niter_pq_0 = 40; ///< same, for the first outer iteration
 
     /// if there are too many training points, resample
-    size_t max_train_points;
-    bool verbose;
+    size_t max_train_points = 256 * 256;
+    bool verbose = false;
 
     /// if non-NULL, use this product quantizer for training
     /// should be constructed with (d_out, M, _)
-    ProductQuantizer* pq;
+    ProductQuantizer* pq = nullptr;
 
     /// if d2 != -1, output vectors of this dimension
     explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
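
With the defaults moved into the class body, an OPQMatrix is usable right after construction and training parameters are overridden by plain assignment. A minimal sketch of the pattern (the dimensions and iteration count are illustrative, not from this diff):

    #include <faiss/VectorTransform.h>

    int main() {
        // learn a rotation for 64-dim vectors feeding a PQ with 8 subquantizers
        faiss::OPQMatrix opq(64, 8);
        opq.niter = 25;     // override the new in-class default of 50
        opq.verbose = true; // defaults to false; no constructor needed
        // opq.train(n, x); // OPQ training on n vectors x would go here
        return 0;
    }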
data/vendor/faiss/faiss/clone_index.cpp

@@ -17,6 +17,8 @@
 #include <faiss/Index2Layer.h>
 #include <faiss/IndexAdditiveQuantizer.h>
 #include <faiss/IndexAdditiveQuantizerFastScan.h>
+#include <faiss/IndexBinary.h>
+#include <faiss/IndexBinaryFlat.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexHNSW.h>
 #include <faiss/IndexIVF.h>
@@ -35,6 +37,7 @@
 #include <faiss/IndexRefine.h>
 #include <faiss/IndexRowwiseMinMax.h>
 #include <faiss/IndexScalarQuantizer.h>
+
 #include <faiss/MetaIndexes.h>
 #include <faiss/VectorTransform.h>
 
@@ -60,9 +63,10 @@ Index* clone_index(const Index* index) {
 // assumes there is a copy constructor ready. Always try from most
 // specific to most general. Most indexes don't have complicated
 // structs, the default copy constructor often just works.
-#define TRYCLONE(classname, obj)                                      \
-    if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
-        return new classname(*clo);                                   \
+#define TRYCLONE(classname, obj)                       \
+    if (const classname* clo##classname =              \
+                dynamic_cast<const classname*>(obj)) { \
+        return new classname(*clo##classname);         \
     } else
 
 VectorTransform* Cloner::clone_VectorTransform(const VectorTransform* vt) {
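
The local is now token-pasted with the class name (clo##classname), presumably so that chained TRYCLONE invocations, which nest into each other's else branches, declare distinct locals instead of repeatedly shadowing a single clo. Roughly, a chain of two expands to (class names illustrative):

    if (const IndexFlat* cloIndexFlat = dynamic_cast<const IndexFlat*>(obj)) {
        return new IndexFlat(*cloIndexFlat);
    } else if (const IndexRefineFlat* cloIndexRefineFlat =
                       dynamic_cast<const IndexRefineFlat*>(obj)) {
        return new IndexRefineFlat(*cloIndexRefineFlat);
    } else {
        // fall through to the generic cloning path
    }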
@@ -234,13 +238,6 @@ Index* clone_AdditiveQuantizerIndex(const Index* index) {
 
 namespace {
 
-IndexHNSW* clone_HNSW(const IndexHNSW* ihnsw) {
-    TRYCLONE(IndexHNSWFlat, ihnsw)
-    TRYCLONE(IndexHNSWPQ, ihnsw)
-    TRYCLONE(IndexHNSWSQ, ihnsw)
-    return new IndexHNSW(*ihnsw);
-}
-
 InvertedLists* clone_InvertedLists(const InvertedLists* invlists) {
     if (auto* ails = dynamic_cast<const ArrayInvertedLists*>(invlists)) {
         return new ArrayInvertedLists(*ails);
@@ -385,4 +382,12 @@ Quantizer* clone_Quantizer(const Quantizer* quant) {
     FAISS_THROW_MSG("Did not recognize quantizer to clone");
 }
 
+IndexBinary* clone_binary_index(const IndexBinary* index) {
+    if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
+        return new IndexBinaryFlat(*ii);
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
 } // namespace faiss
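
A hedged usage sketch of the new CPU-side entry point (the dimension and data are illustrative):

    #include <faiss/IndexBinaryFlat.h>
    #include <faiss/clone_index.h>
    #include <cstdint>
    #include <vector>

    int main() {
        faiss::IndexBinaryFlat index(64);  // 64-bit binary codes
        std::vector<uint8_t> code(8, 0);   // one 8-byte vector
        index.add(1, code.data());

        // deep-copies an IndexBinaryFlat; any other binary type throws
        faiss::IndexBinary* copy = faiss::clone_binary_index(&index);
        delete copy;
        return 0;
    }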
data/vendor/faiss/faiss/clone_index.h

@@ -17,6 +17,7 @@ struct Index;
 struct IndexIVF;
 struct VectorTransform;
 struct Quantizer;
+struct IndexBinary;
 
 /* cloning functions */
 Index* clone_index(const Index*);
@@ -33,4 +34,6 @@ struct Cloner {
 
 Quantizer* clone_Quantizer(const Quantizer* quant);
 
+IndexBinary* clone_binary_index(const IndexBinary* index);
+
 } // namespace faiss
data/vendor/faiss/faiss/gpu/GpuCloner.cpp

@@ -7,10 +7,12 @@
 
 #include <faiss/gpu/GpuCloner.h>
 #include <faiss/impl/FaissAssert.h>
+#include <memory>
 #include <typeinfo>
 
 #include <faiss/gpu/StandardGpuResources.h>
 
+#include <faiss/IndexBinaryFlat.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexIVFFlat.h>
@@ -21,6 +23,7 @@
 #include <faiss/IndexShardsIVF.h>
 #include <faiss/MetaIndexes.h>
 #include <faiss/gpu/GpuIndex.h>
+#include <faiss/gpu/GpuIndexBinaryFlat.h>
 #include <faiss/gpu/GpuIndexFlat.h>
 #include <faiss/gpu/GpuIndexIVFFlat.h>
 #include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -121,6 +124,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = useFloat16;
+        config.use_raft = use_raft;
         return new GpuIndexFlat(provider, ifl, config);
     } else if (
             dynamic_cast<const IndexScalarQuantizer*>(index) &&
@@ -129,6 +133,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         GpuIndexFlatConfig config;
         config.device = device;
         config.useFloat16 = true;
+        FAISS_THROW_IF_NOT_MSG(
+                !use_raft, "this type of index is not implemented for RAFT");
         GpuIndexFlat* gif = new GpuIndexFlat(
                 provider, index->d, index->metric_type, config);
         // transfer data by blocks
@@ -146,6 +152,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        config.use_raft = use_raft;
 
         GpuIndexIVFFlat* res = new GpuIndexIVFFlat(
                 provider, ifl->d, ifl->nlist, ifl->metric_type, config);
@@ -162,6 +169,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         config.indicesOptions = indicesOptions;
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
+        FAISS_THROW_IF_NOT_MSG(
+                !use_raft, "this type of index is not implemented for RAFT");
 
         GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer(
                 provider,
@@ -194,6 +203,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.flatConfig.useFloat16 = useFloat16CoarseQuantizer;
         config.useFloat16LookupTables = useFloat16;
         config.usePrecomputedTables = usePrecomputed;
+        config.use_raft = use_raft;
+        config.interleavedLayout = use_raft;
 
         GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config);
 
@@ -229,7 +240,7 @@ ToGpuClonerMultiple::ToGpuClonerMultiple(
         : GpuMultipleClonerOptions(options) {
     FAISS_THROW_IF_NOT(provider.size() == devices.size());
     for (size_t i = 0; i < provider.size(); i++) {
-        sub_cloners.push_back(ToGpuCloner(provider[i], devices[i], options));
+        sub_cloners.emplace_back(provider[i], devices[i], options);
     }
 }
 
@@ -298,8 +309,8 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
             !dynamic_cast<const IndexFlat*>(quantizer)) {
         // then we flatten the coarse quantizer so that everything remains
         // on GPU
-        new_quantizer.reset(
-                new IndexFlat(quantizer->d, quantizer->metric_type));
+        new_quantizer = std::make_unique<IndexFlat>(
+                quantizer->d, quantizer->metric_type);
         std::vector<float> centroids(quantizer->d * quantizer->ntotal);
         quantizer->reconstruct_n(0, quantizer->ntotal, centroids.data());
         new_quantizer->add(quantizer->ntotal, centroids.data());
@@ -309,6 +320,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
 
     std::vector<faiss::Index*> shards(n);
 
+#pragma omp parallel for
     for (idx_t i = 0; i < n; i++) {
         // make a shallow copy
         if (reserveVecs) {
@@ -321,7 +333,7 @@ Index* ToGpuClonerMultiple::clone_Index_to_shards(const Index* index) {
                 const_cast<Index*>(quantizer),
                 index_ivfpq->d,
                 index_ivfpq->nlist,
-                index_ivfpq->code_size,
+                index_ivfpq->pq.M,
                 index_ivfpq->pq.nbits);
         idx2.metric_type = index_ivfpq->metric_type;
         idx2.pq = index_ivfpq->pq;
@@ -473,5 +485,76 @@ Index* GpuProgressiveDimIndexFactory::operator()(int dim) {
     return index_cpu_to_gpu_multiple(vres, devices, &index, &options);
 }
 
+/*********************************************
+ * Cloning binary indexes
+ *********************************************/
+
+faiss::IndexBinary* index_binary_gpu_to_cpu(
+        const faiss::IndexBinary* gpu_index) {
+    if (auto ii = dynamic_cast<const GpuIndexBinaryFlat*>(gpu_index)) {
+        IndexBinaryFlat* ret = new IndexBinaryFlat();
+        ii->copyTo(ret);
+        return ret;
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
+faiss::IndexBinary* index_binary_cpu_to_gpu(
+        GpuResourcesProvider* provider,
+        int device,
+        const faiss::IndexBinary* index,
+        const GpuClonerOptions* options) {
+    if (auto ii = dynamic_cast<const IndexBinaryFlat*>(index)) {
+        GpuIndexBinaryFlatConfig config;
+        config.device = device;
+        if (options) {
+            config.use_raft = options->use_raft;
+        }
+        return new GpuIndexBinaryFlat(provider, ii, config);
+    } else {
+        FAISS_THROW_MSG("cannot clone this type of index");
+    }
+}
+
+faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
+        std::vector<GpuResourcesProvider*>& provider,
+        std::vector<int>& devices,
+        const faiss::IndexBinary* index,
+        const GpuMultipleClonerOptions* options) {
+    GpuMultipleClonerOptions defaults;
+    FAISS_THROW_IF_NOT(devices.size() == provider.size());
+    int n = devices.size();
+    if (n == 1) {
+        return index_binary_cpu_to_gpu(provider[0], devices[0], index, options);
+    }
+    if (!options) {
+        options = &defaults;
+    }
+    if (options->shard) {
+        auto* fi = dynamic_cast<const IndexBinaryFlat*>(index);
+        FAISS_THROW_IF_NOT_MSG(fi, "only flat index cloning supported");
+        IndexBinaryShards* ret = new IndexBinaryShards(true, true);
+        for (int i = 0; i < n; i++) {
+            IndexBinaryFlat fig(fi->d);
+            size_t i0 = i * fi->ntotal / n;
+            size_t i1 = (i + 1) * fi->ntotal / n;
+            fig.add(i1 - i0, fi->xb.data() + i0 * fi->code_size);
+            ret->addIndex(index_binary_cpu_to_gpu(
+                    provider[i], devices[i], &fig, options));
+        }
+        ret->own_indices = true;
+        return ret;
+    } else { // replicas
+        IndexBinaryReplicas* ret = new IndexBinaryReplicas(true);
+        for (int i = 0; i < n; i++) {
+            ret->addIndex(index_binary_cpu_to_gpu(
+                    provider[i], devices[i], index, options));
+        }
+        ret->own_indices = true;
+        return ret;
+    }
+}
+
 } // namespace gpu
 } // namespace faiss
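
A hedged round-trip sketch for the new GPU binary cloning entry points (device 0 and the data are illustrative; assumes a GPU build of faiss):

    #include <faiss/IndexBinaryFlat.h>
    #include <faiss/gpu/GpuCloner.h>
    #include <faiss/gpu/StandardGpuResources.h>
    #include <cstdint>
    #include <vector>

    int main() {
        faiss::IndexBinaryFlat cpu_index(256);  // 256-bit codes
        std::vector<uint8_t> codes(32, 0);      // one 32-byte vector
        cpu_index.add(1, codes.data());

        faiss::gpu::StandardGpuResources res;
        faiss::IndexBinary* gpu_index =
                faiss::gpu::index_binary_cpu_to_gpu(&res, 0, &cpu_index);

        // ... search on gpu_index ...

        faiss::IndexBinary* back =
                faiss::gpu::index_binary_gpu_to_cpu(gpu_index);
        delete back;
        delete gpu_index;
        return 0;
    }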
data/vendor/faiss/faiss/gpu/GpuCloner.h

@@ -11,10 +11,12 @@
 
 #include <faiss/Clustering.h>
 #include <faiss/Index.h>
+#include <faiss/IndexBinary.h>
 #include <faiss/clone_index.h>
 #include <faiss/gpu/GpuClonerOptions.h>
 #include <faiss/gpu/GpuIndex.h>
 #include <faiss/gpu/GpuIndicesOptions.h>
+
 namespace faiss {
 namespace gpu {
 
@@ -95,5 +97,25 @@ struct GpuProgressiveDimIndexFactory : ProgressiveDimIndexFactory {
     virtual ~GpuProgressiveDimIndexFactory() override;
 };
 
+/*********************************************
+ * Cloning binary indexes
+ *********************************************/
+
+faiss::IndexBinary* index_binary_gpu_to_cpu(
+        const faiss::IndexBinary* gpu_index);
+
+/// converts any CPU index that can be converted to GPU
+faiss::IndexBinary* index_binary_cpu_to_gpu(
+        GpuResourcesProvider* provider,
+        int device,
+        const faiss::IndexBinary* index,
+        const GpuClonerOptions* options = nullptr);
+
+faiss::IndexBinary* index_binary_cpu_to_gpu_multiple(
+        std::vector<GpuResourcesProvider*>& provider,
+        std::vector<int>& devices,
+        const faiss::IndexBinary* index,
+        const GpuMultipleClonerOptions* options = nullptr);
+
 } // namespace gpu
 } // namespace faiss
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h

@@ -36,6 +36,13 @@ struct GpuClonerOptions {
 
     /// Set verbose options on the index
     bool verbose = false;
+
+    /// use the RAFT implementation
+#if defined USE_NVIDIA_RAFT
+    bool use_raft = true;
+#else
+    bool use_raft = false;
+#endif
 };
 
 struct GpuMultipleClonerOptions : public GpuClonerOptions {
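
In a USE_NVIDIA_RAFT build the cloners therefore dispatch to RAFT by default, while plain CUDA builds keep use_raft = false. A sketch of overriding the default per clone (index size illustrative):

    #include <faiss/IndexFlat.h>
    #include <faiss/gpu/GpuCloner.h>
    #include <faiss/gpu/GpuClonerOptions.h>
    #include <faiss/gpu/StandardGpuResources.h>

    int main() {
        faiss::IndexFlatL2 cpu_index(128);
        faiss::gpu::StandardGpuResources res;

        faiss::gpu::GpuClonerOptions options;
        options.use_raft = false; // force the classic CUDA path in a RAFT build

        faiss::Index* gpu_index =
                faiss::gpu::index_cpu_to_gpu(&res, 0, &cpu_index, &options);
        delete gpu_index;
        return 0;
    }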
data/vendor/faiss/faiss/gpu/GpuDistance.h

@@ -9,6 +9,7 @@
 
 #include <faiss/Index.h>
 
+#pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
 
@@ -28,44 +29,24 @@ enum class IndicesDataType {
 
 /// Arguments to brute-force GPU k-nearest neighbor searching
 struct GpuDistanceParams {
-    GpuDistanceParams()
-            : metric(faiss::MetricType::METRIC_L2),
-              metricArg(0),
-              k(0),
-              dims(0),
-              vectors(nullptr),
-              vectorType(DistanceDataType::F32),
-              vectorsRowMajor(true),
-              numVectors(0),
-              vectorNorms(nullptr),
-              queries(nullptr),
-              queryType(DistanceDataType::F32),
-              queriesRowMajor(true),
-              numQueries(0),
-              outDistances(nullptr),
-              ignoreOutDistances(false),
-              outIndicesType(IndicesDataType::I64),
-              outIndices(nullptr),
-              device(-1) {}
-
     //
     // Search parameters
     //
 
     /// Search parameter: distance metric
-    faiss::MetricType metric;
+    faiss::MetricType metric = METRIC_L2;
 
     /// Search parameter: distance metric argument (if applicable)
     /// For metric == METRIC_Lp, this is the p-value
-    float metricArg;
+    float metricArg = 0;
 
     /// Search parameter: return k nearest neighbors
     /// If the value provided is -1, then we report all pairwise distances
     /// without top-k filtering
-    int k;
+    int k = 0;
 
     /// Vector dimensionality
-    int dims;
+    int dims = 0;
 
     //
     // Vectors being queried
@@ -74,14 +55,14 @@ struct GpuDistanceParams {
     /// If vectorsRowMajor is true, this is
     /// numVectors x dims, with dims innermost; otherwise,
     /// dims x numVectors, with numVectors innermost
-    const void* vectors;
-    DistanceDataType vectorType;
-    bool vectorsRowMajor;
-    idx_t numVectors;
+    const void* vectors = nullptr;
+    DistanceDataType vectorType = DistanceDataType::F32;
+    bool vectorsRowMajor = true;
+    idx_t numVectors = 0;
 
     /// Precomputed L2 norms for each vector in `vectors`, which can be
     /// optionally provided in advance to speed computation for METRIC_L2
-    const float* vectorNorms;
+    const float* vectorNorms = nullptr;
 
     //
     // The query vectors (i.e., find k-nearest neighbors in `vectors` for each
@@ -91,10 +72,10 @@ struct GpuDistanceParams {
     /// If queriesRowMajor is true, this is
     /// numQueries x dims, with dims innermost; otherwise,
     /// dims x numQueries, with numQueries innermost
-    const void* queries;
-    DistanceDataType queryType;
-    bool queriesRowMajor;
-    idx_t numQueries;
+    const void* queries = nullptr;
+    DistanceDataType queryType = DistanceDataType::F32;
+    bool queriesRowMajor = true;
+    idx_t numQueries = 0;
 
     //
     // Output results
@@ -103,16 +84,16 @@ struct GpuDistanceParams {
     /// A region of memory size numQueries x k, with k
     /// innermost (row major) if k > 0, or if k == -1, a region of memory of
     /// size numQueries x numVectors
-    float* outDistances;
+    float* outDistances = nullptr;
 
     /// Do we only care about the indices reported, rather than the output
     /// distances? Not used if k == -1 (all pairwise distances)
-    bool ignoreOutDistances;
+    bool ignoreOutDistances = false;
 
     /// A region of memory size numQueries x k, with k
     /// innermost (row major). Not used if k == -1 (all pairwise distances)
-    IndicesDataType outIndicesType;
-    void* outIndices;
+    IndicesDataType outIndicesType = IndicesDataType::I64;
+    void* outIndices = nullptr;
 
     //
     // Execution information
@@ -123,9 +104,17 @@ struct GpuDistanceParams {
     /// (via cudaGetDevice/cudaSetDevice) is used
     /// Otherwise, an integer 0 <= device < numDevices indicates the device for
     /// execution
-    int device;
+    int device = -1;
+
+    /// Should the index dispatch down to RAFT?
+    /// TODO: change default to true if RAFT is enabled
+    bool use_raft = false;
 };
 
+/// A function that determines whether RAFT should be used based on various
+/// conditions (such as unsupported architecture)
+bool should_use_raft(GpuDistanceParams args);
+
 /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest
 /// neighbor searches on an externally-provided region of memory (e.g., from a
 /// pytorch tensor).
@@ -140,6 +129,24 @@ struct GpuDistanceParams {
 /// nearest neighbors with respect to the given metric
 void bfKnn(GpuResourcesProvider* resources, const GpuDistanceParams& args);
 
+// bfKnn which takes two extra parameters to control the maximum GPU
+// memory allowed for vectors and queries, the latter including the
+// memory required for the results.
+// If 0, the corresponding input must fit into GPU memory.
+// If greater than 0, the function will use at most this much GPU
+// memory (in bytes) for vectors and queries respectively.
+// Vectors are broken up into chunks of size vectorsMemoryLimit,
+// and queries are broken up into chunks of size queriesMemoryLimit.
+// The tiles resulting from the product of the query and vector
+// chunks are processed sequentially on the GPU.
+// Only supported for row major matrices and k > 0. The input that
+// needs sharding must reside on the CPU.
+void bfKnn_tiling(
+        GpuResourcesProvider* resources,
+        const GpuDistanceParams& args,
+        size_t vectorsMemoryLimit,
+        size_t queriesMemoryLimit);
+
 /// Deprecated legacy implementation
 void bruteForceKnn(
         GpuResourcesProvider* resources,
@@ -167,3 +174,4 @@ void bruteForceKnn(
 
 } // namespace gpu
 } // namespace faiss
+#pragma GCC visibility pop
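
Because every field of GpuDistanceParams now carries a default member initializer, callers set only the fields they need. A minimal bfKnn sketch (sizes and data are illustrative); the same args struct would feed bfKnn_tiling when the inputs must be processed in memory-limited chunks:

    #include <faiss/gpu/GpuDistance.h>
    #include <faiss/gpu/StandardGpuResources.h>
    #include <vector>

    int main() {
        int d = 32, nb = 1000, nq = 10, k = 5;
        std::vector<float> base(nb * d), queries(nq * d); // fill with real data
        std::vector<float> distances(nq * k);
        std::vector<faiss::idx_t> labels(nq * k);         // matches I64 default

        faiss::gpu::StandardGpuResources res;

        faiss::gpu::GpuDistanceParams args; // everything else keeps its default
        args.k = k;
        args.dims = d;
        args.vectors = base.data();
        args.numVectors = nb;
        args.queries = queries.data();
        args.numQueries = nq;
        args.outDistances = distances.data();
        args.outIndices = labels.data();

        faiss::gpu::bfKnn(&res, args);
        return 0;
    }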
data/vendor/faiss/faiss/gpu/GpuIndex.h

@@ -4,6 +4,21 @@
  * This source code is licensed under the MIT license found in the
  * LICENSE file in the root directory of this source tree.
  */
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 #pragma once
 
@@ -14,17 +29,26 @@ namespace faiss {
 namespace gpu {
 
 struct GpuIndexConfig {
-    inline GpuIndexConfig() : device(0), memorySpace(MemorySpace::Device) {}
-
     /// GPU device on which the index is resident
-    int device;
+    int device = 0;
 
     /// What memory space to use for primary storage.
     /// On Pascal and above (CC 6+) architectures, allows GPUs to use
     /// more memory than is available on the GPU.
-    MemorySpace memorySpace;
+    MemorySpace memorySpace = MemorySpace::Device;
+
+    /// Should the index dispatch down to RAFT?
+#if defined USE_NVIDIA_RAFT
+    bool use_raft = true;
+#else
+    bool use_raft = false;
+#endif
 };
 
+/// A centralized function that determines whether RAFT should
+/// be used based on various conditions (such as unsupported architecture)
+bool should_use_raft(GpuIndexConfig config_);
+
 class GpuIndex : public faiss::Index {
   public:
     GpuIndex(
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h

@@ -24,15 +24,13 @@ namespace gpu {
 class FlatIndex;
 
 struct GpuIndexFlatConfig : public GpuIndexConfig {
-    inline GpuIndexFlatConfig() : useFloat16(false) {}
-
     /// Whether or not data is stored as float16
-    bool useFloat16;
+    bool ALIGNED(8) useFloat16 = false;
 
     /// Deprecated: no longer used
     /// Previously used to indicate whether internal storage of vectors is
     /// transposed
-    bool storeTransposed;
+    bool storeTransposed = false;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -115,6 +113,8 @@ class GpuIndexFlat : public GpuIndex {
     }
 
   protected:
+    void resetIndex_(int dims);
+
     /// Flat index does not require IDs as there is no storage available for
     /// them
     bool addImplRequiresIDs_() const override;
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h

@@ -21,10 +21,8 @@ class GpuIndexFlat;
 class IVFBase;
 
 struct GpuIndexIVFConfig : public GpuIndexConfig {
-    inline GpuIndexIVFConfig() : indicesOptions(INDICES_64_BIT) {}
-
     /// Index storage options for the GPU
-    IndicesOptions indicesOptions;
+    IndicesOptions indicesOptions = INDICES_64_BIT;
 
     /// Configuration for the coarse quantizer object
     GpuIndexFlatConfig flatConfig;
@@ -75,10 +73,10 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     virtual void updateQuantizer() = 0;
 
     /// Returns the number of inverted lists we're managing
-    idx_t getNumLists() const;
+    virtual idx_t getNumLists() const;
 
     /// Returns the number of vectors present in a particular inverted list
-    idx_t getListLength(idx_t listId) const;
+    virtual idx_t getListLength(idx_t listId) const;
 
     /// Return the encoded vector data contained in a particular inverted list,
     /// for debugging purposes.
@@ -86,12 +84,13 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     /// GPU-side representation.
     /// Otherwise, it is converted to the CPU format.
     /// compliant format, while the native GPU format may differ.
-    std::vector<uint8_t> getListVectorData(idx_t listId, bool gpuFormat = false)
-            const;
+    virtual std::vector<uint8_t> getListVectorData(
+            idx_t listId,
+            bool gpuFormat = false) const;
 
     /// Return the vector indices contained in a particular inverted list, for
     /// debugging purposes.
-    std::vector<idx_t> getListIndices(idx_t listId) const;
+    virtual std::vector<idx_t> getListIndices(idx_t listId) const;
 
     void search_preassigned(
             idx_t n,
@@ -123,7 +122,7 @@ class GpuIndexIVF : public GpuIndex, public IndexIVFInterface {
     int getCurrentNProbe_(const SearchParameters* params) const;
     void verifyIVFSettings_() const;
     bool addImplRequiresIDs_() const override;
-    void trainQuantizer_(idx_t n, const float* x);
+    virtual void trainQuantizer_(idx_t n, const float* x);
 
     /// Called from GpuIndex for add/add_with_ids
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h

@@ -8,6 +8,8 @@
 #pragma once
 
 #include <faiss/gpu/GpuIndexIVF.h>
+#include <faiss/impl/ScalarQuantizer.h>
+
 #include <memory>
 
 namespace faiss {
@@ -21,11 +23,9 @@ class IVFFlat;
 class GpuIndexFlat;
 
 struct GpuIndexIVFFlatConfig : public GpuIndexIVFConfig {
-    inline GpuIndexIVFFlatConfig() : interleavedLayout(true) {}
-
     /// Use the alternative memory layout for the IVF lists
     /// (currently the default)
-    bool interleavedLayout;
+    bool interleavedLayout = true;
 };
 
 /// Wrapper around the GPU implementation that looks like
@@ -87,6 +87,21 @@ class GpuIndexIVFFlat : public GpuIndexIVF {
     /// Trains the coarse quantizer based on the given vector data
     void train(idx_t n, const float* x) override;
 
+  protected:
+    /// Initialize appropriate index
+    void setIndex_(
+            GpuResources* resources,
+            int dim,
+            int nlist,
+            faiss::MetricType metric,
+            float metricArg,
+            bool useResidual,
+            /// Optional ScalarQuantizer
+            faiss::ScalarQuantizer* scalarQ,
+            bool interleavedLayout,
+            IndicesOptions indicesOptions,
+            MemorySpace space);
+
   protected:
     /// Our configuration options
     const GpuIndexIVFFlatConfig ivfFlatConfig_;