faiss 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +39 -29
  5. data/vendor/faiss/faiss/Clustering.cpp +4 -2
  6. data/vendor/faiss/faiss/IVFlib.cpp +14 -7
  7. data/vendor/faiss/faiss/Index.h +72 -3
  8. data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
  11. data/vendor/faiss/faiss/IndexBinary.h +46 -3
  12. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
  13. data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
  16. data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
  17. data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
  19. data/vendor/faiss/faiss/IndexHNSW.h +10 -1
  20. data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
  21. data/vendor/faiss/faiss/IndexIDMap.h +20 -0
  22. data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
  23. data/vendor/faiss/faiss/IndexIVF.h +16 -1
  24. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
  25. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
  26. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
  27. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
  28. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
  29. data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
  30. data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
  31. data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
  32. data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
  33. data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
  34. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
  36. data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
  37. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  38. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
  39. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  42. data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
  43. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
  44. data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
  45. data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
  46. data/vendor/faiss/faiss/IndexPQ.h +1 -0
  47. data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
  48. data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
  49. data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
  50. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
  51. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
  52. data/vendor/faiss/faiss/IndexShards.cpp +7 -6
  53. data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
  54. data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
  55. data/vendor/faiss/faiss/MetricType.h +5 -3
  56. data/vendor/faiss/faiss/clone_index.cpp +2 -4
  57. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
  58. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
  59. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
  60. data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
  61. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
  62. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
  63. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
  64. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
  66. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
  67. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
  68. data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
  69. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
  70. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
  71. data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
  72. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
  73. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
  74. data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
  75. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
  76. data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
  77. data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
  78. data/vendor/faiss/faiss/impl/io.cpp +9 -5
  79. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
  80. data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
  84. data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
  85. data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
  86. data/vendor/faiss/faiss/index_factory.cpp +49 -33
  87. data/vendor/faiss/faiss/index_factory.h +8 -2
  88. data/vendor/faiss/faiss/index_io.h +0 -3
  89. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
  90. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
  91. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
  92. data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
  93. data/vendor/faiss/faiss/utils/Heap.h +23 -12
  94. data/vendor/faiss/faiss/utils/distances.cpp +42 -21
  95. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  96. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
  97. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
  98. data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
  99. data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
  100. data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
  101. data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
  102. data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
  103. data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
  104. data/vendor/faiss/faiss/utils/random.cpp +14 -7
  105. data/vendor/faiss/faiss/utils/utils.cpp +0 -3
  106. metadata +5 -2

data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp
@@ -18,7 +18,6 @@
  #include <faiss/impl/pq4_fast_scan.h>
  #include <faiss/invlists/BlockInvertedLists.h>
  #include <faiss/utils/distances.h>
- #include <faiss/utils/hamming.h>
  #include <faiss/utils/quantize_lut.h>
  #include <faiss/utils/utils.h>

@@ -34,10 +33,11 @@ IndexIVFAdditiveQuantizerFastScan::IndexIVFAdditiveQuantizerFastScan(
  size_t d,
  size_t nlist,
  MetricType metric,
- int bbs)
- : IndexIVFFastScan(quantizer, d, nlist, 0, metric) {
+ int bbs,
+ bool own_invlists)
+ : IndexIVFFastScan(quantizer, d, nlist, 0, metric, own_invlists) {
  if (aq != nullptr) {
- init(aq, nlist, metric, bbs);
+ init(aq, nlist, metric, bbs, own_invlists);
  }
  }

@@ -45,7 +45,8 @@ void IndexIVFAdditiveQuantizerFastScan::init(
  AdditiveQuantizer* aq,
  size_t nlist,
  MetricType metric,
- int bbs) {
+ int bbs,
+ bool own_invlists) {
  FAISS_THROW_IF_NOT(aq != nullptr);
  FAISS_THROW_IF_NOT(!aq->nbits.empty());
  FAISS_THROW_IF_NOT(aq->nbits[0] == 4);
@@ -66,7 +67,7 @@ void IndexIVFAdditiveQuantizerFastScan::init(
  } else {
  M = aq->M;
  }
- init_fastscan(aq, M, 4, nlist, metric, bbs);
+ init_fastscan(aq, M, 4, nlist, metric, bbs, own_invlists);

  max_train_points = 1024 * ksub * M;
  by_residual = true;
@@ -80,17 +81,20 @@ IndexIVFAdditiveQuantizerFastScan::IndexIVFAdditiveQuantizerFastScan(
  orig.d,
  orig.nlist,
  0,
- orig.metric_type),
+ orig.metric_type,
+ orig.own_invlists),
  aq(orig.aq) {
  FAISS_THROW_IF_NOT(
  metric_type == METRIC_INNER_PRODUCT || !orig.by_residual);

- init(aq, nlist, metric_type, bbs);
+ init(aq, nlist, metric_type, bbs, own_invlists);

  is_trained = orig.is_trained;
  ntotal = orig.ntotal;
  nprobe = orig.nprobe;
-
+ if (!orig.own_invlists) {
+ return; // skip packing codes below
+ }
  for (size_t i = 0; i < nlist; i++) {
  size_t nb = orig.invlists->list_size(i);
  size_t nb2 = roundup(nb, bbs);
@@ -448,17 +452,19 @@ IndexIVFLocalSearchQuantizerFastScan::IndexIVFLocalSearchQuantizerFastScan(
  size_t nbits,
  MetricType metric,
  Search_type_t search_type,
- int bbs)
+ int bbs,
+ bool own_invlists)
  : IndexIVFAdditiveQuantizerFastScan(
  quantizer,
  nullptr,
  d,
  nlist,
  metric,
- bbs),
+ bbs,
+ own_invlists),
  lsq(d, M, nbits, search_type) {
  FAISS_THROW_IF_NOT(nbits == 4);
- init(&lsq, nlist, metric, bbs);
+ init(&lsq, nlist, metric, bbs, own_invlists);
  }

  IndexIVFLocalSearchQuantizerFastScan::IndexIVFLocalSearchQuantizerFastScan() {
@@ -474,17 +480,19 @@ IndexIVFResidualQuantizerFastScan::IndexIVFResidualQuantizerFastScan(
  size_t nbits,
  MetricType metric,
  Search_type_t search_type,
- int bbs)
+ int bbs,
+ bool own_invlists)
  : IndexIVFAdditiveQuantizerFastScan(
  quantizer,
  nullptr,
  d,
  nlist,
  metric,
- bbs),
+ bbs,
+ own_invlists),
  rq(d, M, nbits, search_type) {
  FAISS_THROW_IF_NOT(nbits == 4);
- init(&rq, nlist, metric, bbs);
+ init(&rq, nlist, metric, bbs, own_invlists);
  }

  IndexIVFResidualQuantizerFastScan::IndexIVFResidualQuantizerFastScan() {
@@ -502,17 +510,19 @@ IndexIVFProductLocalSearchQuantizerFastScan::
  size_t nbits,
  MetricType metric,
  Search_type_t search_type,
- int bbs)
+ int bbs,
+ bool own_invlists)
  : IndexIVFAdditiveQuantizerFastScan(
  quantizer,
  nullptr,
  d,
  nlist,
  metric,
- bbs),
+ bbs,
+ own_invlists),
  plsq(d, nsplits, Msub, nbits, search_type) {
  FAISS_THROW_IF_NOT(nbits == 4);
- init(&plsq, nlist, metric, bbs);
+ init(&plsq, nlist, metric, bbs, own_invlists);
  }

  IndexIVFProductLocalSearchQuantizerFastScan::
@@ -531,17 +541,19 @@ IndexIVFProductResidualQuantizerFastScan::
  size_t nbits,
  MetricType metric,
  Search_type_t search_type,
- int bbs)
+ int bbs,
+ bool own_invlists)
  : IndexIVFAdditiveQuantizerFastScan(
  quantizer,
  nullptr,
  d,
  nlist,
  metric,
- bbs),
+ bbs,
+ own_invlists),
  prq(d, nsplits, Msub, nbits, search_type) {
  FAISS_THROW_IF_NOT(nbits == 4);
- init(&prq, nlist, metric, bbs);
+ init(&prq, nlist, metric, bbs, own_invlists);
  }

  IndexIVFProductResidualQuantizerFastScan::

data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h
@@ -50,9 +50,15 @@ struct IndexIVFAdditiveQuantizerFastScan : IndexIVFFastScan {
  size_t d,
  size_t nlist,
  MetricType metric = METRIC_L2,
- int bbs = 32);
+ int bbs = 32,
+ bool own_invlists = true);

- void init(AdditiveQuantizer* aq, size_t nlist, MetricType metric, int bbs);
+ void init(
+ AdditiveQuantizer* aq,
+ size_t nlist,
+ MetricType metric,
+ int bbs,
+ bool own_invlists);

  IndexIVFAdditiveQuantizerFastScan();

@@ -110,7 +116,8 @@ struct IndexIVFLocalSearchQuantizerFastScan
  size_t nbits,
  MetricType metric = METRIC_L2,
  Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
- int bbs = 32);
+ int bbs = 32,
+ bool own_invlists = true);

  IndexIVFLocalSearchQuantizerFastScan();
  };
@@ -126,7 +133,8 @@ struct IndexIVFResidualQuantizerFastScan : IndexIVFAdditiveQuantizerFastScan {
  size_t nbits,
  MetricType metric = METRIC_L2,
  Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
- int bbs = 32);
+ int bbs = 32,
+ bool own_invlists = true);

  IndexIVFResidualQuantizerFastScan();
  };
@@ -144,7 +152,8 @@ struct IndexIVFProductLocalSearchQuantizerFastScan
  size_t nbits,
  MetricType metric = METRIC_L2,
  Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
- int bbs = 32);
+ int bbs = 32,
+ bool own_invlists = true);

  IndexIVFProductLocalSearchQuantizerFastScan();
  };
@@ -162,7 +171,8 @@ struct IndexIVFProductResidualQuantizerFastScan
  size_t nbits,
  MetricType metric = METRIC_L2,
  Search_type_t search_type = AdditiveQuantizer::ST_norm_lsq2x4,
- int bbs = 32);
+ int bbs = 32,
+ bool own_invlists = true);

  IndexIVFProductResidualQuantizerFastScan();
  };

data/vendor/faiss/faiss/IndexIVFFastScan.cpp
@@ -8,7 +8,6 @@
  #include <faiss/IndexIVFFastScan.h>

  #include <cassert>
- #include <cinttypes>
  #include <cstdio>
  #include <set>

@@ -40,8 +39,9 @@ IndexIVFFastScan::IndexIVFFastScan(
  size_t d,
  size_t nlist,
  size_t code_size,
- MetricType metric)
- : IndexIVF(quantizer, d, nlist, code_size, metric) {
+ MetricType metric,
+ bool own_invlists)
+ : IndexIVF(quantizer, d, nlist, code_size, metric, own_invlists) {
  // unlike other indexes, we prefer no residuals for performance reasons.
  by_residual = false;
  FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
@@ -60,7 +60,8 @@ void IndexIVFFastScan::init_fastscan(
  size_t nbits_init,
  size_t nlist,
  MetricType /* metric */,
- int bbs_2) {
+ int bbs_2,
+ bool own_invlists) {
  FAISS_THROW_IF_NOT(bbs_2 % 32 == 0);
  FAISS_THROW_IF_NOT(nbits_init == 4);
  FAISS_THROW_IF_NOT(fine_quantizer->d == d);
@@ -75,7 +76,9 @@ void IndexIVFFastScan::init_fastscan(
  FAISS_THROW_IF_NOT(code_size == fine_quantizer->code_size);

  is_trained = false;
- replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
+ if (own_invlists) {
+ replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
+ }
  }

  void IndexIVFFastScan::init_code_packer() {
@@ -793,11 +796,13 @@ void IndexIVFFastScan::search_implem_1(
  LUT = dis_tables.get() + (i * nprobe + j) * dim12;
  }
  idx_t list_no = cq.ids[i * nprobe + j];
- if (list_no < 0)
+ if (list_no < 0) {
  continue;
+ }
  size_t ls = orig_invlists->list_size(list_no);
- if (ls == 0)
+ if (ls == 0) {
  continue;
+ }
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
  InvertedLists::ScopedIds ids(orig_invlists, list_no);

@@ -815,7 +820,7 @@ void IndexIVFFastScan::search_implem_1(
  heap_ids,
  scaler);
  nlist_visited++;
- ndis++;
+ ndis += ls;
  }
  heap_reorder<C>(k, heap_dis, heap_ids);
  }
@@ -864,11 +869,13 @@ void IndexIVFFastScan::search_implem_2(
  LUT = dis_tables.get() + (i * nprobe + j) * dim12;
  }
  idx_t list_no = cq.ids[i * nprobe + j];
- if (list_no < 0)
+ if (list_no < 0) {
  continue;
+ }
  size_t ls = orig_invlists->list_size(list_no);
- if (ls == 0)
+ if (ls == 0) {
  continue;
+ }
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
  InvertedLists::ScopedIds ids(orig_invlists, list_no);

@@ -926,7 +933,7 @@ void IndexIVFFastScan::search_implem_10(

  bool single_LUT = !lookup_table_is_3d();

- size_t ndis = 0;
+ size_t ndis = 0, nlist_visited = 0;
  int qmap1[1];

  handler.q_map = qmap1;
@@ -974,13 +981,14 @@ void IndexIVFFastScan::search_implem_10(
  handler,
  scaler);

- ndis++;
+ ndis += ls;
+ nlist_visited++;
  }
  }

  handler.end();
  *ndis_out = ndis;
- *nlist_out = nlist;
+ *nlist_out = nlist_visited;
  }

  void IndexIVFFastScan::search_implem_12(
@@ -1040,7 +1048,7 @@ void IndexIVFFastScan::search_implem_12(
  handler.dbias = tmp_bias.data();
  }

- size_t ndis = 0;
+ size_t ndis = 0, nlist_visited = 0;

  size_t i0 = 0;
  uint64_t t_copy_pack = 0, t_scan = 0;
@@ -1062,6 +1070,7 @@ void IndexIVFFastScan::search_implem_12(
  i0 = i1;
  continue;
  }
+ nlist_visited++;

  // re-organize LUTs and biases into the right order
  int nc = i1 - i0;
@@ -1120,7 +1129,7 @@ void IndexIVFFastScan::search_implem_12(
  IVFFastScan_stats.t_scan += t_scan;

  *ndis_out = ndis;
- *nlist_out = nlist;
+ *nlist_out = nlist_visited;
  }

  void IndexIVFFastScan::search_implem_14(

data/vendor/faiss/faiss/IndexIVFFastScan.h
@@ -68,7 +68,8 @@ struct IndexIVFFastScan : IndexIVF {
  size_t d,
  size_t nlist,
  size_t code_size,
- MetricType metric = METRIC_L2);
+ MetricType metric = METRIC_L2,
+ bool own_invlists = true);

  IndexIVFFastScan();

@@ -79,7 +80,8 @@ struct IndexIVFFastScan : IndexIVF {
  size_t nbits,
  size_t nlist,
  MetricType metric,
- int bbs);
+ int bbs,
+ bool own_invlists);

  // initialize the CodePacker in the InvertedLists
  void init_code_packer();
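
Note: the new own_invlists flag threaded through the fast-scan constructors above controls whether init_fastscan() allocates the index's BlockInvertedLists itself. A minimal usage sketch, not part of this diff (dimensions and quantizer parameters below are made up for illustration), assuming the 0.4.2 vendored headers:

    #include <faiss/IndexFlat.h>
    #include <faiss/IndexIVFPQFastScan.h>
    #include <faiss/invlists/BlockInvertedLists.h>

    int main() {
        size_t d = 64, nlist = 128;
        faiss::IndexFlatL2 quantizer(d);

        // With own_invlists = false, init_fastscan() skips
        // replace_invlists(new BlockInvertedLists(...), true), so the index
        // starts without inverted-list storage of its own.
        faiss::IndexIVFPQFastScan index(
                &quantizer, d, nlist, /*M=*/8, /*nbits=*/4,
                faiss::METRIC_L2, /*bbs=*/32, /*own_invlists=*/false);

        // The caller attaches storage explicitly and lets the index own it,
        // mirroring what init_fastscan() does when own_invlists is true.
        index.replace_invlists(
                new faiss::BlockInvertedLists(nlist, index.get_CodePacker()),
                true);
        return 0;
    }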

data/vendor/faiss/faiss/IndexIVFFlat.cpp
@@ -21,6 +21,7 @@

  #include <faiss/impl/FaissAssert.h>
  #include <faiss/utils/distances.h>
+ #include <faiss/utils/extra_distances.h>
  #include <faiss/utils/utils.h>

  namespace faiss {
@@ -33,8 +34,15 @@ IndexIVFFlat::IndexIVFFlat(
  Index* quantizer,
  size_t d,
  size_t nlist,
- MetricType metric)
- : IndexIVF(quantizer, d, nlist, sizeof(float) * d, metric) {
+ MetricType metric,
+ bool own_invlists)
+ : IndexIVF(
+ quantizer,
+ d,
+ nlist,
+ sizeof(float) * d,
+ metric,
+ own_invlists) {
  code_size = sizeof(float) * d;
  by_residual = false;
  }
@@ -115,6 +123,18 @@ void IndexIVFFlat::encode_vectors(
  }
  }

+ void IndexIVFFlat::decode_vectors(
+ idx_t n,
+ const uint8_t* codes,
+ const idx_t* /*listnos*/,
+ float* x) const {
+ for (size_t i = 0; i < n; i++) {
+ const uint8_t* code = codes + i * code_size;
+ float* xi = x + i * d;
+ memcpy(xi, code, code_size);
+ }
+ }
+
  void IndexIVFFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
  size_t coarse_size = coarse_code_size();
  for (size_t i = 0; i < n; i++) {
@@ -126,13 +146,18 @@ void IndexIVFFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {

  namespace {

- template <MetricType metric, class C, bool use_sel>
+ template <typename VectorDistance, bool use_sel>
  struct IVFFlatScanner : InvertedListScanner {
- size_t d;
-
- IVFFlatScanner(size_t d, bool store_pairs, const IDSelector* sel)
- : InvertedListScanner(store_pairs, sel), d(d) {
- keep_max = is_similarity_metric(metric);
+ VectorDistance vd;
+ using C = typename VectorDistance::C;
+
+ IVFFlatScanner(
+ const VectorDistance& vd,
+ bool store_pairs,
+ const IDSelector* sel)
+ : InvertedListScanner(store_pairs, sel), vd(vd) {
+ keep_max = vd.is_similarity;
+ code_size = vd.d * sizeof(float);
  }

  const float* xi;
@@ -146,10 +171,7 @@ struct IVFFlatScanner : InvertedListScanner {

  float distance_to_code(const uint8_t* code) const override {
  const float* yj = (float*)code;
- float dis = metric == METRIC_INNER_PRODUCT
- ? fvec_inner_product(xi, yj, d)
- : fvec_L2sqr(xi, yj, d);
- return dis;
+ return vd(xi, yj);
  }

  size_t scan_codes(
@@ -162,13 +184,11 @@ struct IVFFlatScanner : InvertedListScanner {
  const float* list_vecs = (const float*)codes;
  size_t nup = 0;
  for (size_t j = 0; j < list_size; j++) {
- const float* yj = list_vecs + d * j;
+ const float* yj = list_vecs + vd.d * j;
  if (use_sel && !sel->is_member(ids[j])) {
  continue;
  }
- float dis = metric == METRIC_INNER_PRODUCT
- ? fvec_inner_product(xi, yj, d)
- : fvec_L2sqr(xi, yj, d);
+ float dis = vd(xi, yj);
  if (C::cmp(simi[0], dis)) {
  int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
  heap_replace_top<C>(k, simi, idxi, dis, id);
@@ -186,13 +206,11 @@ struct IVFFlatScanner : InvertedListScanner {
  RangeQueryResult& res) const override {
  const float* list_vecs = (const float*)codes;
  for (size_t j = 0; j < list_size; j++) {
- const float* yj = list_vecs + d * j;
+ const float* yj = list_vecs + vd.d * j;
  if (use_sel && !sel->is_member(ids[j])) {
  continue;
  }
- float dis = metric == METRIC_INNER_PRODUCT
- ? fvec_inner_product(xi, yj, d)
- : fvec_L2sqr(xi, yj, d);
+ float dis = vd(xi, yj);
  if (C::cmp(radius, dis)) {
  int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
  res.add(dis, id);
@@ -201,23 +219,22 @@ struct IVFFlatScanner : InvertedListScanner {
  }
  };

- template <bool use_sel>
- InvertedListScanner* get_InvertedListScanner1(
- const IndexIVFFlat* ivf,
- bool store_pairs,
- const IDSelector* sel) {
- if (ivf->metric_type == METRIC_INNER_PRODUCT) {
- return new IVFFlatScanner<
- METRIC_INNER_PRODUCT,
- CMin<float, int64_t>,
- use_sel>(ivf->d, store_pairs, sel);
- } else if (ivf->metric_type == METRIC_L2) {
- return new IVFFlatScanner<METRIC_L2, CMax<float, int64_t>, use_sel>(
- ivf->d, store_pairs, sel);
- } else {
- FAISS_THROW_MSG("metric type not supported");
+ struct Run_get_InvertedListScanner {
+ using T = InvertedListScanner*;
+
+ template <class VD>
+ InvertedListScanner* f(
+ VD& vd,
+ const IndexIVFFlat* ivf,
+ bool store_pairs,
+ const IDSelector* sel) {
+ if (sel) {
+ return new IVFFlatScanner<VD, true>(vd, store_pairs, sel);
+ } else {
+ return new IVFFlatScanner<VD, false>(vd, store_pairs, sel);
+ }
  }
- }
+ };

  } // anonymous namespace

@@ -225,11 +242,9 @@ InvertedListScanner* IndexIVFFlat::get_InvertedListScanner(
  bool store_pairs,
  const IDSelector* sel,
  const IVFSearchParameters*) const {
- if (sel) {
- return get_InvertedListScanner1<true>(this, store_pairs, sel);
- } else {
- return get_InvertedListScanner1<false>(this, store_pairs, sel);
- }
+ Run_get_InvertedListScanner run;
+ return dispatch_VectorDistance(
+ d, metric_type, metric_arg, run, this, store_pairs, sel);
  }

  void IndexIVFFlat::reconstruct_from_offset(
@@ -247,8 +262,9 @@ IndexIVFFlatDedup::IndexIVFFlatDedup(
  Index* quantizer,
  size_t d,
  size_t nlist_,
- MetricType metric_type)
- : IndexIVFFlat(quantizer, d, nlist_, metric_type) {}
+ MetricType metric_type,
+ bool own_invlists)
+ : IndexIVFFlat(quantizer, d, nlist_, metric_type, own_invlists) {}

  void IndexIVFFlatDedup::train(idx_t n, const float* x) {
  std::unordered_map<uint64_t, idx_t> map;

data/vendor/faiss/faiss/IndexIVFFlat.h
@@ -26,7 +26,8 @@ struct IndexIVFFlat : IndexIVF {
  Index* quantizer,
  size_t d,
  size_t nlist_,
- MetricType = METRIC_L2);
+ MetricType = METRIC_L2,
+ bool own_invlists = true);

  void add_core(
  idx_t n,
@@ -42,6 +43,12 @@ struct IndexIVFFlat : IndexIVF {
  uint8_t* codes,
  bool include_listnos = false) const override;

+ void decode_vectors(
+ idx_t n,
+ const uint8_t* codes,
+ const idx_t* list_nos,
+ float* x) const override;
+
  InvertedListScanner* get_InvertedListScanner(
  bool store_pairs,
  const IDSelector* sel,
@@ -65,7 +72,8 @@ struct IndexIVFFlatDedup : IndexIVFFlat {
  Index* quantizer,
  size_t d,
  size_t nlist_,
- MetricType = METRIC_L2);
+ MetricType = METRIC_L2,
+ bool own_invlists = true);

  /// also dedups the training set
  void train(idx_t n, const float* x) override;
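
Note: decode_vectors() is a new override on IndexIVFFlat (IndexIVFPQ gains the same override further down); for the flat codec it simply copies each code back into a float vector and ignores the list numbers. A hedged sketch of calling it on one inverted list follows; the decode_list helper is hypothetical and not part of FAISS:

    #include <vector>
    #include <faiss/IndexIVFFlat.h>
    #include <faiss/invlists/InvertedLists.h>

    // Hypothetical helper: decode every stored vector of one inverted list.
    std::vector<float> decode_list(
            const faiss::IndexIVFFlat& index,
            size_t list_no) {
        size_t n = index.invlists->list_size(list_no);
        faiss::InvertedLists::ScopedCodes codes(index.invlists, list_no);
        // IndexIVFFlat ignores the list numbers, but they are part of the
        // decode_vectors() signature declared above.
        std::vector<faiss::idx_t> list_nos(n, (faiss::idx_t)list_no);
        std::vector<float> x(n * index.d);
        index.decode_vectors(n, codes.get(), list_nos.data(), x.data());
        return x;
    }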

data/vendor/faiss/faiss/IndexIVFPQ.cpp
@@ -46,10 +46,14 @@ IndexIVFPQ::IndexIVFPQ(
  size_t nlist,
  size_t M,
  size_t nbits_per_idx,
- MetricType metric)
- : IndexIVF(quantizer, d, nlist, 0, metric), pq(d, M, nbits_per_idx) {
+ MetricType metric,
+ bool own_invlists)
+ : IndexIVF(quantizer, d, nlist, 0, metric, own_invlists),
+ pq(d, M, nbits_per_idx) {
  code_size = pq.code_size;
- invlists->code_size = code_size;
+ if (own_invlists) {
+ invlists->code_size = code_size;
+ }
  is_trained = false;
  by_residual = true;
  use_precomputed_table = 0;
@@ -181,6 +185,14 @@ void IndexIVFPQ::encode_vectors(
  }
  }

+ void IndexIVFPQ::decode_vectors(
+ idx_t n,
+ const uint8_t* codes,
+ const idx_t* listnos,
+ float* x) const {
+ return decode_multiple(n, listnos, codes, x);
+ }
+
  void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
  size_t coarse_size = coarse_code_size();

@@ -1201,6 +1213,7 @@ struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>,
  sel(sel) {
  this->store_pairs = store_pairs;
  this->keep_max = is_similarity_metric(METRIC_TYPE);
+ this->code_size = this->pq.code_size;
  }

  void set_query(const float* query) override {

data/vendor/faiss/faiss/IndexIVFPQ.h
@@ -56,7 +56,8 @@ struct IndexIVFPQ : IndexIVF {
  size_t nlist,
  size_t M,
  size_t nbits_per_idx,
- MetricType metric = METRIC_L2);
+ MetricType metric = METRIC_L2,
+ bool own_invlists = true);

  void encode_vectors(
  idx_t n,
@@ -65,6 +66,12 @@ struct IndexIVFPQ : IndexIVF {
  uint8_t* codes,
  bool include_listnos = false) const override;

+ void decode_vectors(
+ idx_t n,
+ const uint8_t* codes,
+ const idx_t* listnos,
+ float* x) const override;
+
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;

  void add_core(

data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp
@@ -8,7 +8,6 @@
  #include <faiss/IndexIVFPQFastScan.h>

  #include <cassert>
- #include <cinttypes>
  #include <cstdio>

  #include <memory>
@@ -38,11 +37,13 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(
  size_t M,
  size_t nbits,
  MetricType metric,
- int bbs)
- : IndexIVFFastScan(quantizer, d, nlist, 0, metric), pq(d, M, nbits) {
+ int bbs,
+ bool own_invlists)
+ : IndexIVFFastScan(quantizer, d, nlist, 0, metric, own_invlists),
+ pq(d, M, nbits) {
  by_residual = false; // set to false by default because it's faster

- init_fastscan(&pq, M, nbits, nlist, metric, bbs);
+ init_fastscan(&pq, M, nbits, nlist, metric, bbs, own_invlists);
  }

  IndexIVFPQFastScan::IndexIVFPQFastScan() {
@@ -57,12 +58,19 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
  orig.d,
  orig.nlist,
  orig.pq.code_size,
- orig.metric_type),
+ orig.metric_type,
+ orig.own_invlists),
  pq(orig.pq) {
  FAISS_THROW_IF_NOT(orig.pq.nbits == 4);

  init_fastscan(
- &pq, orig.pq.M, orig.pq.nbits, orig.nlist, orig.metric_type, bbs);
+ &pq,
+ orig.pq.M,
+ orig.pq.nbits,
+ orig.nlist,
+ orig.metric_type,
+ bbs,
+ orig.own_invlists);

  by_residual = orig.by_residual;
  ntotal = orig.ntotal;