faiss 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/Index.h +1 -1
- data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
- data/vendor/faiss/faiss/Index2Layer.h +2 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
- data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
- data/vendor/faiss/faiss/IndexFlat.h +9 -15
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
- data/vendor/faiss/faiss/IndexIVF.h +25 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
- data/vendor/faiss/faiss/IndexLSH.h +2 -15
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
- data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
- data/vendor/faiss/faiss/IndexPQ.h +2 -17
- data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
- data/vendor/faiss/faiss/IndexRefine.h +10 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
- data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
- data/vendor/faiss/faiss/VectorTransform.h +3 -0
- data/vendor/faiss/faiss/clone_index.cpp +3 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
- data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
- data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/index_factory.cpp +585 -414
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/utils/distances.cpp +4 -2
- data/vendor/faiss/faiss/utils/distances.h +36 -3
- data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +12 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
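The headline change in the vendored sources is the new `IndexIVFAdditiveQuantizer` family (`IndexIVFAdditiveQuantizer.cpp/.h`, `IndexIVFAdditiveQuantizer.cpp` replacing `IndexResidual.cpp`), alongside the new `kmeans1d` helper and a reworked `index_factory`. As a rough orientation only, the sketch below shows how the new IVF residual-quantizer index could be used through the vendored C++ API; it is not part of the diff, and the dimensions, parameters, and random data are arbitrary placeholders.

```cpp
// Minimal usage sketch for the IndexIVFResidualQuantizer added to the vendored
// faiss sources in this release. Not part of the package diff; sizes and data
// are made up for illustration.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFAdditiveQuantizer.h>

#include <random>
#include <vector>

int main() {
    size_t d = 64;      // vector dimensionality
    size_t nlist = 128; // number of inverted lists
    size_t M = 4;       // residual quantizer: number of subquantizers
    size_t nbits = 8;   // bits per subquantizer
    size_t nb = 10000;  // database size

    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = u(rng);
    }

    faiss::IndexFlatL2 coarse(d); // coarse quantizer
    faiss::IndexIVFResidualQuantizer index(&coarse, d, nlist, M, nbits);

    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // Query with the first 5 database vectors.
    faiss::Index::idx_t k = 4, nq = 5;
    std::vector<float> distances(nq * k);
    std::vector<faiss::Index::idx_t> labels(nq * k);
    index.nprobe = 8;
    index.search(nq, xb.data(), k, distances.data(), labels.data());
    return 0;
}
```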
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp (new file)
@@ -0,0 +1,316 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// quiet the noise
+// XXclang-format off
+
+#include <faiss/IndexIVFAdditiveQuantizer.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/ResultHandler.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/extra_distances.h>
+#include <faiss/utils/utils.h>
+
+namespace faiss {
+
+/**************************************************************************************
+ * IndexIVFAdditiveQuantizer
+ **************************************************************************************/
+
+IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
+        AdditiveQuantizer* aq,
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        MetricType metric)
+        : IndexIVF(quantizer, d, nlist, 0, metric), aq(aq) {
+    by_residual = true;
+}
+
+IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq)
+        : IndexIVF(), aq(aq) {}
+
+void IndexIVFAdditiveQuantizer::train_residual(idx_t n, const float* x) {
+    const float* x_in = x;
+
+    size_t max_train_points = 1024 * ((size_t)1 << aq->nbits[0]);
+
+    x = fvecs_maybe_subsample(
+            d, (size_t*)&n, max_train_points, x, verbose, 1234);
+    ScopeDeleter1<float> del_x(x_in == x ? nullptr : x);
+
+    if (by_residual) {
+        std::vector<Index::idx_t> idx(n);
+        quantizer->assign(n, x, idx.data());
+
+        std::vector<float> residuals(n * d);
+        quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
+
+        aq->train(n, residuals.data());
+    } else {
+        aq->train(n, x);
+    }
+}
+
+void IndexIVFAdditiveQuantizer::encode_vectors(
+        idx_t n,
+        const float* x,
+        const idx_t* list_nos,
+        uint8_t* codes,
+        bool include_listnos) const {
+    FAISS_THROW_IF_NOT(is_trained);
+
+    // first encode then possibly add listnos
+
+    if (by_residual) {
+        // subtract centroids
+        std::vector<float> residuals(n * d);
+
+#pragma omp parallel if (n > 10000)
+        for (idx_t i = 0; i < n; i++) {
+            quantizer->compute_residual(
+                    x + i * d,
+                    residuals.data() + i * d,
+                    list_nos[i] >= 0 ? list_nos[i] : 0);
+        }
+        aq->compute_codes(residuals.data(), codes, n);
+    } else {
+        aq->compute_codes(x, codes, n);
+    }
+
+    if (include_listnos) {
+        // write back from the end, where there is enough space
+        size_t coarse_size = coarse_code_size();
+        for (idx_t i = n - 1; i >= 0; i--) {
+            uint8_t* code = codes + i * (code_size + coarse_size);
+            memmove(code + coarse_size, codes + i * code_size, code_size);
+            encode_listno(list_nos[i], code);
+        }
+    }
+}
+
+IndexIVFAdditiveQuantizer::~IndexIVFAdditiveQuantizer() {}
+
+/*********************************************
+ * AQInvertedListScanner
+ *********************************************/
+
+namespace {
+
+using Search_type_t = AdditiveQuantizer::Search_type_t;
+
+struct AQInvertedListScanner : InvertedListScanner {
+    const IndexIVFAdditiveQuantizer& ia;
+    const AdditiveQuantizer& aq;
+    std::vector<float> tmp;
+
+    AQInvertedListScanner(const IndexIVFAdditiveQuantizer& ia, bool store_pairs)
+            : ia(ia), aq(*ia.aq) {
+        this->store_pairs = store_pairs;
+        this->code_size = ia.code_size;
+        keep_max = ia.metric_type == METRIC_INNER_PRODUCT;
+        tmp.resize(ia.d);
+    }
+
+    const float* q0;
+
+    /// from now on we handle this query.
+    void set_query(const float* query_vector) override {
+        q0 = query_vector;
+    }
+
+    const float* q;
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        if (ia.metric_type == METRIC_L2 && ia.by_residual) {
+            ia.quantizer->compute_residual(q0, tmp.data(), list_no);
+            q = tmp.data();
+        } else {
+            q = q0;
+        }
+    }
+
+    ~AQInvertedListScanner() {}
+};
+
+template <bool is_IP>
+struct AQInvertedListScannerDecompress : AQInvertedListScanner {
+    AQInvertedListScannerDecompress(
+            const IndexIVFAdditiveQuantizer& ia,
+            bool store_pairs)
+            : AQInvertedListScanner(ia, store_pairs) {}
+
+    float coarse_dis = 0;
+
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        AQInvertedListScanner::set_list(list_no, coarse_dis);
+        if (ia.by_residual) {
+            this->coarse_dis = coarse_dis;
+        }
+    }
+
+    /// compute a single query-to-code distance
+    float distance_to_code(const uint8_t* code) const final {
+        std::vector<float> b(aq.d);
+        aq.decode(code, b.data(), 1);
+        FAISS_ASSERT(q);
+        FAISS_ASSERT(b.data());
+
+        return is_IP ? coarse_dis + fvec_inner_product(q, b.data(), aq.d)
+                     : fvec_L2sqr(q, b.data(), aq.d);
+    }
+
+    ~AQInvertedListScannerDecompress() override {}
+};
+
+template <bool is_IP, Search_type_t search_type>
+struct AQInvertedListScannerLUT : AQInvertedListScanner {
+    std::vector<float> LUT, tmp;
+    float distance_bias;
+
+    AQInvertedListScannerLUT(
+            const IndexIVFAdditiveQuantizer& ia,
+            bool store_pairs)
+            : AQInvertedListScanner(ia, store_pairs) {
+        LUT.resize(aq.total_codebook_size);
+        tmp.resize(ia.d);
+        distance_bias = 0;
+    }
+
+    /// from now on we handle this query.
+    void set_query(const float* query_vector) override {
+        AQInvertedListScanner::set_query(query_vector);
+        if (!is_IP && !ia.by_residual) {
+            distance_bias = fvec_norm_L2sqr(query_vector, ia.d);
+        }
+    }
+
+    /// following codes come from this inverted list
+    void set_list(idx_t list_no, float coarse_dis) override {
+        AQInvertedListScanner::set_list(list_no, coarse_dis);
+        // TODO find a way to provide the nprobes together to do a matmul
+        // + precompute tables
+        aq.compute_LUT(1, q, LUT.data());
+
+        if (ia.by_residual) {
+            distance_bias = coarse_dis;
+        }
+    }
+
+    /// compute a single query-to-code distance
+    float distance_to_code(const uint8_t* code) const final {
+        return distance_bias +
+                aq.compute_1_distance_LUT<is_IP, search_type>(code, LUT.data());
+    }
+
+    ~AQInvertedListScannerLUT() override {}
+};
+
+} // anonymous namespace
+
+InvertedListScanner* IndexIVFAdditiveQuantizer::get_InvertedListScanner(
+        bool store_pairs) const {
+    if (metric_type == METRIC_INNER_PRODUCT) {
+        if (aq->search_type == AdditiveQuantizer::ST_decompress) {
+            return new AQInvertedListScannerDecompress<true>(
+                    *this, store_pairs);
+        } else {
+            return new AQInvertedListScannerLUT<
+                    true,
+                    AdditiveQuantizer::ST_LUT_nonorm>(*this, store_pairs);
+        }
+    } else {
+        switch (aq->search_type) {
+            case AdditiveQuantizer::ST_decompress:
+                return new AQInvertedListScannerDecompress<false>(
+                        *this, store_pairs);
+#define A(st)                                                              \
+    case AdditiveQuantizer::st:                                            \
+        return new AQInvertedListScannerLUT<false, AdditiveQuantizer::st>( \
+                *this, store_pairs);
+            A(ST_LUT_nonorm)
+            // A(ST_norm_from_LUT)
+            A(ST_norm_float)
+            A(ST_norm_qint8)
+            A(ST_norm_qint4)
+            A(ST_norm_cqint8)
+            A(ST_norm_cqint4)
+#undef A
+            default:
+                FAISS_THROW_FMT(
+                        "search type %d not supported", aq->search_type);
+        }
+    }
+}
+
+/**************************************************************************************
+ * IndexIVFResidualQuantizer
+ **************************************************************************************/
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        const std::vector<size_t>& nbits,
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFAdditiveQuantizer(&rq, quantizer, d, nlist, metric),
+          rq(d, nbits, search_type) {
+    code_size = invlists->code_size = rq.code_size;
+}
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer()
+        : IndexIVFAdditiveQuantizer(&rq) {}
+
+IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        size_t M,     /* number of subquantizers */
+        size_t nbits, /* number of bit per subvector index */
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFResidualQuantizer(
+                  quantizer,
+                  d,
+                  nlist,
+                  std::vector<size_t>(M, nbits),
+                  metric,
+                  search_type) {}
+
+IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() {}
+
+/**************************************************************************************
+ * IndexIVFLocalSearchQuantizer
+ **************************************************************************************/
+
+IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        size_t M,     /* number of subquantizers */
+        size_t nbits, /* number of bit per subvector index */
+        MetricType metric,
+        Search_type_t search_type)
+        : IndexIVFAdditiveQuantizer(&lsq, quantizer, d, nlist, metric),
+          lsq(d, M, nbits, search_type) {
+    code_size = invlists->code_size = lsq.code_size;
+}
+
+IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
+        : IndexIVFAdditiveQuantizer(&lsq) {}
+
+IndexIVFLocalSearchQuantizer::~IndexIVFLocalSearchQuantizer() {}
+
+} // namespace faiss
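In `encode_vectors` above, when `include_listnos` is true the per-vector code written to `codes` is the coarse list number (in `coarse_code_size()` bytes) followed by the `code_size`-byte additive-quantizer code; the final loop runs backwards and uses `memmove` so the in-place expansion never overwrites codes that have not been moved yet. The helper below is purely illustrative (it is not part of faiss) and only splits such a combined code back into its two parts, to make the layout concrete.

```cpp
// Illustrative only: split one combined code produced by
// IndexIVFAdditiveQuantizer::encode_vectors(..., include_listnos=true).
// coarse_size corresponds to coarse_code_size(), code_size to index.code_size.
#include <cstddef>
#include <cstdint>
#include <vector>

struct SplitCode {
    std::vector<uint8_t> listno_bytes; // encoded inverted-list number
    std::vector<uint8_t> aq_code;      // additive-quantizer code
};

inline SplitCode split_combined_code(
        const uint8_t* combined,
        size_t coarse_size,
        size_t code_size) {
    SplitCode out;
    out.listno_bytes.assign(combined, combined + coarse_size);
    out.aq_code.assign(
            combined + coarse_size, combined + coarse_size + code_size);
    return out;
}
```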
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h (new file)
@@ -0,0 +1,121 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
+#define FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
+
+#include <faiss/impl/AdditiveQuantizer.h>
+
+#include <cstdint>
+#include <vector>
+
+#include <faiss/IndexIVF.h>
+#include <faiss/impl/LocalSearchQuantizer.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/platform_macros.h>
+
+namespace faiss {
+
+/// Abstract class for IVF additive quantizers.
+/// The search functions are in common.
+struct IndexIVFAdditiveQuantizer : IndexIVF {
+    // the quantizer
+    AdditiveQuantizer* aq;
+    bool by_residual = true;
+    int use_precomputed_table = 0; // for future use
+
+    using Search_type_t = AdditiveQuantizer::Search_type_t;
+
+    IndexIVFAdditiveQuantizer(
+            AdditiveQuantizer* aq,
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            MetricType metric = METRIC_L2);
+
+    explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
+
+    void train_residual(idx_t n, const float* x) override;
+
+    void encode_vectors(
+            idx_t n,
+            const float* x,
+            const idx_t* list_nos,
+            uint8_t* codes,
+            bool include_listnos = false) const override;
+
+    InvertedListScanner* get_InvertedListScanner(
+            bool store_pairs) const override;
+
+    ~IndexIVFAdditiveQuantizer() override;
+};
+
+/** IndexIVF based on a residual quantizer. Stored vectors are
+ * approximated by residual quantization codes.
+ */
+struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
+    /// The residual quantizer used to encode the vectors
+    ResidualQuantizer rq;
+
+    /** Constructor.
+     *
+     * @param d      dimensionality of the input vectors
+     * @param M      number of subquantizers
+     * @param nbits  number of bit per subvector index
+     */
+    IndexIVFResidualQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            const std::vector<size_t>& nbits,
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFResidualQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,     /* number of subquantizers */
+            size_t nbits, /* number of bit per subvector index */
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFResidualQuantizer();
+
+    virtual ~IndexIVFResidualQuantizer();
+};
+
+/** IndexIVF based on a residual quantizer. Stored vectors are
+ * approximated by residual quantization codes.
+ */
+struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
+    /// The LSQ quantizer used to encode the vectors
+    LocalSearchQuantizer lsq;
+
+    /** Constructor.
+     *
+     * @param d      dimensionality of the input vectors
+     * @param M      number of subquantizers
+     * @param nbits  number of bit per subvector index
+     */
+    IndexIVFLocalSearchQuantizer(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,     /* number of subquantizers */
+            size_t nbits, /* number of bit per subvector index */
+            MetricType metric = METRIC_L2,
+            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
+
+    IndexIVFLocalSearchQuantizer();
+
+    virtual ~IndexIVFLocalSearchQuantizer();
+};
+
+} // namespace faiss
+
+#endif
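The `search_type` argument declared above selects how the scanners defined in IndexIVFAdditiveQuantizer.cpp compute distances: `ST_decompress` (the default) decodes each stored vector, while the other values are dispatched to the LUT-based scanner. A sketch, reusing the same toy parameters as the earlier example, of constructing the LSQ variant with one of the LUT-based search types:

```cpp
// Sketch only: construct the new IndexIVFLocalSearchQuantizer with a
// non-default search type (ST_norm_float, one of the LUT-based modes
// dispatched in get_InvertedListScanner). Parameter values are arbitrary.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFAdditiveQuantizer.h>

int main() {
    size_t d = 64, nlist = 128, M = 4, nbits = 8;

    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFLocalSearchQuantizer index(
            &coarse,
            d,
            nlist,
            M,
            nbits,
            faiss::METRIC_L2,
            faiss::AdditiveQuantizer::ST_norm_float);

    // train(), add() and search() then work as for any other IndexIVF
    // subclass.
    return 0;
}
```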
data/vendor/faiss/faiss/IndexIVFFlat.cpp
@@ -121,17 +121,16 @@ namespace {
 template <MetricType metric, class C>
 struct IVFFlatScanner : InvertedListScanner {
     size_t d;
-    bool store_pairs;
 
-    IVFFlatScanner(size_t d, bool store_pairs)
-            : d(d), store_pairs(store_pairs) {}
+    IVFFlatScanner(size_t d, bool store_pairs) : d(d) {
+        this->store_pairs = store_pairs;
+    }
 
     const float* xi;
     void set_query(const float* query) override {
         this->xi = query;
     }
 
-    idx_t list_no;
     void set_list(idx_t list_no, float /* coarse_dis */) override {
         this->list_no = list_no;
     }
@@ -223,18 +222,17 @@ IndexIVFFlatDedup::IndexIVFFlatDedup(
 
 void IndexIVFFlatDedup::train(idx_t n, const float* x) {
     std::unordered_map<uint64_t, idx_t> map;
-    float* x2 = new float[n * d];
-    ScopeDeleter<float> del(x2);
+    std::unique_ptr<float[]> x2(new float[n * d]);
 
     int64_t n2 = 0;
     for (int64_t i = 0; i < n; i++) {
         uint64_t hash = hash_bytes((uint8_t*)(x + i * d), code_size);
         if (map.count(hash) &&
-            !memcmp(x2 + map[hash] * d, x + i * d, code_size)) {
+            !memcmp(x2.get() + map[hash] * d, x + i * d, code_size)) {
             // is duplicate, skip
         } else {
             map[hash] = n2;
-            memcpy(x2 + n2 * d, x + i * d, code_size);
+            memcpy(x2.get() + n2 * d, x + i * d, code_size);
             n2++;
         }
     }
@@ -245,7 +243,7 @@ void IndexIVFFlatDedup::train(idx_t n, const float* x) {
                 n2,
                 n);
     }
-    IndexIVFFlat::train(n2, x2);
+    IndexIVFFlat::train(n2, x2.get());
 }
 
 void IndexIVFFlatDedup::add_with_ids(
@@ -256,9 +254,8 @@ void IndexIVFFlatDedup::add_with_ids(
     assert(invlists);
     FAISS_THROW_IF_NOT_MSG(
             direct_map.no(), "IVFFlatDedup not implemented with direct_map");
-    int64_t* idx = new int64_t[na];
-    ScopeDeleter<int64_t> del(idx);
-    quantizer->assign(na, x, idx);
+    std::unique_ptr<int64_t[]> idx(new int64_t[na]);
+    quantizer->assign(na, x, idx.get());
 
     int64_t n_add = 0, n_dup = 0;
 
data/vendor/faiss/faiss/IndexIVFPQ.cpp
@@ -584,7 +584,7 @@ struct QueryTables {
     // field specific to query
     const float* qi;
 
-    // query-specific
+    // query-specific initialization
     void init_query(const float* qi) {
         this->qi = qi;
         if (metric_type == METRIC_INNER_PRODUCT)
@@ -1018,21 +1018,22 @@ struct IVFPQScannerT : QueryTables {
 template <MetricType METRIC_TYPE, class C, class PQDecoder>
 struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
                       InvertedListScanner {
-    bool store_pairs;
     int precompute_mode;
 
     IVFPQScanner(const IndexIVFPQ& ivfpq, bool store_pairs, int precompute_mode)
             : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(
                       ivfpq,
                       nullptr),
-              store_pairs(store_pairs),
-              precompute_mode(precompute_mode) {}
+              precompute_mode(precompute_mode) {
+        this->store_pairs = store_pairs;
+    }
 
     void set_query(const float* query) override {
         this->init_query(query);
     }
 
     void set_list(idx_t list_no, float coarse_dis) override {
+        this->list_no = list_no;
         this->init_list(list_no, coarse_dis, precompute_mode);
     }
 
data/vendor/faiss/faiss/IndexIVFPQ.h
@@ -150,7 +150,7 @@ struct IndexIVFPQ : IndexIVF {
      * < precomputed_tables_max_bytes), set use_precomputed_table on
      * output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
      * specific version for MultiIndexQuantizer (much more compact)
-     * @param precomputed_table precomputed table to
+     * @param precomputed_table precomputed table to initialize
      */
 
     void initialize_IVFPQ_precomputed_table(