RubyGems - faiss - Versions diffs - 0.3.4 → 0.4.0 - Mend

faiss 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +11 -8
data/vendor/faiss/faiss/Clustering.cpp +0 -16
data/vendor/faiss/faiss/IVFlib.cpp +213 -0
data/vendor/faiss/faiss/IVFlib.h +42 -0
data/vendor/faiss/faiss/Index.h +1 -1
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -7
data/vendor/faiss/faiss/IndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/IndexFlatCodes.cpp +1 -1
data/vendor/faiss/faiss/IndexFlatCodes.h +4 -2
data/vendor/faiss/faiss/IndexHNSW.cpp +13 -20
data/vendor/faiss/faiss/IndexHNSW.h +1 -1
data/vendor/faiss/faiss/IndexIVF.cpp +20 -3
data/vendor/faiss/faiss/IndexIVF.h +5 -2
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -1
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +2 -1
data/vendor/faiss/faiss/IndexIVFFlat.cpp +2 -1
data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.cpp +2 -1
data/vendor/faiss/faiss/IndexIVFPQ.h +2 -1
data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +277 -0
data/vendor/faiss/faiss/IndexIVFRaBitQ.h +70 -0
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +2 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
data/vendor/faiss/faiss/IndexRaBitQ.cpp +148 -0
data/vendor/faiss/faiss/IndexRaBitQ.h +65 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -1
data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -1
data/vendor/faiss/faiss/clone_index.cpp +38 -3
data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +19 -0
data/vendor/faiss/faiss/cppcontrib/factory_tools.h +4 -11
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +13 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +1 -1
data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +112 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +35 -13
data/vendor/faiss/faiss/impl/HNSW.h +5 -4
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +519 -0
data/vendor/faiss/faiss/impl/RaBitQuantizer.h +78 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +2 -2
data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h +3 -4
data/vendor/faiss/faiss/impl/index_read.cpp +220 -25
data/vendor/faiss/faiss/impl/index_write.cpp +29 -0
data/vendor/faiss/faiss/impl/io.h +2 -2
data/vendor/faiss/faiss/impl/io_macros.h +2 -0
data/vendor/faiss/faiss/impl/mapped_io.cpp +313 -0
data/vendor/faiss/faiss/impl/mapped_io.h +51 -0
data/vendor/faiss/faiss/impl/maybe_owned_vector.h +316 -0
data/vendor/faiss/faiss/impl/platform_macros.h +7 -3
data/vendor/faiss/faiss/impl/simd_result_handlers.h +1 -1
data/vendor/faiss/faiss/impl/zerocopy_io.cpp +67 -0
data/vendor/faiss/faiss/impl/zerocopy_io.h +32 -0
data/vendor/faiss/faiss/index_factory.cpp +16 -5
data/vendor/faiss/faiss/index_io.h +4 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +3 -3
data/vendor/faiss/faiss/invlists/InvertedLists.h +5 -3
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +3 -3
data/vendor/faiss/faiss/python/python_callbacks.cpp +24 -0
data/vendor/faiss/faiss/python/python_callbacks.h +22 -0
data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +30 -12
data/vendor/faiss/faiss/utils/hamming.cpp +45 -21
data/vendor/faiss/faiss/utils/hamming.h +7 -3
data/vendor/faiss/faiss/utils/hamming_distance/avx512-inl.h +1 -1
data/vendor/faiss/faiss/utils/utils.cpp +4 -4
data/vendor/faiss/faiss/utils/utils.h +3 -3
metadata +16 -4

data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp ADDED Viewed

@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <faiss/impl/RaBitQuantizer.h>
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <vector>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/distances.h>
+namespace faiss {
+// Per-vector correction factors. They are stored in every code right after
+// the (d + 7) / 8 bytes of packed sign bits.
+struct FactorsData {
+    // ||or - c||^2 - ((metric==IP) ? ||or||^2 : 0)
+    float or_minus_c_l2sqr = 0.0f;
+    // multiplier that is applied to the estimated dot product
+    float dp_multiplier = 0.0f;
+};
+// Query-dependent constants. They are computed once in set_query() and
+// reused for every code passed to distance_to_code().
+struct QueryFactorsData {
+    // scale applied to the raw <query, sign bits> dot product
+    float c1 = 0.0f;
+    // scale applied to the number of set bits of a code
+    float c2 = 0.0f;
+    // constant term that is subtracted from the estimate
+    float c34 = 0.0f;
+    // ||qr - c||^2
+    float qr_to_c_L2sqr = 0.0f;
+    // ||qr||^2, filled in for the inner product metric only
+    float qr_norm_L2sqr = 0.0f;
+};
+// Size in bytes of a single code: one bit per dimension (rounded up to
+// whole bytes) followed by the per-vector factors.
+static size_t get_code_size(const size_t d) {
+    const size_t sign_bits_bytes = (d + 7) / 8;
+    return sign_bits_bytes + sizeof(FactorsData);
+}
+// Builds a quantizer for d-dimensional vectors; the code size passed to the
+// base Quantizer is derived from d.
+RaBitQuantizer::RaBitQuantizer(size_t d, MetricType metric)
+        : Quantizer(d, get_code_size(d)), metric_type(metric) {}
+// Training is intentionally a no-op: this function does not read its
+// arguments and does not modify the quantizer. The parameter names are
+// commented out to avoid unused-parameter warnings.
+void RaBitQuantizer::train(size_t /*n*/, const float* /*x*/) {
+    // does nothing
+}
+// Encodes n vectors using the quantizer-level default centroid.
+void RaBitQuantizer::compute_codes(
+        const float* x,
+        uint8_t* codes,
+        size_t n) const {
+    const float* default_centroid = centroid;
+    compute_codes_core(x, codes, n, default_centroid);
+}
+// Encodes n vectors of dimension d into `codes` (code_size bytes each).
+//
+// x           - n * d input values, row-major
+// codes       - output buffer of n * code_size bytes
+// n           - number of vectors
+// centroid_in - d values subtracted from every vector before encoding;
+//               nullptr means an all-zero centroid
+//
+// Every code consists of (d + 7) / 8 bytes of sign bits of (x - centroid)
+// followed by a FactorsData record.
+void RaBitQuantizer::compute_codes_core(
+        const float* x,
+        uint8_t* codes,
+        size_t n,
+        const float* centroid_in) const {
+    FAISS_ASSERT(codes != nullptr);
+    FAISS_ASSERT(x != nullptr);
+    FAISS_ASSERT(
+            (metric_type == MetricType::METRIC_L2 ||
+             metric_type == MetricType::METRIC_INNER_PRODUCT));
+    if (n == 0) {
+        return;
+    }
+    // compute some helper constants
+    const float inv_d_sqrt = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
+    const size_t bits_size = (d + 7) / 8;
+    // the loop index is signed, so compare it against a signed bound
+    const int64_t n_vectors = static_cast<int64_t>(n);
+    // compute codes
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n_vectors; i++) {
+        const float* xi = x + i * d;
+        // ||or - c||^2
+        float norm_L2sqr = 0;
+        // ||or||^2, which is equal to ||P(or)||^2 and ||P^(-1)(or)||^2
+        float or_L2sqr = 0;
+        // dot product
+        float dp_oO = 0;
+        // the code; start from all-zero bits
+        uint8_t* code = codes + i * code_size;
+        std::memset(code, 0, code_size);
+        for (size_t j = 0; j < d; j++) {
+            const float or_minus_c =
+                    xi[j] - ((centroid_in == nullptr) ? 0 : centroid_in[j]);
+            norm_L2sqr += or_minus_c * or_minus_c;
+            or_L2sqr += xi[j] * xi[j];
+            const bool xb = (or_minus_c > 0);
+            dp_oO += xb ? or_minus_c : (-or_minus_c);
+            if (xb) {
+                // enable a particular bit
+                code[j / 8] |= (1 << (j % 8));
+            }
+        }
+        // compute factors
+        // compute the inverse norm, guarding against a zero-length vector
+        const float inv_norm_L2 =
+                (std::abs(norm_L2sqr) < std::numeric_limits<float>::epsilon())
+                ? 1.0f
+                : (1.0f / std::sqrt(norm_L2sqr));
+        dp_oO *= inv_norm_L2;
+        dp_oO *= inv_d_sqrt;
+        const float inv_dp_oO =
+                (std::abs(dp_oO) < std::numeric_limits<float>::epsilon())
+                ? 1.0f
+                : (1.0f / dp_oO);
+        FactorsData fac;
+        fac.or_minus_c_l2sqr = norm_L2sqr;
+        if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
+            fac.or_minus_c_l2sqr -= or_L2sqr;
+        }
+        fac.dp_multiplier = inv_dp_oO * std::sqrt(norm_L2sqr);
+        // The factors start at a byte offset that is not guaranteed to be
+        // aligned for float access, so copy them in rather than writing
+        // through a casted pointer.
+        std::memcpy(code + bits_size, &fac, sizeof(fac));
+    }
+}
+// Decodes n codes using the quantizer-level default centroid.
+void RaBitQuantizer::decode(
+        const uint8_t* codes,
+        float* x,
+        size_t n) const {
+    const float* default_centroid = centroid;
+    decode_core(codes, x, n, default_centroid);
+}
+// Reconstructs n vectors of dimension d from their codes.
+//
+// codes       - n * code_size input bytes
+// x           - output buffer of n * d values, row-major
+// n           - number of vectors
+// centroid_in - d values added back to every reconstructed vector;
+//               nullptr means an all-zero centroid
+void RaBitQuantizer::decode_core(
+        const uint8_t* codes,
+        float* x,
+        size_t n,
+        const float* centroid_in) const {
+    FAISS_ASSERT(codes != nullptr);
+    FAISS_ASSERT(x != nullptr);
+    const float inv_d_sqrt = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
+    const size_t bits_size = (d + 7) / 8;
+    // the loop index is signed, so compare it against a signed bound
+    const int64_t n_vectors = static_cast<int64_t>(n);
+#pragma omp parallel for if (n > 1000)
+    for (int64_t i = 0; i < n_vectors; i++) {
+        const uint8_t* code = codes + i * code_size;
+        // split the code into parts: sign bits first, then the factors
+        const uint8_t* binary_data = code;
+        // The factors are not guaranteed to be aligned for float access,
+        // so copy them out rather than reading through a casted pointer.
+        FactorsData fac;
+        std::memcpy(&fac, code + bits_size, sizeof(fac));
+        for (size_t j = 0; j < d; j++) {
+            // extract j-th bit
+            const uint8_t masker = (1 << (j % 8));
+            const float bit = ((binary_data[j / 8] & masker) == masker) ? 1 : 0;
+            // compute the output value
+            x[i * d + j] = (bit - 0.5f) * fac.dp_multiplier * 2 * inv_d_sqrt +
+                    ((centroid_in == nullptr) ? 0 : centroid_in[j]);
+        }
+    }
+}
+// State shared by both RaBitQ distance computers (with and without query
+// quantization).
+struct RaBitDistanceComputer : FlatCodesDistanceComputer {
+    // number of dimensions of the vectors
+    size_t d = 0;
+    // the centroid to use; nullptr is handled as an all-zero centroid
+    const float* centroid = nullptr;
+    // the metric to compute
+    MetricType metric_type = MetricType::METRIC_L2;
+    RaBitDistanceComputer();
+    float symmetric_dis(idx_t i, idx_t j) override;
+};
+RaBitDistanceComputer::RaBitDistanceComputer() = default;
+// Distances between two stored codes are not supported; always throws.
+float RaBitDistanceComputer::symmetric_dis(idx_t /*i*/, idx_t /*j*/) {
+    FAISS_THROW_MSG("Not implemented");
+}
+// Distance computer that keeps the query in floating point (no query
+// quantization).
+struct RaBitDistanceComputerNotQ : RaBitDistanceComputer {
+    // the rotated query with the centroid subtracted, (qr - c)
+    std::vector<float> rotated_q;
+    // query-dependent constants, filled in by set_query()
+    QueryFactorsData query_fac;
+    RaBitDistanceComputerNotQ();
+    float distance_to_code(const uint8_t* code) override;
+    void set_query(const float* x) override;
+};
+RaBitDistanceComputerNotQ::RaBitDistanceComputerNotQ() = default;
+// Estimates the distance between the query given to set_query() and one
+// encoded vector.
+//
+// code - a single code: (d + 7) / 8 bytes of sign bits followed by a
+//        FactorsData record
+//
+// Returns pre_dist for METRIC_L2 and -0.5 * (pre_dist - ||q||^2) for
+// METRIC_INNER_PRODUCT.
+float RaBitDistanceComputerNotQ::distance_to_code(const uint8_t* code) {
+    FAISS_ASSERT(code != nullptr);
+    FAISS_ASSERT(
+            (metric_type == MetricType::METRIC_L2 ||
+             metric_type == MetricType::METRIC_INNER_PRODUCT));
+    // split the code into parts: sign bits first, then the factors
+    const uint8_t* binary_data = code;
+    // The factors are not guaranteed to be aligned for float access,
+    // so copy them out rather than reading through a casted pointer.
+    FactorsData fac;
+    std::memcpy(&fac, code + (d + 7) / 8, sizeof(fac));
+    // this is the baseline code
+    //
+    // compute <q,o> using floats
+    float dot_qo = 0;
+    // It was a willful decision (after the discussion) to not to pre-cache
+    //   the sum of all bits, just in order to reduce the overhead per vector.
+    uint64_t sum_q = 0;
+    for (size_t i = 0; i < d; i++) {
+        // extract i-th bit
+        const uint8_t masker = (1 << (i % 8));
+        const bool b_bit = ((binary_data[i / 8] & masker) == masker);
+        // accumulate dp
+        dot_qo += (b_bit) ? rotated_q[i] : 0;
+        // accumulate sum-of-bits
+        sum_q += (b_bit) ? 1 : 0;
+    }
+    float final_dot = 0;
+    // dot-product itself
+    final_dot += query_fac.c1 * dot_qo;
+    // normalizer coefficients
+    final_dot += query_fac.c2 * sum_q;
+    // normalizer coefficients
+    final_dot -= query_fac.c34;
+    // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
+    const float or_c_l2sqr = fac.or_minus_c_l2sqr;
+    // pre_dist = ||or - c||^2 + ||qr - c||^2 -
+    //     2 * ||or - c|| * ||qr - c|| * <q,o> - (IP ? ||or||^2 : 0)
+    const float pre_dist = or_c_l2sqr + query_fac.qr_to_c_L2sqr -
+            2 * fac.dp_multiplier * final_dot;
+    if (metric_type == MetricType::METRIC_L2) {
+        // ||or - q||^ 2
+        return pre_dist;
+    } else {
+        // metric == MetricType::METRIC_INNER_PRODUCT
+        // this is ||q||^2
+        const float query_norm_sqr = query_fac.qr_norm_L2sqr;
+        // 2 * (or, q) = (||or - q||^2 - ||q||^2 - ||or||^2)
+        return -0.5f * (pre_dist - query_norm_sqr);
+    }
+}
+// Prepares the computer for a new query: stores (qr - c) in floating point
+// and precomputes the query-dependent constants.
+//
+// x - d query values
+void RaBitDistanceComputerNotQ::set_query(const float* x) {
+    FAISS_ASSERT(x != nullptr);
+    FAISS_ASSERT(
+            (metric_type == MetricType::METRIC_L2 ||
+             metric_type == MetricType::METRIC_INNER_PRODUCT));
+    const bool has_centroid = (centroid != nullptr);
+    // squared distance from the query to the centroid
+    query_fac.qr_to_c_L2sqr = has_centroid ? fvec_L2sqr(x, centroid, d)
+                                           : fvec_norm_L2sqr(x, d);
+    // subtract c, obtain P^(-1)(qr - c), and sum its components on the fly;
+    // the query itself is not quantized
+    rotated_q.resize(d);
+    float component_sum = 0;
+    for (size_t k = 0; k < d; k++) {
+        const float shifted = x[k] - (has_centroid ? centroid[k] : 0);
+        rotated_q[k] = shifted;
+        component_sum += shifted;
+    }
+    // 1 / sqrt(d), with a guard for d == 0
+    const float inv_sqrt_d =
+            (d == 0) ? 1.0f : (1.0f / std::sqrt(static_cast<float>(d)));
+    query_fac.c1 = 2 * inv_sqrt_d;
+    query_fac.c2 = 0;
+    query_fac.c34 = component_sum * inv_sqrt_d;
+    // ||q||^2 is needed for the inner product metric only
+    if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
+        query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d);
+    }
+}
+//
+// Distance computer that scalar-quantizes the query to qb bits per
+// dimension and evaluates dot products with popcounts.
+struct RaBitDistanceComputerQ : RaBitDistanceComputer {
+    // the rotated and quantized query (qr - c), one value per dimension
+    std::vector<uint8_t> rotated_qq;
+    // the same query in the re-laid-out (bit-plane) scheme from 3.3 that
+    //    allows using popcounts for computing the distance
+    std::vector<uint8_t> rearranged_rotated_qq;
+    // query-dependent constants, filled in by set_query()
+    QueryFactorsData query_fac;
+    // the number of bits for SQ quantization of the query (qb > 0)
+    uint8_t qb = 8;
+    // the smallest value divisible by 8 that is not smaller than dim
+    size_t popcount_aligned_dim = 0;
+    RaBitDistanceComputerQ();
+    float distance_to_code(const uint8_t* code) override;
+    void set_query(const float* x) override;
+};
+RaBitDistanceComputerQ::RaBitDistanceComputerQ() = default;
+// Estimates the distance between the quantized query prepared by
+// set_query() and one encoded vector.
+//
+// code - a single code: (d + 7) / 8 bytes of sign bits followed by a
+//        FactorsData record
+//
+// Returns pre_dist for METRIC_L2 and -0.5 * (pre_dist - ||q||^2) for
+// METRIC_INNER_PRODUCT.
+float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
+    FAISS_ASSERT(code != nullptr);
+    FAISS_ASSERT(
+            (metric_type == MetricType::METRIC_L2 ||
+             metric_type == MetricType::METRIC_INNER_PRODUCT));
+    // split the code into parts: sign bits first, then the factors
+    const uint8_t* binary_data = code;
+    // The factors are not guaranteed to be aligned for float access,
+    // so copy them out rather than reading through a casted pointer.
+    FactorsData fac;
+    std::memcpy(&fac, code + (d + 7) / 8, sizeof(fac));
+    // <q,o> is accumulated with integers, one query bit-plane at a time:
+    //   plane j contributes popcount(plane_j & code_bits) << j.
+    const size_t di_8b = (d + 7) / 8;
+    const size_t di_64b = (di_8b / 8) * 8;
+    uint64_t dot_qo = 0;
+    for (size_t j = 0; j < qb; j++) {
+        const uint8_t* query_j = rearranged_rotated_qq.data() + j * di_8b;
+        // process 64-bit popcounts; the byte buffers carry no 8-byte
+        // alignment guarantee, so the words are loaded with memcpy
+        uint64_t count_dot = 0;
+        for (size_t i = 0; i < di_64b; i += 8) {
+            uint64_t qv;
+            uint64_t yv;
+            std::memcpy(&qv, query_j + i, sizeof(qv));
+            std::memcpy(&yv, binary_data + i, sizeof(yv));
+            count_dot += __builtin_popcountll(qv & yv);
+        }
+        // process leftovers
+        for (size_t i = di_64b; i < di_8b; i++) {
+            const auto qv = query_j[i];
+            const auto yv = binary_data[i];
+            count_dot += __builtin_popcount(qv & yv);
+        }
+        dot_qo += (count_dot << j);
+    }
+    // It was a willful decision (after the discussion) to not to pre-cache
+    //   the sum of all bits, just in order to reduce the overhead per vector.
+    uint64_t sum_q = 0;
+    {
+        // process 64-bit popcounts
+        for (size_t i = 0; i < di_64b; i += 8) {
+            uint64_t yv;
+            std::memcpy(&yv, binary_data + i, sizeof(yv));
+            sum_q += __builtin_popcountll(yv);
+        }
+        // process leftovers
+        for (size_t i = di_64b; i < di_8b; i++) {
+            const auto yv = binary_data[i];
+            sum_q += __builtin_popcount(yv);
+        }
+    }
+    float final_dot = 0;
+    // dot-product itself
+    final_dot += query_fac.c1 * dot_qo;
+    // normalizer coefficients
+    final_dot += query_fac.c2 * sum_q;
+    // normalizer coefficients
+    final_dot -= query_fac.c34;
+    // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
+    const float or_c_l2sqr = fac.or_minus_c_l2sqr;
+    // pre_dist = ||or - c||^2 + ||qr - c||^2 -
+    //     2 * ||or - c|| * ||qr - c|| * <q,o> - (IP ? ||or||^2 : 0)
+    const float pre_dist = or_c_l2sqr + query_fac.qr_to_c_L2sqr -
+            2 * fac.dp_multiplier * final_dot;
+    if (metric_type == MetricType::METRIC_L2) {
+        // ||or - q||^ 2
+        return pre_dist;
+    } else {
+        // metric == MetricType::METRIC_INNER_PRODUCT
+        // this is ||q||^2
+        const float query_norm_sqr = query_fac.qr_norm_L2sqr;
+        // 2 * (or, q) = (||or - q||^2 - ||q||^2 - ||or||^2)
+        return -0.5f * (pre_dist - query_norm_sqr);
+    }
+}
+// Prepares the computer for a new query: subtracts the centroid, scalar
+// quantizes the result to qb bits per dimension, re-lays it out as bit
+// planes for popcount-based dot products and precomputes the query-dependent
+// constants.
+//
+// x - d query values
+//
+// NOTE(review): quantized levels are stored in uint8_t and clamped to
+// [0, 255], so qb is presumably expected to be in [1, 8] - confirm with
+// callers.
+void RaBitDistanceComputerQ::set_query(const float* x) {
+    FAISS_ASSERT(x != nullptr);
+    FAISS_ASSERT(
+            (metric_type == MetricType::METRIC_L2 ||
+             metric_type == MetricType::METRIC_INNER_PRODUCT));
+    // compute the distance from the query to the centroid
+    if (centroid != nullptr) {
+        query_fac.qr_to_c_L2sqr = fvec_L2sqr(x, centroid, d);
+    } else {
+        query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d);
+    }
+    // allocate space
+    rotated_qq.resize(d);
+    // rotate the query
+    std::vector<float> rotated_q(d);
+    for (size_t i = 0; i < d; i++) {
+        rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]);
+    }
+    // compute some numbers
+    const float inv_d = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
+    // quantize the query. compute min and max
+    float v_min = std::numeric_limits<float>::max();
+    float v_max = std::numeric_limits<float>::lowest();
+    for (size_t i = 0; i < d; i++) {
+        const float v_q = rotated_q[i];
+        v_min = std::min(v_min, v_q);
+        v_max = std::max(v_max, v_q);
+    }
+    if (d == 0) {
+        // no components: avoid propagating the +/-max sentinels
+        v_min = 0;
+        v_max = 0;
+    }
+    const float pow_2_qb = 1 << qb;
+    const float delta = (v_max - v_min) / (pow_2_qb - 1);
+    // A constant query gives delta == 0; dividing by it would yield inf and
+    // then NaN levels. Use 0 so that every component maps to level 0.
+    const float inv_delta = (delta > 0) ? (1.0f / delta) : 0.0f;
+    size_t sum_qq = 0;
+    for (size_t i = 0; i < d; i++) {
+        const float v_q = rotated_q[i];
+        // a default non-randomized SQ
+        const float level = std::round((v_q - v_min) * inv_delta);
+        // clamp in the float domain; a NaN or negative level maps to 0 and
+        // an overly large (or infinite) one to 255
+        uint8_t v_qq = 0;
+        if (level >= 255.0f) {
+            v_qq = 255;
+        } else if (level > 0.0f) {
+            v_qq = static_cast<uint8_t>(level);
+        }
+        rotated_qq[i] = v_qq;
+        // accumulate exactly the value that has been stored
+        sum_qq += v_qq;
+    }
+    // rearrange the query vector into qb bit planes of (d + 7) / 8 bytes
+    popcount_aligned_dim = ((d + 7) / 8) * 8;
+    size_t offset = (d + 7) / 8;
+    rearranged_rotated_qq.assign(offset * qb, 0);
+    for (size_t idim = 0; idim < d; idim++) {
+        for (size_t iv = 0; iv < qb; iv++) {
+            const bool bit = ((rotated_qq[idim] & (1 << iv)) != 0);
+            rearranged_rotated_qq[iv * offset + idim / 8] |=
+                    bit ? (1 << (idim % 8)) : 0;
+        }
+    }
+    query_fac.c1 = 2 * delta * inv_d;
+    query_fac.c2 = 2 * v_min * inv_d;
+    query_fac.c34 = inv_d * (delta * sum_qq + d * v_min);
+    if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
+        // precompute if needed
+        query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d);
+    }
+}
+// Creates a distance computer bound to this quantizer's metric and
+// dimensionality. The caller receives a released raw pointer.
+//
+// qb          - 0 for a computer that keeps the query in floats,
+//               > 0 for a computer that quantizes the query to qb bits
+// centroid_in - the centroid the computer will use (may be nullptr)
+FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
+        uint8_t qb,
+        const float* centroid_in) const {
+    // fills in the fields shared by both kinds of distance computers
+    const auto init_common = [this, centroid_in](RaBitDistanceComputer& dc) {
+        dc.metric_type = metric_type;
+        dc.d = d;
+        dc.centroid = centroid_in;
+    };
+    if (qb != 0) {
+        auto quantized_dc = std::make_unique<RaBitDistanceComputerQ>();
+        init_common(*quantized_dc);
+        quantized_dc->qb = qb;
+        return quantized_dc.release();
+    }
+    auto float_dc = std::make_unique<RaBitDistanceComputerNotQ>();
+    init_common(*float_dc);
+    return float_dc.release();
+}
+} // namespace faiss

data/vendor/faiss/faiss/impl/RaBitQuantizer.h ADDED Viewed

@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <faiss/MetricType.h>
+#include <faiss/impl/DistanceComputer.h>
+#include <faiss/impl/Quantizer.h>
+namespace faiss {
+// The reference implementation of RaBitQ, https://arxiv.org/pdf/2405.12497:
+//   Jianyang Gao, Cheng Long, "RaBitQ: Quantizing High-Dimensional Vectors
+//   with a Theoretical Error Bound for Approximate Nearest Neighbor Search".
+//
+// It is assumed that the Random Matrix Rotation is performed externally.
+struct RaBitQuantizer : Quantizer {
+    // All RaBitQ operations are performed relative to a centroid, which
+    //   needs to be provided externally (!). A nullptr value implies that
+    //   the centroid consists of zero values.
+    // This is the default value that can be overridden using XYZ_core() calls.
+    //   Such a customization is needed for IVF calls.
+    //
+    // This particular pointer will NOT be serialized.
+    float* centroid = nullptr;
+    // RaBitQ code computations are independent of a metric. But it is needed
+    //   to store some additional fp32 constants together with a quantized code.
+    //   A decision was made to make this quantizer as space efficient as
+    //   possible. Thus, a quantizer has to introduce a metric.
+    MetricType metric_type = MetricType::METRIC_L2;
+    RaBitQuantizer(size_t d = 0, MetricType metric = MetricType::METRIC_L2);
+    void train(size_t n, const float* x) override;
+    // every vector is expected to take (d + 7) / 8 + sizeof(FactorsData) bytes.
+    void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
+    void compute_codes_core(
+            const float* x,
+            uint8_t* codes,
+            size_t n,
+            const float* centroid_in) const;
+    // The decode output is heavily geared towards maintaining the IP, not L2.
+    // This means that the reconstructed codes may be less accurate than one may
+    //   expect, if one computes an L2 distance between a reconstructed code and
+    //   the corresponding original vector.
+    // But the value of the dot product between a query and the original vector
+    //   might be very close to the value of the dot product between a query and
+    //   the reconstructed code.
+    // Basically, it seems to be related to the distributions of values, not
+    //   the values themselves.
+    void decode(const uint8_t* codes, float* x, size_t n) const override;
+    void decode_core(
+            const uint8_t* codes,
+            float* x,
+            size_t n,
+            const float* centroid_in) const;
+    // Returns a distance computer.
+    // specify qb = 0 to get a DC that does not quantize a query
+    // specify qb > 0 to get a DC that uses an SQ qb-bit query
+    FlatCodesDistanceComputer* get_distance_computer(
+            uint8_t qb,
+            const float* centroid_in = nullptr) const;
+};
+} // namespace faiss

data/vendor/faiss/faiss/impl/ResultHandler.h CHANGED Viewed

@@ -534,7 +534,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
             try {
                 // finalize the partial result
                 pres.finalize();
-            } catch (const faiss::FaissException& e) {
+            } catch ([[maybe_unused]] const faiss::FaissException& e) {
                 // Do nothing if allocation fails in finalizing partial results.
 #ifndef NDEBUG
                 std::cerr << e.what() << std::endl;
@@ -598,7 +598,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
             if (partial_results.size() > 0) {
                 RangeSearchPartialResult::merge(partial_results);
             }
-        } catch (const faiss::FaissException& e) {
+        } catch ([[maybe_unused]] const faiss::FaissException& e) {
             // Do nothing if allocation fails in merge.
 #ifndef NDEBUG
             std::cerr << e.what() << std::endl;

data/vendor/faiss/faiss/impl/code_distance/code_distance-sve.h CHANGED Viewed

@@ -14,6 +14,7 @@
 #include <tuple>
 #include <type_traits>
+#include <faiss/impl/ProductQuantizer.h>
 #include <faiss/impl/code_distance/code_distance-generic.h>
 namespace faiss {
@@ -48,7 +49,7 @@ static inline void distance_codes_kernel(
     partialSum = svadd_f32_m(pg, partialSum, collected);
 }
-static float distance_single_code_sve_for_small_m(
+static inline float distance_single_code_sve_for_small_m(
         // the product quantizer
         const size_t M,
         // precomputed distances, layout (M, ksub)
@@ -196,7 +197,7 @@ distance_four_codes_sve(
             result3);
 }
-static void distance_four_codes_sve_for_small_m(
+static inline void distance_four_codes_sve_for_small_m(
         // the product quantizer
         const size_t M,
         // precomputed distances, layout (M, ksub)
@@ -217,8 +218,6 @@ static void distance_four_codes_sve_for_small_m(
     const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
-    const auto quad_lanes = svcntw();
     // loop
     const auto pg = svwhilelt_b32_u64(0, M);