faiss 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/faiss/index.cpp +25 -6
  4. data/ext/faiss/index_binary.cpp +17 -4
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  8. data/vendor/faiss/faiss/AutoTune.h +1 -1
  9. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  10. data/vendor/faiss/faiss/Clustering.h +2 -2
  11. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  12. data/vendor/faiss/faiss/IVFlib.h +1 -1
  13. data/vendor/faiss/faiss/Index.h +10 -10
  14. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  15. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  18. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  25. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  26. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  27. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  29. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  30. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  31. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  32. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  33. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  34. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  37. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  38. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  39. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  40. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  42. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  43. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  44. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  46. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  48. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  50. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  52. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  53. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  55. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  57. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  62. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  63. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  64. data/vendor/faiss/faiss/MetricType.h +1 -1
  65. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  66. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  67. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  68. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  69. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  72. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  73. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  75. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  77. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  79. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  80. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  81. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  82. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  84. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  85. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  86. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  89. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  91. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  92. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  93. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  94. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  95. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  97. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  98. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  101. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  103. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  104. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  105. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  107. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  108. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  109. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  110. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  111. data/vendor/faiss/faiss/impl/io.h +4 -4
  112. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  113. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  114. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  115. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  117. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  118. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  121. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  122. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  123. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  124. data/vendor/faiss/faiss/index_factory.h +1 -1
  125. data/vendor/faiss/faiss/index_io.h +1 -1
  126. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  127. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  128. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  129. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  130. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  131. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  132. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  133. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  134. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  136. data/vendor/faiss/faiss/utils/distances.h +2 -2
  137. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  138. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  139. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  140. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  141. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  142. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  143. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  144. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  145. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  146. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  149. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  150. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  151. data/vendor/faiss/faiss/utils/utils.h +2 -2
  152. metadata +12 -1
@@ -8,31 +8,20 @@
8
8
  #include <faiss/impl/RaBitQuantizer.h>
9
9
 
10
10
  #include <faiss/impl/FaissAssert.h>
11
+ #include <faiss/impl/RaBitQUtils.h>
11
12
  #include <faiss/utils/distances.h>
12
13
  #include <faiss/utils/rabitq_simd.h>
13
14
  #include <algorithm>
14
15
  #include <cmath>
15
16
  #include <cstring>
16
- #include <limits>
17
17
  #include <memory>
18
18
  #include <vector>
19
19
 
20
20
  namespace faiss {
21
21
 
22
- struct FactorsData {
23
- // ||or - c||^2 - ((metric==IP) ? ||or||^2 : 0)
24
- float or_minus_c_l2sqr = 0;
25
- float dp_multiplier = 0;
26
- };
27
-
28
- struct QueryFactorsData {
29
- float c1 = 0;
30
- float c2 = 0;
31
- float c34 = 0;
32
-
33
- float qr_to_c_L2sqr = 0;
34
- float qr_norm_L2sqr = 0;
35
- };
22
+ // Import shared utilities from RaBitQUtils
23
+ using rabitq_utils::FactorsData;
24
+ using rabitq_utils::QueryFactorsData;
36
25
 
37
26
  static size_t get_code_size(const size_t d) {
38
27
  return (d + 7) / 8 + sizeof(FactorsData);
@@ -65,19 +54,9 @@ void RaBitQuantizer::compute_codes_core(
65
54
  return;
66
55
  }
67
56
 
68
- // compute some helper constants
69
- const float inv_d_sqrt = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
70
-
71
57
  // compute codes
72
58
  #pragma omp parallel for if (n > 1000)
73
59
  for (int64_t i = 0; i < n; i++) {
74
- // ||or - c||^2
75
- float norm_L2sqr = 0;
76
- // ||or||^2, which is equal to ||P(or)||^2 and ||P^(-1)(or)||^2
77
- float or_L2sqr = 0;
78
- // dot product
79
- float dp_oO = 0;
80
-
81
60
  // the code
82
61
  uint8_t* code = codes + i * code_size;
83
62
  FactorsData* fac = reinterpret_cast<FactorsData*>(code + (d + 7) / 8);
@@ -87,46 +66,25 @@ void RaBitQuantizer::compute_codes_core(
87
66
  memset(code, 0, code_size);
88
67
  }
89
68
 
90
- for (size_t j = 0; j < d; j++) {
91
- const float or_minus_c = x[i * d + j] -
92
- ((centroid_in == nullptr) ? 0 : centroid_in[j]);
93
- norm_L2sqr += or_minus_c * or_minus_c;
94
- or_L2sqr += x[i * d + j] * x[i * d + j];
69
+ const float* x_row = x + i * d;
95
70
 
96
- const bool xb = (or_minus_c > 0);
71
+ // Use shared utilities for computing factors
72
+ *fac = rabitq_utils::compute_vector_factors(
73
+ x_row, d, centroid_in, metric_type);
97
74
 
98
- dp_oO += xb ? or_minus_c : (-or_minus_c);
75
+ // Pack bits into standard RaBitQ format
76
+ for (size_t j = 0; j < d; j++) {
77
+ const float x_val = x_row[j];
78
+ const float centroid_val =
79
+ (centroid_in == nullptr) ? 0.0f : centroid_in[j];
80
+ const float or_minus_c = x_val - centroid_val;
81
+ const bool xb = (or_minus_c > 0.0f);
99
82
 
100
83
  // store the output data
101
- if (code != nullptr) {
102
- if (xb) {
103
- // enable a particular bit
104
- code[j / 8] |= (1 << (j % 8));
105
- }
84
+ if (code != nullptr && xb) {
85
+ rabitq_utils::set_bit_standard(code, j);
106
86
  }
107
87
  }
108
-
109
- // compute factors
110
-
111
- // compute the inverse norm
112
- const float inv_norm_L2 =
113
- (std::abs(norm_L2sqr) < std::numeric_limits<float>::epsilon())
114
- ? 1.0f
115
- : (1.0f / std::sqrt(norm_L2sqr));
116
- dp_oO *= inv_norm_L2;
117
- dp_oO *= inv_d_sqrt;
118
-
119
- const float inv_dp_oO =
120
- (std::abs(dp_oO) < std::numeric_limits<float>::epsilon())
121
- ? 1.0f
122
- : (1.0f / dp_oO);
123
-
124
- fac->or_minus_c_l2sqr = norm_L2sqr;
125
- if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
126
- fac->or_minus_c_l2sqr -= or_L2sqr;
127
- }
128
-
129
- fac->dp_multiplier = inv_dp_oO * std::sqrt(norm_L2sqr);
130
88
  }
131
89
  }
132
90
 
@@ -310,6 +268,7 @@ struct RaBitDistanceComputerQ : RaBitDistanceComputer {
310
268
 
311
269
  // the number of bits for SQ quantization of the query (qb > 0)
312
270
  uint8_t qb = 8;
271
+ bool centered = false;
313
272
  // the smallest value divisible by 8 that is not smaller than dim
314
273
  size_t popcount_aligned_dim = 0;
315
274
 
@@ -329,57 +288,35 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
329
288
  metric_type == MetricType::METRIC_INNER_PRODUCT));
330
289
 
331
290
  // split the code into parts
291
+ size_t size = (d + 7) / 8;
332
292
  const uint8_t* binary_data = code;
333
- const FactorsData* fac =
334
- reinterpret_cast<const FactorsData*>(code + (d + 7) / 8);
293
+ const FactorsData* fac = reinterpret_cast<const FactorsData*>(code + size);
335
294
 
336
- // // this is the baseline code
337
- // //
338
- // // compute <q,o> using integers
339
- // size_t dot_qo = 0;
340
- // for (size_t i = 0; i < d; i++) {
341
- // // extract i-th bit
342
- // const uint8_t masker = (1 << (i % 8));
343
- // const uint8_t bit = ((binary_data[i / 8] & masker) == masker) ? 1 :
344
- // 0;
345
- //
346
- // // accumulate dp
347
- // dot_qo += bit * rotated_qq[i];
348
- // }
349
-
350
- // this is the scheme for popcount
351
- const size_t di_8b = (d + 7) / 8;
352
- const size_t di_64b = (di_8b / 8) * 8;
353
-
354
- // Use the optimized popcount function from rabitq_simd.h
355
- float dot_qo =
356
- rabitq_dp_popcnt(rearranged_rotated_qq.data(), binary_data, d, qb);
295
+ // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
296
+ float final_dot = 0;
297
+ if (centered) {
298
+ int64_t int_dot = ((1 << qb) - 1) * d;
299
+ int_dot -= 2 *
300
+ rabitq::bitwise_xor_dot_product(
301
+ rearranged_rotated_qq.data(), binary_data, size, qb);
302
+ final_dot += int_dot * query_fac.int_dot_scale;
303
+ } else {
304
+ // See RaBitDistanceComputerNotQ::distance_to_code() for baseline code.
305
+ auto dot_qo = rabitq::bitwise_and_dot_product(
306
+ rearranged_rotated_qq.data(), binary_data, size, qb);
357
307
 
358
- // It was a willful decision (after the discussion) to not to pre-cache
359
- // the sum of all bits, just in order to reduce the overhead per vector.
360
- uint64_t sum_q = 0;
361
- {
308
+ // It was a willful decision (after the discussion) not to pre-cache
309
+ // the sum of all bits, just in order to reduce the overhead per vector.
362
310
  // process 64-bit popcounts
363
- for (size_t i = 0; i < di_64b; i += 8) {
364
- const auto yv = *(const uint64_t*)(binary_data + i);
365
- sum_q += __builtin_popcountll(yv);
366
- }
367
-
368
- // process leftovers
369
- for (size_t i = di_64b; i < di_8b; i++) {
370
- const auto yv = *(binary_data + i);
371
- sum_q += __builtin_popcount(yv);
372
- }
311
+ auto sum_q = rabitq::popcount(binary_data, size);
312
+ // dot-product itself
313
+ final_dot += query_fac.c1 * dot_qo;
314
+ // normalizer coefficients
315
+ final_dot += query_fac.c2 * sum_q;
316
+ // normalizer coefficients
317
+ final_dot -= query_fac.c34;
373
318
  }
374
319
 
375
- float final_dot = 0;
376
- // dot-product itself
377
- final_dot += query_fac.c1 * dot_qo;
378
- // normalizer coefficients
379
- final_dot += query_fac.c2 * sum_q;
380
- // normalizer coefficients
381
- final_dot -= query_fac.c34;
382
-
383
320
  // this is ||or - c||^2 - (IP ? ||or||^2 : 0)
384
321
  const float or_c_l2sqr = fac->or_minus_c_l2sqr;
385
322
 
@@ -402,57 +339,23 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
402
339
  }
403
340
  }
404
341
 
342
+ // Use shared constant from RaBitQUtils
343
+ using rabitq_utils::Z_MAX_BY_QB;
344
+
405
345
  void RaBitDistanceComputerQ::set_query(const float* x) {
406
346
  FAISS_ASSERT(x != nullptr);
407
347
  FAISS_ASSERT(
408
348
  (metric_type == MetricType::METRIC_L2 ||
409
349
  metric_type == MetricType::METRIC_INNER_PRODUCT));
350
+ FAISS_THROW_IF_NOT(qb <= 8);
351
+ FAISS_THROW_IF_NOT(qb > 0);
410
352
 
411
- // compute the distance from the query to the centroid
412
- if (centroid != nullptr) {
413
- query_fac.qr_to_c_L2sqr = fvec_L2sqr(x, centroid, d);
414
- } else {
415
- query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d);
416
- }
417
-
418
- // allocate space
419
- rotated_qq.resize(d);
420
-
421
- // rotate the query
422
- std::vector<float> rotated_q(d);
423
- for (size_t i = 0; i < d; i++) {
424
- rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]);
425
- }
426
-
427
- // compute some numbers
428
- const float inv_d = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
429
-
430
- // quantize the query. compute min and max
431
- float v_min = std::numeric_limits<float>::max();
432
- float v_max = std::numeric_limits<float>::lowest();
433
- for (size_t i = 0; i < d; i++) {
434
- const float v_q = rotated_q[i];
435
- v_min = std::min(v_min, v_q);
436
- v_max = std::max(v_max, v_q);
437
- }
438
-
439
- const float pow_2_qb = 1 << qb;
440
-
441
- const float delta = (v_max - v_min) / (pow_2_qb - 1);
442
- const float inv_delta = 1.0f / delta;
443
-
444
- size_t sum_qq = 0;
445
- for (int32_t i = 0; i < d; i++) {
446
- const float v_q = rotated_q[i];
447
-
448
- // a default non-randomized SQ
449
- const int v_qq = std::round((v_q - v_min) * inv_delta);
450
-
451
- rotated_qq[i] = std::min(255, std::max(0, v_qq));
452
- sum_qq += v_qq;
453
- }
353
+ // Use shared utilities for core query factor computation
354
+ std::vector<float> rotated_q;
355
+ query_fac = rabitq_utils::compute_query_factors(
356
+ x, d, centroid, qb, centered, metric_type, rotated_q, rotated_qq);
454
357
 
455
- // rearrange the query vector
358
+ // Rearrange the query vector for SIMD operations (RaBitQuantizer-specific)
456
359
  popcount_aligned_dim = ((d + 7) / 8) * 8;
457
360
  size_t offset = (d + 7) / 8;
458
361
 
@@ -466,20 +369,12 @@ void RaBitDistanceComputerQ::set_query(const float* x) {
466
369
  bit ? (1 << (idim % 8)) : 0;
467
370
  }
468
371
  }
469
-
470
- query_fac.c1 = 2 * delta * inv_d;
471
- query_fac.c2 = 2 * v_min * inv_d;
472
- query_fac.c34 = inv_d * (delta * sum_qq + d * v_min);
473
-
474
- if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
475
- // precompute if needed
476
- query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d);
477
- }
478
372
  }
479
373
 
480
374
  FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
481
375
  uint8_t qb,
482
- const float* centroid_in) const {
376
+ const float* centroid_in,
377
+ bool centered) const {
483
378
  if (qb == 0) {
484
379
  auto dc = std::make_unique<RaBitDistanceComputerNotQ>();
485
380
  dc->metric_type = metric_type;
@@ -493,6 +388,7 @@ FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
493
388
  dc->d = d;
494
389
  dc->centroid = centroid_in;
495
390
  dc->qb = qb;
391
+ dc->centered = centered;
496
392
 
497
393
  return dc.release();
498
394
  }
@@ -72,7 +72,8 @@ struct RaBitQuantizer : Quantizer {
72
72
  // specify qb > 0 to have SQ qb-bits query
73
73
  FlatCodesDistanceComputer* get_distance_computer(
74
74
  uint8_t qb,
75
- const float* centroid_in = nullptr) const;
75
+ const float* centroid_in = nullptr,
76
+ bool centered = false) const;
76
77
  };
77
78
 
78
79
  } // namespace faiss
@@ -49,7 +49,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
49
49
  * first element of the beam (faster but less accurate) */
50
50
  static const int Train_top_beam = 1024;
51
51
 
52
- /** set this bit to *not* autmatically compute the codebook tables
52
+ /** set this bit to *not* automatically compute the codebook tables
53
53
  * after training */
54
54
  static const int Skip_codebook_tables = 2048;
55
55
 
@@ -26,11 +26,11 @@ namespace faiss {
26
26
  * The classes below are intended to be used as template arguments
27
27
  * they handle results for batches of queries (size nq).
28
28
  * They can be called in two ways:
29
- * - by instanciating a SingleResultHandler that tracks results for a single
29
+ * - by instantiating a SingleResultHandler that tracks results for a single
30
30
  * query
31
31
  * - with begin_multiple/add_results/end_multiple calls where a whole block of
32
32
  * results is submitted
33
- * All classes are templated on C which to define wheter the min or the max of
33
+ * All classes are templated on C which to define whether the min or the max of
34
34
  * results is to be kept, and on sel, so that the codepaths for with / without
35
35
  * selector can be separated at compile time.
36
36
  *****************************************************************/
@@ -306,7 +306,7 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
306
306
  *
307
307
  * A reservoir is a result array of size capacity > n (number of requested
308
308
  * results) all results below a threshold are stored in an arbitrary order.
309
- *When the capacity is reached, a new threshold is chosen by partitionning
309
+ *When the capacity is reached, a new threshold is chosen by partitioning
310
310
  *the distance array.
311
311
  *****************************************************************/
312
312
 
@@ -572,7 +572,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
572
572
  RangeSearchPartialResult* pres;
573
573
  // there is one RangeSearchPartialResult structure per j0
574
574
  // (= block of columns of the large distance matrix)
575
- // it is a bit tricky to find the poper PartialResult structure
575
+ // it is a bit tricky to find the proper PartialResult structure
576
576
  // because the inner loop is on db not on queries.
577
577
 
578
578
  if (pr < j0s.size() && j0 == j0s[pr]) {
@@ -321,7 +321,7 @@ struct Codec6bit {
321
321
  static FAISS_ALWAYS_INLINE __m256
322
322
  decode_8_components(const uint8_t* code, int i) {
323
323
  // // Faster code for Intel CPUs or AMD Zen3+, just keeping it here
324
- // // for the reference, maybe, it becomes used oned day.
324
+ // // for the reference, maybe, it becomes used one day.
325
325
  // const uint16_t* data16 = (const uint16_t*)(code + (i >> 2) * 3);
326
326
  // const uint32_t* data32 = (const uint32_t*)data16;
327
327
  // const uint64_t val = *data32 + ((uint64_t)data16[2] << 32);
@@ -40,7 +40,7 @@ struct ScalarQuantizer : Quantizer {
40
40
  QuantizerType qtype = QT_8bit;
41
41
 
42
42
  /** The uniform encoder can estimate the range of representable
43
- * values of the unform encoder using different statistics. Here
43
+ * values of the uniform encoder using different statistics. Here
44
44
  * rs = rangestat_arg */
45
45
 
46
46
  // rangestat_arg.
@@ -5,6 +5,8 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
+ #pragma once
9
+
8
10
  #include <faiss/impl/FaissAssert.h>
9
11
  #include <exception>
10
12
  #include <iostream>
@@ -75,10 +77,11 @@ void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
75
77
  }
76
78
  }
77
79
 
78
- indices_.emplace_back(std::make_pair(
79
- index,
80
- std::unique_ptr<WorkerThread>(
81
- isThreaded_ ? new WorkerThread : nullptr)));
80
+ indices_.emplace_back(
81
+ std::make_pair(
82
+ index,
83
+ std::unique_ptr<WorkerThread>(
84
+ isThreaded_ ? new WorkerThread : nullptr)));
82
85
 
83
86
  onAfterAddIndex(index);
84
87
  }
@@ -29,11 +29,13 @@
29
29
  #include <faiss/IndexIVFAdditiveQuantizer.h>
30
30
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
31
31
  #include <faiss/IndexIVFFlat.h>
32
+ #include <faiss/IndexIVFFlatPanorama.h>
32
33
  #include <faiss/IndexIVFIndependentQuantizer.h>
33
34
  #include <faiss/IndexIVFPQ.h>
34
35
  #include <faiss/IndexIVFPQFastScan.h>
35
36
  #include <faiss/IndexIVFPQR.h>
36
37
  #include <faiss/IndexIVFRaBitQ.h>
38
+ #include <faiss/IndexIVFRaBitQFastScan.h>
37
39
  #include <faiss/IndexIVFSpectralHash.h>
38
40
  #include <faiss/IndexLSH.h>
39
41
  #include <faiss/IndexLattice.h>
@@ -43,6 +45,7 @@
43
45
  #include <faiss/IndexPQFastScan.h>
44
46
  #include <faiss/IndexPreTransform.h>
45
47
  #include <faiss/IndexRaBitQ.h>
48
+ #include <faiss/IndexRaBitQFastScan.h>
46
49
  #include <faiss/IndexRefine.h>
47
50
  #include <faiss/IndexRowwiseMinMax.h>
48
51
  #include <faiss/IndexScalarQuantizer.h>
@@ -68,9 +71,10 @@ namespace faiss {
68
71
  **************************************************************/
69
72
 
70
73
  // This is a baseline functionality for reading mmapped and zerocopied vector.
71
- // * if `beforeknown_size` is defined, then a size of the vector won't be read.
74
+ // * if `beforeknown_size` is defined, then a size of the vector won't be
75
+ // read.
72
76
  // * if `size_multiplier` is defined, then a size will be multiplied by it.
73
- // * returns true is the case was handled; ownerwise, false
77
+ // * returns true if the case was handled; otherwise, false
74
78
  template <typename VectorT>
75
79
  bool read_vector_base(
76
80
  VectorT& target,
@@ -181,7 +185,7 @@ void read_vector(VectorT& target, IOReader* f) {
181
185
  // a replacement for READXBVECTOR
182
186
  template <typename VectorT>
183
187
  void read_xb_vector(VectorT& target, IOReader* f) {
184
- // size is not known beforehand, nultiply the size 4x
188
+ // size is not known beforehand, multiply the size 4x
185
189
  if (read_vector_base<VectorT>(target, f, std::nullopt, 4)) {
186
190
  return;
187
191
  }
@@ -325,6 +329,34 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
325
329
  "read_InvertedLists:"
326
330
  " WARN! inverted lists not stored with IVF object\n");
327
331
  return nullptr;
332
+ } else if (h == fourcc("ilpn") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
333
+ size_t nlist, code_size, n_levels;
334
+ READ1(nlist);
335
+ READ1(code_size);
336
+ READ1(n_levels);
337
+ auto ailp = new ArrayInvertedListsPanorama(nlist, code_size, n_levels);
338
+ std::vector<size_t> sizes(nlist);
339
+ read_ArrayInvertedLists_sizes(f, sizes);
340
+ for (size_t i = 0; i < nlist; i++) {
341
+ ailp->ids[i].resize(sizes[i]);
342
+ size_t num_elems =
343
+ ((sizes[i] + ArrayInvertedListsPanorama::kBatchSize - 1) /
344
+ ArrayInvertedListsPanorama::kBatchSize) *
345
+ ArrayInvertedListsPanorama::kBatchSize;
346
+ ailp->codes[i].resize(num_elems * code_size);
347
+ ailp->cum_sums[i].resize(num_elems * (n_levels + 1));
348
+ }
349
+ for (size_t i = 0; i < nlist; i++) {
350
+ size_t n = sizes[i];
351
+ if (n > 0) {
352
+ read_vector_with_known_size(
353
+ ailp->codes[i], f, ailp->codes[i].size());
354
+ read_vector_with_known_size(ailp->ids[i], f, n);
355
+ read_vector_with_known_size(
356
+ ailp->cum_sums[i], f, ailp->cum_sums[i].size());
357
+ }
358
+ }
359
+ return ailp;
328
360
  } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
329
361
  auto ails = new ArrayInvertedLists(0, 0);
330
362
  READ1(ails->nlist);
@@ -927,6 +959,13 @@ Index* read_index(IOReader* f, int io_flags) {
927
959
  }
928
960
  read_InvertedLists(ivfl, f, io_flags);
929
961
  idx = ivfl;
962
+ } else if (h == fourcc("IwPn")) {
963
+ IndexIVFFlatPanorama* ivfp = new IndexIVFFlatPanorama();
964
+ read_ivf_header(ivfp, f);
965
+ ivfp->code_size = ivfp->d * sizeof(float);
966
+ READ1(ivfp->n_levels);
967
+ read_InvertedLists(ivfp, f, io_flags);
968
+ idx = ivfp;
930
969
  } else if (h == fourcc("IwFl")) {
931
970
  IndexIVFFlat* ivfl = new IndexIVFFlat();
932
971
  read_ivf_header(ivfl, f);
@@ -1224,6 +1263,27 @@ Index* read_index(IOReader* f, int io_flags) {
1224
1263
  imm->own_fields = true;
1225
1264
 
1226
1265
  idx = imm;
1266
+ } else if (h == fourcc("Irfs")) {
1267
+ IndexRaBitQFastScan* idxqfs = new IndexRaBitQFastScan();
1268
+ read_index_header(idxqfs, f);
1269
+ read_RaBitQuantizer(&idxqfs->rabitq, f);
1270
+ READVECTOR(idxqfs->center);
1271
+ READ1(idxqfs->qb);
1272
+ READVECTOR(idxqfs->factors_storage);
1273
+ READ1(idxqfs->bbs);
1274
+ READ1(idxqfs->ntotal2);
1275
+ READ1(idxqfs->M2);
1276
+ READ1(idxqfs->code_size);
1277
+
1278
+ // Need to initialize the FastScan base class fields
1279
+ const size_t M_fastscan = (idxqfs->d + 3) / 4;
1280
+ constexpr size_t nbits_fastscan = 4;
1281
+ idxqfs->M = M_fastscan;
1282
+ idxqfs->nbits = nbits_fastscan;
1283
+ idxqfs->ksub = (1 << nbits_fastscan);
1284
+
1285
+ READVECTOR(idxqfs->codes);
1286
+ idx = idxqfs;
1227
1287
  } else if (h == fourcc("Ixrq")) {
1228
1288
  IndexRaBitQ* idxq = new IndexRaBitQ();
1229
1289
  read_index_header(idxq, f);
@@ -1242,6 +1302,30 @@ Index* read_index(IOReader* f, int io_flags) {
1242
1302
  READ1(ivrq->qb);
1243
1303
  read_InvertedLists(ivrq, f, io_flags);
1244
1304
  idx = ivrq;
1305
+ } else if (h == fourcc("Iwrf")) {
1306
+ IndexIVFRaBitQFastScan* ivrqfs = new IndexIVFRaBitQFastScan();
1307
+ read_ivf_header(ivrqfs, f);
1308
+ read_RaBitQuantizer(&ivrqfs->rabitq, f);
1309
+ READ1(ivrqfs->by_residual);
1310
+ READ1(ivrqfs->code_size);
1311
+ READ1(ivrqfs->bbs);
1312
+ READ1(ivrqfs->qbs2);
1313
+ READ1(ivrqfs->M2);
1314
+ READ1(ivrqfs->implem);
1315
+ READ1(ivrqfs->qb);
1316
+ READ1(ivrqfs->centered);
1317
+ READVECTOR(ivrqfs->factors_storage);
1318
+
1319
+ // Initialize FastScan base class fields
1320
+ const size_t M_fastscan = (ivrqfs->d + 3) / 4;
1321
+ constexpr size_t nbits_fastscan = 4;
1322
+ ivrqfs->M = M_fastscan;
1323
+ ivrqfs->nbits = nbits_fastscan;
1324
+ ivrqfs->ksub = (1 << nbits_fastscan);
1325
+
1326
+ read_InvertedLists(ivrqfs, f, io_flags);
1327
+ ivrqfs->init_code_packer();
1328
+ idx = ivrqfs;
1245
1329
  } else {
1246
1330
  FAISS_THROW_FMT(
1247
1331
  "Index type 0x%08x (\"%s\") not recognized",