faiss 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/faiss/index.cpp +36 -10
- data/ext/faiss/index_binary.cpp +19 -6
- data/ext/faiss/kmeans.cpp +6 -6
- data/ext/faiss/numo.hpp +273 -123
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +1 -2
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.h +10 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +107 -7
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
- data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +3 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
- data/vendor/faiss/faiss/impl/HNSW.h +4 -4
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
- data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +43 -1
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +5 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +14 -3
data/vendor/faiss/faiss/IndexRaBitQFastScan.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <vector>
+
+#include <faiss/IndexFastScan.h>
+#include <faiss/IndexRaBitQ.h>
+#include <faiss/impl/RaBitQUtils.h>
+#include <faiss/impl/RaBitQuantizer.h>
+#include <faiss/impl/simd_result_handlers.h>
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/simdlib.h>
+
+namespace faiss {
+
+// Import shared utilities from RaBitQUtils
+using rabitq_utils::FactorsData;
+using rabitq_utils::QueryFactorsData;
+
+/** Fast-scan version of RaBitQ index that processes 32 database vectors at a
+ * time using SIMD operations. Similar to IndexPQFastScan but adapted for
+ * RaBitQ's bit-level quantization with factors.
+ *
+ * The key differences from IndexRaBitQ:
+ * - Processes vectors in batches of 32
+ * - Uses 4-bit groupings for SIMD optimization (4 dimensions per 4-bit unit)
+ * - Separates factors from quantized bits for efficient processing
+ * - Leverages existing PQ4 FastScan infrastructure where possible
+ */
+struct IndexRaBitQFastScan : IndexFastScan {
+    /// RaBitQ quantizer for encoding/decoding
+    RaBitQuantizer rabitq;
+
+    /// Center of all points (same as IndexRaBitQ)
+    std::vector<float> center;
+
+    /// Extracted factors storage for batch processing
+    /// Size: ntotal, stores factors separately from packed codes
+    std::vector<FactorsData> factors_storage;
+
+    /// Default number of bits to quantize a query with
+    uint8_t qb = 8;
+
+    // quantize the query with a zero-centered scalar quantizer.
+    bool centered = false;
+
+    IndexRaBitQFastScan();
+
+    explicit IndexRaBitQFastScan(
+            idx_t d,
+            MetricType metric = METRIC_L2,
+            int bbs = 32);
+
+    /// build from an existing IndexRaBitQ
+    explicit IndexRaBitQFastScan(const IndexRaBitQ& orig, int bbs = 32);
+
+    void train(idx_t n, const float* x) override;
+
+    void add(idx_t n, const float* x) override;
+
+    void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
+
+    void compute_float_LUT(
+            float* lut,
+            idx_t n,
+            const float* x,
+            const FastScanDistancePostProcessing& context) const override;
+
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+    void search(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
+
+    /// Override to create RaBitQ-specific handlers
+    void* make_knn_handler(
+            bool is_max,
+            int /*impl*/,
+            idx_t n,
+            idx_t k,
+            size_t /*ntotal*/,
+            float* distances,
+            idx_t* labels,
+            const IDSelector* sel,
+            const FastScanDistancePostProcessing& context) const override;
+};
+
+/** SIMD result handler for RaBitQ FastScan that applies distance corrections
+ * and maintains heaps directly during SIMD operations.
+ *
+ * This handler processes batches of 32 distance computations from SIMD kernels,
+ * applies RaBitQ-specific adjustments (factors and normalizers), and
+ * immediately updates result heaps without intermediate storage. This
+ * eliminates the need for post-processing and provides significant memory and
+ * performance benefits.
+ *
+ * Key optimizations:
+ * - Direct heap integration (no intermediate result storage)
+ * - Batch-level computation of normalizers and query factors
+ * - Preserves exact mathematical equivalence to original RaBitQ distances
+ * @tparam C Comparator type (CMin/CMax) for heap operations
+ * @tparam with_id_map Whether to use id mapping (similar to HeapHandler)
+ */
+template <class C, bool with_id_map = false>
+struct RaBitQHeapHandler
+        : simd_result_handlers::ResultHandlerCompare<C, with_id_map> {
+    using RHC = simd_result_handlers::ResultHandlerCompare<C, with_id_map>;
+    using RHC::normalizers;
+
+    const IndexRaBitQFastScan* rabitq_index;
+    float* heap_distances; // [nq * k]
+    int64_t* heap_labels;  // [nq * k]
+    const size_t nq, k;
+    const FastScanDistancePostProcessing&
+            context; // Processing context with query offset
+
+    // Use float-based comparator for heap operations
+    using Cfloat = typename std::conditional<
+            C::is_max,
+            CMax<float, int64_t>,
+            CMin<float, int64_t>>::type;
+
+    RaBitQHeapHandler(
+            const IndexRaBitQFastScan* index,
+            size_t nq_val,
+            size_t k_val,
+            float* distances,
+            int64_t* labels,
+            const IDSelector* sel_in,
+            const FastScanDistancePostProcessing& context);
+
+    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) final;
+
+    void begin(const float* norms);
+
+    void end();
+};
+
+} // namespace faiss
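For orientation, here is a minimal sketch of how the new index class could be exercised from C++, based only on the declarations above; the data, sizes, and parameter choices are illustrative and not taken from the diff.

#include <faiss/IndexRaBitQFastScan.h>

#include <random>
#include <vector>

int main() {
    const int d = 64;              // vector dimensionality (illustrative)
    const faiss::idx_t nb = 10000; // database size (illustrative)

    // Illustrative random training/database data.
    std::mt19937 rng(123);
    std::normal_distribution<float> dist;
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = dist(rng);
    }

    // bbs = 32 matches the 32-vector SIMD batch size described above.
    faiss::IndexRaBitQFastScan index(d, faiss::METRIC_L2, 32);
    index.train(nb, xb.data());
    index.add(nb, xb.data());

    // Search the first vector against the database.
    const faiss::idx_t k = 5;
    std::vector<float> distances(k);
    std::vector<faiss::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}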
data/vendor/faiss/faiss/MatrixStats.cpp
@@ -77,7 +77,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
     if (d > 1024) {
         do_comment(
                 "indexing this many dimensions is hard, "
-                "please consider dimensionality
+                "please consider dimensionality reduction (with PCAMatrix)\n");
     }
 
     hash_value = hash_bytes((const uint8_t*)x, n * d * sizeof(*x));
@@ -125,7 +125,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
         }
     }
 
-    // invalid
+    // invalid vector stats
     if (n_valid == n) {
         do_comment("no NaN or Infs in data\n");
     } else {
@@ -229,7 +229,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
     } else {
         do_comment(
                 "%zd dimensions are too large "
-                "wrt. their variance, may
+                "wrt. their variance, may lose precision "
                 "in IndexFlatL2 (use CenteringTransform)\n",
                 n_dangerous_range);
     }
data/vendor/faiss/faiss/MetricType.h
@@ -35,7 +35,7 @@ enum MetricType {
 
     /// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)) where a_i, b_i > 0
    METRIC_Jaccard,
-    /// Squared
+    /// Squared Euclidean distance, ignoring NaNs
    METRIC_NaNEuclidean,
    /// Gower's distance - numeric dimensions are in [0,1] and categorical
    /// dimensions are negative integers
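Since the enum comments carry the actual definitions, a direct transcription of the Jaccard formula may help; this helper is purely illustrative and not part of faiss.

#include <algorithm>
#include <cstdio>
#include <vector>

// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)), assuming a_i, b_i > 0,
// exactly as the METRIC_Jaccard comment states.
float jaccard_similarity(
        const std::vector<float>& a,
        const std::vector<float>& b) {
    float num = 0.0f, den = 0.0f;
    for (size_t i = 0; i < a.size(); i++) {
        num += std::min(a[i], b[i]);
        den += std::max(a[i], b[i]);
    }
    return num / den;
}

int main() {
    std::vector<float> a = {1, 2, 3};
    std::vector<float> b = {2, 1, 3};
    std::printf("%.3f\n", jaccard_similarity(a, b)); // (1+1+3)/(2+2+3) ≈ 0.714
    return 0;
}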
data/vendor/faiss/faiss/VectorTransform.h
@@ -37,7 +37,7 @@ struct VectorTransform {
      * nothing by default.
      *
      * @param n nb of training vectors
-     * @param x training
+     * @param x training vectors, size n * d
      */
     virtual void train(idx_t n, const float* x);
 
@@ -249,7 +249,7 @@ struct OPQMatrix : LinearTransform {
     void train(idx_t n, const float* x) override;
 };
 
-/** remap dimensions for
+/** remap dimensions for input vectors, possibly inserting 0s
  * strictly speaking this is also a linear transform but we don't want
  * to compute it with matrix multiplies */
 struct RemapDimensionsTransform : VectorTransform {
data/vendor/faiss/faiss/clone_index.cpp
@@ -23,6 +23,7 @@
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
 #include <faiss/IndexIVFFlat.h>
+#include <faiss/IndexIVFFlatPanorama.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/IndexIVFPQFastScan.h>
 #include <faiss/IndexIVFPQR.h>
@@ -97,6 +98,7 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {
 
     TRYCLONE(IndexIVFFlatDedup, ivf)
     TRYCLONE(IndexIVFFlat, ivf)
+    TRYCLONE(IndexIVFFlatPanorama, ivf)
 
     TRYCLONE(IndexIVFSpectralHash, ivf)
 
@@ -152,7 +154,7 @@ IndexNSG* clone_IndexNSG(const IndexNSG* insg) {
     TRYCLONE(IndexNSGPQ, insg)
     TRYCLONE(IndexNSGSQ, insg)
     TRYCLONE(IndexNSG, insg) {
-        FAISS_THROW_MSG("clone not supported for this type of
+        FAISS_THROW_MSG("clone not supported for this type of IndexNSG");
     }
 }
 
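The TRYCLONE chains above rely on a dynamic_cast-and-copy idiom; the macro itself is defined earlier in clone_index.cpp and is not shown in this diff, but it works roughly along these lines (an illustrative reconstruction, not quoted from the source):

// Try the most derived classes first; the trailing else lets consecutive
// TRYCLONE lines chain, falling through to the next candidate type.
#define TRYCLONE(classname, obj)                                      \
    if (const classname* clo = dynamic_cast<const classname*>(obj)) { \
        return new classname(*clo);                                   \
    } else

Under that reading, the new TRYCLONE(IndexIVFFlatPanorama, ivf) entry simply registers one more candidate type in the clone dispatch chain.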
data/vendor/faiss/faiss/gpu/GpuCloner.cpp
@@ -239,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
         config.device = device;
         GpuIndexCagra* res =
                 new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
-        res->
+        res->copyFrom_ex(icg, icg->get_numeric_type());
         return res;
     }
 #endif
data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -77,13 +77,13 @@ class GpuIndex : public faiss::Index {
     /// as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add(idx_t, const float* x) override;
-    void
+    void add_ex(idx_t, const void* x, NumericType numeric_type) override;
 
     /// `x` and `ids` can be resident on the CPU or any GPU; copies are
     /// performed as needed
     /// Handles paged adds if the add set is too large; calls addInternal_
     void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
-    void
+    void add_with_ids_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
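A minimal sketch of calling the renamed _ex entry point, assuming a CUDA-enabled build; the index type and data here are illustrative.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

#include <vector>

int main() {
    const int d = 32;
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexFlat index(&res, d, faiss::METRIC_L2);

    std::vector<float> xb(100 * d, 0.5f);
    // The _ex variants take a type-erased pointer plus a NumericType tag;
    // Float32 follows the same path as the classic add().
    index.add_ex(100, xb.data(), faiss::NumericType::Float32);
    return 0;
}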
@@ -103,7 +103,7 @@ class GpuIndex : public faiss::Index {
             float* distances,
             idx_t* labels,
             const SearchParameters* params = nullptr) const override;
-    void
+    void search_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -139,7 +139,7 @@ class GpuIndex : public faiss::Index {
   protected:
     /// Copy what we need from the CPU equivalent
     void copyFrom(const faiss::Index* index);
-    void
+    void copyFrom_ex(const faiss::Index* index, NumericType numeric_type) {
         if (numeric_type == NumericType::Float32) {
             copyFrom(index);
         } else {
@@ -149,7 +149,7 @@ class GpuIndex : public faiss::Index {
 
     /// Copy what we have to the CPU equivalent
     void copyTo(faiss::Index* index) const;
-    void
+    void copyTo_ex(faiss::Index* index, NumericType numeric_type) {
         if (numeric_type == NumericType::Float32) {
             copyTo(index);
         } else {
@@ -165,7 +165,7 @@ class GpuIndex : public faiss::Index {
     /// All data is guaranteed to be resident on our device
     virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;
 
-    virtual void
+    virtual void addImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -187,7 +187,7 @@ class GpuIndex : public faiss::Index {
             idx_t* labels,
             const SearchParameters* params) const = 0;
 
-    virtual void
+    virtual void searchImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -212,7 +212,7 @@ class GpuIndex : public faiss::Index {
     /// Handles paged adds if the add set is too large, passes to
     /// addImpl_ to actually perform the add for the current page
     void addPaged_(idx_t n, const float* x, const idx_t* ids);
-    void
+    void addPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -220,7 +220,7 @@ class GpuIndex : public faiss::Index {
 
     /// Calls addImpl_ for a single page of GPU-resident data
     void addPage_(idx_t n, const float* x, const idx_t* ids);
-    void
+    void addPage_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -235,7 +235,7 @@ class GpuIndex : public faiss::Index {
             idx_t* outIndicesData,
             const SearchParameters* params) const;
 
-    void
+    void searchNonPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -253,7 +253,7 @@ class GpuIndex : public faiss::Index {
             float* outDistancesData,
             idx_t* outIndicesData,
             const SearchParameters* params) const;
-    void
+    void searchFromCpuPaged_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h
@@ -111,7 +111,7 @@ struct GpuIndexBinaryCagra : public IndexBinary {
             const SearchParameters* search_params) const;
 
   protected:
-    /// Manages
+    /// Manages streams, cuBLAS handles and scratch memory for devices
     std::shared_ptr<GpuResources> resources_;
 
     /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -86,7 +86,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
             idx_t* outIndicesData) const;
 
   protected:
-    /// Manages
+    /// Manages streams, cuBLAS handles and scratch memory for devices
     std::shared_ptr<GpuResources> resources_;
 
     /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h
@@ -135,7 +135,7 @@ struct IVFPQSearchCagraConfig {
     ///
     /// The use of low-precision types reduces the amount of shared memory
     /// required at search time, so fast shared memory kernels can be used even
-    /// for datasets with large
+    /// for datasets with large dimensionality. Note that the recall is slightly
     /// degraded when low-precision type is selected.
 
     cudaDataType_t lut_dtype = CUDA_R_32F;
@@ -166,6 +166,10 @@
     /// negative effects on the search performance if tweaked incorrectly.
 
     double preferred_shmem_carveout = 1.0;
+
+    /// Set the internal batch size to improve GPU utilization at the cost of
+    /// larger memory footprint.
+    uint32_t max_internal_batch_size = 4096;
 };
 
 struct GpuIndexCagraConfig : public GpuIndexConfig {
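A sketch of tuning the new field, assuming IVFPQSearchCagraConfig lives in faiss::gpu like the rest of this header; the value is illustrative.

#include <faiss/gpu/GpuIndexCagra.h>

int main() {
    faiss::gpu::IVFPQSearchCagraConfig search_config;
    // Trade memory for GPU utilization, per the comment in the diff;
    // 4096 is the default, larger values batch more queries internally.
    search_config.max_internal_batch_size = 8192;
    return 0;
}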
@@ -258,7 +262,7 @@ struct GpuIndexCagra : public GpuIndex {
     /// the base dataset. Use this function when you want to add vectors with
     /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
     void add(idx_t n, const float* x) override;
-    void
+    void add_ex(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Trains CAGRA based on the given vector data.
     /// NB: The use of the train function here is to build the CAGRA graph on
@@ -266,12 +270,12 @@ struct GpuIndexCagra : public GpuIndex {
     /// of vectors (without IDs) to the index. There is no external quantizer to
     /// be trained here.
     void train(idx_t n, const float* x) override;
-    void
+    void train_ex(idx_t n, const void* x, NumericType numeric_type) override;
 
     /// Initialize ourselves from the given CPU index; will overwrite
     /// all data in ourselves
     void copyFrom(const faiss::IndexHNSWCagra* index);
-    void
+    void copyFrom_ex(
             const faiss::IndexHNSWCagra* index,
             NumericType numeric_type);
 
@@ -289,7 +293,7 @@ struct GpuIndexCagra : public GpuIndex {
     bool addImplRequiresIDs_() const override;
 
     void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
-    void
+    void addImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -303,7 +307,7 @@ struct GpuIndexCagra : public GpuIndex {
             float* distances,
             idx_t* labels,
             const SearchParameters* search_params) const override;
-    void
+    void searchImpl_ex_(
             idx_t n,
             const void* x,
             NumericType numeric_type,
data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp
@@ -1,3 +1,10 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
 #include <faiss/gpu/GpuIcmEncoder.h>
 #include <faiss/gpu/StandardGpuResources.h>
 #include <faiss/gpu/test/TestUtils.h>
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp
@@ -493,7 +493,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
     // Construct a positive test set
     auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);
 
-    // Put all vecs on positive
+    // Put all vecs on positive side
     for (auto& f : queryVecs) {
         f = std::abs(f);
     }
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp
@@ -404,7 +404,7 @@ void AdditiveQuantizer::compute_LUT(
 namespace {
 
 /* compute inner products of one query with all centroids, given a look-up
- * table of all inner
+ * table of all inner products with codebook entries */
 void compute_inner_prod_with_LUT(
         const AdditiveQuantizer& aq,
         const float* LUT,
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp
@@ -36,7 +36,7 @@ RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
 /// for each query
 void RangeSearchResult::do_allocation() {
     // works only if all the partial results are aggregated
-    //
+    // simultaneously
     FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
     size_t ofs = 0;
     for (int i = 0; i < nq; i++) {
@@ -86,7 +86,7 @@ void BufferList::append_buffer() {
     wp = 0;
 }
 
-/// copy
+/// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
 /// tables dest_ids, dest_dis
 void BufferList::copy_range(
         size_t ofs,
data/vendor/faiss/faiss/impl/AuxIndexStructures.h
@@ -80,7 +80,7 @@ struct BufferList {
     /// add one result, possibly appending a new buffer if needed
     void add(idx_t id, float dis);
 
-    /// copy
+    /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
     /// tables dest_ids, dest_dis
     void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
 };
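To make the copy_range contract concrete, a standalone illustration of reading a linear range out of fixed-size chunked buffers (not the vendored implementation):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const size_t buffer_size = 4;
    // Two fixed-size buffers viewed as one linear array 0..7.
    std::vector<std::vector<int64_t>> buffers = {{0, 1, 2, 3}, {4, 5, 6, 7}};

    const size_t ofs = 2, n = 4; // range [ofs, ofs+n) spans the boundary
    std::vector<int64_t> dest_ids(n);
    for (size_t i = 0; i < n; i++) {
        dest_ids[i] = buffers[(ofs + i) / buffer_size][(ofs + i) % buffer_size];
    }
    for (int64_t id : dest_ids) {
        std::printf("%lld ", (long long)id); // prints: 2 3 4 5
    }
    std::printf("\n");
    return 0;
}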
data/vendor/faiss/faiss/impl/CodePacker.h
@@ -38,14 +38,14 @@ struct CodePacker {
                                       // code_size
     ) const = 0;
 
-    // pack all
+    // pack all codes in a block
     virtual void pack_all(
             const uint8_t* flat_codes, // codes to write to the block, size
                                        // (nvec * code_size)
             uint8_t* block // block to write to (size block_size)
     ) const;
 
-    // unpack all
+    // unpack all codes in a block
     virtual void unpack_all(
             const uint8_t* block, // block to read from (size block_size)
             uint8_t* flat_codes // where to write the resulting codes size (nvec
data/vendor/faiss/faiss/impl/DistanceComputer.h
@@ -60,7 +60,7 @@ struct DistanceComputer {
 };
 
 /* Wrap the distance computer into one that negates the
-   distances. This makes supporting
+   distances. This makes supporting INNER_PRODUCT search easier */
 
 struct NegativeDistanceComputer : DistanceComputer {
     /// owned by this
@@ -100,7 +100,7 @@ struct NegativeDistanceComputer : DistanceComputer {
         return -basedis->symmetric_dis(i, j);
     }
 
-    virtual ~NegativeDistanceComputer() {
+    virtual ~NegativeDistanceComputer() override {
         delete basedis;
     }
 };
@@ -125,7 +125,7 @@ struct FlatCodesDistanceComputer : DistanceComputer {
     /// compute distance of current query to an encoded vector
     virtual float distance_to_code(const uint8_t* code) = 0;
 
-    virtual ~FlatCodesDistanceComputer() {}
+    virtual ~FlatCodesDistanceComputer() override {}
 };
 
 } // namespace faiss
data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <cstddef>
+
+namespace faiss {
+
+// Forward declarations
+struct NormTableScaler;
+
+namespace rabitq_utils {
+struct QueryFactorsData;
+}
+
+/**
+ * Simple context object that holds processors for FastScan operations.
+ * */
+struct FastScanDistancePostProcessing {
+    /// Norm scaling processor for Additive Quantizers (nullptr if not needed)
+    const NormTableScaler* norm_scaler = nullptr;
+
+    /// Query factors data pointer for RaBitQ (nullptr if not needed)
+    /// This pointer should point to the beginning of the relevant
+    /// QueryFactorsData subset for this context.
+    rabitq_utils::QueryFactorsData* query_factors = nullptr;
+
+    /// The nprobe value used when allocating query_factors storage.
+    /// This is needed because the allocation size (n * nprobe) may use a
+    /// different nprobe than index->nprobe if search params override it.
+    /// Set to 0 to use index->nprobe as fallback.
+    size_t nprobe = 0;
+
+    /// Default constructor - no processing
+    FastScanDistancePostProcessing() = default;
+
+    /// Check if norm scaling is enabled
+    bool has_norm_scaling() const {
+        return norm_scaler != nullptr;
+    }
+
+    /// Check if query factors processing is enabled
+    bool has_query_processing() const {
+        return query_factors != nullptr;
+    }
+};
+
+} // namespace faiss
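A minimal, self-contained sketch of the new context object's flags; the caller code here is hypothetical.

#include <faiss/impl/FastScanDistancePostProcessing.h>

#include <cassert>

int main() {
    // Default-constructed context: no post-processing enabled.
    faiss::FastScanDistancePostProcessing context;
    assert(!context.has_norm_scaling());
    assert(!context.has_query_processing());

    // A search path that overrides nprobe via SearchParameters would record
    // the effective value here so query_factors indexing stays consistent.
    context.nprobe = 64; // illustrative value
    return 0;
}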
data/vendor/faiss/faiss/impl/HNSW.cpp
@@ -60,7 +60,7 @@ HNSW::HNSW(int M) : rng(12345) {
 
 int HNSW::random_level() {
     double f = rng.rand_float();
-    // could be a bit faster with
+    // could be a bit faster with bisection
     for (int level = 0; level < assign_probas.size(); level++) {
         if (f < assign_probas[level]) {
             return level;
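The comment hints that the linear scan could be replaced by bisection. Assuming the loop consumes f by subtracting each level's probability (the subtraction sits just below the shown context in upstream faiss), the draw is equivalent to an upper_bound over cumulative probabilities; an illustrative sketch with made-up probabilities:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <random>
#include <vector>

// Equivalent level draw via bisection: cum[i] = assign_probas[0..i] summed.
int random_level_bisect(const std::vector<double>& cum, double f) {
    auto it = std::upper_bound(cum.begin(), cum.end(), f);
    if (it == cum.end()) {
        return (int)cum.size() - 1; // tail case, exponentially unlikely
    }
    return (int)(it - cum.begin());
}

int main() {
    std::vector<double> assign_probas = {0.75, 0.1875, 0.046875};
    std::vector<double> cum(assign_probas.size());
    std::partial_sum(assign_probas.begin(), assign_probas.end(), cum.begin());

    std::mt19937 rng(42);
    std::uniform_real_distribution<double> uni(0.0, 1.0);
    std::printf("level = %d\n", random_level_bisect(cum, uni(rng)));
    return 0;
}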
data/vendor/faiss/faiss/impl/HNSW.h
@@ -31,7 +31,7 @@ namespace faiss {
  * Yu. A. Malkov, D. A. Yashunin, arXiv 2017
  *
  * This implementation is heavily influenced by the NMSlib
- * implementation by Yury Malkov and Leonid
+ * implementation by Yury Malkov and Leonid Boytsov
  * (https://github.com/searchivarius/nmslib)
 *
 * The HNSW object stores only the neighbor link structure, see
@@ -61,7 +61,7 @@ struct HNSW {
 
     typedef std::pair<float, storage_idx_t> Node;
 
-    /** Heap structure that allows fast
+    /** Heap structure that allows fast access and updates.
      */
     struct MinimaxHeap {
         int n;
@@ -87,7 +87,7 @@ struct HNSW {
         int count_below(float thresh);
     };
 
-    /// to sort pairs of (id, distance) from nearest to
+    /// to sort pairs of (id, distance) from nearest to farthest or the reverse
     struct NodeDistCloser {
         float d;
         int id;
@@ -160,7 +160,7 @@ struct HNSW {
     /// nb of neighbors for this level
     int nb_neighbors(int layer_no) const;
 
-    ///
+    /// cumulative nb up to (and excluding) this level
     int cum_nb_neighbors(int layer_no) const;
 
     /// range of entries in the neighbors table of vertex no at layer_no
data/vendor/faiss/faiss/impl/IDSelector.cpp
@@ -31,7 +31,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
         *jmin_out = *jmax_out = 0;
         return;
     }
-    //
+    // bisection to find imin
     if (ids[0] >= imin) {
         *jmin_out = 0;
     } else {
@@ -46,7 +46,7 @@ void IDSelectorRange::find_sorted_ids_bounds(
         }
         *jmin_out = j1;
     }
-    //
+    // bisection to find imax
    if (*jmin_out == list_size || ids[*jmin_out] >= imax) {
        *jmax_out = *jmin_out;
    } else {
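For intuition, the bounds these two bisections compute over the sorted id list correspond to standard lower_bound searches; an illustrative equivalent, not the vendored code:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    std::vector<int64_t> ids = {2, 3, 5, 8, 13, 21}; // sorted list ids
    const int64_t imin = 4, imax = 14; // select ids in [imin, imax)

    // jmin: first index with ids[j] >= imin; jmax: first with ids[j] >= imax.
    size_t jmin = std::lower_bound(ids.begin(), ids.end(), imin) - ids.begin();
    size_t jmax = std::lower_bound(ids.begin(), ids.end(), imax) - ids.begin();
    std::printf("jmin=%zu jmax=%zu\n", jmin, jmax); // jmin=2 jmax=5
    return 0;
}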