faiss 0.4.3 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +2 -0
- data/ext/faiss/index.cpp +33 -6
- data/ext/faiss/index_binary.cpp +17 -4
- data/ext/faiss/kmeans.cpp +6 -6
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +26 -51
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.cpp +11 -0
- data/vendor/faiss/faiss/Index.h +34 -11
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +8 -2
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +102 -7
- data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
- data/vendor/faiss/faiss/IndexFlat.h +81 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +93 -2
- data/vendor/faiss/faiss/IndexHNSW.h +58 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +251 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +99 -8
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +828 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +252 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -13
- data/vendor/faiss/faiss/IndexRaBitQ.h +11 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +731 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +175 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
- data/vendor/faiss/faiss/IndexRefine.h +17 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +5 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +11 -7
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +77 -6
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +295 -16
- data/vendor/faiss/faiss/impl/HNSW.h +35 -6
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
- data/vendor/faiss/faiss/impl/Panorama.h +204 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
- data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +294 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +330 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +304 -223
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +72 -4
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +7 -10
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +238 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +212 -19
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
- data/vendor/faiss/faiss/impl/svs_io.h +67 -0
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +217 -8
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +115 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.h +46 -0
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.cpp +0 -3
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +9 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +29 -1
data/vendor/faiss/faiss/IndexFlat.h

@@ -13,6 +13,7 @@
 #include <vector>

 #include <faiss/IndexFlatCodes.h>
+#include <faiss/impl/Panorama.h>

 namespace faiss {

@@ -66,7 +67,7 @@ struct IndexFlat : IndexFlatCodes {

     FlatCodesDistanceComputer* get_FlatCodesDistanceComputer() const override;

-    /* The
+    /* The standalone codec interface (just memcopies in this case) */
     void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;

     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -99,6 +100,85 @@ struct IndexFlatL2 : IndexFlat {
     void clear_l2norms();
 };

+struct IndexFlatPanorama : IndexFlat {
+    const size_t batch_size;
+    const size_t n_levels;
+    std::vector<float> cum_sums;
+    Panorama pano;
+
+    /**
+     * @param d dimensionality of the input vectors
+     * @param metric metric type
+     * @param n_levels number of Panorama levels
+     * @param batch_size batch size for Panorama storage
+     */
+    explicit IndexFlatPanorama(
+            idx_t d,
+            MetricType metric,
+            size_t n_levels,
+            size_t batch_size)
+            : IndexFlat(d, metric),
+              batch_size(batch_size),
+              n_levels(n_levels),
+              pano(code_size, n_levels, batch_size) {
+        FAISS_THROW_IF_NOT(metric == METRIC_L2);
+    }
+
+    void add(idx_t n, const float* x) override;
+
+    void search(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
+
+    void range_search(
+            idx_t n,
+            const float* x,
+            float radius,
+            RangeSearchResult* result,
+            const SearchParameters* params = nullptr) const override;
+
+    void search_subset(
+            idx_t n,
+            const float* x,
+            idx_t k_base,
+            const idx_t* base_labels,
+            idx_t k,
+            float* distances,
+            idx_t* labels) const override;
+
+    void reset() override;
+
+    void reconstruct(idx_t key, float* recons) const override;
+
+    void reconstruct_n(idx_t i, idx_t n, float* recons) const override;
+
+    size_t remove_ids(const IDSelector& sel) override;
+
+    void merge_from(Index& otherIndex, idx_t add_id) override;
+
+    void add_sa_codes(idx_t n, const uint8_t* codes_in, const idx_t* xids)
+            override;
+
+    void permute_entries(const idx_t* perm);
+};
+
+struct IndexFlatL2Panorama : IndexFlatPanorama {
+    /**
+     * @param d dimensionality of the input vectors
+     * @param n_levels number of Panorama levels
+     * @param batch_size batch size for Panorama storage
+     */
+    explicit IndexFlatL2Panorama(
+            idx_t d,
+            size_t n_levels,
+            size_t batch_size = 512)
+            : IndexFlatPanorama(d, METRIC_L2, n_levels, batch_size) {}
+};
+
 /// optimized version for 1D "vectors".
 struct IndexFlat1D : IndexFlatL2 {
     bool continuous_update = true; ///< is the permutation updated continuously?
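The hunk above adds IndexFlatPanorama / IndexFlatL2Panorama to IndexFlat.h. A minimal usage sketch, assuming only the constructor and the inherited add/search interface shown above; the dataset, parameter values, and query loop are illustrative and not taken from this diff:

```cpp
#include <faiss/IndexFlat.h>

#include <random>
#include <vector>

int main() {
    int d = 128;          // vector dimensionality
    size_t n_levels = 8;  // number of Panorama levels
    // batch_size defaults to 512 per the declaration above
    faiss::IndexFlatL2Panorama index(d, n_levels);

    // Fill the index with random vectors.
    std::mt19937 rng(42);
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
    size_t nb = 10000;
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = dist(rng);
    }
    index.add(nb, xb.data());

    // Standard IndexFlat-style k-NN query (L2 only, per the
    // FAISS_THROW_IF_NOT(metric == METRIC_L2) check above).
    int k = 5;
    std::vector<float> distances(k);
    std::vector<faiss::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}
```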
data/vendor/faiss/faiss/IndexHNSW.cpp

@@ -276,7 +276,7 @@ void hnsw_search(
             res.begin(i);
             dis->set_query(x + i * index->d);

-            HNSWStats stats = hnsw.search(*dis, res, vt, params);
+            HNSWStats stats = hnsw.search(*dis, index, res, vt, params);
             n1 += stats.n1;
             n2 += stats.n2;
             ndis += stats.ndis;
@@ -450,7 +450,9 @@ void IndexHNSW::search_level_0(
             vt.advance();
         }
 #pragma omp critical
-        {
+        {
+            hnsw_stats.combine(search_stats);
+        }
     }
     if (is_similarity_metric(this->metric_type)) {
         // we need to revert the negated distances
@@ -647,6 +649,95 @@ IndexHNSWFlat::IndexHNSWFlat(int d, int M, MetricType metric)
     is_trained = true;
 }

+/**************************************************************
+ * IndexHNSWFlatPanorama implementation
+ **************************************************************/
+
+void IndexHNSWFlatPanorama::compute_cum_sums(
+        const float* x,
+        float* dst_cum_sums,
+        int d,
+        int num_panorama_levels,
+        int panorama_level_width) {
+    // Iterate backwards through levels, accumulating sum as we go.
+    // This avoids computing the suffix sum for each vector, which takes
+    // extra memory.
+
+    float sum = 0.0f;
+    dst_cum_sums[num_panorama_levels] = 0.0f;
+    for (int level = num_panorama_levels - 1; level >= 0; level--) {
+        int start_idx = level * panorama_level_width;
+        int end_idx = std::min(start_idx + panorama_level_width, d);
+        for (int j = start_idx; j < end_idx; j++) {
+            sum += x[j] * x[j];
+        }
+        dst_cum_sums[level] = std::sqrt(sum);
+    }
+}
+
+IndexHNSWFlatPanorama::IndexHNSWFlatPanorama()
+        : IndexHNSWFlat(),
+          cum_sums(),
+          panorama_level_width(0),
+          num_panorama_levels(0) {}
+
+IndexHNSWFlatPanorama::IndexHNSWFlatPanorama(
+        int d,
+        int M,
+        int num_panorama_levels,
+        MetricType metric)
+        : IndexHNSWFlat(d, M, metric),
+          cum_sums(),
+          panorama_level_width(
+                  (d + num_panorama_levels - 1) / num_panorama_levels),
+          num_panorama_levels(num_panorama_levels) {
+    // For now, we only support L2 distance.
+    // Supporting dot product and cosine distance is a trivial addition
+    // left for future work.
+    FAISS_THROW_IF_NOT(metric == METRIC_L2);
+
+    // Enable Panorama search mode.
+    // This is not ideal, but is still more simple than making a subclass of
+    // HNSW and overriding the search logic.
+    hnsw.is_panorama = true;
+}
+
+void IndexHNSWFlatPanorama::add(idx_t n, const float* x) {
+    idx_t n0 = ntotal;
+    cum_sums.resize((ntotal + n) * (num_panorama_levels + 1));
+
+    for (size_t idx = 0; idx < n; idx++) {
+        const float* vector = x + idx * d;
+        compute_cum_sums(
+                vector,
+                &cum_sums[(n0 + idx) * (num_panorama_levels + 1)],
+                d,
+                num_panorama_levels,
+                panorama_level_width);
+    }
+
+    IndexHNSWFlat::add(n, x);
+}
+
+void IndexHNSWFlatPanorama::reset() {
+    cum_sums.clear();
+    IndexHNSWFlat::reset();
+}
+
+void IndexHNSWFlatPanorama::permute_entries(const idx_t* perm) {
+    std::vector<float> new_cum_sums(ntotal * (num_panorama_levels + 1));
+
+    for (idx_t i = 0; i < ntotal; i++) {
+        idx_t src = perm[i];
+        memcpy(&new_cum_sums[i * (num_panorama_levels + 1)],
+               &cum_sums[src * (num_panorama_levels + 1)],
+               (num_panorama_levels + 1) * sizeof(float));
+    }
+
+    std::swap(cum_sums, new_cum_sums);
+    IndexHNSWFlat::permute_entries(perm);
+}
+
 /**************************************************************
  * IndexHNSWPQ implementation
  **************************************************************/
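The cum_sums buffer filled by compute_cum_sums above stores num_panorama_levels + 1 floats per vector: entry `level` is the L2 norm of the tail of the vector starting at that level's first dimension, and the last entry is 0. A small self-contained check of that invariant, mirroring the backward accumulation in the hunk; the helper and test values below are hypothetical and not part of faiss:

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// Mirrors the loop in IndexHNSWFlatPanorama::compute_cum_sums above:
// dst[level] = sqrt(sum of x[j]^2 for all j >= level * width), dst[levels] = 0.
static void cum_sums_ref(
        const float* x, float* dst, int d, int levels, int width) {
    float sum = 0.0f;
    dst[levels] = 0.0f;
    for (int level = levels - 1; level >= 0; level--) {
        int start = level * width;
        int end = std::min(start + width, d);
        for (int j = start; j < end; j++) {
            sum += x[j] * x[j];
        }
        dst[level] = std::sqrt(sum);
    }
}

int main() {
    // d = 4 split into 2 levels of width 2: {x0, x1} and {x2, x3}.
    std::vector<float> x = {3.0f, 4.0f, 0.0f, 12.0f};
    std::vector<float> cs(3);
    cum_sums_ref(x.data(), cs.data(), 4, 2, 2);
    assert(std::fabs(cs[2]) < 1e-6f);         // sentinel entry
    assert(std::fabs(cs[1] - 12.0f) < 1e-4f); // sqrt(0^2 + 12^2)
    assert(std::fabs(cs[0] - 13.0f) < 1e-4f); // sqrt(3^2 + 4^2 + 0^2 + 12^2)
    return 0;
}
```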
data/vendor/faiss/faiss/IndexHNSW.h

@@ -43,7 +43,7 @@ struct IndexHNSW : Index {

     // When set to true, all neighbors in level 0 are filled up
     // to the maximum size allowed (2 * M). This option is used by
-    //
+    // IndexHNSWCagra to create a full base layer graph that is
     // used when GpuIndexCagra::copyFrom(IndexHNSWCagra*) is invoked.
     bool keep_max_size_level0 = false;

@@ -111,7 +111,7 @@ struct IndexHNSW : Index {

     void link_singletons();

-    void permute_entries(const idx_t* perm);
+    virtual void permute_entries(const idx_t* perm);

     DistanceComputer* get_distance_computer() const override;
 };
@@ -125,6 +125,62 @@ struct IndexHNSWFlat : IndexHNSW {
     IndexHNSWFlat(int d, int M, MetricType metric = METRIC_L2);
 };

+/** Panorama implementation of IndexHNSWFlat following
+ * https://www.arxiv.org/pdf/2510.00566.
+ *
+ * Unlike cluster-based Panorama, the vectors have to be higher dimensional
+ * (i.e. typically d > 512) and/or be able to compress a lot of their energy in
+ * the early dimensions to be effective. This is because HNSW accesses vectors
+ * in a random order, which makes cache misses dominate the distance computation
+ * time.
+ *
+ * The `num_panorama_levels` parameter controls the granularity of progressive
+ * distance refinement, allowing candidates to be eliminated early using partial
+ * distance computations rather than computing full distances.
+ *
+ * NOTE: This version of HNSW handles search slightly differently than the
+ * vanilla HNSW, as it uses partial distance computations with progressive
+ * refinement bounds. Instead of computing full distances immediately for all
+ * candidates, Panorama maintains lower and upper bounds that are incrementally
+ * tightened across refinement levels. Candidates are inserted into the search
+ * beam using approximate distance estimates (LB+UB)/2 and are only fully
+ * evaluated when they survive pruning and enter the result heap. This allows
+ * the algorithm to prune unpromising candidates early using Cauchy-Schwarz
+ * bounds on partial inner products. Hence, recall is not guaranteed to be the
+ * same as vanilla HNSW due to the heterogeneous precision within the search
+ * beam (exact vs. partial distance estimates affecting traversal order).
+ */
+struct IndexHNSWFlatPanorama : IndexHNSWFlat {
+    IndexHNSWFlatPanorama();
+    IndexHNSWFlatPanorama(
+            int d,
+            int M,
+            int num_panorama_levels,
+            MetricType metric = METRIC_L2);
+
+    void add(idx_t n, const float* x) override;
+    void reset() override;
+    void permute_entries(const idx_t* perm) override;
+
+    /// Inline for performance - called frequently in search hot path.
+    const float* get_cum_sum(idx_t i) const {
+        return cum_sums.data() + i * (num_panorama_levels + 1);
+    }
+
+    /// Compute cumulative sums for a vector (used both for database points and
+    /// queries).
+    static void compute_cum_sums(
+            const float* x,
+            float* dst_cum_sums,
+            int d,
+            int num_panorama_levels,
+            int panorama_level_width);
+
+    std::vector<float> cum_sums;
+    const size_t panorama_level_width;
+    const size_t num_panorama_levels;
+};
+
 /** PQ index topped with with a HNSW structure to access elements
  * more efficiently.
  */
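Given the declaration above, constructing the Panorama variant looks like IndexHNSWFlat with one extra argument. A usage sketch under the assumptions stated in the doc comment (L2 metric, high-dimensional data); the parameter values and placeholder data are illustrative:

```cpp
#include <faiss/IndexHNSW.h>

#include <vector>

int main() {
    int d = 768;                  // Panorama pays off mostly for large d
    int M = 32;                   // HNSW connectivity
    int num_panorama_levels = 8;  // granularity of progressive refinement
    faiss::IndexHNSWFlatPanorama index(d, M, num_panorama_levels);

    // add() also fills the per-vector cum_sums used for partial-distance
    // pruning during search.
    std::vector<float> xb(1000 * d, 0.5f); // placeholder database
    index.add(1000, xb.data());

    index.hnsw.efSearch = 64; // usual HNSW search-time parameter
    int k = 10;
    std::vector<float> D(k);
    std::vector<faiss::idx_t> I(k);
    index.search(1, xb.data(), k, D.data(), I.data());
    return 0;
}
```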
data/vendor/faiss/faiss/IndexIDMap.cpp

@@ -59,7 +59,7 @@ IndexIDMapTemplate<IndexT>::IndexIDMapTemplate(IndexT* index) : index(index) {
 }

 template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::
+void IndexIDMapTemplate<IndexT>::add_ex(
         idx_t,
         const void*,
         NumericType numeric_type) {
@@ -78,11 +78,11 @@ void IndexIDMapTemplate<IndexT>::add(
 }

 template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::
+void IndexIDMapTemplate<IndexT>::train_ex(
         idx_t n,
         const void* x,
         NumericType numeric_type) {
-    index->
+    index->train_ex(n, x, numeric_type);
     this->is_trained = index->is_trained;
 }

@@ -90,7 +90,8 @@ template <typename IndexT>
 void IndexIDMapTemplate<IndexT>::train(
         idx_t n,
         const typename IndexT::component_t* x) {
-
+    train_ex(
+            n,
             static_cast<const void*>(x),
             component_t_to_numeric<typename IndexT::component_t>());
 }
@@ -103,12 +104,12 @@ void IndexIDMapTemplate<IndexT>::reset() {
 }

 template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::
+void IndexIDMapTemplate<IndexT>::add_with_ids_ex(
         idx_t n,
         const void* x,
         NumericType numeric_type,
         const idx_t* xids) {
-    index->
+    index->add_ex(n, x, numeric_type);
     for (idx_t i = 0; i < n; i++) {
         id_map.push_back(xids[i]);
     }
@@ -120,7 +121,7 @@ void IndexIDMapTemplate<IndexT>::add_with_ids(
         idx_t n,
         const typename IndexT::component_t* x,
         const idx_t* xids) {
-
+    add_with_ids_ex(
             n,
             static_cast<const void*>(x),
             component_t_to_numeric<typename IndexT::component_t>(),
@@ -166,7 +167,7 @@ struct ScopedSelChange {
 } // namespace

 template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::
+void IndexIDMapTemplate<IndexT>::search_ex(
         idx_t n,
         const void* x,
         NumericType numeric_type,
@@ -193,7 +194,7 @@ void IndexIDMapTemplate<IndexT>::searchEx(
             sel_change.set(params_non_const, &this_idtrans);
         }
     }
-    index->
+    index->search_ex(n, x, numeric_type, k, distances, labels, params);
     idx_t* li = labels;
 #pragma omp parallel for
     for (idx_t i = 0; i < n * k; i++) {
@@ -209,7 +210,7 @@ void IndexIDMapTemplate<IndexT>::search(
         typename IndexT::distance_t* distances,
         idx_t* labels,
         const SearchParameters* params) const {
-
+    search_ex(
             n,
             static_cast<const void*>(x),
             component_t_to_numeric<typename IndexT::component_t>(),
@@ -301,13 +302,13 @@ IndexIDMap2Template<IndexT>::IndexIDMap2Template(IndexT* index)
         : IndexIDMapTemplate<IndexT>(index) {}

 template <typename IndexT>
-void IndexIDMap2Template<IndexT>::
+void IndexIDMap2Template<IndexT>::add_with_ids_ex(
         idx_t n,
         const void* x,
         NumericType numeric_type,
         const idx_t* xids) {
     size_t prev_ntotal = this->ntotal;
-    IndexIDMapTemplate<IndexT>::
+    IndexIDMapTemplate<IndexT>::add_with_ids_ex(n, x, numeric_type, xids);
     for (size_t i = prev_ntotal; i < this->ntotal; i++) {
         rev_map[this->id_map[i]] = i;
     }
@@ -318,7 +319,7 @@ void IndexIDMap2Template<IndexT>::add_with_ids(
         idx_t n,
         const typename IndexT::component_t* x,
         const idx_t* xids) {
-
+    add_with_ids_ex(
             n,
             static_cast<const void*>(x),
             component_t_to_numeric<typename IndexT::component_t>(),
data/vendor/faiss/faiss/IndexIDMap.h

@@ -23,7 +23,7 @@ struct IndexIDMapTemplate : IndexT {
     using distance_t = typename IndexT::distance_t;

     IndexT* index = nullptr; ///! the sub-index
-    bool own_fields = false; ///! whether pointers are deleted in
+    bool own_fields = false; ///! whether pointers are deleted in destructor
     std::vector<idx_t> id_map;

     explicit IndexIDMapTemplate(IndexT* index);
@@ -31,7 +31,7 @@ struct IndexIDMapTemplate : IndexT {
     /// @param xids if non-null, ids to store for the vectors (size n)
     void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
             override;
-    void
+    void add_with_ids_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -39,7 +39,7 @@ struct IndexIDMapTemplate : IndexT {

     /// this will fail. Use add_with_ids
     void add(idx_t n, const component_t* x) override;
-    void
+    void add_ex(idx_t n, const void* x, NumericType numeric_type) override;

     void search(
             idx_t n,
@@ -48,7 +48,7 @@ struct IndexIDMapTemplate : IndexT {
             distance_t* distances,
             idx_t* labels,
             const SearchParameters* params = nullptr) const override;
-    void
+    void search_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
@@ -58,7 +58,7 @@ struct IndexIDMapTemplate : IndexT {
             const SearchParameters* params = nullptr) const override;

     void train(idx_t n, const component_t* x) override;
-    void
+    void train_ex(idx_t n, const void* x, NumericType numeric_type) override;

     void reset() override;

@@ -104,7 +104,7 @@ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {

     void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
             override;
-    void
+    void add_with_ids_ex(
             idx_t n,
             const void* x,
             NumericType numeric_type,
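The IndexIDMap changes above rename the type-erased entry points to `add_ex`, `add_with_ids_ex`, `train_ex`, and `search_ex`; the typed overloads keep their public signatures and forward a `const void*` plus a NumericType tag. A stripped-down illustration of that dispatch shape (ToyIndex and its members are hypothetical stand-ins, not faiss types):

```cpp
#include <cstdio>

// Hypothetical stand-ins for faiss::NumericType and component_t dispatch.
enum class NumericType { Float32, Float16 };

template <typename T>
NumericType component_t_to_numeric();

template <>
NumericType component_t_to_numeric<float>() {
    return NumericType::Float32;
}

struct ToyIndex {
    // Single type-erased entry point, analogous to train_ex above.
    void train_ex(long n, const void* x, NumericType nt) {
        (void)x;
        std::printf("train %ld vectors, numeric type %d\n",
                    n,
                    static_cast<int>(nt));
    }

    // Typed wrapper keeps the old public signature and just forwards,
    // mirroring IndexIDMapTemplate<IndexT>::train in the .cpp hunk.
    void train(long n, const float* x) {
        train_ex(n, static_cast<const void*>(x),
                 component_t_to_numeric<float>());
    }
};

int main() {
    float data[8] = {};
    ToyIndex idx;
    idx.train(2, data); // dispatches through train_ex
    return 0;
}
```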
data/vendor/faiss/faiss/IndexIVF.cpp

@@ -506,7 +506,7 @@ void IndexIVF::search_preassigned(
         };

         // single list scan using the current scanner (with query
-        // set
+        // set properly) and storing results in simi and idxi
         auto scan_one_list = [&](idx_t key,
                                  float coarse_dis_i,
                                  float* simi,
data/vendor/faiss/faiss/IndexIVF.h

@@ -160,7 +160,7 @@ struct IndexIVFInterface : Level1Quantizer {
 * index maps to a list (aka inverted list or posting list), where the
 * id of the vector is stored.
 *
-* The inverted list object is required only after
+* The inverted list object is required only after training. If none is
 * set externally, an ArrayInvertedLists is used automatically.
 *
 * At search time, the vector to be searched is also quantized, and
@@ -171,7 +171,7 @@ struct IndexIVFInterface : Level1Quantizer {
 * lists are visited.
 *
 * Sub-classes implement a post-filtering of the index that refines
-* the distance estimation from the query to
+* the distance estimation from the query to database vectors.
 */
 struct IndexIVF : Index, IndexIVFInterface {
     /// Access to the actual data
@@ -497,12 +497,12 @@ struct InvertedListScanner {
     /// compute a single query-to-code distance
     virtual float distance_to_code(const uint8_t* code) const = 0;

-    /** scan a set of codes, compute distances to current query and
+    /** scan a set of codes, compute distances to current query, and
      * update heap of results if necessary. Default implementation
      * calls distance_to_code.
      *
-     * @param n
-     * @param codes
+     * @param n number of codes to scan
+     * @param codes codes to scan (n * code_size)
      * @param ids corresponding ids (ignored if store_pairs)
      * @param distances heap distances (size k)
      * @param labels heap labels (size k)
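The scan_codes documentation above now names its parameters. For orientation, a simplified sketch of what a scan over n codes does with those parameters: compute a distance per code (after the scanner's query and list have been set) and keep the best k results. This is illustrative only; the real default implementation in faiss additionally handles heap ordering per metric, store_pairs, and ID selectors. It assumes a smaller-is-better distance and that distances/labels already hold the current top-k state:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative sketch of the scan_codes contract described above.
// Scanner is any type with distance_to_code(const uint8_t*) const.
template <typename Scanner>
size_t scan_codes_sketch(
        const Scanner& scanner,
        size_t n,              // number of codes to scan
        const uint8_t* codes,  // n * code_size bytes
        size_t code_size,
        const int64_t* ids,    // corresponding ids
        float* distances,      // current top-k distances (size k)
        int64_t* labels,       // current top-k labels (size k)
        size_t k) {
    size_t nup = 0; // number of result updates, like scan_codes returns
    for (size_t j = 0; j < n; j++) {
        float dis = scanner.distance_to_code(codes + j * code_size);
        // Replace the current worst entry if this code is better.
        float* worst = std::max_element(distances, distances + k);
        if (dis < *worst) { // smaller-is-better, e.g. L2
            labels[worst - distances] = ids[j];
            *worst = dis;
            nup++;
        }
    }
    return nup;
}

// Dummy scanner: each 1-byte "code" is its own distance.
struct ByteScanner {
    float distance_to_code(const uint8_t* code) const {
        return static_cast<float>(*code);
    }
};

int main() {
    uint8_t codes[5] = {9, 3, 7, 1, 8};
    int64_t ids[5] = {10, 11, 12, 13, 14};
    std::vector<float> D(2, 1e30f); // top-2, initialized to "worst"
    std::vector<int64_t> I(2, -1);
    size_t nup = scan_codes_sketch(
            ByteScanner{}, 5, codes, 1, ids, D.data(), I.data(), 2);
    std::printf("updates=%zu best ids: %lld %lld\n",
                nup, (long long)I[0], (long long)I[1]);
    return 0;
}
```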
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp

@@ -14,6 +14,7 @@

 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/FastScanDistancePostProcessing.h>
 #include <faiss/impl/LookupTableScaler.h>
 #include <faiss/impl/pq4_fast_scan.h>
 #include <faiss/invlists/BlockInvertedLists.h>
@@ -212,7 +213,9 @@ void IndexIVFAdditiveQuantizerFastScan::estimate_norm_scale(
     size_t index_nprobe = nprobe;
     nprobe = 1;
     CoarseQuantized cq{index_nprobe, coarse_dis.data(), coarse_ids.data()};
-
+    FastScanDistancePostProcessing empty_context{};
+
+    compute_LUT(n, x, cq, dis_tables, biases, empty_context);
     nprobe = index_nprobe;

     float scale = 0;
@@ -314,8 +317,10 @@ void IndexIVFAdditiveQuantizerFastScan::search(
     }

     NormTableScaler scaler(norm_scale);
+    FastScanDistancePostProcessing context;
+    context.norm_scaler = &scaler;
     IndexIVFFastScan::CoarseQuantized cq{nprobe};
-    search_dispatch_implem(n, x, k, distances, labels, cq,
+    search_dispatch_implem(n, x, k, distances, labels, cq, context);
 }

 /*********************************************************
@@ -383,7 +388,8 @@ void IndexIVFAdditiveQuantizerFastScan::compute_LUT(
         const float* x,
         const CoarseQuantized& cq,
         AlignedTable<float>& dis_tables,
-        AlignedTable<float>& biases
+        AlignedTable<float>& biases,
+        const FastScanDistancePostProcessing&) const {
     const size_t dim12 = ksub * M;
     const size_t ip_dim12 = aq->M * ksub;
     const size_t nprobe = cq.nprobe;
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h

@@ -12,6 +12,7 @@
 #include <faiss/IndexIVFAdditiveQuantizer.h>
 #include <faiss/IndexIVFFastScan.h>
 #include <faiss/impl/AdditiveQuantizer.h>
+#include <faiss/impl/FastScanDistancePostProcessing.h>
 #include <faiss/impl/ProductAdditiveQuantizer.h>
 #include <faiss/utils/AlignedTable.h>

@@ -101,7 +102,8 @@ struct IndexIVFAdditiveQuantizerFastScan : IndexIVFFastScan {
             const float* x,
             const CoarseQuantized& cq,
             AlignedTable<float>& dis_tables,
-            AlignedTable<float>& biases
+            AlignedTable<float>& biases,
+            const FastScanDistancePostProcessing& context) const override;
 };

 struct IndexIVFLocalSearchQuantizerFastScan