faiss 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/ext/faiss/index.cpp +36 -10
  4. data/ext/faiss/index_binary.cpp +19 -6
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/ext/faiss/numo.hpp +273 -123
  7. data/lib/faiss/version.rb +1 -1
  8. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  9. data/vendor/faiss/faiss/AutoTune.h +1 -1
  10. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  11. data/vendor/faiss/faiss/Clustering.h +2 -2
  12. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  13. data/vendor/faiss/faiss/IVFlib.h +1 -1
  14. data/vendor/faiss/faiss/Index.h +10 -10
  15. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  16. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  19. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  20. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  22. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  23. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  24. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  26. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  27. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  28. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  29. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  30. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  31. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  32. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  33. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  34. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  37. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  38. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  39. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  40. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  42. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  43. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  44. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  46. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  48. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  50. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  53. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  55. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  57. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  58. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  59. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  60. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  62. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  63. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  64. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  65. data/vendor/faiss/faiss/MetricType.h +1 -1
  66. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  67. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  68. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  69. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  72. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  73. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  75. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  77. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  79. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  80. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  81. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  82. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  83. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  84. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  85. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  86. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  88. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  89. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  91. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  92. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  93. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  94. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  95. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  97. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  98. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  99. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  101. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  103. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  104. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  105. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  107. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  108. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  109. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  110. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  111. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  112. data/vendor/faiss/faiss/impl/io.h +4 -4
  113. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  114. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  115. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  117. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  118. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  121. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  122. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  123. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  124. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  125. data/vendor/faiss/faiss/index_factory.h +1 -1
  126. data/vendor/faiss/faiss/index_io.h +1 -1
  127. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  128. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  129. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  130. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  131. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  132. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  133. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  134. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  136. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  137. data/vendor/faiss/faiss/utils/distances.h +2 -2
  138. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  139. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  140. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  141. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  142. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  143. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  144. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  145. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  146. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  149. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  150. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  151. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  152. data/vendor/faiss/faiss/utils/utils.h +2 -2
  153. metadata +14 -3
data/vendor/faiss/faiss/IndexIVFFastScan.h
@@ -7,9 +7,8 @@
 
 #pragma once
 
-#include <memory>
-
 #include <faiss/IndexIVF.h>
+#include <faiss/impl/FastScanDistancePostProcessing.h>
 #include <faiss/utils/AlignedTable.h>
 
 namespace faiss {
@@ -63,6 +62,15 @@ struct IndexIVFFastScan : IndexIVF {
     // quantizer used to pack the codes
     Quantizer* fine_quantizer = nullptr;
 
+    /** Constructor for IndexIVFFastScan
+     *
+     * @param quantizer coarse quantizer for IVF clustering
+     * @param d dimensionality of vectors
+     * @param nlist number of inverted lists
+     * @param code_size size of each code in bytes
+     * @param metric distance metric to use
+     * @param own_invlists whether to own the inverted lists
+     */
     IndexIVFFastScan(
             Index* quantizer,
             size_t d,
@@ -73,7 +81,16 @@ struct IndexIVFFastScan : IndexIVF {
 
     IndexIVFFastScan();
 
-    /// called by implementations
+    /** Initialize the fast scan functionality (called by implementations)
+     *
+     * @param fine_quantizer fine quantizer for encoding
+     * @param M number of subquantizers
+     * @param nbits number of bits per subquantizer
+     * @param nlist number of inverted lists
+     * @param metric distance metric to use
+     * @param bbs block size for SIMD processing
+     * @param own_invlists whether to own the inverted lists
+     */
     void init_fastscan(
             Quantizer* fine_quantizer,
             size_t M,
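
The parameters documented above surface directly in the concrete fast-scan subclasses. A minimal, hedged construction sketch follows; the IndexIVFPQFastScan constructor shape (quantizer, d, nlist, M, nbits, metric, bbs) is assumed from upstream faiss, and all values are illustrative:

// Hedged sketch, not part of the diff: M, nbits, and bbs here feed
// init_fastscan() as documented above. nbits = 4 is the fast-scan layout.
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQFastScan.h>

int main() {
    size_t d = 64, nlist = 256, M = 32, nbits = 4;
    faiss::IndexFlatL2 coarse(d); // coarse quantizer for IVF clustering
    faiss::IndexIVFPQFastScan index(
            &coarse, d, nlist, M, nbits, faiss::METRIC_L2, /*bbs=*/32);
    // train/add/search as with any IndexIVF
    return 0;
}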
@@ -91,34 +108,72 @@ struct IndexIVFFastScan : IndexIVF {
     /// orig's inverted lists (for debugging)
     InvertedLists* orig_invlists = nullptr;
 
+    /** Add vectors with specific IDs to the index
+     *
+     * @param n number of vectors to add
+     * @param x vectors to add (n * d)
+     * @param xids IDs for the vectors (n)
+     */
     void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
-
     // prepare look-up tables
 
     virtual bool lookup_table_is_3d() const = 0;
 
     // compact way of conveying coarse quantization results
     struct CoarseQuantized {
-        size_t nprobe;
+        size_t nprobe = 0;
         const float* dis = nullptr;
         const idx_t* ids = nullptr;
     };
 
+    /* Compute distance table for query set, given a list of coarse
+     * quantizers.
+     *
+     * @param n number of queries
+     * @param x query vectors (n, d)
+     * @param cq coarse quantization results
+     * @param dis_tables output distance tables
+     * @param biases output bias values
+     * @param context processing context containing query factors
+     * processor
+     */
     virtual void compute_LUT(
             size_t n,
             const float* x,
             const CoarseQuantized& cq,
             AlignedTable<float>& dis_tables,
-            AlignedTable<float>& biases) const = 0;
+            AlignedTable<float>& biases,
+            const FastScanDistancePostProcessing& context) const = 0;
 
+    /** Compute quantized lookup tables for distance computation
+     *
+     * @param n number of query vectors
+     * @param x query vectors (n * d)
+     * @param cq coarse quantization results
+     * @param dis_tables output quantized distance tables
+     * @param biases output quantized bias values
+     * @param normalizers output normalization factors
+     * @param context processing context containing query factors
+     * processor
+     */
     void compute_LUT_uint8(
             size_t n,
             const float* x,
             const CoarseQuantized& cq,
             AlignedTable<uint8_t>& dis_tables,
             AlignedTable<uint16_t>& biases,
-            float* normalizers) const;
+            float* normalizers,
+            const FastScanDistancePostProcessing& context) const;
 
+    /** Search for k nearest neighbors
+     *
+     * @param n number of query vectors
+     * @param x query vectors (n * d)
+     * @param k number of nearest neighbors to find
+     * @param distances output distances (n * k)
+     * @param labels output labels/indices (n * k)
+     * @param params optional search parameters
+     */
     void search(
             idx_t n,
             const float* x,
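
A small sketch of how the CoarseQuantized POD above is meant to be filled (field names come from the hunk; the helper name is illustrative). With the new default, nprobe = 0 on a default-constructed value reads as "no coarse results yet":

// Hedged sketch: alias precomputed coarse results into a CoarseQuantized.
#include <vector>
#include <faiss/IndexIVFFastScan.h>

faiss::IndexIVFFastScan::CoarseQuantized make_cq_sketch(
        const faiss::IndexIVFFastScan& index,
        faiss::idx_t n,
        const float* x,
        std::vector<float>& dis,       // caller keeps these buffers alive,
        std::vector<faiss::idx_t>& ids // cq only borrows the pointers
) {
    const size_t nprobe = index.nprobe;
    dis.resize(n * nprobe);
    ids.resize(n * nprobe);
    // the coarse quantizer fills per-query centroid distances and list ids
    index.quantizer->search(n, x, nprobe, dis.data(), ids.data());
    return {nprobe, dis.data(), ids.data()};
}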
@@ -127,6 +182,19 @@ struct IndexIVFFastScan : IndexIVF {
             idx_t* labels,
             const SearchParameters* params = nullptr) const override;
 
+    /** Search with pre-assigned coarse quantization
+     *
+     * @param n number of query vectors
+     * @param x query vectors (n * d)
+     * @param k number of nearest neighbors to find
+     * @param assign coarse cluster assignments (n * nprobe)
+     * @param centroid_dis distances to centroids (n * nprobe)
+     * @param distances output distances (n * k)
+     * @param labels output labels/indices (n * k)
+     * @param store_pairs whether to store cluster-relative pairs
+     * @param params optional IVF search parameters
+     * @param stats optional search statistics
+     */
     void search_preassigned(
             idx_t n,
             const float* x,
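
The parameter documentation above pairs naturally with an externally driven coarse step. A minimal sketch (function name illustrative; the call shape follows the declaration in this hunk):

// Sketch: run coarse quantization yourself, then scan only those lists.
#include <vector>
#include <faiss/IndexIVF.h>

void preassigned_search_sketch(
        const faiss::IndexIVF& index,
        faiss::idx_t n,
        const float* x,
        faiss::idx_t k) {
    const size_t nprobe = index.nprobe;
    std::vector<faiss::idx_t> assign(n * nprobe);
    std::vector<float> centroid_dis(n * nprobe);
    // step 1: coarse assignments (n * nprobe list ids + centroid distances)
    index.quantizer->search(n, x, nprobe, centroid_dis.data(), assign.data());

    std::vector<float> distances(n * k);
    std::vector<faiss::idx_t> labels(n * k);
    // step 2: scan only the preassigned lists
    index.search_preassigned(
            n, x, k, assign.data(), centroid_dis.data(),
            distances.data(), labels.data(), /*store_pairs=*/false);
}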
@@ -139,6 +207,14 @@ struct IndexIVFFastScan : IndexIVF {
             const IVFSearchParameters* params = nullptr,
             IndexIVFStats* stats = nullptr) const override;
 
+    /** Range search for all neighbors within radius
+     *
+     * @param n number of query vectors
+     * @param x query vectors (n * d)
+     * @param radius search radius
+     * @param result output range search results
+     * @param params optional search parameters
+     */
     void range_search(
             idx_t n,
             const float* x,
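
A matching sketch for range_search(), using the standard RangeSearchResult layout (lims/labels/distances) from faiss/impl/AuxIndexStructures.h:

// Sketch: hits for query q live in the half-open range [lims[q], lims[q+1]).
#include <cstdio>
#include <faiss/IndexIVFFastScan.h>
#include <faiss/impl/AuxIndexStructures.h>

void range_search_sketch(
        const faiss::IndexIVFFastScan& index,
        faiss::idx_t n,
        const float* x,
        float radius) {
    faiss::RangeSearchResult result(n);
    index.range_search(n, x, radius, &result);
    // print the hits of the first query
    for (size_t j = result.lims[0]; j < result.lims[1]; j++) {
        std::printf("q0 hit: id=%lld dis=%g\n",
                    (long long)result.labels[j], result.distances[j]);
    }
}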
@@ -146,7 +222,45 @@ struct IndexIVFFastScan : IndexIVF {
             RangeSearchResult* result,
             const SearchParameters* params = nullptr) const override;
 
-    // internal search funcs
+    /** Create a KNN handler for this index type
+     *
+     * This method can be overridden by derived classes to provide
+     * specialized handlers (e.g., IVFRaBitQHeapHandler for RaBitQ indexes).
+     * Base implementation creates standard handlers based on k and impl.
+     *
+     * @param is_max true for max-heap (inner product), false for
+     * min-heap (L2 distance)
+     * @param impl implementation number:
+     * - even (10, 12, 14): use heap for top-k
+     * - odd (11, 13, 15): use reservoir sampling
+     * @param n number of queries
+     * @param k number of neighbors to find per query
+     * @param distances output array for distances (n * k), will be
+     * populated by handler
+     * @param labels output array for result IDs (n * k), will be
+     * populated by handler
+     * @param sel optional ID selector to filter results (nullptr =
+     * no filtering)
+     * @param context processing context containing additional data
+     * @param normalizers optional array of size 2*n for converting quantized
+     * uint16 distances to float.
+     *
+     * @return Allocated result handler (caller owns and must delete).
+     * Handler processes SIMD batches and populates distances/labels.
+     *
+     * @note The returned handler must be deleted by caller after use.
+     * Typical usage: handler->begin() → process batches → handler->end()
+     */
+    virtual SIMDResultHandlerToFloat* make_knn_handler(
+            bool is_max,
+            int impl,
+            idx_t n,
+            idx_t k,
+            float* distances,
+            idx_t* labels,
+            const IDSelector* sel,
+            const FastScanDistancePostProcessing& context,
+            const float* normalizers = nullptr) const;
 
     // dispatch to implementations and parallelize
     void search_dispatch_implem(
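
The @note above compresses the handler lifecycle; spelled out as a hedged sketch (the begin(normalizers)/end() pair is assumed from faiss/impl/simd_result_handlers.h, and impl = 10 selects a heap handler per the comment):

// Ownership sketch: the caller owns the handler and brackets batch
// processing with begin()/end(); end() flushes into distances/labels.
#include <memory>
#include <faiss/IndexIVFFastScan.h>
#include <faiss/impl/simd_result_handlers.h>

void knn_handler_lifecycle_sketch(
        const faiss::IndexIVFFastScan& index,
        faiss::idx_t n,
        faiss::idx_t k,
        float* distances,
        faiss::idx_t* labels,
        const faiss::FastScanDistancePostProcessing& context,
        const float* normalizers) {
    std::unique_ptr<faiss::SIMDResultHandlerToFloat> handler(
            index.make_knn_handler(
                    /*is_max=*/false, /*impl=*/10, n, k,
                    distances, labels, /*sel=*/nullptr, context, normalizers));
    handler->begin(normalizers); // normalizers may be nullptr per the docs
    // ... feed SIMD scan batches to the handler here ...
    handler->end(); // converts quantized results into distances/labels
}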
@@ -156,7 +270,7 @@ struct IndexIVFFastScan : IndexIVF {
             float* distances,
             idx_t* labels,
             const CoarseQuantized& cq,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     void range_search_dispatch_implem(
@@ -165,7 +279,7 @@ struct IndexIVFFastScan : IndexIVF {
             float radius,
             RangeSearchResult& rres,
             const CoarseQuantized& cq_in,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     // impl 1 and 2 are just for verification
@@ -177,7 +291,7 @@ struct IndexIVFFastScan : IndexIVF {
             float* distances,
             idx_t* labels,
             const CoarseQuantized& cq,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     template <class C>
@@ -188,7 +302,7 @@ struct IndexIVFFastScan : IndexIVF {
             float* distances,
             idx_t* labels,
             const CoarseQuantized& cq,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     // implem 10 and 12 are not multithreaded internally, so
@@ -200,7 +314,7 @@ struct IndexIVFFastScan : IndexIVF {
             const CoarseQuantized& cq,
             size_t* ndis_out,
             size_t* nlist_out,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     void search_implem_12(
@@ -210,7 +324,7 @@ struct IndexIVFFastScan : IndexIVF {
             const CoarseQuantized& cq,
             size_t* ndis_out,
             size_t* nlist_out,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     // implem 14 is multithreaded internally across nprobes and queries
@@ -222,7 +336,7 @@ struct IndexIVFFastScan : IndexIVF {
             idx_t* labels,
             const CoarseQuantized& cq,
             int impl,
-            const NormTableScaler* scaler,
+            const FastScanDistancePostProcessing& context,
             const IVFSearchParameters* params = nullptr) const;
 
     // reconstruct vectors from packed invlists
@@ -234,16 +348,57 @@ struct IndexIVFFastScan : IndexIVF {
     // reconstruct orig invlists (for debugging)
     void reconstruct_orig_invlists();
 
-    /** Decode a set of vectors.
+    /** Decode a set of vectors
      *
-     * NOTE: The codes in the IndexFastScan object are non-contiguous.
-     * But this method requires a contiguous representation.
+     * NOTE: The codes in the IndexFastScan object are non-contiguous.
+     * But this method requires a contiguous representation.
      *
      * @param n number of vectors
     * @param bytes input encoded vectors, size n * code_size
      * @param x output vectors, size n * d
      */
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+   protected:
+    /** Preprocess metadata from encoded vectors before packing.
+     *
+     * Called during add_with_ids after encode_vectors but before codes
+     * are packed into SIMD-friendly blocks. Subclasses can override to
+     * extract and store metadata embedded in codes or perform other
+     * pre-packing operations.
+     *
+     * Default implementation: no-op
+     *
+     * Example use case:
+     * - IndexIVFRaBitQFastScan extracts factor data from codes for use
+     *   during search-time distance corrections
+     *
+     * @param n number of vectors encoded
+     * @param flat_codes encoded vectors (n * code_size bytes)
+     * @param start_global_idx starting global index (ntotal before add)
+     */
+    virtual void preprocess_code_metadata(
+            idx_t n,
+            const uint8_t* flat_codes,
+            idx_t start_global_idx);
+
+    /** Get stride for interpreting codes during SIMD packing.
+     *
+     * The stride determines how to read codes when packing them into
+     * SIMD-friendly block format. This is needed when codes contain
+     * embedded metadata that should be skipped during packing.
+     *
+     * Default implementation: returns 0 (use standard M-byte stride)
+     *
+     * Example use case:
+     * - IndexIVFRaBitQFastScan returns code_size because codes contain
+     *   embedded factor data after the quantized bits
+     *
+     * @return stride in bytes:
+     *         - 0: use default stride (M bytes, standard PQ/AQ codes)
+     *         - >0: use custom stride (e.g., code_size for embedded metadata)
+     */
+    virtual size_t code_packing_stride() const;
 };
 
 struct IVFFastScanStats {
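
The two protected hooks close the loop for codes that carry trailing metadata. A hypothetical subclass sketch, mirroring the IndexIVFRaBitQFastScan use case the comments describe (meta_bytes and the trailing-bytes layout are assumptions for illustration, and the class stays abstract since the pure-virtual LUT methods are omitted):

// Hypothetical subclass: stash per-vector metadata before packing, then
// make the packer step over full codes instead of the default M-byte stride.
#include <cstdint>
#include <vector>
#include <faiss/IndexIVFFastScan.h>

struct MetadataFastScanSketch : faiss::IndexIVFFastScan {
    size_t meta_bytes = 8;         // assumed trailing metadata per code
    std::vector<uint8_t> metadata; // side storage filled before packing

    void preprocess_code_metadata(
            faiss::idx_t n,
            const uint8_t* flat_codes,
            faiss::idx_t /*start_global_idx*/) override {
        // copy the assumed trailing bytes of each code into side storage
        for (faiss::idx_t i = 0; i < n; i++) {
            const uint8_t* code = flat_codes + i * code_size;
            metadata.insert(
                    metadata.end(),
                    code + code_size - meta_bytes,
                    code + code_size);
        }
    }

    // codes carry embedded metadata, so packing must advance by code_size
    size_t code_packing_stride() const override {
        return code_size;
    }
};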
data/vendor/faiss/faiss/IndexIVFFlat.cpp
@@ -13,6 +13,7 @@
 
 #include <cinttypes>
 #include <cstdio>
+#include <numeric>
 
 #include <faiss/IndexFlat.h>
 
data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// -*- c++ -*-
+
+#include <faiss/IndexIVFFlatPanorama.h>
+
+#include <cstdio>
+
+#include <faiss/IndexFlat.h>
+
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/IDSelector.h>
+#include <faiss/impl/PanoramaStats.h>
+
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/extra_distances.h>
+#include <faiss/utils/utils.h>
+
+namespace faiss {
+
+IndexIVFFlatPanorama::IndexIVFFlatPanorama(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        int n_levels,
+        MetricType metric,
+        bool own_invlists)
+        : IndexIVFFlat(quantizer, d, nlist, metric, false), n_levels(n_levels) {
+    // For now, we only support L2 distance.
+    // Supporting dot product and cosine distance is a trivial addition
+    // left for future work.
+    FAISS_THROW_IF_NOT(metric == METRIC_L2);
+
+    // We construct the inverted lists here so that we can use the
+    // level-oriented storage. This does not cause a leak as we constructed
+    // IndexIVF first, with own_invlists set to false.
+    this->invlists = new ArrayInvertedListsPanorama(nlist, code_size, n_levels);
+    this->own_invlists = own_invlists;
+}
+
+IndexIVFFlatPanorama::IndexIVFFlatPanorama() : n_levels(0) {}
+
+namespace {
+
+template <typename VectorDistance, bool use_sel>
+struct IVFFlatScannerPanorama : InvertedListScanner {
+    VectorDistance vd;
+    const ArrayInvertedListsPanorama* storage;
+    using C = typename VectorDistance::C;
+
+    IVFFlatScannerPanorama(
+            const VectorDistance& vd,
+            const ArrayInvertedListsPanorama* storage,
+            bool store_pairs,
+            const IDSelector* sel)
+            : InvertedListScanner(store_pairs, sel), vd(vd), storage(storage) {
+        keep_max = vd.is_similarity;
+        code_size = vd.d * sizeof(float);
+        cum_sums.resize(storage->n_levels + 1);
+    }
+
+    const float* xi = nullptr;
+    std::vector<float> cum_sums;
+    float q_norm = 0.0f;
+    void set_query(const float* query) override {
+        this->xi = query;
+
+        const size_t d = vd.d;
+        const size_t level_width_floats = storage->level_width / sizeof(float);
+
+        std::vector<float> suffix_sums(d + 1);
+        suffix_sums[d] = 0.0f;
+
+        for (int j = d - 1; j >= 0; j--) {
+            float squared_val = query[j] * query[j];
+            suffix_sums[j] = suffix_sums[j + 1] + squared_val;
+        }
+
+        for (size_t level = 0; level < storage->n_levels; level++) {
+            size_t start_idx = level * level_width_floats;
+            if (start_idx < d) {
+                cum_sums[level] = sqrt(suffix_sums[start_idx]);
+            } else {
+                cum_sums[level] = 0.0f;
+            }
+        }
+
+        cum_sums[storage->n_levels] = 0.0f;
+        q_norm = suffix_sums[0];
+    }
+
+    void set_list(idx_t list_no, float /* coarse_dis */) override {
+        this->list_no = list_no;
+    }
+
+    /// This function is unreachable as `IndexIVF` only calls this within
+    /// iterators, which are not supported by `IndexIVFFlatPanorama`.
+    /// To avoid undefined behavior, we throw an error here.
+    float distance_to_code(const uint8_t* /* code */) const override {
+        FAISS_THROW_MSG(
+                "IndexIVFFlatPanorama does not support distance_to_code");
+    }
+
+    /// Helper function for progressive filtering that both scan_codes and
+    /// scan_codes_range use. Processes a batch of vectors through all levels,
+    /// computing exact distances and pruning based on a threshold.
+    /// Returns the number of active survivors after all levels.
+    size_t progressive_filter_batch(
+            size_t batch_no,
+            size_t list_size,
+            const uint8_t* codes_base,
+            const float* cum_sums_data,
+            float threshold,
+            std::vector<float>& exact_distances,
+            std::vector<uint32_t>& active_indices,
+            const idx_t* ids,
+            PanoramaStats& local_stats) const {
+        const size_t d = vd.d;
+        const size_t level_width_floats = storage->level_width / sizeof(float);
+
+        size_t batch_start = batch_no * storage->kBatchSize;
+        size_t curr_batch_size =
+                std::min(list_size - batch_start, storage->kBatchSize);
+
+        size_t cumsum_batch_offset =
+                batch_no * storage->kBatchSize * (storage->n_levels + 1);
+        const float* batch_cum_sums = cum_sums_data + cumsum_batch_offset;
+
+        size_t batch_offset = batch_no * storage->kBatchSize * code_size;
+        const uint8_t* storage_base = codes_base + batch_offset;
+
+        // Initialize active set with ID-filtered vectors.
+        size_t num_active = 0;
+        for (size_t i = 0; i < curr_batch_size; i++) {
+            size_t global_idx = batch_start + i;
+            bool include = !use_sel || sel->is_member(ids[global_idx]);
+
+            active_indices[num_active] = i;
+            float cum_sum = batch_cum_sums[i];
+            exact_distances[i] = cum_sum * cum_sum + q_norm;
+
+            num_active += include;
+        }
+
+        if (num_active == 0) {
+            return 0;
+        }
+
+        size_t total_active = num_active;
+
+        const float* level_cum_sums = batch_cum_sums + storage->kBatchSize;
+
+        // Progressive filtering through levels.
+        for (size_t level = 0; level < storage->n_levels; level++) {
+            local_stats.total_dims_scanned += num_active;
+            local_stats.total_dims += total_active;
+
+            float query_cum_norm = cum_sums[level + 1];
+
+            size_t level_offset =
+                    level * storage->level_width * storage->kBatchSize;
+            const float* level_storage =
+                    (const float*)(storage_base + level_offset);
+
+            size_t next_active = 0;
+            for (size_t i = 0; i < num_active; i++) {
+                uint32_t idx = active_indices[i];
+                const float* yj = level_storage + idx * level_width_floats;
+                const float* query_level = xi + level * level_width_floats;
+
+                size_t actual_level_width = std::min(
+                        level_width_floats, d - level * level_width_floats);
+                float dot_product =
+                        fvec_inner_product(query_level, yj, actual_level_width);
+
+                exact_distances[idx] -= 2.0f * dot_product;
+
+                float cum_sum = level_cum_sums[idx];
+                float cauchy_schwarz_bound = 2.0f * cum_sum * query_cum_norm;
+                float lower_bound = exact_distances[idx] - cauchy_schwarz_bound;
+
+                active_indices[next_active] = idx;
+                next_active += C::cmp(threshold, lower_bound) ? 1 : 0;
+            }
+
+            num_active = next_active;
+            level_cum_sums += storage->kBatchSize;
+        }
+
+        return num_active;
+    }
+
+    size_t scan_codes(
+            size_t list_size,
+            const uint8_t* codes,
+            const idx_t* ids,
+            float* simi,
+            idx_t* idxi,
+            size_t k) const override {
+        size_t nup = 0;
+
+        const size_t n_batches =
+                (list_size + storage->kBatchSize - 1) / storage->kBatchSize;
+
+        const uint8_t* codes_base = codes;
+        const float* cum_sums_data = storage->get_cum_sums(list_no);
+
+        std::vector<float> exact_distances(storage->kBatchSize);
+        std::vector<uint32_t> active_indices(storage->kBatchSize);
+
+        PanoramaStats local_stats;
+        local_stats.reset();
+
+        // Panorama's IVFFlat core progressive filtering algorithm:
+        // Process vectors in batches for cache efficiency. For each batch:
+        // 1. Apply ID selection filter and initialize distances
+        //    (||y||^2 + ||x||^2).
+        // 2. Maintain an "active set" of candidate indices that haven't been
+        //    pruned yet.
+        // 3. For each level, refine distances incrementally and compact the
+        //    active set:
+        //    - Compute dot product for current level: exact_dist -= 2*<x,y>.
+        //    - Use Cauchy-Schwarz bound on remaining levels to get lower bound
+        //    - Prune candidates whose lower bound exceeds k-th best distance.
+        //    - Compact active_indices to remove pruned candidates (branchless)
+        // 4. After all levels, survivors are exact distances; update heap.
+        // This achieves early termination while maintaining SIMD-friendly
+        // sequential access patterns in the level-oriented storage layout.
+        for (size_t batch_no = 0; batch_no < n_batches; batch_no++) {
+            size_t batch_start = batch_no * storage->kBatchSize;
+
+            size_t num_active = progressive_filter_batch(
+                    batch_no,
+                    list_size,
+                    codes_base,
+                    cum_sums_data,
+                    simi[0],
+                    exact_distances,
+                    active_indices,
+                    ids,
+                    local_stats);
+
+            // Add batch survivors to heap.
+            for (size_t i = 0; i < num_active; i++) {
+                uint32_t idx = active_indices[i];
+                size_t global_idx = batch_start + idx;
+                float dis = exact_distances[idx];
+
+                if (C::cmp(simi[0], dis)) {
+                    int64_t id = store_pairs ? lo_build(list_no, global_idx)
+                                             : ids[global_idx];
+                    heap_replace_top<C>(k, simi, idxi, dis, id);
+                    nup++;
+                }
+            }
+        }
+
+        indexPanorama_stats.add(local_stats);
+        return nup;
+    }
+
+    void scan_codes_range(
+            size_t list_size,
+            const uint8_t* codes,
+            const idx_t* ids,
+            float radius,
+            RangeQueryResult& res) const override {
+        const size_t n_batches =
+                (list_size + storage->kBatchSize - 1) / storage->kBatchSize;
+
+        const uint8_t* codes_base = codes;
+        const float* cum_sums_data = storage->get_cum_sums(list_no);
+
+        std::vector<float> exact_distances(storage->kBatchSize);
+        std::vector<uint32_t> active_indices(storage->kBatchSize);
+
+        PanoramaStats local_stats;
+        local_stats.reset();
+
+        // Same progressive filtering as scan_codes, but with fixed radius
+        // threshold instead of dynamic heap threshold.
+        for (size_t batch_no = 0; batch_no < n_batches; batch_no++) {
+            size_t batch_start = batch_no * storage->kBatchSize;
+
+            size_t num_active = progressive_filter_batch(
+                    batch_no,
+                    list_size,
+                    codes_base,
+                    cum_sums_data,
+                    radius,
+                    exact_distances,
+                    active_indices,
+                    ids,
+                    local_stats);
+
+            // Add batch survivors to range result.
+            for (size_t i = 0; i < num_active; i++) {
+                uint32_t idx = active_indices[i];
+                size_t global_idx = batch_start + idx;
+                float dis = exact_distances[idx];
+
+                if (C::cmp(radius, dis)) {
+                    int64_t id = store_pairs ? lo_build(list_no, global_idx)
+                                             : ids[global_idx];
+                    res.add(dis, id);
+                }
+            }
+        }
+
+        indexPanorama_stats.add(local_stats);
+    }
+};
+
+struct Run_get_InvertedListScanner {
+    using T = InvertedListScanner*;
+
+    template <class VD>
+    InvertedListScanner* f(
+            VD& vd,
+            const IndexIVFFlatPanorama* ivf,
+            bool store_pairs,
+            const IDSelector* sel) {
+        // Safely cast to ArrayInvertedListsPanorama to access cumulative sums.
+        const ArrayInvertedListsPanorama* storage =
+                dynamic_cast<const ArrayInvertedListsPanorama*>(ivf->invlists);
+        FAISS_THROW_IF_NOT_MSG(
+                storage,
+                "IndexIVFFlatPanorama requires ArrayInvertedListsPanorama");
+
+        if (sel) {
+            return new IVFFlatScannerPanorama<VD, true>(
+                    vd, storage, store_pairs, sel);
+        } else {
+            return new IVFFlatScannerPanorama<VD, false>(
+                    vd, storage, store_pairs, sel);
+        }
+    }
+};
+
+} // anonymous namespace
+
+InvertedListScanner* IndexIVFFlatPanorama::get_InvertedListScanner(
+        bool store_pairs,
+        const IDSelector* sel,
+        const IVFSearchParameters*) const {
+    Run_get_InvertedListScanner run;
+    return dispatch_VectorDistance(
+            d, metric_type, metric_arg, run, this, store_pairs, sel);
+}
+
+void IndexIVFFlatPanorama::reconstruct_from_offset(
+        int64_t list_no,
+        int64_t offset,
+        float* recons) const {
+    const uint8_t* code = invlists->get_single_code(list_no, offset);
+    memcpy(recons, code, code_size);
+    invlists->release_codes(list_no, code);
+}
+
+} // namespace faiss
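
Taken with the IndexIVFFlatPanorama.h entry in the file list, a hedged end-to-end usage sketch for the new index (constructor arguments as declared in this file; any defaults live in the header, so all six are passed explicitly, and only METRIC_L2 is accepted per the check above):

// n_levels controls how many refinement levels the progressive
// Cauchy-Schwarz filter uses; data and sizes here are illustrative.
#include <random>
#include <vector>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlatPanorama.h>

int main() {
    const size_t d = 64, nlist = 16, nb = 1000;
    const int n_levels = 8;

    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFFlatPanorama index(
            &coarse, d, nlist, n_levels, faiss::METRIC_L2,
            /*own_invlists=*/true);

    // random training/database vectors
    std::vector<float> xb(nb * d);
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    for (float& v : xb) {
        v = u(rng);
    }

    index.train(nb, xb.data());
    index.add(nb, xb.data());

    const faiss::idx_t k = 5;
    std::vector<float> D(k);
    std::vector<faiss::idx_t> I(k);
    index.search(1, xb.data(), k, D.data(), I.data());
    return 0;
}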