faiss 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/faiss/index.cpp +25 -6
  4. data/ext/faiss/index_binary.cpp +17 -4
  5. data/ext/faiss/kmeans.cpp +6 -6
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  8. data/vendor/faiss/faiss/AutoTune.h +1 -1
  9. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  10. data/vendor/faiss/faiss/Clustering.h +2 -2
  11. data/vendor/faiss/faiss/IVFlib.cpp +1 -2
  12. data/vendor/faiss/faiss/IVFlib.h +1 -1
  13. data/vendor/faiss/faiss/Index.h +10 -10
  14. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  15. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  18. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  25. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  26. data/vendor/faiss/faiss/IndexFastScan.h +107 -7
  27. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
  29. data/vendor/faiss/faiss/IndexHNSW.h +1 -1
  30. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  31. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  32. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  33. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  34. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  35. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  36. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  37. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  38. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  39. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  40. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
  41. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  42. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  43. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  44. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  46. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
  48. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
  50. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  51. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  52. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  53. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  54. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  55. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  56. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  57. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
  62. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  63. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  64. data/vendor/faiss/faiss/MetricType.h +1 -1
  65. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  66. data/vendor/faiss/faiss/clone_index.cpp +3 -1
  67. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  68. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  69. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  70. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  71. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
  72. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  73. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  74. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  75. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  76. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  77. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  78. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  79. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  80. data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
  81. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  82. data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/HNSW.h +4 -4
  84. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  85. data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
  86. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  87. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  89. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  90. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  91. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  92. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  93. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  94. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  95. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  96. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  97. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  98. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
  100. data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
  101. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
  102. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
  103. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  104. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  105. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
  106. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
  107. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  108. data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
  109. data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
  110. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  111. data/vendor/faiss/faiss/impl/io.h +4 -4
  112. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  113. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  114. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  115. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  116. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  117. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  118. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  119. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  120. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  121. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  122. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  123. data/vendor/faiss/faiss/index_factory.cpp +43 -1
  124. data/vendor/faiss/faiss/index_factory.h +1 -1
  125. data/vendor/faiss/faiss/index_io.h +1 -1
  126. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
  127. data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
  128. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  129. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  130. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  131. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  132. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  133. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  134. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  135. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  136. data/vendor/faiss/faiss/utils/distances.h +2 -2
  137. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  138. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  139. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  140. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  141. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  142. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  143. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  144. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  145. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  146. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  147. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  148. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  149. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  150. data/vendor/faiss/faiss/utils/utils.cpp +5 -2
  151. data/vendor/faiss/faiss/utils/utils.h +2 -2
  152. metadata +12 -1
data/vendor/faiss/faiss/index_factory.cpp

@@ -27,10 +27,12 @@
  #include <faiss/IndexIVFAdditiveQuantizer.h>
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
  #include <faiss/IndexIVFFlat.h>
+ #include <faiss/IndexIVFFlatPanorama.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQFastScan.h>
  #include <faiss/IndexIVFPQR.h>
  #include <faiss/IndexIVFRaBitQ.h>
+ #include <faiss/IndexIVFRaBitQFastScan.h>
  #include <faiss/IndexIVFSpectralHash.h>
  #include <faiss/IndexLSH.h>
  #include <faiss/IndexLattice.h>
@@ -39,6 +41,7 @@
  #include <faiss/IndexPQFastScan.h>
  #include <faiss/IndexPreTransform.h>
  #include <faiss/IndexRaBitQ.h>
+ #include <faiss/IndexRaBitQFastScan.h>
  #include <faiss/IndexRefine.h>
  #include <faiss/IndexRowwiseMinMax.h>
  #include <faiss/IndexScalarQuantizer.h>
@@ -49,6 +52,9 @@
  #include <faiss/IndexBinaryHNSW.h>
  #include <faiss/IndexBinaryHash.h>
  #include <faiss/IndexBinaryIVF.h>
+ #include <faiss/IndexIDMap.h>
+ #include <algorithm>
+ #include <cctype>
  #include <string>

  namespace faiss {
@@ -326,6 +332,10 @@ IndexIVF* parse_IndexIVF(
      if (match("FlatDedup")) {
          return new IndexIVFFlatDedup(get_q(), d, nlist, mt, own_il);
      }
+     if (match("FlatPanorama([0-9]+)?")) {
+         int nlevels = mres_to_int(sm[1], 8); // default to 8 levels
+         return new IndexIVFFlatPanorama(get_q(), d, nlist, nlevels, mt, own_il);
+     }
      if (match(sq_pattern)) {
          return new IndexIVFScalarQuantizer(
                  get_q(),
@@ -450,6 +460,10 @@ IndexIVF* parse_IndexIVF(
      if (match(rabitq_pattern)) {
          return new IndexIVFRaBitQ(get_q(), d, nlist, mt, own_il);
      }
+     if (match("RaBitQfs(_[0-9]+)?")) {
+         int bbs = mres_to_int(sm[1], 32, 1);
+         return new IndexIVFRaBitQFastScan(get_q(), d, nlist, mt, bbs, own_il);
+     }
      return nullptr;
  }

@@ -676,6 +690,12 @@ Index* parse_other_indexes(
          return new IndexRaBitQ(d, metric);
      }

+     // IndexRaBitQFastScan
+     if (match("RaBitQfs(_[0-9]+)?")) {
+         int bbs = mres_to_int(sm[1], 32, 1);
+         return new IndexRaBitQFastScan(d, metric, bbs);
+     }
+
      return nullptr;
  }

@@ -934,6 +954,28 @@ IndexBinary* index_binary_factory(
          bool own_invlists) {
      IndexBinary* index = nullptr;

+     std::smatch sm;
+     std::string desc_str(description);
+
+     // Handle IDMap2 and IDMap wrappers (prefix or suffix)
+     if (re_match(desc_str, "(.+),IDMap2", sm) ||
+         re_match(desc_str, "IDMap2,(.+)", sm)) {
+         IndexBinary* sub_index =
+                 index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+         IndexBinaryIDMap2* idmap2 = new IndexBinaryIDMap2(sub_index);
+         idmap2->own_fields = true;
+         return idmap2;
+     }
+
+     if (re_match(desc_str, "(.+),IDMap", sm) ||
+         re_match(desc_str, "IDMap,(.+)", sm)) {
+         IndexBinary* sub_index =
+                 index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+         IndexBinaryIDMap* idmap = new IndexBinaryIDMap(sub_index);
+         idmap->own_fields = true;
+         return idmap;
+     }
+
      int ncentroids = -1;
      int M, nhash, b;

@@ -959,7 +1001,7 @@ IndexBinary* index_binary_factory(
      } else if (sscanf(description, "BHash%d", &b) == 1) {
          index = new IndexBinaryHash(d, b);

-     } else if (std::string(description) == "BFlat") {
+     } else if (desc_str == "BFlat") {
          index = new IndexBinaryFlat(d);

      } else {
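The hunks above extend the factory grammar: parse_IndexIVF now accepts "FlatPanorama[n_levels]" and "RaBitQfs[_bbs]", parse_other_indexes accepts a standalone "RaBitQfs[_bbs]", and index_binary_factory understands IDMap / IDMap2 wrappers. Below is a minimal usage sketch, assuming the descriptor spellings implied by the regexes above; the dimension and nlist values are purely illustrative, not part of the diff.

    #include <faiss/MetricType.h>
    #include <faiss/index_factory.h>

    int main() {
        int d = 128; // illustrative dimensionality (multiple of 8, so it also works for binary indexes)

        // IVF-Flat with Panorama level-oriented storage; the numeric suffix is the
        // number of levels and defaults to 8 when omitted (mres_to_int(sm[1], 8)).
        faiss::Index* ivf_pano =
                faiss::index_factory(d, "IVF1024,FlatPanorama4", faiss::METRIC_L2);

        // RaBitQ FastScan, flat and IVF variants; the optional _<bbs> suffix picks
        // the fast-scan block size (default 32 per mres_to_int(sm[1], 32, 1)).
        faiss::Index* rabitq_fs =
                faiss::index_factory(d, "RaBitQfs", faiss::METRIC_L2);
        faiss::Index* ivf_rabitq_fs =
                faiss::index_factory(d, "IVF1024,RaBitQfs_64", faiss::METRIC_L2);

        // Binary factory with the new IDMap2 wrapper (prefix or suffix form).
        faiss::IndexBinary* bin = faiss::index_binary_factory(d, "BFlat,IDMap2");

        delete bin;
        delete ivf_rabitq_fs;
        delete rabitq_fs;
        delete ivf_pano;
        return 0;
    }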
data/vendor/faiss/faiss/index_factory.h

@@ -12,7 +12,7 @@

  namespace faiss {

- /** Build and index with the sequence of processing steps described in
+ /** Build an index with the sequence of processing steps described in
   * the string. */
  Index* index_factory(
          int d,
data/vendor/faiss/faiss/index_io.h

@@ -16,7 +16,7 @@
   * object that abstracts the medium.
   *
   * The read functions return objects that should be deallocated with
-  * delete. All references within these objectes are owned by the
+  * delete. All references within these objects are owned by the
   * object.
   */

data/vendor/faiss/faiss/invlists/InvertedLists.cpp

@@ -346,6 +346,211 @@ void ArrayInvertedLists::permute_invlists(const idx_t* map) {

  ArrayInvertedLists::~ArrayInvertedLists() {}

+ /***********************************************
+  * ArrayInvertedListsPanorama implementation
+  **********************************************/
+
+ ArrayInvertedListsPanorama::ArrayInvertedListsPanorama(
+         size_t nlist,
+         size_t code_size,
+         size_t n_levels)
+         : ArrayInvertedLists(nlist, code_size),
+           n_levels(n_levels),
+           level_width(
+                   (((code_size / sizeof(float)) + n_levels - 1) / n_levels) *
+                   sizeof(float)) {
+     FAISS_THROW_IF_NOT(n_levels > 0);
+     FAISS_THROW_IF_NOT(code_size % sizeof(float) == 0);
+     FAISS_THROW_IF_NOT_MSG(
+             !use_iterator,
+             "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+     FAISS_ASSERT(level_width % sizeof(float) == 0);
+
+     cum_sums.resize(nlist);
+ }
+
+ const float* ArrayInvertedListsPanorama::get_cum_sums(size_t list_no) const {
+     assert(list_no < nlist);
+     return cum_sums[list_no].data();
+ }
+
+ size_t ArrayInvertedListsPanorama::add_entries(
+         size_t list_no,
+         size_t n_entry,
+         const idx_t* ids_in,
+         const uint8_t* code) {
+     assert(list_no < nlist);
+     size_t o = ids[list_no].size();
+
+     ids[list_no].resize(o + n_entry);
+     memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
+
+     size_t new_size = o + n_entry;
+     size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+     codes[list_no].resize(num_batches * kBatchSize * code_size);
+     cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+
+     copy_codes_to_level_layout(list_no, o, n_entry, code);
+     compute_cumulative_sums(list_no, o, n_entry, code);
+
+     return o;
+ }
+
+ void ArrayInvertedListsPanorama::update_entries(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const idx_t* ids_in,
+         const uint8_t* code) {
+     assert(list_no < nlist);
+     assert(n_entry + offset <= ids[list_no].size());
+
+     memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
+     copy_codes_to_level_layout(list_no, offset, n_entry, code);
+     compute_cumulative_sums(list_no, offset, n_entry, code);
+ }
+
+ void ArrayInvertedListsPanorama::resize(size_t list_no, size_t new_size) {
+     ids[list_no].resize(new_size);
+
+     size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+     codes[list_no].resize(num_batches * kBatchSize * code_size);
+     cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+ }
+
+ const uint8_t* ArrayInvertedListsPanorama::get_single_code(
+         size_t list_no,
+         size_t offset) const {
+     assert(list_no < nlist);
+     assert(offset < ids[list_no].size());
+
+     uint8_t* recons_buffer = new uint8_t[code_size];
+
+     const uint8_t* codes_base = codes[list_no].data();
+
+     size_t batch_no = offset / kBatchSize;
+     size_t pos_in_batch = offset % kBatchSize;
+     size_t batch_offset = batch_no * kBatchSize * code_size;
+
+     for (size_t level = 0; level < n_levels; level++) {
+         size_t level_offset = level * level_width * kBatchSize;
+         const uint8_t* src = codes_base + batch_offset + level_offset +
+                 pos_in_batch * level_width;
+         uint8_t* dest = recons_buffer + level * level_width;
+         size_t copy_size =
+                 std::min(level_width, code_size - level * level_width);
+         memcpy(dest, src, copy_size);
+     }
+
+     return recons_buffer;
+ }
+
+ void ArrayInvertedListsPanorama::release_codes(
+         size_t list_no,
+         const uint8_t* codes) const {
+     // Only delete if it's heap-allocated (from get_single_code).
+     // If it's from get_codes (raw storage), it will be codes[list_no].data()
+     if (codes != this->codes[list_no].data()) {
+         delete[] codes;
+     }
+ }
+
+ InvertedListsIterator* ArrayInvertedListsPanorama::get_iterator(
+         size_t /* list_no */,
+         void* /* inverted_list_context */) const {
+     FAISS_THROW_MSG(
+             "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+     return nullptr;
+ }
+
+ void ArrayInvertedListsPanorama::compute_cumulative_sums(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const uint8_t* code) {
+     // Cast to float* is safe here as we guarantee codes are always float
+     // vectors for `IndexIVFFlatPanorama` (verified by the constructor).
+     const float* vectors = reinterpret_cast<const float*>(code);
+     const size_t d = code_size / sizeof(float);
+
+     std::vector<float> suffix_sums(d + 1);
+
+     for (size_t entry_idx = 0; entry_idx < n_entry; entry_idx++) {
+         size_t current_pos = offset + entry_idx;
+         size_t batch_no = current_pos / kBatchSize;
+         size_t pos_in_batch = current_pos % kBatchSize;
+
+         const float* vector = vectors + entry_idx * d;
+
+         // Compute suffix sums of squared values.
+         suffix_sums[d] = 0.0f;
+         for (int j = d - 1; j >= 0; j--) {
+             float squared_val = vector[j] * vector[j];
+             suffix_sums[j] = suffix_sums[j + 1] + squared_val;
+         }
+
+         // Store cumulative sums in batch-oriented layout.
+         size_t cumsum_batch_offset = batch_no * kBatchSize * (n_levels + 1);
+         float* cumsum_base = cum_sums[list_no].data();
+
+         const size_t level_width_floats = level_width / sizeof(float);
+         for (size_t level = 0; level < n_levels; level++) {
+             size_t start_idx = level * level_width_floats;
+             size_t cumsum_offset =
+                     cumsum_batch_offset + level * kBatchSize + pos_in_batch;
+             if (start_idx < d) {
+                 cumsum_base[cumsum_offset] = sqrt(suffix_sums[start_idx]);
+             } else {
+                 cumsum_base[cumsum_offset] = 0.0f;
+             }
+         }
+
+         // Last level sum is always 0.
+         size_t cumsum_offset =
+                 cumsum_batch_offset + n_levels * kBatchSize + pos_in_batch;
+         cumsum_base[cumsum_offset] = 0.0f;
+     }
+ }
+
+ // Helper method to copy codes into level-oriented batch layout at a given
+ // offset in the list.
+ void ArrayInvertedListsPanorama::copy_codes_to_level_layout(
+         size_t list_no,
+         size_t offset,
+         size_t n_entry,
+         const uint8_t* code) {
+     uint8_t* codes_base = codes[list_no].data();
+     size_t current_pos = offset;
+     for (size_t entry_idx = 0; entry_idx < n_entry;) {
+         // Determine which batch we're in and position within that batch.
+         size_t batch_no = current_pos / kBatchSize;
+         size_t pos_in_batch = current_pos % kBatchSize;
+         size_t entries_in_this_batch =
+                 std::min(n_entry - entry_idx, kBatchSize - pos_in_batch);
+
+         // Copy entries into level-oriented layout for this batch.
+         size_t batch_offset = batch_no * kBatchSize * code_size;
+         for (size_t level = 0; level < n_levels; level++) {
+             size_t level_offset = level * level_width * kBatchSize;
+             size_t start_byte = level * level_width;
+             size_t copy_size =
+                     std::min(level_width, code_size - level * level_width);
+
+             for (size_t i = 0; i < entries_in_this_batch; i++) {
+                 const uint8_t* src =
+                         code + (entry_idx + i) * code_size + start_byte;
+                 uint8_t* dest = codes_base + batch_offset + level_offset +
+                         (pos_in_batch + i) * level_width;
+
+                 memcpy(dest, src, copy_size);
+             }
+         }
+
+         entry_idx += entries_in_this_batch;
+         current_pos += entries_in_this_batch;
+     }
+ }
+
  /*****************************************************************
   * Meta-inverted list implementations
   *****************************************************************/
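The add path above writes codes in a level-oriented, batch-of-128 layout: within each batch, the bytes of level 0 for all vectors come first, then level 1, and so on, with level_width rounded up to a whole number of floats. A standalone sketch of that addressing, mirroring get_single_code and copy_codes_to_level_layout above (the helper name and parameters are hypothetical, not part of the faiss API):

    #include <cstddef>

    // Byte offset, inside one list's code array, of the start of `level` for the
    // vector stored at position `pos`. batch_size corresponds to kBatchSize (128).
    size_t panorama_code_offset(
            size_t pos,
            size_t level,
            size_t code_size,   // bytes per full vector
            size_t level_width, // bytes per level
            size_t batch_size) {
        size_t batch_no = pos / batch_size;
        size_t pos_in_batch = pos % batch_size;
        size_t batch_offset = batch_no * batch_size * code_size; // batches before this one
        size_t level_offset = level * level_width * batch_size;  // earlier levels of this batch
        return batch_offset + level_offset + pos_in_batch * level_width;
    }

get_single_code walks this formula once per level and copies min(level_width, code_size - level * level_width) bytes back into a flat reconstruction buffer.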
data/vendor/faiss/faiss/invlists/InvertedLists.h

@@ -276,6 +276,68 @@ struct ArrayInvertedLists : InvertedLists {
      ~ArrayInvertedLists() override;
  };

+ /// Level-oriented storage as defined in the IVFFlat section of Panorama
+ /// (https://www.arxiv.org/pdf/2510.00566).
+ struct ArrayInvertedListsPanorama : ArrayInvertedLists {
+     static constexpr size_t kBatchSize = 128;
+     std::vector<MaybeOwnedVector<float>> cum_sums;
+     const size_t n_levels;
+     const size_t level_width; // in code units
+
+     ArrayInvertedListsPanorama(size_t nlist, size_t code_size, size_t n_levels);
+
+     const float* get_cum_sums(size_t list_no) const;
+
+     size_t add_entries(
+             size_t list_no,
+             size_t n_entry,
+             const idx_t* ids,
+             const uint8_t* code) override;
+
+     void update_entries(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const idx_t* ids,
+             const uint8_t* code) override;
+
+     void resize(size_t list_no, size_t new_size) override;
+
+     /// Panorama's layout make it impractical to support iterators as defined
+     /// by Faiss (i.e. `InvertedListsIterator` API). The iterator would require
+     /// to allocate and reassemble the vector at each call.
+     /// Hence, we override this method to throw an error, this effectively
+     /// disables the `iterate_codes` and `iterate_codes_range` methods.
+     InvertedListsIterator* get_iterator(
+             size_t list_no,
+             void* inverted_list_context = nullptr) const override;
+
+     /// Reconstructs a single code from level-oriented storage to flat format.
+     const uint8_t* get_single_code(size_t list_no, size_t offset)
+             const override;
+
+     /// Frees codes returned by `get_single_code`.
+     void release_codes(size_t list_no, const uint8_t* codes) const override;
+
+    private:
+     /// Helper method to copy codes into level-oriented batch layout at a given
+     /// offset in the list.
+     void copy_codes_to_level_layout(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const uint8_t* code);
+
+     /// Helper method to compute the cumulative sums of the codes.
+     /// The cumsums also follow the level-oriented batch layout to minimize the
+     /// number of random memory accesses.
+     void compute_cumulative_sums(
+             size_t list_no,
+             size_t offset,
+             size_t n_entry,
+             const uint8_t* code);
+ };
+
  /*****************************************************************
   * Meta-inverted lists
   *
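Alongside the codes, cum_sums stores, for every vector and level, the square root of the suffix sum of squared components starting at that level (with a trailing 0 at index n_levels), in the same batch-major order. A hedged reader sketch, assuming the layout written by compute_cumulative_sums in the .cpp hunk above (the helper name is hypothetical):

    #include <cstddef>

    // Returns sqrt(sum of squared components from the start of `level` to the end
    // of the vector) for the vector at position `pos` in a list, given the pointer
    // returned by ArrayInvertedListsPanorama::get_cum_sums(list_no).
    float panorama_suffix_norm(
            const float* cum_sums_list,
            size_t pos,
            size_t level,
            size_t n_levels,
            size_t batch_size /* kBatchSize == 128 */) {
        size_t batch_no = pos / batch_size;
        size_t pos_in_batch = pos % batch_size;
        size_t batch_offset = batch_no * batch_size * (n_levels + 1);
        return cum_sums_list[batch_offset + level * batch_size + pos_in_batch];
    }

These per-level suffix norms are the quantities a Panorama-style scan can use to bound the contribution of dimensions it has not yet visited, per the paper cited in the comment above.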
data/vendor/faiss/faiss/utils/AlignedTable.h

@@ -25,7 +25,7 @@ inline bool is_aligned_pointer(const void* x) {
  }

  // class that manages suitably aligned arrays for SIMD
- // T should be a POV type. The default alignment is 32 for AVX
+ // T should be a POD type. The default alignment is 32 for AVX
  template <class T, int A = 32>
  struct AlignedTableTightAlloc {
      T* ptr;
data/vendor/faiss/faiss/utils/Heap.cpp

@@ -139,7 +139,7 @@ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
      }
  }

- // explicit instanciations
+ // explicit instantiations

  template struct HeapArray<CMin<float, int64_t>>;
  template struct HeapArray<CMax<float, int64_t>>;
@@ -238,7 +238,7 @@ void merge_knn_results(
      }
  }

- // explicit instanciations
+ // explicit instantiations
  #define INSTANTIATE(C, distance_t) \
      template void merge_knn_results<int64_t, C<distance_t, int>>( \
              size_t, \
data/vendor/faiss/faiss/utils/Heap.h

@@ -150,7 +150,7 @@ inline void heap_replace_top(
      bh_ids[i] = id;
  }

- /* Partial instanciation for heaps with TI = int64_t */
+ /* Partial instantiation for heaps with TI = int64_t */

  template <typename T>
  inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
@@ -393,7 +393,7 @@ inline void heap_addn(
      }
  }

- /* Partial instanciation for heaps with TI = int64_t */
+ /* Partial instantiation for heaps with TI = int64_t */

  template <typename T>
  inline void minheap_addn(
@@ -489,7 +489,7 @@ struct HeapArray {
          return val + key * k;
      }

-     /// Correspponding identifiers
+     /// Corresponding identifiers
      TI* get_ids(size_t key) {
          return ids + key * k;
      }
data/vendor/faiss/faiss/utils/NeuralNet.cpp

@@ -71,7 +71,7 @@ Tensor2DTemplate<T> Tensor2DTemplate<T>::column(size_t j) const {
      return out;
  }

- // explicit template instanciation
+ // explicit template instantiation
  template struct Tensor2DTemplate<float>;
  template struct Tensor2DTemplate<int32_t>;

data/vendor/faiss/faiss/utils/NeuralNet.h

@@ -75,7 +75,7 @@ struct Embedding {
  };

  /// Feed forward layer that expands to a hidden dimension, applies a ReLU non
- /// linearity and maps back to the orignal dimension
+ /// linearity and maps back to the original dimension
  struct FFN {
      Linear linear1, linear2;

@@ -103,7 +103,7 @@ struct QINCoStep {
          return residual_blocks[i];
      }

-     /** encode a set of vectors x with intial estimate xhat. Optionally return
+     /** encode a set of vectors x with initial estimate xhat. Optionally return
       * the delta to be added to xhat to form the new xhat */
      nn::Int32Tensor2D encode(
              const nn::Tensor2D& xhat,
@@ -141,7 +141,7 @@ struct QINCo : NeuralNetCodec {

      nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;

-     virtual ~QINCo() {}
+     virtual ~QINCo() override {}
  };

  } // namespace faiss
data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h

@@ -50,8 +50,8 @@
  //   for j in range(0, NBUCKETS):
  //     idx = beam * n + i * NBUCKETS + j
  //     if distances[idx] < local_min_distances[j]:
- //       local_min_distances[i] = distances[idx]
- //       local_min_indices[i] = indices[idx]
+ //       local_min_distances[j] = distances[idx]
+ //       local_min_indices[j] = indices[idx]
  //
  // for j in range(0, NBUCKETS):
  //   heap.push(local_min_distances[j], local_min_indices[j])

data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h

@@ -106,7 +106,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
                  distance_candidate,
                  _CMP_LE_OS);

- // // blend seems to be slower that min
+ // // blend seems to be slower than min
  // const __m256 min_distances_new = _mm256_blendv_ps(
  //         distance_candidate,
  //         min_distances_i[j][p],
@@ -120,7 +120,7 @@
                          min_indices_i[j][p]),
                  comparison));

- // // blend seems to be slower that min
+ // // blend seems to be slower than min
  // const __m256 max_distances_new = _mm256_blendv_ps(
  //         min_distances_i[j][p],
  //         distance_candidate,
data/vendor/faiss/faiss/utils/approx_topk/mode.h

@@ -21,7 +21,7 @@
  /// It seems that only the limited number of combinations are
  /// meaningful, because of the limited supply of SIMD registers.
  /// Also, certain combinations, such as B32_D1 and B16_D1, were concluded
- /// to be not very precise in benchmarks, so ones were not introduced.
+ /// to be not very precise in benchmarks, so they were not introduced.
  ///
  /// TODO: Consider d-ary SIMD heap.

data/vendor/faiss/faiss/utils/distances.h

@@ -324,7 +324,7 @@ void knn_inner_product(
   * vector y, for the L2 distance
   * @param x      query vectors, size nx * d
   * @param y      database vectors, size ny * d
-  * @param res    result heap strcture, which also provides k. Sorted on output
+  * @param res    result heap structure, which also provides k. Sorted on output
   * @param y_norm2 (optional) norms for the y vectors (nullptr or size ny)
   * @param sel    search in this subset of vectors
   */
@@ -389,7 +389,7 @@ void knn_inner_products_by_idx(
   * @param x      query vectors, size nx * d
   * @param y      database vectors, size (max(ids) + 1) * d
   * @param subset subset of database vectors to consider, size (nx, nsubset)
-  * @param res    rIDesult structure
+  * @param res    result structure
   * @param ld_subset stride for the subset array. -1: use nsubset, 0: all queries
   *        process the same subset
   */
data/vendor/faiss/faiss/utils/extra_distances-inl.h

@@ -5,6 +5,8 @@
   * LICENSE file in the root directory of this source tree.
   */

+ #pragma once
+
  /** In this file are the implementations of extra metrics beyond L2
   * and inner product */

@@ -188,7 +190,7 @@ inline float VectorDistance<METRIC_GOWER>::operator()(

  /***************************************************************************
   * Dispatching function that takes a metric type and a consumer object
-  * the consumer object should contain a retun type T and a operation template
+  * the consumer object should contain a return type T and a operation template
   * function f() that is called to perform the operation. The first argument
   * of the function is the VectorDistance object. The rest are passed in as is.
   **************************************************************************/
data/vendor/faiss/faiss/utils/hamming-inl.h

@@ -5,6 +5,8 @@
   * LICENSE file in the root directory of this source tree.
   */

+ #pragma once
+
  namespace faiss {

  // BitstringWriter and BitstringReader functions
data/vendor/faiss/faiss/utils/hamming.cpp

@@ -257,12 +257,13 @@ void hammings_knn_mc(

      std::vector<HCounterState<HammingComputer>> cs;
      for (size_t i = 0; i < na; ++i) {
-         cs.push_back(HCounterState<HammingComputer>(
-                 all_counters.data() + i * nBuckets,
-                 all_ids_per_dis.get() + i * nBuckets * k,
-                 a + i * bytes_per_code,
-                 8 * bytes_per_code,
-                 k));
+         cs.push_back(
+                 HCounterState<HammingComputer>(
+                         all_counters.data() + i * nBuckets,
+                         all_ids_per_dis.get() + i * nBuckets * k,
+                         a + i * bytes_per_code,
+                         8 * bytes_per_code,
+                         k));
      }

      const size_t block_size = hamming_batch_size;
data/vendor/faiss/faiss/utils/hamming.h

@@ -14,7 +14,7 @@
   * fvecs2bitvecs).
   *
   * User-defined type hamdis_t is used for distances because at this time
-  * it is still uncler clear how we will need to balance
+  * it is still unclear clear how we will need to balance
   * - flexibility in vector size (may need 16- or even 8-bit vectors)
   * - memory usage
   * - cache-misses when dealing with large volumes of data (fewer bits is better)
data/vendor/faiss/faiss/utils/hamming_distance/common.h

@@ -30,8 +30,7 @@ inline int popcount64(uint64_t x) {
  // This table was moved from .cpp to .h file, because
  // otherwise it was causing compilation errors while trying to
  // compile swig modules on Windows.
- // todo for C++17: switch to 'inline constexpr'
- static constexpr uint8_t hamdis_tab_ham_bytes[256] = {
+ inline constexpr uint8_t hamdis_tab_ham_bytes[256] = {
          0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
          2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
          2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
data/vendor/faiss/faiss/utils/partitioning.cpp

@@ -140,7 +140,7 @@ typename C::T partition_fuzzy_median3(
      using T = typename C::T;

      // here we use bissection with a median of 3 to find the threshold and
-     // compress the arrays afterwards. So it's a n*log(n) algoirithm rather than
+     // compress the arrays afterwards. So it's a n*log(n) algorithm rather than
      // qselect's O(n) but it avoids shuffling around the array.

      FAISS_THROW_IF_NOT(n >= 3);
@@ -350,7 +350,7 @@ int simd_compress_array(
          }
      }

-     // handle remaining, only striclty lt ones.
+     // handle remaining, only strictly lt ones.
      for (; i0 + 15 < n; i0 += 16) {
          simd16uint16 v(vals + i0);
          simd16uint16 max2 = max_func<C>(v, thr16);
@@ -506,7 +506,7 @@ uint16_t simd_partition_fuzzy_with_bounds(

      uint64_t t2 = get_cy();

-     partition_stats.bissect_cycles += t1 - t0;
+     partition_stats.bisect_cycles += t1 - t0;
      partition_stats.compress_cycles += t2 - t1;

      return thresh;
@@ -662,7 +662,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
          }
      }

-     IFV printf("end bissection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);
+     IFV printf("end bisection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);

      if (!C::is_max) {
          if (n_eq == 0) {
@@ -762,7 +762,7 @@ typename C::T partition_fuzzy(
              vals, ids, n, q_min, q_max, q_out);
      }

- // explicit template instanciations
+ // explicit template instantiations

  template float partition_fuzzy<CMin<float, int64_t>>(
          float* vals,