RubyGems - faiss - Versions diffs - 0.2.3 → 0.2.5 - Mend

faiss 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/LICENSE.txt +1 -1
data/README.md +23 -21
data/ext/faiss/extconf.rb +11 -0
data/ext/faiss/index.cpp +4 -4
data/ext/faiss/index_binary.cpp +6 -6
data/ext/faiss/product_quantizer.cpp +4 -4
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +13 -0
data/vendor/faiss/faiss/Clustering.cpp +32 -0
data/vendor/faiss/faiss/Clustering.h +14 -0
data/vendor/faiss/faiss/IVFlib.cpp +101 -2
data/vendor/faiss/faiss/IVFlib.h +26 -2
data/vendor/faiss/faiss/Index.cpp +36 -3
data/vendor/faiss/faiss/Index.h +43 -6
data/vendor/faiss/faiss/Index2Layer.cpp +24 -93
data/vendor/faiss/faiss/Index2Layer.h +8 -17
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +610 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +253 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
data/vendor/faiss/faiss/IndexBinary.h +18 -3
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
data/vendor/faiss/faiss/IndexFastScan.h +145 -0
data/vendor/faiss/faiss/IndexFlat.cpp +52 -69
data/vendor/faiss/faiss/IndexFlat.h +16 -19
data/vendor/faiss/faiss/IndexFlatCodes.cpp +101 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +59 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
data/vendor/faiss/faiss/IndexHNSW.h +4 -2
data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
data/vendor/faiss/faiss/IndexIDMap.h +107 -0
data/vendor/faiss/faiss/IndexIVF.cpp +200 -40
data/vendor/faiss/faiss/IndexIVF.h +59 -22
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +393 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +183 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +43 -26
data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
data/vendor/faiss/faiss/IndexIVFPQ.cpp +238 -53
data/vendor/faiss/faiss/IndexIVFPQ.h +6 -2
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +63 -40
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +23 -7
data/vendor/faiss/faiss/IndexLSH.cpp +8 -32
data/vendor/faiss/faiss/IndexLSH.h +4 -16
data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
data/vendor/faiss/faiss/IndexLattice.h +3 -1
data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -5
data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
data/vendor/faiss/faiss/IndexNSG.cpp +37 -5
data/vendor/faiss/faiss/IndexNSG.h +25 -1
data/vendor/faiss/faiss/IndexPQ.cpp +108 -120
data/vendor/faiss/faiss/IndexPQ.h +21 -22
data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
data/vendor/faiss/faiss/IndexRefine.cpp +36 -4
data/vendor/faiss/faiss/IndexRefine.h +14 -2
data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
data/vendor/faiss/faiss/IndexReplicas.h +2 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +28 -43
data/vendor/faiss/faiss/IndexScalarQuantizer.h +8 -23
data/vendor/faiss/faiss/IndexShards.cpp +4 -1
data/vendor/faiss/faiss/IndexShards.h +2 -1
data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
data/vendor/faiss/faiss/MetaIndexes.h +3 -81
data/vendor/faiss/faiss/VectorTransform.cpp +45 -1
data/vendor/faiss/faiss/VectorTransform.h +25 -4
data/vendor/faiss/faiss/clone_index.cpp +26 -3
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -6
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +331 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +110 -19
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +133 -32
data/vendor/faiss/faiss/impl/HNSW.h +19 -16
data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +378 -217
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +106 -29
data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
data/vendor/faiss/faiss/impl/NSG.cpp +1 -4
data/vendor/faiss/faiss/impl/NSG.h +1 -1
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +521 -55
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +94 -16
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +108 -191
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
data/vendor/faiss/faiss/impl/index_read.cpp +338 -24
data/vendor/faiss/faiss/impl/index_write.cpp +300 -18
data/vendor/faiss/faiss/impl/io.cpp +1 -1
data/vendor/faiss/faiss/impl/io_macros.h +20 -0
data/vendor/faiss/faiss/impl/kmeans1d.cpp +303 -0
data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
data/vendor/faiss/faiss/index_factory.cpp +772 -412
data/vendor/faiss/faiss/index_factory.h +3 -0
data/vendor/faiss/faiss/index_io.h +5 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
data/vendor/faiss/faiss/utils/Heap.h +31 -15
data/vendor/faiss/faiss/utils/distances.cpp +384 -58
data/vendor/faiss/faiss/utils/distances.h +149 -18
data/vendor/faiss/faiss/utils/distances_simd.cpp +776 -6
data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
data/vendor/faiss/faiss/utils/fp16.h +11 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
data/vendor/faiss/faiss/utils/random.cpp +53 -0
data/vendor/faiss/faiss/utils/random.h +5 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
data/vendor/faiss/faiss/utils/utils.h +1 -1
metadata +46 -5
data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
data/vendor/faiss/faiss/IndexResidual.h +0 -152

data/vendor/faiss/faiss/IndexScalarQuantizer.h CHANGED Viewed

@@ -13,26 +13,19 @@
 #include <stdint.h>
 #include <vector>
+#include <faiss/IndexFlatCodes.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/impl/ScalarQuantizer.h>
 namespace faiss {
 /**
- * The uniform quantizer has a range [vmin, vmax]. The range can be
- * the same for all dimensions (uniform) or specific per dimension
- * (default).
+ * Flat index built on a scalar quantizer.
  */
-struct IndexScalarQuantizer : Index {
+struct IndexScalarQuantizer : IndexFlatCodes {
     /// Used to encode the vectors
     ScalarQuantizer sq;
-    /// Codes. Size ntotal * pq.code_size
-    std::vector<uint8_t> codes;
-    size_t code_size;
     /** Constructor.
      *
      * @param d      dimensionality of the input vectors
@@ -48,26 +41,17 @@ struct IndexScalarQuantizer : Index {
     void train(idx_t n, const float* x) override;
-    void add(idx_t n, const float* x) override;
     void search(
             idx_t n,
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
-    void reset() override;
-    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
-    void reconstruct(idx_t key, float* recons) const override;
-    DistanceComputer* get_distance_computer() const override;
+    FlatCodesDistanceComputer* get_FlatCodesDistanceComputer() const override;
     /* standalone codec interface */
-    size_t sa_code_size() const override;
     void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -109,7 +93,8 @@ struct IndexIVFScalarQuantizer : IndexIVF {
             const idx_t* precomputed_idx) override;
     InvertedListScanner* get_InvertedListScanner(
-            bool store_pairs) const override;
+            bool store_pairs,
+            const IDSelector* sel) const override;
     void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
             const override;

data/vendor/faiss/faiss/IndexShards.cpp CHANGED Viewed

@@ -288,7 +288,10 @@ void IndexShardsTemplate<IndexT>::search(
         const component_t* x,
         idx_t k,
         distance_t* distances,
-        idx_t* labels) const {
+        idx_t* labels,
+        const SearchParameters* params) const {
+    FAISS_THROW_IF_NOT_MSG(
+            !params, "search params not supported for this index");
     FAISS_THROW_IF_NOT(k > 0);
     long nshard = this->count();

data/vendor/faiss/faiss/IndexShards.h CHANGED Viewed

@@ -87,7 +87,8 @@ struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
             const component_t* x,
             idx_t k,
             distance_t* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     void train(idx_t n, const component_t* x) override;

data/vendor/faiss/faiss/MetaIndexes.cpp CHANGED Viewed

@@ -16,188 +16,12 @@
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/IDSelector.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/WorkerThread.h>
 namespace faiss {
-namespace {} // namespace
-/*****************************************************
- * IndexIDMap implementation
- *******************************************************/
-template <typename IndexT>
-IndexIDMapTemplate<IndexT>::IndexIDMapTemplate(IndexT* index)
-        : index(index), own_fields(false) {
-    FAISS_THROW_IF_NOT_MSG(index->ntotal == 0, "index must be empty on input");
-    this->is_trained = index->is_trained;
-    this->metric_type = index->metric_type;
-    this->verbose = index->verbose;
-    this->d = index->d;
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::add(
-        idx_t,
-        const typename IndexT::component_t*) {
-    FAISS_THROW_MSG(
-            "add does not make sense with IndexIDMap, "
-            "use add_with_ids");
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::train(
-        idx_t n,
-        const typename IndexT::component_t* x) {
-    index->train(n, x);
-    this->is_trained = index->is_trained;
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::reset() {
-    index->reset();
-    id_map.clear();
-    this->ntotal = 0;
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::add_with_ids(
-        idx_t n,
-        const typename IndexT::component_t* x,
-        const typename IndexT::idx_t* xids) {
-    index->add(n, x);
-    for (idx_t i = 0; i < n; i++)
-        id_map.push_back(xids[i]);
-    this->ntotal = index->ntotal;
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::search(
-        idx_t n,
-        const typename IndexT::component_t* x,
-        idx_t k,
-        typename IndexT::distance_t* distances,
-        typename IndexT::idx_t* labels) const {
-    index->search(n, x, k, distances, labels);
-    idx_t* li = labels;
-#pragma omp parallel for
-    for (idx_t i = 0; i < n * k; i++) {
-        li[i] = li[i] < 0 ? li[i] : id_map[li[i]];
-    }
-}
-template <typename IndexT>
-void IndexIDMapTemplate<IndexT>::range_search(
-        typename IndexT::idx_t n,
-        const typename IndexT::component_t* x,
-        typename IndexT::distance_t radius,
-        RangeSearchResult* result) const {
-    index->range_search(n, x, radius, result);
-#pragma omp parallel for
-    for (idx_t i = 0; i < result->lims[result->nq]; i++) {
-        result->labels[i] = result->labels[i] < 0 ? result->labels[i]
-                                                  : id_map[result->labels[i]];
-    }
-}
-namespace {
-struct IDTranslatedSelector : IDSelector {
-    const std::vector<int64_t>& id_map;
-    const IDSelector& sel;
-    IDTranslatedSelector(
-            const std::vector<int64_t>& id_map,
-            const IDSelector& sel)
-            : id_map(id_map), sel(sel) {}
-    bool is_member(idx_t id) const override {
-        return sel.is_member(id_map[id]);
-    }
-};
-} // namespace
-template <typename IndexT>
-size_t IndexIDMapTemplate<IndexT>::remove_ids(const IDSelector& sel) {
-    // remove in sub-index first
-    IDTranslatedSelector sel2(id_map, sel);
-    size_t nremove = index->remove_ids(sel2);
-    int64_t j = 0;
-    for (idx_t i = 0; i < this->ntotal; i++) {
-        if (sel.is_member(id_map[i])) {
-            // remove
-        } else {
-            id_map[j] = id_map[i];
-            j++;
-        }
-    }
-    FAISS_ASSERT(j == index->ntotal);
-    this->ntotal = j;
-    id_map.resize(this->ntotal);
-    return nremove;
-}
-template <typename IndexT>
-IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate() {
-    if (own_fields)
-        delete index;
-}
-/*****************************************************
- * IndexIDMap2 implementation
- *******************************************************/
-template <typename IndexT>
-IndexIDMap2Template<IndexT>::IndexIDMap2Template(IndexT* index)
-        : IndexIDMapTemplate<IndexT>(index) {}
-template <typename IndexT>
-void IndexIDMap2Template<IndexT>::add_with_ids(
-        idx_t n,
-        const typename IndexT::component_t* x,
-        const typename IndexT::idx_t* xids) {
-    size_t prev_ntotal = this->ntotal;
-    IndexIDMapTemplate<IndexT>::add_with_ids(n, x, xids);
-    for (size_t i = prev_ntotal; i < this->ntotal; i++) {
-        rev_map[this->id_map[i]] = i;
-    }
-}
-template <typename IndexT>
-void IndexIDMap2Template<IndexT>::construct_rev_map() {
-    rev_map.clear();
-    for (size_t i = 0; i < this->ntotal; i++) {
-        rev_map[this->id_map[i]] = i;
-    }
-}
-template <typename IndexT>
-size_t IndexIDMap2Template<IndexT>::remove_ids(const IDSelector& sel) {
-    // This is quite inefficient
-    size_t nremove = IndexIDMapTemplate<IndexT>::remove_ids(sel);
-    construct_rev_map();
-    return nremove;
-}
-template <typename IndexT>
-void IndexIDMap2Template<IndexT>::reconstruct(
-        idx_t key,
-        typename IndexT::component_t* recons) const {
-    try {
-        this->index->reconstruct(rev_map.at(key), recons);
-    } catch (const std::out_of_range& e) {
-        FAISS_THROW_FMT("key %" PRId64 " not found", key);
-    }
-}
-// explicit template instantiations
-template struct IndexIDMapTemplate<Index>;
-template struct IndexIDMapTemplate<IndexBinary>;
-template struct IndexIDMap2Template<Index>;
-template struct IndexIDMap2Template<IndexBinary>;
 /*****************************************************
  * IndexSplitVectors implementation
  *******************************************************/
@@ -235,7 +59,10 @@ void IndexSplitVectors::search(
         const float* x,
         idx_t k,
         float* distances,
-        idx_t* labels) const {
+        idx_t* labels,
+        const SearchParameters* params) const {
+    FAISS_THROW_IF_NOT_MSG(
+            !params, "search params not supported for this index");
     FAISS_THROW_IF_NOT_MSG(k == 1, "search implemented only for k=1");
     FAISS_THROW_IF_NOT_MSG(
             sum_d == d, "not enough indexes compared to # dimensions");

data/vendor/faiss/faiss/MetaIndexes.h CHANGED Viewed

@@ -11,92 +11,13 @@
 #define META_INDEXES_H
 #include <faiss/Index.h>
+#include <faiss/IndexIDMap.h>
 #include <faiss/IndexReplicas.h>
 #include <faiss/IndexShards.h>
-#include <unordered_map>
 #include <vector>
 namespace faiss {
-/** Index that translates search results to ids */
-template <typename IndexT>
-struct IndexIDMapTemplate : IndexT {
-    using idx_t = typename IndexT::idx_t;
-    using component_t = typename IndexT::component_t;
-    using distance_t = typename IndexT::distance_t;
-    IndexT* index;   ///! the sub-index
-    bool own_fields; ///! whether pointers are deleted in destructo
-    std::vector<idx_t> id_map;
-    explicit IndexIDMapTemplate(IndexT* index);
-    /// @param xids if non-null, ids to store for the vectors (size n)
-    void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
-            override;
-    /// this will fail. Use add_with_ids
-    void add(idx_t n, const component_t* x) override;
-    void search(
-            idx_t n,
-            const component_t* x,
-            idx_t k,
-            distance_t* distances,
-            idx_t* labels) const override;
-    void train(idx_t n, const component_t* x) override;
-    void reset() override;
-    /// remove ids adapted to IndexFlat
-    size_t remove_ids(const IDSelector& sel) override;
-    void range_search(
-            idx_t n,
-            const component_t* x,
-            distance_t radius,
-            RangeSearchResult* result) const override;
-    ~IndexIDMapTemplate() override;
-    IndexIDMapTemplate() {
-        own_fields = false;
-        index = nullptr;
-    }
-};
-using IndexIDMap = IndexIDMapTemplate<Index>;
-using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
-/** same as IndexIDMap but also provides an efficient reconstruction
- *  implementation via a 2-way index */
-template <typename IndexT>
-struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
-    using idx_t = typename IndexT::idx_t;
-    using component_t = typename IndexT::component_t;
-    using distance_t = typename IndexT::distance_t;
-    std::unordered_map<idx_t, idx_t> rev_map;
-    explicit IndexIDMap2Template(IndexT* index);
-    /// make the rev_map from scratch
-    void construct_rev_map();
-    void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
-            override;
-    size_t remove_ids(const IDSelector& sel) override;
-    void reconstruct(idx_t key, component_t* recons) const override;
-    ~IndexIDMap2Template() override {}
-    IndexIDMap2Template() {}
-};
-using IndexIDMap2 = IndexIDMap2Template<Index>;
-using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
 /** splits input vectors in segments and assigns each segment to a sub-index
  * used to distribute a MultiIndexQuantizer
  */
@@ -118,7 +39,8 @@ struct IndexSplitVectors : Index {
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     void train(idx_t n, const float* x) override;

data/vendor/faiss/faiss/VectorTransform.cpp CHANGED Viewed

@@ -149,6 +149,10 @@ void VectorTransform::reverse_transform(idx_t, const float*, float*) const {
     FAISS_THROW_MSG("reverse transform not implemented");
 }
+void VectorTransform::check_identical(const VectorTransform& other) const {
+    FAISS_THROW_IF_NOT(other.d_in == d_in && other.d_in == d_in);
+}
 /*********************************************
  * LinearTransform
  *********************************************/
@@ -308,6 +312,13 @@ void LinearTransform::print_if_verbose(
     printf("]\n");
 }
+void LinearTransform::check_identical(const VectorTransform& other_in) const {
+    VectorTransform::check_identical(other_in);
+    auto other = dynamic_cast<const LinearTransform*>(&other_in);
+    FAISS_THROW_IF_NOT(other);
+    FAISS_THROW_IF_NOT(other->A == A && other->b == b);
+}
 /*********************************************
  * RandomRotationMatrix
  *********************************************/
@@ -357,6 +368,7 @@ PCAMatrix::PCAMatrix(
     is_trained = false;
     max_points_per_d = 1000;
     balanced_bins = 0;
+    epsilon = 0;
 }
 namespace {
@@ -620,7 +632,7 @@ void PCAMatrix::prepare_Ab() {
         if (eigen_power != 0) {
             float* ai = A.data();
             for (int i = 0; i < d_out; i++) {
-                float factor = pow(eigenvalues[i], eigen_power);
+                float factor = pow(eigenvalues[i] + epsilon, eigen_power);
                 for (int j = 0; j < d_in; j++)
                     *ai++ *= factor;
             }
@@ -965,6 +977,14 @@ void ITQTransform::apply_noalloc(Index::idx_t n, const float* x, float* xt)
     pca_then_itq.apply_noalloc(n, x_norm.get(), xt);
 }
+void ITQTransform::check_identical(const VectorTransform& other_in) const {
+    VectorTransform::check_identical(other_in);
+    auto other = dynamic_cast<const ITQTransform*>(&other_in);
+    FAISS_THROW_IF_NOT(other);
+    pca_then_itq.check_identical(other->pca_then_itq);
+    FAISS_THROW_IF_NOT(other->mean == mean);
+}
 /*********************************************
  * OPQMatrix
  *********************************************/
@@ -1225,6 +1245,14 @@ void NormalizationTransform::reverse_transform(
     memcpy(x, xt, sizeof(xt[0]) * n * d_in);
 }
+void NormalizationTransform::check_identical(
+        const VectorTransform& other_in) const {
+    VectorTransform::check_identical(other_in);
+    auto other = dynamic_cast<const NormalizationTransform*>(&other_in);
+    FAISS_THROW_IF_NOT(other);
+    FAISS_THROW_IF_NOT(other->norm == norm);
+}
 /*********************************************
  * CenteringTransform
  *********************************************/
@@ -1270,6 +1298,14 @@ void CenteringTransform::reverse_transform(idx_t n, const float* xt, float* x)
     }
 }
+void CenteringTransform::check_identical(
+        const VectorTransform& other_in) const {
+    VectorTransform::check_identical(other_in);
+    auto other = dynamic_cast<const CenteringTransform*>(&other_in);
+    FAISS_THROW_IF_NOT(other);
+    FAISS_THROW_IF_NOT(other->mean == mean);
+}
 /*********************************************
  * RemapDimensionsTransform
  *********************************************/
@@ -1334,3 +1370,11 @@ void RemapDimensionsTransform::reverse_transform(
         xt += d_out;
     }
 }
+void RemapDimensionsTransform::check_identical(
+        const VectorTransform& other_in) const {
+    VectorTransform::check_identical(other_in);
+    auto other = dynamic_cast<const RemapDimensionsTransform*>(&other_in);
+    FAISS_THROW_IF_NOT(other);
+    FAISS_THROW_IF_NOT(other->map == map);
+}

data/vendor/faiss/faiss/VectorTransform.h CHANGED Viewed

@@ -43,19 +43,27 @@ struct VectorTransform {
      */
     virtual void train(idx_t n, const float* x);
-    /** apply the random rotation, return new allocated matrix
-     * @param     x size n * d_in
-     * @return    size n * d_out
+    /** apply the transformation and return the result in an allocated pointer
+     * @param     n number of vectors to transform
+     * @param     x input vectors, size n * d_in
+     * @return    output vectors, size n * d_out
      */
     float* apply(idx_t n, const float* x) const;
-    /// same as apply, but result is pre-allocated
+    /** apply the transformation and return the result in a provided matrix
+     * @param     n number of vectors to transform
+     * @param     x input vectors, size n * d_in
+     * @param    xt output vectors, size n * d_out
+     */
     virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
     /// reverse transformation. May not be implemented or may return
     /// approximate result
     virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
+    // check that the two transforms are identical (to merge indexes)
+    virtual void check_identical(const VectorTransform& other) const = 0;
     virtual ~VectorTransform() {}
 };
@@ -100,6 +108,8 @@ struct LinearTransform : VectorTransform {
             int n,
             int d) const;
+    void check_identical(const VectorTransform& other) const override;
     ~LinearTransform() override {}
 };
@@ -129,6 +139,9 @@ struct PCAMatrix : LinearTransform {
      */
     float eigen_power;
+    /// value added to eigenvalues to avoid division by 0 when whitening
+    float epsilon;
     /// random rotation after PCA
     bool random_rotation;
@@ -204,6 +217,8 @@ struct ITQTransform : VectorTransform {
     void train(idx_t n, const float* x) override;
     void apply_noalloc(idx_t n, const float* x, float* xt) const override;
+    void check_identical(const VectorTransform& other) const override;
 };
 struct ProductQuantizer;
@@ -257,6 +272,8 @@ struct RemapDimensionsTransform : VectorTransform {
     void reverse_transform(idx_t n, const float* xt, float* x) const override;
     RemapDimensionsTransform() {}
+    void check_identical(const VectorTransform& other) const override;
 };
 /** per-vector normalization */
@@ -270,6 +287,8 @@ struct NormalizationTransform : VectorTransform {
     /// Identity transform since norm is not revertible
     void reverse_transform(idx_t n, const float* xt, float* x) const override;
+    void check_identical(const VectorTransform& other) const override;
 };
 /** Subtract the mean of each component from the vectors. */
@@ -287,6 +306,8 @@ struct CenteringTransform : VectorTransform {
     /// add the mean
     void reverse_transform(idx_t n, const float* xt, float* x) const override;
+    void check_identical(const VectorTransform& other) const override;
 };
 } // namespace faiss

data/vendor/faiss/faiss/clone_index.cpp CHANGED Viewed

@@ -15,6 +15,7 @@
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/Index2Layer.h>
+#include <faiss/IndexAdditiveQuantizer.h>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexHNSW.h>
 #include <faiss/IndexIVF.h>
@@ -27,11 +28,15 @@
 #include <faiss/IndexNSG.h>
 #include <faiss/IndexPQ.h>
 #include <faiss/IndexPreTransform.h>
-#include <faiss/IndexResidual.h>
 #include <faiss/IndexScalarQuantizer.h>
 #include <faiss/MetaIndexes.h>
 #include <faiss/VectorTransform.h>
+#include <faiss/impl/LocalSearchQuantizer.h>
+#include <faiss/impl/ProductQuantizer.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/ScalarQuantizer.h>
 namespace faiss {
 /*************************************************************
@@ -80,9 +85,10 @@ Index* Cloner::clone_Index(const Index* index) {
     TRYCLONE(IndexFlatIP, index)
     TRYCLONE(IndexFlat, index)
     TRYCLONE(IndexLattice, index)
-    TRYCLONE(IndexResidual, index)
+    TRYCLONE(IndexResidualQuantizer, index)
     TRYCLONE(IndexScalarQuantizer, index)
     TRYCLONE(MultiIndexQuantizer, index)
+    TRYCLONE(ResidualCoarseQuantizer, index)
     if (const IndexIVF* ivf = dynamic_cast<const IndexIVF*>(index)) {
         IndexIVF* res = clone_IndexIVF(ivf);
         if (ivf->invlists == nullptr) {
@@ -116,7 +122,9 @@ Index* Cloner::clone_Index(const Index* index) {
         return res;
     } else if (
             const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
-        IndexIDMap* res = new IndexIDMap(*idmap);
+        const IndexIDMap2* idmap2 = dynamic_cast<const IndexIDMap2*>(index);
+        IndexIDMap* res =
+                idmap2 ? new IndexIDMap2(*idmap2) : new IndexIDMap(*idmap);
         res->own_fields = true;
         res->index = clone_Index(idmap->index);
         return res;
@@ -136,6 +144,13 @@ Index* Cloner::clone_Index(const Index* index) {
         res->own_fields = true;
         res->storage = clone_Index(insg->storage);
         return res;
+    } else if (
+            const IndexNNDescent* innd =
+                    dynamic_cast<const IndexNNDescent*>(index)) {
+        IndexNNDescent* res = new IndexNNDescent(*innd);
+        res->own_fields = true;
+        res->storage = clone_Index(innd->storage);
+        return res;
     } else if (
             const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
         Index2Layer* res = new Index2Layer(*i2l);
@@ -148,4 +163,12 @@ Index* Cloner::clone_Index(const Index* index) {
     return nullptr;
 }
+Quantizer* clone_Quantizer(const Quantizer* quant) {
+    TRYCLONE(ResidualQuantizer, quant)
+    TRYCLONE(LocalSearchQuantizer, quant)
+    TRYCLONE(ProductQuantizer, quant)
+    TRYCLONE(ScalarQuantizer, quant)
+    FAISS_THROW_MSG("Did not recognize quantizer to clone");
+}
 } // namespace faiss

data/vendor/faiss/faiss/clone_index.h CHANGED Viewed

@@ -16,6 +16,7 @@ namespace faiss {
 struct Index;
 struct IndexIVF;
 struct VectorTransform;
+struct Quantizer;
 /* cloning functions */
 Index* clone_index(const Index*);
@@ -30,4 +31,6 @@ struct Cloner {
     virtual ~Cloner() {}
 };
+Quantizer* clone_Quantizer(const Quantizer* quant);
 } // namespace faiss