RubyGems - faiss - Versions diffs - 0.2.3 → 0.2.5 - Mend

faiss 0.2.3 → 0.2.5

Files changed (189) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/LICENSE.txt +1 -1
data/README.md +23 -21
data/ext/faiss/extconf.rb +11 -0
data/ext/faiss/index.cpp +4 -4
data/ext/faiss/index_binary.cpp +6 -6
data/ext/faiss/product_quantizer.cpp +4 -4
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +13 -0
data/vendor/faiss/faiss/Clustering.cpp +32 -0
data/vendor/faiss/faiss/Clustering.h +14 -0
data/vendor/faiss/faiss/IVFlib.cpp +101 -2
data/vendor/faiss/faiss/IVFlib.h +26 -2
data/vendor/faiss/faiss/Index.cpp +36 -3
data/vendor/faiss/faiss/Index.h +43 -6
data/vendor/faiss/faiss/Index2Layer.cpp +24 -93
data/vendor/faiss/faiss/Index2Layer.h +8 -17
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +610 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +253 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
data/vendor/faiss/faiss/IndexBinary.h +18 -3
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
data/vendor/faiss/faiss/IndexFastScan.h +145 -0
data/vendor/faiss/faiss/IndexFlat.cpp +52 -69
data/vendor/faiss/faiss/IndexFlat.h +16 -19
data/vendor/faiss/faiss/IndexFlatCodes.cpp +101 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +59 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
data/vendor/faiss/faiss/IndexHNSW.h +4 -2
data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
data/vendor/faiss/faiss/IndexIDMap.h +107 -0
data/vendor/faiss/faiss/IndexIVF.cpp +200 -40
data/vendor/faiss/faiss/IndexIVF.h +59 -22
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +393 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +183 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +43 -26
data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
data/vendor/faiss/faiss/IndexIVFPQ.cpp +238 -53
data/vendor/faiss/faiss/IndexIVFPQ.h +6 -2
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +63 -40
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +23 -7
data/vendor/faiss/faiss/IndexLSH.cpp +8 -32
data/vendor/faiss/faiss/IndexLSH.h +4 -16
data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
data/vendor/faiss/faiss/IndexLattice.h +3 -1
data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -5
data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
data/vendor/faiss/faiss/IndexNSG.cpp +37 -5
data/vendor/faiss/faiss/IndexNSG.h +25 -1
data/vendor/faiss/faiss/IndexPQ.cpp +108 -120
data/vendor/faiss/faiss/IndexPQ.h +21 -22
data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
data/vendor/faiss/faiss/IndexRefine.cpp +36 -4
data/vendor/faiss/faiss/IndexRefine.h +14 -2
data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
data/vendor/faiss/faiss/IndexReplicas.h +2 -1
data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +28 -43
data/vendor/faiss/faiss/IndexScalarQuantizer.h +8 -23
data/vendor/faiss/faiss/IndexShards.cpp +4 -1
data/vendor/faiss/faiss/IndexShards.h +2 -1
data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
data/vendor/faiss/faiss/MetaIndexes.h +3 -81
data/vendor/faiss/faiss/VectorTransform.cpp +45 -1
data/vendor/faiss/faiss/VectorTransform.h +25 -4
data/vendor/faiss/faiss/clone_index.cpp +26 -3
data/vendor/faiss/faiss/clone_index.h +3 -0
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -6
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +331 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +110 -19
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
data/vendor/faiss/faiss/impl/HNSW.cpp +133 -32
data/vendor/faiss/faiss/impl/HNSW.h +19 -16
data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +378 -217
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +106 -29
data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
data/vendor/faiss/faiss/impl/NSG.cpp +1 -4
data/vendor/faiss/faiss/impl/NSG.h +1 -1
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +521 -55
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +94 -16
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +108 -191
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
data/vendor/faiss/faiss/impl/index_read.cpp +338 -24
data/vendor/faiss/faiss/impl/index_write.cpp +300 -18
data/vendor/faiss/faiss/impl/io.cpp +1 -1
data/vendor/faiss/faiss/impl/io_macros.h +20 -0
data/vendor/faiss/faiss/impl/kmeans1d.cpp +303 -0
data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
data/vendor/faiss/faiss/index_factory.cpp +772 -412
data/vendor/faiss/faiss/index_factory.h +3 -0
data/vendor/faiss/faiss/index_io.h +5 -0
data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
data/vendor/faiss/faiss/utils/Heap.h +31 -15
data/vendor/faiss/faiss/utils/distances.cpp +384 -58
data/vendor/faiss/faiss/utils/distances.h +149 -18
data/vendor/faiss/faiss/utils/distances_simd.cpp +776 -6
data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
data/vendor/faiss/faiss/utils/fp16.h +11 -0
data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
data/vendor/faiss/faiss/utils/random.cpp +53 -0
data/vendor/faiss/faiss/utils/random.h +5 -0
data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
data/vendor/faiss/faiss/utils/utils.h +1 -1
metadata +46 -5
data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
data/vendor/faiss/faiss/IndexResidual.h +0 -152

data/vendor/faiss/faiss/IndexIVFPQFastScan.h CHANGED Viewed

@@ -9,6 +9,7 @@
 #include <memory>
+#include <faiss/IndexIVFFastScan.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/impl/ProductQuantizer.h>
 #include <faiss/utils/AlignedTable.h>
@@ -31,36 +32,20 @@ namespace faiss {
  * 13: idem, collect results in reservoir
  */
-struct IndexIVFPQFastScan : IndexIVF {
-    bool by_residual;    ///< Encode residual or plain vector?
+struct IndexIVFPQFastScan : IndexIVFFastScan {
     ProductQuantizer pq; ///< produces the codes
-    // size of the kernel
-    int bbs; // set at build time
-    // M rounded up to a multiple of 2
-    size_t M2;
     /// precomputed tables management
     int use_precomputed_table = 0;
     /// if use_precompute_table size (nlist, pq.M, pq.ksub)
     AlignedTable<float> precomputed_table;
-    // search-time implementation
-    int implem = 0;
-    // skip some parts of the computation (for timing)
-    int skip = 0;
-    // batching factors at search time (0 = default)
-    int qbs = 0;
-    size_t qbs2 = 0;
     IndexIVFPQFastScan(
             Index* quantizer,
             size_t d,
             size_t nlist,
             size_t M,
-            size_t nbits_per_idx,
+            size_t nbits,
             MetricType metric = METRIC_L2,
             int bbs = 32);
@@ -69,9 +54,6 @@ struct IndexIVFPQFastScan : IndexIVF {
     // built from an IndexIVFPQ
     explicit IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs = 32);
-    /// orig's inverted lists (for debugging)
-    InvertedLists* orig_invlists = nullptr;
     void train_residual(idx_t n, const float* x) override;
     /// build precomputed table, possibly updating use_precomputed_table
@@ -86,106 +68,19 @@ struct IndexIVFPQFastScan : IndexIVF {
             uint8_t* codes,
             bool include_listno = false) const override;
-    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
-    void search(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels) const override;
     // prepare look-up tables
+    bool lookup_table_is_3d() const override;
     void compute_LUT(
             size_t n,
             const float* x,
             const idx_t* coarse_ids,
             const float* coarse_dis,
             AlignedTable<float>& dis_tables,
-            AlignedTable<float>& biases) const;
-    void compute_LUT_uint8(
-            size_t n,
-            const float* x,
-            const idx_t* coarse_ids,
-            const float* coarse_dis,
-            AlignedTable<uint8_t>& dis_tables,
-            AlignedTable<uint16_t>& biases,
-            float* normalizers) const;
-    // internal search funcs
+            AlignedTable<float>& biases) const override;
-    template <bool is_max>
-    void search_dispatch_implem(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels) const;
-    template <class C>
-    void search_implem_1(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels) const;
-    template <class C>
-    void search_implem_2(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels) const;
-    // implem 10 and 12 are not multithreaded internally, so
-    // export search stats
-    template <class C>
-    void search_implem_10(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels,
-            int impl,
-            size_t* ndis_out,
-            size_t* nlist_out) const;
-    template <class C>
-    void search_implem_12(
-            idx_t n,
-            const float* x,
-            idx_t k,
-            float* distances,
-            idx_t* labels,
-            int impl,
-            size_t* ndis_out,
-            size_t* nlist_out) const;
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
-struct IVFFastScanStats {
-    uint64_t times[10];
-    uint64_t t_compute_distance_tables, t_round;
-    uint64_t t_copy_pack, t_scan, t_to_flat;
-    uint64_t reservoir_times[4];
-    double Mcy_at(int i) {
-        return times[i] / (1000 * 1000.0);
-    }
-    double Mcy_reservoir_at(int i) {
-        return reservoir_times[i] / (1000 * 1000.0);
-    }
-    IVFFastScanStats() {
-        reset();
-    }
-    void reset() {
-        memset(this, 0, sizeof(*this));
-    }
-};
-FAISS_API extern IVFFastScanStats IVFFastScan_stats;
 } // namespace faiss

data/vendor/faiss/faiss/IndexIVFPQR.cpp CHANGED Viewed

@@ -201,11 +201,11 @@ void IndexIVFPQR::reconstruct_from_offset(
     }
 }
-void IndexIVFPQR::merge_from(IndexIVF& other_in, idx_t add_id) {
-    IndexIVFPQR* other = dynamic_cast<IndexIVFPQR*>(&other_in);
+void IndexIVFPQR::merge_from(Index& otherIndex, idx_t add_id) {
+    IndexIVFPQR* other = dynamic_cast<IndexIVFPQR*>(&otherIndex);
     FAISS_THROW_IF_NOT(other);
-    IndexIVF::merge_from(other_in, add_id);
+    IndexIVF::merge_from(otherIndex, add_id);
     refine_codes.insert(
             refine_codes.end(),

data/vendor/faiss/faiss/IndexIVFPQR.h CHANGED Viewed

@@ -51,7 +51,7 @@ struct IndexIVFPQR : IndexIVFPQ {
     void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
             const override;
-    void merge_from(IndexIVF& other, idx_t add_id) override;
+    void merge_from(Index& otherIndex, idx_t add_id) override;
     void search_preassigned(
             idx_t n,

data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp CHANGED Viewed

@@ -13,6 +13,8 @@
 #include <algorithm>
 #include <memory>
+#include <faiss/IndexLSH.h>
+#include <faiss/IndexPreTransform.h>
 #include <faiss/VectorTransform.h>
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
@@ -31,7 +33,6 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
           nbit(nbit),
           period(period),
           threshold_type(Thresh_global) {
-    FAISS_THROW_IF_NOT(code_size % 4 == 0);
     RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
     rr->init(1234);
     vt = rr;
@@ -151,8 +152,8 @@ void binarize_with_freq(
     memset(codes, 0, (nbit + 7) / 8);
     for (size_t i = 0; i < nbit; i++) {
         float xf = (x[i] - c[i]);
-        int xi = int(floor(xf * freq));
-        int bit = xi & 1;
+        int64_t xi = int64_t(floor(xf * freq));
+        int64_t bit = xi & 1;
         codes[i >> 3] |= bit << (i & 7);
     }
 }
@@ -167,35 +168,33 @@ void IndexIVFSpectralHash::encode_vectors(
         bool include_listnos) const {
     FAISS_THROW_IF_NOT(is_trained);
     float freq = 2.0 / period;
-    FAISS_THROW_IF_NOT_MSG(!include_listnos, "listnos encoding not supported");
+    size_t coarse_size = include_listnos ? coarse_code_size() : 0;
     // transform with vt
     std::unique_ptr<float[]> x(vt->apply(n, x_in));
-#pragma omp parallel
-    {
-        std::vector<float> zero(nbit);
+    std::vector<float> zero(nbit);
-        // each thread takes care of a subset of lists
 #pragma omp for
-        for (idx_t i = 0; i < n; i++) {
-            int64_t list_no = list_nos[i];
-            if (list_no >= 0) {
-                const float* c;
-                if (threshold_type == Thresh_global) {
-                    c = zero.data();
-                } else {
-                    c = trained.data() + list_no * nbit;
-                }
-                binarize_with_freq(
-                        nbit,
-                        freq,
-                        x.get() + i * nbit,
-                        c,
-                        codes + i * code_size);
+    for (idx_t i = 0; i < n; i++) {
+        int64_t list_no = list_nos[i];
+        uint8_t* code = codes + i * (code_size + coarse_size);
+        if (list_no >= 0) {
+            if (coarse_size) {
+                encode_listno(list_no, code);
+            }
+            const float* c;
+            if (threshold_type == Thresh_global) {
+                c = zero.data();
+            } else {
+                c = trained.data() + list_no * nbit;
             }
+            binarize_with_freq(
+                    nbit, freq, x.get() + i * nbit, c, code + coarse_size);
+        } else {
+            memset(code, 0, code_size + coarse_size);
         }
     }
 }
@@ -206,9 +205,7 @@ template <class HammingComputer>
 struct IVFScanner : InvertedListScanner {
     // copied from index structure
     const IndexIVFSpectralHash* index;
-    size_t code_size;
     size_t nbit;
-    bool store_pairs;
     float period, freq;
     std::vector<float> q;
@@ -220,15 +217,16 @@ struct IVFScanner : InvertedListScanner {
     IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
             : index(index),
-              code_size(index->code_size),
               nbit(index->nbit),
-              store_pairs(store_pairs),
               period(index->period),
               freq(2.0 / index->period),
               q(nbit),
               zero(nbit),
-              qcode(code_size),
-              hc(qcode.data(), code_size) {}
+              qcode(index->code_size),
+              hc(qcode.data(), index->code_size) {
+        this->store_pairs = store_pairs;
+        this->code_size = index->code_size;
+    }
     void set_query(const float* query) override {
         FAISS_THROW_IF_NOT(query);
@@ -241,8 +239,6 @@ struct IVFScanner : InvertedListScanner {
         }
     }
-    idx_t list_no;
     void set_list(idx_t list_no, float /*coarse_dis*/) override {
         this->list_no = list_no;
         if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
@@ -297,7 +293,9 @@ struct IVFScanner : InvertedListScanner {
 } // anonymous namespace
 InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
-        bool store_pairs) const {
+        bool store_pairs,
+        const IDSelector* sel) const {
+    FAISS_THROW_IF_NOT(!sel);
     switch (code_size) {
 #define HANDLE_CODE_SIZE(cs) \
     case cs:                 \
@@ -310,13 +308,38 @@ InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
         HANDLE_CODE_SIZE(64);
 #undef HANDLE_CODE_SIZE
         default:
-            if (code_size % 4 == 0) {
-                return new IVFScanner<HammingComputerDefault>(
-                        this, store_pairs);
-            } else {
-                FAISS_THROW_MSG("not supported");
-            }
+            return new IVFScanner<HammingComputerDefault>(this, store_pairs);
+    }
+}
+void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
+    FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
+    FAISS_THROW_IF_NOT(vt_in->d_in == d);
+    if (own_fields) {
+        delete vt;
     }
+    vt = vt_in;
+    threshold_type = Thresh_global;
+    is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
+            vt->is_trained;
+    own_fields = own;
+}
+/*
+    Check that the encoder is a single vector transform followed by a LSH
+    that just does thresholding.
+    If this is not the case, the linear transform + thresholds of the IndexLSH
+    should be merged into the VectorTransform (which is feasible).
+*/
+void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
+    FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
+    auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
+    FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
+    FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
+    FAISS_THROW_IF_NOT(!sub_index->rotate_data);
+    FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
+    replace_vt(encoder->chain[0], own);
 }
 } // namespace faiss

data/vendor/faiss/faiss/IndexIVFSpectralHash.h CHANGED Viewed

@@ -17,6 +17,7 @@
 namespace faiss {
 struct VectorTransform;
+struct IndexPreTransform;
 /** Inverted list that stores binary codes of size nbit. Before the
  * binary conversion, the dimension of the vectors is transformed from
@@ -25,23 +26,29 @@ struct VectorTransform;
  * Each coordinate is subtracted from a value determined by
  * threshold_type, and split into intervals of size period. Half of
  * the interval is a 0 bit, the other half a 1.
+ *
  */
 struct IndexIVFSpectralHash : IndexIVF {
-    VectorTransform* vt; // transformation from d to nbit dim
+    /// transformation from d to nbit dim
+    VectorTransform* vt;
+    /// own the vt
     bool own_fields;
+    /// nb of bits of the binary signature
     int nbit;
+    /// interval size for 0s and 1s
     float period;
     enum ThresholdType {
-        Thresh_global,
-        Thresh_centroid,
-        Thresh_centroid_half,
-        Thresh_median
+        Thresh_global,        ///< global threshold at 0
+        Thresh_centroid,      ///< compare to centroid
+        Thresh_centroid_half, ///< central interval around centroid
+        Thresh_median         ///< median of training set
     };
     ThresholdType threshold_type;
-    // size nlist * nbit or 0 if Thresh_global
+    /// Trained threshold.
+    /// size nlist * nbit or 0 if Thresh_global
     std::vector<float> trained;
     IndexIVFSpectralHash(
@@ -63,7 +70,16 @@ struct IndexIVFSpectralHash : IndexIVF {
             bool include_listnos = false) const override;
     InvertedListScanner* get_InvertedListScanner(
-            bool store_pairs) const override;
+            bool store_pairs,
+            const IDSelector* sel) const override;
+    /** replace the vector transform for an empty (and possibly untrained) index
+     */
+    void replace_vt(VectorTransform* vt, bool own = false);
+    /** convenience function to get the VT from an index constructed by an
+     * index_factory (should end in "LSH") */
+    void replace_vt(IndexPreTransform* index, bool own = false);
     ~IndexIVFSpectralHash() override;
 };

data/vendor/faiss/faiss/IndexLSH.cpp CHANGED Viewed

@@ -5,8 +5,6 @@
  * LICENSE file in the root directory of this source tree.
  */
-// -*- c++ -*-
 #include <faiss/IndexLSH.h>
 #include <cstdio>
@@ -25,15 +23,13 @@ namespace faiss {
  ***************************************************************/
 IndexLSH::IndexLSH(idx_t d, int nbits, bool rotate_data, bool train_thresholds)
-        : Index(d),
+        : IndexFlatCodes((nbits + 7) / 8, d),
           nbits(nbits),
           rotate_data(rotate_data),
           train_thresholds(train_thresholds),
           rrot(d, nbits) {
     is_trained = !train_thresholds;
-    bytes_per_vec = (nbits + 7) / 8;
     if (rotate_data) {
         rrot.init(5);
     } else {
@@ -41,11 +37,7 @@ IndexLSH::IndexLSH(idx_t d, int nbits, bool rotate_data, bool train_thresholds)
     }
 }
-IndexLSH::IndexLSH()
-        : nbits(0),
-          bytes_per_vec(0),
-          rotate_data(false),
-          train_thresholds(false) {}
+IndexLSH::IndexLSH() : nbits(0), rotate_data(false), train_thresholds(false) {}
 const float* IndexLSH::apply_preprocess(idx_t n, const float* x) const {
     float* xt = nullptr;
@@ -106,28 +98,21 @@ void IndexLSH::train(idx_t n, const float* x) {
     is_trained = true;
 }
-void IndexLSH::add(idx_t n, const float* x) {
-    FAISS_THROW_IF_NOT(is_trained);
-    codes.resize((ntotal + n) * bytes_per_vec);
-    sa_encode(n, x, &codes[ntotal * bytes_per_vec]);
-    ntotal += n;
-}
 void IndexLSH::search(
         idx_t n,
         const float* x,
         idx_t k,
         float* distances,
-        idx_t* labels) const {
+        idx_t* labels,
+        const SearchParameters* params) const {
+    FAISS_THROW_IF_NOT_MSG(
+            !params, "search params not supported for this index");
     FAISS_THROW_IF_NOT(k > 0);
     FAISS_THROW_IF_NOT(is_trained);
     const float* xt = apply_preprocess(n, x);
     ScopeDeleter<float> del(xt == x ? nullptr : xt);
-    uint8_t* qcodes = new uint8_t[n * bytes_per_vec];
+    uint8_t* qcodes = new uint8_t[n * code_size];
     ScopeDeleter<uint8_t> del2(qcodes);
     fvecs2bitvecs(xt, qcodes, nbits, n);
@@ -137,7 +122,7 @@ void IndexLSH::search(
     int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances};
-    hammings_knn_hc(&res, qcodes, codes.data(), ntotal, bytes_per_vec, true);
+    hammings_knn_hc(&res, qcodes, codes.data(), ntotal, code_size, true);
     // convert distances to floats
     for (int i = 0; i < k * n; i++)
@@ -158,15 +143,6 @@ void IndexLSH::transfer_thresholds(LinearTransform* vt) {
     thresholds.clear();
 }
-void IndexLSH::reset() {
-    codes.clear();
-    ntotal = 0;
-}
-size_t IndexLSH::sa_code_size() const {
-    return bytes_per_vec;
-}
 void IndexLSH::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
     FAISS_THROW_IF_NOT(is_trained);
     const float* xt = apply_preprocess(n, x);

data/vendor/faiss/faiss/IndexLSH.h CHANGED Viewed

@@ -12,17 +12,14 @@
 #include <vector>
-#include <faiss/Index.h>
+#include <faiss/IndexFlatCodes.h>
 #include <faiss/VectorTransform.h>
 namespace faiss {
 /** The sign of each vector component is put in a binary signature */
-struct IndexLSH : Index {
-    typedef unsigned char uint8_t;
+struct IndexLSH : IndexFlatCodes {
     int nbits;             ///< nb of bits per vector
-    int bytes_per_vec;     ///< nb of 8-bits per encoded vector
     bool rotate_data;      ///< whether to apply a random rotation to input
     bool train_thresholds; ///< whether we train thresholds or use 0
@@ -30,9 +27,6 @@ struct IndexLSH : Index {
     std::vector<float> thresholds; ///< thresholds to compare with
-    /// encoded dataset
-    std::vector<uint8_t> codes;
     IndexLSH(
             idx_t d,
             int nbits,
@@ -50,16 +44,13 @@ struct IndexLSH : Index {
     void train(idx_t n, const float* x) override;
-    void add(idx_t n, const float* x) override;
     void search(
             idx_t n,
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
-    void reset() override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     /// transfer the thresholds to a pre-processing stage (and unset
     /// train_thresholds)
@@ -72,9 +63,6 @@ struct IndexLSH : Index {
     /* standalone codec interface.
      *
      * The vectors are decoded to +/- 1 (not 0, 1) */
-    size_t sa_code_size() const override;
     void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;

data/vendor/faiss/faiss/IndexLattice.cpp CHANGED Viewed

@@ -118,7 +118,13 @@ void IndexLattice::add(idx_t, const float*) {
     FAISS_THROW_MSG("not implemented");
 }
-void IndexLattice::search(idx_t, const float*, idx_t, float*, idx_t*) const {
+void IndexLattice::search(
+        idx_t,
+        const float*,
+        idx_t,
+        float*,
+        idx_t*,
+        const SearchParameters*) const {
     FAISS_THROW_MSG("not implemented");
 }

data/vendor/faiss/faiss/IndexLattice.h CHANGED Viewed

@@ -54,7 +54,9 @@ struct IndexLattice : Index {
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     void reset() override;
 };

data/vendor/faiss/faiss/IndexNNDescent.cpp CHANGED Viewed

@@ -135,9 +135,10 @@ void IndexNNDescent::search(
         const float* x,
         idx_t k,
         float* distances,
-        idx_t* labels) const
-{
+        idx_t* labels,
+        const SearchParameters* params) const {
+    FAISS_THROW_IF_NOT_MSG(
+            !params, "search params not supported for this index");
     FAISS_THROW_IF_NOT_MSG(
             storage,
             "Please use IndexNNDescentFlat (or variants) "
@@ -167,9 +168,7 @@ void IndexNNDescent::search(
                 float* simi = distances + i * k;
                 dis->set_query(x + i * d);
-                maxheap_heapify(k, simi, idxi);
                 nndescent.search(*dis, k, idxi, simi, vt);
-                maxheap_reorder(k, simi, idxi);
             }
         }
         InterruptCallback::check();

data/vendor/faiss/faiss/IndexNNDescent.h CHANGED Viewed

@@ -53,7 +53,8 @@ struct IndexNNDescent : Index {
             const float* x,
             idx_t k,
             float* distances,
-            idx_t* labels) const override;
+            idx_t* labels,
+            const SearchParameters* params = nullptr) const override;
     void reconstruct(idx_t key, float* recons) const override;