faiss 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -13,6 +13,8 @@
13
13
  #include <algorithm>
14
14
  #include <memory>
15
15
 
16
+ #include <faiss/IndexLSH.h>
17
+ #include <faiss/IndexPreTransform.h>
16
18
  #include <faiss/VectorTransform.h>
17
19
  #include <faiss/impl/AuxIndexStructures.h>
18
20
  #include <faiss/impl/FaissAssert.h>
@@ -31,7 +33,6 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
31
33
  nbit(nbit),
32
34
  period(period),
33
35
  threshold_type(Thresh_global) {
34
- FAISS_THROW_IF_NOT(code_size % 4 == 0);
35
36
  RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
36
37
  rr->init(1234);
37
38
  vt = rr;
@@ -151,8 +152,8 @@ void binarize_with_freq(
151
152
  memset(codes, 0, (nbit + 7) / 8);
152
153
  for (size_t i = 0; i < nbit; i++) {
153
154
  float xf = (x[i] - c[i]);
154
- int xi = int(floor(xf * freq));
155
- int bit = xi & 1;
155
+ int64_t xi = int64_t(floor(xf * freq));
156
+ int64_t bit = xi & 1;
156
157
  codes[i >> 3] |= bit << (i & 7);
157
158
  }
158
159
  }
@@ -167,35 +168,33 @@ void IndexIVFSpectralHash::encode_vectors(
167
168
  bool include_listnos) const {
168
169
  FAISS_THROW_IF_NOT(is_trained);
169
170
  float freq = 2.0 / period;
170
-
171
- FAISS_THROW_IF_NOT_MSG(!include_listnos, "listnos encoding not supported");
171
+ size_t coarse_size = include_listnos ? coarse_code_size() : 0;
172
172
 
173
173
  // transform with vt
174
174
  std::unique_ptr<float[]> x(vt->apply(n, x_in));
175
175
 
176
- #pragma omp parallel
177
- {
178
- std::vector<float> zero(nbit);
176
+ std::vector<float> zero(nbit);
179
177
 
180
- // each thread takes care of a subset of lists
181
178
  #pragma omp for
182
- for (idx_t i = 0; i < n; i++) {
183
- int64_t list_no = list_nos[i];
184
-
185
- if (list_no >= 0) {
186
- const float* c;
187
- if (threshold_type == Thresh_global) {
188
- c = zero.data();
189
- } else {
190
- c = trained.data() + list_no * nbit;
191
- }
192
- binarize_with_freq(
193
- nbit,
194
- freq,
195
- x.get() + i * nbit,
196
- c,
197
- codes + i * code_size);
179
+ for (idx_t i = 0; i < n; i++) {
180
+ int64_t list_no = list_nos[i];
181
+ uint8_t* code = codes + i * (code_size + coarse_size);
182
+
183
+ if (list_no >= 0) {
184
+ if (coarse_size) {
185
+ encode_listno(list_no, code);
186
+ }
187
+ const float* c;
188
+
189
+ if (threshold_type == Thresh_global) {
190
+ c = zero.data();
191
+ } else {
192
+ c = trained.data() + list_no * nbit;
198
193
  }
194
+ binarize_with_freq(
195
+ nbit, freq, x.get() + i * nbit, c, code + coarse_size);
196
+ } else {
197
+ memset(code, 0, code_size + coarse_size);
199
198
  }
200
199
  }
201
200
  }
@@ -206,9 +205,7 @@ template <class HammingComputer>
206
205
  struct IVFScanner : InvertedListScanner {
207
206
  // copied from index structure
208
207
  const IndexIVFSpectralHash* index;
209
- size_t code_size;
210
208
  size_t nbit;
211
- bool store_pairs;
212
209
 
213
210
  float period, freq;
214
211
  std::vector<float> q;
@@ -220,15 +217,16 @@ struct IVFScanner : InvertedListScanner {
220
217
 
221
218
  IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
222
219
  : index(index),
223
- code_size(index->code_size),
224
220
  nbit(index->nbit),
225
- store_pairs(store_pairs),
226
221
  period(index->period),
227
222
  freq(2.0 / index->period),
228
223
  q(nbit),
229
224
  zero(nbit),
230
- qcode(code_size),
231
- hc(qcode.data(), code_size) {}
225
+ qcode(index->code_size),
226
+ hc(qcode.data(), index->code_size) {
227
+ this->store_pairs = store_pairs;
228
+ this->code_size = index->code_size;
229
+ }
232
230
 
233
231
  void set_query(const float* query) override {
234
232
  FAISS_THROW_IF_NOT(query);
@@ -241,8 +239,6 @@ struct IVFScanner : InvertedListScanner {
241
239
  }
242
240
  }
243
241
 
244
- idx_t list_no;
245
-
246
242
  void set_list(idx_t list_no, float /*coarse_dis*/) override {
247
243
  this->list_no = list_no;
248
244
  if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
@@ -310,13 +306,38 @@ InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
310
306
  HANDLE_CODE_SIZE(64);
311
307
  #undef HANDLE_CODE_SIZE
312
308
  default:
313
- if (code_size % 4 == 0) {
314
- return new IVFScanner<HammingComputerDefault>(
315
- this, store_pairs);
316
- } else {
317
- FAISS_THROW_MSG("not supported");
318
- }
309
+ return new IVFScanner<HammingComputerDefault>(this, store_pairs);
310
+ }
311
+ }
312
+
313
+ void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
314
+ FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
315
+ FAISS_THROW_IF_NOT(vt_in->d_in == d);
316
+ if (own_fields) {
317
+ delete vt;
319
318
  }
319
+ vt = vt_in;
320
+ threshold_type = Thresh_global;
321
+ is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
322
+ vt->is_trained;
323
+ own_fields = own;
324
+ }
325
+
326
+ /*
327
+ Check that the encoder is a single vector transform followed by a LSH
328
+ that just does thresholding.
329
+ If this is not the case, the linear transform + thresholds of the IndexLSH
330
+ should be merged into the VectorTransform (which is feasible).
331
+ */
332
+
333
+ void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
334
+ FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
335
+ auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
336
+ FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
337
+ FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
338
+ FAISS_THROW_IF_NOT(!sub_index->rotate_data);
339
+ FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
340
+ replace_vt(encoder->chain[0], own);
320
341
  }
321
342
 
322
343
  } // namespace faiss
@@ -17,6 +17,7 @@
17
17
  namespace faiss {
18
18
 
19
19
  struct VectorTransform;
20
+ struct IndexPreTransform;
20
21
 
21
22
  /** Inverted list that stores binary codes of size nbit. Before the
22
23
  * binary conversion, the dimension of the vectors is transformed from
@@ -25,23 +26,29 @@ struct VectorTransform;
25
26
  * Each coordinate is subtracted from a value determined by
26
27
  * threshold_type, and split into intervals of size period. Half of
27
28
  * the interval is a 0 bit, the other half a 1.
29
+ *
28
30
  */
29
31
  struct IndexIVFSpectralHash : IndexIVF {
30
- VectorTransform* vt; // transformation from d to nbit dim
32
+ /// transformation from d to nbit dim
33
+ VectorTransform* vt;
34
+ /// own the vt
31
35
  bool own_fields;
32
36
 
37
+ /// nb of bits of the binary signature
33
38
  int nbit;
39
+ /// interval size for 0s and 1s
34
40
  float period;
35
41
 
36
42
  enum ThresholdType {
37
- Thresh_global,
38
- Thresh_centroid,
39
- Thresh_centroid_half,
40
- Thresh_median
43
+ Thresh_global, ///< global threshold at 0
44
+ Thresh_centroid, ///< compare to centroid
45
+ Thresh_centroid_half, ///< central interval around centroid
46
+ Thresh_median ///< median of training set
41
47
  };
42
48
  ThresholdType threshold_type;
43
49
 
44
- // size nlist * nbit or 0 if Thresh_global
50
+ /// Trained threshold.
51
+ /// size nlist * nbit or 0 if Thresh_global
45
52
  std::vector<float> trained;
46
53
 
47
54
  IndexIVFSpectralHash(
@@ -65,6 +72,14 @@ struct IndexIVFSpectralHash : IndexIVF {
65
72
  InvertedListScanner* get_InvertedListScanner(
66
73
  bool store_pairs) const override;
67
74
 
75
+ /** replace the vector transform for an empty (and possibly untrained) index
76
+ */
77
+ void replace_vt(VectorTransform* vt, bool own = false);
78
+
79
+ /** convenience function to get the VT from an index constructed by an
80
+ * index_factory (should end in "LSH") */
81
+ void replace_vt(IndexPreTransform* index, bool own = false);
82
+
68
83
  ~IndexIVFSpectralHash() override;
69
84
  };
70
85
 
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  #include <faiss/IndexLSH.h>
11
9
 
12
10
  #include <cstdio>
@@ -25,15 +23,13 @@ namespace faiss {
25
23
  ***************************************************************/
26
24
 
27
25
  IndexLSH::IndexLSH(idx_t d, int nbits, bool rotate_data, bool train_thresholds)
28
- : Index(d),
26
+ : IndexFlatCodes((nbits + 7) / 8, d),
29
27
  nbits(nbits),
30
28
  rotate_data(rotate_data),
31
29
  train_thresholds(train_thresholds),
32
30
  rrot(d, nbits) {
33
31
  is_trained = !train_thresholds;
34
32
 
35
- bytes_per_vec = (nbits + 7) / 8;
36
-
37
33
  if (rotate_data) {
38
34
  rrot.init(5);
39
35
  } else {
@@ -41,11 +37,7 @@ IndexLSH::IndexLSH(idx_t d, int nbits, bool rotate_data, bool train_thresholds)
41
37
  }
42
38
  }
43
39
 
44
- IndexLSH::IndexLSH()
45
- : nbits(0),
46
- bytes_per_vec(0),
47
- rotate_data(false),
48
- train_thresholds(false) {}
40
+ IndexLSH::IndexLSH() : nbits(0), rotate_data(false), train_thresholds(false) {}
49
41
 
50
42
  const float* IndexLSH::apply_preprocess(idx_t n, const float* x) const {
51
43
  float* xt = nullptr;
@@ -106,15 +98,6 @@ void IndexLSH::train(idx_t n, const float* x) {
106
98
  is_trained = true;
107
99
  }
108
100
 
109
- void IndexLSH::add(idx_t n, const float* x) {
110
- FAISS_THROW_IF_NOT(is_trained);
111
- codes.resize((ntotal + n) * bytes_per_vec);
112
-
113
- sa_encode(n, x, &codes[ntotal * bytes_per_vec]);
114
-
115
- ntotal += n;
116
- }
117
-
118
101
  void IndexLSH::search(
119
102
  idx_t n,
120
103
  const float* x,
@@ -127,7 +110,7 @@ void IndexLSH::search(
127
110
  const float* xt = apply_preprocess(n, x);
128
111
  ScopeDeleter<float> del(xt == x ? nullptr : xt);
129
112
 
130
- uint8_t* qcodes = new uint8_t[n * bytes_per_vec];
113
+ uint8_t* qcodes = new uint8_t[n * code_size];
131
114
  ScopeDeleter<uint8_t> del2(qcodes);
132
115
 
133
116
  fvecs2bitvecs(xt, qcodes, nbits, n);
@@ -137,7 +120,7 @@ void IndexLSH::search(
137
120
 
138
121
  int_maxheap_array_t res = {size_t(n), size_t(k), labels, idistances};
139
122
 
140
- hammings_knn_hc(&res, qcodes, codes.data(), ntotal, bytes_per_vec, true);
123
+ hammings_knn_hc(&res, qcodes, codes.data(), ntotal, code_size, true);
141
124
 
142
125
  // convert distances to floats
143
126
  for (int i = 0; i < k * n; i++)
@@ -158,15 +141,6 @@ void IndexLSH::transfer_thresholds(LinearTransform* vt) {
158
141
  thresholds.clear();
159
142
  }
160
143
 
161
- void IndexLSH::reset() {
162
- codes.clear();
163
- ntotal = 0;
164
- }
165
-
166
- size_t IndexLSH::sa_code_size() const {
167
- return bytes_per_vec;
168
- }
169
-
170
144
  void IndexLSH::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
171
145
  FAISS_THROW_IF_NOT(is_trained);
172
146
  const float* xt = apply_preprocess(n, x);
@@ -12,17 +12,14 @@
12
12
 
13
13
  #include <vector>
14
14
 
15
- #include <faiss/Index.h>
15
+ #include <faiss/IndexFlatCodes.h>
16
16
  #include <faiss/VectorTransform.h>
17
17
 
18
18
  namespace faiss {
19
19
 
20
20
  /** The sign of each vector component is put in a binary signature */
21
- struct IndexLSH : Index {
22
- typedef unsigned char uint8_t;
23
-
21
+ struct IndexLSH : IndexFlatCodes {
24
22
  int nbits; ///< nb of bits per vector
25
- int bytes_per_vec; ///< nb of 8-bits per encoded vector
26
23
  bool rotate_data; ///< whether to apply a random rotation to input
27
24
  bool train_thresholds; ///< whether we train thresholds or use 0
28
25
 
@@ -30,9 +27,6 @@ struct IndexLSH : Index {
30
27
 
31
28
  std::vector<float> thresholds; ///< thresholds to compare with
32
29
 
33
- /// encoded dataset
34
- std::vector<uint8_t> codes;
35
-
36
30
  IndexLSH(
37
31
  idx_t d,
38
32
  int nbits,
@@ -50,8 +44,6 @@ struct IndexLSH : Index {
50
44
 
51
45
  void train(idx_t n, const float* x) override;
52
46
 
53
- void add(idx_t n, const float* x) override;
54
-
55
47
  void search(
56
48
  idx_t n,
57
49
  const float* x,
@@ -59,8 +51,6 @@ struct IndexLSH : Index {
59
51
  float* distances,
60
52
  idx_t* labels) const override;
61
53
 
62
- void reset() override;
63
-
64
54
  /// transfer the thresholds to a pre-processing stage (and unset
65
55
  /// train_thresholds)
66
56
  void transfer_thresholds(LinearTransform* vt);
@@ -72,9 +62,6 @@ struct IndexLSH : Index {
72
62
  /* standalone codec interface.
73
63
  *
74
64
  * The vectors are decoded to +/- 1 (not 0, 1) */
75
-
76
- size_t sa_code_size() const override;
77
-
78
65
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
79
66
 
80
67
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -167,9 +167,7 @@ void IndexNNDescent::search(
167
167
  float* simi = distances + i * k;
168
168
  dis->set_query(x + i * d);
169
169
 
170
- maxheap_heapify(k, simi, idxi);
171
170
  nndescent.search(*dis, k, idxi, simi, vt);
172
- maxheap_reorder(k, simi, idxi);
173
171
  }
174
172
  }
175
173
  InterruptCallback::check();
@@ -104,9 +104,7 @@ void IndexNSG::search(
104
104
  float* simi = distances + i * k;
105
105
  dis->set_query(x + i * d);
106
106
 
107
- maxheap_heapify(k, simi, idxi);
108
107
  nsg.search(*dis, k, idxi, simi, vt);
109
- maxheap_reorder(k, simi, idxi);
110
108
 
111
109
  vt.advance();
112
110
  }
@@ -28,12 +28,13 @@ namespace faiss {
28
28
  ********************************************************/
29
29
 
30
30
  IndexPQ::IndexPQ(int d, size_t M, size_t nbits, MetricType metric)
31
- : Index(d, metric), pq(d, M, nbits) {
31
+ : IndexFlatCodes(0, d, metric), pq(d, M, nbits) {
32
32
  is_trained = false;
33
33
  do_polysemous_training = false;
34
34
  polysemous_ht = nbits * M + 1;
35
35
  search_type = ST_PQ;
36
36
  encode_signs = false;
37
+ code_size = pq.code_size;
37
38
  }
38
39
 
39
40
  IndexPQ::IndexPQ() {
@@ -69,53 +70,6 @@ void IndexPQ::train(idx_t n, const float* x) {
69
70
  is_trained = true;
70
71
  }
71
72
 
72
- void IndexPQ::add(idx_t n, const float* x) {
73
- FAISS_THROW_IF_NOT(is_trained);
74
- codes.resize((n + ntotal) * pq.code_size);
75
- pq.compute_codes(x, &codes[ntotal * pq.code_size], n);
76
- ntotal += n;
77
- }
78
-
79
- size_t IndexPQ::remove_ids(const IDSelector& sel) {
80
- idx_t j = 0;
81
- for (idx_t i = 0; i < ntotal; i++) {
82
- if (sel.is_member(i)) {
83
- // should be removed
84
- } else {
85
- if (i > j) {
86
- memmove(&codes[pq.code_size * j],
87
- &codes[pq.code_size * i],
88
- pq.code_size);
89
- }
90
- j++;
91
- }
92
- }
93
- size_t nremove = ntotal - j;
94
- if (nremove > 0) {
95
- ntotal = j;
96
- codes.resize(ntotal * pq.code_size);
97
- }
98
- return nremove;
99
- }
100
-
101
- void IndexPQ::reset() {
102
- codes.clear();
103
- ntotal = 0;
104
- }
105
-
106
- void IndexPQ::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
107
- FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
108
- for (idx_t i = 0; i < ni; i++) {
109
- const uint8_t* code = &codes[(i0 + i) * pq.code_size];
110
- pq.decode(code, recons + i * d);
111
- }
112
- }
113
-
114
- void IndexPQ::reconstruct(idx_t key, float* recons) const {
115
- FAISS_THROW_IF_NOT(key >= 0 && key < ntotal);
116
- pq.decode(&codes[key * pq.code_size], recons);
117
- }
118
-
119
73
  namespace {
120
74
 
121
75
  template <class PQDecoder>
@@ -457,9 +411,6 @@ void IndexPQ::search_core_polysemous(
457
411
  }
458
412
 
459
413
  /* The standalone codec interface (just remaps to the PQ functions) */
460
- size_t IndexPQ::sa_code_size() const {
461
- return pq.code_size;
462
- }
463
414
 
464
415
  void IndexPQ::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
465
416
  pq.compute_codes(x, bytes, n);
@@ -12,7 +12,7 @@
12
12
 
13
13
  #include <vector>
14
14
 
15
- #include <faiss/Index.h>
15
+ #include <faiss/IndexFlatCodes.h>
16
16
  #include <faiss/impl/PolysemousTraining.h>
17
17
  #include <faiss/impl/ProductQuantizer.h>
18
18
  #include <faiss/impl/platform_macros.h>
@@ -21,13 +21,10 @@ namespace faiss {
21
21
 
22
22
  /** Index based on a product quantizer. Stored vectors are
23
23
  * approximated by PQ codes. */
24
- struct IndexPQ : Index {
24
+ struct IndexPQ : IndexFlatCodes {
25
25
  /// The product quantizer used to encode the vectors
26
26
  ProductQuantizer pq;
27
27
 
28
- /// Codes. Size ntotal * pq.code_size
29
- std::vector<uint8_t> codes;
30
-
31
28
  /** Constructor.
32
29
  *
33
30
  * @param d dimensionality of the input vectors
@@ -43,8 +40,6 @@ struct IndexPQ : Index {
43
40
 
44
41
  void train(idx_t n, const float* x) override;
45
42
 
46
- void add(idx_t n, const float* x) override;
47
-
48
43
  void search(
49
44
  idx_t n,
50
45
  const float* x,
@@ -52,17 +47,7 @@ struct IndexPQ : Index {
52
47
  float* distances,
53
48
  idx_t* labels) const override;
54
49
 
55
- void reset() override;
56
-
57
- void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
58
-
59
- void reconstruct(idx_t key, float* recons) const override;
60
-
61
- size_t remove_ids(const IDSelector& sel) override;
62
-
63
50
  /* The standalone codec interface */
64
- size_t sa_code_size() const override;
65
-
66
51
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
67
52
 
68
53
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -155,6 +155,34 @@ void IndexRefine::reconstruct(idx_t key, float* recons) const {
155
155
  refine_index->reconstruct(key, recons);
156
156
  }
157
157
 
158
+ size_t IndexRefine::sa_code_size() const {
159
+ return base_index->sa_code_size() + refine_index->sa_code_size();
160
+ }
161
+
162
+ void IndexRefine::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
163
+ size_t cs1 = base_index->sa_code_size(), cs2 = refine_index->sa_code_size();
164
+ std::unique_ptr<uint8_t[]> tmp1(new uint8_t[n * cs1]);
165
+ base_index->sa_encode(n, x, tmp1.get());
166
+ std::unique_ptr<uint8_t[]> tmp2(new uint8_t[n * cs2]);
167
+ refine_index->sa_encode(n, x, tmp2.get());
168
+ for (size_t i = 0; i < n; i++) {
169
+ uint8_t* b = bytes + i * (cs1 + cs2);
170
+ memcpy(b, tmp1.get() + cs1 * i, cs1);
171
+ memcpy(b + cs1, tmp2.get() + cs2 * i, cs2);
172
+ }
173
+ }
174
+
175
+ void IndexRefine::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
176
+ size_t cs1 = base_index->sa_code_size(), cs2 = refine_index->sa_code_size();
177
+ std::unique_ptr<uint8_t[]> tmp2(
178
+ new uint8_t[n * refine_index->sa_code_size()]);
179
+ for (size_t i = 0; i < n; i++) {
180
+ memcpy(tmp2.get() + i * cs2, bytes + i * (cs1 + cs2), cs2);
181
+ }
182
+
183
+ refine_index->sa_decode(n, tmp2.get(), x);
184
+ }
185
+
158
186
  IndexRefine::~IndexRefine() {
159
187
  if (own_fields)
160
188
  delete base_index;
@@ -49,6 +49,16 @@ struct IndexRefine : Index {
49
49
  // reconstruct is routed to the refine_index
50
50
  void reconstruct(idx_t key, float* recons) const override;
51
51
 
52
+ /* standalone codec interface: the base_index codes are interleaved with the
53
+ * refine_index ones */
54
+ size_t sa_code_size() const override;
55
+
56
+ void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
57
+
58
+ /// The sa_decode decodes from the index_refine, which is assumed to be more
59
+ /// accurate
60
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
61
+
52
62
  ~IndexRefine() override;
53
63
  };
54
64
 
@@ -29,7 +29,7 @@ IndexScalarQuantizer::IndexScalarQuantizer(
29
29
  int d,
30
30
  ScalarQuantizer::QuantizerType qtype,
31
31
  MetricType metric)
32
- : Index(d, metric), sq(d, qtype) {
32
+ : IndexFlatCodes(0, d, metric), sq(d, qtype) {
33
33
  is_trained = qtype == ScalarQuantizer::QT_fp16 ||
34
34
  qtype == ScalarQuantizer::QT_8bit_direct;
35
35
  code_size = sq.code_size;
@@ -43,13 +43,6 @@ void IndexScalarQuantizer::train(idx_t n, const float* x) {
43
43
  is_trained = true;
44
44
  }
45
45
 
46
- void IndexScalarQuantizer::add(idx_t n, const float* x) {
47
- FAISS_THROW_IF_NOT(is_trained);
48
- codes.resize((n + ntotal) * code_size);
49
- sq.compute_codes(x, &codes[ntotal * code_size], n);
50
- ntotal += n;
51
- }
52
-
53
46
  void IndexScalarQuantizer::search(
54
47
  idx_t n,
55
48
  const float* x,
@@ -67,6 +60,7 @@ void IndexScalarQuantizer::search(
67
60
  InvertedListScanner* scanner =
68
61
  sq.select_InvertedListScanner(metric_type, nullptr, true);
69
62
  ScopeDeleter1<InvertedListScanner> del(scanner);
63
+ scanner->list_no = 0; // directly the list number
70
64
 
71
65
  #pragma omp for
72
66
  for (idx_t i = 0; i < n; i++) {
@@ -99,27 +93,7 @@ DistanceComputer* IndexScalarQuantizer::get_distance_computer() const {
99
93
  return dc;
100
94
  }
101
95
 
102
- void IndexScalarQuantizer::reset() {
103
- codes.clear();
104
- ntotal = 0;
105
- }
106
-
107
- void IndexScalarQuantizer::reconstruct_n(idx_t i0, idx_t ni, float* recons)
108
- const {
109
- std::unique_ptr<ScalarQuantizer::Quantizer> squant(sq.select_quantizer());
110
- for (size_t i = 0; i < ni; i++) {
111
- squant->decode_vector(&codes[(i + i0) * code_size], recons + i * d);
112
- }
113
- }
114
-
115
- void IndexScalarQuantizer::reconstruct(idx_t key, float* recons) const {
116
- reconstruct_n(key, 1, recons);
117
- }
118
-
119
96
  /* Codec interface */
120
- size_t IndexScalarQuantizer::sa_code_size() const {
121
- return sq.code_size;
122
- }
123
97
 
124
98
  void IndexScalarQuantizer::sa_encode(idx_t n, const float* x, uint8_t* bytes)
125
99
  const {
@@ -13,6 +13,7 @@
13
13
  #include <stdint.h>
14
14
  #include <vector>
15
15
 
16
+ #include <faiss/IndexFlatCodes.h>
16
17
  #include <faiss/IndexIVF.h>
17
18
  #include <faiss/impl/ScalarQuantizer.h>
18
19
 
@@ -24,15 +25,10 @@ namespace faiss {
24
25
  * (default).
25
26
  */
26
27
 
27
- struct IndexScalarQuantizer : Index {
28
+ struct IndexScalarQuantizer : IndexFlatCodes {
28
29
  /// Used to encode the vectors
29
30
  ScalarQuantizer sq;
30
31
 
31
- /// Codes. Size ntotal * pq.code_size
32
- std::vector<uint8_t> codes;
33
-
34
- size_t code_size;
35
-
36
32
  /** Constructor.
37
33
  *
38
34
  * @param d dimensionality of the input vectors
@@ -48,8 +44,6 @@ struct IndexScalarQuantizer : Index {
48
44
 
49
45
  void train(idx_t n, const float* x) override;
50
46
 
51
- void add(idx_t n, const float* x) override;
52
-
53
47
  void search(
54
48
  idx_t n,
55
49
  const float* x,
@@ -57,17 +51,9 @@ struct IndexScalarQuantizer : Index {
57
51
  float* distances,
58
52
  idx_t* labels) const override;
59
53
 
60
- void reset() override;
61
-
62
- void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
63
-
64
- void reconstruct(idx_t key, float* recons) const override;
65
-
66
54
  DistanceComputer* get_distance_computer() const override;
67
55
 
68
56
  /* standalone codec interface */
69
- size_t sa_code_size() const override;
70
-
71
57
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
72
58
 
73
59
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;