RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.4 - Mend

faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +16 -0
data/LICENSE.txt +1 -1
data/README.md +7 -7
data/ext/faiss/extconf.rb +6 -3
data/ext/faiss/numo.hpp +4 -4
data/ext/faiss/utils.cpp +1 -1
data/ext/faiss/utils.h +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +365 -194
data/vendor/faiss/faiss/Clustering.h +102 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
data/vendor/faiss/faiss/Index2Layer.h +22 -36
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
data/vendor/faiss/faiss/IndexFlat.h +42 -59
data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
data/vendor/faiss/faiss/IndexIVF.h +169 -118
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
data/vendor/faiss/faiss/IndexLSH.h +20 -38
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
data/vendor/faiss/faiss/IndexPQ.h +64 -82
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
data/vendor/faiss/faiss/IndexRefine.h +32 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
data/vendor/faiss/faiss/VectorTransform.h +64 -89
data/vendor/faiss/faiss/clone_index.cpp +78 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
data/vendor/faiss/faiss/impl/io.cpp +76 -95
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +60 -29
data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +619 -397
data/vendor/faiss/faiss/index_factory.h +8 -6
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +305 -312
data/vendor/faiss/faiss/utils/distances.h +170 -122
data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +54 -49
metadata +29 -4

data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp CHANGED Viewed

@@ -7,43 +7,48 @@
 // -*- c++ -*-
 #include <faiss/IndexIVFSpectralHash.h>
-#include <memory>
-#include <algorithm>
 #include <stdint.h>
+#include <algorithm>
+#include <memory>
+#include <faiss/IndexLSH.h>
+#include <faiss/IndexPreTransform.h>
+#include <faiss/VectorTransform.h>
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/FaissAssert.h>
 #include <faiss/utils/hamming.h>
 #include <faiss/utils/utils.h>
-#include <faiss/impl/FaissAssert.h>
-#include <faiss/impl/AuxIndexStructures.h>
-#include <faiss/VectorTransform.h>
 namespace faiss {
-IndexIVFSpectralHash::IndexIVFSpectralHash (
-        Index * quantizer, size_t d, size_t nlist,
-        int nbit, float period):
-    IndexIVF (quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
-    nbit (nbit), period (period), threshold_type (Thresh_global)
-{
-    FAISS_THROW_IF_NOT (code_size % 4 == 0);
-    RandomRotationMatrix *rr = new RandomRotationMatrix (d, nbit);
-    rr->init (1234);
+IndexIVFSpectralHash::IndexIVFSpectralHash(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        int nbit,
+        float period)
+        : IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
+          nbit(nbit),
+          period(period),
+          threshold_type(Thresh_global) {
+    RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
+    rr->init(1234);
     vt = rr;
     own_fields = true;
     is_trained = false;
 }
-IndexIVFSpectralHash::IndexIVFSpectralHash():
-    IndexIVF(), vt(nullptr), own_fields(false),
-    nbit(0), period(0), threshold_type(Thresh_global)
-{}
+IndexIVFSpectralHash::IndexIVFSpectralHash()
+        : IndexIVF(),
+          vt(nullptr),
+          own_fields(false),
+          nbit(0),
+          period(0),
+          threshold_type(Thresh_global) {}
-IndexIVFSpectralHash::~IndexIVFSpectralHash ()
-{
+IndexIVFSpectralHash::~IndexIVFSpectralHash() {
     if (own_fields) {
         delete vt;
     }
@@ -51,35 +56,33 @@ IndexIVFSpectralHash::~IndexIVFSpectralHash ()
 namespace {
-float median (size_t n, float *x) {
+float median(size_t n, float* x) {
     std::sort(x, x + n);
     if (n % 2 == 1) {
-        return x [n / 2];
+        return x[n / 2];
     } else {
-        return (x [n / 2 - 1] + x [n / 2]) / 2;
+        return (x[n / 2 - 1] + x[n / 2]) / 2;
     }
 }
-}
+} // namespace
-void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
-{
+void IndexIVFSpectralHash::train_residual(idx_t n, const float* x) {
     if (!vt->is_trained) {
-        vt->train (n, x);
+        vt->train(n, x);
     }
     if (threshold_type == Thresh_global) {
         // nothing to do
         return;
-    } else if (threshold_type == Thresh_centroid ||
-        threshold_type == Thresh_centroid_half) {
+    } else if (
+            threshold_type == Thresh_centroid ||
+            threshold_type == Thresh_centroid_half) {
         // convert all centroids with vt
-        std::vector<float> centroids (nlist * d);
-        quantizer->reconstruct_n (0, nlist, centroids.data());
+        std::vector<float> centroids(nlist * d);
+        quantizer->reconstruct_n(0, nlist, centroids.data());
         trained.resize(nlist * nbit);
-        vt->apply_noalloc (nlist, centroids.data(), trained.data());
+        vt->apply_noalloc(nlist, centroids.data(), trained.data());
         if (threshold_type == Thresh_centroid_half) {
             for (size_t i = 0; i < nlist * nbit; i++) {
                 trained[i] -= 0.25 * period;
@@ -90,12 +93,12 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
     // otherwise train medians
     // assign
-    std::unique_ptr<idx_t []> idx (new idx_t [n]);
-    quantizer->assign (n, x, idx.get());
+    std::unique_ptr<idx_t[]> idx(new idx_t[n]);
+    quantizer->assign(n, x, idx.get());
     std::vector<size_t> sizes(nlist + 1);
     for (size_t i = 0; i < n; i++) {
-        FAISS_THROW_IF_NOT (idx[i] >= 0);
+        FAISS_THROW_IF_NOT(idx[i] >= 0);
         sizes[idx[i]]++;
     }
@@ -107,10 +110,10 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
     }
     // transform
-    std::unique_ptr<float []> xt (vt->apply (n, x));
+    std::unique_ptr<float[]> xt(vt->apply(n, x));
     // transpose + reorder
-    std::unique_ptr<float []> xo (new float[n * nbit]);
+    std::unique_ptr<float[]> xo(new float[n * nbit]);
     for (size_t i = 0; i < n; i++) {
         size_t idest = sizes[idx[i]]++;
@@ -119,14 +122,14 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
         }
     }
-    trained.resize (n * nbit);
+    trained.resize(n * nbit);
     // compute medians
 #pragma omp for
     for (int i = 0; i < nlist; i++) {
         size_t i0 = i == 0 ? 0 : sizes[i - 1];
         size_t i1 = sizes[i];
         for (int j = 0; j < nbit; j++) {
-            float *xoi = xo.get() + i0 + n * j;
+            float* xoi = xo.get() + i0 + n * j;
             if (i0 == i1) { // nothing to train
                 trained[i * nbit + j] = 0.0;
             } else if (i1 == i0 + 1) {
@@ -138,75 +141,71 @@ void IndexIVFSpectralHash::train_residual (idx_t n, const float *x)
     }
 }
 namespace {
-void binarize_with_freq(size_t nbit, float freq,
-                        const float *x, const float *c,
-                        uint8_t *codes)
-{
-    memset (codes, 0, (nbit + 7) / 8);
+void binarize_with_freq(
+        size_t nbit,
+        float freq,
+        const float* x,
+        const float* c,
+        uint8_t* codes) {
+    memset(codes, 0, (nbit + 7) / 8);
     for (size_t i = 0; i < nbit; i++) {
         float xf = (x[i] - c[i]);
-        int xi = int(floor(xf * freq));
-        int bit = xi & 1;
+        int64_t xi = int64_t(floor(xf * freq));
+        int64_t bit = xi & 1;
         codes[i >> 3] |= bit << (i & 7);
     }
 }
+}; // namespace
-};
-void IndexIVFSpectralHash::encode_vectors(idx_t n, const float* x_in,
-                                          const idx_t *list_nos,
-                                          uint8_t * codes,
-                                          bool include_listnos) const
-{
-    FAISS_THROW_IF_NOT (is_trained);
+void IndexIVFSpectralHash::encode_vectors(
+        idx_t n,
+        const float* x_in,
+        const idx_t* list_nos,
+        uint8_t* codes,
+        bool include_listnos) const {
+    FAISS_THROW_IF_NOT(is_trained);
     float freq = 2.0 / period;
-    FAISS_THROW_IF_NOT_MSG (!include_listnos, "listnos encoding not supported");
+    size_t coarse_size = include_listnos ? coarse_code_size() : 0;
     // transform with vt
-    std::unique_ptr<float []> x (vt->apply (n, x_in));
+    std::unique_ptr<float[]> x(vt->apply(n, x_in));
-#pragma omp parallel
-    {
-        std::vector<float> zero (nbit);
+    std::vector<float> zero(nbit);
-        // each thread takes care of a subset of lists
 #pragma omp for
-        for (idx_t i = 0; i < n; i++) {
-            int64_t list_no = list_nos [i];
-            if (list_no >= 0) {
-                const float *c;
-                if (threshold_type == Thresh_global) {
-                    c = zero.data();
-                } else {
-                    c = trained.data() + list_no * nbit;
-                }
-                binarize_with_freq (nbit, freq,
-                                    x.get() + i * nbit, c,
-                                    codes + i * code_size) ;
+    for (idx_t i = 0; i < n; i++) {
+        int64_t list_no = list_nos[i];
+        uint8_t* code = codes + i * (code_size + coarse_size);
+        if (list_no >= 0) {
+            if (coarse_size) {
+                encode_listno(list_no, code);
+            }
+            const float* c;
+            if (threshold_type == Thresh_global) {
+                c = zero.data();
+            } else {
+                c = trained.data() + list_no * nbit;
             }
+            binarize_with_freq(
+                    nbit, freq, x.get() + i * nbit, c, code + coarse_size);
+        } else {
+            memset(code, 0, code_size + coarse_size);
         }
     }
 }
 namespace {
-template<class HammingComputer>
-struct IVFScanner: InvertedListScanner {
+template <class HammingComputer>
+struct IVFScanner : InvertedListScanner {
     // copied from index structure
-    const IndexIVFSpectralHash *index;
-    size_t code_size;
+    const IndexIVFSpectralHash* index;
     size_t nbit;
-    bool store_pairs;
     float period, freq;
     std::vector<float> q;
@@ -216,61 +215,57 @@ struct IVFScanner: InvertedListScanner {
     using idx_t = Index::idx_t;
-    IVFScanner (const IndexIVFSpectralHash * index,
-                bool store_pairs):
-        index (index),
-        code_size(index->code_size),
-        nbit(index->nbit),
-        store_pairs(store_pairs),
-        period(index->period), freq(2.0 / index->period),
-        q(nbit), zero(nbit), qcode(code_size),
-        hc(qcode.data(), code_size)
-    {
+    IVFScanner(const IndexIVFSpectralHash* index, bool store_pairs)
+            : index(index),
+              nbit(index->nbit),
+              period(index->period),
+              freq(2.0 / index->period),
+              q(nbit),
+              zero(nbit),
+              qcode(index->code_size),
+              hc(qcode.data(), index->code_size) {
+        this->store_pairs = store_pairs;
+        this->code_size = index->code_size;
     }
-    void set_query (const float *query) override {
+    void set_query(const float* query) override {
         FAISS_THROW_IF_NOT(query);
         FAISS_THROW_IF_NOT(q.size() == nbit);
-        index->vt->apply_noalloc (1, query, q.data());
+        index->vt->apply_noalloc(1, query, q.data());
-        if (index->threshold_type ==
-            IndexIVFSpectralHash::Thresh_global) {
-            binarize_with_freq
-                (nbit, freq, q.data(), zero.data(), qcode.data());
-            hc.set (qcode.data(), code_size);
+        if (index->threshold_type == IndexIVFSpectralHash::Thresh_global) {
+            binarize_with_freq(nbit, freq, q.data(), zero.data(), qcode.data());
+            hc.set(qcode.data(), code_size);
         }
     }
-    idx_t list_no;
-    void set_list (idx_t list_no, float /*coarse_dis*/) override {
+    void set_list(idx_t list_no, float /*coarse_dis*/) override {
         this->list_no = list_no;
         if (index->threshold_type != IndexIVFSpectralHash::Thresh_global) {
-            const float *c = index->trained.data() + list_no * nbit;
-            binarize_with_freq (nbit, freq, q.data(), c, qcode.data());
-            hc.set (qcode.data(), code_size);
+            const float* c = index->trained.data() + list_no * nbit;
+            binarize_with_freq(nbit, freq, q.data(), c, qcode.data());
+            hc.set(qcode.data(), code_size);
         }
     }
-    float distance_to_code (const uint8_t *code) const final {
-        return hc.hamming (code);
+    float distance_to_code(const uint8_t* code) const final {
+        return hc.hamming(code);
     }
-    size_t scan_codes (size_t list_size,
-                       const uint8_t *codes,
-                       const idx_t *ids,
-                       float *simi, idx_t *idxi,
-                       size_t k) const override
-    {
+    size_t scan_codes(
+            size_t list_size,
+            const uint8_t* codes,
+            const idx_t* ids,
+            float* simi,
+            idx_t* idxi,
+            size_t k) const override {
         size_t nup = 0;
         for (size_t j = 0; j < list_size; j++) {
+            float dis = hc.hamming(codes);
-            float dis = hc.hamming (codes);
-            if (dis < simi [0]) {
-                int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
-                maxheap_replace_top (k, simi, idxi, dis, id);
+            if (dis < simi[0]) {
+                int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
+                maxheap_replace_top(k, simi, idxi, dis, id);
                 nup++;
             }
             codes += code_size;
@@ -278,34 +273,31 @@ struct IVFScanner: InvertedListScanner {
         return nup;
     }
-    void scan_codes_range (size_t list_size,
-                           const uint8_t *codes,
-                           const idx_t *ids,
-                           float radius,
-                           RangeQueryResult & res) const override
-    {
+    void scan_codes_range(
+            size_t list_size,
+            const uint8_t* codes,
+            const idx_t* ids,
+            float radius,
+            RangeQueryResult& res) const override {
         for (size_t j = 0; j < list_size; j++) {
-            float dis = hc.hamming (codes);
+            float dis = hc.hamming(codes);
             if (dis < radius) {
-                int64_t id = store_pairs ? lo_build (list_no, j) : ids[j];
-                res.add (dis, id);
+                int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
+                res.add(dis, id);
             }
             codes += code_size;
         }
     }
 };
 } // anonymous namespace
-InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
-    (bool store_pairs) const
-{
+InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
+        bool store_pairs) const {
     switch (code_size) {
 #define HANDLE_CODE_SIZE(cs) \
-    case cs: \
-        return new IVFScanner<HammingComputer ## cs> (this, store_pairs)
+    case cs:                 \
+        return new IVFScanner<HammingComputer##cs>(this, store_pairs)
         HANDLE_CODE_SIZE(4);
         HANDLE_CODE_SIZE(8);
         HANDLE_CODE_SIZE(16);
@@ -314,17 +306,38 @@ InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner
         HANDLE_CODE_SIZE(64);
 #undef HANDLE_CODE_SIZE
         default:
-            if (code_size % 8 == 0) {
-                return new IVFScanner<HammingComputerM8>(this, store_pairs);
-            } else if (code_size % 4 == 0) {
-                return new IVFScanner<HammingComputerM4>(this, store_pairs);
-            } else {
-                FAISS_THROW_MSG("not supported");
-            }
+            return new IVFScanner<HammingComputerDefault>(this, store_pairs);
     }
 }
+void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {
+    FAISS_THROW_IF_NOT(vt_in->d_out == nbit);
+    FAISS_THROW_IF_NOT(vt_in->d_in == d);
+    if (own_fields) {
+        delete vt;
+    }
+    vt = vt_in;
+    threshold_type = Thresh_global;
+    is_trained = quantizer->is_trained && quantizer->ntotal == nlist &&
+            vt->is_trained;
+    own_fields = own;
+}
+/*
+    Check that the encoder is a single vector transform followed by a LSH
+    that just does thresholding.
+    If this is not the case, the linear transform + thresholds of the IndexLSH
+    should be merged into the VectorTransform (which is feasible).
+*/
+void IndexIVFSpectralHash::replace_vt(IndexPreTransform* encoder, bool own) {
+    FAISS_THROW_IF_NOT(encoder->chain.size() == 1);
+    auto sub_index = dynamic_cast<IndexLSH*>(encoder->index);
+    FAISS_THROW_IF_NOT_MSG(sub_index, "final index should be LSH");
+    FAISS_THROW_IF_NOT(sub_index->nbits == nbit);
+    FAISS_THROW_IF_NOT(!sub_index->rotate_data);
+    FAISS_THROW_IF_NOT(!sub_index->train_thresholds);
+    replace_vt(encoder->chain[0], own);
+}
-}  // namespace faiss
+} // namespace faiss

data/vendor/faiss/faiss/IndexIVFSpectralHash.h CHANGED Viewed

@@ -10,15 +10,14 @@
 #ifndef FAISS_INDEX_IVFSH_H
 #define FAISS_INDEX_IVFSH_H
 #include <vector>
 #include <faiss/IndexIVF.h>
 namespace faiss {
 struct VectorTransform;
+struct IndexPreTransform;
 /** Inverted list that stores binary codes of size nbit. Before the
  * binary conversion, the dimension of the vectors is transformed from
@@ -27,49 +26,63 @@ struct VectorTransform;
  * Each coordinate is subtracted from a value determined by
  * threshold_type, and split into intervals of size period. Half of
  * the interval is a 0 bit, the other half a 1.
+ *
  */
-struct IndexIVFSpectralHash: IndexIVF {
-    VectorTransform *vt; // transformation from d to nbit dim
+struct IndexIVFSpectralHash : IndexIVF {
+    /// transformation from d to nbit dim
+    VectorTransform* vt;
+    /// own the vt
     bool own_fields;
+    /// nb of bits of the binary signature
     int nbit;
+    /// interval size for 0s and 1s
     float period;
     enum ThresholdType {
-        Thresh_global,
-        Thresh_centroid,
-        Thresh_centroid_half,
-        Thresh_median
+        Thresh_global,        ///< global threshold at 0
+        Thresh_centroid,      ///< compare to centroid
+        Thresh_centroid_half, ///< central interval around centroid
+        Thresh_median         ///< median of training set
     };
     ThresholdType threshold_type;
-    // size nlist * nbit or 0 if Thresh_global
+    /// Trained threshold.
+    /// size nlist * nbit or 0 if Thresh_global
     std::vector<float> trained;
-    IndexIVFSpectralHash (Index * quantizer, size_t d, size_t nlist,
-                          int nbit, float period);
+    IndexIVFSpectralHash(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            int nbit,
+            float period);
-    IndexIVFSpectralHash ();
+    IndexIVFSpectralHash();
     void train_residual(idx_t n, const float* x) override;
-    void encode_vectors(idx_t n, const float* x,
-                        const idx_t *list_nos,
-                        uint8_t * codes,
-                        bool include_listnos = false) const override;
+    void encode_vectors(
+            idx_t n,
+            const float* x,
+            const idx_t* list_nos,
+            uint8_t* codes,
+            bool include_listnos = false) const override;
-    InvertedListScanner *get_InvertedListScanner (bool store_pairs)
-        const override;
+    InvertedListScanner* get_InvertedListScanner(
+            bool store_pairs) const override;
-    ~IndexIVFSpectralHash () override;
+    /** replace the vector transform for an empty (and possibly untrained) index
+     */
+    void replace_vt(VectorTransform* vt, bool own = false);
-};
+    /** convenience function to get the VT from an index constructed by an
+     * index_factory (should end in "LSH") */
+    void replace_vt(IndexPreTransform* index, bool own = false);
+    ~IndexIVFSpectralHash() override;
+};
-}; // namespace faiss
+} // namespace faiss
 #endif