RubyGems - faiss - Versions diffs - 0.1.0 → 0.1.1 - Mend

faiss 0.1.0 → 0.1.1

Files changed (226) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/README.md +103 -3
data/ext/faiss/ext.cpp +99 -32
data/ext/faiss/extconf.rb +12 -2
data/lib/faiss/ext.bundle +0 -0
data/lib/faiss/index.rb +3 -3
data/lib/faiss/index_binary.rb +3 -3
data/lib/faiss/kmeans.rb +1 -1
data/lib/faiss/pca_matrix.rb +2 -2
data/lib/faiss/product_quantizer.rb +3 -3
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/AutoTune.cpp +719 -0
data/vendor/faiss/AutoTune.h +212 -0
data/vendor/faiss/Clustering.cpp +261 -0
data/vendor/faiss/Clustering.h +101 -0
data/vendor/faiss/IVFlib.cpp +339 -0
data/vendor/faiss/IVFlib.h +132 -0
data/vendor/faiss/Index.cpp +171 -0
data/vendor/faiss/Index.h +261 -0
data/vendor/faiss/Index2Layer.cpp +437 -0
data/vendor/faiss/Index2Layer.h +85 -0
data/vendor/faiss/IndexBinary.cpp +77 -0
data/vendor/faiss/IndexBinary.h +163 -0
data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
data/vendor/faiss/IndexBinaryFlat.h +54 -0
data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
data/vendor/faiss/IndexBinaryHNSW.h +56 -0
data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
data/vendor/faiss/IndexBinaryIVF.h +211 -0
data/vendor/faiss/IndexFlat.cpp +508 -0
data/vendor/faiss/IndexFlat.h +175 -0
data/vendor/faiss/IndexHNSW.cpp +1090 -0
data/vendor/faiss/IndexHNSW.h +170 -0
data/vendor/faiss/IndexIVF.cpp +909 -0
data/vendor/faiss/IndexIVF.h +353 -0
data/vendor/faiss/IndexIVFFlat.cpp +502 -0
data/vendor/faiss/IndexIVFFlat.h +118 -0
data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
data/vendor/faiss/IndexIVFPQ.h +161 -0
data/vendor/faiss/IndexIVFPQR.cpp +219 -0
data/vendor/faiss/IndexIVFPQR.h +65 -0
data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
data/vendor/faiss/IndexLSH.cpp +225 -0
data/vendor/faiss/IndexLSH.h +87 -0
data/vendor/faiss/IndexLattice.cpp +143 -0
data/vendor/faiss/IndexLattice.h +68 -0
data/vendor/faiss/IndexPQ.cpp +1188 -0
data/vendor/faiss/IndexPQ.h +199 -0
data/vendor/faiss/IndexPreTransform.cpp +288 -0
data/vendor/faiss/IndexPreTransform.h +91 -0
data/vendor/faiss/IndexReplicas.cpp +123 -0
data/vendor/faiss/IndexReplicas.h +76 -0
data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
data/vendor/faiss/IndexScalarQuantizer.h +127 -0
data/vendor/faiss/IndexShards.cpp +317 -0
data/vendor/faiss/IndexShards.h +100 -0
data/vendor/faiss/InvertedLists.cpp +623 -0
data/vendor/faiss/InvertedLists.h +334 -0
data/vendor/faiss/LICENSE +21 -0
data/vendor/faiss/MatrixStats.cpp +252 -0
data/vendor/faiss/MatrixStats.h +62 -0
data/vendor/faiss/MetaIndexes.cpp +351 -0
data/vendor/faiss/MetaIndexes.h +126 -0
data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
data/vendor/faiss/OnDiskInvertedLists.h +127 -0
data/vendor/faiss/VectorTransform.cpp +1157 -0
data/vendor/faiss/VectorTransform.h +322 -0
data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
data/vendor/faiss/c_api/AutoTune_c.h +64 -0
data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
data/vendor/faiss/c_api/Clustering_c.h +117 -0
data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
data/vendor/faiss/c_api/IndexShards_c.h +42 -0
data/vendor/faiss/c_api/Index_c.cpp +105 -0
data/vendor/faiss/c_api/Index_c.h +183 -0
data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
data/vendor/faiss/c_api/clone_index_c.h +32 -0
data/vendor/faiss/c_api/error_c.h +42 -0
data/vendor/faiss/c_api/error_impl.cpp +27 -0
data/vendor/faiss/c_api/error_impl.h +16 -0
data/vendor/faiss/c_api/faiss_c.h +58 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
data/vendor/faiss/c_api/index_factory_c.h +30 -0
data/vendor/faiss/c_api/index_io_c.cpp +42 -0
data/vendor/faiss/c_api/index_io_c.h +50 -0
data/vendor/faiss/c_api/macros_impl.h +110 -0
data/vendor/faiss/clone_index.cpp +147 -0
data/vendor/faiss/clone_index.h +38 -0
data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
data/vendor/faiss/gpu/GpuCloner.h +82 -0
data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
data/vendor/faiss/gpu/GpuDistance.h +52 -0
data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
data/vendor/faiss/gpu/GpuIndex.h +148 -0
data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
data/vendor/faiss/gpu/GpuResources.cpp +52 -0
data/vendor/faiss/gpu/GpuResources.h +73 -0
data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
data/vendor/faiss/gpu/test/TestUtils.h +93 -0
data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
data/vendor/faiss/gpu/utils/Timer.h +52 -0
data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
data/vendor/faiss/impl/FaissAssert.h +95 -0
data/vendor/faiss/impl/FaissException.cpp +66 -0
data/vendor/faiss/impl/FaissException.h +71 -0
data/vendor/faiss/impl/HNSW.cpp +818 -0
data/vendor/faiss/impl/HNSW.h +275 -0
data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
data/vendor/faiss/impl/PolysemousTraining.h +158 -0
data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
data/vendor/faiss/impl/ProductQuantizer.h +242 -0
data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
data/vendor/faiss/impl/ThreadedIndex.h +80 -0
data/vendor/faiss/impl/index_read.cpp +793 -0
data/vendor/faiss/impl/index_write.cpp +558 -0
data/vendor/faiss/impl/io.cpp +142 -0
data/vendor/faiss/impl/io.h +98 -0
data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
data/vendor/faiss/impl/lattice_Zn.h +199 -0
data/vendor/faiss/index_factory.cpp +392 -0
data/vendor/faiss/index_factory.h +25 -0
data/vendor/faiss/index_io.h +75 -0
data/vendor/faiss/misc/test_blas.cpp +84 -0
data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
data/vendor/faiss/tests/test_merge.cpp +258 -0
data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
data/vendor/faiss/tests/test_params_override.cpp +231 -0
data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
data/vendor/faiss/utils/Heap.cpp +122 -0
data/vendor/faiss/utils/Heap.h +495 -0
data/vendor/faiss/utils/WorkerThread.cpp +126 -0
data/vendor/faiss/utils/WorkerThread.h +61 -0
data/vendor/faiss/utils/distances.cpp +765 -0
data/vendor/faiss/utils/distances.h +243 -0
data/vendor/faiss/utils/distances_simd.cpp +809 -0
data/vendor/faiss/utils/extra_distances.cpp +336 -0
data/vendor/faiss/utils/extra_distances.h +54 -0
data/vendor/faiss/utils/hamming-inl.h +472 -0
data/vendor/faiss/utils/hamming.cpp +792 -0
data/vendor/faiss/utils/hamming.h +220 -0
data/vendor/faiss/utils/random.cpp +192 -0
data/vendor/faiss/utils/random.h +60 -0
data/vendor/faiss/utils/utils.cpp +783 -0
data/vendor/faiss/utils/utils.h +181 -0
metadata +216 -2

data/vendor/faiss/utils/extra_distances.cpp ADDED Viewed

@@ -0,0 +1,336 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#include <faiss/utils/distances.h>
+#include <cmath>
+#include <omp.h>
+#include <faiss/utils/utils.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/AuxIndexStructures.h>
+namespace faiss {
+/***************************************************************************
+ * Distance functions (other than L2 and IP)
+ ***************************************************************************/
+/// Distance functor that forwards to fvec_L2sqr over d components.
+struct VectorDistanceL2 {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const
+    {
+        const float dist = fvec_L2sqr (x, y, d);
+        return dist;
+    }
+};
+/// Distance functor that forwards to fvec_L1 over d components.
+struct VectorDistanceL1 {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const
+    {
+        const float dist = fvec_L1 (x, y, d);
+        return dist;
+    }
+};
+/// Distance functor that forwards to fvec_Linf over d components.
+struct VectorDistanceLinf {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const
+    {
+        // A hand-written loop keeping the running maximum of
+        // fabs (x[i] - y[i]) used to be kept here as a commented-out
+        // alternative to the library call.
+        const float dist = fvec_Linf (x, y, d);
+        return dist;
+    }
+};
+/// Functor summing |x[i] - y[i]|^p over d components; no final 1/p root
+/// is applied to the sum.
+struct VectorDistanceLp {
+    size_t d;       ///< number of float components per vector
+    const float p;  ///< exponent applied to every absolute difference
+    float operator () (const float *x, const float *y) const {
+        float total = 0;
+        size_t idx = 0;
+        while (idx < d) {
+            float gap = fabs (x[idx] - y[idx]);
+            total += powf (gap, p);
+            idx++;
+        }
+        return total;
+    }
+};
+/// Canberra distance functor: sum over the d components of
+/// |x[i] - y[i]| / (|x[i]| + |y[i]|).
+/// A component where both inputs are zero contributes 0 instead of
+/// evaluating 0/0, which would turn the whole sum into NaN.
+struct VectorDistanceCanberra {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const {
+        float accu = 0;
+        for (size_t i = 0; i < d; i++) {
+            float xi = x[i], yi = y[i];
+            const auto den = fabs(xi) + fabs(yi);
+            // "!= 0" rather than "> 0": a NaN input still propagates
+            // instead of being silently skipped
+            if (den != 0) {
+                accu += fabs (xi - yi) / den;
+            }
+        }
+        return accu;
+    }
+};
+/// Bray-Curtis functor: sum of |x[i] - y[i]| divided by sum of
+/// |x[i] + y[i]| over the d components.
+/// NOTE(review): the quotient is not guarded, so a zero denominator
+/// (e.g. two all-zero vectors) yields NaN or inf.
+struct VectorDistanceBrayCurtis {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const {
+        float numerator = 0;
+        float denominator = 0;
+        for (size_t idx = 0; idx < d; ++idx) {
+            float a = x[idx];
+            float b = y[idx];
+            numerator += fabs (a - b);
+            denominator += fabs (a + b);
+        }
+        return numerator / denominator;
+    }
+};
+/// Jensen-Shannon functor: for every component, with m = (x + y) / 2,
+/// accumulates -x * log(m / x) - y * log(m / y) and returns half the sum.
+/// A zero component contributes nothing (0 * log(.) is taken as 0);
+/// evaluating the expression literally would give 0 * inf or log(0/0),
+/// i.e. NaN, for any vector pair containing a zero entry.
+struct VectorDistanceJensenShannon {
+    size_t d;  ///< number of float components per vector
+    float operator () (const float *x, const float *y) const {
+        float accu = 0;
+        for (size_t i = 0; i < d; i++) {
+            float xi = x[i], yi = y[i];
+            float mi = 0.5 * (xi + yi);
+            // "!= 0" rather than "> 0": negative or NaN inputs keep
+            // producing NaN instead of being silently ignored
+            float kl1 = xi != 0 ? - xi * log(mi / xi) : 0;
+            float kl2 = yi != 0 ? - yi * log(mi / yi) : 0;
+            accu += kl1 + kl2;
+        }
+        return 0.5 * accu;
+    }
+};
+namespace {
+/// Fills dis[i * ldd + j] with vd(query i, database vector j) for all
+/// nq x nb pairs. Query i starts at xq + i * ldq and database vector j
+/// at xb + j * ldb. The outer loop is an OpenMP parallel-for that is
+/// only enabled when nq > 10.
+template<class VD>
+void pairwise_extra_distances_template (
+                     VD vd,
+                     int64_t nq, const float *xq,
+                     int64_t nb, const float *xb,
+                     float *dis,
+                     int64_t ldq, int64_t ldb, int64_t ldd)
+{
+#pragma omp parallel for if(nq > 10)
+    for (int64_t qi = 0; qi < nq; qi++) {
+        const float *query = xq + qi * ldq;
+        float *row = dis + ldd * qi;
+        for (int64_t bj = 0; bj < nb; bj++) {
+            row[bj] = vd (query, xb + bj * ldb);
+        }
+    }
+}
+/// Brute-force k-nearest-neighbour search with distance functor vd.
+/// For each of the nx queries in x (vd.d floats each) every one of the
+/// ny vectors in y is scanned and the k smallest distances are kept in
+/// the max-heap stored in row i of res, which is reordered at the end.
+/// Queries are handled in batches so that InterruptCallback::check ()
+/// runs between batches, outside the OpenMP parallel loop.
+template<class VD>
+void knn_extra_metrics_template (
+        VD vd,
+        const float * x,
+        const float * y,
+        size_t nx, size_t ny,
+        float_maxheap_array_t * res)
+{
+    const size_t k = res->k;
+    const size_t d = vd.d;
+    // batch size: the period hint for one query's work, scaled by the
+    // maximum number of OpenMP threads
+    size_t check_period = InterruptCallback::get_period_hint (ny * d);
+    check_period *= omp_get_max_threads();
+    for (size_t batch_begin = 0; batch_begin < nx; batch_begin += check_period) {
+        const size_t batch_end = std::min(batch_begin + check_period, nx);
+#pragma omp parallel for
+        for (size_t i = batch_begin; i < batch_end; i++) {
+            float * heap_dis = res->get_val(i);
+            int64_t * heap_ids = res->get_ids (i);
+            maxheap_heapify (k, heap_dis, heap_ids);
+            const float * query = x + i * d;
+            for (size_t j = 0; j < ny; j++) {
+                float dist = vd (query, y + j * d);
+                // heap_dis[0] is the heap top: only a strictly smaller
+                // distance replaces it
+                if (dist < heap_dis[0]) {
+                    maxheap_pop (k, heap_dis, heap_ids);
+                    maxheap_push (k, heap_dis, heap_ids, dist, j);
+                }
+            }
+            maxheap_reorder (k, heap_dis, heap_ids);
+        }
+        InterruptCallback::check ();
+    }
+}
+/// DistanceComputer over a flat array of stored vectors (vd.d floats
+/// each, starting at b) that evaluates distances with functor VD.
+template<class VD>
+struct ExtraDistanceComputer : DistanceComputer {
+    VD vd;            ///< distance functor; vd.d is the vector dimension
+    Index::idx_t nb;  ///< number of stored vectors (only recorded here)
+    const float *q;   ///< current query, nullptr until one is provided
+    const float *b;   ///< start of the stored vectors
+    ExtraDistanceComputer(const VD & vd, const float *xb,
+                          size_t nb, const float *q = nullptr)
+        : vd(vd), nb(nb), q(q), b(xb) {}
+    /// Replaces the current query vector.
+    void set_query(const float *x) override {
+        q = x;
+    }
+    /// Distance between the current query and stored vector i.
+    float operator () (idx_t i) override {
+        const float *stored = b + i * vd.d;
+        return vd (q, stored);
+    }
+    /// Distance between stored vectors j and i (passed to vd in that order).
+    float symmetric_dis(idx_t i, idx_t j) override {
+        const float *first = b + j * vd.d;
+        const float *second = b + i * vd.d;
+        return vd (first, second);
+    }
+};
+} // anonymous namespace
+/// Computes all nq x nb pairwise distances for metric mt and writes them
+/// to dis. A stride of -1 selects the packed default: ldq = d, ldb = d,
+/// ldd = nb. metric_arg is only used for METRIC_Lp, as the exponent.
+/// Does nothing when nq or nb is 0; unsupported metrics are reported
+/// through FAISS_THROW_MSG.
+void pairwise_extra_distances (
+                     int64_t d,
+                     int64_t nq, const float *xq,
+                     int64_t nb, const float *xb,
+                     MetricType mt, float metric_arg,
+                     float *dis,
+                     int64_t ldq, int64_t ldb, int64_t ldd)
+{
+    if (nq == 0 || nb == 0) {
+        return;
+    }
+    ldq = (ldq == -1) ? d : ldq;
+    ldb = (ldb == -1) ? d : ldb;
+    ldd = (ldd == -1) ? nb : ldd;
+    switch(mt) {
+    // one case per parameter-free metric: build the functor, run the template
+#define HANDLE_VAR(kw)                                          \
+    case METRIC_ ## kw: {                                       \
+        VectorDistance ## kw vd({(size_t)d});                   \
+        pairwise_extra_distances_template (                     \
+            vd, nq, xq, nb, xb, dis, ldq, ldb, ldd);            \
+        break;                                                  \
+    }
+        HANDLE_VAR(L2);
+        HANDLE_VAR(L1);
+        HANDLE_VAR(Linf);
+        HANDLE_VAR(Canberra);
+        HANDLE_VAR(BrayCurtis);
+        HANDLE_VAR(JensenShannon);
+#undef HANDLE_VAR
+    case METRIC_Lp: {
+        // Lp additionally carries its exponent
+        VectorDistanceLp vd({(size_t)d, metric_arg});
+        pairwise_extra_distances_template (
+            vd, nq, xq, nb, xb, dis, ldq, ldb, ldd);
+        break;
+    }
+    default:
+        FAISS_THROW_MSG ("metric type not implemented");
+    }
+}
+/// k-nearest-neighbour search of the nx vectors in x against the ny
+/// vectors in y (d floats each) under metric mt; results go to the heap
+/// array res. metric_arg is only used for METRIC_Lp, as the exponent.
+/// Unsupported metrics are reported through FAISS_THROW_MSG.
+void knn_extra_metrics (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        MetricType mt, float metric_arg,
+        float_maxheap_array_t * res)
+{
+    switch(mt) {
+    // one case per parameter-free metric: build the functor, run the template
+#define HANDLE_VAR(kw)                                          \
+    case METRIC_ ## kw: {                                       \
+        VectorDistance ## kw vd({(size_t)d});                   \
+        knn_extra_metrics_template (vd, x, y, nx, ny, res);     \
+        break;                                                  \
+    }
+        HANDLE_VAR(L2);
+        HANDLE_VAR(L1);
+        HANDLE_VAR(Linf);
+        HANDLE_VAR(Canberra);
+        HANDLE_VAR(BrayCurtis);
+        HANDLE_VAR(JensenShannon);
+#undef HANDLE_VAR
+    case METRIC_Lp: {
+        // Lp additionally carries its exponent
+        VectorDistanceLp vd({(size_t)d, metric_arg});
+        knn_extra_metrics_template (vd, x, y, nx, ny, res);
+        break;
+    }
+    default:
+        FAISS_THROW_MSG ("metric type not implemented");
+    }
+}
+/// Builds a DistanceComputer for metric mt over the flat array xb of nb
+/// vectors with d floats each. metric_arg is only used for METRIC_Lp,
+/// as the exponent.
+/// The returned object is allocated with new, so the caller is
+/// responsible for deleting it. Unsupported metrics are reported
+/// through FAISS_THROW_MSG.
+DistanceComputer *get_extra_distance_computer (
+        size_t d,
+        MetricType mt, float metric_arg,
+        size_t nb, const float *xb)
+{
+    switch(mt) {
+#define HANDLE_VAR(kw)                                                  \
+     case METRIC_ ## kw: {                                              \
+        VectorDistance ## kw vd({(size_t)d});                           \
+        return new ExtraDistanceComputer<VectorDistance ## kw>(vd, xb, nb); \
+    }
+        HANDLE_VAR(L2);
+        HANDLE_VAR(L1);
+        HANDLE_VAR(Linf);
+        HANDLE_VAR(Canberra);
+        HANDLE_VAR(BrayCurtis);
+        HANDLE_VAR(JensenShannon);
+#undef HANDLE_VAR
+    case METRIC_Lp: {
+        // every case returns, so no break is needed after it
+        VectorDistanceLp vd({(size_t)d, metric_arg});
+        return new ExtraDistanceComputer<VectorDistanceLp> (vd, xb, nb);
+    }
+    default:
+        // NOTE(review): presumably FAISS_THROW_MSG throws, so control
+        // never reaches the end of the function - confirm in FaissAssert.h
+        FAISS_THROW_MSG ("metric type not implemented");
+    }
+}
+} // namespace faiss

data/vendor/faiss/utils/extra_distances.h ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#ifndef FAISS_extra_distances_h
+#define FAISS_extra_distances_h
+/** Declarations of the entry points for extra metrics beyond L2
+ *  and inner product (implemented in extra_distances.cpp).
+ *  The include guard is named after this file so that it cannot clash
+ *  with a guard belonging to the distances header. */
+#include <stdint.h>
+#include <faiss/Index.h>
+#include <faiss/utils/Heap.h>
+namespace faiss {
+/** compute all nq x nb pairwise distances under metric mt
+ *
+ * @param d           number of floats per vector
+ * @param xq          query vectors, query i starts at xq + i * ldq
+ * @param xb          database vectors, vector j starts at xb + j * ldb
+ * @param metric_arg  exponent p, only used for METRIC_Lp
+ * @param dis         output, distance (i, j) is stored at dis[i * ldd + j]
+ * @param ldq,ldb,ldd strides; -1 selects the defaults d, d and nb
+ */
+void pairwise_extra_distances (
+                     int64_t d,
+                     int64_t nq, const float *xq,
+                     int64_t nb, const float *xb,
+                     MetricType mt, float metric_arg,
+                     float *dis,
+                     int64_t ldq = -1, int64_t ldb = -1, int64_t ldd = -1);
+/** k-nearest-neighbour search of the nx vectors in x among the ny
+ *  vectors in y (d floats each) under metric mt; the results are
+ *  written to the heap array res. metric_arg is only used for
+ *  METRIC_Lp */
+void knn_extra_metrics (
+        const float * x,
+        const float * y,
+        size_t d, size_t nx, size_t ny,
+        MetricType mt, float metric_arg,
+        float_maxheap_array_t * res);
+/** get a DistanceComputer that refers to this type of distance and
+ *  indexes a flat array of size nb. The object is allocated with new;
+ *  the caller is responsible for deleting it */
+DistanceComputer *get_extra_distance_computer (
+        size_t d,
+        MetricType mt, float metric_arg,
+        size_t nb, const float *xb);
+}
+#endif

data/vendor/faiss/utils/hamming-inl.h ADDED Viewed

@@ -0,0 +1,472 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+namespace faiss {
+/// Binds the writer to the buffer code of code_size bytes, zero-fills it
+/// and starts writing at bit 0.
+inline BitstringWriter::BitstringWriter(uint8_t *code, int code_size)
+    : code (code)
+    , code_size (code_size)
+    , i(0)
+{
+    // the buffer has to start zeroed because write() only ORs bits into it
+    bzero (code, code_size);
+}
+/// Appends the nbit low bits of x at the current bit position i and
+/// advances i by nbit. Bits are ORed into the buffer, lowest bit first.
+/// NOTE(review): x is not masked, so it is presumably expected to have
+/// no bits set above nbit - confirm with callers.
+inline void BitstringWriter::write(uint64_t x, int nbit) {
+    assert (code_size * 8 >= nbit + i);
+    const int bit_ofs = i & 7;     // bit position inside the current byte
+    const int room = 8 - bit_ofs;  // nb of available bits in that byte
+    int byte_idx = i >> 3;
+    code[byte_idx] |= x << bit_ofs;
+    i += nbit;
+    if (nbit <= room) {
+        return;
+    }
+    // spill what did not fit into the following bytes, 8 bits at a time
+    x >>= room;
+    byte_idx++;
+    for (; x != 0; x >>= 8) {
+        code[byte_idx++] |= x;
+    }
+}
+/// Binds the reader to the buffer code of code_size bytes and starts
+/// reading at bit 0.
+inline BitstringReader::BitstringReader(const uint8_t *code, int code_size)
+    : code (code)
+    , code_size (code_size)
+    , i(0)
+{
+}
+/// Reads nbit bits starting at the current bit position i, returns them
+/// as an integer (first bit read = lowest bit) and advances i by nbit.
+inline uint64_t BitstringReader::read(int nbit) {
+    assert (code_size * 8 >= nbit + i);
+    const int bit_ofs = i & 7;     // bit position inside the current byte
+    const int room = 8 - bit_ofs;  // nb of available bits in that byte
+    // start with the available bits of the current byte
+    uint64_t value = code[i >> 3] >> bit_ofs;
+    if (nbit <= room) {
+        // everything requested lies inside the current byte
+        value &= (1 << nbit) - 1;
+        i += nbit;
+        return value;
+    }
+    int shift = room;
+    int byte_idx = (i >> 3) + 1;
+    i += nbit;
+    int remaining = nbit - room;
+    // whole bytes in the middle of the field
+    for (; remaining > 8; remaining -= 8, shift += 8) {
+        value |= ((uint64_t)code[byte_idx++]) << shift;
+    }
+    // last byte: only its low `remaining` bits belong to the field
+    uint64_t tail = code[byte_idx];
+    tail &= (1 << remaining) - 1;
+    value |= tail << shift;
+    return value;
+}
+/******************************************************************
+ * The HammingComputer series of classes compares a single code of
+ * size 4 to 32 to incoming codes. They are intended for use as a
+ * template class where it would be inefficient to switch on the code
+ * size in the inner loop. Hopefully the compiler will inline the
+ * hamming() functions and put the a0, a1, ... in registers.
+ ******************************************************************/
+/// Hamming distance against a stored 4-byte code kept as one 32-bit word.
+struct HammingComputer4 {
+    uint32_t a0;  ///< the stored code
+    HammingComputer4 () {}
+    HammingComputer4 (const uint8_t *a, int code_size) {
+        set (a, code_size);
+    }
+    /// Stores the 4-byte code at a; code_size is only checked by the assert.
+    void set (const uint8_t *a, int code_size) {
+        assert (code_size == 4);
+        const uint32_t *word = (const uint32_t *)a;
+        a0 = word[0];
+    }
+    /// Number of differing bits between the stored code and the code at b.
+    inline int hamming (const uint8_t *b) const {
+        const uint32_t other = *(const uint32_t *)b;
+        return popcount64 (other ^ a0);
+    }
+};
+/// Hamming distance against a stored 8-byte code kept as one 64-bit word.
+struct HammingComputer8 {
+    uint64_t a0;  ///< the stored code
+    HammingComputer8 () {}
+    HammingComputer8 (const uint8_t *a, int code_size) {
+        set (a, code_size);
+    }
+    /// Stores the 8-byte code at a; code_size is only checked by the assert.
+    void set (const uint8_t *a, int code_size) {
+        assert (code_size == 8);
+        const uint64_t *word = (const uint64_t *)a;
+        a0 = word[0];
+    }
+    /// Number of differing bits between the stored code and the code at b.
+    inline int hamming (const uint8_t *b) const {
+        const uint64_t other = *(const uint64_t *)b;
+        return popcount64 (other ^ a0);
+    }
+};
+/// Hamming distance against a stored 16-byte code kept as two 64-bit words.
+struct HammingComputer16 {
+    uint64_t a0, a1;  ///< the stored code
+    HammingComputer16 () {}
+    HammingComputer16 (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Stores the 16-byte code at a8; code_size is only checked by the assert.
+    void set (const uint8_t *a8, int code_size) {
+        assert (code_size == 16);
+        const uint64_t *words = (const uint64_t *)a8;
+        a0 = words[0];
+        a1 = words[1];
+    }
+    /// Number of differing bits between the stored code and the code at b8.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int dist = popcount64 (other[0] ^ a0);
+        dist += popcount64 (other[1] ^ a1);
+        return dist;
+    }
+};
+// when applied to an array, 1/2 of the 64-bit accesses are unaligned.
+// This incurs a penalty of ~10% wrt. fully aligned accesses.
+/// Hamming distance against a stored 20-byte code kept as two 64-bit
+/// words plus one 32-bit word.
+struct HammingComputer20 {
+    uint64_t a0, a1;  ///< bytes 0..15 of the stored code
+    uint32_t a2;      ///< bytes 16..19 of the stored code
+    HammingComputer20 () {}
+    HammingComputer20 (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Stores the 20-byte code at a8; code_size is only checked by the assert.
+    void set (const uint8_t *a8, int code_size) {
+        assert (code_size == 20);
+        const uint64_t *a = (uint64_t *)a8;
+        a0 = a[0]; a1 = a[1];
+        // the tail is only 4 bytes long: load it as a 32-bit word, the
+        // same way hamming() does. A 64-bit load of a[2] would read
+        // bytes 16..23, i.e. 4 bytes past the end of the code.
+        a2 = *(const uint32_t *)(a + 2);
+    }
+    /// Number of differing bits between the stored code and the code at b8.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *b = (uint64_t *)b8;
+        return popcount64 (b[0] ^ a0) + popcount64 (b[1] ^ a1) +
+            popcount64 (*(uint32_t*)(b + 2) ^ a2);
+    }
+};
+/// Hamming distance against a stored 32-byte code kept as four 64-bit words.
+struct HammingComputer32 {
+    uint64_t a0, a1, a2, a3;  ///< the stored code
+    HammingComputer32 () {}
+    HammingComputer32 (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Stores the 32-byte code at a8; code_size is only checked by the assert.
+    void set (const uint8_t *a8, int code_size) {
+        assert (code_size == 32);
+        const uint64_t *words = (const uint64_t *)a8;
+        a0 = words[0];
+        a1 = words[1];
+        a2 = words[2];
+        a3 = words[3];
+    }
+    /// Number of differing bits between the stored code and the code at b8.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int dist = popcount64 (other[0] ^ a0);
+        dist += popcount64 (other[1] ^ a1);
+        dist += popcount64 (other[2] ^ a2);
+        dist += popcount64 (other[3] ^ a3);
+        return dist;
+    }
+};
+/// Hamming distance against a stored 64-byte code kept as eight 64-bit words.
+struct HammingComputer64 {
+    uint64_t a0, a1, a2, a3, a4, a5, a6, a7;  ///< the stored code
+    HammingComputer64 () {}
+    HammingComputer64 (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Stores the 64-byte code at a8; code_size is only checked by the assert.
+    void set (const uint8_t *a8, int code_size) {
+        assert (code_size == 64);
+        const uint64_t *words = (const uint64_t *)a8;
+        a0 = words[0];
+        a1 = words[1];
+        a2 = words[2];
+        a3 = words[3];
+        a4 = words[4];
+        a5 = words[5];
+        a6 = words[6];
+        a7 = words[7];
+    }
+    /// Number of differing bits between the stored code and the code at b8.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int dist = popcount64 (other[0] ^ a0);
+        dist += popcount64 (other[1] ^ a1);
+        dist += popcount64 (other[2] ^ a2);
+        dist += popcount64 (other[3] ^ a3);
+        dist += popcount64 (other[4] ^ a4);
+        dist += popcount64 (other[5] ^ a5);
+        dist += popcount64 (other[6] ^ a6);
+        dist += popcount64 (other[7] ^ a7);
+        return dist;
+    }
+};
+// very inefficient...
+/// Hamming distance for codes of any byte length, compared one byte at
+/// a time. Only the pointer to the reference code is kept, not a copy.
+struct HammingComputerDefault {
+    const uint8_t *a;  ///< reference code (not owned)
+    int n;             ///< code size in bytes
+    HammingComputerDefault () {}
+    HammingComputerDefault (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Remembers the reference code and its size in bytes.
+    void set (const uint8_t *a8, int code_size) {
+        a = a8;
+        n = code_size;
+    }
+    /// Number of differing bits between the reference code and the code at b8.
+    int hamming (const uint8_t *b8) const {
+        int total = 0;
+        for (int pos = 0; pos < n; ++pos) {
+            total += popcount64 (a[pos] ^ b8[pos]);
+        }
+        return total;
+    }
+};
+/// Hamming distance for codes whose size is a multiple of 8 bytes,
+/// compared one 64-bit word at a time. Only the pointer to the
+/// reference code is kept, not a copy.
+struct HammingComputerM8 {
+    const uint64_t *a;  ///< reference code viewed as 64-bit words (not owned)
+    int n;              ///< number of 64-bit words
+    HammingComputerM8 () {}
+    HammingComputerM8 (const uint8_t *a8, int code_size) {
+        set (a8, code_size);
+    }
+    /// Remembers the reference code; code_size is given in bytes.
+    void set (const uint8_t *a8, int code_size) {
+        assert (code_size % 8 == 0);
+        a = (const uint64_t *)a8;
+        n = code_size / 8;
+    }
+    /// Number of differing bits between the reference code and the code at b8.
+    int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int total = 0;
+        for (int w = 0; w < n; ++w) {
+            total += popcount64 (a[w] ^ other[w]);
+        }
+        return total;
+    }
+};
+// even more inefficient!
+/// Hamming distance for codes whose size is a multiple of 4 bytes,
+/// compared one 32-bit word at a time. Only the pointer to the
+/// reference code is kept, not a copy.
+struct HammingComputerM4 {
+    const uint32_t *a;  ///< reference code viewed as 32-bit words (not owned)
+    int n;              ///< number of 32-bit words
+    HammingComputerM4 () {}
+    HammingComputerM4 (const uint8_t *a4, int code_size) {
+        set (a4, code_size);
+    }
+    /// Remembers the reference code; code_size is given in bytes.
+    void set (const uint8_t *a4, int code_size) {
+        assert (code_size % 4 == 0);
+        a = (const uint32_t *)a4;
+        n = code_size / 4;
+    }
+    /// Number of differing bits between the reference code and the code at b8.
+    int hamming (const uint8_t *b8) const {
+        const uint32_t *other = (const uint32_t *)b8;
+        int total = 0;
+        for (int w = 0; w < n; ++w) {
+            total += popcount64 (a[w] ^ other[w]);
+        }
+        return total;
+    }
+};
+/***************************************************************************
+ * Equivalence with a template class when code size is known at compile time
+ **************************************************************************/
+// default template: any CODE_SIZE without a specialization falls back
+// to the word-by-word HammingComputerM8
+template<int CODE_SIZE>
+struct HammingComputer: HammingComputerM8 {
+    HammingComputer (const uint8_t *a, int code_size)
+        : HammingComputerM8(a, code_size)
+    {
+    }
+};
+// Specializations for the code sizes that have a dedicated
+// HammingComputer<N> struct. NOTE: unlike the default template, whose
+// constructor takes (a, code_size), these constructors take only the
+// code pointer and pass N as the code size themselves.
+#define SPECIALIZED_HC(N)                             \
+    template<> struct HammingComputer<N>:             \
+            HammingComputer ## N {                    \
+        HammingComputer (const uint8_t *a):           \
+        HammingComputer ## N(a, N) {}                 \
+    }
+SPECIALIZED_HC(4);
+SPECIALIZED_HC(8);
+SPECIALIZED_HC(16);
+SPECIALIZED_HC(20);
+SPECIALIZED_HC(32);
+SPECIALIZED_HC(64);
+#undef SPECIALIZED_HC
+/***************************************************************************
+ * generalized Hamming = number of bytes that are different between
+ * two codes.
+ ***************************************************************************/
+/// Counts the non-zero bytes of a.
+inline int generalized_hamming_64 (uint64_t a) {
+    // OR all bits of each byte down into that byte's lowest bit; the
+    // total shift is 7, so a byte's lowest bit never receives bits from
+    // the neighbouring byte
+    a |= a >> 1;
+    a |= a >> 2;
+    a |= a >> 4;
+    // keep only the lowest bit of every byte and count them
+    const uint64_t low_bit_of_each_byte = 0x0101010101010101UL;
+    return popcount64 (a & low_bit_of_each_byte);
+}
+/// Generalized Hamming distance (number of differing bytes) against a
+/// stored 8-byte code.
+struct GenHammingComputer8 {
+    uint64_t a0;  ///< the stored code
+    GenHammingComputer8 (const uint8_t *a, int code_size) {
+        assert (code_size == 8);
+        const uint64_t *word = (const uint64_t *)a;
+        a0 = word[0];
+    }
+    /// Number of byte positions where the code at b differs from the stored one.
+    inline int hamming (const uint8_t *b) const {
+        const uint64_t other = *(const uint64_t *)b;
+        return generalized_hamming_64 (other ^ a0);
+    }
+};
+/// Generalized Hamming distance (number of differing bytes) against a
+/// stored 16-byte code.
+struct GenHammingComputer16 {
+    uint64_t a0, a1;  ///< the stored code
+    GenHammingComputer16 (const uint8_t *a8, int code_size) {
+        assert (code_size == 16);
+        const uint64_t *words = (const uint64_t *)a8;
+        a0 = words[0];
+        a1 = words[1];
+    }
+    /// Number of byte positions where the code at b8 differs from the stored one.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int dist = generalized_hamming_64 (other[0] ^ a0);
+        dist += generalized_hamming_64 (other[1] ^ a1);
+        return dist;
+    }
+};
+/// Generalized Hamming distance (number of differing bytes) against a
+/// stored 32-byte code.
+struct GenHammingComputer32 {
+    uint64_t a0, a1, a2, a3;  ///< the stored code
+    GenHammingComputer32 (const uint8_t *a8, int code_size) {
+        assert (code_size == 32);
+        const uint64_t *words = (const uint64_t *)a8;
+        a0 = words[0];
+        a1 = words[1];
+        a2 = words[2];
+        a3 = words[3];
+    }
+    /// Number of byte positions where the code at b8 differs from the stored one.
+    inline int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int dist = generalized_hamming_64 (other[0] ^ a0);
+        dist += generalized_hamming_64 (other[1] ^ a1);
+        dist += generalized_hamming_64 (other[2] ^ a2);
+        dist += generalized_hamming_64 (other[3] ^ a3);
+        return dist;
+    }
+};
+/// Generalized Hamming distance (number of differing bytes) for codes
+/// whose size is a multiple of 8 bytes. Only the pointer to the
+/// reference code is kept, not a copy.
+struct GenHammingComputerM8 {
+    const uint64_t *a;  ///< reference code viewed as 64-bit words (not owned)
+    int n;              ///< number of 64-bit words
+    GenHammingComputerM8 (const uint8_t *a8, int code_size) {
+        assert (code_size % 8 == 0);
+        a = (const uint64_t *)a8;
+        n = code_size / 8;
+    }
+    /// Number of byte positions where the code at b8 differs from the reference.
+    int hamming (const uint8_t *b8) const {
+        const uint64_t *other = (const uint64_t *)b8;
+        int total = 0;
+        for (int w = 0; w < n; ++w) {
+            total += generalized_hamming_64 (a[w] ^ other[w]);
+        }
+        return total;
+    }
+};
+/** k-nearest-neighbour search with generalized Hamming distances
+    (= number of code bytes that differ); only declared here */
+void generalized_hammings_knn_hc (
+        int_maxheap_array_t * ha,
+        const uint8_t * a, const uint8_t * b,
+        size_t nb, size_t code_size,
+        int ordered = true);
+/** Keeps track of the best (smallest) distances seen so far for one query.
+ *
+ * Distances lie in a limited range (0 to nbit), so there is one id
+ * list per possible distance, k slots each, laid out as
+ * ids_per_dis[dis * k + rank]. Only the lists below the current
+ * threshold keep being filled, such that the sum of sizes of those
+ * lists stays below k.
+ */
+template<class HammingComputer>
+struct HCounterState {
+  int *counters;         ///< counters[dis] = number of ids stored for dis
+  int64_t *ids_per_dis;  ///< k id slots per distance value
+  HammingComputer hc;    ///< computes distances to the query code
+  int thres;             ///< distances above this value are ignored
+  int count_lt;          ///< number of ids stored with distance < thres
+  int count_eq;          ///< number of ids stored with distance == thres
+  int k;                 ///< number of results wanted
+  /// x is the query code of d bits (d / 8 bytes); the threshold starts
+  /// at d + 1.
+  HCounterState(int *counters, int64_t *ids_per_dis,
+                const uint8_t *x, int d, int k)
+      : counters(counters),
+        ids_per_dis(ids_per_dis),
+        hc(x, d / 8),
+        thres(d + 1),
+        count_lt(0),
+        count_eq(0),
+        k(k) {}
+  /// Accounts for database code y carrying id j.
+  void update_counter(const uint8_t *y, size_t j) {
+    const int32_t dis = hc.hamming(y);
+    if (dis > thres) {
+      return;
+    }
+    if (dis == thres) {
+      // at the threshold: accept only while that list has room
+      if (count_eq < k) {
+        ids_per_dis[dis * k + count_eq++] = j;
+        counters[dis] = count_eq;
+      }
+      return;
+    }
+    // strictly below the threshold: always stored
+    ids_per_dis[dis * k + counters[dis]++] = j;
+    ++count_lt;
+    // once k ids lie strictly below the threshold, lower the threshold
+    // until fewer than k of them do
+    while (count_lt == k && thres > 0) {
+      --thres;
+      count_eq = counters[thres];
+      count_lt -= count_eq;
+    }
+  }
+};
+} // namespace faiss