RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

Files changed (202) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/IndexLattice.h CHANGED Viewed

@@ -10,7 +10,6 @@
 #ifndef FAISS_INDEX_LATTICE_H
 #define FAISS_INDEX_LATTICE_H
 #include <vector>
 #include <faiss/IndexIVF.h>
@@ -18,14 +17,9 @@
 namespace faiss {
 /** Index that encodes a vector with a series of Zn lattice quantizers
  */
-struct IndexLattice: Index {
+struct IndexLattice : Index {
     /// number of sub-vectors
     int nsq;
     /// dimension of sub-vectors
@@ -42,25 +36,26 @@ struct IndexLattice: Index {
     /// mins and maxes of the vector norms, per subquantizer
     std::vector<float> trained;
-    IndexLattice (idx_t d, int nsq, int scale_nbit, int r2);
+    IndexLattice(idx_t d, int nsq, int scale_nbit, int r2);
     void train(idx_t n, const float* x) override;
     /* The standalone codec interface */
-    size_t sa_code_size () const override;
+    size_t sa_code_size() const override;
-    void sa_encode (idx_t n, const float *x,
-                          uint8_t *bytes) const override;
+    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
-    void sa_decode (idx_t n, const uint8_t *bytes,
-                            float *x) const override;
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
     /// not implemented
     void add(idx_t n, const float* x) override;
-    void search(idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels) const override;
+    void search(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels) const override;
     void reset() override;
 };
 } // namespace faiss

data/vendor/faiss/faiss/IndexNNDescent.cpp ADDED Viewed

@@ -0,0 +1,231 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#include <faiss/IndexNNDescent.h>
+#include <omp.h>
+#include <cinttypes>
+#include <cstdio>
+#include <cstdlib>
+#include <queue>
+#include <unordered_set>
+#ifdef __SSE__
+#endif
+#include <faiss/IndexFlat.h>
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/random.h>
+extern "C" {
+/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
+int sgemm_(
+        const char* transa,
+        const char* transb,
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        const float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* b,
+        FINTEGER* ldb,
+        float* beta,
+        float* c,
+        FINTEGER* ldc);
+}
+namespace faiss {
+using idx_t = Index::idx_t;
+using storage_idx_t = NNDescent::storage_idx_t;
+/**************************************************************
+ * add / search blocks of descriptors
+ **************************************************************/
+namespace {
+/* Wrap the distance computer into one that negates the
+   distances. This makes supporting INNER_PRODUCT search easier.
+   The wrapper owns the wrapped computer and deletes it on destruction,
+   so copying is disabled to rule out a double delete. */
+struct NegativeDistanceComputer : DistanceComputer {
+    /// owned by this
+    DistanceComputer* basedis;
+    explicit NegativeDistanceComputer(DistanceComputer* basedis)
+            : basedis(basedis) {}
+    // a copy would share basedis and delete it a second time
+    NegativeDistanceComputer(const NegativeDistanceComputer&) = delete;
+    NegativeDistanceComputer& operator=(const NegativeDistanceComputer&) =
+            delete;
+    /// forward the query vector to the wrapped computer
+    void set_query(const float* x) override {
+        basedis->set_query(x);
+    }
+    /// compute the negated distance of vector i to the current query
+    float operator()(idx_t i) override {
+        return -(*basedis)(i);
+    }
+    /// compute the negated distance between two stored vectors
+    float symmetric_dis(idx_t i, idx_t j) override {
+        return -basedis->symmetric_dis(i, j);
+    }
+    ~NegativeDistanceComputer() override {
+        delete basedis;
+    }
+};
+/// Returns a newly obtained distance computer for `storage`. For
+/// METRIC_INNER_PRODUCT the computer is wrapped so that the distances it
+/// reports are negated. Callers in this file release the result with a
+/// ScopeDeleter1.
+DistanceComputer* storage_distance_computer(const Index* storage) {
+    if (storage->metric_type != METRIC_INNER_PRODUCT) {
+        return storage->get_distance_computer();
+    }
+    return new NegativeDistanceComputer(storage->get_distance_computer());
+}
+} // namespace
+/**************************************************************
+ * IndexNNDescent implementation
+ **************************************************************/
+/// Creates an index of dimension d with an NNDescent graph of parameter K
+/// and no storage attached; methods that check `storage` throw until one
+/// is provided.
+IndexNNDescent::IndexNNDescent(int d, int K, MetricType metric)
+        : Index(d, metric),
+          nndescent(d, K),
+          own_fields(false),
+          storage(nullptr) {
+}
+/// Creates an index on top of an existing storage index, taking the
+/// dimension and metric from it. The storage is not owned
+/// (own_fields is false), so it is not deleted by the destructor.
+IndexNNDescent::IndexNNDescent(Index* storage, int K)
+        : Index(storage->d, storage->metric_type),
+          nndescent(storage->d, K),
+          own_fields(false),
+          storage(storage) {
+}
+/// Releases the storage index, but only when this object owns it.
+IndexNNDescent::~IndexNNDescent() {
+    if (!own_fields) {
+        return;
+    }
+    delete storage;
+}
+/// Trains the underlying storage on n vectors and marks the index as
+/// trained. Throws if no storage is attached.
+void IndexNNDescent::train(idx_t n, const float* x) {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNNDescentFlat (or variants) "
+            "instead of IndexNNDescent directly");
+    // only the storage needs training; the nndescent structure does not
+    storage->train(n, x);
+    is_trained = true;
+}
+/** Searches the k nearest neighbors of n query vectors through the
+ *  NNDescent graph.
+ *
+ * @param n          number of query vectors
+ * @param x          queries, read as n consecutive vectors of dimension d
+ * @param k          number of results per query
+ * @param distances  output, k entries written per query
+ * @param labels     output, k entries written per query
+ *
+ * Throws if no storage is attached. Queries are handled in batches so
+ * that InterruptCallback::check() runs between batches.
+ */
+void IndexNNDescent::search(
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNNDescentFlat (or variants) "
+            "instead of IndexNNDescent directly");
+    if (verbose) {
+        printf("Parameters: k=%" PRId64 ", search_L=%d\n",
+               k,
+               nndescent.search_L);
+    }
+    idx_t check_period =
+            InterruptCallback::get_period_hint(d * nndescent.search_L);
+    for (idx_t i0 = 0; i0 < n; i0 += check_period) {
+        idx_t i1 = std::min(i0 + check_period, n);
+#pragma omp parallel
+        {
+            // per-thread scratch state, released at the end of the region
+            VisitedTable vt(ntotal);
+            DistanceComputer* dis = storage_distance_computer(storage);
+            ScopeDeleter1<DistanceComputer> del(dis);
+#pragma omp for
+            for (idx_t i = i0; i < i1; i++) {
+                idx_t* idxi = labels + i * k;
+                float* simi = distances + i * k;
+                dis->set_query(x + i * d);
+                maxheap_heapify(k, simi, idxi);
+                nndescent.search(*dis, k, idxi, simi, vt);
+                maxheap_reorder(k, simi, idxi);
+            }
+        }
+        InterruptCallback::check();
+    }
+    if (metric_type == METRIC_INNER_PRODUCT) {
+        // we need to revert the negated distances; the index stays signed
+        // to avoid comparing size_t against the signed product k * n
+        const idx_t n_results = k * n;
+        for (idx_t i = 0; i < n_results; i++) {
+            distances[i] = -distances[i];
+        }
+    }
+}
+/// Adds n vectors to the storage and rebuilds the NNDescent graph over
+/// everything the storage now holds. Throws if no storage is attached or
+/// the index is not trained. When vectors are already present a warning is
+/// printed to stderr, since the graph is rebuilt from scratch.
+void IndexNNDescent::add(idx_t n, const float* x) {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNNDescentFlat (or variants) "
+            "instead of IndexNNDescent directly");
+    FAISS_THROW_IF_NOT(is_trained);
+    if (ntotal != 0) {
+        // the two literals are concatenated: keep the separating space and
+        // the trailing newline so the message does not run into later output
+        fprintf(stderr,
+                "WARNING NNDescent does not support dynamic insertions, "
+                "multiple insertions would lead to re-building the index\n");
+    }
+    storage->add(n, x);
+    ntotal = storage->ntotal;
+    DistanceComputer* dis = storage_distance_computer(storage);
+    ScopeDeleter1<DistanceComputer> del(dis);
+    nndescent.build(*dis, ntotal, verbose);
+}
+/// Clears the graph and the storage and sets ntotal back to 0.
+/// Throws if no storage is attached, like the other methods of this
+/// class, instead of dereferencing a null pointer.
+void IndexNNDescent::reset() {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNNDescentFlat (or variants) "
+            "instead of IndexNNDescent directly");
+    nndescent.reset();
+    storage->reset();
+    ntotal = 0;
+}
+/// Reconstructs vector `key` into `recons` by delegating to the storage.
+/// Throws if no storage is attached instead of dereferencing a null
+/// pointer.
+void IndexNNDescent::reconstruct(idx_t key, float* recons) const {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNNDescentFlat (or variants) "
+            "instead of IndexNNDescent directly");
+    storage->reconstruct(key, recons);
+}
+/**************************************************************
+ * IndexNNDescentFlat implementation
+ **************************************************************/
+/// Default constructor: relies on the IndexNNDescent defaults and only
+/// flags the index as trained.
+IndexNNDescentFlat::IndexNNDescentFlat() {
+    this->is_trained = true;
+}
+/// Builds an NNDescent index over a freshly allocated IndexFlat of
+/// dimension d. The flat storage is owned by this index (own_fields is
+/// set), and the index is flagged as trained.
+/// The second parameter is named K to match the declaration in the header
+/// and the IndexNNDescent constructors.
+IndexNNDescentFlat::IndexNNDescentFlat(int d, int K, MetricType metric)
+        : IndexNNDescent(new IndexFlat(d, metric), K) {
+    own_fields = true;
+    is_trained = true;
+}
+} // namespace faiss

data/vendor/faiss/faiss/IndexNNDescent.h ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#pragma once
+#include <vector>
+#include <faiss/IndexFlat.h>
+#include <faiss/impl/NNDescent.h>
+#include <faiss/utils/utils.h>
+namespace faiss {
+/** The NNDescent index is a normal random-access index with an NNDescent
+ * link structure built on top; vectors live in a separate storage index */
+struct IndexNNDescent : Index {
+    // index type used by the NNDescent structure (32 bits)
+    using storage_idx_t = NNDescent::storage_idx_t;
+    /// Faiss results are 64-bit
+    using idx_t = Index::idx_t;
+    // the link structure
+    NNDescent nndescent;
+    // the sequential storage and its ownership flag
+    bool own_fields; // when true, the destructor deletes storage
+    Index* storage; // may be null; most methods throw in that case
+    explicit IndexNNDescent(
+            int d = 0,
+            int K = 32,
+            MetricType metric = METRIC_L2);
+    explicit IndexNNDescent(Index* storage, int K = 32);
+    ~IndexNNDescent() override;
+    void add(idx_t n, const float* x) override; // rebuilds the graph
+    /// Trains the storage if needed and marks the index as trained
+    void train(idx_t n, const float* x) override;
+    /// entry point for search: k results per query go to distances/labels
+    void search(
+            idx_t n,
+            const float* x,
+            idx_t k,
+            float* distances,
+            idx_t* labels) const override;
+    void reconstruct(idx_t key, float* recons) const override; // via storage
+    void reset() override; // clears graph and storage
+};
+/** Flat index topped with a NNDescent structure to access elements
+ *  more efficiently. The IndexFlat storage is created and owned here.
+ */
+struct IndexNNDescentFlat : IndexNNDescent {
+    IndexNNDescentFlat(); // only marks the index as trained
+    IndexNNDescentFlat(int d, int K, MetricType metric = METRIC_L2);
+};
+} // namespace faiss

data/vendor/faiss/faiss/IndexNSG.cpp ADDED Viewed

@@ -0,0 +1,303 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#include <faiss/IndexNSG.h>
+#include <omp.h>
+#include <cinttypes>
+#include <memory>
+#include <faiss/IndexFlat.h>
+#include <faiss/IndexNNDescent.h>
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/distances.h>
+namespace faiss {
+using idx_t = Index::idx_t;
+using namespace nsg;
+/**************************************************************
+ * IndexNSG implementation
+ **************************************************************/
+/// Creates an NSG index of dimension d with graph parameter R and no
+/// storage attached. build_type is 0, which add() treats as "build the knn
+/// graph with brute force search".
+IndexNSG::IndexNSG(int d, int R, MetricType metric)
+        : Index(d, metric),
+          nsg(R),
+          own_fields(false),
+          storage(nullptr),
+          is_built(false),
+          GK(64),
+          build_type(0) {
+    // settings handed to NNDescent when build_type == 1
+    nndescent_iter = 10;
+    nndescent_L = GK + 50;
+    nndescent_R = 100;
+    nndescent_S = 10;
+}
+/// Creates an NSG index on top of an existing storage index, taking the
+/// dimension and metric from it. The storage is not owned. build_type is
+/// 1, which add() treats as "build the knn graph with NNDescent".
+IndexNSG::IndexNSG(Index* storage, int R)
+        : Index(storage->d, storage->metric_type),
+          nsg(R),
+          own_fields(false),
+          storage(storage),
+          is_built(false),
+          GK(64),
+          build_type(1) {
+    // settings handed to NNDescent when build_type == 1
+    nndescent_iter = 10;
+    nndescent_L = GK + 50;
+    nndescent_R = 100;
+    nndescent_S = 10;
+}
+/// Releases the storage index, but only when this object owns it.
+IndexNSG::~IndexNSG() {
+    if (!own_fields) {
+        return;
+    }
+    delete storage;
+}
+/// Trains the underlying storage on n vectors and marks the index as
+/// trained. Throws if no storage is attached.
+void IndexNSG::train(idx_t n, const float* x) {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
+    // only the storage needs training; the nsg structure does not
+    storage->train(n, x);
+    is_trained = true;
+}
+/** Searches the k nearest neighbors of n query vectors through the NSG
+ *  graph.
+ *
+ * @param n          number of query vectors
+ * @param x          queries, read as n consecutive vectors of dimension d
+ * @param k          number of results per query
+ * @param distances  output, k entries written per query
+ * @param labels     output, k entries written per query
+ *
+ * Throws if no storage is attached. Queries are handled in batches so
+ * that InterruptCallback::check() runs between batches.
+ */
+void IndexNSG::search(
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
+    // in case of search L = -1
+    int L = std::max(nsg.search_L, static_cast<int>(k));
+    idx_t check_period = InterruptCallback::get_period_hint(d * L);
+    for (idx_t i0 = 0; i0 < n; i0 += check_period) {
+        idx_t i1 = std::min(i0 + check_period, n);
+#pragma omp parallel
+        {
+            // per-thread scratch state, released at the end of the region
+            VisitedTable vt(ntotal);
+            DistanceComputer* dis = storage_distance_computer(storage);
+            ScopeDeleter1<DistanceComputer> del(dis);
+#pragma omp for
+            for (idx_t i = i0; i < i1; i++) {
+                idx_t* idxi = labels + i * k;
+                float* simi = distances + i * k;
+                dis->set_query(x + i * d);
+                maxheap_heapify(k, simi, idxi);
+                nsg.search(*dis, k, idxi, simi, vt);
+                maxheap_reorder(k, simi, idxi);
+                // start a fresh visited set for the next query
+                vt.advance();
+            }
+        }
+        InterruptCallback::check();
+    }
+    if (metric_type == METRIC_INNER_PRODUCT) {
+        // we need to revert the negated distances; the index stays signed
+        // to avoid comparing size_t against the signed product k * n
+        const idx_t n_results = k * n;
+        for (idx_t i = 0; i < n_results; i++) {
+            distances[i] = -distances[i];
+        }
+    }
+}
+/** Builds the NSG from a knn graph supplied by the caller.
+ *
+ * @param n          number of vectors to add
+ * @param x          the vectors, passed to the storage index
+ * @param knn_graph  neighbor ids, read as n rows of GK entries
+ * @param GK         number of neighbors per row in knn_graph
+ *
+ * Throws if no storage is attached, if the index already holds vectors or
+ * is already built, or if check_knn_graph rejects the graph.
+ */
+void IndexNSG::build(idx_t n, const float* x, idx_t* knn_graph, int GK) {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
+    FAISS_THROW_IF_NOT_MSG(
+            !is_built && ntotal == 0, "The IndexNSG is already built");
+    // check the knn graph before touching the storage: the check only
+    // reads its arguments, and a rejected graph must not leave vectors
+    // behind (ntotal != 0 would make every later build() call fail)
+    check_knn_graph(knn_graph, n, GK);
+    storage->add(n, x);
+    ntotal = storage->ntotal;
+    const nsg::Graph<idx_t> knng(knn_graph, n, GK);
+    nsg.build(storage, n, knng, verbose);
+    is_built = true;
+}
+/** Adds n vectors and builds the NSG in one shot.
+ *
+ * A knn graph with GK neighbors per vector is computed first, either by
+ * brute force search on the storage (build_type == 0) or with a temporary
+ * IndexNNDescent (build_type == 1); the NSG is then built from it.
+ *
+ * Throws if no storage is attached, if the index is not trained, if it
+ * already holds vectors or is already built, if build_type is neither 0
+ * nor 1, or if check_knn_graph rejects the graph.
+ */
+void IndexNSG::add(idx_t n, const float* x) {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) "
+            "instead of IndexNSG directly");
+    FAISS_THROW_IF_NOT(is_trained);
+    FAISS_THROW_IF_NOT_MSG(
+            !is_built && ntotal == 0,
+            "NSG does not support incremental addition");
+    std::vector<idx_t> knng;
+    if (verbose) {
+        // %zu is the conversion that matches a size_t argument
+        printf("IndexNSG::add %zu vectors\n", size_t(n));
+    }
+    if (build_type == 0) { // build with brute force search
+        if (verbose) {
+            printf("  Build knn graph with brute force search on storage index\n");
+        }
+        storage->add(n, x);
+        ntotal = storage->ntotal;
+        FAISS_THROW_IF_NOT(ntotal == n);
+        // ask for one extra neighbor, since a vector may find itself
+        knng.resize(ntotal * (GK + 1));
+        storage->assign(ntotal, x, knng.data(), GK + 1);
+        // Remove itself
+        // - For metric distance, we just need to remove the first neighbor
+        // - But for non-metric, e.g. inner product, we need to check
+        // - each neighbor
+        // Rows are compacted in place from stride GK + 1 to stride GK; the
+        // write position never gets ahead of the read position.
+        if (storage->metric_type == METRIC_INNER_PRODUCT) {
+            for (idx_t i = 0; i < ntotal; i++) {
+                int count = 0;
+                for (int j = 0; j < GK + 1; j++) {
+                    idx_t id = knng[i * (GK + 1) + j];
+                    if (id != i) {
+                        knng[i * GK + count] = id;
+                        count += 1;
+                    }
+                    if (count == GK) {
+                        break;
+                    }
+                }
+            }
+        } else {
+            for (idx_t i = 0; i < ntotal; i++) {
+                memmove(knng.data() + i * GK,
+                        knng.data() + i * (GK + 1) + 1,
+                        GK * sizeof(idx_t));
+            }
+        }
+    } else if (build_type == 1) { // build with NNDescent
+        IndexNNDescent index(storage, GK);
+        index.nndescent.S = nndescent_S;
+        index.nndescent.R = nndescent_R;
+        index.nndescent.L = std::max(nndescent_L, GK + 50);
+        index.nndescent.iter = nndescent_iter;
+        index.verbose = verbose;
+        if (verbose) {
+            printf("  Build knn graph with NNdescent S=%d R=%d L=%d niter=%d\n",
+                   index.nndescent.S,
+                   index.nndescent.R,
+                   index.nndescent.L,
+                   index.nndescent.iter);
+        }
+        // prevent the temporary IndexNNDescent from deleting the storage
+        index.own_fields = false;
+        index.add(n, x);
+        // storage->add was already called by index.add above
+        ntotal = storage->ntotal;
+        FAISS_THROW_IF_NOT(ntotal == n);
+        knng.resize(ntotal * GK);
+        // widen the graph entries from int to idx_t
+        const int* knn_graph = index.nndescent.final_graph.data();
+#pragma omp parallel for
+        for (idx_t i = 0; i < ntotal * GK; i++) {
+            knng[i] = knn_graph[i];
+        }
+    } else {
+        FAISS_THROW_MSG("build_type should be 0 or 1");
+    }
+    if (verbose) {
+        printf("  Check the knn graph\n");
+    }
+    // check the knn graph
+    check_knn_graph(knng.data(), n, GK);
+    if (verbose) {
+        printf("  nsg building\n");
+    }
+    const nsg::Graph<idx_t> knn_graph(knng.data(), n, GK);
+    nsg.build(storage, n, knn_graph, verbose);
+    is_built = true;
+}
+/// Clears the graph and the storage, sets ntotal back to 0 and marks the
+/// index as not built. Throws if no storage is attached, like the other
+/// methods of this class, instead of dereferencing a null pointer.
+void IndexNSG::reset() {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
+    nsg.reset();
+    storage->reset();
+    ntotal = 0;
+    is_built = false;
+}
+/// Reconstructs vector `key` into `recons` by delegating to the storage.
+/// Throws if no storage is attached instead of dereferencing a null
+/// pointer.
+void IndexNSG::reconstruct(idx_t key, float* recons) const {
+    FAISS_THROW_IF_NOT_MSG(
+            storage,
+            "Please use IndexNSGFlat (or variants) instead of IndexNSG directly");
+    storage->reconstruct(key, recons);
+}
+/** Sanity-checks a knn graph before it is used to build the NSG.
+ *
+ * @param knn_graph  neighbor ids, read as n rows of K entries
+ * @param n          number of rows (vectors)
+ * @param K          number of neighbors per row
+ *
+ * An entry is invalid when it is negative, not smaller than n, or equal to
+ * its own row index. A warning with the number of invalid entries is
+ * printed to stderr when there is at least one. The call throws unless
+ * there are no invalid entries or their number is below n / 10.
+ */
+void IndexNSG::check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const {
+    idx_t total_count = 0;
+#pragma omp parallel for reduction(+ : total_count)
+    for (idx_t i = 0; i < n; i++) {
+        int count = 0;
+        for (int j = 0; j < K; j++) {
+            idx_t id = knn_graph[i * K + j];
+            if (id < 0 || id >= n || id == i) {
+                count += 1;
+            }
+        }
+        total_count += count;
+    }
+    if (total_count > 0) {
+        fprintf(stderr,
+                "WARNING: the input knn graph "
+                "has %" PRId64 " invalid entries\n",
+                total_count);
+    }
+    // n / 10 is 0 for n < 10, so a graph with no invalid entry at all
+    // would be rejected without the explicit zero case
+    FAISS_THROW_IF_NOT_MSG(
+            total_count == 0 || total_count < n / 10,
+            "There are too much invalid entries in the knn graph. "
+            "It may be an invalid knn graph.");
+}
+/**************************************************************
+ * IndexNSGFlat implementation
+ **************************************************************/
+/// Default constructor: relies on the IndexNSG defaults and only flags
+/// the index as trained.
+IndexNSGFlat::IndexNSGFlat() {
+    this->is_trained = true;
+}
+/// Builds an NSG index over a freshly allocated IndexFlat of dimension d.
+/// The flat storage is owned by this index (own_fields is set), and the
+/// index is flagged as trained.
+IndexNSGFlat::IndexNSGFlat(int d, int R, MetricType metric)
+        : IndexNSG(new IndexFlat(d, metric), R) {
+    this->own_fields = true;
+    this->is_trained = true;
+}
+} // namespace faiss