RubyGems - faiss - Versions diffs - 0.2.3 → 0.2.4 - Mend

faiss 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/Clustering.cpp +32 -0
data/vendor/faiss/faiss/Clustering.h +14 -0
data/vendor/faiss/faiss/Index.h +1 -1
data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
data/vendor/faiss/faiss/Index2Layer.h +2 -16
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
data/vendor/faiss/faiss/IndexFlat.h +9 -15
data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
data/vendor/faiss/faiss/IndexIVF.h +25 -7
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
data/vendor/faiss/faiss/IndexLSH.h +2 -15
data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
data/vendor/faiss/faiss/IndexPQ.h +2 -17
data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
data/vendor/faiss/faiss/IndexRefine.h +10 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
data/vendor/faiss/faiss/VectorTransform.h +3 -0
data/vendor/faiss/faiss/clone_index.cpp +3 -2
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
data/vendor/faiss/faiss/impl/NSG.h +1 -1
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
data/vendor/faiss/faiss/impl/io.cpp +1 -1
data/vendor/faiss/faiss/impl/io_macros.h +20 -0
data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
data/vendor/faiss/faiss/index_factory.cpp +585 -414
data/vendor/faiss/faiss/index_factory.h +3 -0
data/vendor/faiss/faiss/utils/distances.cpp +4 -2
data/vendor/faiss/faiss/utils/distances.h +36 -3
data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
data/vendor/faiss/faiss/utils/utils.h +1 -1
metadata +12 -5
data/vendor/faiss/faiss/IndexResidual.cpp +0 -291

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 106966f6d5e7f6a3f5237a2ebb59912304bbe319ba2891c45166c753fc0e9df1
-  data.tar.gz: 3d72777c777d75beb15c09cb223c4e63f188b7e89fab06d6fa9268bc2be4ff59
+  metadata.gz: 15428eb8dd7d27f8a94e3a7797dd765827e5454def33fa785055adb7ef0d20c5
+  data.tar.gz: 3e8eafebc49163c928bcab8d0ebd0f7b69e6659e49f36d7aaeb44e8651853ac9
 SHA512:
-  metadata.gz: b1d822fd4e0850dd667aba09a02f81c43e2afde90a6d88fff1e631539a8d47f42b4dfe0705980a6ff8be88b021af805998d1e70b09cd1010302702a1c1363cac
-  data.tar.gz: 24e4febd81142150541199523f0b1e19a0be7c658e504472778173e38006234ea47f7e362c1396c3135b4449b7ab5229dc027c4bcea59c880902e1fa23e3c956
+  metadata.gz: 598f6e626d5c970e408cff68ec479bf1aa2d6ee18adeeeb2489d1c4fbf627dacbc6e398ce149f0483720080a439df4d2887e5b6c9dc9f465e8ffa1bbeede84a8
+  data.tar.gz: fefebfbbbbceb58ac6c6b02636630943d30db7af4ee92521586b4fb40e73bf1aa2b401e28e4c27872bf9344f60e3c9fb288ec6420abc1ffccd05ffd3ec7379fd

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## 0.2.4 (2022-01-10)
+- Updated Faiss to 1.7.2
 ## 0.2.3 (2021-12-17)
 - Fixed installation error with ARM Mac

data/LICENSE.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 MIT License
 Copyright (c) Facebook, Inc. and its affiliates.
-Copyright (c) 2020-2021 Andrew Kane
+Copyright (c) 2020-2022 Andrew Kane
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

data/lib/faiss/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Faiss
-  VERSION = "0.2.3"
+  VERSION = "0.2.4"
 end

data/vendor/faiss/faiss/Clustering.cpp CHANGED Viewed

@@ -20,6 +20,7 @@
 #include <faiss/IndexFlat.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/kmeans1d.h>
 #include <faiss/utils/distances.h>
 #include <faiss/utils/random.h>
 #include <faiss/utils/utils.h>
@@ -553,6 +554,37 @@ void Clustering::train_encoded(
     }
 }
+Clustering1D::Clustering1D(int k) : Clustering(1, k) {}
+Clustering1D::Clustering1D(int k, const ClusteringParameters& cp)
+        : Clustering(1, k, cp) {}
+void Clustering1D::train_exact(idx_t n, const float* x) {
+    const float* xt = x;
+    std::unique_ptr<uint8_t[]> del;
+    if (n > k * max_points_per_centroid) {
+        uint8_t* x_new;
+        float* weights_new;
+        n = subsample_training_set(
+                *this,
+                n,
+                (uint8_t*)x,
+                sizeof(float) * d,
+                nullptr,
+                &x_new,
+                &weights_new);
+        del.reset(x_new);
+        xt = (float*)x_new;
+    }
+    centroids.resize(k);
+    double uf = kmeans1d(xt, n, k, centroids.data());
+    ClusteringIterationStats stats = {0.0, 0.0, 0.0, uf, 0};
+    iteration_stats.push_back(stats);
+}
 float kmeans_clustering(
         size_t d,
         size_t n,

data/vendor/faiss/faiss/Clustering.h CHANGED Viewed

@@ -111,6 +111,20 @@ struct Clustering : ClusteringParameters {
     virtual ~Clustering() {}
 };
+/** Exact 1D clustering algorithm
+ *
+ * Since it does not use an index, it does not overload the train() function
+ */
+struct Clustering1D : Clustering {
+    explicit Clustering1D(int k);
+    Clustering1D(int k, const ClusteringParameters& cp);
+    void train_exact(idx_t n, const float* x);
+    virtual ~Clustering1D() {}
+};
 struct ProgressiveDimClusteringParameters : ClusteringParameters {
     int progressive_dim_steps; ///< number of incremental steps
     bool apply_pca;            ///< apply PCA on input

data/vendor/faiss/faiss/Index.h CHANGED Viewed

@@ -18,7 +18,7 @@
 #define FAISS_VERSION_MAJOR 1
 #define FAISS_VERSION_MINOR 7
-#define FAISS_VERSION_PATCH 1
+#define FAISS_VERSION_PATCH 2
 /**
  * @namespace faiss

data/vendor/faiss/faiss/Index2Layer.cpp CHANGED Viewed

@@ -30,16 +30,6 @@
 #include <faiss/utils/distances.h>
 #include <faiss/utils/utils.h>
-/*
-#include <faiss/utils/Heap.h>
-#include <faiss/Clustering.h>
-#include <faiss/utils/hamming.h>
-*/
 namespace faiss {
 /*************************************
@@ -52,7 +42,7 @@ Index2Layer::Index2Layer(
         int M,
         int nbit,
         MetricType metric)
-        : Index(quantizer->d, metric),
+        : IndexFlatCodes(0, quantizer->d, metric),
           q1(quantizer, nlist),
           pq(quantizer->d, M, nbit) {
     is_trained = false;
@@ -116,55 +106,6 @@ void Index2Layer::train(idx_t n, const float* x) {
     is_trained = true;
 }
-void Index2Layer::add(idx_t n, const float* x) {
-    idx_t bs = 32768;
-    if (n > bs) {
-        for (idx_t i0 = 0; i0 < n; i0 += bs) {
-            idx_t i1 = std::min(i0 + bs, n);
-            if (verbose) {
-                printf("Index2Layer::add: adding %" PRId64 ":%" PRId64
-                       " / %" PRId64 "\n",
-                       i0,
-                       i1,
-                       n);
-            }
-            add(i1 - i0, x + i0 * d);
-        }
-        return;
-    }
-    std::vector<idx_t> codes1(n);
-    q1.quantizer->assign(n, x, codes1.data());
-    std::vector<float> residuals(n * d);
-    for (idx_t i = 0; i < n; i++) {
-        q1.quantizer->compute_residual(
-                x + i * d, residuals.data() + i * d, codes1[i]);
-    }
-    std::vector<uint8_t> codes2(n * code_size_2);
-    pq.compute_codes(residuals.data(), codes2.data(), n);
-    codes.resize((ntotal + n) * code_size);
-    uint8_t* wp = &codes[ntotal * code_size];
-    {
-        int i = 0x11223344;
-        const char* ip = (char*)&i;
-        FAISS_THROW_IF_NOT_MSG(
-                ip[0] == 0x44, "works only on a little-endian CPU");
-    }
-    // copy to output table
-    for (idx_t i = 0; i < n; i++) {
-        memcpy(wp, &codes1[i], code_size_1);
-        wp += code_size_1;
-        memcpy(wp, &codes2[i * code_size_2], code_size_2);
-        wp += code_size_2;
-    }
-    ntotal += n;
-}
 void Index2Layer::search(
         idx_t /*n*/,
         const float* /*x*/,
@@ -174,25 +115,6 @@ void Index2Layer::search(
     FAISS_THROW_MSG("not implemented");
 }
-void Index2Layer::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
-    std::vector<float> recons1(d);
-    FAISS_THROW_IF_NOT(i0 >= 0 && i0 + ni <= ntotal);
-    const uint8_t* rp = &codes[i0 * code_size];
-    for (idx_t i = 0; i < ni; i++) {
-        idx_t key = 0;
-        memcpy(&key, rp, code_size_1);
-        q1.quantizer->reconstruct(key, recons1.data());
-        rp += code_size_1;
-        pq.decode(rp, recons);
-        for (idx_t j = 0; j < d; j++) {
-            recons[j] += recons1[j];
-        }
-        rp += code_size_2;
-        recons += d;
-    }
-}
 void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const {
     FAISS_THROW_IF_NOT(other.nlist == q1.nlist);
     FAISS_THROW_IF_NOT(other.code_size == code_size_2);
@@ -211,15 +133,6 @@ void Index2Layer::transfer_to_IVFPQ(IndexIVFPQ& other) const {
     other.ntotal = ntotal;
 }
-void Index2Layer::reconstruct(idx_t key, float* recons) const {
-    reconstruct_n(key, 1, recons);
-}
-void Index2Layer::reset() {
-    ntotal = 0;
-    codes.clear();
-}
 namespace {
 struct Distance2Level : DistanceComputer {
@@ -259,7 +172,7 @@ struct DistanceXPQ4 : Distance2Level {
         FAISS_ASSERT(quantizer);
         M = storage.pq.M;
-        pq_l1_tab = quantizer->xb.data();
+        pq_l1_tab = quantizer->get_xb();
     }
     float operator()(idx_t i) override {
@@ -368,12 +281,26 @@ DistanceComputer* Index2Layer::get_distance_computer() const {
 }
 /* The standalone codec interface */
-size_t Index2Layer::sa_code_size() const {
-    return code_size;
-}
 void Index2Layer::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
     FAISS_THROW_IF_NOT(is_trained);
+    idx_t bs = 32768;
+    if (n > bs) {
+        for (idx_t i0 = 0; i0 < n; i0 += bs) {
+            idx_t i1 = std::min(i0 + bs, n);
+            if (verbose) {
+                printf("Index2Layer::add: adding %" PRId64 ":%" PRId64
+                       " / %" PRId64 "\n",
+                       i0,
+                       i1,
+                       n);
+            }
+            sa_encode(i1 - i0, x + i0 * d, bytes + i0 * code_size);
+        }
+        return;
+    }
     std::unique_ptr<int64_t[]> list_nos(new int64_t[n]);
     q1.quantizer->assign(n, x, list_nos.get());
     std::vector<float> residuals(n * d);

data/vendor/faiss/faiss/Index2Layer.h CHANGED Viewed

@@ -11,6 +11,7 @@
 #include <vector>
+#include <faiss/IndexFlatCodes.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexPQ.h>
@@ -24,25 +25,19 @@ struct IndexIVFPQ;
 * The class is mainly intended to store encoded vectors that can be
  * accessed randomly, the search function is not implemented.
  */
-struct Index2Layer : Index {
+struct Index2Layer : IndexFlatCodes {
     /// first level quantizer
     Level1Quantizer q1;
     /// second level quantizer is always a PQ
     ProductQuantizer pq;
-    /// Codes. Size ntotal * code_size.
-    std::vector<uint8_t> codes;
     /// size of the code for the first level (ceil(log8(q1.nlist)))
     size_t code_size_1;
     /// size of the code for the second level
     size_t code_size_2;
-    /// code_size_1 + code_size_2
-    size_t code_size;
     Index2Layer(
             Index* quantizer,
             size_t nlist,
@@ -55,8 +50,6 @@ struct Index2Layer : Index {
     void train(idx_t n, const float* x) override;
-    void add(idx_t n, const float* x) override;
     /// not implemented
     void search(
             idx_t n,
@@ -65,19 +58,12 @@ struct Index2Layer : Index {
             float* distances,
             idx_t* labels) const override;
-    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
-    void reconstruct(idx_t key, float* recons) const override;
-    void reset() override;
     DistanceComputer* get_distance_computer() const override;
     /// transfer the flat codes to an IVFPQ index
     void transfer_to_IVFPQ(IndexIVFPQ& other) const;
     /* The standalone codec interface */
-    size_t sa_code_size() const override;
     void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };