RubyGems - faiss - Versions diffs - 0.1.2 → 0.1.3 - Mend

faiss 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 61a0b7a7d20933b60a9e0e213016b77f10eae5bb86ecf6825f5f5661f31f5d7d
-  data.tar.gz: 967378ee774a35e3a639b1d902648ebf4177ffb7ed43e7228e8432f284f397c1
+  metadata.gz: 97fd4d583754d20a5771e941fd5272205977c34b77eb9252e58f6016b90be52e
+  data.tar.gz: 26df73fc7891efd894e9ebf4ed37db2aaa3af8b62221cdd707cb287f920ad0a7
 SHA512:
-  metadata.gz: 32747a4d4a3d40f15e9802280d894b2270d4b78ac0a10859442d0fd3c7ae27a55032a92e072756cb4046964c9d53afcc9586ac954e2b3cf63d057d0a3652e5a8
-  data.tar.gz: ca5005286253b7dea1546160ffb00c4b91a8b926512fbcfb7db594171435249e88408c982dc53fed4d90f87e68979bd9c2a2c1975f94495ca77c9ac878b22c1c
+  metadata.gz: 197143a79061c299b8f5e5ae76466db1a673e46f6d842615bed46170afadb6145e14df936262ec45f7e0d1eea889037cb3123c5a9348e32774eeb97414f180fd
+  data.tar.gz: 5a8e1ecebd43886d974e52fa216d86d9c03e3f8c04629eddc04a8be8df5c7ab4fc4c57398f2893d44718c788cd1853ae1bee06f64f1d49cdc5330cb03c5c0e6c

data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+## 0.1.3 (2020-10-22)
+- Updated Faiss to 1.6.4
 ## 0.1.2 (2020-08-17)
 - Updated Faiss to 1.6.3

data/ext/faiss/extconf.rb CHANGED

@@ -7,7 +7,7 @@ abort "OpenMP not found" unless have_library("omp") || have_library("gomp")
 $CXXFLAGS << " -std=c++11 -march=native -DFINTEGER=int"
 ext = File.expand_path(".", __dir__)
-vendor = File.expand_path("../../vendor", __dir__)
+vendor = File.expand_path("../../vendor/faiss", __dir__)
 $srcs = Dir["{#{ext},#{vendor}/faiss,#{vendor}/faiss/impl,#{vendor}/faiss/utils}/*.{cpp}"]
 $objs = $srcs.map { |v| v.sub(/cpp\z/, "o") }

data/lib/faiss/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Faiss
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

data/vendor/faiss/benchs/bench_6bit_codec.cpp ADDED

@@ -0,0 +1,80 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <cstdio>
+#include <omp.h>
+#include <faiss/impl/ScalarQuantizer.h>
+#include <faiss/utils/utils.h>
+#include <faiss/utils/random.h>
+#include <faiss/utils/distances.h>
+using namespace faiss;
+int main() {
+    int d = 128;
+    int n = 2000;
+    std::vector<float> x(d * n);
+    float_rand(x.data(), d * n, 12345);
+    // make sure it's idempotent
+    ScalarQuantizer sq(d, ScalarQuantizer::QT_6bit);
+    omp_set_num_threads(1);
+    sq.train(n, x.data());
+    size_t code_size = sq.code_size;
+    printf("code size: %ld\n", sq.code_size);
+    // encode
+    std::vector<uint8_t> codes(code_size * n);
+    sq.compute_codes(x.data(), codes.data(), n);
+    // decode
+    std::vector<float> x2(d * n);
+    sq.decode(codes.data(), x2.data(), n);
+    printf("sqL2 recons error: %g\n",
+           fvec_L2sqr(x.data(), x2.data(), n * d) / n);
+    // encode again
+    std::vector<uint8_t> codes2(code_size * n);
+    sq.compute_codes(x2.data(), codes2.data(), n);
+    size_t ndiff = 0;
+    for (size_t i = 0; i < codes.size(); i++) {
+        if (codes[i] != codes2[i]) ndiff++;
+    }
+    printf("ndiff for idempotence: %ld / %ld\n", ndiff, codes.size());
+    std::unique_ptr<ScalarQuantizer::SQDistanceComputer>
+        dc(sq.get_distance_computer());
+    dc->codes = codes.data();
+    dc->code_size = sq.code_size;
+    printf("code size: %ld\n", dc->code_size);
+    double sum_dis = 0;
+    double t0 = getmillisecs();
+    for (int i = 0; i < n; i++) {
+        dc->set_query(&x[i * d]);
+        for (int j = 0; j < n; j++) {
+            sum_dis += (*dc)(j);
+        }
+    }
+    printf("distances computed in %.3f ms, checksum=%g\n",
+           getmillisecs() - t0, sum_dis);
+    return 0;
+}

data/vendor/faiss/c_api/AutoTune_c.h CHANGED

@@ -31,6 +31,8 @@ void faiss_ParameterRange_values(FaissParameterRange*, double**, size_t*);
  */
 FAISS_DECLARE_CLASS(ParameterSpace)
+FAISS_DECLARE_DESTRUCTOR(ParameterSpace)
 /// Parameter space default constructor
 int faiss_ParameterSpace_new(FaissParameterSpace** space);

data/vendor/faiss/c_api/IndexShards_c.cpp CHANGED

@@ -32,12 +32,6 @@ int faiss_IndexShards_add_shard(FaissIndexShards* index, FaissIndex* shard) {
     } CATCH_AND_HANDLE
 }
-int faiss_IndexShards_sync_with_shard_indexes(FaissIndexShards* index) {
-    try {
-        reinterpret_cast<IndexShards*>(index)->sync_with_shard_indexes();
-    } CATCH_AND_HANDLE
-}
 FaissIndex* faiss_IndexShards_at(FaissIndexShards* index, int i) {
     auto shard = reinterpret_cast<IndexShards*>(index)->at(i);
     return reinterpret_cast<FaissIndex*>(shard);

data/vendor/faiss/c_api/IndexShards_c.h CHANGED

@@ -31,12 +31,9 @@ int faiss_IndexShards_new_with_options(FaissIndexShards** p_index, idx_t d, int
 int faiss_IndexShards_add_shard(FaissIndexShards* index, FaissIndex* shard);
-/// update metric_type and ntotal
-int faiss_IndexShards_sync_with_shard_indexes(FaissIndexShards* index);
 FaissIndex* faiss_IndexShards_at(FaissIndexShards* index, int i);
 #ifdef __cplusplus
 }
 #endif
-#endif
+#endif

data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp CHANGED

@@ -12,8 +12,10 @@
 #include "GpuClonerOptions_c.h"
 #include "macros_impl.h"
 #include "Index.h"
-#include "gpu/GpuAutoTune.h"
-#include "gpu/GpuClonerOptions.h"
+#include <faiss/gpu/GpuCloner.h>
+#include <faiss/gpu/GpuResources.h>
+#include <faiss/gpu/GpuAutoTune.h>
+#include <faiss/gpu/GpuClonerOptions.h>
 #include <vector>
 using faiss::Index;

data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp CHANGED

@@ -9,7 +9,7 @@
 // -*- c++ -*-
 #include "GpuClonerOptions_c.h"
-#include "gpu/GpuClonerOptions.h"
+#include <faiss/gpu/GpuClonerOptions.h>
 #include "macros_impl.h"
 using faiss::gpu::IndicesOptions;

data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp CHANGED

@@ -8,7 +8,7 @@
 // Copyright 2004-present Facebook. All Rights Reserved.
 // -*- c++ -*-
-#include "gpu/GpuIndex.h"
+#include <faiss/gpu/GpuIndex.h>
 #include "GpuIndex_c.h"
 #include "macros_impl.h"

data/vendor/faiss/c_api/gpu/GpuResources_c.cpp CHANGED

@@ -9,7 +9,7 @@
 // -*- c++ -*-
 #include "gpu/GpuResources_c.h"
-#include "gpu/GpuResources.h"
+#include <faiss/gpu/GpuResources.h>
 #include "macros_impl.h"
 using faiss::gpu::GpuResources;

data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp CHANGED

@@ -9,7 +9,7 @@
 // -*- c++ -*-
 #include "gpu/StandardGpuResources_c.h"
-#include "gpu/StandardGpuResources.h"
+#include <faiss/gpu/StandardGpuResources.h>
 #include "macros_impl.h"
 using faiss::gpu::StandardGpuResources;

data/vendor/faiss/demos/demo_imi_flat.cpp CHANGED

@@ -10,6 +10,7 @@
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <random>
 #include <sys/time.h>
@@ -77,6 +78,8 @@ int main ()
     // Use 4-1024 depending on the trade-off speed accuracy that you want
     index.nprobe = 2048;
+    std::mt19937 rng;
+    std::uniform_real_distribution<> distrib;
     { // training
         printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
@@ -84,7 +87,7 @@ int main ()
         std::vector <float> trainvecs (nt * d);
         for (size_t i = 0; i < nt * d; i++) {
-            trainvecs[i] = drand48();
+            trainvecs[i] = distrib(rng);
         }
         printf ("[%.3f s] Training the index\n", elapsed() - t0);
@@ -101,7 +104,7 @@ int main ()
         std::vector <float> database (nb * d);
         for (size_t i = 0; i < nb * d; i++) {
-            database[i] = drand48();
+            database[i] = distrib(rng);
         }
         printf ("[%.3f s] Adding the vectors to the index\n", elapsed() - t0);

data/vendor/faiss/demos/demo_imi_pq.cpp CHANGED

@@ -10,6 +10,7 @@
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <random>
 #include <sys/time.h>
@@ -87,6 +88,9 @@ int main ()
     index.nprobe = 2048;
+    std::mt19937 rng;
+    std::uniform_real_distribution<> distrib;
     { // training.
         // The distribution of the training vectors should be the same
@@ -100,7 +104,7 @@ int main ()
         std::vector <float> trainvecs (nt * d);
         for (size_t i = 0; i < nt; i++) {
             for (size_t j = 0; j < d; j++) {
-                trainvecs[i * d + j] = drand48();
+                trainvecs[i * d + j] = distrib(rng);
             }
         }
@@ -124,7 +128,7 @@ int main ()
         std::vector <long> ids (nb);
         for (size_t i = 0; i < nb; i++) {
             for (size_t j = 0; j < d; j++) {
-                database[i * d + j] = drand48();
+                database[i * d + j] = distrib(rng);
             }
             ids[i] = 8760000000L + i;
         }

data/vendor/faiss/demos/demo_ivfpq_indexing.cpp CHANGED

@@ -10,6 +10,7 @@
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
+#include <random>
 #include <sys/time.h>
@@ -54,13 +55,16 @@ int main ()
                              ncentroids, 4, 8);
+    std::mt19937 rng;
     { // training
         printf ("[%.3f s] Generating %ld vectors in %dD for training\n",
                 elapsed() - t0, nt, d);
         std::vector <float> trainvecs (nt * d);
+        std::uniform_real_distribution<> distrib;
         for (size_t i = 0; i < nt * d; i++) {
-            trainvecs[i] = drand48();
+            trainvecs[i] = distrib(rng);
         }
         printf ("[%.3f s] Training the index\n",
@@ -86,8 +90,9 @@ int main ()
                 elapsed() - t0, nb);
         std::vector <float> database (nb * d);
+        std::uniform_real_distribution<> distrib;
         for (size_t i = 0; i < nb * d; i++) {
-            database[i] = drand48();
+            database[i] = distrib(rng);
         }
         printf ("[%.3f s] Adding the vectors to the index\n",

data/vendor/faiss/{AutoTune.cpp → faiss/AutoTune.cpp} RENAMED

@@ -13,6 +13,7 @@
 #include <faiss/AutoTune.h>
+#include <cinttypes>
 #include <cmath>
 #include <faiss/impl/FaissAssert.h>
@@ -233,7 +234,7 @@ void OperatingPoints::display (bool only_optimal) const
 {
     const std::vector<OperatingPoint> &pts =
         only_optimal ? optimal_pts : all_pts;
-    printf("Tested %ld operating points, %ld ones are optimal:\n",
+    printf("Tested %zd operating points, %zd ones are optimal:\n",
            all_pts.size(), optimal_pts.size());
     for (int i = 0; i < pts.size(); i++) {
@@ -247,7 +248,7 @@ void OperatingPoints::display (bool only_optimal) const
                 }
             }
         }
-        printf ("cno=%ld key=%s perf=%.4f t=%.3f %s\n",
+        printf ("cno=%" PRId64 " key=%s perf=%.4f t=%.3f %s\n",
                 op.cno, op.key.c_str(), op.perf, op.t, star);
     }
@@ -437,11 +438,10 @@ void ParameterSpace::set_index_parameters (Index *index, size_t cno) const
 void ParameterSpace::set_index_parameters (
      Index *index, const char *description_in) const
 {
-    char description[strlen(description_in) + 1];
+    std::string description(description_in);
     char *ptr;
-    memcpy (description, description_in, strlen(description_in) + 1);
-    for (char *tok = strtok_r (description, " ,", &ptr);
+    for (char *tok = strtok_r (&description[0], " ,", &ptr);
          tok;
          tok = strtok_r (nullptr, " ,", &ptr)) {
         char name[100];
@@ -566,7 +566,7 @@ void ParameterSpace::set_index_parameter (
 void ParameterSpace::display () const
 {
-    printf ("ParameterSpace, %ld parameters, %ld combinations:\n",
+    printf ("ParameterSpace, %zd parameters, %zd combinations:\n",
             parameter_ranges.size (), n_combinations ());
     for (int i = 0; i < parameter_ranges.size(); i++) {
         const ParameterRange & pr = parameter_ranges[i];
@@ -622,7 +622,7 @@ void ParameterSpace::explore (Index *index,
             bool keep = ops->add (perf, t_search, combination_name (cno), cno);
             if (verbose)
-                printf("  %ld/%ld: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
+                printf("  %zd/%zd: %s perf=%.3f t=%.3f s %s\n", cno, n_comb,
                        combination_name (cno).c_str(), perf, t_search,
                        keep ? "*" : "");
         }
@@ -646,7 +646,7 @@ void ParameterSpace::explore (Index *index,
         size_t cno = perm[xp];
         if (verbose)
-            printf("  %ld/%d: cno=%ld %s ", xp, n_exp, cno,
+            printf("  %zd/%d: cno=%zd %s ", xp, n_exp, cno,
                    combination_name (cno).c_str());
         {
@@ -677,7 +677,7 @@ void ParameterSpace::explore (Index *index,
             if (thread_over_batches) {
 #pragma omp parallel for
-                for (size_t q0 = 0; q0 < nq; q0 += batchsize) {
+                for (Index::idx_t q0 = 0; q0 < nq; q0 += batchsize) {
                     size_t q1 = q0 + batchsize;
                     if (q1 > nq) q1 = nq;
                     index->search (q1 - q0, xq + q0 * index->d,

data/vendor/faiss/{AutoTune.h → faiss/AutoTune.h} RENAMED

File without changes

data/vendor/faiss/{Clustering.cpp → faiss/Clustering.cpp} RENAMED

@@ -10,6 +10,7 @@
 #include <faiss/Clustering.h>
 #include <faiss/impl/AuxIndexStructures.h>
+#include <cinttypes>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
@@ -97,7 +98,7 @@ idx_t subsample_training_set(
 )
 {
     if (clus.verbose) {
-        printf("Sampling a subset of %ld / %ld for training\n",
+        printf("Sampling a subset of %zd / %" PRId64 " for training\n",
                clus.k * clus.max_points_per_centroid, nx);
     }
     std::vector<int> perm (nx);
@@ -190,7 +191,7 @@ void compute_centroids (size_t d, size_t k, size_t n,
     }
 #pragma omp parallel for
-    for (size_t ci = 0; ci < k; ci++) {
+    for (idx_t ci = 0; ci < k; ci++) {
         if (hassign[ci] == 0) {
             continue;
         }
@@ -209,7 +210,7 @@ void compute_centroids (size_t d, size_t k, size_t n,
 /** Handle empty clusters by splitting larger ones.
  *
  * It works by slightly changing the centroids to make 2 clusters from
- * a single one. Takes the same arguements as compute_centroids.
+ * a single one. Takes the same arguments as compute_centroids.
  *
  * @return           nb of spliting operations (larger is worse)
  */
@@ -269,8 +270,8 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
                                 const float *weights) {
     FAISS_THROW_IF_NOT_FMT (nx >= k,
-             "Number of training points (%ld) should be at least "
-             "as large as number of clusters (%ld)", nx, k);
+             "Number of training points (%" PRId64 ") should be at least "
+             "as large as number of clusters (%zd)", nx, k);
     FAISS_THROW_IF_NOT_FMT ((!codec || codec->d == d),
              "Codec dimension %d not the same as data dimension %d",
@@ -288,7 +289,7 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
         // reports.
         const float *x = reinterpret_cast<const float *>(x_in);
         for (size_t i = 0; i < nx * d; i++) {
-            FAISS_THROW_IF_NOT_MSG (finite (x[i]),
+            FAISS_THROW_IF_NOT_MSG (std::isfinite (x[i]),
                                     "input contains NaN's or Inf's");
         }
     }
@@ -307,15 +308,15 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
         del3.reset (weights_new); weights = weights_new;
     } else if (nx < k * min_points_per_centroid) {
         fprintf (stderr,
-                 "WARNING clustering %ld points to %ld centroids: "
-                 "please provide at least %ld training points\n",
+                 "WARNING clustering %" PRId64 " points to %zd centroids: "
+                 "please provide at least %" PRId64 " training points\n",
                  nx, k, idx_t(k) * min_points_per_centroid);
     }
     if (nx == k) {
         // this is a corner case, just copy training set to clusters
         if (verbose) {
-            printf("Number of training points (%ld) same as number of "
+            printf("Number of training points (%" PRId64 ") same as number of "
                    "clusters, just copying\n", nx);
         }
         centroids.resize (d * k);
@@ -336,11 +337,11 @@ void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
     if (verbose) {
-        printf("Clustering %d points in %ldD to %ld clusters, "
+        printf("Clustering %" PRId64 " points in %zdD to %zd clusters, "
                "redo %d times, %d iterations\n",
-               int(nx), d, k, nredo, niter);
+               nx, d, k, nredo, niter);
         if (codec) {
-            printf("Input data encoded in %ld bytes per vector\n",
+            printf("Input data encoded in %zd bytes per vector\n",
                    codec->sa_code_size ());
         }
     }