RubyGems - faiss - Versions diffs - 0.1.1 → 0.1.2 - Mend

faiss 0.1.1 → 0.1.2

Files changed (77) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +18 -18
data/README.md +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/Clustering.cpp +318 -53
data/vendor/faiss/Clustering.h +39 -11
data/vendor/faiss/DirectMap.cpp +267 -0
data/vendor/faiss/DirectMap.h +120 -0
data/vendor/faiss/IVFlib.cpp +24 -4
data/vendor/faiss/IVFlib.h +4 -0
data/vendor/faiss/Index.h +5 -24
data/vendor/faiss/Index2Layer.cpp +0 -1
data/vendor/faiss/IndexBinary.h +7 -3
data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
data/vendor/faiss/IndexBinaryFlat.h +3 -0
data/vendor/faiss/IndexBinaryHash.cpp +492 -0
data/vendor/faiss/IndexBinaryHash.h +116 -0
data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
data/vendor/faiss/IndexBinaryIVF.h +14 -4
data/vendor/faiss/IndexFlat.h +2 -1
data/vendor/faiss/IndexHNSW.cpp +68 -16
data/vendor/faiss/IndexHNSW.h +3 -3
data/vendor/faiss/IndexIVF.cpp +72 -76
data/vendor/faiss/IndexIVF.h +24 -5
data/vendor/faiss/IndexIVFFlat.cpp +19 -54
data/vendor/faiss/IndexIVFFlat.h +1 -11
data/vendor/faiss/IndexIVFPQ.cpp +49 -26
data/vendor/faiss/IndexIVFPQ.h +9 -10
data/vendor/faiss/IndexIVFPQR.cpp +2 -2
data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
data/vendor/faiss/IndexLSH.h +4 -1
data/vendor/faiss/IndexPreTransform.cpp +0 -1
data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
data/vendor/faiss/InvertedLists.cpp +0 -2
data/vendor/faiss/MetaIndexes.cpp +0 -1
data/vendor/faiss/MetricType.h +36 -0
data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
data/vendor/faiss/c_api/Clustering_c.h +11 -5
data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
data/vendor/faiss/gpu/GpuDistance.h +93 -0
data/vendor/faiss/gpu/GpuIndex.h +7 -0
data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
data/vendor/faiss/impl/HNSW.cpp +0 -1
data/vendor/faiss/impl/PolysemousTraining.h +5 -5
data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
data/vendor/faiss/impl/ProductQuantizer.h +42 -47
data/vendor/faiss/impl/index_read.cpp +103 -7
data/vendor/faiss/impl/index_write.cpp +101 -5
data/vendor/faiss/impl/io.cpp +111 -1
data/vendor/faiss/impl/io.h +38 -0
data/vendor/faiss/index_factory.cpp +0 -1
data/vendor/faiss/tests/test_merge.cpp +0 -1
data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
data/vendor/faiss/utils/distances.cpp +4 -5
data/vendor/faiss/utils/distances_simd.cpp +0 -1
data/vendor/faiss/utils/hamming.cpp +85 -3
data/vendor/faiss/utils/hamming.h +20 -0
data/vendor/faiss/utils/utils.cpp +0 -96
data/vendor/faiss/utils/utils.h +0 -15
metadata +11 -3
data/lib/faiss/ext.bundle +0 -0

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a0369e5dda330b1490e48a88863baa01df9cadfa570078892cec439f82efaad1
-  data.tar.gz: bb7d89fa17f782e8163b114b520b8c2c082cf37661b4b6fc4593460dc5958484
+  metadata.gz: 61a0b7a7d20933b60a9e0e213016b77f10eae5bb86ecf6825f5f5661f31f5d7d
+  data.tar.gz: 967378ee774a35e3a639b1d902648ebf4177ffb7ed43e7228e8432f284f397c1
 SHA512:
-  metadata.gz: 0a9f1515d142d11c688f1a9cdbcf9af0c36fa3fc98b240f236554b1067cf2daad1cefa377d18d236674b8fc1b94d64a3acc070c2528c47b68f4d231f29b7648d
-  data.tar.gz: ae02808dbda4831c7165c987b77c72f9d436bba94e3e28d372f69ceee18fb4971c6cc99a2bf7ce9bd9a9a6e4befcd372515a728bf379294ecc870f2c58f85eb2
+  metadata.gz: 32747a4d4a3d40f15e9802280d894b2270d4b78ac0a10859442d0fd3c7ae27a55032a92e072756cb4046964c9d53afcc9586ac954e2b3cf63d057d0a3652e5a8
+  data.tar.gz: ca5005286253b7dea1546160ffb00c4b91a8b926512fbcfb7db594171435249e88408c982dc53fed4d90f87e68979bd9c2a2c1975f94495ca77c9ac878b22c1c

data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+## 0.1.2 (2020-08-17)
+- Updated Faiss to 1.6.3
 ## 0.1.1 (2020-03-09)
 - Vendored library

data/LICENSE.txt CHANGED

@@ -1,22 +1,22 @@
-Copyright (c) 2020 Andrew Kane
 MIT License
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
+Copyright (c) Facebook, Inc. and its affiliates.
+Copyright (c) 2020 Andrew Kane
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md CHANGED

@@ -24,7 +24,7 @@ Add this line to your application’s Gemfile:
 gem 'faiss'
 ```
-Faiss is not available for Windows yet
+Faiss is not available for Windows
 ## Getting Started

data/lib/faiss/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Faiss
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

data/vendor/faiss/Clustering.cpp CHANGED

@@ -10,11 +10,12 @@
 #include <faiss/Clustering.h>
 #include <faiss/impl/AuxIndexStructures.h>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
+#include <omp.h>
 #include <faiss/utils/utils.h>
 #include <faiss/utils/random.h>
 #include <faiss/utils/distances.h>
@@ -33,7 +34,8 @@ ClusteringParameters::ClusteringParameters ():
     frozen_centroids(false),
     min_points_per_centroid(39),
     max_points_per_centroid(256),
-    seed(1234)
+    seed(1234),
+    decode_block_size(32768)
 {}
 // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
@@ -76,35 +78,233 @@ void Clustering::post_process_centroids ()
 }
-void Clustering::train (idx_t nx, const float *x_in, Index & index) {
+void Clustering::train (idx_t nx, const float *x_in, Index & index,
+                        const float *weights) {
+    train_encoded (nx, reinterpret_cast<const uint8_t *>(x_in), nullptr,
+                   index, weights);
+}
+namespace {
+using idx_t = Clustering::idx_t;
+idx_t subsample_training_set(
+          const Clustering &clus, idx_t nx, const uint8_t *x,
+          size_t line_size, const float * weights,
+          uint8_t **x_out,
+          float **weights_out
+)
+{
+    if (clus.verbose) {
+        printf("Sampling a subset of %ld / %ld for training\n",
+               clus.k * clus.max_points_per_centroid, nx);
+    }
+    std::vector<int> perm (nx);
+    rand_perm (perm.data (), nx, clus.seed);
+    nx = clus.k * clus.max_points_per_centroid;
+    uint8_t * x_new = new uint8_t [nx * line_size];
+    *x_out = x_new;
+    for (idx_t i = 0; i < nx; i++) {
+        memcpy (x_new + i * line_size, x + perm[i] * line_size, line_size);
+    }
+    if (weights) {
+        float *weights_new = new float[nx];
+        for (idx_t i = 0; i < nx; i++) {
+            weights_new[i] = weights[perm[i]];
+        }
+        *weights_out = weights_new;
+    } else {
+        *weights_out = nullptr;
+    }
+    return nx;
+}
+/** compute centroids as (weighted) sum of training points
+ *
+ * @param x            training vectors, size n * code_size (from codec)
+ * @param codec        how to decode the vectors (if NULL then cast to float*)
+ * @param weights      per-training vector weight, size n (or NULL)
+ * @param assign       nearest centroid for each training vector, size n
+ * @param k_frozen     do not update the k_frozen first centroids
+ * @param centroids    centroid vectors (output only), size k * d
+ * @param hassign      histogram of assignments per centroid (size k),
+ *                     should be 0 on input
+ *
+ */
+void compute_centroids (size_t d, size_t k, size_t n,
+                       size_t k_frozen,
+                       const uint8_t * x, const Index *codec,
+                       const int64_t * assign,
+                       const float * weights,
+                       float * hassign,
+                       float * centroids)
+{
+    k -= k_frozen;
+    centroids += k_frozen * d;
+    memset (centroids, 0, sizeof(*centroids) * d * k);
+    size_t line_size = codec ? codec->sa_code_size() : d * sizeof (float);
+#pragma omp parallel
+    {
+        int nt = omp_get_num_threads();
+        int rank = omp_get_thread_num();
+        // this thread is taking care of centroids c0:c1
+        size_t c0 = (k * rank) / nt;
+        size_t c1 = (k * (rank + 1)) / nt;
+        std::vector<float> decode_buffer (d);
+        for (size_t i = 0; i < n; i++) {
+            int64_t ci = assign[i];
+            assert (ci >= 0 && ci < k + k_frozen);
+            ci -= k_frozen;
+            if (ci >= c0 && ci < c1)  {
+                float * c = centroids + ci * d;
+                const float * xi;
+                if (!codec) {
+                    xi = reinterpret_cast<const float*>(x + i * line_size);
+                } else {
+                    float *xif = decode_buffer.data();
+                    codec->sa_decode (1, x + i * line_size, xif);
+                    xi = xif;
+                }
+                if (weights) {
+                    float w = weights[i];
+                    hassign[ci] += w;
+                    for (size_t j = 0; j < d; j++) {
+                        c[j] += xi[j] * w;
+                    }
+                } else {
+                    hassign[ci] += 1.0;
+                    for (size_t j = 0; j < d; j++) {
+                        c[j] += xi[j];
+                    }
+                }
+            }
+        }
+    }
+#pragma omp parallel for
+    for (size_t ci = 0; ci < k; ci++) {
+        if (hassign[ci] == 0) {
+            continue;
+        }
+        float norm = 1 / hassign[ci];
+        float * c = centroids + ci * d;
+        for (size_t j = 0; j < d; j++) {
+            c[j] *= norm;
+        }
+    }
+}
+// a bit above machine epsilon for float16
+#define EPS (1 / 1024.)
+/** Handle empty clusters by splitting larger ones.
+ *
+ * It works by slightly changing the centroids to make 2 clusters from
+ * a single one. Its arguments (d, k, n, k_frozen, hassign, centroids)
+ * have the same meaning as the like-named arguments of compute_centroids.
+ *
+ * @return           nb of splitting operations (larger is worse)
+ */
+int split_clusters (size_t d, size_t k, size_t n,
+                    size_t k_frozen,
+                    float * hassign,
+                    float * centroids)
+{
+    k -= k_frozen;
+    centroids += k_frozen * d;
+    /* Take care of void clusters */
+    size_t nsplit = 0;
+    RandomGenerator rng (1234);
+    for (size_t ci = 0; ci < k; ci++) {
+        if (hassign[ci] == 0) { /* need to redefine a centroid */
+            size_t cj;
+            for (cj = 0; 1; cj = (cj + 1) % k) {
+                /* probability to pick this cluster for split */
+                float p = (hassign[cj] - 1.0) / (float) (n - k);
+                float r = rng.rand_float ();
+                if (r < p) {
+                    break; /* found our cluster to be split */
+                }
+            }
+            memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);
+            /* small symmetric perturbation */
+            for (size_t j = 0; j < d; j++) {
+                if (j % 2 == 0) {
+                    centroids[ci * d + j] *= 1 + EPS;
+                    centroids[cj * d + j] *= 1 - EPS;
+                } else {
+                    centroids[ci * d + j] *= 1 - EPS;
+                    centroids[cj * d + j] *= 1 + EPS;
+                }
+            }
+            /* assume even split of the cluster */
+            hassign[ci] = hassign[cj] / 2;
+            hassign[cj] -= hassign[ci];
+            nsplit++;
+        }
+    }
+    return nsplit;
+}
+};
+void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
+                                const Index * codec, Index & index,
+                                const float *weights) {
     FAISS_THROW_IF_NOT_FMT (nx >= k,
              "Number of training points (%ld) should be at least "
              "as large as number of clusters (%ld)", nx, k);
+    FAISS_THROW_IF_NOT_FMT ((!codec || codec->d == d),
+             "Codec dimension %d not the same as data dimension %d",
+             int(codec->d), int(d));
+    FAISS_THROW_IF_NOT_FMT (index.d == d,
+            "Index dimension %d not the same as data dimension %d",
+            int(index.d), int(d));
     double t0 = getmillisecs();
-    // yes it is the user's responsibility, but it may spare us some
-    // hard-to-debug reports.
-    for (size_t i = 0; i < nx * d; i++) {
-      FAISS_THROW_IF_NOT_MSG (finite (x_in[i]),
-                        "input contains NaN's or Inf's");
+    if (!codec) {
+        // Check for NaNs or Infs in input data. Normally it is the
+        // user's responsibility, but it may spare us some hard-to-debug
+        // reports.
+        const float *x = reinterpret_cast<const float *>(x_in);
+        for (size_t i = 0; i < nx * d; i++) {
+            FAISS_THROW_IF_NOT_MSG (finite (x[i]),
+                                    "input contains NaN's or Inf's");
+        }
     }
-    const float *x = x_in;
-    ScopeDeleter<float> del1;
+    const uint8_t *x = x_in;
+    std::unique_ptr<uint8_t []> del1;
+    std::unique_ptr<float []> del3;
+    size_t line_size = codec ? codec->sa_code_size() : sizeof(float) * d;
     if (nx > k * max_points_per_centroid) {
-        if (verbose)
-            printf("Sampling a subset of %ld / %ld for training\n",
-                   k * max_points_per_centroid, nx);
-        std::vector<int> perm (nx);
-        rand_perm (perm.data (), nx, seed);
-        nx = k * max_points_per_centroid;
-        float * x_new = new float [nx * d];
-        for (idx_t i = 0; i < nx; i++)
-            memcpy (x_new + i * d, x + perm[i] * d, sizeof(x_new[0]) * d);
-        x = x_new;
-        del1.set (x);
+        uint8_t *x_new;
+        float *weights_new;
+        nx = subsample_training_set (*this, nx, x, line_size, weights,
+                                &x_new, &weights_new);
+        del1.reset (x_new); x = x_new;
+        del3.reset (weights_new); weights = weights_new;
     } else if (nx < k * min_points_per_centroid) {
         fprintf (stderr,
                  "WARNING clustering %ld points to %ld centroids: "
@@ -112,41 +312,53 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
                  nx, k, idx_t(k) * min_points_per_centroid);
     }
     if (nx == k) {
+        // this is a corner case, just copy training set to clusters
         if (verbose) {
             printf("Number of training points (%ld) same as number of "
                    "clusters, just copying\n", nx);
         }
-        // this is a corner case, just copy training set to clusters
         centroids.resize (d * k);
-        memcpy (centroids.data(), x_in, sizeof (*x_in) * d * k);
+        if (!codec) {
+            memcpy (centroids.data(), x_in, sizeof (float) * d * k);
+        } else {
+            codec->sa_decode (nx, x_in, centroids.data());
+        }
+        // one fake iteration...
+        ClusteringIterationStats stats = { 0.0, 0.0, 0.0, 1.0, 0 };
+        iteration_stats.push_back (stats);
         index.reset();
-        index.add(k, x_in);
+        index.add(k, centroids.data());
         return;
     }
-    if (verbose)
+    if (verbose) {
         printf("Clustering %d points in %ldD to %ld clusters, "
                "redo %d times, %d iterations\n",
                int(nx), d, k, nredo, niter);
+        if (codec) {
+            printf("Input data encoded in %ld bytes per vector\n",
+                   codec->sa_code_size ());
+        }
+    }
-    idx_t * assign = new idx_t[nx];
-    ScopeDeleter<idx_t> del (assign);
-    float * dis = new float[nx];
-    ScopeDeleter<float> del2(dis);
+    std::unique_ptr<idx_t []> assign(new idx_t[nx]);
+    std::unique_ptr<float []> dis(new float[nx]);
-    // for redo
+    // remember best iteration for redo
     float best_err = HUGE_VALF;
-    std::vector<float> best_obj;
+    std::vector<ClusteringIterationStats> best_obj;
     std::vector<float> best_centroids;
     // support input centroids
     FAISS_THROW_IF_NOT_MSG (
        centroids.size() % d == 0,
-       "size of provided input centroids not a multiple of dimension");
+       "size of provided input centroids not a multiple of dimension"
+    );
     size_t n_input_centroids = centroids.size() / d;
@@ -162,23 +374,36 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
     }
     t0 = getmillisecs();
+    // temporary buffer to decode vectors during the optimization
+    std::vector<float> decode_buffer
+        (codec ? d * decode_block_size : 0);
     for (int redo = 0; redo < nredo; redo++) {
         if (verbose && nredo > 1) {
             printf("Outer iteration %d / %d\n", redo, nredo);
         }
-        // initialize remaining centroids with random points from the dataset
+        // initialize (remaining) centroids with random points from the dataset
         centroids.resize (d * k);
         std::vector<int> perm (nx);
         rand_perm (perm.data(), nx, seed + 1 + redo * 15486557L);
-        for (int i = n_input_centroids; i < k ; i++)
-            memcpy (&centroids[i * d], x + perm[i] * d,
-                    d * sizeof (float));
+        if (!codec) {
+            for (int i = n_input_centroids; i < k ; i++) {
+                memcpy (&centroids[i * d], x + perm[i] * line_size, line_size);
+            }
+        } else {
+            for (int i = n_input_centroids; i < k ; i++) {
+                codec->sa_decode (1, x + perm[i] * line_size, &centroids[i * d]);
+            }
+        }
         post_process_centroids ();
+        // prepare the index
         if (index.ntotal != 0) {
             index.reset();
         }
@@ -188,49 +413,89 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
         }
         index.add (k, centroids.data());
+        // k-means iterations
         float err = 0;
         for (int i = 0; i < niter; i++) {
             double t0s = getmillisecs();
-            index.search (nx, x, 1, dis, assign);
+            if (!codec) {
+                index.search (nx, reinterpret_cast<const float *>(x), 1,
+                              dis.get(), assign.get());
+            } else {
+                // search by blocks of decode_block_size vectors
+                size_t code_size = codec->sa_code_size ();
+                for (size_t i0 = 0; i0 < nx; i0 += decode_block_size) {
+                    size_t i1 = i0 + decode_block_size;
+                    if (i1 > nx) { i1 = nx; }
+                    codec->sa_decode (i1 - i0, x + code_size * i0,
+                                      decode_buffer.data ());
+                    index.search (i1 - i0, decode_buffer.data (), 1,
+                                  dis.get() + i0, assign.get() + i0);
+                }
+            }
             InterruptCallback::check();
             t_search_tot += getmillisecs() - t0s;
+            // accumulate error
             err = 0;
-            for (int j = 0; j < nx; j++)
+            for (int j = 0; j < nx; j++) {
                 err += dis[j];
-            obj.push_back (err);
+            }
+            // update the centroids
+            std::vector<float> hassign (k);
-            int nsplit = km_update_centroids (
-                  x, centroids.data(),
-                  assign, d, k, nx, frozen_centroids ? n_input_centroids : 0);
+            size_t k_frozen = frozen_centroids ? n_input_centroids : 0;
+            compute_centroids (
+                  d, k, nx, k_frozen,
+                  x, codec, assign.get(), weights,
+                  hassign.data(), centroids.data()
+            );
+            int nsplit = split_clusters (
+                  d, k, nx, k_frozen,
+                  hassign.data(), centroids.data()
+            );
+            // collect statistics
+            ClusteringIterationStats stats =
+                { err, (getmillisecs() - t0) / 1000.0,
+                  t_search_tot / 1000, imbalance_factor (nx, k, assign.get()),
+                  nsplit };
+            iteration_stats.push_back(stats);
             if (verbose) {
                 printf ("  Iteration %d (%.2f s, search %.2f s): "
                         "objective=%g imbalance=%.3f nsplit=%d       \r",
-                        i, (getmillisecs() - t0) / 1000.0,
-                        t_search_tot / 1000,
-                        err, imbalance_factor (nx, k, assign),
-                        nsplit);
+                        i, stats.time, stats.time_search, stats.obj,
+                        stats.imbalance_factor, nsplit);
                 fflush (stdout);
             }
             post_process_centroids ();
+            // add centroids to index for the next iteration (or for output)
             index.reset ();
-            if (update_index)
+            if (update_index) {
                 index.train (k, centroids.data());
+            }
-            assert (index.ntotal == 0);
             index.add (k, centroids.data());
             InterruptCallback::check ();
         }
         if (verbose) printf("\n");
         if (nredo > 1) {
             if (err < best_err) {
-                if (verbose)
+                if (verbose) {
                     printf ("Objective improved: keep new clusters\n");
+                }
                 best_centroids = centroids;
-                best_obj = obj;
+                best_obj = iteration_stats;
                 best_err = err;
             }
             index.reset ();
@@ -238,7 +503,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
     }
     if (nredo > 1) {
         centroids = best_centroids;
-        obj = best_obj;
+        iteration_stats = best_obj;
         index.reset();
         index.add(k, best_centroids.data());
     }
@@ -255,7 +520,7 @@ float kmeans_clustering (size_t d, size_t n, size_t k,
     IndexFlatL2 index (d);
     clus.train (n, x, index);
     memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
-    return clus.obj.back();
+    return clus.iteration_stats.back().obj;
 }
 } // namespace faiss