faiss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 61a0b7a7d20933b60a9e0e213016b77f10eae5bb86ecf6825f5f5661f31f5d7d
+  data.tar.gz: 967378ee774a35e3a639b1d902648ebf4177ffb7ed43e7228e8432f284f397c1
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 32747a4d4a3d40f15e9802280d894b2270d4b78ac0a10859442d0fd3c7ae27a55032a92e072756cb4046964c9d53afcc9586ac954e2b3cf63d057d0a3652e5a8
+  data.tar.gz: ca5005286253b7dea1546160ffb00c4b91a8b926512fbcfb7db594171435249e88408c982dc53fed4d90f87e68979bd9c2a2c1975f94495ca77c9ac878b22c1c
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
@@ -1,22 +1,22 @@
-Copyright (c) 2020 Andrew Kane
-
 MIT License
 
-
-
-
-
-
-
-
+Copyright (c) Facebook, Inc. and its affiliates.
+Copyright (c) 2020 Andrew Kane
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
-The above copyright notice and this permission notice shall be
-
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-
-
-
-
-
-OF
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
data/README.md
CHANGED
data/lib/faiss/version.rb
CHANGED
data/vendor/faiss/Clustering.cpp
CHANGED
@@ -10,11 +10,12 @@
 #include <faiss/Clustering.h>
 #include <faiss/impl/AuxIndexStructures.h>
 
-
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 
+#include <omp.h>
+
 #include <faiss/utils/utils.h>
 #include <faiss/utils/random.h>
 #include <faiss/utils/distances.h>
@@ -33,7 +34,8 @@ ClusteringParameters::ClusteringParameters ():
     frozen_centroids(false),
     min_points_per_centroid(39),
     max_points_per_centroid(256),
-    seed(1234)
+    seed(1234),
+    decode_block_size(32768)
 {}
 // 39 corresponds to 10000 / 256 -> to avoid warnings on PQ tests with randu10k
 
@@ -76,35 +78,233 @@ void Clustering::post_process_centroids ()
 }
 
 
-void Clustering::train (idx_t nx, const float *x_in, Index & index) {
+void Clustering::train (idx_t nx, const float *x_in, Index & index,
+                        const float *weights) {
+    train_encoded (nx, reinterpret_cast<const uint8_t *>(x_in), nullptr,
+                   index, weights);
+}
+
+
+namespace {
+
+using idx_t = Clustering::idx_t;
+
+idx_t subsample_training_set(
+        const Clustering &clus, idx_t nx, const uint8_t *x,
+        size_t line_size, const float * weights,
+        uint8_t **x_out,
+        float **weights_out
+)
+{
+    if (clus.verbose) {
+        printf("Sampling a subset of %ld / %ld for training\n",
+               clus.k * clus.max_points_per_centroid, nx);
+    }
+    std::vector<int> perm (nx);
+    rand_perm (perm.data (), nx, clus.seed);
+    nx = clus.k * clus.max_points_per_centroid;
+    uint8_t * x_new = new uint8_t [nx * line_size];
+    *x_out = x_new;
+    for (idx_t i = 0; i < nx; i++) {
+        memcpy (x_new + i * line_size, x + perm[i] * line_size, line_size);
+    }
+    if (weights) {
+        float *weights_new = new float[nx];
+        for (idx_t i = 0; i < nx; i++) {
+            weights_new[i] = weights[perm[i]];
+        }
+        *weights_out = weights_new;
+    } else {
+        *weights_out = nullptr;
+    }
+    return nx;
+}
+
+/** compute centroids as (weighted) sum of training points
+ *
+ * @param x            training vectors, size n * code_size (from codec)
+ * @param codec        how to decode the vectors (if NULL then cast to float*)
+ * @param weights      per-training vector weight, size n (or NULL)
+ * @param assign       nearest centroid for each training vector, size n
+ * @param k_frozen     do not update the k_frozen first centroids
+ * @param centroids    centroid vectors (output only), size k * d
+ * @param hassign      histogram of assignments per centroid (size k),
+ *                     should be 0 on input
+ *
+ */
+
+void compute_centroids (size_t d, size_t k, size_t n,
+                        size_t k_frozen,
+                        const uint8_t * x, const Index *codec,
+                        const int64_t * assign,
+                        const float * weights,
+                        float * hassign,
+                        float * centroids)
+{
+    k -= k_frozen;
+    centroids += k_frozen * d;
+
+    memset (centroids, 0, sizeof(*centroids) * d * k);
+
+    size_t line_size = codec ? codec->sa_code_size() : d * sizeof (float);
+
+#pragma omp parallel
+    {
+        int nt = omp_get_num_threads();
+        int rank = omp_get_thread_num();
+
+        // this thread is taking care of centroids c0:c1
+        size_t c0 = (k * rank) / nt;
+        size_t c1 = (k * (rank + 1)) / nt;
+        std::vector<float> decode_buffer (d);
+
+        for (size_t i = 0; i < n; i++) {
+            int64_t ci = assign[i];
+            assert (ci >= 0 && ci < k + k_frozen);
+            ci -= k_frozen;
+            if (ci >= c0 && ci < c1) {
+                float * c = centroids + ci * d;
+                const float * xi;
+                if (!codec) {
+                    xi = reinterpret_cast<const float*>(x + i * line_size);
+                } else {
+                    float *xif = decode_buffer.data();
+                    codec->sa_decode (1, x + i * line_size, xif);
+                    xi = xif;
+                }
+                if (weights) {
+                    float w = weights[i];
+                    hassign[ci] += w;
+                    for (size_t j = 0; j < d; j++) {
+                        c[j] += xi[j] * w;
+                    }
+                } else {
+                    hassign[ci] += 1.0;
+                    for (size_t j = 0; j < d; j++) {
+                        c[j] += xi[j];
+                    }
+                }
+            }
+        }
+
+    }
+
+#pragma omp parallel for
+    for (size_t ci = 0; ci < k; ci++) {
+        if (hassign[ci] == 0) {
+            continue;
+        }
+        float norm = 1 / hassign[ci];
+        float * c = centroids + ci * d;
+        for (size_t j = 0; j < d; j++) {
+            c[j] *= norm;
+        }
+    }
+
+}
+
+// a bit above machine epsilon for float16
+#define EPS (1 / 1024.)
+
+/** Handle empty clusters by splitting larger ones.
+ *
+ * It works by slightly changing the centroids to make 2 clusters from
+ * a single one. Takes the same arguements as compute_centroids.
+ *
+ * @return nb of spliting operations (larger is worse)
+ */
+int split_clusters (size_t d, size_t k, size_t n,
+                    size_t k_frozen,
+                    float * hassign,
+                    float * centroids)
+{
+    k -= k_frozen;
+    centroids += k_frozen * d;
+
+    /* Take care of void clusters */
+    size_t nsplit = 0;
+    RandomGenerator rng (1234);
+    for (size_t ci = 0; ci < k; ci++) {
+        if (hassign[ci] == 0) { /* need to redefine a centroid */
+            size_t cj;
+            for (cj = 0; 1; cj = (cj + 1) % k) {
+                /* probability to pick this cluster for split */
+                float p = (hassign[cj] - 1.0) / (float) (n - k);
+                float r = rng.rand_float ();
+                if (r < p) {
+                    break; /* found our cluster to be split */
+                }
+            }
+            memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);
+
+            /* small symmetric pertubation */
+            for (size_t j = 0; j < d; j++) {
+                if (j % 2 == 0) {
+                    centroids[ci * d + j] *= 1 + EPS;
+                    centroids[cj * d + j] *= 1 - EPS;
+                } else {
+                    centroids[ci * d + j] *= 1 - EPS;
+                    centroids[cj * d + j] *= 1 + EPS;
+                }
+            }
+
+            /* assume even split of the cluster */
+            hassign[ci] = hassign[cj] / 2;
+            hassign[cj] -= hassign[ci];
+            nsplit++;
+        }
+    }
+
+    return nsplit;
+
+}
+
+
+
+};
+
+
+void Clustering::train_encoded (idx_t nx, const uint8_t *x_in,
+                                const Index * codec, Index & index,
+                                const float *weights) {
+
     FAISS_THROW_IF_NOT_FMT (nx >= k,
             "Number of training points (%ld) should be at least "
             "as large as number of clusters (%ld)", nx, k);
 
+    FAISS_THROW_IF_NOT_FMT ((!codec || codec->d == d),
+            "Codec dimension %d not the same as data dimension %d",
+            int(codec->d), int(d));
+
+    FAISS_THROW_IF_NOT_FMT (index.d == d,
+            "Index dimension %d not the same as data dimension %d",
+            int(index.d), int(d));
+
     double t0 = getmillisecs();
 
-
-
-
-
-
+    if (!codec) {
+        // Check for NaNs in input data. Normally it is the user's
+        // responsibility, but it may spare us some hard-to-debug
+        // reports.
+        const float *x = reinterpret_cast<const float *>(x_in);
+        for (size_t i = 0; i < nx * d; i++) {
+            FAISS_THROW_IF_NOT_MSG (finite (x[i]),
+                                    "input contains NaN's or Inf's");
+        }
     }
 
-    const
-
+    const uint8_t *x = x_in;
+    std::unique_ptr<uint8_t []> del1;
+    std::unique_ptr<float []> del3;
+    size_t line_size = codec ? codec->sa_code_size() : sizeof(float) * d;
 
     if (nx > k * max_points_per_centroid) {
-
-
-
-
-
-
-        float * x_new = new float [nx * d];
-        for (idx_t i = 0; i < nx; i++)
-            memcpy (x_new + i * d, x + perm[i] * d, sizeof(x_new[0]) * d);
-        x = x_new;
-        del1.set (x);
+        uint8_t *x_new;
+        float *weights_new;
+        nx = subsample_training_set (*this, nx, x, line_size, weights,
+                                     &x_new, &weights_new);
+        del1.reset (x_new); x = x_new;
+        del3.reset (weights_new); weights = weights_new;
     } else if (nx < k * min_points_per_centroid) {
         fprintf (stderr,
                  "WARNING clustering %ld points to %ld centroids: "
@@ -112,41 +312,53 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
                  nx, k, idx_t(k) * min_points_per_centroid);
     }
 
-
     if (nx == k) {
+        // this is a corner case, just copy training set to clusters
         if (verbose) {
             printf("Number of training points (%ld) same as number of "
                    "clusters, just copying\n", nx);
         }
-        // this is a corner case, just copy training set to clusters
         centroids.resize (d * k);
-
+        if (!codec) {
+            memcpy (centroids.data(), x_in, sizeof (float) * d * k);
+        } else {
+            codec->sa_decode (nx, x_in, centroids.data());
+        }
+
+        // one fake iteration...
+        ClusteringIterationStats stats = { 0.0, 0.0, 0.0, 1.0, 0 };
+        iteration_stats.push_back (stats);
+
         index.reset();
-        index.add(k,
+        index.add(k, centroids.data());
         return;
     }
 
 
-    if (verbose)
+    if (verbose) {
         printf("Clustering %d points in %ldD to %ld clusters, "
                "redo %d times, %d iterations\n",
               int(nx), d, k, nredo, niter);
+        if (codec) {
+            printf("Input data encoded in %ld bytes per vector\n",
+                   codec->sa_code_size ());
+        }
+    }
 
-    idx_t
-
-    float * dis = new float[nx];
-    ScopeDeleter<float> del2(dis);
+    std::unique_ptr<idx_t []> assign(new idx_t[nx]);
+    std::unique_ptr<float []> dis(new float[nx]);
 
-    // for redo
+    // remember best iteration for redo
     float best_err = HUGE_VALF;
-    std::vector<
+    std::vector<ClusteringIterationStats> best_obj;
     std::vector<float> best_centroids;
 
     // support input centroids
 
     FAISS_THROW_IF_NOT_MSG (
        centroids.size() % d == 0,
-       "size of provided input centroids not a multiple of dimension");
+       "size of provided input centroids not a multiple of dimension"
+    );
 
     size_t n_input_centroids = centroids.size() / d;
 
@@ -162,23 +374,36 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
     }
     t0 = getmillisecs();
 
+    // temporary buffer to decode vectors during the optimization
+    std::vector<float> decode_buffer
+        (codec ? d * decode_block_size : 0);
+
     for (int redo = 0; redo < nredo; redo++) {
 
         if (verbose && nredo > 1) {
             printf("Outer iteration %d / %d\n", redo, nredo);
         }
 
-        // initialize remaining centroids with random points from the dataset
+        // initialize (remaining) centroids with random points from the dataset
         centroids.resize (d * k);
         std::vector<int> perm (nx);
 
         rand_perm (perm.data(), nx, seed + 1 + redo * 15486557L);
-
-
-
+
+        if (!codec) {
+            for (int i = n_input_centroids; i < k ; i++) {
+                memcpy (&centroids[i * d], x + perm[i] * line_size, line_size);
+            }
+        } else {
+            for (int i = n_input_centroids; i < k ; i++) {
+                codec->sa_decode (1, x + perm[i] * line_size, &centroids[i * d]);
+            }
+        }
 
         post_process_centroids ();
 
+        // prepare the index
+
         if (index.ntotal != 0) {
             index.reset();
         }
@@ -188,49 +413,89 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
         }
 
         index.add (k, centroids.data());
+
+        // k-means iterations
+
         float err = 0;
         for (int i = 0; i < niter; i++) {
             double t0s = getmillisecs();
-
+
+            if (!codec) {
+                index.search (nx, reinterpret_cast<const float *>(x), 1,
+                              dis.get(), assign.get());
+            } else {
+                // search by blocks of decode_block_size vectors
+                size_t code_size = codec->sa_code_size ();
+                for (size_t i0 = 0; i0 < nx; i0 += decode_block_size) {
+                    size_t i1 = i0 + decode_block_size;
+                    if (i1 > nx) { i1 = nx; }
+                    codec->sa_decode (i1 - i0, x + code_size * i0,
+                                      decode_buffer.data ());
+                    index.search (i1 - i0, decode_buffer.data (), 1,
+                                  dis.get() + i0, assign.get() + i0);
+                }
+            }
+
             InterruptCallback::check();
             t_search_tot += getmillisecs() - t0s;
 
+            // accumulate error
             err = 0;
-            for (int j = 0; j < nx; j++)
+            for (int j = 0; j < nx; j++) {
                 err += dis[j];
-
+            }
+
+            // update the centroids
+            std::vector<float> hassign (k);
 
-
-
-
+            size_t k_frozen = frozen_centroids ? n_input_centroids : 0;
+            compute_centroids (
+                d, k, nx, k_frozen,
+                x, codec, assign.get(), weights,
+                hassign.data(), centroids.data()
+            );
+
+            int nsplit = split_clusters (
+                d, k, nx, k_frozen,
+                hassign.data(), centroids.data()
+            );
+
+            // collect statistics
+            ClusteringIterationStats stats =
+                { err, (getmillisecs() - t0) / 1000.0,
+                  t_search_tot / 1000, imbalance_factor (nx, k, assign.get()),
+                  nsplit };
+            iteration_stats.push_back(stats);
 
             if (verbose) {
                 printf ("  Iteration %d (%.2f s, search %.2f s): "
                         "objective=%g imbalance=%.3f nsplit=%d \r",
-                        i,
-
-                        err, imbalance_factor (nx, k, assign),
-                        nsplit);
+                        i, stats.time, stats.time_search, stats.obj,
+                        stats.imbalance_factor, nsplit);
                 fflush (stdout);
             }
 
             post_process_centroids ();
 
+            // add centroids to index for the next iteration (or for output)
+
             index.reset ();
-            if (update_index)
+            if (update_index) {
                 index.train (k, centroids.data());
+            }
 
-            assert (index.ntotal == 0);
             index.add (k, centroids.data());
             InterruptCallback::check ();
         }
+
         if (verbose) printf("\n");
         if (nredo > 1) {
             if (err < best_err) {
-                if (verbose)
+                if (verbose) {
                     printf ("Objective improved: keep new clusters\n");
+                }
                 best_centroids = centroids;
-                best_obj =
+                best_obj = iteration_stats;
                 best_err = err;
             }
             index.reset ();
@@ -238,7 +503,7 @@ void Clustering::train (idx_t nx, const float *x_in, Index & index) {
     }
     if (nredo > 1) {
         centroids = best_centroids;
-
+        iteration_stats = best_obj;
         index.reset();
         index.add(k, best_centroids.data());
     }
@@ -255,7 +520,7 @@ float kmeans_clustering (size_t d, size_t n, size_t k,
     IndexFlatL2 index (d);
     clus.train (n, x, index);
     memcpy(centroids, clus.centroids.data(), sizeof(*centroids) * d * k);
-    return clus.
+    return clus.iteration_stats.back().obj;
 }
 
 } // namespace faiss
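Note on the Clustering.cpp changes above: Clustering::train now takes an optional per-point weights array and forwards to the new Clustering::train_encoded (which can also cluster codec-compressed vectors), and per-iteration statistics are recorded in iteration_stats rather than a plain objective vector. A minimal sketch of the weighted path, using only signatures visible in this diff; the helper name weighted_kmeans and the data/weights buffers are illustrative, not part of the package:

#include <vector>

#include <faiss/Clustering.h>
#include <faiss/IndexFlat.h>

// Sketch: cluster n d-dimensional float vectors into k centroids, giving each
// training point an importance weight; pass nullptr to keep the unweighted behaviour.
std::vector<float> weighted_kmeans(int d, int k, size_t n,
                                   const float* data, const float* weights) {
    faiss::Clustering clus(d, k);
    clus.verbose = true;                  // prints the per-iteration stats shown above
    faiss::IndexFlatL2 index(d);          // assignment index, as in kmeans_clustering
    clus.train(n, data, index, weights);  // weights argument added in this release
    // clus.iteration_stats.back().obj now holds the final objective
    return clus.centroids;                // k * d centroid coordinates
}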