RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

Files changed (202) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/impl/NSG.h ADDED Viewed

@@ -0,0 +1,199 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+// -*- c++ -*-
+#pragma once
+#include <memory>
+#include <mutex>
+#include <vector>
+#include <omp.h>
+#include <faiss/Index.h>
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/Heap.h>
+#include <faiss/utils/random.h>
+namespace faiss {
+/** Implementation of the Navigating Spreading-out Graph (NSG)
+ * datastructure.
+ *
+ * Fast Approximate Nearest Neighbor Search With The
+ * Navigating Spreading-out Graph
+ *
+ *  Cong Fu, Chao Xiang, Changxu Wang, Deng Cai, VLDB 2019
+ *
+ * This implementation is heavily influenced by the NSG
+ * implementation by ZJULearning Group
+ * (https://github.com/zjulearning/nsg)
+ *
+ * The NSG object stores only the neighbor link structure, see
+ * IndexNSG.h for the full index object.
+ */
+struct DistanceComputer; // from AuxIndexStructures
+struct Neighbor;
+struct Node;
+namespace nsg {
+/***********************************************************
+ * Graph structure to store a graph.
+ *
+ * It is represented by an adjacency matrix `data`, where
+ * data[i, j] is the j-th neighbor of node i.
+ ***********************************************************/
+template <class node_t>
+struct Graph {
+    node_t* data;    ///< the flattened adjacency matrix
+    int K;           ///< nb of neighbors per node
+    int N;           ///< total nb of nodes
+    bool own_fields; ///< the underlying data owned by itself or not
+    // construct from a known graph
+    Graph(node_t* data, int N, int K)
+            : data(data), K(K), N(N), own_fields(false) {}
+    // construct an empty graph
+    // NOTE: the newly allocated data needs to be destroyed at destruction time
+    Graph(int N, int K) : K(K), N(N), own_fields(true) {
+        data = new node_t[N * K];
+    }
+    // copy constructor
+    Graph(const Graph& g) : Graph(g.N, g.K) {
+        memcpy(data, g.data, N * K * sizeof(node_t));
+    }
+    // release the allocated memory if needed
+    ~Graph() {
+        if (own_fields) {
+            delete[] data;
+        }
+    }
+    // access the j-th neighbor of node i
+    inline node_t at(int i, int j) const {
+        return data[i * K + j];
+    }
+    // access the j-th neighbor of node i by reference
+    inline node_t& at(int i, int j) {
+        return data[i * K + j];
+    }
+};
+DistanceComputer* storage_distance_computer(const Index* storage);
+} // namespace nsg
+struct NSG {
+    /// internal storage of vectors (32 bits: this is expensive)
+    using storage_idx_t = int;
+    /// Faiss results are 64-bit
+    using idx_t = Index::idx_t;
+    int ntotal; ///< nb of nodes
+    /// construction-time parameters
+    int R; ///< nb of neighbors per node
+    int L; ///< length of the search path at construction time
+    int C; ///< candidate pool size at construction time
+    // search-time parameters
+    int search_L; ///< length of the search path
+    int enterpoint; ///< enterpoint
+    std::shared_ptr<nsg::Graph<int>> final_graph; ///< NSG graph structure
+    bool is_built; ///< NSG is built or not
+    RandomGenerator rng; ///< random generator
+    explicit NSG(int R = 32);
+    // build NSG from a KNN graph
+    void build(
+            Index* storage,
+            idx_t n,
+            const nsg::Graph<idx_t>& knn_graph,
+            bool verbose);
+    // reset the graph
+    void reset();
+    // search interface
+    void search(
+            DistanceComputer& dis,
+            int k,
+            idx_t* I,
+            float* D,
+            VisitedTable& vt) const;
+    // Compute the center point
+    void init_graph(Index* storage, const nsg::Graph<idx_t>& knn_graph);
+    // Search on a built graph.
+    // If collect_fullset is true, the visited nodes will be
+    // collected in `fullset`.
+    template <bool collect_fullset, class index_t>
+    void search_on_graph(
+            const nsg::Graph<index_t>& graph,
+            DistanceComputer& dis,
+            VisitedTable& vt,
+            int ep,
+            int pool_size,
+            std::vector<Neighbor>& retset,
+            std::vector<Node>& fullset) const;
+    // Add reverse links
+    void add_reverse_links(
+            int q,
+            std::vector<std::mutex>& locks,
+            DistanceComputer& dis,
+            nsg::Graph<Node>& graph);
+    void sync_prune(
+            int q,
+            std::vector<Node>& pool,
+            DistanceComputer& dis,
+            VisitedTable& vt,
+            const nsg::Graph<idx_t>& knn_graph,
+            nsg::Graph<Node>& graph);
+    void link(
+            Index* storage,
+            const nsg::Graph<idx_t>& knn_graph,
+            nsg::Graph<Node>& graph,
+            bool verbose);
+    // make the NSG fully connected
+    int tree_grow(Index* storage, std::vector<int>& degrees);
+    // count the size of the connected component
+    // using a depth-first search starting from root
+    int dfs(VisitedTable& vt, int root, int cnt) const;
+    // attach one unlinked node
+    int attach_unlinked(
+            Index* storage,
+            VisitedTable& vt,
+            VisitedTable& vt2,
+            std::vector<int>& degrees);
+    // check the integrity of the NSG built
+    void check_graph() const;
+};
+} // namespace faiss

data/vendor/faiss/faiss/impl/PolysemousTraining.cpp CHANGED Viewed

@@ -8,18 +8,21 @@
 // -*- c++ -*-
 #include <faiss/impl/PolysemousTraining.h>
+#include "faiss/impl/FaissAssert.h"
+#include <omp.h>
+#include <stdint.h>
-#include <cstdlib>
 #include <cmath>
+#include <cstdlib>
 #include <cstring>
-#include <stdint.h>
 #include <algorithm>
-#include <faiss/utils/random.h>
-#include <faiss/utils/utils.h>
 #include <faiss/utils/distances.h>
 #include <faiss/utils/hamming.h>
+#include <faiss/utils/random.h>
+#include <faiss/utils/utils.h>
 #include <faiss/impl/FaissAssert.h>
@@ -29,16 +32,14 @@
 namespace faiss {
 /****************************************************
  * Optimization code
  ****************************************************/
-SimulatedAnnealingParameters::SimulatedAnnealingParameters ()
-{
+SimulatedAnnealingParameters::SimulatedAnnealingParameters() {
     // set some reasonable defaults for the optimization
     init_temperature = 0.7;
-    temperature_decay = pow (0.9, 1/500.);
+    temperature_decay = pow(0.9, 1 / 500.);
     // reduce by a factor 0.9 every 500 it
     n_iter = 500000;
     n_redo = 2;
@@ -50,44 +51,37 @@ SimulatedAnnealingParameters::SimulatedAnnealingParameters ()
 // what would the cost update be if iw and jw were swapped?
 // default implementation just computes both and computes the difference
-double PermutationObjective::cost_update (
-        const int *perm, int iw, int jw) const
-{
-    double orig_cost = compute_cost (perm);
+double PermutationObjective::cost_update(const int* perm, int iw, int jw)
+        const {
+    double orig_cost = compute_cost(perm);
-    std::vector<int> perm2 (n);
+    std::vector<int> perm2(n);
     for (int i = 0; i < n; i++)
         perm2[i] = perm[i];
     perm2[iw] = perm[jw];
     perm2[jw] = perm[iw];
-    double new_cost = compute_cost (perm2.data());
+    double new_cost = compute_cost(perm2.data());
     return new_cost - orig_cost;
 }
-SimulatedAnnealingOptimizer::SimulatedAnnealingOptimizer (
-        PermutationObjective *obj,
-        const SimulatedAnnealingParameters &p):
-    SimulatedAnnealingParameters (p),
-    obj (obj),
-    n(obj->n),
-    logfile (nullptr)
-{
-    rnd = new RandomGenerator (p.seed);
-    FAISS_THROW_IF_NOT (n < 100000 && n >=0 );
+SimulatedAnnealingOptimizer::SimulatedAnnealingOptimizer(
+        PermutationObjective* obj,
+        const SimulatedAnnealingParameters& p)
+        : SimulatedAnnealingParameters(p),
+          obj(obj),
+          n(obj->n),
+          logfile(nullptr) {
+    rnd = new RandomGenerator(p.seed);
+    FAISS_THROW_IF_NOT(n < 100000 && n >= 0);
 }
-SimulatedAnnealingOptimizer::~SimulatedAnnealingOptimizer ()
-{
+SimulatedAnnealingOptimizer::~SimulatedAnnealingOptimizer() {
     delete rnd;
 }
 // run the optimization and return the best result in best_perm
-double SimulatedAnnealingOptimizer::run_optimization (int * best_perm)
-{
+double SimulatedAnnealingOptimizer::run_optimization(int* best_perm) {
     double min_cost = 1e30;
     // just do a few runs of the annealing and keep the lowest output cost
@@ -95,84 +89,89 @@ double SimulatedAnnealingOptimizer::run_optimization (int * best_perm)
         std::vector<int> perm(n);
         for (int i = 0; i < n; i++)
             perm[i] = i;
-         if (init_random) {
+        if (init_random) {
             for (int i = 0; i < n; i++) {
-                int j = i + rnd->rand_int (n - i);
-                std::swap (perm[i], perm[j]);
+                int j = i + rnd->rand_int(n - i);
+                std::swap(perm[i], perm[j]);
             }
         }
-         float cost = optimize (perm.data());
-        if (logfile) fprintf (logfile, "\n");
-        if(verbose > 1) {
-            printf ("    optimization run %d: cost=%g %s\n",
-                    it, cost, cost < min_cost ? "keep" : "");
+        float cost = optimize(perm.data());
+        if (logfile)
+            fprintf(logfile, "\n");
+        if (verbose > 1) {
+            printf("    optimization run %d: cost=%g %s\n",
+                   it,
+                   cost,
+                   cost < min_cost ? "keep" : "");
         }
         if (cost < min_cost) {
-            memcpy (best_perm, perm.data(), sizeof(perm[0]) * n);
+            memcpy(best_perm, perm.data(), sizeof(perm[0]) * n);
             min_cost = cost;
         }
     }
-     return min_cost;
+    return min_cost;
 }
 // perform the optimization loop, starting from and modifying
 // permutation in-place
-double SimulatedAnnealingOptimizer::optimize (int *perm)
-{
-    double cost = init_cost = obj->compute_cost (perm);
+double SimulatedAnnealingOptimizer::optimize(int* perm) {
+    double cost = init_cost = obj->compute_cost(perm);
     int log2n = 0;
-    while (!(n <= (1 << log2n))) log2n++;
+    while (!(n <= (1 << log2n)))
+        log2n++;
     double temperature = init_temperature;
-     int n_swap = 0, n_hot = 0;
+    int n_swap = 0, n_hot = 0;
     for (int it = 0; it < n_iter; it++) {
         temperature = temperature * temperature_decay;
         int iw, jw;
         if (only_bit_flips) {
-            iw = rnd->rand_int (n);
-            jw = iw ^ (1 << rnd->rand_int (log2n));
+            iw = rnd->rand_int(n);
+            jw = iw ^ (1 << rnd->rand_int(log2n));
         } else {
-            iw = rnd->rand_int (n);
-            jw = rnd->rand_int (n - 1);
-            if (jw == iw) jw++;
+            iw = rnd->rand_int(n);
+            jw = rnd->rand_int(n - 1);
+            if (jw == iw)
+                jw++;
         }
-         double delta_cost = obj->cost_update (perm, iw, jw);
-         if (delta_cost < 0 || rnd->rand_float () < temperature) {
-            std::swap (perm[iw], perm[jw]);
+        double delta_cost = obj->cost_update(perm, iw, jw);
+        if (delta_cost < 0 || rnd->rand_float() < temperature) {
+            std::swap(perm[iw], perm[jw]);
             cost += delta_cost;
             n_swap++;
-            if (delta_cost >= 0) n_hot++;
+            if (delta_cost >= 0)
+                n_hot++;
         }
-         if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) {
-            printf ("      iteration %d cost %g temp %g n_swap %d "
-                    "(%d hot)     \r",
-                    it, cost, temperature, n_swap, n_hot);
+        if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) {
+            printf("      iteration %d cost %g temp %g n_swap %d "
+                   "(%d hot)     \r",
+                   it,
+                   cost,
+                   temperature,
+                   n_swap,
+                   n_hot);
             fflush(stdout);
         }
         if (logfile) {
-            fprintf (logfile, "%d %g %g %d %d\n",
-                    it, cost, temperature, n_swap, n_hot);
+            fprintf(logfile,
+                    "%d %g %g %d %d\n",
+                    it,
+                    cost,
+                    temperature,
+                    n_swap,
+                    n_hot);
         }
-     }
-    if (verbose > 1) printf("\n");
+    }
+    if (verbose > 1)
+        printf("\n");
     return cost;
 }
 /****************************************************
  * Cost functions: ReproduceDistanceTable
  ****************************************************/
-static inline int hamming_dis (uint64_t a, uint64_t b)
-{
-    return __builtin_popcountl (a ^ b);
+static inline int hamming_dis(uint64_t a, uint64_t b) {
+    return __builtin_popcountl(a ^ b);
 }
 namespace {
@@ -182,14 +181,14 @@ struct ReproduceWithHammingObjective : PermutationObjective {
     int nbits;
     double dis_weight_factor;
-    static double sqr (double x) { return x * x; }
+    static double sqr(double x) {
+        return x * x;
+    }
     // weighting of distances: it is more important to reproduce small
     // distances well
-    double dis_weight (double x) const
-    {
-        return exp (-dis_weight_factor * x);
+    double dis_weight(double x) const {
+        return exp(-dis_weight_factor * x);
     }
     std::vector<double> target_dis; // wanted distances (size n^2)
@@ -197,101 +196,105 @@ struct ReproduceWithHammingObjective : PermutationObjective {
     // cost = quadratic difference between actual distance and Hamming distance
     double compute_cost(const int* perm) const override {
-      double cost = 0;
-      for (int i = 0; i < n; i++) {
-        for (int j = 0; j < n; j++) {
-          double wanted = target_dis[i * n + j];
-          double w = weights[i * n + j];
-          double actual = hamming_dis(perm[i], perm[j]);
-          cost += w * sqr(wanted - actual);
+        double cost = 0;
+        for (int i = 0; i < n; i++) {
+            for (int j = 0; j < n; j++) {
+                double wanted = target_dis[i * n + j];
+                double w = weights[i * n + j];
+                double actual = hamming_dis(perm[i], perm[j]);
+                cost += w * sqr(wanted - actual);
+            }
         }
-      }
-      return cost;
+        return cost;
     }
     // what would the cost update be if iw and jw were swapped?
     // computed in O(n) instead of O(n^2) for the full re-computation
     double cost_update(const int* perm, int iw, int jw) const override {
-      double delta_cost = 0;
+        double delta_cost = 0;
-      for (int i = 0; i < n; i++) {
-        if (i == iw) {
-          for (int j = 0; j < n; j++) {
-            double wanted = target_dis[i * n + j], w = weights[i * n + j];
-            double actual = hamming_dis(perm[i], perm[j]);
-            delta_cost -= w * sqr(wanted - actual);
-            double new_actual =
-                hamming_dis(perm[jw], perm[j == iw ? jw : j == jw ? iw : j]);
-            delta_cost += w * sqr(wanted - new_actual);
-          }
-        } else if (i == jw) {
-          for (int j = 0; j < n; j++) {
-            double wanted = target_dis[i * n + j], w = weights[i * n + j];
-            double actual = hamming_dis(perm[i], perm[j]);
-            delta_cost -= w * sqr(wanted - actual);
-            double new_actual =
-                hamming_dis(perm[iw], perm[j == iw ? jw : j == jw ? iw : j]);
-            delta_cost += w * sqr(wanted - new_actual);
-          }
-        } else {
-          int j = iw;
-          {
-            double wanted = target_dis[i * n + j], w = weights[i * n + j];
-            double actual = hamming_dis(perm[i], perm[j]);
-            delta_cost -= w * sqr(wanted - actual);
-            double new_actual = hamming_dis(perm[i], perm[jw]);
-            delta_cost += w * sqr(wanted - new_actual);
-          }
-          j = jw;
-          {
-            double wanted = target_dis[i * n + j], w = weights[i * n + j];
-            double actual = hamming_dis(perm[i], perm[j]);
-            delta_cost -= w * sqr(wanted - actual);
-            double new_actual = hamming_dis(perm[i], perm[iw]);
-            delta_cost += w * sqr(wanted - new_actual);
-          }
+        for (int i = 0; i < n; i++) {
+            if (i == iw) {
+                for (int j = 0; j < n; j++) {
+                    double wanted = target_dis[i * n + j],
+                           w = weights[i * n + j];
+                    double actual = hamming_dis(perm[i], perm[j]);
+                    delta_cost -= w * sqr(wanted - actual);
+                    double new_actual = hamming_dis(
+                            perm[jw],
+                            perm[j == iw           ? jw
+                                         : j == jw ? iw
+                                                   : j]);
+                    delta_cost += w * sqr(wanted - new_actual);
+                }
+            } else if (i == jw) {
+                for (int j = 0; j < n; j++) {
+                    double wanted = target_dis[i * n + j],
+                           w = weights[i * n + j];
+                    double actual = hamming_dis(perm[i], perm[j]);
+                    delta_cost -= w * sqr(wanted - actual);
+                    double new_actual = hamming_dis(
+                            perm[iw],
+                            perm[j == iw           ? jw
+                                         : j == jw ? iw
+                                                   : j]);
+                    delta_cost += w * sqr(wanted - new_actual);
+                }
+            } else {
+                int j = iw;
+                {
+                    double wanted = target_dis[i * n + j],
+                           w = weights[i * n + j];
+                    double actual = hamming_dis(perm[i], perm[j]);
+                    delta_cost -= w * sqr(wanted - actual);
+                    double new_actual = hamming_dis(perm[i], perm[jw]);
+                    delta_cost += w * sqr(wanted - new_actual);
+                }
+                j = jw;
+                {
+                    double wanted = target_dis[i * n + j],
+                           w = weights[i * n + j];
+                    double actual = hamming_dis(perm[i], perm[j]);
+                    delta_cost -= w * sqr(wanted - actual);
+                    double new_actual = hamming_dis(perm[i], perm[iw]);
+                    delta_cost += w * sqr(wanted - new_actual);
+                }
+            }
         }
-      }
-      return delta_cost;
+        return delta_cost;
     }
-    ReproduceWithHammingObjective (
-           int nbits,
-           const std::vector<double> & dis_table,
-           double dis_weight_factor):
-        nbits (nbits), dis_weight_factor (dis_weight_factor)
-    {
+    ReproduceWithHammingObjective(
+            int nbits,
+            const std::vector<double>& dis_table,
+            double dis_weight_factor)
+            : nbits(nbits), dis_weight_factor(dis_weight_factor) {
         n = 1 << nbits;
-        FAISS_THROW_IF_NOT (dis_table.size() == n * n);
-        set_affine_target_dis (dis_table);
+        FAISS_THROW_IF_NOT(dis_table.size() == n * n);
+        set_affine_target_dis(dis_table);
     }
-    void set_affine_target_dis (const std::vector<double> & dis_table)
-    {
+    void set_affine_target_dis(const std::vector<double>& dis_table) {
         double sum = 0, sum2 = 0;
         int n2 = n * n;
         for (int i = 0; i < n2; i++) {
-            sum += dis_table [i];
-            sum2 += dis_table [i] * dis_table [i];
+            sum += dis_table[i];
+            sum2 += dis_table[i] * dis_table[i];
         }
         double mean = sum / n2;
         double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));
-        target_dis.resize (n2);
+        target_dis.resize(n2);
         for (int i = 0; i < n2; i++) {
             // the mapping function
-            double td = (dis_table [i] - mean) / stddev * sqrt(nbits / 4) +
-                nbits / 2;
+            double td = (dis_table[i] - mean) / stddev * sqrt(nbits / 4) +
+                    nbits / 2;
             target_dis[i] = td;
             // compute a weight
-            weights.push_back (dis_weight (td));
+            weights.push_back(dis_weight(td));
         }
     }
     ~ReproduceWithHammingObjective() override {}
@@ -301,27 +304,23 @@ struct ReproduceWithHammingObjective : PermutationObjective {
 // weighting of distances: it is more important to reproduce small
 // distances well
-double ReproduceDistancesObjective::dis_weight (double x) const
-{
-    return exp (-dis_weight_factor * x);
+double ReproduceDistancesObjective::dis_weight(double x) const {
+    return exp(-dis_weight_factor * x);
 }
-double ReproduceDistancesObjective::get_source_dis (int i, int j) const
-{
-    return source_dis [i * n + j];
+double ReproduceDistancesObjective::get_source_dis(int i, int j) const {
+    return source_dis[i * n + j];
 }
 // cost = quadratic difference between actual distance and Hamming distance
-double ReproduceDistancesObjective::compute_cost (const int *perm) const
-{
+double ReproduceDistancesObjective::compute_cost(const int* perm) const {
     double cost = 0;
     for (int i = 0; i < n; i++) {
         for (int j = 0; j < n; j++) {
-            double wanted = target_dis [i * n + j];
-            double w = weights [i * n + j];
-            double actual = get_source_dis (perm[i], perm[j]);
-            cost += w * sqr (wanted - actual);
+            double wanted = target_dis[i * n + j];
+            double w = weights[i * n + j];
+            double actual = get_source_dis(perm[i], perm[j]);
+            cost += w * sqr(wanted - actual);
         }
     }
     return cost;
@@ -329,79 +328,75 @@ double ReproduceDistancesObjective::compute_cost (const int *perm) const
 // what would the cost update be if iw and jw were swapped?
 // computed in O(n) instead of O(n^2) for the full re-computation
-double ReproduceDistancesObjective::cost_update(
-        const int *perm, int iw, int jw) const
-{
+double ReproduceDistancesObjective::cost_update(const int* perm, int iw, int jw)
+        const {
     double delta_cost = 0;
-     for (int i = 0; i < n; i++) {
+    for (int i = 0; i < n; i++) {
         if (i == iw) {
             for (int j = 0; j < n; j++) {
-                double wanted = target_dis [i * n + j],
-                    w = weights [i * n + j];
-                double actual = get_source_dis (perm[i], perm[j]);
-                delta_cost -= w * sqr (wanted - actual);
-                double new_actual = get_source_dis (
-                       perm[jw],
-                       perm[j == iw ? jw : j == jw ? iw : j]);
-                delta_cost += w * sqr (wanted - new_actual);
+                double wanted = target_dis[i * n + j], w = weights[i * n + j];
+                double actual = get_source_dis(perm[i], perm[j]);
+                delta_cost -= w * sqr(wanted - actual);
+                double new_actual = get_source_dis(
+                        perm[jw],
+                        perm[j == iw           ? jw
+                                     : j == jw ? iw
+                                               : j]);
+                delta_cost += w * sqr(wanted - new_actual);
             }
         } else if (i == jw) {
             for (int j = 0; j < n; j++) {
-                double wanted = target_dis [i * n + j],
-                    w = weights [i * n + j];
-                double actual = get_source_dis (perm[i], perm[j]);
-                delta_cost -= w * sqr (wanted - actual);
-                double new_actual = get_source_dis (
-                       perm[iw],
-                       perm[j == iw ? jw : j == jw ? iw : j]);
-                delta_cost += w * sqr (wanted - new_actual);
+                double wanted = target_dis[i * n + j], w = weights[i * n + j];
+                double actual = get_source_dis(perm[i], perm[j]);
+                delta_cost -= w * sqr(wanted - actual);
+                double new_actual = get_source_dis(
+                        perm[iw],
+                        perm[j == iw           ? jw
+                                     : j == jw ? iw
+                                               : j]);
+                delta_cost += w * sqr(wanted - new_actual);
             }
-        } else  {
+        } else {
             int j = iw;
             {
-                double wanted = target_dis [i * n + j],
-                    w = weights [i * n + j];
-                double actual = get_source_dis (perm[i], perm[j]);
-                delta_cost -= w * sqr (wanted - actual);
-                double new_actual = get_source_dis (perm[i], perm[jw]);
-                delta_cost += w * sqr (wanted - new_actual);
+                double wanted = target_dis[i * n + j], w = weights[i * n + j];
+                double actual = get_source_dis(perm[i], perm[j]);
+                delta_cost -= w * sqr(wanted - actual);
+                double new_actual = get_source_dis(perm[i], perm[jw]);
+                delta_cost += w * sqr(wanted - new_actual);
             }
             j = jw;
             {
-                double wanted = target_dis [i * n + j],
-                    w = weights [i * n + j];
-                double actual = get_source_dis (perm[i], perm[j]);
-                delta_cost -= w * sqr (wanted - actual);
-                double new_actual = get_source_dis (perm[i], perm[iw]);
-                delta_cost += w * sqr (wanted - new_actual);
+                double wanted = target_dis[i * n + j], w = weights[i * n + j];
+                double actual = get_source_dis(perm[i], perm[j]);
+                delta_cost -= w * sqr(wanted - actual);
+                double new_actual = get_source_dis(perm[i], perm[iw]);
+                delta_cost += w * sqr(wanted - new_actual);
             }
         }
     }
-     return delta_cost;
+    return delta_cost;
 }
-ReproduceDistancesObjective::ReproduceDistancesObjective (
-       int n,
-       const double *source_dis_in,
-       const double *target_dis_in,
-       double dis_weight_factor):
-    dis_weight_factor (dis_weight_factor),
-    target_dis (target_dis_in)
-{
+ReproduceDistancesObjective::ReproduceDistancesObjective(
+        int n,
+        const double* source_dis_in,
+        const double* target_dis_in,
+        double dis_weight_factor)
+        : dis_weight_factor(dis_weight_factor), target_dis(target_dis_in) {
     this->n = n;
-    set_affine_target_dis (source_dis_in);
+    set_affine_target_dis(source_dis_in);
 }
-void ReproduceDistancesObjective::compute_mean_stdev (
-          const double *tab, size_t n2,
-          double *mean_out, double *stddev_out)
-{
+void ReproduceDistancesObjective::compute_mean_stdev(
+        const double* tab,
+        size_t n2,
+        double* mean_out,
+        double* stddev_out) {
     double sum = 0, sum2 = 0;
     for (int i = 0; i < n2; i++) {
-        sum += tab [i];
-        sum2 += tab [i] * tab [i];
+        sum += tab[i];
+        sum2 += tab[i] * tab[i];
     }
     double mean = sum / n2;
     double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));
@@ -409,32 +404,34 @@ void ReproduceDistancesObjective::compute_mean_stdev (
     *stddev_out = stddev;
 }
-void ReproduceDistancesObjective::set_affine_target_dis (
-          const double *source_dis_in)
-{
+void ReproduceDistancesObjective::set_affine_target_dis(
+        const double* source_dis_in) {
     int n2 = n * n;
     double mean_src, stddev_src;
-    compute_mean_stdev (source_dis_in, n2, &mean_src, &stddev_src);
+    compute_mean_stdev(source_dis_in, n2, &mean_src, &stddev_src);
     double mean_target, stddev_target;
-    compute_mean_stdev (target_dis, n2, &mean_target, &stddev_target);
+    compute_mean_stdev(target_dis, n2, &mean_target, &stddev_target);
-    printf ("map mean %g std %g -> mean %g std %g\n",
-            mean_src, stddev_src, mean_target, stddev_target);
+    printf("map mean %g std %g -> mean %g std %g\n",
+           mean_src,
+           stddev_src,
+           mean_target,
+           stddev_target);
-    source_dis.resize (n2);
-    weights.resize (n2);
+    source_dis.resize(n2);
+    weights.resize(n2);
     for (int i = 0; i < n2; i++) {
         // the mapping function
-        source_dis[i] = (source_dis_in[i] - mean_src) / stddev_src
-            * stddev_target + mean_target;
+        source_dis[i] =
+                (source_dis_in[i] - mean_src) / stddev_src * stddev_target +
+                mean_target;
         // compute a weight
-        weights [i] = dis_weight (target_dis[i]);
+        weights[i] = dis_weight(target_dis[i]);
     }
 }
 /****************************************************
@@ -444,8 +441,7 @@ void ReproduceDistancesObjective::set_affine_target_dis (
 /// Maintains a 3D table of elementary costs.
 /// Accumulates elements based on Hamming distance comparisons
 template <typename Ttab, typename Taccu>
-struct Score3Computer: PermutationObjective {
+struct Score3Computer : PermutationObjective {
     int nc;
     // cost matrix of size nc * nc *nc
@@ -453,21 +449,18 @@ struct Score3Computer: PermutationObjective {
     // where x has PQ code i, y- PQ code j and y+ PQ code k
     std::vector<Ttab> n_gt;
     /// the cost is a triple loop on the nc * nc * nc matrix of entries.
     ///
-    Taccu compute (const int * perm) const
-    {
+    Taccu compute(const int* perm) const {
         Taccu accu = 0;
-        const Ttab *p = n_gt.data();
+        const Ttab* p = n_gt.data();
         for (int i = 0; i < nc; i++) {
-            int ip = perm [i];
+            int ip = perm[i];
             for (int j = 0; j < nc; j++) {
-                int jp = perm [j];
+                int jp = perm[j];
                 for (int k = 0; k < nc; k++) {
-                    int kp = perm [k];
-                    if (hamming_dis (ip, jp) <
-                        hamming_dis (ip, kp)) {
+                    int kp = perm[k];
+                    if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
                         accu += *p; // n_gt [ ( i * nc + j) * nc + k];
                     }
                     p++;
@@ -477,7 +470,6 @@ struct Score3Computer: PermutationObjective {
         return accu;
     }
     /** cost update if entries iw and jw of the permutation would be
      * swapped.
      *
@@ -487,25 +479,23 @@ struct Score3Computer: PermutationObjective {
      * cells. Practical speedup is about 8x, and the code is quite
      * complex :-/
      */
-    Taccu compute_update (const int *perm, int iw, int jw) const
-    {
-        assert (iw != jw);
-        if (iw > jw) std::swap (iw, jw);
+    Taccu compute_update(const int* perm, int iw, int jw) const {
+        assert(iw != jw);
+        if (iw > jw)
+            std::swap(iw, jw);
         Taccu accu = 0;
-        const Ttab * n_gt_i = n_gt.data();
+        const Ttab* n_gt_i = n_gt.data();
         for (int i = 0; i < nc; i++) {
-            int ip0 = perm [i];
-            int ip = perm [i == iw ? jw : i == jw ? iw : i];
+            int ip0 = perm[i];
+            int ip = perm[i == iw ? jw : i == jw ? iw : i];
-            //accu += update_i (perm, iw, jw, ip0, ip, n_gt_i);
+            // accu += update_i (perm, iw, jw, ip0, ip, n_gt_i);
-            accu += update_i_cross (perm, iw, jw,
-                                    ip0, ip, n_gt_i);
+            accu += update_i_cross(perm, iw, jw, ip0, ip, n_gt_i);
             if (ip != ip0)
-                accu += update_i_plane (perm, iw, jw,
-                                       ip0, ip, n_gt_i);
+                accu += update_i_plane(perm, iw, jw, ip0, ip, n_gt_i);
             n_gt_i += nc * nc;
         }
@@ -513,23 +503,26 @@ struct Score3Computer: PermutationObjective {
         return accu;
     }
-    Taccu update_i (const int *perm, int iw, int jw,
-                   int ip0, int ip, const Ttab * n_gt_i) const
-    {
+    Taccu update_i(
+            const int* perm,
+            int iw,
+            int jw,
+            int ip0,
+            int ip,
+            const Ttab* n_gt_i) const {
         Taccu accu = 0;
-        const Ttab *n_gt_ij = n_gt_i;
+        const Ttab* n_gt_ij = n_gt_i;
         for (int j = 0; j < nc; j++) {
             int jp0 = perm[j];
-            int jp = perm [j == iw ? jw : j == jw ? iw : j];
+            int jp = perm[j == iw ? jw : j == jw ? iw : j];
             for (int k = 0; k < nc; k++) {
-                int kp0 = perm [k];
-                int kp = perm [k == iw ? jw : k == jw ? iw : k];
-                int ng = n_gt_ij [k];
-                if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
+                int kp0 = perm[k];
+                int kp = perm[k == iw ? jw : k == jw ? iw : k];
+                int ng = n_gt_ij[k];
+                if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
                     accu += ng;
                 }
-                if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
+                if (hamming_dis(ip0, jp0) < hamming_dis(ip0, kp0)) {
                     accu -= ng;
                 }
             }
@@ -539,23 +532,27 @@ struct Score3Computer: PermutationObjective {
     }
     // 2 inner loops for the case ip0 != ip
-    Taccu update_i_plane (const int *perm, int iw, int jw,
-                         int ip0, int ip, const Ttab * n_gt_i) const
-    {
+    Taccu update_i_plane(
+            const int* perm,
+            int iw,
+            int jw,
+            int ip0,
+            int ip,
+            const Ttab* n_gt_i) const {
         Taccu accu = 0;
-        const Ttab *n_gt_ij = n_gt_i;
+        const Ttab* n_gt_ij = n_gt_i;
         for (int j = 0; j < nc; j++) {
             if (j != iw && j != jw) {
                 int jp = perm[j];
                 for (int k = 0; k < nc; k++) {
                     if (k != iw && k != jw) {
-                        int kp = perm [k];
-                        Ttab ng = n_gt_ij [k];
-                        if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
+                        int kp = perm[k];
+                        Ttab ng = n_gt_ij[k];
+                        if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
                             accu += ng;
                         }
-                        if (hamming_dis (ip0, jp) < hamming_dis (ip0, kp)) {
+                        if (hamming_dis(ip0, jp) < hamming_dis(ip0, kp)) {
                             accu -= ng;
                         }
                     }
@@ -567,114 +564,128 @@ struct Score3Computer: PermutationObjective {
     }
     /// used for the 8 cells where the 3 indices are swapped
-    inline Taccu update_k (const int *perm, int iw, int jw,
-                          int ip0, int ip, int jp0, int jp,
-                          int k,
-                          const Ttab * n_gt_ij) const
-    {
+    inline Taccu update_k(
+            const int* perm,
+            int iw,
+            int jw,
+            int ip0,
+            int ip,
+            int jp0,
+            int jp,
+            int k,
+            const Ttab* n_gt_ij) const {
         Taccu accu = 0;
-        int kp0 = perm [k];
-        int kp = perm [k == iw ? jw : k == jw ? iw : k];
-        Ttab ng = n_gt_ij [k];
-        if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
+        int kp0 = perm[k];
+        int kp = perm[k == iw ? jw : k == jw ? iw : k];
+        Ttab ng = n_gt_ij[k];
+        if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
             accu += ng;
         }
-        if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
+        if (hamming_dis(ip0, jp0) < hamming_dis(ip0, kp0)) {
             accu -= ng;
         }
         return accu;
     }
     /// compute update on a line of k's, where i and j are swapped
-    Taccu update_j_line (const int *perm, int iw, int jw,
-                        int ip0, int ip, int jp0, int jp,
-                        const Ttab * n_gt_ij) const
-    {
+    Taccu update_j_line(
+            const int* perm,
+            int iw,
+            int jw,
+            int ip0,
+            int ip,
+            int jp0,
+            int jp,
+            const Ttab* n_gt_ij) const {
         Taccu accu = 0;
         for (int k = 0; k < nc; k++) {
-            if (k == iw || k == jw) continue;
-            int kp = perm [k];
-            Ttab ng = n_gt_ij [k];
-            if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
+            if (k == iw || k == jw)
+                continue;
+            int kp = perm[k];
+            Ttab ng = n_gt_ij[k];
+            if (hamming_dis(ip, jp) < hamming_dis(ip, kp)) {
                 accu += ng;
             }
-            if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp)) {
+            if (hamming_dis(ip0, jp0) < hamming_dis(ip0, kp)) {
                 accu -= ng;
             }
         }
         return accu;
     }
     /// considers the 2 pairs of crossing lines j=iw or jw and k = iw or jw
-    Taccu update_i_cross (const int *perm, int iw, int jw,
-                        int ip0, int ip, const Ttab * n_gt_i) const
-    {
+    Taccu update_i_cross(
+            const int* perm,
+            int iw,
+            int jw,
+            int ip0,
+            int ip,
+            const Ttab* n_gt_i) const {
         Taccu accu = 0;
-        const Ttab *n_gt_ij = n_gt_i;
+        const Ttab* n_gt_ij = n_gt_i;
         for (int j = 0; j < nc; j++) {
             int jp0 = perm[j];
-            int jp = perm [j == iw ? jw : j == jw ? iw : j];
+            int jp = perm[j == iw ? jw : j == jw ? iw : j];
-            accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij);
-            accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij);
+            accu += update_k(perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij);
+            accu += update_k(perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij);
             if (jp != jp0)
-                accu += update_j_line (perm, iw, jw, ip0, ip, jp0, jp, n_gt_ij);
+                accu += update_j_line(perm, iw, jw, ip0, ip, jp0, jp, n_gt_ij);
             n_gt_ij += nc;
         }
         return accu;
     }
     /// PermutationObjective implementation (just negates the scores
     /// for minimization)
     double compute_cost(const int* perm) const override {
-      return -compute(perm);
+        return -compute(perm);
     }
     double cost_update(const int* perm, int iw, int jw) const override {
-      double ret = -compute_update(perm, iw, jw);
-      return ret;
+        double ret = -compute_update(perm, iw, jw);
+        return ret;
     }
     ~Score3Computer() override {}
 };
 struct IndirectSort {
-    const float *tab;
-    bool operator () (int a, int b) {return tab[a] < tab[b]; }
+    const float* tab;
+    bool operator()(int a, int b) {
+        return tab[a] < tab[b];
+    }
 };
-struct RankingScore2: Score3Computer<float, double> {
+struct RankingScore2 : Score3Computer<float, double> {
     int nbits;
     int nq, nb;
     const uint32_t *qcodes, *bcodes;
-    const float *gt_distances;
-    RankingScore2 (int nbits, int nq, int nb,
-                  const uint32_t *qcodes, const uint32_t *bcodes,
-                  const float *gt_distances):
-        nbits(nbits), nq(nq), nb(nb), qcodes(qcodes),
-        bcodes(bcodes), gt_distances(gt_distances)
-    {
+    const float* gt_distances;
+    RankingScore2(
+            int nbits,
+            int nq,
+            int nb,
+            const uint32_t* qcodes,
+            const uint32_t* bcodes,
+            const float* gt_distances)
+            : nbits(nbits),
+              nq(nq),
+              nb(nb),
+              qcodes(qcodes),
+              bcodes(bcodes),
+              gt_distances(gt_distances) {
         n = nc = 1 << nbits;
-        n_gt.resize (nc * nc * nc);
-        init_n_gt ();
+        n_gt.resize(nc * nc * nc);
+        init_n_gt();
     }
-    double rank_weight (int r)
-    {
+    double rank_weight(int r) {
         return 1.0 / (r + 1);
     }
@@ -683,271 +694,290 @@ struct RankingScore2: Score3Computer<float, double> {
     /// they are the ranks of j and k respectively.
     /// specific version for diff-of-rank weighting, cannot be optimized
     /// with a cumulative table
-    double accum_gt_weight_diff (const std::vector<int> & a,
-                                 const std::vector<int> & b)
-    {
+    double accum_gt_weight_diff(
+            const std::vector<int>& a,
+            const std::vector<int>& b) {
         int nb = b.size(), na = a.size();
         double accu = 0;
         int j = 0;
         for (int i = 0; i < na; i++) {
             int ai = a[i];
-            while (j < nb && ai >= b[j]) j++;
+            while (j < nb && ai >= b[j])
+                j++;
             double accu_i = 0;
             for (int k = j; k < b.size(); k++)
-                accu_i += rank_weight (b[k] - ai);
-            accu += rank_weight (ai) * accu_i;
+                accu_i += rank_weight(b[k] - ai);
+            accu += rank_weight(ai) * accu_i;
         }
         return accu;
     }
-    void init_n_gt ()
-    {
+    void init_n_gt() {
         for (int q = 0; q < nq; q++) {
-            const float *gtd = gt_distances + q * nb;
-            const uint32_t *cb = bcodes;// all same codes
-            float * n_gt_q = & n_gt [qcodes[q] * nc * nc];
+            const float* gtd = gt_distances + q * nb;
+            const uint32_t* cb = bcodes; // all same codes
+            float* n_gt_q = &n_gt[qcodes[q] * nc * nc];
-            printf("init gt for q=%d/%d    \r", q, nq); fflush(stdout);
+            printf("init gt for q=%d/%d    \r", q, nq);
+            fflush(stdout);
-            std::vector<int> rankv (nb);
-            int * ranks = rankv.data();
+            std::vector<int> rankv(nb);
+            int* ranks = rankv.data();
             // elements in each code bin, ordered by rank within each bin
-            std::vector<std::vector<int> > tab (nc);
+            std::vector<std::vector<int>> tab(nc);
             { // build rank table
                 IndirectSort s = {gtd};
-                for (int j = 0; j < nb; j++) ranks[j] = j;
-                std::sort (ranks, ranks + nb, s);
+                for (int j = 0; j < nb; j++)
+                    ranks[j] = j;
+                std::sort(ranks, ranks + nb, s);
             }
             for (int rank = 0; rank < nb; rank++) {
-                int i = ranks [rank];
-                tab [cb[i]].push_back (rank);
+                int i = ranks[rank];
+                tab[cb[i]].push_back(rank);
             }
             // this is very expensive. Any suggestion for improvement
             // welcome.
             for (int i = 0; i < nc; i++) {
-                std::vector<int> & di = tab[i];
+                std::vector<int>& di = tab[i];
                 for (int j = 0; j < nc; j++) {
-                    std::vector<int> & dj = tab[j];
-                    n_gt_q [i * nc + j] += accum_gt_weight_diff (di, dj);
+                    std::vector<int>& dj = tab[j];
+                    n_gt_q[i * nc + j] += accum_gt_weight_diff(di, dj);
                 }
             }
         }
     }
 };
 /*****************************************
  * PolysemousTraining
  ******************************************/
-PolysemousTraining::PolysemousTraining ()
-{
+PolysemousTraining::PolysemousTraining() {
     optimization_type = OT_ReproduceDistances_affine;
     ntrain_permutation = 0;
     dis_weight_factor = log(2);
+    // max 20 G RAM
+    max_memory = (size_t)(20) * 1024 * 1024 * 1024;
 }
-void PolysemousTraining::optimize_reproduce_distances (
-       ProductQuantizer &pq) const
-{
+void PolysemousTraining::optimize_reproduce_distances(
+        ProductQuantizer& pq) const {
     int dsub = pq.dsub;
     int n = pq.ksub;
     int nbits = pq.nbits;
-#pragma omp parallel for
+    size_t mem1 = memory_usage_per_thread(pq);
+    int nt = std::min(omp_get_max_threads(), int(pq.M));
+    FAISS_THROW_IF_NOT_FMT(
+            mem1 < max_memory,
+            "Polysemous training will use %zd bytes per thread, while the max is set to %zd",
+            mem1,
+            max_memory);
+    if (mem1 * nt > max_memory) {
+        nt = max_memory / mem1;
+        fprintf(stderr,
+                "Polysemous training: WARN, reducing number of threads to %d to save memory",
+                nt);
+    }
+#pragma omp parallel for num_threads(nt)
     for (int m = 0; m < pq.M; m++) {
         std::vector<double> dis_table;
         // printf ("Optimizing quantizer %d\n", m);
-        float * centroids = pq.get_centroids (m, 0);
+        float* centroids = pq.get_centroids(m, 0);
         for (int i = 0; i < n; i++) {
             for (int j = 0; j < n; j++) {
-                dis_table.push_back (fvec_L2sqr (centroids + i * dsub,
-                                                 centroids + j * dsub,
-                                                 dsub));
+                dis_table.push_back(fvec_L2sqr(
+                        centroids + i * dsub, centroids + j * dsub, dsub));
             }
         }
-        std::vector<int> perm (n);
-        ReproduceWithHammingObjective obj (
-               nbits, dis_table,
-               dis_weight_factor);
+        std::vector<int> perm(n);
+        ReproduceWithHammingObjective obj(nbits, dis_table, dis_weight_factor);
-        SimulatedAnnealingOptimizer optim (&obj, *this);
+        SimulatedAnnealingOptimizer optim(&obj, *this);
         if (log_pattern.size()) {
             char fname[256];
-            snprintf (fname, 256, log_pattern.c_str(), m);
-            printf ("opening log file %s\n", fname);
-            optim.logfile = fopen (fname, "w");
-            FAISS_THROW_IF_NOT_MSG (optim.logfile, "could not open logfile");
+            snprintf(fname, 256, log_pattern.c_str(), m);
+            printf("opening log file %s\n", fname);
+            optim.logfile = fopen(fname, "w");
+            FAISS_THROW_IF_NOT_MSG(optim.logfile, "could not open logfile");
         }
-        double final_cost = optim.run_optimization (perm.data());
+        double final_cost = optim.run_optimization(perm.data());
         if (verbose > 0) {
-            printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
-                    m, optim.init_cost, final_cost);
+            printf("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
+                   m,
+                   optim.init_cost,
+                   final_cost);
         }
-        if (log_pattern.size()) fclose (optim.logfile);
+        if (log_pattern.size())
+            fclose(optim.logfile);
         std::vector<float> centroids_copy;
         for (int i = 0; i < dsub * n; i++)
-            centroids_copy.push_back (centroids[i]);
+            centroids_copy.push_back(centroids[i]);
         for (int i = 0; i < n; i++)
-            memcpy (centroids + perm[i] * dsub,
-                    centroids_copy.data() + i * dsub,
-                    dsub * sizeof(centroids[0]));
+            memcpy(centroids + perm[i] * dsub,
+                   centroids_copy.data() + i * dsub,
+                   dsub * sizeof(centroids[0]));
     }
 }
-void PolysemousTraining::optimize_ranking (
-      ProductQuantizer &pq, size_t n, const float *x) const
-{
+void PolysemousTraining::optimize_ranking(
+        ProductQuantizer& pq,
+        size_t n,
+        const float* x) const {
     int dsub = pq.dsub;
     int nbits = pq.nbits;
-    std::vector<uint8_t> all_codes (pq.code_size * n);
+    std::vector<uint8_t> all_codes(pq.code_size * n);
-    pq.compute_codes (x, all_codes.data(), n);
+    pq.compute_codes(x, all_codes.data(), n);
-    FAISS_THROW_IF_NOT (pq.nbits == 8);
+    FAISS_THROW_IF_NOT(pq.nbits == 8);
-    if (n == 0)
-        pq.compute_sdc_table ();
+    if (n == 0) {
+        pq.compute_sdc_table();
+    }
 #pragma omp parallel for
     for (int m = 0; m < pq.M; m++) {
         size_t nq, nb;
-        std::vector <uint32_t> codes; // query codes, then db codes
-        std::vector <float> gt_distances; // nq * nb matrix of distances
+        std::vector<uint32_t> codes;     // query codes, then db codes
+        std::vector<float> gt_distances; // nq * nb matrix of distances
         if (n > 0) {
-            std::vector<float> xtrain (n * dsub);
+            std::vector<float> xtrain(n * dsub);
             for (int i = 0; i < n; i++)
-                memcpy (xtrain.data() + i * dsub,
-                        x + i * pq.d + m * dsub,
-                        sizeof(float) * dsub);
+                memcpy(xtrain.data() + i * dsub,
+                       x + i * pq.d + m * dsub,
+                       sizeof(float) * dsub);
-            codes.resize (n);
+            codes.resize(n);
             for (int i = 0; i < n; i++)
-                codes [i] = all_codes [i * pq.code_size + m];
+                codes[i] = all_codes[i * pq.code_size + m];
-            nq = n / 4; nb = n - nq;
-            const float *xq = xtrain.data();
-            const float *xb = xq + nq * dsub;
+            nq = n / 4;
+            nb = n - nq;
+            const float* xq = xtrain.data();
+            const float* xb = xq + nq * dsub;
-            gt_distances.resize (nq * nb);
+            gt_distances.resize(nq * nb);
-            pairwise_L2sqr (dsub,
-                            nq, xq,
-                            nb, xb,
-                            gt_distances.data());
+            pairwise_L2sqr(dsub, nq, xq, nb, xb, gt_distances.data());
         } else {
             nq = nb = pq.ksub;
-            codes.resize (2 * nq);
+            codes.resize(2 * nq);
             for (int i = 0; i < nq; i++)
-                codes[i] = codes [i + nq] = i;
+                codes[i] = codes[i + nq] = i;
-            gt_distances.resize (nq * nb);
+            gt_distances.resize(nq * nb);
-            memcpy (gt_distances.data (),
-                    pq.sdc_table.data () + m * nq * nb,
-                    sizeof (float) * nq * nb);
+            memcpy(gt_distances.data(),
+                   pq.sdc_table.data() + m * nq * nb,
+                   sizeof(float) * nq * nb);
         }
-        double t0 = getmillisecs ();
+        double t0 = getmillisecs();
-        PermutationObjective *obj = new RankingScore2 (
-                  nbits, nq, nb,
-                  codes.data(), codes.data() + nq,
-                  gt_distances.data ());
-        ScopeDeleter1<PermutationObjective> del (obj);
+        PermutationObjective* obj = new RankingScore2(
+                nbits,
+                nq,
+                nb,
+                codes.data(),
+                codes.data() + nq,
+                gt_distances.data());
+        ScopeDeleter1<PermutationObjective> del(obj);
         if (verbose > 0) {
             printf("   m=%d, nq=%zd, nb=%zd, intialize RankingScore "
                    "in %.3f ms\n",
-                   m, nq, nb, getmillisecs () - t0);
+                   m,
+                   nq,
+                   nb,
+                   getmillisecs() - t0);
         }
-        SimulatedAnnealingOptimizer optim (obj, *this);
+        SimulatedAnnealingOptimizer optim(obj, *this);
         if (log_pattern.size()) {
             char fname[256];
-            snprintf (fname, 256, log_pattern.c_str(), m);
-            printf ("opening log file %s\n", fname);
-            optim.logfile = fopen (fname, "w");
-            FAISS_THROW_IF_NOT_FMT (optim.logfile,
-                                    "could not open logfile %s", fname);
+            snprintf(fname, 256, log_pattern.c_str(), m);
+            printf("opening log file %s\n", fname);
+            optim.logfile = fopen(fname, "w");
+            FAISS_THROW_IF_NOT_FMT(
+                    optim.logfile, "could not open logfile %s", fname);
         }
-        std::vector<int> perm (pq.ksub);
+        std::vector<int> perm(pq.ksub);
-        double final_cost = optim.run_optimization (perm.data());
-        printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
-                m, optim.init_cost, final_cost);
+        double final_cost = optim.run_optimization(perm.data());
+        printf("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
+               m,
+               optim.init_cost,
+               final_cost);
-        if (log_pattern.size()) fclose (optim.logfile);
+        if (log_pattern.size())
+            fclose(optim.logfile);
-        float * centroids = pq.get_centroids (m, 0);
+        float* centroids = pq.get_centroids(m, 0);
         std::vector<float> centroids_copy;
         for (int i = 0; i < dsub * pq.ksub; i++)
-            centroids_copy.push_back (centroids[i]);
+            centroids_copy.push_back(centroids[i]);
         for (int i = 0; i < pq.ksub; i++)
-            memcpy (centroids + perm[i] * dsub,
-                    centroids_copy.data() + i * dsub,
-                    dsub * sizeof(centroids[0]));
+            memcpy(centroids + perm[i] * dsub,
+                   centroids_copy.data() + i * dsub,
+                   dsub * sizeof(centroids[0]));
     }
 }
-void PolysemousTraining::optimize_pq_for_hamming (ProductQuantizer &pq,
-                                                size_t n, const float *x) const
-{
+void PolysemousTraining::optimize_pq_for_hamming(
+        ProductQuantizer& pq,
+        size_t n,
+        const float* x) const {
     if (optimization_type == OT_None) {
     } else if (optimization_type == OT_ReproduceDistances_affine) {
-        optimize_reproduce_distances (pq);
+        optimize_reproduce_distances(pq);
     } else {
-        optimize_ranking (pq, n, x);
+        optimize_ranking(pq, n, x);
     }
-    pq.compute_sdc_table ();
+    pq.compute_sdc_table();
 }
+size_t PolysemousTraining::memory_usage_per_thread( // byte estimate of the working tables one training thread builds, chosen by optimization_type
+        const ProductQuantizer& pq) const {
+    size_t n = pq.ksub; // same count the optimize_* routines use as the number of centroids per sub-quantizer
+    switch (optimization_type) {
+        case OT_None:
+            return 0; // optimize_pq_for_hamming runs no optimizer for OT_None
+        case OT_ReproduceDistances_affine:
+            return n * n * sizeof(double) * 3; // three n*n tables built per sub-quantizer: dis_table, target_dis, weights
+        case OT_Ranking_weighted_diff:
+            return n * n * n * sizeof(float); // the nc*nc*nc float n_gt table that RankingScore2 resizes
+    }
+    FAISS_THROW_MSG("Invalid optmization type"); // reached only when optimization_type matched none of the cases above
+    return 0; // NOTE(review): presumably unreachable after the throw and kept for return-path warnings - confirm
+}
 } // namespace faiss