faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
data/vendor/faiss/faiss/utils/utils.cpp

@@ -9,10 +9,10 @@
 
 #include <faiss/utils/utils.h>
 
-#include <cstdio>
 #include <cassert>
-#include <cstring>
 #include <cmath>
+#include <cstdio>
+#include <cstring>
 
 #include <sys/types.h>
 
@@ -32,46 +32,94 @@
 
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/impl/platform_macros.h>
 #include <faiss/utils/random.h>
 
-
-
 #ifndef FINTEGER
 #define FINTEGER long
 #endif
 
-
 extern "C" {
 
 /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
 
-int sgemm_
-
-
-
+int sgemm_(
+        const char* transa,
+        const char* transb,
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        const float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* b,
+        FINTEGER* ldb,
+        float* beta,
+        float* c,
+        FINTEGER* ldc);
 
 /* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
 
-int sgeqrf_
-
-
-
-
-
-
-
-
-
+int sgeqrf_(
+        FINTEGER* m,
+        FINTEGER* n,
+        float* a,
+        FINTEGER* lda,
+        float* tau,
+        float* work,
+        FINTEGER* lwork,
+        FINTEGER* info);
+
+int sorgqr_(
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        float* a,
+        FINTEGER* lda,
+        float* tau,
+        float* work,
+        FINTEGER* lwork,
+        FINTEGER* info);
+
+int sgemv_(
+        const char* trans,
+        FINTEGER* m,
+        FINTEGER* n,
+        float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* x,
+        FINTEGER* incx,
+        float* beta,
+        float* y,
+        FINTEGER* incy);
 }
 
-
 /**************************************************
  * Get some stats about the system
  **************************************************/
 
 namespace faiss {
 
+std::string get_compile_options() {
+    std::string options;
+
+    // this flag is set by GCC and Clang
+#ifdef __OPTIMIZE__
+    options += "OPTIMIZE ";
+#endif
+
+#ifdef __AVX2__
+    options += "AVX2";
+#elif defined(__aarch64__)
+    options += "NEON";
+#else
+    options += "GENERIC";
+#endif
+
+    return options;
+}
+
 #ifdef _MSC_VER
 double getmillisecs() {
     LARGE_INTEGER ts;
@@ -81,73 +129,69 @@ double getmillisecs() {
 
     return (ts.QuadPart * 1e3) / freq.QuadPart;
 }
-#else
-double getmillisecs
+#else // _MSC_VER
+double getmillisecs() {
     struct timeval tv;
-    gettimeofday
+    gettimeofday(&tv, nullptr);
     return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;
 }
 #endif // _MSC_VER
 
-uint64_t get_cycles
-#ifdef
+uint64_t get_cycles() {
+#ifdef __x86_64__
     uint32_t high, low;
-    asm volatile("rdtsc \n\t"
-                 : "=a" (low),
-                 "=d" (high));
+    asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
     return ((uint64_t)high << 32) | (low);
 #else
     return 0;
 #endif
 }
 
-
 #ifdef __linux__
 
-size_t get_mem_usage_kb
-
-    int pid = getpid ();
+size_t get_mem_usage_kb() {
+    int pid = getpid();
     char fname[256];
-    snprintf
-    FILE
-    FAISS_THROW_IF_NOT_MSG
+    snprintf(fname, 256, "/proc/%d/status", pid);
+    FILE* f = fopen(fname, "r");
+    FAISS_THROW_IF_NOT_MSG(f, "cannot open proc status file");
     size_t sz = 0;
     for (;;) {
-        char buf
-        if (!fgets
-
+        char buf[256];
+        if (!fgets(buf, 256, f))
+            break;
+        if (sscanf(buf, "VmRSS: %ld kB", &sz) == 1)
+            break;
     }
-    fclose
+    fclose(f);
     return sz;
 }
 
 #else
 
-size_t get_mem_usage_kb
-
-
+size_t get_mem_usage_kb() {
+    fprintf(stderr,
+            "WARN: get_mem_usage_kb not implemented on current architecture\n");
     return 0;
 }
 
 #endif
 
-
-
-
-
-
-
-            size_t n, size_t d, size_t nu)
-{
+void reflection(
+        const float* __restrict u,
+        float* __restrict x,
+        size_t n,
+        size_t d,
+        size_t nu) {
     size_t i, j, l;
     for (i = 0; i < n; i++) {
-        const float
+        const float* up = u;
         for (l = 0; l < nu; l++) {
             float ip1 = 0, ip2 = 0;
 
-            for (j = 0; j < d; j+=2) {
+            for (j = 0; j < d; j += 2) {
                 ip1 += up[j] * x[j];
-                ip2 += up[j+1] * x[j+1];
+                ip2 += up[j + 1] * x[j + 1];
             }
             float ip = 2 * (ip1 + ip2);
 
@@ -159,13 +203,11 @@ void reflection (const float * __restrict u,
     }
 }
 
-
 /* Reference implementation (slower) */
-void reflection_ref
-{
+void reflection_ref(const float* u, float* x, size_t n, size_t d, size_t nu) {
     size_t i, j, l;
     for (i = 0; i < n; i++) {
-        const float
+        const float* up = u;
         for (l = 0; l < nu; l++) {
             double ip = 0;
 
@@ -182,53 +224,38 @@ void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
     }
 }
 
-
-
-
-
-
 /***************************************************************************
  * Some matrix manipulation functions
  ***************************************************************************/
 
-void matrix_qr
-
-    FAISS_THROW_IF_NOT (m >= n);
+void matrix_qr(int m, int n, float* a) {
+    FAISS_THROW_IF_NOT(m >= n);
     FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
-    std::vector<float> tau
+    std::vector<float> tau(ki);
     FINTEGER lwork = -1, info;
     float work_size;
 
-    sgeqrf_
-             &work_size, &lwork, &info);
+    sgeqrf_(&mi, &ni, a, &mi, tau.data(), &work_size, &lwork, &info);
     lwork = size_t(work_size);
-    std::vector<float> work
-
-    sgeqrf_ (&mi, &ni, a, &mi,
-             tau.data(), work.data(), &lwork, &info);
+    std::vector<float> work(lwork);
 
-
-              work.data(), &lwork, &info);
+    sgeqrf_(&mi, &ni, a, &mi, tau.data(), work.data(), &lwork, &info);
 
+    sorgqr_(&mi, &ni, &ki, a, &mi, tau.data(), work.data(), &lwork, &info);
 }
 
-
-
-
 /***************************************************************************
  * Result list routines
  ***************************************************************************/
 
-
-void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
-{
+void ranklist_handle_ties(int k, int64_t* idx, const float* dis) {
     float prev_dis = -1e38;
     int prev_i = -1;
     for (int i = 0; i < k; i++) {
         if (dis[i] != prev_dis) {
             if (i > prev_i + 1) {
                 // sort between prev_i and i - 1
-                std::sort
+                std::sort(idx + prev_i, idx + i);
             }
             prev_i = i;
             prev_dis = dis[i];
@@ -236,31 +263,33 @@ void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
     }
 }
 
-size_t merge_result_table_with
-
-
-
-
-
+size_t merge_result_table_with(
+        size_t n,
+        size_t k,
+        int64_t* I0,
+        float* D0,
+        const int64_t* I1,
+        const float* D1,
+        bool keep_min,
+        int64_t translation) {
     size_t n1 = 0;
 
-#pragma omp parallel reduction(
+#pragma omp parallel reduction(+ : n1)
     {
-        std::vector<int64_t> tmpI
-        std::vector<float> tmpD
+        std::vector<int64_t> tmpI(k);
+        std::vector<float> tmpD(k);
 
 #pragma omp for
         for (int64_t i = 0; i < n; i++) {
-            int64_t
-            float
-            const int64_t
-            const float
+            int64_t* lI0 = I0 + i * k;
+            float* lD0 = D0 + i * k;
+            const int64_t* lI1 = I1 + i * k;
+            const float* lD1 = D1 + i * k;
             size_t r0 = 0;
             size_t r1 = 0;
 
             if (keep_min) {
                 for (size_t j = 0; j < k; j++) {
-
                     if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {
                         tmpD[j] = lD0[r0];
                         tmpI[j] = lI0[r0];
@@ -291,29 +320,30 @@ size_t merge_result_table_with (size_t n, size_t k,
                 }
             }
             n1 += r1;
-            memcpy
-            memcpy
+            memcpy(lD0, tmpD.data(), sizeof(lD0[0]) * k);
+            memcpy(lI0, tmpI.data(), sizeof(lI0[0]) * k);
         }
     }
 
     return n1;
 }
 
-
-
-
-
-{
-    if (k2 > k1)
-
-
-
+size_t ranklist_intersection_size(
+        size_t k1,
+        const int64_t* v1,
+        size_t k2,
+        const int64_t* v2_in) {
+    if (k2 > k1)
+        return ranklist_intersection_size(k2, v2_in, k1, v1);
+    int64_t* v2 = new int64_t[k2];
+    memcpy(v2, v2_in, sizeof(int64_t) * k2);
+    std::sort(v2, v2 + k2);
     { // de-dup v2
         int64_t prev = -1;
         size_t wp = 0;
         for (size_t i = 0; i < k2; i++) {
-            if (v2
-                v2[wp++] = prev = v2
+            if (v2[i] != prev) {
+                v2[wp++] = prev = v2[i];
             }
         }
         k2 = wp;
@@ -321,195 +351,196 @@ size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
     const int64_t seen_flag = int64_t{1} << 60;
     size_t count = 0;
     for (size_t i = 0; i < k1; i++) {
-        int64_t q = v1
+        int64_t q = v1[i];
         size_t i0 = 0, i1 = k2;
         while (i0 + 1 < i1) {
             size_t imed = (i1 + i0) / 2;
-            int64_t piv = v2
-            if (piv <= q)
-
+            int64_t piv = v2[imed] & ~seen_flag;
+            if (piv <= q)
+                i0 = imed;
+            else
+                i1 = imed;
         }
-        if (v2
+        if (v2[i0] == q) {
             count++;
-            v2
+            v2[i0] |= seen_flag;
         }
     }
-    delete
+    delete[] v2;
 
     return count;
 }
 
-double imbalance_factor
+double imbalance_factor(int k, const int* hist) {
     double tot = 0, uf = 0;
 
-    for (int i = 0
+    for (int i = 0; i < k; i++) {
         tot += hist[i];
-        uf += hist[i] * (double)
+        uf += hist[i] * (double)hist[i];
     }
     uf = uf * k / (tot * tot);
 
     return uf;
 }
 
-
-double imbalance_factor (int n, int k, const int64_t *assign) {
+double imbalance_factor(int n, int k, const int64_t* assign) {
     std::vector<int> hist(k, 0);
     for (int i = 0; i < n; i++) {
         hist[assign[i]]++;
     }
 
-    return imbalance_factor
+    return imbalance_factor(k, hist.data());
 }
 
-
-
-int ivec_hist (size_t n, const int * v, int vmax, int *hist) {
-    memset (hist, 0, sizeof(hist[0]) * vmax);
+int ivec_hist(size_t n, const int* v, int vmax, int* hist) {
+    memset(hist, 0, sizeof(hist[0]) * vmax);
     int nout = 0;
     while (n--) {
-        if (v[n] < 0 || v[n] >= vmax)
-
+        if (v[n] < 0 || v[n] >= vmax)
+            nout++;
+        else
+            hist[v[n]]++;
     }
     return nout;
 }
 
-
-
-{
-    FAISS_THROW_IF_NOT (nbits % 8 == 0);
+void bincode_hist(size_t n, size_t nbits, const uint8_t* codes, int* hist) {
+    FAISS_THROW_IF_NOT(nbits % 8 == 0);
     size_t d = nbits / 8;
     std::vector<int> accu(d * 256);
-    const uint8_t
+    const uint8_t* c = codes;
     for (size_t i = 0; i < n; i++)
-        for(int j = 0; j < d; j++)
+        for (int j = 0; j < d; j++)
             accu[j * 256 + *c++]++;
-    memset
+    memset(hist, 0, sizeof(*hist) * nbits);
     for (int i = 0; i < d; i++) {
-        const int
-        int
+        const int* ai = accu.data() + i * 256;
+        int* hi = hist + i * 8;
         for (int j = 0; j < 256; j++)
             for (int k = 0; k < 8; k++)
                 if ((j >> k) & 1)
                     hi[k] += ai[j];
     }
-
 }
 
-
-
-size_t ivec_checksum (size_t n, const int *a)
-{
+size_t ivec_checksum(size_t n, const int* a) {
     size_t cs = 112909;
-    while (n--)
+    while (n--)
+        cs = cs * 65713 + a[n] * 1686049;
     return cs;
 }
 
-
 namespace {
-
-
-
-
-
-
+struct ArgsortComparator {
+    const float* vals;
+    bool operator()(const size_t a, const size_t b) const {
+        return vals[a] < vals[b];
+    }
+};
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+struct SegmentS {
+    size_t i0; // begin pointer in the permutation array
+    size_t i1; // end
+    size_t len() const {
+        return i1 - i0;
+    }
+};
+
+// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
+// extended to > 1 merge thread
+
+// merges 2 ranges that should be consecutive on the source into
+// the union of the two on the destination
+template <typename T>
+void parallel_merge(
+        const T* src,
+        T* dst,
+        SegmentS& s1,
+        SegmentS& s2,
+        int nt,
+        const ArgsortComparator& comp) {
+    if (s2.len() > s1.len()) { // make sure that s1 larger than s2
+        std::swap(s1, s2);
+    }
 
-
-
-
-
+    // compute sub-ranges for each thread
+    std::vector<SegmentS> s1s(nt), s2s(nt), sws(nt);
+    s2s[0].i0 = s2.i0;
+    s2s[nt - 1].i1 = s2.i1;
 
-
+    // not sure parallel actually helps here
 #pragma omp parallel for num_threads(nt)
-
-
-
-
-
-
-
-
-
-
-
+    for (int t = 0; t < nt; t++) {
+        s1s[t].i0 = s1.i0 + s1.len() * t / nt;
+        s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;
+
+        if (t + 1 < nt) {
+            T pivot = src[s1s[t].i1];
+            size_t i0 = s2.i0, i1 = s2.i1;
+            while (i0 + 1 < i1) {
+                size_t imed = (i1 + i0) / 2;
+                if (comp(pivot, src[imed])) {
+                    i1 = imed;
+                } else {
+                    i0 = imed;
                 }
-            s2s[t].i1 = s2s[t + 1].i0 = i1;
             }
+            s2s[t].i1 = s2s[t + 1].i0 = i1;
         }
-
-
-
-
-
-
-
-
-
+    }
+    s1.i0 = std::min(s1.i0, s2.i0);
+    s1.i1 = std::max(s1.i1, s2.i1);
+    s2 = s1;
+    sws[0].i0 = s1.i0;
+    for (int t = 0; t < nt; t++) {
+        sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
+        if (t + 1 < nt) {
+            sws[t + 1].i0 = sws[t].i1;
         }
-
+    }
+    assert(sws[nt - 1].i1 == s1.i1);
 
-
+    // do the actual merging
 #pragma omp parallel for num_threads(nt)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for (int t = 0; t < nt; t++) {
+        SegmentS sw = sws[t];
+        SegmentS s1t = s1s[t];
+        SegmentS s2t = s2s[t];
+        if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
+            for (;;) {
+                // assert (sw.len() == s1t.len() + s2t.len());
+                if (comp(src[s1t.i0], src[s2t.i0])) {
+                    dst[sw.i0++] = src[s1t.i0++];
+                    if (s1t.i0 == s1t.i1)
+                        break;
+                } else {
+                    dst[sw.i0++] = src[s2t.i0++];
+                    if (s2t.i0 == s2t.i1)
+                        break;
                 }
             }
-
-
-
-
-
-
-
+        }
+        if (s1t.len() > 0) {
+            assert(s1t.len() == sw.len());
+            memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
+        } else if (s2t.len() > 0) {
+            assert(s2t.len() == sw.len());
+            memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
         }
     }
+}
 
-};
+}; // namespace
 
-void fvec_argsort
-
-
-    for (size_t i = 0; i < n; i++) perm[i] = i;
+void fvec_argsort(size_t n, const float* vals, size_t* perm) {
+    for (size_t i = 0; i < n; i++)
+        perm[i] = i;
     ArgsortComparator comp = {vals};
-    std::sort
+    std::sort(perm, perm + n, comp);
 }
 
-void fvec_argsort_parallel
-
-{
-    size_t * perm2 = new size_t[n];
+void fvec_argsort_parallel(size_t n, const float* vals, size_t* perm) {
+    size_t* perm2 = new size_t[n];
     // 2 result tables, during merging, flip between them
     size_t *permB = perm2, *permA = perm;
 
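
The argsort helpers reformatted above keep their existing signatures; an illustrative call, not part of the diff, assuming fvec_argsort and fvec_argsort_parallel are declared in faiss/utils/utils.h as upstream:

    // Sketch: rank the entries of a float array by increasing value.
    #include <faiss/utils/utils.h>

    #include <vector>

    std::vector<size_t> argsort_example(const std::vector<float>& vals) {
        std::vector<size_t> perm(vals.size());
        // Single-threaded variant; fvec_argsort_parallel(n, vals, perm) is the
        // OpenMP merge-sort path shown in the hunk above.
        faiss::fvec_argsort(vals.size(), vals.data(), perm.data());
        return perm;
    }
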
@@ -519,12 +550,13 @@ void fvec_argsort_parallel (size_t n, const float *vals,
         int nseg = nt;
         while (nseg > 1) {
             nseg = (nseg + 1) / 2;
-            std::swap
+            std::swap(permA, permB);
         }
     }
 
 #pragma omp parallel
-    for (size_t i = 0; i < n; i++)
+    for (size_t i = 0; i < n; i++)
+        permA[i] = i;
 
     ArgsortComparator comp = {vals};
 
@@ -536,7 +568,7 @@ void fvec_argsort_parallel (size_t n, const float *vals,
         size_t i0 = t * n / nt;
         size_t i1 = (t + 1) * n / nt;
         SegmentS seg = {i0, i1};
-        std::sort
+        std::sort(permA + seg.i0, permA + seg.i1, comp);
         segs[t] = seg;
     }
     int prev_nested = omp_get_nested();
@@ -551,99 +583,84 @@ void fvec_argsort_parallel (size_t n, const float *vals,
 #pragma omp parallel for num_threads(nseg1)
         for (int s = 0; s < nseg; s += 2) {
             if (s + 1 == nseg) { // otherwise isolated segment
-                memcpy(permB + segs[s].i0,
+                memcpy(permB + segs[s].i0,
+                       permA + segs[s].i0,
                        segs[s].len() * sizeof(size_t));
             } else {
                 int t0 = s * sub_nt / sub_nseg1;
                 int t1 = (s + 1) * sub_nt / sub_nseg1;
                 printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
-                parallel_merge(
-
+                parallel_merge(
+                        permA, permB, segs[s], segs[s + 1], t1 - t0, comp);
             }
         }
         for (int s = 0; s < nseg; s += 2)
             segs[s / 2] = segs[s];
         nseg = nseg1;
-        std::swap
+        std::swap(permA, permB);
     }
-    assert
+    assert(permA == perm);
     omp_set_nested(prev_nested);
-    delete
+    delete[] perm2;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-const float *fvecs_maybe_subsample (
-        size_t d, size_t *n, size_t nmax, const float *x,
-        bool verbose, int64_t seed)
-{
-
-    if (*n <= nmax) return x; // nothing to do
+const float* fvecs_maybe_subsample(
+        size_t d,
+        size_t* n,
+        size_t nmax,
+        const float* x,
+        bool verbose,
+        int64_t seed) {
+    if (*n <= nmax)
+        return x; // nothing to do
 
     size_t n2 = nmax;
     if (verbose) {
-        printf
-
+        printf(" Input training set too big (max size is %zd), sampling "
+               "%zd / %zd vectors\n",
+               nmax,
+               n2,
+               *n);
     }
-    std::vector<int> subset
-    rand_perm
-    float
+    std::vector<int> subset(*n);
+    rand_perm(subset.data(), *n, seed);
+    float* x_subset = new float[n2 * d];
     for (int64_t i = 0; i < n2; i++)
-        memcpy
-                &x[subset[i] * size_t(d)],
-                sizeof (x[0]) * d);
+        memcpy(&x_subset[i * d], &x[subset[i] * size_t(d)], sizeof(x[0]) * d);
     *n = n2;
     return x_subset;
 }
 
-
-void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) {
+void binary_to_real(size_t d, const uint8_t* x_in, float* x_out) {
     for (size_t i = 0; i < d; ++i) {
         x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1;
     }
 }
 
-void real_to_binary(size_t d, const float
-
-
-
-
-
-
+void real_to_binary(size_t d, const float* x_in, uint8_t* x_out) {
+    for (size_t i = 0; i < d / 8; ++i) {
+        uint8_t b = 0;
+        for (int j = 0; j < 8; ++j) {
+            if (x_in[8 * i + j] > 0) {
+                b |= (1 << j);
+            }
+        }
+        x_out[i] = b;
     }
-        x_out[i] = b;
-    }
 }
 
-
 // from Python's stringobject.c
-uint64_t hash_bytes
-    const uint8_t
+uint64_t hash_bytes(const uint8_t* bytes, int64_t n) {
+    const uint8_t* p = bytes;
     uint64_t x = (uint64_t)(*p) << 7;
     int64_t len = n;
     while (--len >= 0) {
-        x = (1000003*x) ^ *p++;
+        x = (1000003 * x) ^ *p++;
     }
     x ^= n;
     return x;
 }
 
-
 bool check_openmp() {
     omp_set_num_threads(10);
 
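
The binarization helpers above pack the signs of d floats into d/8 bytes and expand them back to ±1 floats. A small round-trip sketch, illustrative only, assuming d is a multiple of 8 and that the declarations come from faiss/utils/utils.h:

    // Sketch: float signs -> packed bits -> {-1, +1} floats.
    #include <faiss/utils/utils.h>

    #include <cstdint>
    #include <vector>

    void binarize_roundtrip(const std::vector<float>& x, size_t d) {
        std::vector<uint8_t> codes(d / 8);
        faiss::real_to_binary(d, x.data(), codes.data());
        std::vector<float> back(d);
        faiss::binary_to_real(d, codes.data(), back.data());
    }
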
@@ -654,7 +671,7 @@ bool check_openmp() {
     std::vector<int> nt_per_thread(10);
     size_t sum = 0;
     bool in_parallel = true;
-#pragma omp parallel reduction(
+#pragma omp parallel reduction(+ : sum)
     {
         if (!omp_in_parallel()) {
             in_parallel = false;
@@ -665,7 +682,7 @@ bool check_openmp() {
 
         nt_per_thread[rank] = nt;
 #pragma omp for
-        for(int i = 0; i < 1000 * 1000 * 10; i++) {
+        for (int i = 0; i < 1000 * 1000 * 10; i++) {
             sum += i;
         }
     }