RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/utils/WorkerThread.cpp CHANGED

@@ -5,9 +5,8 @@
  * LICENSE file in the root directory of this source tree.
  */
-#include <faiss/utils/WorkerThread.h>
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/WorkerThread.h>
 #include <exception>
 namespace faiss {
@@ -15,112 +14,104 @@ namespace faiss {
 namespace {
 // Captures any exceptions thrown by the lambda and returns them via the promise
-void runCallback(std::function<void()>& fn,
-                 std::promise<bool>& promise) {
-  try {
-    fn();
-    promise.set_value(true);
-  } catch (...) {
-    promise.set_exception(std::current_exception());
-  }
+void runCallback(std::function<void()>& fn, std::promise<bool>& promise) {
+    try {
+        fn();
+        promise.set_value(true);
+    } catch (...) {
+        promise.set_exception(std::current_exception());
+    }
 }
 } // namespace
-WorkerThread::WorkerThread() :
-    wantStop_(false) {
-  startThread();
+WorkerThread::WorkerThread() : wantStop_(false) {
+    startThread();
-  // Make sure that the thread has started before continuing
-  add([](){}).get();
+    // Make sure that the thread has started before continuing
+    add([]() {}).get();
 }
 WorkerThread::~WorkerThread() {
-  stop();
-  waitForThreadExit();
+    stop();
+    waitForThreadExit();
 }
-void
-WorkerThread::startThread() {
-  thread_ = std::thread([this](){ threadMain(); });
+void WorkerThread::startThread() {
+    thread_ = std::thread([this]() { threadMain(); });
 }
-void
-WorkerThread::stop() {
-  std::lock_guard<std::mutex> guard(mutex_);
+void WorkerThread::stop() {
+    std::lock_guard<std::mutex> guard(mutex_);
-  wantStop_ = true;
-  monitor_.notify_one();
+    wantStop_ = true;
+    monitor_.notify_one();
 }
-std::future<bool>
-WorkerThread::add(std::function<void()> f) {
-  std::lock_guard<std::mutex> guard(mutex_);
+std::future<bool> WorkerThread::add(std::function<void()> f) {
+    std::lock_guard<std::mutex> guard(mutex_);
-  if (wantStop_) {
-    // The timer thread has been stopped, or we want to stop; we can't
-    // schedule anything else
-    std::promise<bool> p;
-    auto fut = p.get_future();
+    if (wantStop_) {
+        // The timer thread has been stopped, or we want to stop; we can't
+        // schedule anything else
+        std::promise<bool> p;
+        auto fut = p.get_future();
-    // did not execute
-    p.set_value(false);
-    return fut;
-  }
+        // did not execute
+        p.set_value(false);
+        return fut;
+    }
-  auto pr = std::promise<bool>();
-  auto fut = pr.get_future();
+    auto pr = std::promise<bool>();
+    auto fut = pr.get_future();
-  queue_.emplace_back(std::make_pair(std::move(f), std::move(pr)));
+    queue_.emplace_back(std::make_pair(std::move(f), std::move(pr)));
-  // Wake up our thread
-  monitor_.notify_one();
-  return fut;
+    // Wake up our thread
+    monitor_.notify_one();
+    return fut;
 }
-void
-WorkerThread::threadMain() {
-  threadLoop();
+void WorkerThread::threadMain() {
+    threadLoop();
-  // Call all pending tasks
-  FAISS_ASSERT(wantStop_);
+    // Call all pending tasks
+    FAISS_ASSERT(wantStop_);
-  // flush all pending operations
-  for (auto& f : queue_) {
-    runCallback(f.first, f.second);
-  }
+    // flush all pending operations
+    for (auto& f : queue_) {
+        runCallback(f.first, f.second);
+    }
 }
-void
-WorkerThread::threadLoop() {
-  while (true) {
-    std::pair<std::function<void()>, std::promise<bool>> data;
+void WorkerThread::threadLoop() {
+    while (true) {
+        std::pair<std::function<void()>, std::promise<bool>> data;
-    {
-      std::unique_lock<std::mutex> lock(mutex_);
+        {
+            std::unique_lock<std::mutex> lock(mutex_);
-      while (!wantStop_ && queue_.empty()) {
-        monitor_.wait(lock);
-      }
+            while (!wantStop_ && queue_.empty()) {
+                monitor_.wait(lock);
+            }
-      if (wantStop_) {
-        return;
-      }
+            if (wantStop_) {
+                return;
+            }
-      data = std::move(queue_.front());
-      queue_.pop_front();
-    }
+            data = std::move(queue_.front());
+            queue_.pop_front();
+        }
-    runCallback(data.first, data.second);
-  }
+        runCallback(data.first, data.second);
+    }
 }
-void
-WorkerThread::waitForThreadExit() {
-  try {
-    thread_.join();
-  } catch (...) {
-  }
+void WorkerThread::waitForThreadExit() {
+    try {
+        thread_.join();
+    } catch (...) {
+    }
 }
-} // namespace
+} // namespace faiss

data/vendor/faiss/faiss/utils/WorkerThread.h CHANGED

@@ -5,57 +5,56 @@
  * LICENSE file in the root directory of this source tree.
  */
 #pragma once
 #include <condition_variable>
-#include <future>
 #include <deque>
+#include <future>
 #include <thread>
 namespace faiss {
 class WorkerThread {
- public:
-  WorkerThread();
+   public:
+    WorkerThread();
-  /// Stops and waits for the worker thread to exit, flushing all
-  /// pending lambdas
-  ~WorkerThread();
+    /// Stops and waits for the worker thread to exit, flushing all
+    /// pending lambdas
+    ~WorkerThread();
-  /// Request that the worker thread stop itself
-  void stop();
+    /// Request that the worker thread stop itself
+    void stop();
-  /// Blocking waits in the current thread for the worker thread to
-  /// stop
-  void waitForThreadExit();
+    /// Blocking waits in the current thread for the worker thread to
+    /// stop
+    void waitForThreadExit();
-  /// Adds a lambda to run on the worker thread; returns a future that
-  /// can be used to block on its completion.
-  /// Future status is `true` if the lambda was run in the worker
-  /// thread; `false` if it was not run, because the worker thread is
-  /// exiting or has exited.
-  std::future<bool> add(std::function<void()> f);
+    /// Adds a lambda to run on the worker thread; returns a future that
+    /// can be used to block on its completion.
+    /// Future status is `true` if the lambda was run in the worker
+    /// thread; `false` if it was not run, because the worker thread is
+    /// exiting or has exited.
+    std::future<bool> add(std::function<void()> f);
- private:
-  void startThread();
-  void threadMain();
-  void threadLoop();
+   private:
+    void startThread();
+    void threadMain();
+    void threadLoop();
-  /// Thread that all queued lambdas are run on
-  std::thread thread_;
+    /// Thread that all queued lambdas are run on
+    std::thread thread_;
-  /// Mutex for the queue and exit status
-  std::mutex mutex_;
+    /// Mutex for the queue and exit status
+    std::mutex mutex_;
-  /// Monitor for the exit status and the queue
-  std::condition_variable monitor_;
+    /// Monitor for the exit status and the queue
+    std::condition_variable monitor_;
-  /// Whether or not we want the thread to exit
-  bool wantStop_;
+    /// Whether or not we want the thread to exit
+    bool wantStop_;
-  /// Queue of pending lambdas to call
-  std::deque<std::pair<std::function<void()>, std::promise<bool>>> queue_;
+    /// Queue of pending lambdas to call
+    std::deque<std::pair<std::function<void()>, std::promise<bool>>> queue_;
 };
-} // namespace
+} // namespace faiss

data/vendor/faiss/faiss/utils/distances.cpp CHANGED

@@ -10,10 +10,10 @@
 #include <faiss/utils/distances.h>
 #include <algorithm>
-#include <cstdio>
 #include <cassert>
-#include <cstring>
 #include <cmath>
+#include <cstdio>
+#include <cstring>
 #include <omp.h>
@@ -21,186 +21,151 @@
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/impl/ResultHandler.h>
 #ifndef FINTEGER
 #define FINTEGER long
 #endif
 extern "C" {
 /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
-int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
-            n, FINTEGER *k, const float *alpha, const float *a,
-            FINTEGER *lda, const float *b, FINTEGER *
-            ldb, float *beta, float *c, FINTEGER *ldc);
+int sgemm_(
+        const char* transa,
+        const char* transb,
+        FINTEGER* m,
+        FINTEGER* n,
+        FINTEGER* k,
+        const float* alpha,
+        const float* a,
+        FINTEGER* lda,
+        const float* b,
+        FINTEGER* ldb,
+        float* beta,
+        float* c,
+        FINTEGER* ldc);
 }
 namespace faiss {
 /***************************************************************************
  * Matrix/vector ops
  ***************************************************************************/
 /* Compute the L2 norm of a set of nx vectors */
-void fvec_norms_L2 (float * __restrict nr,
-                    const float * __restrict x,
-                    size_t d, size_t nx)
-{
+void fvec_norms_L2(
+        float* __restrict nr,
+        const float* __restrict x,
+        size_t d,
+        size_t nx) {
 #pragma omp parallel for
     for (int64_t i = 0; i < nx; i++) {
-        nr[i] = sqrtf (fvec_norm_L2sqr (x + i * d, d));
+        nr[i] = sqrtf(fvec_norm_L2sqr(x + i * d, d));
     }
 }
-void fvec_norms_L2sqr (float * __restrict nr,
-                       const float * __restrict x,
-                       size_t d, size_t nx)
-{
+void fvec_norms_L2sqr(
+        float* __restrict nr,
+        const float* __restrict x,
+        size_t d,
+        size_t nx) {
 #pragma omp parallel for
     for (int64_t i = 0; i < nx; i++)
-        nr[i] = fvec_norm_L2sqr (x + i * d, d);
+        nr[i] = fvec_norm_L2sqr(x + i * d, d);
 }
-void fvec_renorm_L2 (size_t d, size_t nx, float * __restrict x)
-{
+void fvec_renorm_L2(size_t d, size_t nx, float* __restrict x) {
 #pragma omp parallel for
     for (int64_t i = 0; i < nx; i++) {
-        float * __restrict xi = x + i * d;
+        float* __restrict xi = x + i * d;
-        float nr = fvec_norm_L2sqr (xi, d);
+        float nr = fvec_norm_L2sqr(xi, d);
         if (nr > 0) {
             size_t j;
-            const float inv_nr = 1.0 / sqrtf (nr);
+            const float inv_nr = 1.0 / sqrtf(nr);
             for (j = 0; j < d; j++)
                 xi[j] *= inv_nr;
         }
     }
 }
 /***************************************************************************
  * KNN functions
  ***************************************************************************/
 namespace {
 /* Find the nearest neighbors for nx queries in a set of ny vectors */
-template<class ResultHandler>
-void exhaustive_inner_product_seq (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        ResultHandler &res)
-{
-    size_t check_period = InterruptCallback::get_period_hint (ny * d);
-    check_period *= omp_get_max_threads();
+template <class ResultHandler>
+void exhaustive_inner_product_seq(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        ResultHandler& res) {
     using SingleResultHandler = typename ResultHandler::SingleResultHandler;
-    for (size_t i0 = 0; i0 < nx; i0 += check_period) {
-        size_t i1 = std::min(i0 + check_period, nx);
 #pragma omp parallel
-        {
-            SingleResultHandler resi(res);
+    {
+        SingleResultHandler resi(res);
 #pragma omp for
-            for (int64_t i = i0; i < i1; i++) {
-                const float * x_i = x + i * d;
-                const float * y_j = y;
+        for (int64_t i = 0; i < nx; i++) {
+            const float* x_i = x + i * d;
+            const float* y_j = y;
-                resi.begin(i);
+            resi.begin(i);
-                for (size_t j = 0; j < ny; j++) {
-                    float ip = fvec_inner_product (x_i, y_j, d);
-                    resi.add_result(ip, j);
-                    y_j += d;
-                }
-                resi.end();
+            for (size_t j = 0; j < ny; j++) {
+                float ip = fvec_inner_product(x_i, y_j, d);
+                resi.add_result(ip, j);
+                y_j += d;
             }
+            resi.end();
         }
-        InterruptCallback::check ();
     }
 }
-template<class ResultHandler>
-void exhaustive_L2sqr_seq (
-                const float * x,
-                const float * y,
-                size_t d, size_t nx, size_t ny,
-                ResultHandler & res)
-{
-    size_t check_period = InterruptCallback::get_period_hint (ny * d);
-    check_period *= omp_get_max_threads();
+template <class ResultHandler>
+void exhaustive_L2sqr_seq(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        ResultHandler& res) {
     using SingleResultHandler = typename ResultHandler::SingleResultHandler;
-    for (size_t i0 = 0; i0 < nx; i0 += check_period) {
-        size_t i1 = std::min(i0 + check_period, nx);
 #pragma omp parallel
-        {
-            SingleResultHandler resi(res);
+    {
+        SingleResultHandler resi(res);
 #pragma omp for
-            for (int64_t i = i0; i < i1; i++) {
-                const float * x_i = x + i * d;
-                const float * y_j = y;
-                resi.begin(i);
-                for (size_t j = 0; j < ny; j++) {
-                    float disij = fvec_L2sqr (x_i, y_j, d);
-                    resi.add_result(disij, j);
-                    y_j += d;
-                }
-                resi.end();
+        for (int64_t i = 0; i < nx; i++) {
+            const float* x_i = x + i * d;
+            const float* y_j = y;
+            resi.begin(i);
+            for (size_t j = 0; j < ny; j++) {
+                float disij = fvec_L2sqr(x_i, y_j, d);
+                resi.add_result(disij, j);
+                y_j += d;
             }
+            resi.end();
         }
-        InterruptCallback::check ();
     }
-};
+}
 /** Find the nearest neighbors for nx queries in a set of ny vectors */
-template<class ResultHandler>
-void exhaustive_inner_product_blas (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        ResultHandler & res)
-{
+template <class ResultHandler>
+void exhaustive_inner_product_blas(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        ResultHandler& res) {
     // BLAS does not like empty matrices
-    if (nx == 0 || ny == 0) return;
+    if (nx == 0 || ny == 0)
+        return;
     /* block sizes */
     const size_t bs_x = distance_compute_blas_query_bs;
@@ -209,86 +174,105 @@ void exhaustive_inner_product_blas (
     for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
         size_t i1 = i0 + bs_x;
-        if(i1 > nx) i1 = nx;
+        if (i1 > nx)
+            i1 = nx;
         res.begin_multiple(i0, i1);
         for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
             size_t j1 = j0 + bs_y;
-            if (j1 > ny) j1 = ny;
+            if (j1 > ny)
+                j1 = ny;
             /* compute the actual dot products */
             {
                 float one = 1, zero = 0;
                 FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;
-                sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one,
-                        y + j0 * d, &di,
-                        x + i0 * d, &di, &zero,
-                        ip_block.get(), &nyi);
+                sgemm_("Transpose",
+                       "Not transpose",
+                       &nyi,
+                       &nxi,
+                       &di,
+                       &one,
+                       y + j0 * d,
+                       &di,
+                       x + i0 * d,
+                       &di,
+                       &zero,
+                       ip_block.get(),
+                       &nyi);
             }
             res.add_results(j0, j1, ip_block.get());
         }
         res.end_multiple();
-        InterruptCallback::check ();
+        InterruptCallback::check();
     }
 }
 // distance correction is an operator that can be applied to transform
 // the distances
-template<class ResultHandler>
-void exhaustive_L2sqr_blas (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        ResultHandler & res,
-        const float *y_norms = nullptr)
-{
+template <class ResultHandler>
+void exhaustive_L2sqr_blas(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        ResultHandler& res,
+        const float* y_norms = nullptr) {
     // BLAS does not like empty matrices
-    if (nx == 0 || ny == 0) return;
+    if (nx == 0 || ny == 0)
+        return;
     /* block sizes */
     const size_t bs_x = distance_compute_blas_query_bs;
     const size_t bs_y = distance_compute_blas_database_bs;
     // const size_t bs_x = 16, bs_y = 16;
-    std::unique_ptr<float []> ip_block(new float[bs_x * bs_y]);
-    std::unique_ptr<float []> x_norms(new float[nx]);
-    std::unique_ptr<float []> del2;
+    std::unique_ptr<float[]> ip_block(new float[bs_x * bs_y]);
+    std::unique_ptr<float[]> x_norms(new float[nx]);
+    std::unique_ptr<float[]> del2;
-    fvec_norms_L2sqr (x_norms.get(), x, d, nx);
+    fvec_norms_L2sqr(x_norms.get(), x, d, nx);
     if (!y_norms) {
-        float *y_norms2 = new float[ny];
+        float* y_norms2 = new float[ny];
         del2.reset(y_norms2);
-        fvec_norms_L2sqr (y_norms2, y, d, ny);
+        fvec_norms_L2sqr(y_norms2, y, d, ny);
         y_norms = y_norms2;
     }
     for (size_t i0 = 0; i0 < nx; i0 += bs_x) {
         size_t i1 = i0 + bs_x;
-        if(i1 > nx) i1 = nx;
+        if (i1 > nx)
+            i1 = nx;
         res.begin_multiple(i0, i1);
         for (size_t j0 = 0; j0 < ny; j0 += bs_y) {
             size_t j1 = j0 + bs_y;
-            if (j1 > ny) j1 = ny;
+            if (j1 > ny)
+                j1 = ny;
             /* compute the actual dot products */
             {
                 float one = 1, zero = 0;
                 FINTEGER nyi = j1 - j0, nxi = i1 - i0, di = d;
-                sgemm_ ("Transpose", "Not transpose", &nyi, &nxi, &di, &one,
-                        y + j0 * d, &di,
-                        x + i0 * d, &di, &zero,
-                        ip_block.get(), &nyi);
+                sgemm_("Transpose",
+                       "Not transpose",
+                       &nyi,
+                       &nxi,
+                       &di,
+                       &one,
+                       y + j0 * d,
+                       &di,
+                       x + i0 * d,
+                       &di,
+                       &zero,
+                       ip_block.get(),
+                       &nyi);
             }
+#pragma omp parallel for
             for (int64_t i = i0; i < i1; i++) {
-                float *ip_line = ip_block.get() + (i - i0) * (j1 - j0);
+                float* ip_line = ip_block.get() + (i - i0) * (j1 - j0);
                 for (size_t j = j0; j < j1; j++) {
                     float ip = *ip_line;
@@ -296,7 +280,8 @@ void exhaustive_L2sqr_blas (
                     // negative values can occur for identical vectors
                     // due to roundoff errors
-                    if (dis < 0) dis = 0;
+                    if (dis < 0)
+                        dis = 0;
                     *ip_line = dis;
                     ip_line++;
@@ -305,18 +290,12 @@ void exhaustive_L2sqr_blas (
             res.add_results(j0, j1, ip_block.get());
         }
         res.end_multiple();
-        InterruptCallback::check ();
+        InterruptCallback::check();
     }
 }
 } // anonymous namespace
 /*******************************************************
  * KNN driver functions
  *******************************************************/
@@ -326,268 +305,275 @@ int distance_compute_blas_query_bs = 4096;
 int distance_compute_blas_database_bs = 1024;
 int distance_compute_min_k_reservoir = 100;
-void knn_inner_product (const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        float_minheap_array_t * ha)
-{
+void knn_inner_product(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        float_minheap_array_t* ha) {
     if (ha->k < distance_compute_min_k_reservoir) {
         HeapResultHandler<CMin<float, int64_t>> res(
-            ha->nh, ha->val, ha->ids, ha->k);
+                ha->nh, ha->val, ha->ids, ha->k);
         if (nx < distance_compute_blas_threshold) {
-            exhaustive_inner_product_seq (x, y, d, nx, ny, res);
+            exhaustive_inner_product_seq(x, y, d, nx, ny, res);
         } else {
-            exhaustive_inner_product_blas (x, y, d, nx, ny, res);
+            exhaustive_inner_product_blas(x, y, d, nx, ny, res);
         }
     } else {
         ReservoirResultHandler<CMin<float, int64_t>> res(
-            ha->nh, ha->val, ha->ids, ha->k);
+                ha->nh, ha->val, ha->ids, ha->k);
         if (nx < distance_compute_blas_threshold) {
-            exhaustive_inner_product_seq (x, y, d, nx, ny, res);
+            exhaustive_inner_product_seq(x, y, d, nx, ny, res);
         } else {
-            exhaustive_inner_product_blas (x, y, d, nx, ny, res);
+            exhaustive_inner_product_blas(x, y, d, nx, ny, res);
         }
     }
 }
-void knn_L2sqr (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
-        float_maxheap_array_t * ha,
-        const float *y_norm2
-) {
+void knn_L2sqr(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        float_maxheap_array_t* ha,
+        const float* y_norm2) {
     if (ha->k < distance_compute_min_k_reservoir) {
         HeapResultHandler<CMax<float, int64_t>> res(
-            ha->nh, ha->val, ha->ids, ha->k);
+                ha->nh, ha->val, ha->ids, ha->k);
         if (nx < distance_compute_blas_threshold) {
-            exhaustive_L2sqr_seq (x, y, d, nx, ny, res);
+            exhaustive_L2sqr_seq(x, y, d, nx, ny, res);
         } else {
-            exhaustive_L2sqr_blas (x, y, d, nx, ny, res, y_norm2);
+            exhaustive_L2sqr_blas(x, y, d, nx, ny, res, y_norm2);
         }
     } else {
         ReservoirResultHandler<CMax<float, int64_t>> res(
-            ha->nh, ha->val, ha->ids, ha->k);
+                ha->nh, ha->val, ha->ids, ha->k);
         if (nx < distance_compute_blas_threshold) {
-            exhaustive_L2sqr_seq (x, y, d, nx, ny, res);
+            exhaustive_L2sqr_seq(x, y, d, nx, ny, res);
         } else {
-            exhaustive_L2sqr_blas (x, y, d, nx, ny, res, y_norm2);
+            exhaustive_L2sqr_blas(x, y, d, nx, ny, res, y_norm2);
         }
     }
 }
 /***************************************************************************
  * Range search
  ***************************************************************************/
-void range_search_L2sqr (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
+void range_search_L2sqr(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
         float radius,
-        RangeSearchResult *res)
-{
+        RangeSearchResult* res) {
     RangeSearchResultHandler<CMax<float, int64_t>> resh(res, radius);
     if (nx < distance_compute_blas_threshold) {
-        exhaustive_L2sqr_seq (x, y, d, nx, ny, resh);
+        exhaustive_L2sqr_seq(x, y, d, nx, ny, resh);
     } else {
-        exhaustive_L2sqr_blas (x, y, d, nx, ny, resh);
+        exhaustive_L2sqr_blas(x, y, d, nx, ny, resh);
     }
 }
-void range_search_inner_product (
-        const float * x,
-        const float * y,
-        size_t d, size_t nx, size_t ny,
+void range_search_inner_product(
+        const float* x,
+        const float* y,
+        size_t d,
+        size_t nx,
+        size_t ny,
         float radius,
-        RangeSearchResult *res)
-{
+        RangeSearchResult* res) {
     RangeSearchResultHandler<CMin<float, int64_t>> resh(res, radius);
     if (nx < distance_compute_blas_threshold) {
-        exhaustive_inner_product_seq (x, y, d, nx, ny, resh);
+        exhaustive_inner_product_seq(x, y, d, nx, ny, resh);
     } else {
-        exhaustive_inner_product_blas (x, y, d, nx, ny, resh);
+        exhaustive_inner_product_blas(x, y, d, nx, ny, resh);
     }
 }
 /***************************************************************************
  * Compute a subset of distances
  ***************************************************************************/
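 /* compute the inner products between each of the nx vectors of x and a
    subset of ny vectors of y, whose indices are given by ids (entries
    with a negative index are skipped and their output is left unwritten). */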
-void fvec_inner_products_by_idx (float * __restrict ip,
-                                 const float * x,
-                                 const float * y,
-                                 const int64_t * __restrict ids, /* for y vecs */
-                                 size_t d, size_t nx, size_t ny)
-{
+void fvec_inner_products_by_idx(
+        float* __restrict ip,
+        const float* x,
+        const float* y,
+        const int64_t* __restrict ids, /* for y vecs */
+        size_t d,
+        size_t nx,
+        size_t ny) {
 #pragma omp parallel for
     for (int64_t j = 0; j < nx; j++) {
-        const int64_t * __restrict idsj = ids + j * ny;
-        const float * xj = x + j * d;
-        float * __restrict ipj = ip + j * ny;
+        const int64_t* __restrict idsj = ids + j * ny;
+        const float* xj = x + j * d;
+        float* __restrict ipj = ip + j * ny;
         for (size_t i = 0; i < ny; i++) {
             if (idsj[i] < 0)
                 continue;
-            ipj[i] = fvec_inner_product (xj, y + d * idsj[i], d);
+            ipj[i] = fvec_inner_product(xj, y + d * idsj[i], d);
         }
     }
 }
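 /* compute the squared L2 distances between each of the nx vectors of x
    and a subset of ny vectors of y, whose indices are given by ids
    (entries with a negative index are skipped and their output is left
    unwritten). */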
-void fvec_L2sqr_by_idx (float * __restrict dis,
-                        const float * x,
-                        const float * y,
-                        const int64_t * __restrict ids, /* ids of y vecs */
-                        size_t d, size_t nx, size_t ny)
-{
+void fvec_L2sqr_by_idx(
+        float* __restrict dis,
+        const float* x,
+        const float* y,
+        const int64_t* __restrict ids, /* ids of y vecs */
+        size_t d,
+        size_t nx,
+        size_t ny) {
 #pragma omp parallel for
     for (int64_t j = 0; j < nx; j++) {
-        const int64_t * __restrict idsj = ids + j * ny;
-        const float * xj = x + j * d;
-        float * __restrict disj = dis + j * ny;
+        const int64_t* __restrict idsj = ids + j * ny;
+        const float* xj = x + j * d;
+        float* __restrict disj = dis + j * ny;
         for (size_t i = 0; i < ny; i++) {
             if (idsj[i] < 0)
                 continue;
-            disj[i] = fvec_L2sqr (xj, y + d * idsj[i], d);
+            disj[i] = fvec_L2sqr(xj, y + d * idsj[i], d);
         }
     }
 }
-void pairwise_indexed_L2sqr (
-        size_t d, size_t n,
-        const float * x, const int64_t *ix,
-        const float * y, const int64_t *iy,
-        float *dis)
-{
+void pairwise_indexed_L2sqr(
+        size_t d,
+        size_t n,
+        const float* x,
+        const int64_t* ix,
+        const float* y,
+        const int64_t* iy,
+        float* dis) {
 #pragma omp parallel for
     for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
-            dis[j] = fvec_L2sqr (x + d * ix[j], y + d * iy[j], d);
+            dis[j] = fvec_L2sqr(x + d * ix[j], y + d * iy[j], d);
         }
     }
 }
-void pairwise_indexed_inner_product (
-        size_t d, size_t n,
-        const float * x, const int64_t *ix,
-        const float * y, const int64_t *iy,
-        float *dis)
-{
+void pairwise_indexed_inner_product(
+        size_t d,
+        size_t n,
+        const float* x,
+        const int64_t* ix,
+        const float* y,
+        const int64_t* iy,
+        float* dis) {
 #pragma omp parallel for
     for (int64_t j = 0; j < n; j++) {
         if (ix[j] >= 0 && iy[j] >= 0) {
-            dis[j] = fvec_inner_product (x + d * ix[j], y + d * iy[j], d);
+            dis[j] = fvec_inner_product(x + d * ix[j], y + d * iy[j], d);
         }
     }
 }
 /* Find the nearest neighbors for nx queries in a set of ny vectors
    indexed by ids. May be useful for re-ranking a pre-selected vector list */
-void knn_inner_products_by_idx (const float * x,
-                                const float * y,
-                                const int64_t * ids,
-                                size_t d, size_t nx, size_t ny,
-                                float_minheap_array_t * res)
-{
+void knn_inner_products_by_idx(
+        const float* x,
+        const float* y,
+        const int64_t* ids,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        float_minheap_array_t* res) {
     size_t k = res->k;
 #pragma omp parallel for
     for (int64_t i = 0; i < nx; i++) {
-        const float * x_ = x + i * d;
-        const int64_t * idsi = ids + i * ny;
+        const float* x_ = x + i * d;
+        const int64_t* idsi = ids + i * ny;
         size_t j;
-        float * __restrict simi = res->get_val(i);
-        int64_t * __restrict idxi = res->get_ids (i);
-        minheap_heapify (k, simi, idxi);
+        float* __restrict simi = res->get_val(i);
+        int64_t* __restrict idxi = res->get_ids(i);
+        minheap_heapify(k, simi, idxi);
         for (j = 0; j < ny; j++) {
-            if (idsi[j] < 0) break;
-            float ip = fvec_inner_product (x_, y + d * idsi[j], d);
+            if (idsi[j] < 0)
+                break;
+            float ip = fvec_inner_product(x_, y + d * idsi[j], d);
             if (ip > simi[0]) {
-                minheap_replace_top (k, simi, idxi, ip, idsi[j]);
+                minheap_replace_top(k, simi, idxi, ip, idsi[j]);
             }
         }
-        minheap_reorder (k, simi, idxi);
+        minheap_reorder(k, simi, idxi);
     }
 }
-void knn_L2sqr_by_idx (const float * x,
-                       const float * y,
-                       const int64_t * __restrict ids,
-                       size_t d, size_t nx, size_t ny,
-                       float_maxheap_array_t * res)
-{
+void knn_L2sqr_by_idx(
+        const float* x,
+        const float* y,
+        const int64_t* __restrict ids,
+        size_t d,
+        size_t nx,
+        size_t ny,
+        float_maxheap_array_t* res) {
     size_t k = res->k;
 #pragma omp parallel for
     for (int64_t i = 0; i < nx; i++) {
-        const float * x_ = x + i * d;
-        const int64_t * __restrict idsi = ids + i * ny;
-        float * __restrict simi = res->get_val(i);
-        int64_t * __restrict idxi = res->get_ids (i);
-        maxheap_heapify (res->k, simi, idxi);
+        const float* x_ = x + i * d;
+        const int64_t* __restrict idsi = ids + i * ny;
+        float* __restrict simi = res->get_val(i);
+        int64_t* __restrict idxi = res->get_ids(i);
+        maxheap_heapify(res->k, simi, idxi);
         for (size_t j = 0; j < ny; j++) {
-            float disij = fvec_L2sqr (x_, y + d * idsi[j], d);
+            float disij = fvec_L2sqr(x_, y + d * idsi[j], d);
             if (disij < simi[0]) {
-                maxheap_replace_top (k, simi, idxi, disij, idsi[j]);
+                maxheap_replace_top(k, simi, idxi, disij, idsi[j]);
             }
         }
-        maxheap_reorder (res->k, simi, idxi);
+        maxheap_reorder(res->k, simi, idxi);
     }
 }
-void pairwise_L2sqr (int64_t d,
-                     int64_t nq, const float *xq,
-                     int64_t nb, const float *xb,
-                     float *dis,
-                     int64_t ldq, int64_t ldb, int64_t ldd)
-{
-    if (nq == 0 || nb == 0) return;
-    if (ldq == -1) ldq = d;
-    if (ldb == -1) ldb = d;
-    if (ldd == -1) ldd = nb;
+void pairwise_L2sqr(
+        int64_t d,
+        int64_t nq,
+        const float* xq,
+        int64_t nb,
+        const float* xb,
+        float* dis,
+        int64_t ldq,
+        int64_t ldb,
+        int64_t ldd) {
+    if (nq == 0 || nb == 0)
+        return;
+    if (ldq == -1)
+        ldq = d;
+    if (ldb == -1)
+        ldb = d;
+    if (ldd == -1)
+        ldd = nb;
     // store in beginning of distance matrix to avoid malloc
-    float *b_norms = dis;
+    float* b_norms = dis;
 #pragma omp parallel for
     for (int64_t i = 0; i < nb; i++)
-        b_norms [i] = fvec_norm_L2sqr (xb + i * ldb, d);
+        b_norms[i] = fvec_norm_L2sqr(xb + i * ldb, d);
 #pragma omp parallel for
     for (int64_t i = 1; i < nq; i++) {
-        float q_norm = fvec_norm_L2sqr (xq + i * ldq, d);
+        float q_norm = fvec_norm_L2sqr(xq + i * ldq, d);
         for (int64_t j = 0; j < nb; j++)
-            dis[i * ldd + j] = q_norm + b_norms [j];
+            dis[i * ldd + j] = q_norm + b_norms[j];
     }
     {
-        float q_norm = fvec_norm_L2sqr (xq, d);
+        float q_norm = fvec_norm_L2sqr(xq, d);
         for (int64_t j = 0; j < nb; j++)
             dis[j] += q_norm;
     }
@@ -596,22 +582,28 @@ void pairwise_L2sqr (int64_t d,
         FINTEGER nbi = nb, nqi = nq, di = d, ldqi = ldq, ldbi = ldb, lddi = ldd;
         float one = 1.0, minus_2 = -2.0;
-        sgemm_ ("Transposed", "Not transposed",
-                &nbi, &nqi, &di,
-                &minus_2,
-                xb, &ldbi,
-                xq, &ldqi,
-                &one, dis, &lddi);
+        sgemm_("Transposed",
+               "Not transposed",
+               &nbi,
+               &nqi,
+               &di,
+               &minus_2,
+               xb,
+               &ldbi,
+               xq,
+               &ldqi,
+               &one,
+               dis,
+               &lddi);
     }
 }
-void inner_product_to_L2sqr(float* __restrict dis,
-    const float* nr1,
-    const float* nr2,
-    size_t n1, size_t n2)
-{
+void inner_product_to_L2sqr(
+        float* __restrict dis,
+        const float* nr1,
+        const float* nr2,
+        size_t n1,
+        size_t n2) {
 #pragma omp parallel for
     for (int64_t j = 0; j < n1; j++) {
         float* disj = dis + j * n2;
@@ -620,5 +612,4 @@ void inner_product_to_L2sqr(float* __restrict dis,
     }
 }
 } // namespace faiss