RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.4 - Mend

faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (215) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +16 -0
data/LICENSE.txt +1 -1
data/README.md +7 -7
data/ext/faiss/extconf.rb +6 -3
data/ext/faiss/numo.hpp +4 -4
data/ext/faiss/utils.cpp +1 -1
data/ext/faiss/utils.h +1 -1
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +365 -194
data/vendor/faiss/faiss/Clustering.h +102 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
data/vendor/faiss/faiss/Index2Layer.h +22 -36
data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
data/vendor/faiss/faiss/IndexFlat.h +42 -59
data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
data/vendor/faiss/faiss/IndexIVF.h +169 -118
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
data/vendor/faiss/faiss/IndexLSH.h +20 -38
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
data/vendor/faiss/faiss/IndexPQ.h +64 -82
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
data/vendor/faiss/faiss/IndexRefine.h +32 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
data/vendor/faiss/faiss/VectorTransform.h +64 -89
data/vendor/faiss/faiss/clone_index.cpp +78 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
data/vendor/faiss/faiss/impl/io.cpp +76 -95
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +60 -29
data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +619 -397
data/vendor/faiss/faiss/index_factory.h +8 -6
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +305 -312
data/vendor/faiss/faiss/utils/distances.h +170 -122
data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +54 -49
metadata +29 -4

data/vendor/faiss/faiss/impl/ResidualQuantizer.h ADDED Viewed

@@ -0,0 +1,188 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include <cstdint>
+#include <vector>
+#include <faiss/Clustering.h>
+#include <faiss/impl/AdditiveQuantizer.h>
+namespace faiss {
+/** Residual quantizer with variable number of bits per sub-quantizer
+ *
+ * The residual centroids are stored in a big cumulative centroid table.
+ * The codes are represented either as a non-compact table of size (n, M) or
+ * as the compact output (n, code_size).
+ */
+struct ResidualQuantizer : AdditiveQuantizer {
+    /// initialization
+    enum train_type_t {
+        Train_default = 0,         ///< regular k-means
+        Train_progressive_dim = 1, ///< progressive dim clustering
+        Train_default_Train_top_beam = 1024,
+        Train_progressive_dim_Train_top_beam = 1025,
+        Train_default_Skip_codebook_tables = 2048,
+        Train_progressive_dim_Skip_codebook_tables = 2049,
+        Train_default_Train_top_beam_Skip_codebook_tables = 3072,
+        Train_progressive_dim_Train_top_beam_Skip_codebook_tables = 3073,
+    };
+    train_type_t train_type;
+    // set this bit on train_type if beam is to be trained only on the
+    // first element of the beam (faster but less accurate)
+    static const int Train_top_beam = 1024;
+    // set this bit to not automatically compute the codebook tables
+    // after training
+    static const int Skip_codebook_tables = 2048;
+    /// beam size used for training and for encoding
+    int max_beam_size;
+    /// use LUT for beam search
+    int use_beam_LUT;
+    /// distance matrices with beam search can get large, so use this
+    /// to batch computations at encoding time.
+    size_t max_mem_distances;
+    /// clustering parameters
+    ProgressiveDimClusteringParameters cp;
+    /// if non-NULL, use this index for assignment
+    ProgressiveDimIndexFactory* assign_index_factory;
+    ResidualQuantizer(
+            size_t d,
+            const std::vector<size_t>& nbits,
+            Search_type_t search_type = ST_decompress);
+    ResidualQuantizer(
+            size_t d,     /* dimensionality of the input vectors */
+            size_t M,     /* number of subquantizers */
+            size_t nbits, /* number of bits per subvector index */
+            Search_type_t search_type = ST_decompress);
+    ResidualQuantizer();
+    // Train the residual quantizer
+    void train(size_t n, const float* x) override;
+    /** Encode a set of vectors
+     *
+     * @param x      vectors to encode, size n * d
+     * @param codes  output codes, size n * code_size
+     */
+    void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
+    /** lower-level encode function
+     *
+     * @param n              number of vectors to handle
+     * @param residuals      vectors to encode, size (n, beam_size, d)
+     * @param beam_size      input beam size
+     * @param new_beam_size  output beam size (should be <= K * beam_size)
+     * @param new_codes      output codes, size (n, new_beam_size, m + 1)
+     * @param new_residuals  output residuals, size (n, new_beam_size, d)
+     * @param new_distances  output distances, size (n, new_beam_size)
+     */
+    void refine_beam(
+            size_t n,
+            size_t beam_size,
+            const float* residuals,
+            int new_beam_size,
+            int32_t* new_codes,
+            float* new_residuals = nullptr,
+            float* new_distances = nullptr) const;
+    void refine_beam_LUT(
+            size_t n,
+            const float* query_norms,
+            const float* query_cp,
+            int new_beam_size,
+            int32_t* new_codes,
+            float* new_distances = nullptr) const;
+    /** Beam search can consume a lot of memory. This function estimates the
+     * amount of mem used by refine_beam to adjust the batch size
+     *
+     * @param beam_size  if != -1, override the beam size
+     */
+    size_t memory_per_point(int beam_size = -1) const;
+    /** Cross products used in codebook tables
+     *
+     * These are used to keep track of norms of centroids.
+     */
+    void compute_codebook_tables();
+    /// dot products of all codebook vectors with each other
+    /// size total_codebook_size * total_codebook_size
+    std::vector<float> codebook_cross_products;
+    /// norms of all vectors
+    std::vector<float> cent_norms;
+};
+/** Encode a residual by sampling from a centroid table.
+ *
+ * This is a single encoding step of the residual quantizer.
+ * It allows low-level access to the encoding function, exposed mainly for unit
+ * tests.
+ *
+ * @param n              number of vectors to handle
+ * @param residuals      vectors to encode, size (n, beam_size, d)
+ * @param cent           centroids, size (K, d)
+ * @param beam_size      input beam size
+ * @param m              size of the codes for the previous encoding steps
+ * @param codes          code array for the previous steps of the beam (n,
+ * beam_size, m)
+ * @param new_beam_size  output beam size (should be <= K * beam_size)
+ * @param new_codes      output codes, size (n, new_beam_size, m + 1)
+ * @param new_residuals  output residuals, size (n, new_beam_size, d)
+ * @param new_distances  output distances, size (n, new_beam_size)
+ * @param assign_index   if non-NULL, will be used to perform assignment
+ */
+void beam_search_encode_step(
+        size_t d,
+        size_t K,
+        const float* cent,
+        size_t n,
+        size_t beam_size,
+        const float* residuals,
+        size_t m,
+        const int32_t* codes,
+        size_t new_beam_size,
+        int32_t* new_codes,
+        float* new_residuals,
+        float* new_distances,
+        Index* assign_index = nullptr);
+/** Encode a set of vectors using their dot products with the codebooks
+ *
+ */
+void beam_search_encode_step_tab(
+        size_t K,
+        size_t n,
+        size_t beam_size,                  // input sizes
+        const float* codebook_cross_norms, // size K * ldc
+        size_t ldc,                        // >= K
+        const uint64_t* codebook_offsets,  // m
+        const float* query_cp,             // size n * ldqc
+        size_t ldqc,                       // >= K
+        const float* cent_norms_i,         // size K
+        size_t m,
+        const int32_t* codes,   // n * beam_size * m
+        const float* distances, // n * beam_size
+        size_t new_beam_size,
+        int32_t* new_codes,    // n * new_beam_size * (m + 1)
+        float* new_distances); // n * new_beam_size
+}; // namespace faiss

data/vendor/faiss/faiss/impl/ResultHandler.h CHANGED Viewed

@@ -5,49 +5,38 @@
  * LICENSE file in the root directory of this source tree.
  */
 /*
  * Structures that collect search results from distance computations
  */
 #pragma once
+#include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/partitioning.h>
-#include <faiss/impl/AuxIndexStructures.h>
 namespace faiss {
 /*****************************************************************
  * Heap based result handler
  *****************************************************************/
-template<class C>
+template <class C>
 struct HeapResultHandler {
     using T = typename C::T;
     using TI = typename C::TI;
     int nq;
-    T *heap_dis_tab;
-    TI *heap_ids_tab;
+    T* heap_dis_tab;
+    TI* heap_ids_tab;
-    int64_t k;  // number of results to keep
+    int64_t k; // number of results to keep
-    HeapResultHandler(
-        size_t nq,
-        T * heap_dis_tab, TI * heap_ids_tab,
-        size_t k):
-        nq(nq),
-        heap_dis_tab(heap_dis_tab), heap_ids_tab(heap_ids_tab), k(k)
-    {
-    }
+    HeapResultHandler(size_t nq, T* heap_dis_tab, TI* heap_ids_tab, size_t k)
+            : nq(nq),
+              heap_dis_tab(heap_dis_tab),
+              heap_ids_tab(heap_ids_tab),
+              k(k) {}
     /******************************************************
      * API for 1 result at a time (each SingleResultHandler is
@@ -55,20 +44,20 @@ struct HeapResultHandler {
      */
     struct SingleResultHandler {
-        HeapResultHandler & hr;
+        HeapResultHandler& hr;
         size_t k;
-        T *heap_dis;
-        TI *heap_ids;
+        T* heap_dis;
+        TI* heap_ids;
         T thresh;
-        SingleResultHandler(HeapResultHandler &hr): hr(hr), k(hr.k) {}
+        SingleResultHandler(HeapResultHandler& hr) : hr(hr), k(hr.k) {}
         /// begin results for query # i
         void begin(size_t i) {
             heap_dis = hr.heap_dis_tab + i * k;
             heap_ids = hr.heap_ids_tab + i * k;
-            heap_heapify<C> (k, heap_dis, heap_ids);
+            heap_heapify<C>(k, heap_dis, heap_ids);
             thresh = heap_dis[0];
         }
@@ -82,11 +71,10 @@ struct HeapResultHandler {
         /// series of results for query i is done
         void end() {
-            heap_reorder<C> (k, heap_dis, heap_ids);
+            heap_reorder<C>(k, heap_dis, heap_ids);
         }
     };
     /******************************************************
      * API for multiple results (called from 1 thread)
      */
@@ -97,20 +85,21 @@ struct HeapResultHandler {
     void begin_multiple(size_t i0, size_t i1) {
         this->i0 = i0;
         this->i1 = i1;
-        for(size_t i = i0; i < i1; i++) {
-            heap_heapify<C> (k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+        for (size_t i = i0; i < i1; i++) {
+            heap_heapify<C>(k, heap_dis_tab + i * k, heap_ids_tab + i * k);
         }
     }
     /// add results for query i0..i1 and j0..j1
-    void add_results(size_t j0, size_t j1, const T *dis_tab) {
-        // maybe parallel for
-        for (size_t i = i0; i < i1; i++) {
-            T * heap_dis = heap_dis_tab + i * k;
-            TI * heap_ids = heap_ids_tab + i * k;
+    void add_results(size_t j0, size_t j1, const T* dis_tab) {
+#pragma omp parallel for
+        for (int64_t i = i0; i < i1; i++) {
+            T* heap_dis = heap_dis_tab + i * k;
+            TI* heap_ids = heap_ids_tab + i * k;
+            const T* dis_tab_i = dis_tab + (j1 - j0) * (i - i0) - j0;
             T thresh = heap_dis[0];
             for (size_t j = j0; j < j1; j++) {
-                T dis = *dis_tab++;
+                T dis = dis_tab_i[j];
                 if (C::cmp(thresh, dis)) {
                     heap_replace_top<C>(k, heap_dis, heap_ids, dis, j);
                     thresh = heap_dis[0];
@@ -122,11 +111,10 @@ struct HeapResultHandler {
     /// series of results for queries i0..i1 is done
     void end_multiple() {
         // maybe parallel for
-        for(size_t i = i0; i < i1; i++) {
-            heap_reorder<C> (k, heap_dis_tab + i * k, heap_ids_tab + i * k);
+        for (size_t i = i0; i < i1; i++) {
+            heap_reorder<C>(k, heap_dis_tab + i * k, heap_ids_tab + i * k);
         }
     }
 };
 /*****************************************************************
@@ -138,31 +126,25 @@ struct HeapResultHandler {
  * distance array.
  *****************************************************************/
 /// Reservoir for a single query
-template<class C>
+template <class C>
 struct ReservoirTopN {
     using T = typename C::T;
     using TI = typename C::TI;
-    T *vals;
-    TI *ids;
+    T* vals;
+    TI* ids;
-    size_t i; // number of stored elements
-    size_t n; // number of requested elements
-    size_t capacity;  // size of storage
+    size_t i;        // number of stored elements
+    size_t n;        // number of requested elements
+    size_t capacity; // size of storage
     T threshold; // current threshold
     ReservoirTopN() {}
-    ReservoirTopN(
-        size_t n, size_t capacity,
-        T *vals, TI *ids
-    ):
-        vals(vals), ids(ids),
-        i(0), n(n), capacity(capacity) {
+    ReservoirTopN(size_t n, size_t capacity, T* vals, TI* ids)
+            : vals(vals), ids(ids), i(0), n(n), capacity(capacity) {
         assert(n < capacity);
         threshold = C::neutral();
     }
@@ -184,55 +166,47 @@ struct ReservoirTopN {
         assert(i == capacity);
         threshold = partition_fuzzy<C>(
-            vals, ids, capacity, n, (capacity + n) / 2,
-            &i);
+                vals, ids, capacity, n, (capacity + n) / 2, &i);
     }
-    void to_result(T *heap_dis, TI *heap_ids) const {
+    void to_result(T* heap_dis, TI* heap_ids) const {
         for (int j = 0; j < std::min(i, n); j++) {
-            heap_push<C>(
-                j + 1, heap_dis, heap_ids,
-                vals[j], ids[j]
-            );
+            heap_push<C>(j + 1, heap_dis, heap_ids, vals[j], ids[j]);
         }
         if (i < n) {
-            heap_reorder<C> (i, heap_dis, heap_ids);
+            heap_reorder<C>(i, heap_dis, heap_ids);
             // add empty results
-            heap_heapify<C> (n - i, heap_dis + i, heap_ids + i);
+            heap_heapify<C>(n - i, heap_dis + i, heap_ids + i);
         } else {
             // add remaining elements
-            heap_addn<C> (n, heap_dis, heap_ids, vals + n, ids + n, i - n);
-            heap_reorder<C> (n, heap_dis, heap_ids);
+            heap_addn<C>(n, heap_dis, heap_ids, vals + n, ids + n, i - n);
+            heap_reorder<C>(n, heap_dis, heap_ids);
         }
     }
 };
-template<class C>
+template <class C>
 struct ReservoirResultHandler {
     using T = typename C::T;
     using TI = typename C::TI;
     int nq;
-    T *heap_dis_tab;
-    TI *heap_ids_tab;
+    T* heap_dis_tab;
+    TI* heap_ids_tab;
-    int64_t k;  // number of results to keep
+    int64_t k;       // number of results to keep
     size_t capacity; // capacity of the reservoirs
     ReservoirResultHandler(
-        size_t nq,
-        T * heap_dis_tab, TI * heap_ids_tab,
-        size_t k):
-        nq(nq),
-        heap_dis_tab(heap_dis_tab), heap_ids_tab(heap_ids_tab), k(k)
-    {
+            size_t nq,
+            T* heap_dis_tab,
+            TI* heap_ids_tab,
+            size_t k)
+            : nq(nq),
+              heap_dis_tab(heap_dis_tab),
+              heap_ids_tab(heap_ids_tab),
+              k(k) {
         // double then round up to multiple of 16 (for SIMD alignment)
         capacity = (2 * k + 15) & ~15;
     }
@@ -243,23 +217,26 @@ struct ReservoirResultHandler {
      */
     struct SingleResultHandler {
-        ReservoirResultHandler & hr;
+        ReservoirResultHandler& hr;
         std::vector<T> reservoir_dis;
         std::vector<TI> reservoir_ids;
         ReservoirTopN<C> res1;
-        SingleResultHandler(ReservoirResultHandler &hr):
-            hr(hr), reservoir_dis(hr.capacity), reservoir_ids(hr.capacity)
-        {
-        }
+        SingleResultHandler(ReservoirResultHandler& hr)
+                : hr(hr),
+                  reservoir_dis(hr.capacity),
+                  reservoir_ids(hr.capacity) {}
         size_t i;
         /// begin results for query # i
         void begin(size_t i) {
             res1 = ReservoirTopN<C>(
-                hr.k, hr.capacity, reservoir_dis.data(), reservoir_ids.data());
+                    hr.k,
+                    hr.capacity,
+                    reservoir_dis.data(),
+                    reservoir_ids.data());
             this->i = i;
         }
@@ -270,8 +247,8 @@ struct ReservoirResultHandler {
         /// series of results for query i is done
         void end() {
-            T * heap_dis = hr.heap_dis_tab + i * hr.k;
-            TI * heap_ids = hr.heap_ids_tab + i * hr.k;
+            T* heap_dis = hr.heap_dis_tab + i * hr.k;
+            TI* heap_ids = hr.heap_ids_tab + i * hr.k;
             res1.to_result(heap_dis, heap_ids);
         }
     };
@@ -295,20 +272,22 @@ struct ReservoirResultHandler {
         reservoirs.clear();
         for (size_t i = i0; i < i1; i++) {
             reservoirs.emplace_back(
-                k, capacity,
-                reservoir_dis.data() + (i - i0) * capacity,
-                reservoir_ids.data() + (i - i0) * capacity
-            );
+                    k,
+                    capacity,
+                    reservoir_dis.data() + (i - i0) * capacity,
+                    reservoir_ids.data() + (i - i0) * capacity);
         }
     }
     /// add results for query i0..i1 and j0..j1
-    void add_results(size_t j0, size_t j1, const T *dis_tab) {
+    void add_results(size_t j0, size_t j1, const T* dis_tab) {
         // maybe parallel for
-        for (size_t i = i0; i < i1; i++) {
-            ReservoirTopN<C> & reservoir = reservoirs[i - i0];
+#pragma omp parallel for
+        for (int64_t i = i0; i < i1; i++) {
+            ReservoirTopN<C>& reservoir = reservoirs[i - i0];
+            const T* dis_tab_i = dis_tab + (j1 - j0) * (i - i0) - j0;
             for (size_t j = j0; j < j1; j++) {
-                T dis = *dis_tab++;
+                T dis = dis_tab_i[j];
                 reservoir.add(dis, j);
             }
         }
@@ -317,32 +296,27 @@ struct ReservoirResultHandler {
     /// series of results for queries i0..i1 is done
     void end_multiple() {
         // maybe parallel for
-        for(size_t i = i0; i < i1; i++) {
+        for (size_t i = i0; i < i1; i++) {
             reservoirs[i - i0].to_result(
-                heap_dis_tab + i * k, heap_ids_tab + i * k);
+                    heap_dis_tab + i * k, heap_ids_tab + i * k);
         }
     }
 };
 /*****************************************************************
  * Result handler for range searches
  *****************************************************************/
-template<class C>
+template <class C>
 struct RangeSearchResultHandler {
     using T = typename C::T;
     using TI = typename C::TI;
-    RangeSearchResult *res;
+    RangeSearchResult* res;
     float radius;
-    RangeSearchResultHandler(RangeSearchResult *res, float radius):
-        res(res), radius(radius)
-    {}
+    RangeSearchResultHandler(RangeSearchResult* res, float radius)
+            : res(res), radius(radius) {}
     /******************************************************
      * API for 1 result at a time (each SingleResultHandler is
@@ -353,11 +327,10 @@ struct RangeSearchResultHandler {
         // almost the same interface as RangeSearchResultHandler
         RangeSearchPartialResult pres;
         float radius;
-        RangeQueryResult *qr = nullptr;
+        RangeQueryResult* qr = nullptr;
-        SingleResultHandler(RangeSearchResultHandler &rh):
-            pres(rh.res), radius(rh.radius)
-        {}
+        SingleResultHandler(RangeSearchResultHandler& rh)
+                : pres(rh.res), radius(rh.radius) {}
         /// begin results for query # i
         void begin(size_t i) {
@@ -366,15 +339,13 @@ struct RangeSearchResultHandler {
         /// add one result for query i
         void add_result(T dis, TI idx) {
             if (C::cmp(radius, dis)) {
                 qr->add(dis, idx);
             }
         }
         /// series of results for query i is done
-        void end() {
-        }
+        void end() {}
         ~SingleResultHandler() {
             pres.finalize();
@@ -387,8 +358,8 @@ struct RangeSearchResultHandler {
     size_t i0, i1;
-    std::vector <RangeSearchPartialResult *> partial_results;
-    std::vector <size_t> j0s;
+    std::vector<RangeSearchPartialResult*> partial_results;
+    std::vector<size_t> j0s;
     int pr = 0;
     /// begin
@@ -399,8 +370,8 @@ struct RangeSearchResultHandler {
     /// add results for query i0..i1 and j0..j1
-    void add_results(size_t j0, size_t j1, const T *dis_tab) {
-        RangeSearchPartialResult *pres;
+    void add_results(size_t j0, size_t j1, const T* dis_tab) {
+        RangeSearchPartialResult* pres;
         // there is one RangeSearchPartialResult structure per j0
         // (= block of columns of the large distance matrix)
         // it is a bit tricky to find the proper PartialResult structure
@@ -414,39 +385,32 @@ struct RangeSearchResultHandler {
             pres = partial_results[pr];
             pr++;
         } else { // did not find this j0
-            pres = new RangeSearchPartialResult (res);
+            pres = new RangeSearchPartialResult(res);
             partial_results.push_back(pres);
             j0s.push_back(j0);
             pr = partial_results.size();
         }
         for (size_t i = i0; i < i1; i++) {
-            const float *ip_line = dis_tab + (i - i0) * (j1 - j0);
-            RangeQueryResult & qres = pres->new_result (i);
+            const float* ip_line = dis_tab + (i - i0) * (j1 - j0);
+            RangeQueryResult& qres = pres->new_result(i);
             for (size_t j = j0; j < j1; j++) {
                 float dis = *ip_line++;
                 if (C::cmp(radius, dis)) {
-                    qres.add (dis, j);
+                    qres.add(dis, j);
                 }
             }
         }
     }
-    void end_multiple() {
-    }
+    void end_multiple() {}
     ~RangeSearchResultHandler() {
         if (partial_results.size() > 0) {
-            RangeSearchPartialResult::merge (partial_results);
+            RangeSearchPartialResult::merge(partial_results);
         }
     }
 };
-}  // namespace faiss
+} // namespace faiss