RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/IndexIVFPQ.h CHANGED

@@ -10,7 +10,6 @@
 #ifndef FAISS_INDEX_IVFPQ_H
 #define FAISS_INDEX_IVFPQ_H
 #include <vector>
 #include <faiss/IndexIVF.h>
@@ -20,32 +19,29 @@
 namespace faiss {
-struct IVFPQSearchParameters: IVFSearchParameters {
-    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
-    int polysemous_ht;             ///< Hamming thresh for polysemous filtering
-    IVFPQSearchParameters (): scan_table_threshold(0), polysemous_ht(0) {}
-    ~IVFPQSearchParameters () {}
+struct IVFPQSearchParameters : IVFSearchParameters {
+    size_t scan_table_threshold; ///< use table computation or on-the-fly?
+    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
+    IVFPQSearchParameters() : scan_table_threshold(0), polysemous_ht(0) {}
+    ~IVFPQSearchParameters() {}
 };
 FAISS_API extern size_t precomputed_table_max_bytes;
 /** Inverted file with Product Quantizer encoding. Each residual
  * vector is encoded as a product quantizer code.
  */
-struct IndexIVFPQ: IndexIVF {
-    bool by_residual;              ///< Encode residual or plain vector?
+struct IndexIVFPQ : IndexIVF {
+    bool by_residual; ///< Encode residual or plain vector?
-    ProductQuantizer pq;           ///< produces the codes
+    ProductQuantizer pq; ///< produces the codes
-    bool do_polysemous_training;   ///< reorder PQ centroids after training?
-    PolysemousTraining *polysemous_training; ///< if NULL, use default
+    bool do_polysemous_training; ///< reorder PQ centroids after training?
+    PolysemousTraining* polysemous_training; ///< if NULL, use default
     // search-time parameters
-    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
-    int polysemous_ht;             ///< Hamming thresh for polysemous filtering
+    size_t scan_table_threshold; ///< use table computation or on-the-fly?
+    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
     /** Precompute table that speed up query preprocessing at some
      * memory cost (used only for by_residual with L2 metric)
@@ -56,37 +52,47 @@ struct IndexIVFPQ: IndexIVF {
     /// size nlist * pq.M * pq.ksub
     AlignedTable<float> precomputed_table;
-    IndexIVFPQ (
-            Index * quantizer, size_t d, size_t nlist,
-            size_t M, size_t nbits_per_idx, MetricType metric = METRIC_L2);
-    void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
-        override;
-    void encode_vectors(idx_t n, const float* x,
-                        const idx_t *list_nos,
-                        uint8_t * codes,
-                        bool include_listnos = false) const override;
-    void sa_decode (idx_t n, const uint8_t *bytes,
-                    float *x) const override;
+    IndexIVFPQ(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,
+            size_t nbits_per_idx,
+            MetricType metric = METRIC_L2);
+    void encode_vectors(
+            idx_t n,
+            const float* x,
+            const idx_t* list_nos,
+            uint8_t* codes,
+            bool include_listnos = false) const override;
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+    void add_core(
+            idx_t n,
+            const float* x,
+            const idx_t* xids,
+            const idx_t* precomputed_idx) override;
     /// same as add_core, also:
     /// - output 2nd level residuals if residuals_2 != NULL
-    /// - use precomputed list numbers if precomputed_idx != NULL
-    void add_core_o (idx_t n, const float *x,
-                     const idx_t *xids, float *residuals_2,
-                     const idx_t *precomputed_idx = nullptr);
+    /// - accepts precomputed_idx = nullptr
+    void add_core_o(
+            idx_t n,
+            const float* x,
+            const idx_t* xids,
+            float* residuals_2,
+            const idx_t* precomputed_idx = nullptr);
     /// trains the product quantizer
     void train_residual(idx_t n, const float* x) override;
     /// same as train_residual, also output 2nd level residuals
-    void train_residual_o (idx_t n, const float *x, float *residuals_2);
+    void train_residual_o(idx_t n, const float* x, float* residuals_2);
-    void reconstruct_from_offset (int64_t list_no, int64_t offset,
-                                  float* recons) const override;
+    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
+            const override;
     /** Find exact duplicates in the dataset.
      *
@@ -99,10 +105,10 @@ struct IndexIVFPQ: IndexIVF {
      *                duplicates (max size ntotal)
      * @return n      number of groups found
      */
-    size_t find_duplicates (idx_t *ids, size_t *lims) const;
+    size_t find_duplicates(idx_t* ids, size_t* lims) const;
     // map a vector to a binary code knowning the index
-    void encode (idx_t key, const float * x, uint8_t * code) const;
+    void encode(idx_t key, const float* x, uint8_t* code) const;
     /** Encode multiple vectors
      *
@@ -113,22 +119,27 @@ struct IndexIVFPQ: IndexIVF {
      * @param compute_keys  if false, assume keys are precomputed,
      *                      otherwise compute them
      */
-    void encode_multiple (size_t n, idx_t *keys,
-                          const float * x, uint8_t * codes,
-                          bool compute_keys = false) const;
+    void encode_multiple(
+            size_t n,
+            idx_t* keys,
+            const float* x,
+            uint8_t* codes,
+            bool compute_keys = false) const;
     /// inverse of encode_multiple
-    void decode_multiple (size_t n, const idx_t *keys,
-                          const uint8_t * xcodes, float * x) const;
+    void decode_multiple(
+            size_t n,
+            const idx_t* keys,
+            const uint8_t* xcodes,
+            float* x) const;
-    InvertedListScanner *get_InvertedListScanner (bool store_pairs)
-        const override;
+    InvertedListScanner* get_InvertedListScanner(
+            bool store_pairs) const override;
     /// build precomputed table
-    void precompute_table ();
-    IndexIVFPQ ();
+    void precompute_table();
+    IndexIVFPQ();
 };
 /** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
@@ -136,24 +147,23 @@ struct IndexIVFPQ: IndexIVF {
  * @param use_precomputed_table (I/O)
  *        =-1: force disable
  *        =0: decide heuristically (default: use tables only if they are
- *            < precomputed_tables_max_bytes), set use_precomputed_table on output
- *        =1: tables that work for all quantizers (size 256 * nlist * M)
- *        =2: specific version for MultiIndexQuantizer (much more compact)
+ *            < precomputed_tables_max_bytes), set use_precomputed_table on
+ * output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
+ * specific version for MultiIndexQuantizer (much more compact)
  * @param precomputed_table precomputed table to intialize
  */
 void initialize_IVFPQ_precomputed_table(
-    int &use_precomputed_table,
-    const Index *quantizer,
-    const ProductQuantizer &pq,
-    AlignedTable<float> & precomputed_table,
-    bool verbose
-);
+        int& use_precomputed_table,
+        const Index* quantizer,
+        const ProductQuantizer& pq,
+        AlignedTable<float>& precomputed_table,
+        bool verbose);
 /// statistics are robust to internal threading, but not if
 /// IndexIVFPQ::search_preassigned is called by multiple threads
 struct IndexIVFPQStats {
-    size_t nrefine;  ///< nb of refines (IVFPQR)
+    size_t nrefine; ///< nb of refines (IVFPQR)
     size_t n_hamming_pass;
     ///< nb of passed Hamming distance tests (for polysemous)
@@ -162,17 +172,15 @@ struct IndexIVFPQStats {
     size_t search_cycles;
     size_t refine_cycles; ///< only for IVFPQR
-    IndexIVFPQStats () {reset (); }
-    void reset ();
+    IndexIVFPQStats() {
+        reset();
+    }
+    void reset();
 };
 // global var that collects them all
 FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
 } // namespace faiss
 #endif

data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp CHANGED

@@ -8,70 +8,68 @@
 #include <faiss/IndexIVFPQFastScan.h>
 #include <cassert>
+#include <cinttypes>
 #include <cstdio>
-#include <inttypes.h>
 #include <omp.h>
 #include <memory>
+#include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
-#include <faiss/utils/utils.h>
 #include <faiss/utils/distances.h>
 #include <faiss/utils/simdlib.h>
-#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/utils/utils.h>
 #include <faiss/invlists/BlockInvertedLists.h>
+#include <faiss/impl/pq4_fast_scan.h>
 #include <faiss/impl/simd_result_handlers.h>
 #include <faiss/utils/quantize_lut.h>
-#include <faiss/impl/pq4_fast_scan.h>
 namespace faiss {
 using namespace simd_result_handlers;
 inline size_t roundup(size_t a, size_t b) {
     return (a + b - 1) / b * b;
 }
-IndexIVFPQFastScan::IndexIVFPQFastScan (
-            Index * quantizer, size_t d, size_t nlist,
-            size_t M, size_t nbits_per_idx,
-            MetricType metric, int bbs):
-    IndexIVF (quantizer, d, nlist, 0, metric),
-    pq (d, M, nbits_per_idx),
-    bbs (bbs)
-{
+IndexIVFPQFastScan::IndexIVFPQFastScan(
+        Index* quantizer,
+        size_t d,
+        size_t nlist,
+        size_t M,
+        size_t nbits_per_idx,
+        MetricType metric,
+        int bbs)
+        : IndexIVF(quantizer, d, nlist, 0, metric),
+          pq(d, M, nbits_per_idx),
+          bbs(bbs) {
     FAISS_THROW_IF_NOT(nbits_per_idx == 4);
     M2 = roundup(pq.M, 2);
     by_residual = false; // set to false by default because it's much faster
     is_trained = false;
     code_size = pq.code_size;
-    replace_invlists(
-        new BlockInvertedLists(nlist, bbs, bbs * M2 / 2),
-        true
-    );
+    replace_invlists(new BlockInvertedLists(nlist, bbs, bbs * M2 / 2), true);
 }
-IndexIVFPQFastScan::IndexIVFPQFastScan ()
-{
+IndexIVFPQFastScan::IndexIVFPQFastScan() {
     by_residual = false;
     bbs = 0;
     M2 = 0;
 }
-IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ & orig, int bbs):
-    IndexIVF(
-        orig.quantizer, orig.d, orig.nlist,
-        orig.pq.code_size, orig.metric_type),
-    pq(orig.pq),
-    bbs(bbs)
-{
+IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
+        : IndexIVF(
+                  orig.quantizer,
+                  orig.d,
+                  orig.nlist,
+                  orig.pq.code_size,
+                  orig.metric_type),
+          pq(orig.pq),
+          bbs(bbs) {
     FAISS_THROW_IF_NOT(orig.pq.nbits == 4);
     by_residual = orig.by_residual;
@@ -83,69 +81,68 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ & orig, int bbs):
     M2 = roundup(M, 2);
     replace_invlists(
-        new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2),
-        true
-    );
+            new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2), true);
     precomputed_table.resize(orig.precomputed_table.size());
     if (precomputed_table.nbytes() > 0) {
-        memcpy(precomputed_table.get(), orig.precomputed_table.data(),
-               precomputed_table.nbytes()
-        );
+        memcpy(precomputed_table.get(),
+               orig.precomputed_table.data(),
+               precomputed_table.nbytes());
     }
-    for(size_t i = 0; i < nlist; i++) {
+    for (size_t i = 0; i < nlist; i++) {
         size_t nb = orig.invlists->list_size(i);
         size_t nb2 = roundup(nb, bbs);
         AlignedTable<uint8_t> tmp(nb2 * M2 / 2);
         pq4_pack_codes(
-            InvertedLists::ScopedCodes(orig.invlists, i).get(),
-            nb, M, nb2, bbs, M2,
-            tmp.get()
-        );
+                InvertedLists::ScopedCodes(orig.invlists, i).get(),
+                nb,
+                M,
+                nb2,
+                bbs,
+                M2,
+                tmp.get());
         invlists->add_entries(
-            i, nb,
-            InvertedLists::ScopedIds(orig.invlists, i).get(),
-            tmp.get()
-        );
+                i,
+                nb,
+                InvertedLists::ScopedIds(orig.invlists, i).get(),
+                tmp.get());
     }
     orig_invlists = orig.invlists;
 }
 /*********************************************************
  * Training
  *********************************************************/
-void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in)
-{
+void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
+    const float* x = fvecs_maybe_subsample(
+            d,
+            (size_t*)&n,
+            pq.cp.max_points_per_centroid * pq.ksub,
+            x_in,
+            verbose,
+            pq.cp.seed);
-    const float * x = fvecs_maybe_subsample (
-         d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
-         x_in, verbose, pq.cp.seed);
-    std::unique_ptr<float []> del_x;
+    std::unique_ptr<float[]> del_x;
     if (x != x_in) {
         del_x.reset((float*)x);
     }
-    const float *trainset;
+    const float* trainset;
     AlignedTable<float> residuals;
     if (by_residual) {
-        if(verbose) printf("computing residuals\n");
+        if (verbose)
+            printf("computing residuals\n");
         std::vector<idx_t> assign(n);
-        quantizer->assign (n, x, assign.data());
+        quantizer->assign(n, x, assign.data());
         residuals.resize(n * d);
         for (idx_t i = 0; i < n; i++) {
-           quantizer->compute_residual (
-                x + i * d,
-                residuals.data() + i * d,
-                assign[i]
-            );
+            quantizer->compute_residual(
+                    x + i * d, residuals.data() + i * d, assign[i]);
         }
         trainset = residuals.data();
     } else {
@@ -153,82 +150,78 @@ void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in)
     }
     if (verbose) {
-        printf ("training %zdx%zd product quantizer on %zd vectors in %dD\n",
-                pq.M, pq.ksub, long(n), d);
+        printf("training %zdx%zd product quantizer on "
+               "%" PRId64 " vectors in %dD\n",
+               pq.M,
+               pq.ksub,
+               n,
+               d);
     }
     pq.verbose = verbose;
-    pq.train (n, trainset);
+    pq.train(n, trainset);
     if (by_residual && metric_type == METRIC_L2) {
         precompute_table();
     }
 }
-void IndexIVFPQFastScan::precompute_table ()
-{
+void IndexIVFPQFastScan::precompute_table() {
     initialize_IVFPQ_precomputed_table(
-        use_precomputed_table,
-        quantizer, pq, precomputed_table, verbose
-    );
+            use_precomputed_table, quantizer, pq, precomputed_table, verbose);
 }
 /*********************************************************
  * Code management functions
  *********************************************************/
 void IndexIVFPQFastScan::encode_vectors(
-        idx_t n, const float* x, const idx_t *list_nos,
-        uint8_t * codes, bool include_listnos) const
-{
+        idx_t n,
+        const float* x,
+        const idx_t* list_nos,
+        uint8_t* codes,
+        bool include_listnos) const {
     if (by_residual) {
-        AlignedTable<float> residuals (n * d);
+        AlignedTable<float> residuals(n * d);
         for (size_t i = 0; i < n; i++) {
             if (list_nos[i] < 0) {
-                memset (residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
+                memset(residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
             } else {
-                quantizer->compute_residual (
-                    x + i * d, residuals.data() + i * d, list_nos[i]);
+                quantizer->compute_residual(
+                        x + i * d, residuals.data() + i * d, list_nos[i]);
             }
         }
-        pq.compute_codes (residuals.data(), codes, n);
+        pq.compute_codes(residuals.data(), codes, n);
     } else {
-        pq.compute_codes (x, codes, n);
+        pq.compute_codes(x, codes, n);
     }
     if (include_listnos) {
         size_t coarse_size = coarse_code_size();
         for (idx_t i = n - 1; i >= 0; i--) {
-            uint8_t * code = codes + i * (coarse_size + code_size);
-            memmove (code + coarse_size,
-                     codes + i * code_size, code_size);
-            encode_listno (list_nos[i], code);
+            uint8_t* code = codes + i * (coarse_size + code_size);
+            memmove(code + coarse_size, codes + i * code_size, code_size);
+            encode_listno(list_nos[i], code);
         }
     }
 }
-void IndexIVFPQFastScan::add_with_ids (
-        idx_t n, const float * x, const idx_t *xids) {
+void IndexIVFPQFastScan::add_with_ids(
+        idx_t n,
+        const float* x,
+        const idx_t* xids) {
     // copied from IndexIVF::add_with_ids --->
     // do some blocking to avoid excessive allocs
     idx_t bs = 65536;
     if (n > bs) {
         for (idx_t i0 = 0; i0 < n; i0 += bs) {
-            idx_t i1 = std::min (n, i0 + bs);
+            idx_t i1 = std::min(n, i0 + bs);
             if (verbose) {
                 printf("   IndexIVFPQFastScan::add_with_ids %zd: %zd",
-                       size_t(i0), size_t(i1));
+                       size_t(i0),
+                       size_t(i1));
             }
-            add_with_ids (i1 - i0, x + i0 * d,
-                          xids ? xids + i0 : nullptr);
+            add_with_ids(i1 - i0, x + i0 * d, xids ? xids + i0 : nullptr);
         }
         return;
     }
@@ -236,37 +229,38 @@ void IndexIVFPQFastScan::add_with_ids (
     AlignedTable<uint8_t> codes(n * code_size);
-    FAISS_THROW_IF_NOT (is_trained);
-    direct_map.check_can_add (xids);
+    FAISS_THROW_IF_NOT(is_trained);
+    direct_map.check_can_add(xids);
-    std::unique_ptr<idx_t []> idx(new idx_t[n]);
-    quantizer->assign (n, x, idx.get());
+    std::unique_ptr<idx_t[]> idx(new idx_t[n]);
+    quantizer->assign(n, x, idx.get());
     size_t nadd = 0, nminus1 = 0;
     for (size_t i = 0; i < n; i++) {
-        if (idx[i] < 0) nminus1++;
+        if (idx[i] < 0)
+            nminus1++;
     }
     AlignedTable<uint8_t> flat_codes(n * code_size);
-    encode_vectors (n, x, idx.get(), flat_codes.get());
+    encode_vectors(n, x, idx.get(), flat_codes.get());
     DirectMapAdd dm_adder(direct_map, n, xids);
     // <---
-    BlockInvertedLists *bil = dynamic_cast<BlockInvertedLists*>(invlists);
-    FAISS_THROW_IF_NOT_MSG (bil, "only block inverted lists supported");
+    BlockInvertedLists* bil = dynamic_cast<BlockInvertedLists*>(invlists);
+    FAISS_THROW_IF_NOT_MSG(bil, "only block inverted lists supported");
     // prepare batches
     std::vector<idx_t> order(n);
-    for(idx_t i = 0; i < n ; i++) { order[i] = i; }
+    for (idx_t i = 0; i < n; i++) {
+        order[i] = i;
+    }
     // TODO should not need stable
-    std::stable_sort(order.begin(), order.end(),
-        [&idx](idx_t a, idx_t b) {
-            return idx[a] < idx[b];
-        }
-    );
+    std::stable_sort(order.begin(), order.end(), [&idx](idx_t a, idx_t b) {
+        return idx[a] < idx[b];
+    });
     // TODO parallelize
     idx_t i0 = 0;
@@ -274,7 +268,7 @@ void IndexIVFPQFastScan::add_with_ids (
         idx_t list_no = idx[order[i0]];
         idx_t i1 = i0 + 1;
         while (i1 < n && idx[order[i1]] == list_no) {
-            i1 ++;
+            i1++;
         }
         if (list_no == -1) {
@@ -288,58 +282,57 @@ void IndexIVFPQFastScan::add_with_ids (
         bil->resize(list_no, list_size + i1 - i0);
-        for(idx_t i = i0; i < i1; i++) {
+        for (idx_t i = i0; i < i1; i++) {
             size_t ofs = list_size + i - i0;
             idx_t id = xids ? xids[order[i]] : ntotal + order[i];
-            dm_adder.add (order[i], list_no, ofs);
+            dm_adder.add(order[i], list_no, ofs);
             bil->ids[list_no][ofs] = id;
-            memcpy(
-                list_codes.data() + (i - i0) * code_size,
-                flat_codes.data() + order[i] * code_size,
-                code_size
-            );
+            memcpy(list_codes.data() + (i - i0) * code_size,
+                   flat_codes.data() + order[i] * code_size,
+                   code_size);
             nadd++;
         }
         pq4_pack_codes_range(
-            list_codes.data(), pq.M,
-            list_size, list_size + i1 - i0,
-            bbs, M2, bil->codes[list_no].data()
-        );
+                list_codes.data(),
+                pq.M,
+                list_size,
+                list_size + i1 - i0,
+                bbs,
+                M2,
+                bil->codes[list_no].data());
         i0 = i1;
     }
     ntotal += n;
 }
 /*********************************************************
  * search
  *********************************************************/
 namespace {
 // from impl/ProductQuantizer.cpp
 template <class C, typename dis_t>
 void pq_estimators_from_tables_generic(
-        const ProductQuantizer& pq, size_t nbits,
-        const uint8_t *codes, size_t ncodes,
-        const dis_t *dis_table, const int64_t * ids,
+        const ProductQuantizer& pq,
+        size_t nbits,
+        const uint8_t* codes,
+        size_t ncodes,
+        const dis_t* dis_table,
+        const int64_t* ids,
         float dis0,
-        size_t k, typename C::T *heap_dis, int64_t *heap_ids)
-{
+        size_t k,
+        typename C::T* heap_dis,
+        int64_t* heap_ids) {
     using accu_t = typename C::T;
     const size_t M = pq.M;
     const size_t ksub = pq.ksub;
     for (size_t j = 0; j < ncodes; ++j) {
-        PQDecoderGeneric decoder(
-                codes + j * pq.code_size, nbits
-        );
+        PQDecoderGeneric decoder(codes + j * pq.code_size, nbits);
         accu_t dis = dis0;
-        const dis_t * dt = dis_table;
+        const dis_t* dt = dis_table;
         for (size_t m = 0; m < M; m++) {
             uint64_t c = decoder.decode();
             dis += dt[c];
@@ -356,17 +349,19 @@ void pq_estimators_from_tables_generic(
 using idx_t = Index::idx_t;
 using namespace quantize_lut;
-void fvec_madd_avx (
-        size_t n, const float *a,
-        float bf, const float *b, float *c)
-{
+void fvec_madd_avx(
+        size_t n,
+        const float* a,
+        float bf,
+        const float* b,
+        float* c) {
     assert(is_aligned_pointer(a));
     assert(is_aligned_pointer(b));
     assert(is_aligned_pointer(c));
     assert(n % 8 == 0);
     simd8float32 bf8(bf);
     n /= 8;
-    for(size_t i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         simd8float32 ai(a);
         simd8float32 bi(b);
@@ -376,7 +371,6 @@ void fvec_madd_avx (
         a += 8;
         b += 8;
     }
 }
 } // anonymous namespace
@@ -385,23 +379,20 @@ void fvec_madd_avx (
  * Look-Up Table functions
  *********************************************************/
 void IndexIVFPQFastScan::compute_LUT(
-    size_t n, const float *x,
-    const idx_t *coarse_ids, const float *coarse_dis,
-    AlignedTable<float> & dis_tables,
-    AlignedTable<float> & biases
-) const
-{
-    const IndexIVFPQFastScan & ivfpq = *this;
+        size_t n,
+        const float* x,
+        const idx_t* coarse_ids,
+        const float* coarse_dis,
+        AlignedTable<float>& dis_tables,
+        AlignedTable<float>& biases) const {
+    const IndexIVFPQFastScan& ivfpq = *this;
     size_t dim12 = pq.ksub * pq.M;
     size_t d = pq.d;
     size_t nprobe = ivfpq.nprobe;
     if (ivfpq.by_residual) {
         if (ivfpq.metric_type == METRIC_L2) {
             dis_tables.resize(n * nprobe * dim12);
             if (ivfpq.use_precomputed_table == 1) {
@@ -409,57 +400,54 @@ void IndexIVFPQFastScan::compute_LUT(
                 memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
                 AlignedTable<float> ip_table(n * dim12);
-                pq.compute_inner_prod_tables (n, x, ip_table.get());
+                pq.compute_inner_prod_tables(n, x, ip_table.get());
 #pragma omp parallel for if (n * nprobe > 8000)
-                for(idx_t ij = 0; ij < n * nprobe; ij++) {
+                for (idx_t ij = 0; ij < n * nprobe; ij++) {
                     idx_t i = ij / nprobe;
-                    float *tab = dis_tables.get() + ij * dim12;
+                    float* tab = dis_tables.get() + ij * dim12;
                     idx_t cij = coarse_ids[ij];
                     if (cij >= 0) {
-                        fvec_madd_avx (
-                            dim12,
-                            precomputed_table.get() + cij * dim12,
-                            -2, ip_table.get() + i * dim12,
-                            tab
-                        );
+                        fvec_madd_avx(
+                                dim12,
+                                precomputed_table.get() + cij * dim12,
+                                -2,
+                                ip_table.get() + i * dim12,
+                                tab);
                     } else {
                         // fill with NaNs so that they are ignored during
                         // LUT quantization
-                        memset (tab, -1, sizeof(float) * dim12);
+                        memset(tab, -1, sizeof(float) * dim12);
                     }
                 }
             } else {
                 std::unique_ptr<float[]> xrel(new float[n * nprobe * d]);
                 biases.resize(n * nprobe);
                 memset(biases.get(), 0, sizeof(float) * n * nprobe);
 #pragma omp parallel for if (n * nprobe > 8000)
-                for(idx_t ij = 0; ij < n * nprobe; ij++) {
+                for (idx_t ij = 0; ij < n * nprobe; ij++) {
                     idx_t i = ij / nprobe;
-                    float *xij = &xrel[ij * d];
+                    float* xij = &xrel[ij * d];
                     idx_t cij = coarse_ids[ij];
                     if (cij >= 0) {
-                        ivfpq.quantizer->compute_residual(
-                            x + i * d, xij, cij);
+                        ivfpq.quantizer->compute_residual(x + i * d, xij, cij);
                     } else {
                         // will fill with NaNs
                         memset(xij, -1, sizeof(float) * d);
                     }
                 }
-                pq.compute_distance_tables (
+                pq.compute_distance_tables(
                         n * nprobe, xrel.get(), dis_tables.get());
             }
         } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
             dis_tables.resize(n * dim12);
-            pq.compute_inner_prod_tables (n, x, dis_tables.get());
+            pq.compute_inner_prod_tables(n, x, dis_tables.get());
             // compute_inner_prod_tables(pq, n, x, dis_tables.get());
             biases.resize(n * nprobe);
@@ -471,33 +459,29 @@ void IndexIVFPQFastScan::compute_LUT(
     } else {
         dis_tables.resize(n * dim12);
         if (ivfpq.metric_type == METRIC_L2) {
-            pq.compute_distance_tables (n, x, dis_tables.get());
+            pq.compute_distance_tables(n, x, dis_tables.get());
         } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
-            pq.compute_inner_prod_tables (n, x, dis_tables.get());
+            pq.compute_inner_prod_tables(n, x, dis_tables.get());
         } else {
             FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
         }
     }
 }
 void IndexIVFPQFastScan::compute_LUT_uint8(
-    size_t n, const float *x,
-    const idx_t *coarse_ids, const float *coarse_dis,
-    AlignedTable<uint8_t> & dis_tables,
-    AlignedTable<uint16_t> & biases,
-    float * normalizers
-) const {
-    const IndexIVFPQFastScan & ivfpq = *this;
+        size_t n,
+        const float* x,
+        const idx_t* coarse_ids,
+        const float* coarse_dis,
+        AlignedTable<uint8_t>& dis_tables,
+        AlignedTable<uint16_t>& biases,
+        float* normalizers) const {
+    const IndexIVFPQFastScan& ivfpq = *this;
     AlignedTable<float> dis_tables_float;
     AlignedTable<float> biases_float;
     uint64_t t0 = get_cy();
-    compute_LUT(
-            n, x,
-            coarse_ids, coarse_dis,
-            dis_tables_float, biases_float
-    );
+    compute_LUT(n, x, coarse_ids, coarse_dis, dis_tables_float, biases_float);
     IVFFastScan_stats.t_compute_distance_tables += get_cy() - t0;
     bool lut_is_3d = ivfpq.by_residual && ivfpq.metric_type == METRIC_L2;
@@ -514,45 +498,52 @@ void IndexIVFPQFastScan::compute_LUT_uint8(
     uint64_t t1 = get_cy();
 #pragma omp parallel for if (n > 100)
-    for(int64_t i = 0; i < n; i++) {
-        const float *t_in = dis_tables_float.get() + i * dim123;
-        const float *b_in = nullptr;
-        uint8_t *t_out = dis_tables.get() + i * dim123_2;
-        uint16_t *b_out = nullptr;
+    for (int64_t i = 0; i < n; i++) {
+        const float* t_in = dis_tables_float.get() + i * dim123;
+        const float* b_in = nullptr;
+        uint8_t* t_out = dis_tables.get() + i * dim123_2;
+        uint16_t* b_out = nullptr;
         if (biases_float.get()) {
             b_in = biases_float.get() + i * nprobe;
             b_out = biases.get() + i * nprobe;
         }
         quantize_LUT_and_bias(
-            nprobe, pq.M, pq.ksub, lut_is_3d,
-            t_in, b_in,
-            t_out, M2, b_out,
-            normalizers + 2 * i, normalizers + 2 * i + 1
-        );
+                nprobe,
+                pq.M,
+                pq.ksub,
+                lut_is_3d,
+                t_in,
+                b_in,
+                t_out,
+                M2,
+                b_out,
+                normalizers + 2 * i,
+                normalizers + 2 * i + 1);
     }
     IVFFastScan_stats.t_round += get_cy() - t1;
 }
 /*********************************************************
  * Search functions
  *********************************************************/
-template<bool is_max>
+template <bool is_max>
 void IndexIVFPQFastScan::search_dispatch_implem(
-                idx_t n,
-                const float* x,
-                idx_t k,
-                float* distances,
-                idx_t* labels) const
-{
-    using Cfloat = typename std::conditional<is_max,
-        CMax<float, int64_t>, CMin<float, int64_t> >::type;
-    using C = typename std::conditional<is_max,
-        CMax<uint16_t, int64_t>, CMin<uint16_t, int64_t> >::type;
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
+    using Cfloat = typename std::conditional<
+            is_max,
+            CMax<float, int64_t>,
+            CMin<float, int64_t>>::type;
+    using C = typename std::conditional<
+            is_max,
+            CMax<uint16_t, int64_t>,
+            CMin<uint16_t, int64_t>>::type;
     if (n == 0) {
         return;
@@ -568,7 +559,7 @@ void IndexIVFPQFastScan::search_dispatch_implem(
             impl = 10;
         }
         if (k > 20) {
-            impl ++;
+            impl++;
         }
     }
@@ -582,11 +573,25 @@ void IndexIVFPQFastScan::search_dispatch_implem(
         if (n < 2) {
             if (impl == 12 || impl == 13) {
-                search_implem_12<C>
-                    (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
+                search_implem_12<C>(
+                        n,
+                        x,
+                        k,
+                        distances,
+                        labels,
+                        impl,
+                        &ndis,
+                        &nlist_visited);
             } else {
-                search_implem_10<C>
-                    (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
+                search_implem_10<C>(
+                        n,
+                        x,
+                        k,
+                        distances,
+                        labels,
+                        impl,
+                        &ndis,
+                        &nlist_visited);
             }
         } else {
             // explicitly slice over threads
@@ -595,34 +600,47 @@ void IndexIVFPQFastScan::search_dispatch_implem(
                 nslice = n;
             } else if (by_residual && metric_type == METRIC_L2) {
                 // make sure we don't make too big LUT tables
-                size_t lut_size_per_query =
-                    pq.M * pq.ksub * nprobe * (sizeof(float) + sizeof(uint8_t));
+                size_t lut_size_per_query = pq.M * pq.ksub * nprobe *
+                        (sizeof(float) + sizeof(uint8_t));
                 size_t max_lut_size = precomputed_table_max_bytes;
                 // how many queries we can handle within mem budget
-                size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
-                nslice = roundup(std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
+                size_t nq_ok =
+                        std::max(max_lut_size / lut_size_per_query, size_t(1));
+                nslice =
+                        roundup(std::max(size_t(n / nq_ok), size_t(1)),
+                                omp_get_max_threads());
             } else {
                 // LUTs unlikely to be a limiting factor
                 nslice = omp_get_max_threads();
             }
-#pragma omp parallel for reduction(+: ndis, nlist_visited)
+#pragma omp parallel for reduction(+ : ndis, nlist_visited)
             for (int slice = 0; slice < nslice; slice++) {
                 idx_t i0 = n * slice / nslice;
                 idx_t i1 = n * (slice + 1) / nslice;
-                float *dis_i = distances + i0 * k;
-                idx_t *lab_i = labels + i0 * k;
+                float* dis_i = distances + i0 * k;
+                idx_t* lab_i = labels + i0 * k;
                 if (impl == 12 || impl == 13) {
                     search_implem_12<C>(
-                        i1 - i0, x + i0 * d, k, dis_i, lab_i,
-                        impl, &ndis, &nlist_visited
-                    );
+                            i1 - i0,
+                            x + i0 * d,
+                            k,
+                            dis_i,
+                            lab_i,
+                            impl,
+                            &ndis,
+                            &nlist_visited);
                 } else {
                     search_implem_10<C>(
-                        i1 - i0, x + i0 * d, k, dis_i, lab_i,
-                        impl, &ndis, &nlist_visited
-                    );
+                            i1 - i0,
+                            x + i0 * d,
+                            k,
+                            dis_i,
+                            lab_i,
+                            impl,
+                            &ndis,
+                            &nlist_visited);
                 }
             }
         }
@@ -632,14 +650,16 @@ void IndexIVFPQFastScan::search_dispatch_implem(
     } else {
         FAISS_THROW_FMT("implem %d does not exist", implem);
     }
 }
 void IndexIVFPQFastScan::search(
-                idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels) const
-{
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
+    FAISS_THROW_IF_NOT(k > 0);
     if (metric_type == METRIC_L2) {
         search_dispatch_implem<true>(n, x, k, distances, labels);
     } else {
@@ -647,133 +667,150 @@ void IndexIVFPQFastScan::search(
     }
 }
-template<class C>
+template <class C>
 void IndexIVFPQFastScan::search_implem_1(
-                idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels) const
-{
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
     FAISS_THROW_IF_NOT(orig_invlists);
     std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
     std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
-    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+    quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
     size_t dim12 = pq.ksub * pq.M;
     AlignedTable<float> dis_tables;
     AlignedTable<float> biases;
-    compute_LUT (
-            n, x,
-            coarse_ids.get(), coarse_dis.get(),
-            dis_tables, biases
-    );
+    compute_LUT(n, x, coarse_ids.get(), coarse_dis.get(), dis_tables, biases);
     bool single_LUT = !(by_residual && metric_type == METRIC_L2);
     size_t ndis = 0, nlist_visited = 0;
-#pragma omp parallel for reduction(+: ndis, nlist_visited)
-    for(idx_t i = 0; i < n; i++) {
-        int64_t *heap_ids = labels + i * k;
-        float *heap_dis = distances + i * k;
-        heap_heapify<C> (k, heap_dis, heap_ids);
-        float *LUT = nullptr;
+#pragma omp parallel for reduction(+ : ndis, nlist_visited)
+    for (idx_t i = 0; i < n; i++) {
+        int64_t* heap_ids = labels + i * k;
+        float* heap_dis = distances + i * k;
+        heap_heapify<C>(k, heap_dis, heap_ids);
+        float* LUT = nullptr;
         if (single_LUT) {
             LUT = dis_tables.get() + i * dim12;
         }
-        for(idx_t j = 0; j < nprobe; j++) {
+        for (idx_t j = 0; j < nprobe; j++) {
             if (!single_LUT) {
                 LUT = dis_tables.get() + (i * nprobe + j) * dim12;
             }
             idx_t list_no = coarse_ids[i * nprobe + j];
-            if (list_no < 0) continue;
+            if (list_no < 0)
+                continue;
             size_t ls = orig_invlists->list_size(list_no);
-            if (ls == 0) continue;
+            if (ls == 0)
+                continue;
             InvertedLists::ScopedCodes codes(orig_invlists, list_no);
             InvertedLists::ScopedIds ids(orig_invlists, list_no);
             float bias = biases.get() ? biases[i * nprobe + j] : 0;
             pq_estimators_from_tables_generic<C>(
-                pq, pq.nbits, codes.get(), ls,
-                LUT, ids.get(), bias,
-                k, heap_dis, heap_ids
-            );
-            nlist_visited ++;
-            ndis ++;
+                    pq,
+                    pq.nbits,
+                    codes.get(),
+                    ls,
+                    LUT,
+                    ids.get(),
+                    bias,
+                    k,
+                    heap_dis,
+                    heap_ids);
+            nlist_visited++;
+            ndis++;
         }
-        heap_reorder<C> (k, heap_dis, heap_ids);
+        heap_reorder<C>(k, heap_dis, heap_ids);
     }
     indexIVF_stats.nq += n;
     indexIVF_stats.ndis += ndis;
     indexIVF_stats.nlist += nlist_visited;
 }
-template<class C>
+template <class C>
 void IndexIVFPQFastScan::search_implem_2(
-                idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels) const
-{
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const {
     FAISS_THROW_IF_NOT(orig_invlists);
     std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
     std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
-    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+    quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
     size_t dim12 = pq.ksub * M2;
     AlignedTable<uint8_t> dis_tables;
     AlignedTable<uint16_t> biases;
     std::unique_ptr<float[]> normalizers(new float[2 * n]);
-    compute_LUT_uint8 (
-        n, x,
-        coarse_ids.get(), coarse_dis.get(),
-        dis_tables, biases,
-        normalizers.get()
-    );
+    compute_LUT_uint8(
+            n,
+            x,
+            coarse_ids.get(),
+            coarse_dis.get(),
+            dis_tables,
+            biases,
+            normalizers.get());
     bool single_LUT = !(by_residual && metric_type == METRIC_L2);
     size_t ndis = 0, nlist_visited = 0;
-#pragma omp parallel for reduction(+: ndis, nlist_visited)
-    for(idx_t i = 0; i < n; i++) {
+#pragma omp parallel for reduction(+ : ndis, nlist_visited)
+    for (idx_t i = 0; i < n; i++) {
         std::vector<uint16_t> tmp_dis(k);
-        int64_t *heap_ids = labels + i * k;
-        uint16_t *heap_dis = tmp_dis.data();
-        heap_heapify<C> (k, heap_dis, heap_ids);
-        const uint8_t *LUT = nullptr;
+        int64_t* heap_ids = labels + i * k;
+        uint16_t* heap_dis = tmp_dis.data();
+        heap_heapify<C>(k, heap_dis, heap_ids);
+        const uint8_t* LUT = nullptr;
         if (single_LUT) {
             LUT = dis_tables.get() + i * dim12;
         }
-        for(idx_t j = 0; j < nprobe; j++) {
+        for (idx_t j = 0; j < nprobe; j++) {
             if (!single_LUT) {
                 LUT = dis_tables.get() + (i * nprobe + j) * dim12;
             }
             idx_t list_no = coarse_ids[i * nprobe + j];
-            if (list_no < 0) continue;
+            if (list_no < 0)
+                continue;
             size_t ls = orig_invlists->list_size(list_no);
-            if (ls == 0) continue;
+            if (ls == 0)
+                continue;
             InvertedLists::ScopedCodes codes(orig_invlists, list_no);
             InvertedLists::ScopedIds ids(orig_invlists, list_no);
             uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
             pq_estimators_from_tables_generic<C>(
-                pq, pq.nbits, codes.get(), ls,
-                LUT, ids.get(), bias,
-                k, heap_dis, heap_ids
-            );
+                    pq,
+                    pq.nbits,
+                    codes.get(),
+                    ls,
+                    LUT,
+                    ids.get(),
+                    bias,
+                    k,
+                    heap_dis,
+                    heap_ids);
             nlist_visited++;
             ndis += ls;
         }
-        heap_reorder<C> (k, heap_dis, heap_ids);
+        heap_reorder<C>(k, heap_dis, heap_ids);
         // convert distances to float
         {
             float one_a = 1 / normalizers[2 * i], b = normalizers[2 * i + 1];
@@ -781,7 +818,7 @@ void IndexIVFPQFastScan::search_implem_2(
                 one_a = 1;
                 b = 0;
             }
-            float *heap_dis_float = distances + i * k;
+            float* heap_dis_float = distances + i * k;
             for (int j = 0; j < k; j++) {
                 heap_dis_float[j] = b + heap_dis[j] * one_a;
             }
@@ -792,14 +829,16 @@ void IndexIVFPQFastScan::search_implem_2(
     indexIVF_stats.nlist += nlist_visited;
 }
-template<class C>
+template <class C>
 void IndexIVFPQFastScan::search_implem_10(
-                idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels,
-                int impl, size_t *ndis_out, size_t *nlist_out) const
-{
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels,
+        int impl,
+        size_t* ndis_out,
+        size_t* nlist_out) const {
     memset(distances, -1, sizeof(float) * k * n);
     memset(labels, -1, sizeof(idx_t) * k * n);
@@ -807,7 +846,6 @@ void IndexIVFPQFastScan::search_implem_10(
     using ReservoirHC = ReservoirHandler<C, true>;
     using SingleResultHC = SingleResultHandler<C, true>;
     std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
     std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
@@ -817,20 +855,23 @@ void IndexIVFPQFastScan::search_implem_10(
 #define TIC times[ti++] = get_cy()
     TIC;
-    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+    quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
     TIC;
     size_t dim12 = pq.ksub * M2;
     AlignedTable<uint8_t> dis_tables;
     AlignedTable<uint16_t> biases;
-    std::unique_ptr<float[]> normalizers (new float[2 * n]);
+    std::unique_ptr<float[]> normalizers(new float[2 * n]);
-    compute_LUT_uint8 (
-            n, x,
-            coarse_ids.get(), coarse_dis.get(),
-            dis_tables, biases, normalizers.get()
-    );
+    compute_LUT_uint8(
+            n,
+            x,
+            coarse_ids.get(),
+            coarse_dis.get(),
+            dis_tables,
+            biases,
+            normalizers.get());
     TIC;
@@ -841,15 +882,16 @@ void IndexIVFPQFastScan::search_implem_10(
     {
         AlignedTable<uint16_t> tmp_distances(k);
-        for(idx_t i = 0; i < n; i++) {
-            const uint8_t *LUT = nullptr;
+        for (idx_t i = 0; i < n; i++) {
+            const uint8_t* LUT = nullptr;
             int qmap1[1] = {0};
-            std::unique_ptr<SIMDResultHandler<C, true> > handler;
+            std::unique_ptr<SIMDResultHandler<C, true>> handler;
             if (k == 1) {
                 handler.reset(new SingleResultHC(1, 0));
             } else if (impl == 10) {
-                handler.reset(new HeapHC(1, tmp_distances.get(), labels + i * k, k, 0));
+                handler.reset(new HeapHC(
+                        1, tmp_distances.get(), labels + i * k, k, 0));
             } else if (impl == 11) {
                 handler.reset(new ReservoirHC(1, 0, k, 2 * k));
             } else {
@@ -861,7 +903,7 @@ void IndexIVFPQFastScan::search_implem_10(
             if (single_LUT) {
                 LUT = dis_tables.get() + i * dim12;
             }
-            for(idx_t j = 0; j < nprobe; j++) {
+            for (idx_t j = 0; j < nprobe; j++) {
                 size_t ij = i * nprobe + j;
                 if (!single_LUT) {
                     LUT = dis_tables.get() + ij * dim12;
@@ -871,9 +913,11 @@ void IndexIVFPQFastScan::search_implem_10(
                 }
                 idx_t list_no = coarse_ids[ij];
-                if (list_no < 0) continue;
+                if (list_no < 0)
+                    continue;
                 size_t ls = invlists->list_size(list_no);
-                if (ls == 0) continue;
+                if (ls == 0)
+                    continue;
                 InvertedLists::ScopedCodes codes(invlists, list_no);
                 InvertedLists::ScopedIds ids(invlists, list_no);
@@ -881,41 +925,40 @@ void IndexIVFPQFastScan::search_implem_10(
                 handler->ntotal = ls;
                 handler->id_map = ids.get();
-#define DISPATCH(classHC) \
-                if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
-                    pq4_accumulate_loop( \
-                            1, roundup(ls, bbs), bbs, M2, \
-                            codes.get(), LUT, \
-                            *res \
-                        ); \
-                }
+#define DISPATCH(classHC)                                              \
+    if (dynamic_cast<classHC*>(handler.get())) {                       \
+        auto* res = static_cast<classHC*>(handler.get());              \
+        pq4_accumulate_loop(                                           \
+                1, roundup(ls, bbs), bbs, M2, codes.get(), LUT, *res); \
+    }
                 DISPATCH(HeapHC)
-                else DISPATCH(ReservoirHC)
-                else DISPATCH(SingleResultHC)
+                else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
 #undef DISPATCH
-                nlist_visited ++;
-                ndis ++;
+                        nlist_visited++;
+                ndis++;
             }
             handler->to_flat_arrays(
-                distances + i * k, labels + i * k,
-                skip & 16 ? nullptr : normalizers.get() + i * 2
-            );
+                    distances + i * k,
+                    labels + i * k,
+                    skip & 16 ? nullptr : normalizers.get() + i * 2);
         }
     }
     *ndis_out = ndis;
     *nlist_out = nlist;
 }
-template<class C>
+template <class C>
 void IndexIVFPQFastScan::search_implem_12(
-                idx_t n, const float* x, idx_t k,
-                float* distances, idx_t* labels,
-                int impl, size_t *ndis_out, size_t *nlist_out) const
-{
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels,
+        int impl,
+        size_t* ndis_out,
+        size_t* nlist_out) const {
     if (n == 0) { // does not work well with reservoir
         return;
     }
@@ -930,53 +973,53 @@ void IndexIVFPQFastScan::search_implem_12(
 #define TIC times[ti++] = get_cy()
     TIC;
-    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+    quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
     TIC;
     size_t dim12 = pq.ksub * M2;
     AlignedTable<uint8_t> dis_tables;
     AlignedTable<uint16_t> biases;
-    std::unique_ptr<float[]> normalizers (new float[2 * n]);
+    std::unique_ptr<float[]> normalizers(new float[2 * n]);
-    compute_LUT_uint8 (
-            n, x,
-            coarse_ids.get(), coarse_dis.get(),
-            dis_tables, biases, normalizers.get()
-    );
+    compute_LUT_uint8(
+            n,
+            x,
+            coarse_ids.get(),
+            coarse_dis.get(),
+            dis_tables,
+            biases,
+            normalizers.get());
     TIC;
     struct QC {
-        int qno;      // sequence number of the query
-        int list_no;  // list to visit
-        int rank;     // this is the rank'th result of the coarse quantizer
+        int qno;     // sequence number of the query
+        int list_no; // list to visit
+        int rank;    // this is the rank'th result of the coarse quantizer
     };
     bool single_LUT = !(by_residual && metric_type == METRIC_L2);
     std::vector<QC> qcs;
     {
         int ij = 0;
-        for(int i = 0; i < n; i++) {
-            for(int j = 0; j < nprobe; j++) {
+        for (int i = 0; i < n; i++) {
+            for (int j = 0; j < nprobe; j++) {
                 if (coarse_ids[ij] >= 0) {
                     qcs.push_back(QC{i, int(coarse_ids[ij]), int(j)});
                 }
                 ij++;
             }
         }
-        std::sort(
-            qcs.begin(), qcs.end(),
-            [](const QC &a, const QC & b) {
-                return a.list_no < b.list_no;
-            }
-        );
+        std::sort(qcs.begin(), qcs.end(), [](const QC& a, const QC& b) {
+            return a.list_no < b.list_no;
+        });
     }
     TIC;
     // prepare the result handlers
-    std::unique_ptr<SIMDResultHandler<C, true> > handler;
+    std::unique_ptr<SIMDResultHandler<C, true>> handler;
     AlignedTable<uint16_t> tmp_distances;
     using HeapHC = HeapHandler<C, true>;
@@ -1012,7 +1055,7 @@ void IndexIVFPQFastScan::search_implem_12(
         int list_no = qcs[i0].list_no;
         size_t i1 = i0 + 1;
-        while(i1 < qcs.size() && i1 < i0 + qbs2) {
+        while (i1 < qcs.size() && i1 < i0 + qbs2) {
             if (qcs[i1].list_no != list_no) {
                 break;
             }
@@ -1034,8 +1077,8 @@ void IndexIVFPQFastScan::search_implem_12(
         memset(LUT.get(), -1, nc * dim12);
         int qbs = pq4_preferred_qbs(nc);
-        for(size_t i = i0; i < i1; i++) {
-            const QC & qc = qcs[i];
+        for (size_t i = i0; i < i1; i++) {
+            const QC& qc = qcs[i];
             q_map[i - i0] = qc.qno;
             int ij = qc.qno * nprobe + qc.rank;
             lut_entries[i - i0] = single_LUT ? qc.qno : ij;
@@ -1044,9 +1087,7 @@ void IndexIVFPQFastScan::search_implem_12(
             }
         }
         pq4_pack_LUT_qbs_q_map(
-            qbs, M2, dis_tables.get(), lut_entries.data(),
-            LUT.get()
-        );
+                qbs, M2, dis_tables.get(), lut_entries.data(), LUT.get());
         // access the inverted list
@@ -1062,20 +1103,17 @@ void IndexIVFPQFastScan::search_implem_12(
         handler->id_map = ids.get();
         uint64_t tt1 = get_cy();
-#define DISPATCH(classHC) \
-        if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
-            pq4_accumulate_loop_qbs( \
-                    qbs, list_size, M2, \
-                    codes.get(), LUT.get(), \
-                    *res \
-                ); \
-        }
+#define DISPATCH(classHC)                                          \
+    if (dynamic_cast<classHC*>(handler.get())) {                   \
+        auto* res = static_cast<classHC*>(handler.get());          \
+        pq4_accumulate_loop_qbs(                                   \
+                qbs, list_size, M2, codes.get(), LUT.get(), *res); \
+    }
         DISPATCH(HeapHC)
-        else DISPATCH(ReservoirHC)
-        else DISPATCH(SingleResultHC)
+        else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
-        // prepare for next loop
-        i0 = i1;
+                // prepare for next loop
+                i0 = i1;
         uint64_t tt2 = get_cy();
         t_copy_pack += tt1 - tt0;
@@ -1085,21 +1123,19 @@ void IndexIVFPQFastScan::search_implem_12(
     // labels is in-place for HeapHC
     handler->to_flat_arrays(
-            distances, labels,
-            skip & 16 ? nullptr : normalizers.get()
-    );
+            distances, labels, skip & 16 ? nullptr : normalizers.get());
     TIC;
     // these stats are not thread-safe
-    for(int i = 1; i < ti; i++) {
-        IVFFastScan_stats.times[i] += times[i] - times[i-1];
+    for (int i = 1; i < ti; i++) {
+        IVFFastScan_stats.times[i] += times[i] - times[i - 1];
     }
     IVFFastScan_stats.t_copy_pack += t_copy_pack;
     IVFFastScan_stats.t_scan += t_scan;
-    if (auto *rh = dynamic_cast<ReservoirHC*> (handler.get())) {
+    if (auto* rh = dynamic_cast<ReservoirHC*>(handler.get())) {
         for (int i = 0; i < 4; i++) {
             IVFFastScan_stats.reservoir_times[i] += rh->times[i];
         }
@@ -1107,10 +1143,8 @@ void IndexIVFPQFastScan::search_implem_12(
     *ndis_out = ndis;
     *nlist_out = nlist;
 }
 IVFFastScanStats IVFFastScan_stats;
 } // namespace faiss