faiss 0.1.4 → 0.2.1

Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/README.md +15 -3
  4. data/ext/faiss/ext.cpp +12 -308
  5. data/ext/faiss/extconf.rb +5 -2
  6. data/ext/faiss/index.cpp +189 -0
  7. data/ext/faiss/index_binary.cpp +75 -0
  8. data/ext/faiss/kmeans.cpp +40 -0
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +33 -0
  11. data/ext/faiss/product_quantizer.cpp +53 -0
  12. data/ext/faiss/utils.cpp +13 -0
  13. data/ext/faiss/utils.h +5 -0
  14. data/lib/faiss.rb +0 -5
  15. data/lib/faiss/version.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +31 -10
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
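The bulk of this release is a wholesale refresh of the vendored FAISS C++ tree (a clang-format pass over nearly every file, plus new sources such as IndexNSG, IndexNNDescent and the residual/additive quantizers) and a split of the Ruby binding from a single ext.cpp into per-class C++ files. For orientation before the diffs below, here is a hedged usage sketch of the `IndexIVFPQ` class whose header diff follows. Only the constructor signature is taken from the diff; the synthetic data and the `train`/`add`/`search` calls are the standard FAISS `Index` API and are illustrative assumptions, not part of this changeset.

```cpp
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>

#include <random>
#include <vector>

int main() {
    size_t d = 64;      // vector dimension
    size_t nlist = 100; // inverted lists (coarse centroids)
    size_t M = 8;       // PQ sub-quantizers per vector
    size_t nbits = 8;   // bits per PQ sub-code
    size_t nb = 10000;  // database size
    faiss::Index::idx_t nq = 5, k = 4;

    // synthetic training / database vectors
    std::mt19937 rng(123);
    std::uniform_real_distribution<float> u(0.0f, 1.0f);
    std::vector<float> xb(nb * d);
    for (float& v : xb)
        v = u(rng);

    faiss::IndexFlatL2 quantizer(d); // coarse quantizer, owned by caller
    // constructor signature as in the header diff below (METRIC_L2 default)
    faiss::IndexIVFPQ index(&quantizer, d, nlist, M, nbits);

    index.train(nb, xb.data()); // trains coarse centroids + PQ codebooks
    index.add(nb, xb.data());

    index.nprobe = 8; // search-time parameter: lists visited per query
    std::vector<float> distances(nq * k);
    std::vector<faiss::Index::idx_t> labels(nq * k);
    index.search(nq, xb.data(), k, distances.data(), labels.data());
    return 0;
}
```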
data/vendor/faiss/faiss/IndexIVFPQ.h

@@ -10,7 +10,6 @@
 #ifndef FAISS_INDEX_IVFPQ_H
 #define FAISS_INDEX_IVFPQ_H
 
-
 #include <vector>
 
 #include <faiss/IndexIVF.h>
@@ -20,32 +19,29 @@
 
 namespace faiss {
 
-struct IVFPQSearchParameters: IVFSearchParameters {
-    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
-    int polysemous_ht;             ///< Hamming thresh for polysemous filtering
-    IVFPQSearchParameters (): scan_table_threshold(0), polysemous_ht(0) {}
-    ~IVFPQSearchParameters () {}
+struct IVFPQSearchParameters : IVFSearchParameters {
+    size_t scan_table_threshold; ///< use table computation or on-the-fly?
+    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
+    IVFPQSearchParameters() : scan_table_threshold(0), polysemous_ht(0) {}
+    ~IVFPQSearchParameters() {}
 };
 
-
-
 FAISS_API extern size_t precomputed_table_max_bytes;
 
-
 /** Inverted file with Product Quantizer encoding. Each residual
  * vector is encoded as a product quantizer code.
  */
-struct IndexIVFPQ: IndexIVF {
-    bool by_residual;            ///< Encode residual or plain vector?
+struct IndexIVFPQ : IndexIVF {
+    bool by_residual; ///< Encode residual or plain vector?
 
-    ProductQuantizer pq;         ///< produces the codes
+    ProductQuantizer pq; ///< produces the codes
 
-    bool do_polysemous_training; ///< reorder PQ centroids after training?
-    PolysemousTraining *polysemous_training; ///< if NULL, use default
+    bool do_polysemous_training; ///< reorder PQ centroids after training?
+    PolysemousTraining* polysemous_training; ///< if NULL, use default
 
     // search-time parameters
-    size_t scan_table_threshold;   ///< use table computation or on-the-fly?
-    int polysemous_ht;             ///< Hamming thresh for polysemous filtering
+    size_t scan_table_threshold; ///< use table computation or on-the-fly?
+    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
 
     /** Precompute table that speed up query preprocessing at some
      * memory cost (used only for by_residual with L2 metric)
@@ -56,37 +52,47 @@ struct IndexIVFPQ: IndexIVF {
     /// size nlist * pq.M * pq.ksub
     AlignedTable<float> precomputed_table;
 
-    IndexIVFPQ (
-            Index * quantizer, size_t d, size_t nlist,
-            size_t M, size_t nbits_per_idx, MetricType metric = METRIC_L2);
-
-    void add_with_ids(idx_t n, const float* x, const idx_t* xids = nullptr)
-        override;
-
-    void encode_vectors(idx_t n, const float* x,
-                        const idx_t *list_nos,
-                        uint8_t * codes,
-                        bool include_listnos = false) const override;
-
-    void sa_decode (idx_t n, const uint8_t *bytes,
-                    float *x) const override;
-
+    IndexIVFPQ(
+            Index* quantizer,
+            size_t d,
+            size_t nlist,
+            size_t M,
+            size_t nbits_per_idx,
+            MetricType metric = METRIC_L2);
+
+    void encode_vectors(
+            idx_t n,
+            const float* x,
+            const idx_t* list_nos,
+            uint8_t* codes,
+            bool include_listnos = false) const override;
+
+    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+    void add_core(
+            idx_t n,
+            const float* x,
+            const idx_t* xids,
+            const idx_t* precomputed_idx) override;
 
     /// same as add_core, also:
     /// - output 2nd level residuals if residuals_2 != NULL
-    /// - use precomputed list numbers if precomputed_idx != NULL
-    void add_core_o (idx_t n, const float *x,
-                     const idx_t *xids, float *residuals_2,
-                     const idx_t *precomputed_idx = nullptr);
+    /// - accepts precomputed_idx = nullptr
+    void add_core_o(
+            idx_t n,
+            const float* x,
+            const idx_t* xids,
+            float* residuals_2,
+            const idx_t* precomputed_idx = nullptr);
 
     /// trains the product quantizer
     void train_residual(idx_t n, const float* x) override;
 
     /// same as train_residual, also output 2nd level residuals
-    void train_residual_o (idx_t n, const float *x, float *residuals_2);
+    void train_residual_o(idx_t n, const float* x, float* residuals_2);
 
-    void reconstruct_from_offset (int64_t list_no, int64_t offset,
-                                  float* recons) const override;
+    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
+            const override;
 
     /** Find exact duplicates in the dataset.
      *
@@ -99,10 +105,10 @@ struct IndexIVFPQ: IndexIVF {
      *                duplicates (max size ntotal)
      * @return n  number of groups found
     */
-    size_t find_duplicates (idx_t *ids, size_t *lims) const;
+    size_t find_duplicates(idx_t* ids, size_t* lims) const;
 
     // map a vector to a binary code knowning the index
-    void encode (idx_t key, const float * x, uint8_t * code) const;
+    void encode(idx_t key, const float* x, uint8_t* code) const;
 
     /** Encode multiple vectors
      *
@@ -113,22 +119,27 @@ struct IndexIVFPQ: IndexIVF {
     * @param compute_keys  if false, assume keys are precomputed,
     *                      otherwise compute them
     */
-    void encode_multiple (size_t n, idx_t *keys,
-                          const float * x, uint8_t * codes,
-                          bool compute_keys = false) const;
+    void encode_multiple(
+            size_t n,
+            idx_t* keys,
+            const float* x,
+            uint8_t* codes,
+            bool compute_keys = false) const;
 
     /// inverse of encode_multiple
-    void decode_multiple (size_t n, const idx_t *keys,
-                          const uint8_t * xcodes, float * x) const;
+    void decode_multiple(
+            size_t n,
+            const idx_t* keys,
+            const uint8_t* xcodes,
+            float* x) const;
 
-    InvertedListScanner *get_InvertedListScanner (bool store_pairs)
-        const override;
+    InvertedListScanner* get_InvertedListScanner(
+            bool store_pairs) const override;
 
     /// build precomputed table
-    void precompute_table ();
-
-    IndexIVFPQ ();
+    void precompute_table();
 
+    IndexIVFPQ();
 };
 
 /** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
@@ -136,24 +147,23 @@ struct IndexIVFPQ: IndexIVF {
 * @param use_precomputed_table (I/O)
 *        =-1: force disable
 *        =0: decide heuristically (default: use tables only if they are
-*            < precomputed_tables_max_bytes), set use_precomputed_table on output
-*        =1: tables that work for all quantizers (size 256 * nlist * M)
-*        =2: specific version for MultiIndexQuantizer (much more compact)
+*            < precomputed_tables_max_bytes), set use_precomputed_table on
+* output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
+* specific version for MultiIndexQuantizer (much more compact)
 * @param precomputed_table precomputed table to intialize
 */
 
 void initialize_IVFPQ_precomputed_table(
-    int &use_precomputed_table,
-    const Index *quantizer,
-    const ProductQuantizer &pq,
-    AlignedTable<float> & precomputed_table,
-    bool verbose
-);
+        int& use_precomputed_table,
+        const Index* quantizer,
+        const ProductQuantizer& pq,
+        AlignedTable<float>& precomputed_table,
+        bool verbose);
 
 /// statistics are robust to internal threading, but not if
 /// IndexIVFPQ::search_preassigned is called by multiple threads
 struct IndexIVFPQStats {
-    size_t nrefine;    ///< nb of refines (IVFPQR)
+    size_t nrefine; ///< nb of refines (IVFPQR)
 
     size_t n_hamming_pass;
     ///< nb of passed Hamming distance tests (for polysemous)
@@ -162,17 +172,15 @@ struct IndexIVFPQStats {
     size_t search_cycles;
     size_t refine_cycles; ///< only for IVFPQR
 
-    IndexIVFPQStats () {reset (); }
-    void reset ();
+    IndexIVFPQStats() {
+        reset();
+    }
+    void reset();
 };
 
 // global var that collects them all
 FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
 
-
-
-
 } // namespace faiss
 
-
 #endif
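Next comes the matching implementation diff for `IndexIVFPQFastScan`, the SIMD fast-scan variant of IVFPQ. Two properties visible in that diff are worth keeping in mind: the constructor enforces 4-bit PQ codes (`FAISS_THROW_IF_NOT(nbits_per_idx == 4)`), and a converting constructor repacks the inverted lists of an existing `IndexIVFPQ` into `bbs`-sized blocks via `pq4_pack_codes`. A hedged sketch of both entry points follows; the default `bbs` of 32 and the surrounding setup are assumptions based on the stock FAISS API, not something this diff establishes.

```cpp
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexIVFPQFastScan.h>

int main() {
    size_t d = 64, nlist = 64, M = 8;
    faiss::IndexFlatL2 quantizer(d); // coarse quantizer, caller-owned

    // Direct construction: nbits_per_idx must be 4, otherwise the
    // constructor throws (see the FAISS_THROW_IF_NOT in the diff below).
    faiss::IndexIVFPQFastScan direct(&quantizer, d, nlist, M, 4);

    // Conversion: repack a 4-bit IndexIVFPQ; each inverted list is padded
    // to a multiple of bbs codes and repacked into block layout.
    faiss::IndexIVFPQ ivfpq(&quantizer, d, nlist, M, 4);
    // ... train ivfpq and add vectors here before converting ...
    faiss::IndexIVFPQFastScan converted(ivfpq, /*bbs=*/32);
    return 0;
}
```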
@@ -8,70 +8,68 @@
8
8
  #include <faiss/IndexIVFPQFastScan.h>
9
9
 
10
10
  #include <cassert>
11
+ #include <cinttypes>
11
12
  #include <cstdio>
12
- #include <inttypes.h>
13
13
 
14
14
  #include <omp.h>
15
15
 
16
16
  #include <memory>
17
17
 
18
+ #include <faiss/impl/AuxIndexStructures.h>
18
19
  #include <faiss/impl/FaissAssert.h>
19
- #include <faiss/utils/utils.h>
20
20
  #include <faiss/utils/distances.h>
21
21
  #include <faiss/utils/simdlib.h>
22
- #include <faiss/impl/AuxIndexStructures.h>
22
+ #include <faiss/utils/utils.h>
23
23
 
24
24
  #include <faiss/invlists/BlockInvertedLists.h>
25
25
 
26
+ #include <faiss/impl/pq4_fast_scan.h>
26
27
  #include <faiss/impl/simd_result_handlers.h>
27
28
  #include <faiss/utils/quantize_lut.h>
28
- #include <faiss/impl/pq4_fast_scan.h>
29
29
 
30
30
  namespace faiss {
31
31
 
32
32
  using namespace simd_result_handlers;
33
33
 
34
-
35
34
  inline size_t roundup(size_t a, size_t b) {
36
35
  return (a + b - 1) / b * b;
37
36
  }
38
37
 
39
-
40
- IndexIVFPQFastScan::IndexIVFPQFastScan (
41
- Index * quantizer, size_t d, size_t nlist,
42
- size_t M, size_t nbits_per_idx,
43
- MetricType metric, int bbs):
44
- IndexIVF (quantizer, d, nlist, 0, metric),
45
- pq (d, M, nbits_per_idx),
46
- bbs (bbs)
47
- {
38
+ IndexIVFPQFastScan::IndexIVFPQFastScan(
39
+ Index* quantizer,
40
+ size_t d,
41
+ size_t nlist,
42
+ size_t M,
43
+ size_t nbits_per_idx,
44
+ MetricType metric,
45
+ int bbs)
46
+ : IndexIVF(quantizer, d, nlist, 0, metric),
47
+ pq(d, M, nbits_per_idx),
48
+ bbs(bbs) {
48
49
  FAISS_THROW_IF_NOT(nbits_per_idx == 4);
49
50
  M2 = roundup(pq.M, 2);
50
51
  by_residual = false; // set to false by default because it's much faster
51
52
  is_trained = false;
52
53
  code_size = pq.code_size;
53
54
 
54
- replace_invlists(
55
- new BlockInvertedLists(nlist, bbs, bbs * M2 / 2),
56
- true
57
- );
55
+ replace_invlists(new BlockInvertedLists(nlist, bbs, bbs * M2 / 2), true);
58
56
  }
59
57
 
60
- IndexIVFPQFastScan::IndexIVFPQFastScan ()
61
- {
58
+ IndexIVFPQFastScan::IndexIVFPQFastScan() {
62
59
  by_residual = false;
63
60
  bbs = 0;
64
61
  M2 = 0;
65
62
  }
66
63
 
67
-
68
- IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ & orig, int bbs):
69
- IndexIVF(
70
- orig.quantizer, orig.d, orig.nlist,
71
- orig.pq.code_size, orig.metric_type),
72
- pq(orig.pq),
73
- bbs(bbs)
74
- {
64
+ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
65
+ : IndexIVF(
66
+ orig.quantizer,
67
+ orig.d,
68
+ orig.nlist,
69
+ orig.pq.code_size,
70
+ orig.metric_type),
71
+ pq(orig.pq),
72
+ bbs(bbs) {
75
73
  FAISS_THROW_IF_NOT(orig.pq.nbits == 4);
76
74
 
77
75
  by_residual = orig.by_residual;
@@ -83,69 +81,68 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ & orig, int bbs):
83
81
  M2 = roundup(M, 2);
84
82
 
85
83
  replace_invlists(
86
- new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2),
87
- true
88
- );
84
+ new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2), true);
89
85
 
90
86
  precomputed_table.resize(orig.precomputed_table.size());
91
87
 
92
88
  if (precomputed_table.nbytes() > 0) {
93
- memcpy(precomputed_table.get(), orig.precomputed_table.data(),
94
- precomputed_table.nbytes()
95
- );
89
+ memcpy(precomputed_table.get(),
90
+ orig.precomputed_table.data(),
91
+ precomputed_table.nbytes());
96
92
  }
97
93
 
98
- for(size_t i = 0; i < nlist; i++) {
94
+ for (size_t i = 0; i < nlist; i++) {
99
95
  size_t nb = orig.invlists->list_size(i);
100
96
  size_t nb2 = roundup(nb, bbs);
101
97
  AlignedTable<uint8_t> tmp(nb2 * M2 / 2);
102
98
  pq4_pack_codes(
103
- InvertedLists::ScopedCodes(orig.invlists, i).get(),
104
- nb, M, nb2, bbs, M2,
105
- tmp.get()
106
- );
99
+ InvertedLists::ScopedCodes(orig.invlists, i).get(),
100
+ nb,
101
+ M,
102
+ nb2,
103
+ bbs,
104
+ M2,
105
+ tmp.get());
107
106
  invlists->add_entries(
108
- i, nb,
109
- InvertedLists::ScopedIds(orig.invlists, i).get(),
110
- tmp.get()
111
- );
107
+ i,
108
+ nb,
109
+ InvertedLists::ScopedIds(orig.invlists, i).get(),
110
+ tmp.get());
112
111
  }
113
112
 
114
113
  orig_invlists = orig.invlists;
115
114
  }
116
115
 
117
-
118
-
119
116
  /*********************************************************
120
117
  * Training
121
118
  *********************************************************/
122
119
 
123
- void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in)
124
- {
120
+ void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
121
+ const float* x = fvecs_maybe_subsample(
122
+ d,
123
+ (size_t*)&n,
124
+ pq.cp.max_points_per_centroid * pq.ksub,
125
+ x_in,
126
+ verbose,
127
+ pq.cp.seed);
125
128
 
126
- const float * x = fvecs_maybe_subsample (
127
- d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
128
- x_in, verbose, pq.cp.seed);
129
-
130
- std::unique_ptr<float []> del_x;
129
+ std::unique_ptr<float[]> del_x;
131
130
  if (x != x_in) {
132
131
  del_x.reset((float*)x);
133
132
  }
134
133
 
135
- const float *trainset;
134
+ const float* trainset;
136
135
  AlignedTable<float> residuals;
137
136
 
138
137
  if (by_residual) {
139
- if(verbose) printf("computing residuals\n");
138
+ if (verbose)
139
+ printf("computing residuals\n");
140
140
  std::vector<idx_t> assign(n);
141
- quantizer->assign (n, x, assign.data());
141
+ quantizer->assign(n, x, assign.data());
142
142
  residuals.resize(n * d);
143
143
  for (idx_t i = 0; i < n; i++) {
144
- quantizer->compute_residual (
145
- x + i * d,
146
- residuals.data() + i * d,
147
- assign[i]
148
- );
144
+ quantizer->compute_residual(
145
+ x + i * d, residuals.data() + i * d, assign[i]);
149
146
  }
150
147
  trainset = residuals.data();
151
148
  } else {
@@ -153,82 +150,78 @@ void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in)
153
150
  }
154
151
 
155
152
  if (verbose) {
156
- printf ("training %zdx%zd product quantizer on %zd vectors in %dD\n",
157
- pq.M, pq.ksub, long(n), d);
153
+ printf("training %zdx%zd product quantizer on "
154
+ "%" PRId64 " vectors in %dD\n",
155
+ pq.M,
156
+ pq.ksub,
157
+ n,
158
+ d);
158
159
  }
159
160
  pq.verbose = verbose;
160
- pq.train (n, trainset);
161
+ pq.train(n, trainset);
161
162
 
162
163
  if (by_residual && metric_type == METRIC_L2) {
163
164
  precompute_table();
164
165
  }
165
-
166
166
  }
167
167
 
168
- void IndexIVFPQFastScan::precompute_table ()
169
- {
168
+ void IndexIVFPQFastScan::precompute_table() {
170
169
  initialize_IVFPQ_precomputed_table(
171
- use_precomputed_table,
172
- quantizer, pq, precomputed_table, verbose
173
- );
170
+ use_precomputed_table, quantizer, pq, precomputed_table, verbose);
174
171
  }
175
172
 
176
-
177
173
  /*********************************************************
178
174
  * Code management functions
179
175
  *********************************************************/
180
176
 
181
-
182
-
183
177
  void IndexIVFPQFastScan::encode_vectors(
184
- idx_t n, const float* x, const idx_t *list_nos,
185
- uint8_t * codes, bool include_listnos) const
186
- {
187
-
178
+ idx_t n,
179
+ const float* x,
180
+ const idx_t* list_nos,
181
+ uint8_t* codes,
182
+ bool include_listnos) const {
188
183
  if (by_residual) {
189
- AlignedTable<float> residuals (n * d);
184
+ AlignedTable<float> residuals(n * d);
190
185
  for (size_t i = 0; i < n; i++) {
191
186
  if (list_nos[i] < 0) {
192
- memset (residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
187
+ memset(residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
193
188
  } else {
194
- quantizer->compute_residual (
195
- x + i * d, residuals.data() + i * d, list_nos[i]);
189
+ quantizer->compute_residual(
190
+ x + i * d, residuals.data() + i * d, list_nos[i]);
196
191
  }
197
192
  }
198
- pq.compute_codes (residuals.data(), codes, n);
193
+ pq.compute_codes(residuals.data(), codes, n);
199
194
  } else {
200
- pq.compute_codes (x, codes, n);
195
+ pq.compute_codes(x, codes, n);
201
196
  }
202
197
 
203
198
  if (include_listnos) {
204
199
  size_t coarse_size = coarse_code_size();
205
200
  for (idx_t i = n - 1; i >= 0; i--) {
206
- uint8_t * code = codes + i * (coarse_size + code_size);
207
- memmove (code + coarse_size,
208
- codes + i * code_size, code_size);
209
- encode_listno (list_nos[i], code);
201
+ uint8_t* code = codes + i * (coarse_size + code_size);
202
+ memmove(code + coarse_size, codes + i * code_size, code_size);
203
+ encode_listno(list_nos[i], code);
210
204
  }
211
205
  }
212
206
  }
213
207
 
214
-
215
-
216
- void IndexIVFPQFastScan::add_with_ids (
217
- idx_t n, const float * x, const idx_t *xids) {
218
-
208
+ void IndexIVFPQFastScan::add_with_ids(
209
+ idx_t n,
210
+ const float* x,
211
+ const idx_t* xids) {
219
212
  // copied from IndexIVF::add_with_ids --->
220
213
 
221
214
  // do some blocking to avoid excessive allocs
222
215
  idx_t bs = 65536;
223
216
  if (n > bs) {
224
217
  for (idx_t i0 = 0; i0 < n; i0 += bs) {
225
- idx_t i1 = std::min (n, i0 + bs);
218
+ idx_t i1 = std::min(n, i0 + bs);
226
219
  if (verbose) {
227
220
  printf(" IndexIVFPQFastScan::add_with_ids %zd: %zd",
228
- size_t(i0), size_t(i1));
221
+ size_t(i0),
222
+ size_t(i1));
229
223
  }
230
- add_with_ids (i1 - i0, x + i0 * d,
231
- xids ? xids + i0 : nullptr);
224
+ add_with_ids(i1 - i0, x + i0 * d, xids ? xids + i0 : nullptr);
232
225
  }
233
226
  return;
234
227
  }
@@ -236,37 +229,38 @@ void IndexIVFPQFastScan::add_with_ids (
236
229
 
237
230
  AlignedTable<uint8_t> codes(n * code_size);
238
231
 
239
- FAISS_THROW_IF_NOT (is_trained);
240
- direct_map.check_can_add (xids);
232
+ FAISS_THROW_IF_NOT(is_trained);
233
+ direct_map.check_can_add(xids);
241
234
 
242
- std::unique_ptr<idx_t []> idx(new idx_t[n]);
243
- quantizer->assign (n, x, idx.get());
235
+ std::unique_ptr<idx_t[]> idx(new idx_t[n]);
236
+ quantizer->assign(n, x, idx.get());
244
237
  size_t nadd = 0, nminus1 = 0;
245
238
 
246
239
  for (size_t i = 0; i < n; i++) {
247
- if (idx[i] < 0) nminus1++;
240
+ if (idx[i] < 0)
241
+ nminus1++;
248
242
  }
249
243
 
250
244
  AlignedTable<uint8_t> flat_codes(n * code_size);
251
- encode_vectors (n, x, idx.get(), flat_codes.get());
245
+ encode_vectors(n, x, idx.get(), flat_codes.get());
252
246
 
253
247
  DirectMapAdd dm_adder(direct_map, n, xids);
254
248
 
255
249
  // <---
256
250
 
257
- BlockInvertedLists *bil = dynamic_cast<BlockInvertedLists*>(invlists);
258
- FAISS_THROW_IF_NOT_MSG (bil, "only block inverted lists supported");
251
+ BlockInvertedLists* bil = dynamic_cast<BlockInvertedLists*>(invlists);
252
+ FAISS_THROW_IF_NOT_MSG(bil, "only block inverted lists supported");
259
253
 
260
254
  // prepare batches
261
255
  std::vector<idx_t> order(n);
262
- for(idx_t i = 0; i < n ; i++) { order[i] = i; }
256
+ for (idx_t i = 0; i < n; i++) {
257
+ order[i] = i;
258
+ }
263
259
 
264
260
  // TODO should not need stable
265
- std::stable_sort(order.begin(), order.end(),
266
- [&idx](idx_t a, idx_t b) {
267
- return idx[a] < idx[b];
268
- }
269
- );
261
+ std::stable_sort(order.begin(), order.end(), [&idx](idx_t a, idx_t b) {
262
+ return idx[a] < idx[b];
263
+ });
270
264
 
271
265
  // TODO parallelize
272
266
  idx_t i0 = 0;
@@ -274,7 +268,7 @@ void IndexIVFPQFastScan::add_with_ids (
274
268
  idx_t list_no = idx[order[i0]];
275
269
  idx_t i1 = i0 + 1;
276
270
  while (i1 < n && idx[order[i1]] == list_no) {
277
- i1 ++;
271
+ i1++;
278
272
  }
279
273
 
280
274
  if (list_no == -1) {
@@ -288,58 +282,57 @@ void IndexIVFPQFastScan::add_with_ids (
288
282
 
289
283
  bil->resize(list_no, list_size + i1 - i0);
290
284
 
291
- for(idx_t i = i0; i < i1; i++) {
285
+ for (idx_t i = i0; i < i1; i++) {
292
286
  size_t ofs = list_size + i - i0;
293
287
  idx_t id = xids ? xids[order[i]] : ntotal + order[i];
294
- dm_adder.add (order[i], list_no, ofs);
288
+ dm_adder.add(order[i], list_no, ofs);
295
289
  bil->ids[list_no][ofs] = id;
296
- memcpy(
297
- list_codes.data() + (i - i0) * code_size,
298
- flat_codes.data() + order[i] * code_size,
299
- code_size
300
- );
290
+ memcpy(list_codes.data() + (i - i0) * code_size,
291
+ flat_codes.data() + order[i] * code_size,
292
+ code_size);
301
293
  nadd++;
302
294
  }
303
295
  pq4_pack_codes_range(
304
- list_codes.data(), pq.M,
305
- list_size, list_size + i1 - i0,
306
- bbs, M2, bil->codes[list_no].data()
307
- );
296
+ list_codes.data(),
297
+ pq.M,
298
+ list_size,
299
+ list_size + i1 - i0,
300
+ bbs,
301
+ M2,
302
+ bil->codes[list_no].data());
308
303
 
309
304
  i0 = i1;
310
305
  }
311
306
 
312
307
  ntotal += n;
313
-
314
308
  }
315
309
 
316
-
317
-
318
310
  /*********************************************************
319
311
  * search
320
312
  *********************************************************/
321
313
 
322
-
323
314
  namespace {
324
315
 
325
316
  // from impl/ProductQuantizer.cpp
326
317
  template <class C, typename dis_t>
327
318
  void pq_estimators_from_tables_generic(
328
- const ProductQuantizer& pq, size_t nbits,
329
- const uint8_t *codes, size_t ncodes,
330
- const dis_t *dis_table, const int64_t * ids,
319
+ const ProductQuantizer& pq,
320
+ size_t nbits,
321
+ const uint8_t* codes,
322
+ size_t ncodes,
323
+ const dis_t* dis_table,
324
+ const int64_t* ids,
331
325
  float dis0,
332
- size_t k, typename C::T *heap_dis, int64_t *heap_ids)
333
- {
326
+ size_t k,
327
+ typename C::T* heap_dis,
328
+ int64_t* heap_ids) {
334
329
  using accu_t = typename C::T;
335
330
  const size_t M = pq.M;
336
331
  const size_t ksub = pq.ksub;
337
332
  for (size_t j = 0; j < ncodes; ++j) {
338
- PQDecoderGeneric decoder(
339
- codes + j * pq.code_size, nbits
340
- );
333
+ PQDecoderGeneric decoder(codes + j * pq.code_size, nbits);
341
334
  accu_t dis = dis0;
342
- const dis_t * dt = dis_table;
335
+ const dis_t* dt = dis_table;
343
336
  for (size_t m = 0; m < M; m++) {
344
337
  uint64_t c = decoder.decode();
345
338
  dis += dt[c];
@@ -356,17 +349,19 @@ void pq_estimators_from_tables_generic(
356
349
  using idx_t = Index::idx_t;
357
350
  using namespace quantize_lut;
358
351
 
359
- void fvec_madd_avx (
360
- size_t n, const float *a,
361
- float bf, const float *b, float *c)
362
- {
352
+ void fvec_madd_avx(
353
+ size_t n,
354
+ const float* a,
355
+ float bf,
356
+ const float* b,
357
+ float* c) {
363
358
  assert(is_aligned_pointer(a));
364
359
  assert(is_aligned_pointer(b));
365
360
  assert(is_aligned_pointer(c));
366
361
  assert(n % 8 == 0);
367
362
  simd8float32 bf8(bf);
368
363
  n /= 8;
369
- for(size_t i = 0; i < n; i++) {
364
+ for (size_t i = 0; i < n; i++) {
370
365
  simd8float32 ai(a);
371
366
  simd8float32 bi(b);
372
367
 
@@ -376,7 +371,6 @@ void fvec_madd_avx (
376
371
  a += 8;
377
372
  b += 8;
378
373
  }
379
-
380
374
  }
381
375
 
382
376
  } // anonymous namespace
@@ -385,23 +379,20 @@ void fvec_madd_avx (
385
379
  * Look-Up Table functions
386
380
  *********************************************************/
387
381
 
388
-
389
382
  void IndexIVFPQFastScan::compute_LUT(
390
- size_t n, const float *x,
391
- const idx_t *coarse_ids, const float *coarse_dis,
392
- AlignedTable<float> & dis_tables,
393
- AlignedTable<float> & biases
394
- ) const
395
- {
396
- const IndexIVFPQFastScan & ivfpq = *this;
383
+ size_t n,
384
+ const float* x,
385
+ const idx_t* coarse_ids,
386
+ const float* coarse_dis,
387
+ AlignedTable<float>& dis_tables,
388
+ AlignedTable<float>& biases) const {
389
+ const IndexIVFPQFastScan& ivfpq = *this;
397
390
  size_t dim12 = pq.ksub * pq.M;
398
391
  size_t d = pq.d;
399
392
  size_t nprobe = ivfpq.nprobe;
400
393
 
401
394
  if (ivfpq.by_residual) {
402
-
403
395
  if (ivfpq.metric_type == METRIC_L2) {
404
-
405
396
  dis_tables.resize(n * nprobe * dim12);
406
397
 
407
398
  if (ivfpq.use_precomputed_table == 1) {
@@ -409,57 +400,54 @@ void IndexIVFPQFastScan::compute_LUT(
409
400
  memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
410
401
 
411
402
  AlignedTable<float> ip_table(n * dim12);
412
- pq.compute_inner_prod_tables (n, x, ip_table.get());
403
+ pq.compute_inner_prod_tables(n, x, ip_table.get());
413
404
 
414
405
  #pragma omp parallel for if (n * nprobe > 8000)
415
- for(idx_t ij = 0; ij < n * nprobe; ij++) {
406
+ for (idx_t ij = 0; ij < n * nprobe; ij++) {
416
407
  idx_t i = ij / nprobe;
417
- float *tab = dis_tables.get() + ij * dim12;
408
+ float* tab = dis_tables.get() + ij * dim12;
418
409
  idx_t cij = coarse_ids[ij];
419
410
 
420
411
  if (cij >= 0) {
421
- fvec_madd_avx (
422
- dim12,
423
- precomputed_table.get() + cij * dim12,
424
- -2, ip_table.get() + i * dim12,
425
- tab
426
- );
412
+ fvec_madd_avx(
413
+ dim12,
414
+ precomputed_table.get() + cij * dim12,
415
+ -2,
416
+ ip_table.get() + i * dim12,
417
+ tab);
427
418
  } else {
428
419
  // fill with NaNs so that they are ignored during
429
420
  // LUT quantization
430
- memset (tab, -1, sizeof(float) * dim12);
421
+ memset(tab, -1, sizeof(float) * dim12);
431
422
  }
432
423
  }
433
424
 
434
425
  } else {
435
-
436
426
  std::unique_ptr<float[]> xrel(new float[n * nprobe * d]);
437
427
  biases.resize(n * nprobe);
438
428
  memset(biases.get(), 0, sizeof(float) * n * nprobe);
439
429
 
440
430
  #pragma omp parallel for if (n * nprobe > 8000)
441
- for(idx_t ij = 0; ij < n * nprobe; ij++) {
431
+ for (idx_t ij = 0; ij < n * nprobe; ij++) {
442
432
  idx_t i = ij / nprobe;
443
- float *xij = &xrel[ij * d];
433
+ float* xij = &xrel[ij * d];
444
434
  idx_t cij = coarse_ids[ij];
445
435
 
446
436
  if (cij >= 0) {
447
- ivfpq.quantizer->compute_residual(
448
- x + i * d, xij, cij);
437
+ ivfpq.quantizer->compute_residual(x + i * d, xij, cij);
449
438
  } else {
450
439
  // will fill with NaNs
451
440
  memset(xij, -1, sizeof(float) * d);
452
441
  }
453
442
  }
454
443
 
455
- pq.compute_distance_tables (
444
+ pq.compute_distance_tables(
456
445
  n * nprobe, xrel.get(), dis_tables.get());
457
-
458
446
  }
459
447
 
460
448
  } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
461
449
  dis_tables.resize(n * dim12);
462
- pq.compute_inner_prod_tables (n, x, dis_tables.get());
450
+ pq.compute_inner_prod_tables(n, x, dis_tables.get());
463
451
  // compute_inner_prod_tables(pq, n, x, dis_tables.get());
464
452
 
465
453
  biases.resize(n * nprobe);
@@ -471,33 +459,29 @@ void IndexIVFPQFastScan::compute_LUT(
471
459
  } else {
472
460
  dis_tables.resize(n * dim12);
473
461
  if (ivfpq.metric_type == METRIC_L2) {
474
- pq.compute_distance_tables (n, x, dis_tables.get());
462
+ pq.compute_distance_tables(n, x, dis_tables.get());
475
463
  } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
476
- pq.compute_inner_prod_tables (n, x, dis_tables.get());
464
+ pq.compute_inner_prod_tables(n, x, dis_tables.get());
477
465
  } else {
478
466
  FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
479
467
  }
480
468
  }
481
-
482
469
  }
483
470
 
484
471
  void IndexIVFPQFastScan::compute_LUT_uint8(
485
- size_t n, const float *x,
486
- const idx_t *coarse_ids, const float *coarse_dis,
487
- AlignedTable<uint8_t> & dis_tables,
488
- AlignedTable<uint16_t> & biases,
489
- float * normalizers
490
- ) const {
491
- const IndexIVFPQFastScan & ivfpq = *this;
472
+ size_t n,
473
+ const float* x,
474
+ const idx_t* coarse_ids,
475
+ const float* coarse_dis,
476
+ AlignedTable<uint8_t>& dis_tables,
477
+ AlignedTable<uint16_t>& biases,
478
+ float* normalizers) const {
479
+ const IndexIVFPQFastScan& ivfpq = *this;
492
480
  AlignedTable<float> dis_tables_float;
493
481
  AlignedTable<float> biases_float;
494
482
 
495
483
  uint64_t t0 = get_cy();
496
- compute_LUT(
497
- n, x,
498
- coarse_ids, coarse_dis,
499
- dis_tables_float, biases_float
500
- );
484
+ compute_LUT(n, x, coarse_ids, coarse_dis, dis_tables_float, biases_float);
501
485
  IVFFastScan_stats.t_compute_distance_tables += get_cy() - t0;
502
486
 
503
487
  bool lut_is_3d = ivfpq.by_residual && ivfpq.metric_type == METRIC_L2;
@@ -514,45 +498,52 @@ void IndexIVFPQFastScan::compute_LUT_uint8(
514
498
  uint64_t t1 = get_cy();
515
499
 
516
500
  #pragma omp parallel for if (n > 100)
517
- for(int64_t i = 0; i < n; i++) {
518
- const float *t_in = dis_tables_float.get() + i * dim123;
519
- const float *b_in = nullptr;
520
- uint8_t *t_out = dis_tables.get() + i * dim123_2;
521
- uint16_t *b_out = nullptr;
501
+ for (int64_t i = 0; i < n; i++) {
502
+ const float* t_in = dis_tables_float.get() + i * dim123;
503
+ const float* b_in = nullptr;
504
+ uint8_t* t_out = dis_tables.get() + i * dim123_2;
505
+ uint16_t* b_out = nullptr;
522
506
  if (biases_float.get()) {
523
507
  b_in = biases_float.get() + i * nprobe;
524
508
  b_out = biases.get() + i * nprobe;
525
509
  }
526
510
 
527
511
  quantize_LUT_and_bias(
528
- nprobe, pq.M, pq.ksub, lut_is_3d,
529
- t_in, b_in,
530
- t_out, M2, b_out,
531
- normalizers + 2 * i, normalizers + 2 * i + 1
532
- );
512
+ nprobe,
513
+ pq.M,
514
+ pq.ksub,
515
+ lut_is_3d,
516
+ t_in,
517
+ b_in,
518
+ t_out,
519
+ M2,
520
+ b_out,
521
+ normalizers + 2 * i,
522
+ normalizers + 2 * i + 1);
533
523
  }
534
524
  IVFFastScan_stats.t_round += get_cy() - t1;
535
-
536
525
  }
537
526
 
538
-
539
527
  /*********************************************************
540
528
  * Search functions
541
529
  *********************************************************/
542
530
 
543
- template<bool is_max>
531
+ template <bool is_max>
544
532
  void IndexIVFPQFastScan::search_dispatch_implem(
545
- idx_t n,
546
- const float* x,
547
- idx_t k,
548
- float* distances,
549
- idx_t* labels) const
550
- {
551
- using Cfloat = typename std::conditional<is_max,
552
- CMax<float, int64_t>, CMin<float, int64_t> >::type;
553
-
554
- using C = typename std::conditional<is_max,
555
- CMax<uint16_t, int64_t>, CMin<uint16_t, int64_t> >::type;
533
+ idx_t n,
534
+ const float* x,
535
+ idx_t k,
536
+ float* distances,
537
+ idx_t* labels) const {
538
+ using Cfloat = typename std::conditional<
539
+ is_max,
540
+ CMax<float, int64_t>,
541
+ CMin<float, int64_t>>::type;
542
+
543
+ using C = typename std::conditional<
544
+ is_max,
545
+ CMax<uint16_t, int64_t>,
546
+ CMin<uint16_t, int64_t>>::type;
556
547
 
557
548
  if (n == 0) {
558
549
  return;
@@ -568,7 +559,7 @@ void IndexIVFPQFastScan::search_dispatch_implem(
568
559
  impl = 10;
569
560
  }
570
561
  if (k > 20) {
571
- impl ++;
562
+ impl++;
572
563
  }
573
564
  }
574
565
 
@@ -582,11 +573,25 @@ void IndexIVFPQFastScan::search_dispatch_implem(
582
573
 
583
574
  if (n < 2) {
584
575
  if (impl == 12 || impl == 13) {
585
- search_implem_12<C>
586
- (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
576
+ search_implem_12<C>(
577
+ n,
578
+ x,
579
+ k,
580
+ distances,
581
+ labels,
582
+ impl,
583
+ &ndis,
584
+ &nlist_visited);
587
585
  } else {
588
- search_implem_10<C>
589
- (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
586
+ search_implem_10<C>(
587
+ n,
588
+ x,
589
+ k,
590
+ distances,
591
+ labels,
592
+ impl,
593
+ &ndis,
594
+ &nlist_visited);
590
595
  }
591
596
  } else {
592
597
  // explicitly slice over threads
@@ -595,34 +600,47 @@ void IndexIVFPQFastScan::search_dispatch_implem(
595
600
  nslice = n;
596
601
  } else if (by_residual && metric_type == METRIC_L2) {
597
602
  // make sure we don't make too big LUT tables
598
- size_t lut_size_per_query =
599
- pq.M * pq.ksub * nprobe * (sizeof(float) + sizeof(uint8_t));
603
+ size_t lut_size_per_query = pq.M * pq.ksub * nprobe *
604
+ (sizeof(float) + sizeof(uint8_t));
600
605
 
601
606
  size_t max_lut_size = precomputed_table_max_bytes;
602
607
  // how many queries we can handle within mem budget
603
- size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
604
- nslice = roundup(std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
608
+ size_t nq_ok =
609
+ std::max(max_lut_size / lut_size_per_query, size_t(1));
610
+ nslice =
611
+ roundup(std::max(size_t(n / nq_ok), size_t(1)),
612
+ omp_get_max_threads());
605
613
  } else {
606
614
  // LUTs unlikely to be a limiting factor
607
615
  nslice = omp_get_max_threads();
608
616
  }
609
617
 
610
- #pragma omp parallel for reduction(+: ndis, nlist_visited)
618
+ #pragma omp parallel for reduction(+ : ndis, nlist_visited)
611
619
  for (int slice = 0; slice < nslice; slice++) {
612
620
  idx_t i0 = n * slice / nslice;
613
621
  idx_t i1 = n * (slice + 1) / nslice;
614
- float *dis_i = distances + i0 * k;
615
- idx_t *lab_i = labels + i0 * k;
622
+ float* dis_i = distances + i0 * k;
623
+ idx_t* lab_i = labels + i0 * k;
616
624
  if (impl == 12 || impl == 13) {
617
625
  search_implem_12<C>(
618
- i1 - i0, x + i0 * d, k, dis_i, lab_i,
619
- impl, &ndis, &nlist_visited
620
- );
626
+ i1 - i0,
627
+ x + i0 * d,
628
+ k,
629
+ dis_i,
630
+ lab_i,
631
+ impl,
632
+ &ndis,
633
+ &nlist_visited);
621
634
  } else {
622
635
  search_implem_10<C>(
623
- i1 - i0, x + i0 * d, k, dis_i, lab_i,
624
- impl, &ndis, &nlist_visited
625
- );
636
+ i1 - i0,
637
+ x + i0 * d,
638
+ k,
639
+ dis_i,
640
+ lab_i,
641
+ impl,
642
+ &ndis,
643
+ &nlist_visited);
626
644
  }
627
645
  }
628
646
  }
@@ -632,14 +650,16 @@ void IndexIVFPQFastScan::search_dispatch_implem(
632
650
  } else {
633
651
  FAISS_THROW_FMT("implem %d does not exist", implem);
634
652
  }
635
-
636
653
  }
637
654
 
638
-
639
655
  void IndexIVFPQFastScan::search(
640
- idx_t n, const float* x, idx_t k,
641
- float* distances, idx_t* labels) const
642
- {
656
+ idx_t n,
657
+ const float* x,
658
+ idx_t k,
659
+ float* distances,
660
+ idx_t* labels) const {
661
+ FAISS_THROW_IF_NOT(k > 0);
662
+
643
663
  if (metric_type == METRIC_L2) {
644
664
  search_dispatch_implem<true>(n, x, k, distances, labels);
645
665
  } else {
@@ -647,133 +667,150 @@ void IndexIVFPQFastScan::search(
647
667
  }
648
668
  }
649
669
 
650
- template<class C>
670
+ template <class C>
651
671
  void IndexIVFPQFastScan::search_implem_1(
652
- idx_t n, const float* x, idx_t k,
653
- float* distances, idx_t* labels) const
654
- {
672
+ idx_t n,
673
+ const float* x,
674
+ idx_t k,
675
+ float* distances,
676
+ idx_t* labels) const {
655
677
  FAISS_THROW_IF_NOT(orig_invlists);
656
678
 
657
679
  std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
658
680
  std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
659
681
 
660
- quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
682
+ quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
661
683
 
662
684
  size_t dim12 = pq.ksub * pq.M;
663
685
  AlignedTable<float> dis_tables;
664
686
  AlignedTable<float> biases;
665
687
 
666
- compute_LUT (
667
- n, x,
668
- coarse_ids.get(), coarse_dis.get(),
669
- dis_tables, biases
670
- );
688
+ compute_LUT(n, x, coarse_ids.get(), coarse_dis.get(), dis_tables, biases);
671
689
 
672
690
  bool single_LUT = !(by_residual && metric_type == METRIC_L2);
673
691
 
674
692
  size_t ndis = 0, nlist_visited = 0;
675
693
 
676
- #pragma omp parallel for reduction(+: ndis, nlist_visited)
677
- for(idx_t i = 0; i < n; i++) {
678
- int64_t *heap_ids = labels + i * k;
679
- float *heap_dis = distances + i * k;
680
- heap_heapify<C> (k, heap_dis, heap_ids);
681
- float *LUT = nullptr;
694
+ #pragma omp parallel for reduction(+ : ndis, nlist_visited)
695
+ for (idx_t i = 0; i < n; i++) {
696
+ int64_t* heap_ids = labels + i * k;
697
+ float* heap_dis = distances + i * k;
698
+ heap_heapify<C>(k, heap_dis, heap_ids);
699
+ float* LUT = nullptr;
682
700
 
683
701
  if (single_LUT) {
684
702
  LUT = dis_tables.get() + i * dim12;
685
703
  }
686
- for(idx_t j = 0; j < nprobe; j++) {
704
+ for (idx_t j = 0; j < nprobe; j++) {
687
705
  if (!single_LUT) {
688
706
  LUT = dis_tables.get() + (i * nprobe + j) * dim12;
689
707
  }
690
708
  idx_t list_no = coarse_ids[i * nprobe + j];
691
- if (list_no < 0) continue;
709
+ if (list_no < 0)
710
+ continue;
692
711
  size_t ls = orig_invlists->list_size(list_no);
693
- if (ls == 0) continue;
712
+ if (ls == 0)
713
+ continue;
694
714
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
695
715
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
696
716
 
697
717
  float bias = biases.get() ? biases[i * nprobe + j] : 0;
698
718
 
699
719
  pq_estimators_from_tables_generic<C>(
700
- pq, pq.nbits, codes.get(), ls,
701
- LUT, ids.get(), bias,
702
- k, heap_dis, heap_ids
703
- );
704
- nlist_visited ++;
705
- ndis ++;
720
+ pq,
721
+ pq.nbits,
722
+ codes.get(),
723
+ ls,
724
+ LUT,
725
+ ids.get(),
726
+ bias,
727
+ k,
728
+ heap_dis,
729
+ heap_ids);
730
+ nlist_visited++;
731
+ ndis++;
706
732
  }
707
- heap_reorder<C> (k, heap_dis, heap_ids);
733
+ heap_reorder<C>(k, heap_dis, heap_ids);
708
734
  }
709
735
  indexIVF_stats.nq += n;
710
736
  indexIVF_stats.ndis += ndis;
711
737
  indexIVF_stats.nlist += nlist_visited;
712
738
  }
713
739
 
714
- template<class C>
740
+ template <class C>
715
741
  void IndexIVFPQFastScan::search_implem_2(
716
- idx_t n, const float* x, idx_t k,
717
- float* distances, idx_t* labels) const
718
- {
742
+ idx_t n,
743
+ const float* x,
744
+ idx_t k,
745
+ float* distances,
746
+ idx_t* labels) const {
719
747
  FAISS_THROW_IF_NOT(orig_invlists);
720
748
 
721
749
  std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
722
750
  std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
723
751
 
724
- quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
752
+ quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
725
753
 
726
754
  size_t dim12 = pq.ksub * M2;
727
755
  AlignedTable<uint8_t> dis_tables;
728
756
  AlignedTable<uint16_t> biases;
729
757
  std::unique_ptr<float[]> normalizers(new float[2 * n]);
730
758
 
731
- compute_LUT_uint8 (
732
- n, x,
733
- coarse_ids.get(), coarse_dis.get(),
734
- dis_tables, biases,
735
- normalizers.get()
736
- );
737
-
759
+ compute_LUT_uint8(
760
+ n,
761
+ x,
762
+ coarse_ids.get(),
763
+ coarse_dis.get(),
764
+ dis_tables,
765
+ biases,
766
+ normalizers.get());
738
767
 
739
768
  bool single_LUT = !(by_residual && metric_type == METRIC_L2);
740
769
 
741
770
  size_t ndis = 0, nlist_visited = 0;
742
771
 
743
- #pragma omp parallel for reduction(+: ndis, nlist_visited)
744
- for(idx_t i = 0; i < n; i++) {
772
+ #pragma omp parallel for reduction(+ : ndis, nlist_visited)
773
+ for (idx_t i = 0; i < n; i++) {
745
774
  std::vector<uint16_t> tmp_dis(k);
746
- int64_t *heap_ids = labels + i * k;
747
- uint16_t *heap_dis = tmp_dis.data();
748
- heap_heapify<C> (k, heap_dis, heap_ids);
749
- const uint8_t *LUT = nullptr;
775
+ int64_t* heap_ids = labels + i * k;
776
+ uint16_t* heap_dis = tmp_dis.data();
777
+ heap_heapify<C>(k, heap_dis, heap_ids);
778
+ const uint8_t* LUT = nullptr;
750
779
 
751
780
  if (single_LUT) {
752
781
  LUT = dis_tables.get() + i * dim12;
753
782
  }
754
- for(idx_t j = 0; j < nprobe; j++) {
783
+ for (idx_t j = 0; j < nprobe; j++) {
755
784
  if (!single_LUT) {
756
785
  LUT = dis_tables.get() + (i * nprobe + j) * dim12;
757
786
  }
758
787
  idx_t list_no = coarse_ids[i * nprobe + j];
759
- if (list_no < 0) continue;
788
+ if (list_no < 0)
789
+ continue;
760
790
  size_t ls = orig_invlists->list_size(list_no);
761
- if (ls == 0) continue;
791
+ if (ls == 0)
792
+ continue;
762
793
  InvertedLists::ScopedCodes codes(orig_invlists, list_no);
763
794
  InvertedLists::ScopedIds ids(orig_invlists, list_no);
764
795
 
765
796
  uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
766
797
 
767
798
  pq_estimators_from_tables_generic<C>(
768
- pq, pq.nbits, codes.get(), ls,
769
- LUT, ids.get(), bias,
770
- k, heap_dis, heap_ids
771
- );
799
+ pq,
800
+ pq.nbits,
801
+ codes.get(),
802
+ ls,
803
+ LUT,
804
+ ids.get(),
805
+ bias,
806
+ k,
807
+ heap_dis,
808
+ heap_ids);
772
809
 
773
810
  nlist_visited++;
774
811
  ndis += ls;
775
812
  }
776
- heap_reorder<C> (k, heap_dis, heap_ids);
813
+ heap_reorder<C>(k, heap_dis, heap_ids);
777
814
  // convert distances to float
778
815
  {
779
816
  float one_a = 1 / normalizers[2 * i], b = normalizers[2 * i + 1];
@@ -781,7 +818,7 @@ void IndexIVFPQFastScan::search_implem_2(
781
818
  one_a = 1;
782
819
  b = 0;
783
820
  }
784
- float *heap_dis_float = distances + i * k;
821
+ float* heap_dis_float = distances + i * k;
785
822
  for (int j = 0; j < k; j++) {
786
823
  heap_dis_float[j] = b + heap_dis[j] * one_a;
787
824
  }
@@ -792,14 +829,16 @@ void IndexIVFPQFastScan::search_implem_2(
792
829
  indexIVF_stats.nlist += nlist_visited;
793
830
  }
794
831
 
795
-
796
-
797
- template<class C>
832
+ template <class C>
798
833
  void IndexIVFPQFastScan::search_implem_10(
799
- idx_t n, const float* x, idx_t k,
800
- float* distances, idx_t* labels,
801
- int impl, size_t *ndis_out, size_t *nlist_out) const
802
- {
834
+ idx_t n,
835
+ const float* x,
836
+ idx_t k,
837
+ float* distances,
838
+ idx_t* labels,
839
+ int impl,
840
+ size_t* ndis_out,
841
+ size_t* nlist_out) const {
803
842
  memset(distances, -1, sizeof(float) * k * n);
804
843
  memset(labels, -1, sizeof(idx_t) * k * n);
805
844
 
@@ -807,7 +846,6 @@ void IndexIVFPQFastScan::search_implem_10(
807
846
  using ReservoirHC = ReservoirHandler<C, true>;
808
847
  using SingleResultHC = SingleResultHandler<C, true>;
809
848
 
810
-
811
849
  std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
812
850
  std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
813
851
 
@@ -817,20 +855,23 @@ void IndexIVFPQFastScan::search_implem_10(
817
855
  #define TIC times[ti++] = get_cy()
818
856
  TIC;
819
857
 
820
- quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
858
+ quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
821
859
 
822
860
  TIC;
823
861
 
824
862
  size_t dim12 = pq.ksub * M2;
825
863
  AlignedTable<uint8_t> dis_tables;
826
864
  AlignedTable<uint16_t> biases;
827
- std::unique_ptr<float[]> normalizers (new float[2 * n]);
865
+ std::unique_ptr<float[]> normalizers(new float[2 * n]);
828
866
 
829
- compute_LUT_uint8 (
830
- n, x,
831
- coarse_ids.get(), coarse_dis.get(),
832
- dis_tables, biases, normalizers.get()
833
- );
867
+ compute_LUT_uint8(
868
+ n,
869
+ x,
870
+ coarse_ids.get(),
871
+ coarse_dis.get(),
872
+ dis_tables,
873
+ biases,
874
+ normalizers.get());
834
875
 
835
876
  TIC;
836
877
 
@@ -841,15 +882,16 @@ void IndexIVFPQFastScan::search_implem_10(
841
882
 
842
883
  {
843
884
  AlignedTable<uint16_t> tmp_distances(k);
844
- for(idx_t i = 0; i < n; i++) {
845
- const uint8_t *LUT = nullptr;
885
+ for (idx_t i = 0; i < n; i++) {
886
+ const uint8_t* LUT = nullptr;
846
887
  int qmap1[1] = {0};
847
- std::unique_ptr<SIMDResultHandler<C, true> > handler;
888
+ std::unique_ptr<SIMDResultHandler<C, true>> handler;
848
889
 
849
890
  if (k == 1) {
850
891
  handler.reset(new SingleResultHC(1, 0));
851
892
  } else if (impl == 10) {
852
- handler.reset(new HeapHC(1, tmp_distances.get(), labels + i * k, k, 0));
893
+ handler.reset(new HeapHC(
894
+ 1, tmp_distances.get(), labels + i * k, k, 0));
853
895
  } else if (impl == 11) {
854
896
  handler.reset(new ReservoirHC(1, 0, k, 2 * k));
855
897
  } else {
@@ -861,7 +903,7 @@ void IndexIVFPQFastScan::search_implem_10(
861
903
  if (single_LUT) {
862
904
  LUT = dis_tables.get() + i * dim12;
863
905
  }
864
- for(idx_t j = 0; j < nprobe; j++) {
906
+ for (idx_t j = 0; j < nprobe; j++) {
865
907
  size_t ij = i * nprobe + j;
866
908
  if (!single_LUT) {
867
909
  LUT = dis_tables.get() + ij * dim12;
@@ -871,9 +913,11 @@ void IndexIVFPQFastScan::search_implem_10(
  }
 
  idx_t list_no = coarse_ids[ij];
- if (list_no < 0) continue;
+ if (list_no < 0)
+ continue;
  size_t ls = invlists->list_size(list_no);
- if (ls == 0) continue;
+ if (ls == 0)
+ continue;
 
  InvertedLists::ScopedCodes codes(invlists, list_no);
  InvertedLists::ScopedIds ids(invlists, list_no);
@@ -881,41 +925,40 @@ void IndexIVFPQFastScan::search_implem_10(
  handler->ntotal = ls;
  handler->id_map = ids.get();
 
- #define DISPATCH(classHC) \
- if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
- pq4_accumulate_loop( \
- 1, roundup(ls, bbs), bbs, M2, \
- codes.get(), LUT, \
- *res \
- ); \
- }
+ #define DISPATCH(classHC) \
+ if (dynamic_cast<classHC*>(handler.get())) { \
+ auto* res = static_cast<classHC*>(handler.get()); \
+ pq4_accumulate_loop( \
+ 1, roundup(ls, bbs), bbs, M2, codes.get(), LUT, *res); \
+ }
  DISPATCH(HeapHC)
- else DISPATCH(ReservoirHC)
- else DISPATCH(SingleResultHC)
+ else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
  #undef DISPATCH
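
The DISPATCH rewrite above is behavior-preserving: the old macro bound `res` inside `if (auto *res = dynamic_cast...)`, while the new one probes with `dynamic_cast` and then casts unchecked with `static_cast`, a layout clang-format can wrap while the `else DISPATCH(...)` chaining still parses. The same pattern in isolation, with hypothetical `Base`, `HandlerA`, `HandlerB`, and `accumulate` standing in for the SIMD result handlers and the scan:

    #include <memory>

    struct Base { virtual ~Base() = default; };
    struct HandlerA : Base {};
    struct HandlerB : Base {};

    template <class T>
    void accumulate(T&) { /* type-specific scan would go here */ }

    void dispatch(std::unique_ptr<Base>& handler) {
        // Probe each concrete type in turn; first match wins.
    #define DISPATCH(T)                                 \
        if (dynamic_cast<T*>(handler.get())) {          \
            auto* res = static_cast<T*>(handler.get()); \
            accumulate(*res);                           \
        }
        DISPATCH(HandlerA)
        else DISPATCH(HandlerB)
    #undef DISPATCH
    }
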
 
- nlist_visited ++;
- ndis ++;
+ nlist_visited++;
+ ndis++;
  }
 
  handler->to_flat_arrays(
- distances + i * k, labels + i * k,
- skip & 16 ? nullptr : normalizers.get() + i * 2
- );
+ distances + i * k,
+ labels + i * k,
+ skip & 16 ? nullptr : normalizers.get() + i * 2);
  }
  }
  *ndis_out = ndis;
  *nlist_out = nlist;
  }
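
Taken together, search_implem_10 runs query by query: choose a result handler, scan each non-empty probed list with pq4_accumulate_loop, then have to_flat_arrays convert the uint16 scores to float (bit 16 of `skip` passes nullptr and suppresses that conversion). The handler choice, condensed from the loop body with the constructor arguments exactly as they appear above:

    // k == 1     -> SingleResultHC: track only the single best hit
    // impl == 10 -> HeapHC: k-element heap over the uint16 scores
    // impl == 11 -> ReservoirHC: reservoir with capacity argument 2 * k
    if (k == 1) {
        handler.reset(new SingleResultHC(1, 0));
    } else if (impl == 10) {
        handler.reset(new HeapHC(
                1, tmp_distances.get(), labels + i * k, k, 0));
    } else if (impl == 11) {
        handler.reset(new ReservoirHC(1, 0, k, 2 * k));
    }
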
 
-
-
- template<class C>
+ template <class C>
  void IndexIVFPQFastScan::search_implem_12(
- idx_t n, const float* x, idx_t k,
- float* distances, idx_t* labels,
- int impl, size_t *ndis_out, size_t *nlist_out) const
- {
+ idx_t n,
+ const float* x,
+ idx_t k,
+ float* distances,
+ idx_t* labels,
+ int impl,
+ size_t* ndis_out,
+ size_t* nlist_out) const {
  if (n == 0) { // does not work well with reservoir
  return;
  }
@@ -930,53 +973,53 @@ void IndexIVFPQFastScan::search_implem_12(
  #define TIC times[ti++] = get_cy()
  TIC;
 
- quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+ quantizer->search(n, x, nprobe, coarse_dis.get(), coarse_ids.get());
 
  TIC;
 
  size_t dim12 = pq.ksub * M2;
  AlignedTable<uint8_t> dis_tables;
  AlignedTable<uint16_t> biases;
- std::unique_ptr<float[]> normalizers (new float[2 * n]);
+ std::unique_ptr<float[]> normalizers(new float[2 * n]);
 
- compute_LUT_uint8 (
- n, x,
- coarse_ids.get(), coarse_dis.get(),
- dis_tables, biases, normalizers.get()
- );
+ compute_LUT_uint8(
+ n,
+ x,
+ coarse_ids.get(),
+ coarse_dis.get(),
+ dis_tables,
+ biases,
+ normalizers.get());
 
  TIC;
 
  struct QC {
- int qno; // sequence number of the query
- int list_no; // list to visit
- int rank; // this is the rank'th result of the coarse quantizer
+ int qno; // sequence number of the query
+ int list_no; // list to visit
+ int rank; // this is the rank'th result of the coarse quantizer
  };
  bool single_LUT = !(by_residual && metric_type == METRIC_L2);
 
  std::vector<QC> qcs;
  {
  int ij = 0;
- for(int i = 0; i < n; i++) {
- for(int j = 0; j < nprobe; j++) {
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < nprobe; j++) {
  if (coarse_ids[ij] >= 0) {
  qcs.push_back(QC{i, int(coarse_ids[ij]), int(j)});
  }
  ij++;
  }
  }
- std::sort(
- qcs.begin(), qcs.end(),
- [](const QC &a, const QC & b) {
- return a.list_no < b.list_no;
- }
- );
+ std::sort(qcs.begin(), qcs.end(), [](const QC& a, const QC& b) {
+ return a.list_no < b.list_no;
+ });
  }
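
Where implem_10 iterates per query, search_implem_12 flattens every (query, probed list) pair into a QC record and sorts by list_no, so each inverted list is fetched once and scanned for all queries that probe it; the main loop below additionally caps a batch at qbs2 entries. A self-contained sketch of the grouping, where `scan_grouped` and `visit` are illustrative names:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct QC {
        int qno;     // sequence number of the query
        int list_no; // inverted list to visit
        int rank;    // rank'th coarse-quantizer result for that query
    };

    // Sort (query, list) pairs by list so each list is read once;
    // visit(list_no, first, count) stands in for pack-LUT-and-scan.
    template <class Visit>
    void scan_grouped(std::vector<QC>& qcs, Visit visit) {
        std::sort(qcs.begin(), qcs.end(), [](const QC& a, const QC& b) {
            return a.list_no < b.list_no;
        });
        size_t i0 = 0;
        while (i0 < qcs.size()) {
            size_t i1 = i0 + 1;
            while (i1 < qcs.size() && qcs[i1].list_no == qcs[i0].list_no) {
                i1++;
            }
            visit(qcs[i0].list_no, &qcs[i0], i1 - i0);
            i0 = i1;
        }
    }
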
  TIC;
 
  // prepare the result handlers
 
- std::unique_ptr<SIMDResultHandler<C, true> > handler;
+ std::unique_ptr<SIMDResultHandler<C, true>> handler;
  AlignedTable<uint16_t> tmp_distances;
 
  using HeapHC = HeapHandler<C, true>;
@@ -1012,7 +1055,7 @@ void IndexIVFPQFastScan::search_implem_12(
  int list_no = qcs[i0].list_no;
  size_t i1 = i0 + 1;
 
- while(i1 < qcs.size() && i1 < i0 + qbs2) {
+ while (i1 < qcs.size() && i1 < i0 + qbs2) {
  if (qcs[i1].list_no != list_no) {
  break;
  }
@@ -1034,8 +1077,8 @@ void IndexIVFPQFastScan::search_implem_12(
  memset(LUT.get(), -1, nc * dim12);
  int qbs = pq4_preferred_qbs(nc);
 
- for(size_t i = i0; i < i1; i++) {
- const QC & qc = qcs[i];
+ for (size_t i = i0; i < i1; i++) {
+ const QC& qc = qcs[i];
  q_map[i - i0] = qc.qno;
  int ij = qc.qno * nprobe + qc.rank;
  lut_entries[i - i0] = single_LUT ? qc.qno : ij;
@@ -1044,9 +1087,7 @@ void IndexIVFPQFastScan::search_implem_12(
  }
  }
  pq4_pack_LUT_qbs_q_map(
- qbs, M2, dis_tables.get(), lut_entries.data(),
- LUT.get()
- );
+ qbs, M2, dis_tables.get(), lut_entries.data(), LUT.get());
 
  // access the inverted list
 
@@ -1062,20 +1103,17 @@ void IndexIVFPQFastScan::search_implem_12(
  handler->id_map = ids.get();
  uint64_t tt1 = get_cy();
 
- #define DISPATCH(classHC) \
- if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
- pq4_accumulate_loop_qbs( \
- qbs, list_size, M2, \
- codes.get(), LUT.get(), \
- *res \
- ); \
- }
+ #define DISPATCH(classHC) \
+ if (dynamic_cast<classHC*>(handler.get())) { \
+ auto* res = static_cast<classHC*>(handler.get()); \
+ pq4_accumulate_loop_qbs( \
+ qbs, list_size, M2, codes.get(), LUT.get(), *res); \
+ }
  DISPATCH(HeapHC)
- else DISPATCH(ReservoirHC)
- else DISPATCH(SingleResultHC)
+ else DISPATCH(ReservoirHC) else DISPATCH(SingleResultHC)
 
- // prepare for next loop
- i0 = i1;
+ // prepare for next loop
+ i0 = i1;
 
  uint64_t tt2 = get_cy();
  t_copy_pack += tt1 - tt0;
@@ -1085,21 +1123,19 @@ void IndexIVFPQFastScan::search_implem_12(
 
  // labels is in-place for HeapHC
  handler->to_flat_arrays(
- distances, labels,
- skip & 16 ? nullptr : normalizers.get()
- );
+ distances, labels, skip & 16 ? nullptr : normalizers.get());
 
  TIC;
 
  // these stats are not thread-safe
 
- for(int i = 1; i < ti; i++) {
- IVFFastScan_stats.times[i] += times[i] - times[i-1];
+ for (int i = 1; i < ti; i++) {
+ IVFFastScan_stats.times[i] += times[i] - times[i - 1];
  }
  IVFFastScan_stats.t_copy_pack += t_copy_pack;
  IVFFastScan_stats.t_scan += t_scan;
 
- if (auto *rh = dynamic_cast<ReservoirHC*> (handler.get())) {
+ if (auto* rh = dynamic_cast<ReservoirHC*>(handler.get())) {
  for (int i = 0; i < 4; i++) {
  IVFFastScan_stats.reservoir_times[i] += rh->times[i];
  }
@@ -1107,10 +1143,8 @@ void IndexIVFPQFastScan::search_implem_12(
 
  *ndis_out = ndis;
  *nlist_out = nlist;
-
  }
 
-
  IVFFastScanStats IVFFastScan_stats;
 
  } // namespace faiss
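
For orientation, a hedged end-to-end sketch of driving IndexIVFPQFastScan from C++ against the API vendored in this release; all sizes are placeholders, and the random fill only makes training well-defined (FastScan requires 4-bit PQ codes, and bbs is the SIMD block size):

    #include <faiss/IndexFlat.h>
    #include <faiss/IndexIVFPQFastScan.h>
    #include <cstdlib>
    #include <vector>

    int main() {
        int d = 64;                      // vector dimension
        faiss::IndexFlatL2 quantizer(d); // coarse quantizer
        // nlist = 64 inverted lists; M = 32 sub-quantizers at 4 bits
        // each; bbs = 32 is the SIMD block size (a multiple of 32).
        faiss::IndexIVFPQFastScan index(
                &quantizer, d, 64, 32, 4, faiss::METRIC_L2, 32);

        size_t nb = 10000;
        std::vector<float> xb(nb * d);
        for (auto& v : xb) {
            v = rand() / float(RAND_MAX); // placeholder data
        }
        index.train(nb, xb.data());
        index.add(nb, xb.data());

        index.nprobe = 8; // lists probed per query
        faiss::Index::idx_t k = 10;
        std::vector<float> D(k);
        std::vector<faiss::Index::idx_t> I(k);
        index.search(1, xb.data(), k, D.data(), I.data());
        return 0;
    }
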