RubyGems - faiss - Versions diffs - 0.2.0 → 0.2.1 - Mend

faiss 0.2.0 → 0.2.1

Files changed (202)

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/lib/faiss/version.rb +1 -1
data/vendor/faiss/faiss/AutoTune.cpp +292 -291
data/vendor/faiss/faiss/AutoTune.h +55 -56
data/vendor/faiss/faiss/Clustering.cpp +334 -195
data/vendor/faiss/faiss/Clustering.h +88 -35
data/vendor/faiss/faiss/IVFlib.cpp +171 -195
data/vendor/faiss/faiss/IVFlib.h +48 -51
data/vendor/faiss/faiss/Index.cpp +85 -103
data/vendor/faiss/faiss/Index.h +54 -48
data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
data/vendor/faiss/faiss/Index2Layer.h +22 -22
data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
data/vendor/faiss/faiss/IndexBinary.h +140 -132
data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
data/vendor/faiss/faiss/IndexFlat.h +35 -46
data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
data/vendor/faiss/faiss/IndexHNSW.h +57 -41
data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
data/vendor/faiss/faiss/IndexIVF.h +146 -113
data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
data/vendor/faiss/faiss/IndexLSH.h +21 -26
data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
data/vendor/faiss/faiss/IndexLattice.h +11 -16
data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
data/vendor/faiss/faiss/IndexNSG.h +85 -0
data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
data/vendor/faiss/faiss/IndexPQ.h +64 -67
data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
data/vendor/faiss/faiss/IndexRefine.h +22 -23
data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
data/vendor/faiss/faiss/IndexReplicas.h +62 -56
data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
data/vendor/faiss/faiss/IndexResidual.h +152 -0
data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
data/vendor/faiss/faiss/IndexShards.cpp +256 -240
data/vendor/faiss/faiss/IndexShards.h +85 -73
data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
data/vendor/faiss/faiss/MatrixStats.h +7 -10
data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
data/vendor/faiss/faiss/MetaIndexes.h +40 -34
data/vendor/faiss/faiss/MetricType.h +7 -7
data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
data/vendor/faiss/faiss/VectorTransform.h +61 -89
data/vendor/faiss/faiss/clone_index.cpp +77 -73
data/vendor/faiss/faiss/clone_index.h +4 -9
data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
data/vendor/faiss/faiss/impl/FaissException.h +41 -29
data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
data/vendor/faiss/faiss/impl/HNSW.h +179 -200
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
data/vendor/faiss/faiss/impl/NSG.h +199 -0
data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
data/vendor/faiss/faiss/impl/io.cpp +75 -94
data/vendor/faiss/faiss/impl/io.h +31 -41
data/vendor/faiss/faiss/impl/io_macros.h +40 -29
data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
data/vendor/faiss/faiss/index_factory.cpp +269 -218
data/vendor/faiss/faiss/index_factory.h +6 -7
data/vendor/faiss/faiss/index_io.h +23 -26
data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
data/vendor/faiss/faiss/utils/Heap.h +186 -209
data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
data/vendor/faiss/faiss/utils/distances.cpp +301 -310
data/vendor/faiss/faiss/utils/distances.h +133 -118
data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
data/vendor/faiss/faiss/utils/hamming.h +62 -85
data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
data/vendor/faiss/faiss/utils/partitioning.h +26 -21
data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
data/vendor/faiss/faiss/utils/random.cpp +39 -63
data/vendor/faiss/faiss/utils/random.h +13 -16
data/vendor/faiss/faiss/utils/simdlib.h +4 -2
data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
data/vendor/faiss/faiss/utils/utils.cpp +304 -287
data/vendor/faiss/faiss/utils/utils.h +53 -48
metadata +20 -2

data/vendor/faiss/faiss/utils/hamming.h CHANGED

@@ -24,13 +24,11 @@
 #ifndef FAISS_hamming_h
 #define FAISS_hamming_h
 #include <stdint.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 /* The Hamming distance type */
 typedef int32_t hamdis_t;
@@ -42,8 +40,7 @@ namespace faiss {
 struct RangeSearchResult;
-void bitvec_print (const uint8_t * b, size_t d);
+void bitvec_print(const uint8_t* b, size_t d);
 /* Functions for casting vectors of regular types to compact bits.
    They assume proper allocation done beforehand, meaning that b
@@ -52,53 +49,45 @@ void bitvec_print (const uint8_t * b, size_t d);
 /* Makes an array of bits from the signs of a float array. The length
    of the output array b is rounded up to byte size (allocate
    accordingly) */
-void fvecs2bitvecs (
-        const float * x,
-        uint8_t * b,
-        size_t d,
-        size_t n);
-void bitvecs2fvecs (
-        const uint8_t * b,
-        float * x,
-        size_t d,
-        size_t n);
+void fvecs2bitvecs(const float* x, uint8_t* b, size_t d, size_t n);
+void bitvecs2fvecs(const uint8_t* b, float* x, size_t d, size_t n);
-void fvec2bitvec (const float * x, uint8_t * b, size_t d);
+void fvec2bitvec(const float* x, uint8_t* b, size_t d);
 /** Shuffle the bits from b(i, j) := a(i, order[j])
  */
-void bitvec_shuffle (size_t n, size_t da, size_t db,
-                     const int *order,
-                     const uint8_t *a,
-                     uint8_t *b);
+void bitvec_shuffle(
+        size_t n,
+        size_t da,
+        size_t db,
+        const int* order,
+        const uint8_t* a,
+        uint8_t* b);
 /***********************************************
  * Generic reader/writer for bit strings
  ***********************************************/
 struct BitstringWriter {
-    uint8_t *code;
+    uint8_t* code;
     size_t code_size;
     size_t i; // current bit offset
     // code_size in bytes
-    BitstringWriter(uint8_t *code, size_t code_size);
+    BitstringWriter(uint8_t* code, size_t code_size);
     // write the nbit low bits of x
     void write(uint64_t x, int nbit);
 };
 struct BitstringReader {
-    const uint8_t *code;
+    const uint8_t* code;
     size_t code_size;
     size_t i;
     // code_size in bytes
-    BitstringReader(const uint8_t *code, size_t code_size);
+    BitstringReader(const uint8_t* code, size_t code_size);
     // read nbit bits from the code
     uint64_t read(int nbit);
@@ -108,15 +97,12 @@ struct BitstringReader {
  * Hamming distance computation functions
  **************************************************/
 FAISS_API extern size_t hamming_batch_size;
 inline int popcount64(uint64_t x) {
     return __builtin_popcountl(x);
 }
 /** Compute a set of Hamming distances between na and nb binary vectors
  *
  * @param  a             size na * nbytespercode
@@ -124,15 +110,13 @@ inline int popcount64(uint64_t x) {
  * @param  nbytespercode should be multiple of 8
  * @param  dis           output distances, size na * nb
  */
-void hammings (
-        const uint8_t * a,
-        const uint8_t * b,
-        size_t na, size_t nb,
+void hammings(
+        const uint8_t* a,
+        const uint8_t* b,
+        size_t na,
+        size_t nb,
         size_t nbytespercode,
-        hamdis_t * dis);
+        hamdis_t* dis);
 /** Return the k smallest Hamming distances for a set of binary query vectors,
  * using a max heap.
@@ -142,22 +126,22 @@ void hammings (
  * @param ncodes  size of the binary codes (bytes)
  * @param ordered if != 0: order the results by decreasing distance
  *                (may be bottleneck for k/n > 0.01) */
-void hammings_knn_hc (
-        int_maxheap_array_t * ha,
-        const uint8_t * a,
-        const uint8_t * b,
+void hammings_knn_hc(
+        int_maxheap_array_t* ha,
+        const uint8_t* a,
+        const uint8_t* b,
         size_t nb,
         size_t ncodes,
         int ordered);
 /* Legacy alias to hammings_knn_hc. */
-void hammings_knn (
-  int_maxheap_array_t * ha,
-  const uint8_t * a,
-  const uint8_t * b,
-  size_t nb,
-  size_t ncodes,
-  int ordered);
+void hammings_knn(
+        int_maxheap_array_t* ha,
+        const uint8_t* a,
+        const uint8_t* b,
+        size_t nb,
+        size_t ncodes,
+        int ordered);
 /** Return the k smallest Hamming distances for a set of binary query vectors,
  * using counting max.
@@ -171,66 +155,59 @@ void hammings_knn (
  *                neighbors
  * @param labels  output ids of the k nearest neighbors to each query vector
  */
-void hammings_knn_mc (
-  const uint8_t * a,
-  const uint8_t * b,
-  size_t na,
-  size_t nb,
-  size_t k,
-  size_t ncodes,
-  int32_t *distances,
-  int64_t *labels);
+void hammings_knn_mc(
+        const uint8_t* a,
+        const uint8_t* b,
+        size_t na,
+        size_t nb,
+        size_t k,
+        size_t ncodes,
+        int32_t* distances,
+        int64_t* labels);
 /** same as hammings_knn except we are doing a range search with radius */
-void hamming_range_search (
-    const uint8_t * a,
-    const uint8_t * b,
-    size_t na,
-    size_t nb,
-    int radius,
-    size_t ncodes,
-    RangeSearchResult *result);
+void hamming_range_search(
+        const uint8_t* a,
+        const uint8_t* b,
+        size_t na,
+        size_t nb,
+        int radius,
+        size_t ncodes,
+        RangeSearchResult* result);
 /* Counting the number of matches or of cross-matches (without returning them)
    For use with function that assume pre-allocated memory */
-void hamming_count_thres (
-        const uint8_t * bs1,
-        const uint8_t * bs2,
+void hamming_count_thres(
+        const uint8_t* bs1,
+        const uint8_t* bs2,
         size_t n1,
         size_t n2,
         hamdis_t ht,
         size_t ncodes,
-        size_t * nptr);
+        size_t* nptr);
 /* Return all Hamming distances/index passing a thres. Pre-allocation of output
    is required. Use hamming_count_thres to determine the proper size. */
-size_t match_hamming_thres (
-        const uint8_t * bs1,
-        const uint8_t * bs2,
+size_t match_hamming_thres(
+        const uint8_t* bs1,
+        const uint8_t* bs2,
         size_t n1,
         size_t n2,
         hamdis_t ht,
         size_t ncodes,
-        int64_t * idx,
-        hamdis_t * dis);
+        int64_t* idx,
+        hamdis_t* dis);
 /* Cross-matching in a set of vectors */
-void crosshamming_count_thres (
-        const uint8_t * dbs,
+void crosshamming_count_thres(
+        const uint8_t* dbs,
         size_t n,
         hamdis_t ht,
         size_t ncodes,
-        size_t * nptr);
+        size_t* nptr);
 /* compute the Hamming distances between two codewords of nwords*64 bits */
-hamdis_t hamming (
-        const uint64_t * bs1,
-        const uint64_t * bs2,
-        size_t nwords);
+hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords);
 } // namespace faiss

data/vendor/faiss/faiss/utils/ordered_key_value.h CHANGED

@@ -5,8 +5,6 @@
  * LICENSE file in the root directory of this source tree.
  */
 #pragma once
 #include <climits>
@@ -14,7 +12,6 @@
 #include <limits>
 namespace faiss {
 /*******************************************************************
@@ -34,8 +31,10 @@ namespace faiss {
 template <typename T_, typename TI_>
 struct CMax;
-template<typename T> inline T cmin_nextafter(T x);
-template<typename T> inline T cmax_nextafter(T x);
+template <typename T>
+inline T cmin_nextafter(T x);
+template <typename T>
+inline T cmax_nextafter(T x);
 // traits of minheaps = heaps where the minimum value is stored on top
 // useful to find the *max* values of an array
@@ -44,10 +43,10 @@ struct CMin {
     typedef T_ T;
     typedef TI_ TI;
     typedef CMax<T_, TI_> Crev; // reference to reverse comparison
-    inline static bool cmp (T a, T b) {
+    inline static bool cmp(T a, T b) {
         return a < b;
     }
-    inline static T neutral () {
+    inline static T neutral() {
         return std::numeric_limits<T>::lowest();
     }
     static const bool is_max = false;
@@ -57,18 +56,15 @@ struct CMin {
     }
 };
 template <typename T_, typename TI_>
 struct CMax {
     typedef T_ T;
     typedef TI_ TI;
     typedef CMin<T_, TI_> Crev;
-    inline static bool cmp (T a, T b) {
+    inline static bool cmp(T a, T b) {
         return a > b;
     }
-    inline static T neutral () {
+    inline static T neutral() {
         return std::numeric_limits<T>::max();
     }
     static const bool is_max = true;
@@ -77,22 +73,24 @@ struct CMax {
     }
 };
-template<> inline float cmin_nextafter<float>(float x) {
+template <>
+inline float cmin_nextafter<float>(float x) {
     return std::nextafterf(x, -HUGE_VALF);
 }
-template<> inline float cmax_nextafter<float>(float x) {
+template <>
+inline float cmax_nextafter<float>(float x) {
     return std::nextafterf(x, HUGE_VALF);
 }
-template<> inline uint16_t cmin_nextafter<uint16_t>(uint16_t x) {
+template <>
+inline uint16_t cmin_nextafter<uint16_t>(uint16_t x) {
     return x - 1;
 }
-template<> inline uint16_t cmax_nextafter<uint16_t>(uint16_t x) {
+template <>
+inline uint16_t cmax_nextafter<uint16_t>(uint16_t x) {
     return x + 1;
 }
 } // namespace faiss

data/vendor/faiss/faiss/utils/partitioning.cpp CHANGED

@@ -7,8 +7,8 @@
 #include <faiss/utils/partitioning.h>
-#include <cmath>
 #include <cassert>
+#include <cmath>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/utils/AlignedTable.h>
@@ -19,15 +19,13 @@
 namespace faiss {
 /******************************************************************
  * Internal routines
  ******************************************************************/
 namespace partitioning {
-template<typename T>
+template <typename T>
 T median3(T a, T b, T c) {
     if (a > b) {
         std::swap(a, b);
@@ -41,12 +39,12 @@ T median3(T a, T b, T c) {
     return a;
 }
-template<class C>
+template <class C>
 typename C::T sample_threshold_median3(
-    const typename C::T * vals, int n,
-    typename C::T thresh_inf, typename C::T thresh_sup
-) {
+        const typename C::T* vals,
+        int n,
+        typename C::T thresh_inf,
+        typename C::T thresh_sup) {
     using T = typename C::T;
     size_t big_prime = 6700417;
     T val3[3];
@@ -73,31 +71,34 @@ typename C::T sample_threshold_median3(
     }
 }
-template<class C>
+template <class C>
 void count_lt_and_eq(
-    const typename C::T * vals, size_t n, typename C::T thresh,
-    size_t & n_lt, size_t & n_eq
-) {
+        const typename C::T* vals,
+        size_t n,
+        typename C::T thresh,
+        size_t& n_lt,
+        size_t& n_eq) {
     n_lt = n_eq = 0;
-    for(size_t i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         typename C::T v = *vals++;
-        if(C::cmp(thresh, v)) {
+        if (C::cmp(thresh, v)) {
             n_lt++;
-        } else if(v == thresh) {
+        } else if (v == thresh) {
             n_eq++;
         }
     }
 }
-template<class C>
+template <class C>
 size_t compress_array(
-    typename C::T *vals, typename C::TI * ids,
-    size_t n, typename C::T thresh, size_t n_eq
-) {
+        typename C::T* vals,
+        typename C::TI* ids,
+        size_t n,
+        typename C::T thresh,
+        size_t n_eq) {
     size_t wp = 0;
-    for(size_t i = 0; i < n; i++) {
+    for (size_t i = 0; i < n; i++) {
         if (C::cmp(thresh, vals[i])) {
             vals[wp] = vals[i];
             ids[wp] = ids[i];
@@ -113,15 +114,16 @@ size_t compress_array(
     return wp;
 }
+#define IFV if (false)
-#define IFV if(false)
-template<class C>
+template <class C>
 typename C::T partition_fuzzy_median3(
-    typename C::T *vals, typename C::TI * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out)
-{
+        typename C::T* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out) {
     if (q_min == 0) {
         if (q_out) {
             *q_out = C::Crev::neutral();
@@ -150,12 +152,19 @@ typename C::T partition_fuzzy_median3(
     size_t n_eq = 0, n_lt = 0;
     size_t q = 0;
-    for(int it = 0; it < 200; it++) {
+    for (int it = 0; it < 200; it++) {
         count_lt_and_eq<C>(vals, n, thresh, n_lt, n_eq);
-        IFV  printf("   thresh=%g [%g %g] n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
-            float(thresh), float(thresh_inf), float(thresh_sup),
-            long(n_lt), long(n_eq), long(q_min), long(q_max), long(n));
+        IFV printf(
+                "   thresh=%g [%g %g] n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
+                float(thresh),
+                float(thresh_inf),
+                float(thresh_sup),
+                long(n_lt),
+                long(n_eq),
+                long(q_min),
+                long(q_max),
+                long(n));
         if (n_lt <= q_min) {
             if (n_lt + n_eq >= q_min) {
@@ -172,8 +181,12 @@ typename C::T partition_fuzzy_median3(
         }
         // FIXME avoid a second pass over the array to sample the threshold
-        IFV  printf("     sample thresh in [%g %g]\n", float(thresh_inf), float(thresh_sup));
-        T new_thresh = sample_threshold_median3<C>(vals, n, thresh_inf, thresh_sup);
+        IFV printf(
+                "     sample thresh in [%g %g]\n",
+                float(thresh_inf),
+                float(thresh_sup));
+        T new_thresh =
+                sample_threshold_median3<C>(vals, n, thresh_inf, thresh_sup);
         if (new_thresh == thresh_inf) {
             // then there is nothing between thresh_inf and thresh_sup
             break;
@@ -203,25 +216,19 @@ typename C::T partition_fuzzy_median3(
     return thresh;
 }
 } // namespace partitioning
 /******************************************************************
  * SIMD routines when vals is an aligned array of uint16_t
  ******************************************************************/
 namespace simd_partitioning {
 void find_minimax(
-        const uint16_t * vals, size_t n,
-        uint16_t & smin, uint16_t & smax
-) {
+        const uint16_t* vals,
+        size_t n,
+        uint16_t& smin,
+        uint16_t& smax) {
     simd16uint16 vmin(0xffff), vmax(0);
     for (size_t i = 0; i + 15 < n; i += 16) {
         simd16uint16 v(vals + i);
@@ -235,22 +242,20 @@ void find_minimax(
     smin = tab32[0], smax = tab32[16];
-    for(int i = 1; i < 16; i++) {
+    for (int i = 1; i < 16; i++) {
         smin = std::min(smin, tab32[i]);
         smax = std::max(smax, tab32[i + 16]);
     }
     // missing values
-    for(size_t i = (n & ~15); i < n; i++) {
+    for (size_t i = (n & ~15); i < n; i++) {
         smin = std::min(smin, vals[i]);
         smax = std::max(smax, vals[i]);
     }
 }
 // max func differentiates between CMin and CMax (keep lowest or largest)
-template<class C>
+template <class C>
 simd16uint16 max_func(simd16uint16 v, simd16uint16 thr16) {
     constexpr bool is_max = C::is_max;
     if (is_max) {
@@ -260,11 +265,13 @@ simd16uint16 max_func(simd16uint16 v, simd16uint16 thr16) {
     }
 }
-template<class C>
+template <class C>
 void count_lt_and_eq(
-    const uint16_t * vals, int n, uint16_t thresh,
-    size_t & n_lt, size_t & n_eq
-) {
+        const uint16_t* vals,
+        int n,
+        uint16_t thresh,
+        size_t& n_lt,
+        size_t& n_eq) {
     n_lt = n_eq = 0;
     simd16uint16 thr16(thresh);
@@ -283,24 +290,25 @@ void count_lt_and_eq(
         n_lt += 16 - i_ge;
     }
-    for(size_t i = n1 * 16; i < n; i++) {
+    for (size_t i = n1 * 16; i < n; i++) {
         uint16_t v = *vals++;
-        if(C::cmp(thresh, v)) {
+        if (C::cmp(thresh, v)) {
             n_lt++;
-        } else if(v == thresh) {
+        } else if (v == thresh) {
             n_eq++;
         }
     }
 }
 /* compress separated values and ids table, keeping all values < thresh and at
  * most n_eq equal values */
-template<class C>
+template <class C>
 int simd_compress_array(
-    uint16_t *vals, typename C::TI * ids, size_t n, uint16_t thresh, int n_eq
-) {
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        uint16_t thresh,
+        int n_eq) {
     simd16uint16 thr16(thresh);
     simd16uint16 mixmask(0xff00);
@@ -313,13 +321,15 @@ int simd_compress_array(
         simd16uint16 max2 = max_func<C>(v, thr16);
         simd16uint16 gemask = (v == max2);
         simd16uint16 eqmask = (v == thr16);
-        uint32_t bits = get_MSBs(blendv(
-            simd32uint8(eqmask), simd32uint8(gemask), simd32uint8(mixmask)));
+        uint32_t bits = get_MSBs(
+                blendv(simd32uint8(eqmask),
+                       simd32uint8(gemask),
+                       simd32uint8(mixmask)));
         bits ^= 0xAAAAAAAA;
         // bit 2*i     : eq
         // bit 2*i + 1 : lt
-        while(bits) {
+        while (bits) {
             int j = __builtin_ctz(bits) & (~1);
             bool is_eq = (bits >> j) & 1;
             bool is_lt = (bits >> j) & 2;
@@ -330,7 +340,7 @@ int simd_compress_array(
                 vals[wp] = vals[i0 + j];
                 ids[wp] = ids[i0 + j];
                 wp++;
-            } else if(is_eq && n_eq > 0) {
+            } else if (is_eq && n_eq > 0) {
                 vals[wp] = vals[i0 + j];
                 ids[wp] = ids[i0 + j];
                 wp++;
@@ -346,7 +356,7 @@ int simd_compress_array(
         simd16uint16 gemask = (v == max2);
         uint32_t bits = ~get_MSBs(simd32uint8(gemask));
-        while(bits) {
+        while (bits) {
             int j = __builtin_ctz(bits);
             bits &= ~(3 << j);
             j >>= 1;
@@ -358,7 +368,7 @@ int simd_compress_array(
     }
     // end with scalar
-    for(int i = (n & ~15); i < n; i++) {
+    for (int i = (n & ~15); i < n; i++) {
         if (C::cmp(thresh, vals[i])) {
             vals[wp] = vals[i];
             ids[wp] = ids[i];
@@ -376,29 +386,28 @@ int simd_compress_array(
 // #define MICRO_BENCHMARK
-static uint64_t get_cy () {
-#ifdef  MICRO_BENCHMARK
+static uint64_t get_cy() {
+#ifdef MICRO_BENCHMARK
     uint32_t high, low;
-    asm volatile("rdtsc \n\t"
-                 : "=a" (low),
-                   "=d" (high));
+    asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
     return ((uint64_t)high << 32) | (low);
 #else
     return 0;
 #endif
 }
+#define IFV if (false)
-#define IFV if(false)
-template<class C>
+template <class C>
 uint16_t simd_partition_fuzzy_with_bounds(
-    uint16_t *vals, typename C::TI * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out,
-    uint16_t s0i, uint16_t s1i)
-{
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out,
+        uint16_t s0i,
+        uint16_t s1i) {
     if (q_min == 0) {
         if (q_out) {
             *q_out = 0;
@@ -428,13 +437,21 @@ uint16_t simd_partition_fuzzy_with_bounds(
     size_t n_eq = 0, n_lt = 0;
     size_t q = 0;
-    for(int it = 0; it < 200; it++) {
+    for (int it = 0; it < 200; it++) {
         // while(s0 + 1 < s1) {
         thresh = (s0 + s1) / 2;
         count_lt_and_eq<C>(vals, n, thresh, n_lt, n_eq);
-        IFV  printf("   [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
-            s0, s1, thresh, n_lt, n_eq, q_min, q_max, n);
+        IFV printf(
+                "   [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
+                s0,
+                s1,
+                thresh,
+                n_lt,
+                n_eq,
+                q_min,
+                q_max,
+                n);
         if (n_lt <= q_min) {
             if (n_lt + n_eq >= q_min) {
                 q = q_min;
@@ -456,7 +473,6 @@ uint16_t simd_partition_fuzzy_with_bounds(
                 s0 = thresh;
             }
         }
     }
     uint64_t t1 = get_cy();
@@ -495,14 +511,16 @@ uint16_t simd_partition_fuzzy_with_bounds(
     return thresh;
 }
-template<class C>
+template <class C>
 uint16_t simd_partition_fuzzy_with_bounds_histogram(
-    uint16_t *vals, typename C::TI * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out,
-    uint16_t s0i, uint16_t s1i)
-{
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out,
+        uint16_t s0i,
+        uint16_t s1i) {
     if (q_min == 0) {
         if (q_out) {
             *q_out = 0;
@@ -522,11 +540,17 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
         return s0i;
     }
-    IFV printf("partition fuzzy, q=%ld:%ld / %ld, bounds=%d %d\n",
-        q_min, q_max, n, s0i, s1i);
+    IFV printf(
+            "partition fuzzy, q=%ld:%ld / %ld, bounds=%d %d\n",
+            q_min,
+            q_max,
+            n,
+            s0i,
+            s1i);
     if (!C::is_max) {
-        IFV printf("revert due to CMin, q_min:q_max -> %ld:%ld\n", q_min, q_max);
+        IFV printf(
+                "revert due to CMin, q_min:q_max -> %ld:%ld\n", q_min, q_max);
         q_min = n - q_min;
         q_max = n - q_max;
     }
@@ -537,31 +561,39 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
     size_t n_lt = 0, n_gt = 0;
     // output of loop:
-    int thresh; // final threshold
-    uint64_t tot_eq = 0;   // total nb of equal values
-    uint64_t n_eq = 0;     // nb of equal values to keep
-    size_t q;  // final quantile
+    int thresh;          // final threshold
+    uint64_t tot_eq = 0; // total nb of equal values
+    uint64_t n_eq = 0;   // nb of equal values to keep
+    size_t q;            // final quantile
     // buffer for the histograms
     int hist[16];
-    for(int it = 0; it < 20; it++) {
+    for (int it = 0; it < 20; it++) {
         // otherwise we would be done already
         int shift = 0;
-        IFV printf("  it %d bounds: %d %d n_lt=%ld n_gt=%ld\n",
-                it, s0, s1, n_lt, n_gt);
+        IFV printf(
+                "  it %d bounds: %d %d n_lt=%ld n_gt=%ld\n",
+                it,
+                s0,
+                s1,
+                n_lt,
+                n_gt);
         int maxval = s1 - s0;
-        while(maxval > 15) {
+        while (maxval > 15) {
             shift++;
             maxval >>= 1;
         }
-        IFV printf("    histogram shift %d maxval %d ?= %d\n",
-                shift, maxval, int((s1 - s0) >> shift));
+        IFV printf(
+                "    histogram shift %d maxval %d ?= %d\n",
+                shift,
+                maxval,
+                int((s1 - s0) >> shift));
         if (maxval > 7) {
             simd_histogram_16(vals, n, s0, shift, hist);
@@ -571,7 +603,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
         IFV {
             int sum = n_lt + n_gt;
             printf("    n_lt=%ld hist=[", n_lt);
-            for(int i = 0; i <= maxval; i++) {
+            for (int i = 0; i <= maxval; i++) {
                 printf("%d ", hist[i]);
                 sum += hist[i];
             }
@@ -597,7 +629,12 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
             assert(!"not implemented");
         }
-        IFV printf("    new bin: s0=%d s1=%d n_lt=%ld n_gt=%ld\n", s0, s1, n_lt, n_gt);
+        IFV printf(
+                "    new bin: s0=%d s1=%d n_lt=%ld n_gt=%ld\n",
+                s0,
+                s1,
+                n_lt,
+                n_gt);
         if (s1 > s0) {
             if (n_lt >= q_min && q_max >= n_lt) {
@@ -628,7 +665,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
     if (!C::is_max) {
         if (n_eq == 0) {
-            thresh --;
+            thresh--;
         } else {
             // thresh unchanged
             n_eq = tot_eq - n_eq;
@@ -647,14 +684,14 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
     return thresh;
 }
-template<class C>
+template <class C>
 uint16_t simd_partition_fuzzy(
-    uint16_t *vals, typename C::TI * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out
-) {
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out) {
     assert(is_aligned_pointer(vals));
     uint16_t s0i, s1i;
@@ -662,14 +699,15 @@ uint16_t simd_partition_fuzzy(
     // QSelect_stats.t0 += get_cy() - t0;
     return simd_partition_fuzzy_with_bounds<C>(
-        vals, ids, n, q_min, q_max, q_out, s0i, s1i);
+            vals, ids, n, q_min, q_max, q_out, s0i, s1i);
 }
-template<class C>
-uint16_t simd_partition(uint16_t *vals, typename C::TI * ids, size_t n, size_t q) {
+template <class C>
+uint16_t simd_partition(
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q) {
     assert(is_aligned_pointer(vals));
     if (q == 0) {
@@ -683,72 +721,97 @@ uint16_t simd_partition(uint16_t *vals, typename C::TI * ids, size_t n, size_t q
     find_minimax(vals, n, s0i, s1i);
     return simd_partition_fuzzy_with_bounds<C>(
-        vals, ids, n, q, q, nullptr, s0i, s1i);
+            vals, ids, n, q, q, nullptr, s0i, s1i);
 }
-template<class C>
+template <class C>
 uint16_t simd_partition_with_bounds(
-    uint16_t *vals, typename C::TI * ids, size_t n, size_t q,
-    uint16_t s0i, uint16_t s1i)
-{
+        uint16_t* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q,
+        uint16_t s0i,
+        uint16_t s1i) {
     return simd_partition_fuzzy_with_bounds<C>(
-        vals, ids, n, q, q, nullptr, s0i, s1i);
+            vals, ids, n, q, q, nullptr, s0i, s1i);
 }
 } // namespace simd_partitioning
 /******************************************************************
  * Driver routine
  ******************************************************************/
-template<class C>
+template <class C>
 typename C::T partition_fuzzy(
-    typename C::T *vals, typename C::TI * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out)
-{
+        typename C::T* vals,
+        typename C::TI* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out) {
     // the code below compiles and runs without AVX2 but it's slower than
     // the scalar implementation
 #ifdef __AVX2__
     constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
     if (is_uint16 && is_aligned_pointer(vals)) {
         return simd_partitioning::simd_partition_fuzzy<C>(
-            (uint16_t*)vals, ids, n, q_min, q_max, q_out);
+                (uint16_t*)vals, ids, n, q_min, q_max, q_out);
     }
 #endif
     return partitioning::partition_fuzzy_median3<C>(
-        vals, ids, n, q_min, q_max, q_out);
+            vals, ids, n, q_min, q_max, q_out);
 }
 // explicit template instantiations
-template float partition_fuzzy<CMin<float, int64_t>> (
-    float *vals, int64_t * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
-template float partition_fuzzy<CMax<float, int64_t>> (
-    float *vals, int64_t * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
-template uint16_t partition_fuzzy<CMin<uint16_t, int64_t>> (
-    uint16_t *vals, int64_t * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
-template uint16_t partition_fuzzy<CMax<uint16_t, int64_t>> (
-    uint16_t *vals, int64_t * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
-template uint16_t partition_fuzzy<CMin<uint16_t, int>> (
-    uint16_t *vals, int * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
-template uint16_t partition_fuzzy<CMax<uint16_t, int>> (
-    uint16_t *vals, int * ids, size_t n,
-    size_t q_min, size_t q_max, size_t * q_out);
+template float partition_fuzzy<CMin<float, int64_t>>(
+        float* vals,
+        int64_t* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
+template float partition_fuzzy<CMax<float, int64_t>>(
+        float* vals,
+        int64_t* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
+template uint16_t partition_fuzzy<CMin<uint16_t, int64_t>>(
+        uint16_t* vals,
+        int64_t* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
+template uint16_t partition_fuzzy<CMax<uint16_t, int64_t>>(
+        uint16_t* vals,
+        int64_t* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
+template uint16_t partition_fuzzy<CMin<uint16_t, int>>(
+        uint16_t* vals,
+        int* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
+template uint16_t partition_fuzzy<CMax<uint16_t, int>>(
+        uint16_t* vals,
+        int* ids,
+        size_t n,
+        size_t q_min,
+        size_t q_max,
+        size_t* q_out);
 /******************************************************************
  * Histogram subroutines
@@ -758,7 +821,7 @@ template uint16_t partition_fuzzy<CMax<uint16_t, int>> (
 /// FIXME when MSB of uint16 is set
 // this code does not compile properly with GCC 7.4.0
-namespace  {
+namespace {
 /************************************************************
  * 8 bins
@@ -773,7 +836,6 @@ simd32uint8 accu4to8(simd16uint16 a4) {
     return simd32uint8(_mm256_hadd_epi16(a8_0.i, a8_1.i));
 }
 simd16uint16 accu8to16(simd32uint8 a8) {
     simd16uint16 mask8(0x00ff);
@@ -783,27 +845,53 @@ simd16uint16 accu8to16(simd32uint8 a8) {
     return simd16uint16(_mm256_hadd_epi16(a8_0.i, a8_1.i));
 }
 static const simd32uint8 shifts(_mm256_setr_epi8(
-    1, 16, 0, 0,  4, 64, 0, 0,
-    0, 0, 1, 16,  0, 0, 4, 64,
-    1, 16, 0, 0,  4, 64, 0, 0,
-    0, 0, 1, 16,  0, 0, 4, 64
-));
+        1,
+        16,
+        0,
+        0,
+        4,
+        64,
+        0,
+        0,
+        0,
+        0,
+        1,
+        16,
+        0,
+        0,
+        4,
+        64,
+        1,
+        16,
+        0,
+        0,
+        4,
+        64,
+        0,
+        0,
+        0,
+        0,
+        1,
+        16,
+        0,
+        0,
+        4,
+        64));
 // 2-bit accumulator: we can add only up to 3 elements
 // on output we return 2*4-bit results
 // preproc returns either an index in 0..7 or 0xffff
 // that yields a 0 when used in the table look-up
-template<int N, class Preproc>
+template <int N, class Preproc>
 void compute_accu2(
-        const uint16_t * & data,
-        Preproc & pp,
-        simd16uint16 & a4lo, simd16uint16 & a4hi
-) {
+        const uint16_t*& data,
+        Preproc& pp,
+        simd16uint16& a4lo,
+        simd16uint16& a4hi) {
     simd16uint16 mask2(0x3333);
     simd16uint16 a2((uint16_t)0); // 2-bit accu
-    for (int j = 0; j < N; j ++) {
+    for (int j = 0; j < N; j++) {
         simd16uint16 v(data);
         data += 16;
         v = pp(v);
@@ -815,34 +903,30 @@ void compute_accu2(
     a4hi += (a2 >> 2) & mask2;
 }
-template<class Preproc>
-simd16uint16 histogram_8(
-        const uint16_t * data, Preproc pp,
-        size_t n_in) {
-    assert (n_in % 16 == 0);
+template <class Preproc>
+simd16uint16 histogram_8(const uint16_t* data, Preproc pp, size_t n_in) {
+    assert(n_in % 16 == 0);
     int n = n_in / 16;
     simd32uint8 a8lo(0);
     simd32uint8 a8hi(0);
-    for(int i0 = 0; i0 < n; i0 += 15) {
-        simd16uint16 a4lo(0);  // 4-bit accus
+    for (int i0 = 0; i0 < n; i0 += 15) {
+        simd16uint16 a4lo(0); // 4-bit accus
         simd16uint16 a4hi(0);
         int i1 = std::min(i0 + 15, n);
         int i;
-        for(i = i0; i + 2 < i1; i += 3) {
+        for (i = i0; i + 2 < i1; i += 3) {
             compute_accu2<3>(data, pp, a4lo, a4hi); // adds 3 max
         }
         switch (i1 - i) {
-        case 2:
-            compute_accu2<2>(data, pp, a4lo, a4hi);
-            break;
-        case 1:
-            compute_accu2<1>(data, pp, a4lo, a4hi);
-            break;
+            case 2:
+                compute_accu2<2>(data, pp, a4lo, a4hi);
+                break;
+            case 1:
+                compute_accu2<1>(data, pp, a4lo, a4hi);
+                break;
         }
         a8lo += accu4to8(a4lo);
@@ -859,50 +943,72 @@ simd16uint16 histogram_8(
     return a16;
 }
 /************************************************************
  * 16 bins
  ************************************************************/
 static const simd32uint8 shifts2(_mm256_setr_epi8(
-    1, 2, 4, 8, 16, 32, 64, (char)128,
-    1, 2, 4, 8, 16, 32, 64, (char)128,
-    1, 2, 4, 8, 16, 32, 64, (char)128,
-    1, 2, 4, 8, 16, 32, 64, (char)128
-));
-simd32uint8 shiftr_16(simd32uint8 x, int n)
-{
+        1,
+        2,
+        4,
+        8,
+        16,
+        32,
+        64,
+        (char)128,
+        1,
+        2,
+        4,
+        8,
+        16,
+        32,
+        64,
+        (char)128,
+        1,
+        2,
+        4,
+        8,
+        16,
+        32,
+        64,
+        (char)128,
+        1,
+        2,
+        4,
+        8,
+        16,
+        32,
+        64,
+        (char)128));
+simd32uint8 shiftr_16(simd32uint8 x, int n) {
     return simd32uint8(simd16uint16(x) >> n);
 }
 inline simd32uint8 combine_2x2(simd32uint8 a, simd32uint8 b) {
     __m256i a1b0 = _mm256_permute2f128_si256(a.i, b.i, 0x21);
     __m256i a0b1 = _mm256_blend_epi32(a.i, b.i, 0xF0);
     return simd32uint8(a1b0) + simd32uint8(a0b1);
 }
 // 2-bit accumulator: we can add only up to 3 elements
 // on output we return 2*4-bit results
-template<int N, class Preproc>
+template <int N, class Preproc>
 void compute_accu2_16(
-        const uint16_t * & data, Preproc pp,
-        simd32uint8 & a4_0, simd32uint8 & a4_1,
-        simd32uint8 & a4_2, simd32uint8 & a4_3
-) {
+        const uint16_t*& data,
+        Preproc pp,
+        simd32uint8& a4_0,
+        simd32uint8& a4_1,
+        simd32uint8& a4_2,
+        simd32uint8& a4_3) {
     simd32uint8 mask1(0x55);
     simd32uint8 a2_0; // 2-bit accu
     simd32uint8 a2_1; // 2-bit accu
-    a2_0.clear(); a2_1.clear();
+    a2_0.clear();
+    a2_1.clear();
-    for (int j = 0; j < N; j ++) {
+    for (int j = 0; j < N; j++) {
         simd16uint16 v(data);
         data += 16;
         v = pp(v);
@@ -925,38 +1031,27 @@ void compute_accu2_16(
     a4_1 += a2_1 & mask2;
     a4_2 += shiftr_16(a2_0, 2) & mask2;
     a4_3 += shiftr_16(a2_1, 2) & mask2;
 }
 simd32uint8 accu4to8_2(simd32uint8 a4_0, simd32uint8 a4_1) {
     simd32uint8 mask4(0x0f);
-    simd32uint8 a8_0 = combine_2x2(
-        a4_0 & mask4,
-        shiftr_16(a4_0, 4) & mask4
-    );
+    simd32uint8 a8_0 = combine_2x2(a4_0 & mask4, shiftr_16(a4_0, 4) & mask4);
-    simd32uint8 a8_1 = combine_2x2(
-        a4_1 & mask4,
-        shiftr_16(a4_1, 4) & mask4
-    );
+    simd32uint8 a8_1 = combine_2x2(a4_1 & mask4, shiftr_16(a4_1, 4) & mask4);
     return simd32uint8(_mm256_hadd_epi16(a8_0.i, a8_1.i));
 }
-template<class Preproc>
-simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
-    assert (n_in % 16 == 0);
+template <class Preproc>
+simd16uint16 histogram_16(const uint16_t* data, Preproc pp, size_t n_in) {
+    assert(n_in % 16 == 0);
     int n = n_in / 16;
     simd32uint8 a8lo((uint8_t)0);
     simd32uint8 a8hi((uint8_t)0);
-    for(int i0 = 0; i0 < n; i0 += 7) {
+    for (int i0 = 0; i0 < n; i0 += 7) {
         simd32uint8 a4_0(0); // 0, 4, 8, 12
         simd32uint8 a4_1(0); // 1, 5, 9, 13
         simd32uint8 a4_2(0); // 2, 6, 10, 14
@@ -964,16 +1059,16 @@ simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
         int i1 = std::min(i0 + 7, n);
         int i;
-        for(i = i0; i + 2 < i1; i += 3) {
+        for (i = i0; i + 2 < i1; i += 3) {
             compute_accu2_16<3>(data, pp, a4_0, a4_1, a4_2, a4_3);
         }
         switch (i1 - i) {
-        case 2:
-            compute_accu2_16<2>(data, pp, a4_0, a4_1, a4_2, a4_3);
-            break;
-        case 1:
-            compute_accu2_16<1>(data, pp, a4_0, a4_1, a4_2, a4_3);
-            break;
+            case 2:
+                compute_accu2_16<2>(data, pp, a4_0, a4_1, a4_2, a4_3);
+                break;
+            case 1:
+                compute_accu2_16<1>(data, pp, a4_0, a4_1, a4_2, a4_3);
+                break;
         }
         a8lo += accu4to8_2(a4_0, a4_1);
@@ -986,23 +1081,19 @@ simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
     simd16uint16 a16 = simd16uint16(_mm256_hadd_epi16(a16lo.i, a16hi.i));
-    __m256i perm32 = _mm256_setr_epi32(
-        0, 2, 4, 6, 1, 3, 5, 7
-    );
+    __m256i perm32 = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
     a16.i = _mm256_permutevar8x32_epi32(a16.i, perm32);
     return a16;
 }
 struct PreprocNOP {
-    simd16uint16 operator () (simd16uint16 x)  {
+    simd16uint16 operator()(simd16uint16 x) {
         return x;
     }
 };
-template<int shift, int nbin>
+template <int shift, int nbin>
 struct PreprocMinShift {
     simd16uint16 min16;
     simd16uint16 max16;
@@ -1014,59 +1105,46 @@ struct PreprocMinShift {
         max16.set1(vmax); // vmax inclusive
     }
-    simd16uint16 operator () (simd16uint16 x)  {
+    simd16uint16 operator()(simd16uint16 x) {
         x = x - min16;
         simd16uint16 mask = (x == max(x, max16)) - (x == max16);
         return (x >> shift) | mask;
     }
 };
 /* unbounded versions of the functions */
-void simd_histogram_8_unbounded(
-    const uint16_t *data, int n,
-    int *hist)
-{
+void simd_histogram_8_unbounded(const uint16_t* data, int n, int* hist) {
     PreprocNOP pp;
     simd16uint16 a16 = histogram_8(data, pp, (n & ~15));
     ALIGNED(32) uint16_t a16_tab[16];
     a16.store(a16_tab);
-    for(int i = 0; i < 8; i++) {
+    for (int i = 0; i < 8; i++) {
         hist[i] = a16_tab[i] + a16_tab[i + 8];
     }
-    for(int i = (n & ~15); i < n; i++) {
+    for (int i = (n & ~15); i < n; i++) {
         hist[data[i]]++;
     }
 }
-void simd_histogram_16_unbounded(
-    const uint16_t *data, int n,
-    int *hist)
-{
+void simd_histogram_16_unbounded(const uint16_t* data, int n, int* hist) {
     simd16uint16 a16 = histogram_16(data, PreprocNOP(), (n & ~15));
     ALIGNED(32) uint16_t a16_tab[16];
     a16.store(a16_tab);
-    for(int i = 0; i < 16; i++) {
+    for (int i = 0; i < 16; i++) {
         hist[i] = a16_tab[i];
     }
-    for(int i = (n & ~15); i < n; i++) {
+    for (int i = (n & ~15); i < n; i++) {
         hist[data[i]]++;
     }
 }
 } // anonymous namespace
 /************************************************************
@@ -1074,10 +1152,11 @@ void simd_histogram_16_unbounded(
  ************************************************************/
 void simd_histogram_8(
-    const uint16_t *data, int n,
-    uint16_t min, int shift,
-    int *hist)
-{
+        const uint16_t* data,
+        int n,
+        uint16_t min,
+        int shift,
+        int* hist) {
     if (shift < 0) {
         simd_histogram_8_unbounded(data, n, hist);
         return;
@@ -1085,12 +1164,12 @@ void simd_histogram_8(
     simd16uint16 a16;
-#define DISPATCH(s)  \
-     case s: \
+#define DISPATCH(s)                                                     \
+    case s:                                                             \
         a16 = histogram_8(data, PreprocMinShift<s, 8>(min), (n & ~15)); \
         break
-    switch(shift) {
+    switch (shift) {
         DISPATCH(0);
         DISPATCH(1);
         DISPATCH(2);
@@ -1105,35 +1184,35 @@ void simd_histogram_8(
         DISPATCH(11);
         DISPATCH(12);
         DISPATCH(13);
-    default:
-        FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
+        default:
+            FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
     }
 #undef DISPATCH
     ALIGNED(32) uint16_t a16_tab[16];
     a16.store(a16_tab);
-    for(int i = 0; i < 8; i++) {
+    for (int i = 0; i < 8; i++) {
         hist[i] = a16_tab[i] + a16_tab[i + 8];
     }
     // complete with remaining bins
-    for(int i = (n & ~15); i < n; i++) {
-        if (data[i] < min) continue;
+    for (int i = (n & ~15); i < n; i++) {
+        if (data[i] < min)
+            continue;
         uint16_t v = data[i] - min;
         v >>= shift;
-        if (v < 8) hist[v]++;
+        if (v < 8)
+            hist[v]++;
     }
 }
 void simd_histogram_16(
-    const uint16_t *data, int n,
-    uint16_t min, int shift,
-    int *hist)
-{
+        const uint16_t* data,
+        int n,
+        uint16_t min,
+        int shift,
+        int* hist) {
     if (shift < 0) {
         simd_histogram_16_unbounded(data, n, hist);
         return;
@@ -1141,12 +1220,12 @@ void simd_histogram_16(
     simd16uint16 a16;
-#define DISPATCH(s)  \
-     case s: \
+#define DISPATCH(s)                                                       \
+    case s:                                                               \
         a16 = histogram_16(data, PreprocMinShift<s, 16>(min), (n & ~15)); \
         break
-    switch(shift) {
+    switch (shift) {
         DISPATCH(0);
         DISPATCH(1);
         DISPATCH(2);
@@ -1160,48 +1239,47 @@ void simd_histogram_16(
         DISPATCH(10);
         DISPATCH(11);
         DISPATCH(12);
-    default:
-        FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
+        default:
+            FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
     }
 #undef DISPATCH
     ALIGNED(32) uint16_t a16_tab[16];
     a16.store(a16_tab);
-    for(int i = 0; i < 16; i++) {
+    for (int i = 0; i < 16; i++) {
         hist[i] = a16_tab[i];
     }
-    for(int i = (n & ~15); i < n; i++) {
-        if (data[i] < min) continue;
+    for (int i = (n & ~15); i < n; i++) {
+        if (data[i] < min)
+            continue;
         uint16_t v = data[i] - min;
         v >>= shift;
-        if (v < 16) hist[v]++;
+        if (v < 16)
+            hist[v]++;
     }
 }
 // no AVX2
 #else
 void simd_histogram_16(
-    const uint16_t *data, int n,
-    uint16_t min, int shift,
-    int *hist)
-{
+        const uint16_t* data,
+        int n,
+        uint16_t min,
+        int shift,
+        int* hist) {
     memset(hist, 0, sizeof(*hist) * 16);
     if (shift < 0) {
-        for(size_t i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             hist[data[i]]++;
         }
     } else {
         int vmax0 = std::min((16 << shift) + min, 65536);
         uint16_t vmax = uint16_t(vmax0 - 1 - min);
-        for(size_t i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             uint16_t v = data[i];
             v -= min;
             if (!(v <= vmax))
@@ -1217,40 +1295,37 @@ void simd_histogram_16(
             */
         }
     }
 }
 void simd_histogram_8(
-    const uint16_t *data, int n,
-    uint16_t min, int shift,
-    int *hist)
-{
+        const uint16_t* data,
+        int n,
+        uint16_t min,
+        int shift,
+        int* hist) {
     memset(hist, 0, sizeof(*hist) * 8);
     if (shift < 0) {
-        for(size_t i = 0; i < n; i++) {
+        for (size_t i = 0; i < n; i++) {
             hist[data[i]]++;
         }
     } else {
-        for(size_t i = 0; i < n; i++) {
-            if (data[i] < min) continue;
+        for (size_t i = 0; i < n; i++) {
+            if (data[i] < min)
+                continue;
             uint16_t v = data[i] - min;
             v >>= shift;
-            if (v < 8) hist[v]++;
+            if (v < 8)
+                hist[v]++;
         }
     }
 }
 #endif
 void PartitionStats::reset() {
     memset(this, 0, sizeof(*this));
 }
 PartitionStats partition_stats;
 } // namespace faiss