faiss 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -24,13 +24,11 @@
24
24
  #ifndef FAISS_hamming_h
25
25
  #define FAISS_hamming_h
26
26
 
27
-
28
27
  #include <stdint.h>
29
28
 
30
29
  #include <faiss/impl/platform_macros.h>
31
30
  #include <faiss/utils/Heap.h>
32
31
 
33
-
34
32
  /* The Hamming distance type */
35
33
  typedef int32_t hamdis_t;
36
34
 
@@ -42,8 +40,7 @@ namespace faiss {
42
40
 
43
41
  struct RangeSearchResult;
44
42
 
45
- void bitvec_print (const uint8_t * b, size_t d);
46
-
43
+ void bitvec_print(const uint8_t* b, size_t d);
47
44
 
48
45
  /* Functions for casting vectors of regular types to compact bits.
49
46
  They assume proper allocation done beforehand, meaning that b
@@ -52,53 +49,45 @@ void bitvec_print (const uint8_t * b, size_t d);
52
49
  /* Makes an array of bits from the signs of a float array. The length
53
50
  of the output array b is rounded up to byte size (allocate
54
51
  accordingly) */
55
- void fvecs2bitvecs (
56
- const float * x,
57
- uint8_t * b,
58
- size_t d,
59
- size_t n);
60
-
61
- void bitvecs2fvecs (
62
- const uint8_t * b,
63
- float * x,
64
- size_t d,
65
- size_t n);
52
+ void fvecs2bitvecs(const float* x, uint8_t* b, size_t d, size_t n);
66
53
 
54
+ void bitvecs2fvecs(const uint8_t* b, float* x, size_t d, size_t n);
67
55
 
68
- void fvec2bitvec (const float * x, uint8_t * b, size_t d);
56
+ void fvec2bitvec(const float* x, uint8_t* b, size_t d);
69
57
 
70
58
  /** Shuffle the bits from b(i, j) := a(i, order[j])
71
59
  */
72
- void bitvec_shuffle (size_t n, size_t da, size_t db,
73
- const int *order,
74
- const uint8_t *a,
75
- uint8_t *b);
76
-
60
+ void bitvec_shuffle(
61
+ size_t n,
62
+ size_t da,
63
+ size_t db,
64
+ const int* order,
65
+ const uint8_t* a,
66
+ uint8_t* b);
77
67
 
78
68
  /***********************************************
79
69
  * Generic reader/writer for bit strings
80
70
  ***********************************************/
81
71
 
82
-
83
72
  struct BitstringWriter {
84
- uint8_t *code;
73
+ uint8_t* code;
85
74
  size_t code_size;
86
75
  size_t i; // current bit offset
87
76
 
88
77
  // code_size in bytes
89
- BitstringWriter(uint8_t *code, size_t code_size);
78
+ BitstringWriter(uint8_t* code, size_t code_size);
90
79
 
91
80
  // write the nbit low bits of x
92
81
  void write(uint64_t x, int nbit);
93
82
  };
94
83
 
95
84
  struct BitstringReader {
96
- const uint8_t *code;
85
+ const uint8_t* code;
97
86
  size_t code_size;
98
87
  size_t i;
99
88
 
100
89
  // code_size in bytes
101
- BitstringReader(const uint8_t *code, size_t code_size);
90
+ BitstringReader(const uint8_t* code, size_t code_size);
102
91
 
103
92
  // read nbit bits from the code
104
93
  uint64_t read(int nbit);
@@ -108,15 +97,12 @@ struct BitstringReader {
108
97
  * Hamming distance computation functions
109
98
  **************************************************/
110
99
 
111
-
112
-
113
100
  FAISS_API extern size_t hamming_batch_size;
114
101
 
115
102
  inline int popcount64(uint64_t x) {
116
103
  return __builtin_popcountl(x);
117
104
  }
118
105
 
119
-
120
106
  /** Compute a set of Hamming distances between na and nb binary vectors
121
107
  *
122
108
  * @param a size na * nbytespercode
@@ -124,15 +110,13 @@ inline int popcount64(uint64_t x) {
124
110
  * @param nbytespercode should be multiple of 8
125
111
  * @param dis output distances, size na * nb
126
112
  */
127
- void hammings (
128
- const uint8_t * a,
129
- const uint8_t * b,
130
- size_t na, size_t nb,
113
+ void hammings(
114
+ const uint8_t* a,
115
+ const uint8_t* b,
116
+ size_t na,
117
+ size_t nb,
131
118
  size_t nbytespercode,
132
- hamdis_t * dis);
133
-
134
-
135
-
119
+ hamdis_t* dis);
136
120
 
137
121
  /** Return the k smallest Hamming distances for a set of binary query vectors,
138
122
  * using a max heap.
@@ -142,22 +126,22 @@ void hammings (
142
126
  * @param ncodes size of the binary codes (bytes)
143
127
  * @param ordered if != 0: order the results by decreasing distance
144
128
  * (may be bottleneck for k/n > 0.01) */
145
- void hammings_knn_hc (
146
- int_maxheap_array_t * ha,
147
- const uint8_t * a,
148
- const uint8_t * b,
129
+ void hammings_knn_hc(
130
+ int_maxheap_array_t* ha,
131
+ const uint8_t* a,
132
+ const uint8_t* b,
149
133
  size_t nb,
150
134
  size_t ncodes,
151
135
  int ordered);
152
136
 
153
137
  /* Legacy alias to hammings_knn_hc. */
154
- void hammings_knn (
155
- int_maxheap_array_t * ha,
156
- const uint8_t * a,
157
- const uint8_t * b,
158
- size_t nb,
159
- size_t ncodes,
160
- int ordered);
138
+ void hammings_knn(
139
+ int_maxheap_array_t* ha,
140
+ const uint8_t* a,
141
+ const uint8_t* b,
142
+ size_t nb,
143
+ size_t ncodes,
144
+ int ordered);
161
145
 
162
146
  /** Return the k smallest Hamming distances for a set of binary query vectors,
163
147
  * using counting max.
@@ -171,66 +155,59 @@ void hammings_knn (
171
155
  * neighbors
172
156
  * @param labels output ids of the k nearest neighbors to each query vector
173
157
  */
174
- void hammings_knn_mc (
175
- const uint8_t * a,
176
- const uint8_t * b,
177
- size_t na,
178
- size_t nb,
179
- size_t k,
180
- size_t ncodes,
181
- int32_t *distances,
182
- int64_t *labels);
158
+ void hammings_knn_mc(
159
+ const uint8_t* a,
160
+ const uint8_t* b,
161
+ size_t na,
162
+ size_t nb,
163
+ size_t k,
164
+ size_t ncodes,
165
+ int32_t* distances,
166
+ int64_t* labels);
183
167
 
184
168
  /** same as hammings_knn except we are doing a range search with radius */
185
- void hamming_range_search (
186
- const uint8_t * a,
187
- const uint8_t * b,
188
- size_t na,
189
- size_t nb,
190
- int radius,
191
- size_t ncodes,
192
- RangeSearchResult *result);
193
-
169
+ void hamming_range_search(
170
+ const uint8_t* a,
171
+ const uint8_t* b,
172
+ size_t na,
173
+ size_t nb,
174
+ int radius,
175
+ size_t ncodes,
176
+ RangeSearchResult* result);
194
177
 
195
178
  /* Counting the number of matches or of cross-matches (without returning them)
196
179
  For use with function that assume pre-allocated memory */
197
- void hamming_count_thres (
198
- const uint8_t * bs1,
199
- const uint8_t * bs2,
180
+ void hamming_count_thres(
181
+ const uint8_t* bs1,
182
+ const uint8_t* bs2,
200
183
  size_t n1,
201
184
  size_t n2,
202
185
  hamdis_t ht,
203
186
  size_t ncodes,
204
- size_t * nptr);
187
+ size_t* nptr);
205
188
 
206
189
  /* Return all Hamming distances/index passing a thres. Pre-allocation of output
207
190
  is required. Use hamming_count_thres to determine the proper size. */
208
- size_t match_hamming_thres (
209
- const uint8_t * bs1,
210
- const uint8_t * bs2,
191
+ size_t match_hamming_thres(
192
+ const uint8_t* bs1,
193
+ const uint8_t* bs2,
211
194
  size_t n1,
212
195
  size_t n2,
213
196
  hamdis_t ht,
214
197
  size_t ncodes,
215
- int64_t * idx,
216
- hamdis_t * dis);
198
+ int64_t* idx,
199
+ hamdis_t* dis);
217
200
 
218
201
  /* Cross-matching in a set of vectors */
219
- void crosshamming_count_thres (
220
- const uint8_t * dbs,
202
+ void crosshamming_count_thres(
203
+ const uint8_t* dbs,
221
204
  size_t n,
222
205
  hamdis_t ht,
223
206
  size_t ncodes,
224
- size_t * nptr);
225
-
207
+ size_t* nptr);
226
208
 
227
209
  /* compute the Hamming distances between two codewords of nwords*64 bits */
228
- hamdis_t hamming (
229
- const uint64_t * bs1,
230
- const uint64_t * bs2,
231
- size_t nwords);
232
-
233
-
210
+ hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords);
234
211
 
235
212
  } // namespace faiss
236
213
 
@@ -5,8 +5,6 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
-
9
-
10
8
  #pragma once
11
9
 
12
10
  #include <climits>
@@ -14,7 +12,6 @@
14
12
 
15
13
  #include <limits>
16
14
 
17
-
18
15
  namespace faiss {
19
16
 
20
17
  /*******************************************************************
@@ -34,8 +31,10 @@ namespace faiss {
34
31
  template <typename T_, typename TI_>
35
32
  struct CMax;
36
33
 
37
- template<typename T> inline T cmin_nextafter(T x);
38
- template<typename T> inline T cmax_nextafter(T x);
34
+ template <typename T>
35
+ inline T cmin_nextafter(T x);
36
+ template <typename T>
37
+ inline T cmax_nextafter(T x);
39
38
 
40
39
  // traits of minheaps = heaps where the minimum value is stored on top
41
40
  // useful to find the *max* values of an array
@@ -44,10 +43,10 @@ struct CMin {
44
43
  typedef T_ T;
45
44
  typedef TI_ TI;
46
45
  typedef CMax<T_, TI_> Crev; // reference to reverse comparison
47
- inline static bool cmp (T a, T b) {
46
+ inline static bool cmp(T a, T b) {
48
47
  return a < b;
49
48
  }
50
- inline static T neutral () {
49
+ inline static T neutral() {
51
50
  return std::numeric_limits<T>::lowest();
52
51
  }
53
52
  static const bool is_max = false;
@@ -57,18 +56,15 @@ struct CMin {
57
56
  }
58
57
  };
59
58
 
60
-
61
-
62
-
63
59
  template <typename T_, typename TI_>
64
60
  struct CMax {
65
61
  typedef T_ T;
66
62
  typedef TI_ TI;
67
63
  typedef CMin<T_, TI_> Crev;
68
- inline static bool cmp (T a, T b) {
64
+ inline static bool cmp(T a, T b) {
69
65
  return a > b;
70
66
  }
71
- inline static T neutral () {
67
+ inline static T neutral() {
72
68
  return std::numeric_limits<T>::max();
73
69
  }
74
70
  static const bool is_max = true;
@@ -77,22 +73,24 @@ struct CMax {
77
73
  }
78
74
  };
79
75
 
80
-
81
- template<> inline float cmin_nextafter<float>(float x) {
76
+ template <>
77
+ inline float cmin_nextafter<float>(float x) {
82
78
  return std::nextafterf(x, -HUGE_VALF);
83
79
  }
84
80
 
85
- template<> inline float cmax_nextafter<float>(float x) {
81
+ template <>
82
+ inline float cmax_nextafter<float>(float x) {
86
83
  return std::nextafterf(x, HUGE_VALF);
87
84
  }
88
85
 
89
- template<> inline uint16_t cmin_nextafter<uint16_t>(uint16_t x) {
86
+ template <>
87
+ inline uint16_t cmin_nextafter<uint16_t>(uint16_t x) {
90
88
  return x - 1;
91
89
  }
92
90
 
93
- template<> inline uint16_t cmax_nextafter<uint16_t>(uint16_t x) {
91
+ template <>
92
+ inline uint16_t cmax_nextafter<uint16_t>(uint16_t x) {
94
93
  return x + 1;
95
94
  }
96
95
 
97
-
98
96
  } // namespace faiss
@@ -7,8 +7,8 @@
7
7
 
8
8
  #include <faiss/utils/partitioning.h>
9
9
 
10
- #include <cmath>
11
10
  #include <cassert>
11
+ #include <cmath>
12
12
 
13
13
  #include <faiss/impl/FaissAssert.h>
14
14
  #include <faiss/utils/AlignedTable.h>
@@ -19,15 +19,13 @@
19
19
 
20
20
  namespace faiss {
21
21
 
22
-
23
22
  /******************************************************************
24
23
  * Internal routines
25
24
  ******************************************************************/
26
25
 
27
-
28
26
  namespace partitioning {
29
27
 
30
- template<typename T>
28
+ template <typename T>
31
29
  T median3(T a, T b, T c) {
32
30
  if (a > b) {
33
31
  std::swap(a, b);
@@ -41,12 +39,12 @@ T median3(T a, T b, T c) {
41
39
  return a;
42
40
  }
43
41
 
44
-
45
- template<class C>
42
+ template <class C>
46
43
  typename C::T sample_threshold_median3(
47
- const typename C::T * vals, int n,
48
- typename C::T thresh_inf, typename C::T thresh_sup
49
- ) {
44
+ const typename C::T* vals,
45
+ int n,
46
+ typename C::T thresh_inf,
47
+ typename C::T thresh_sup) {
50
48
  using T = typename C::T;
51
49
  size_t big_prime = 6700417;
52
50
  T val3[3];
@@ -73,31 +71,34 @@ typename C::T sample_threshold_median3(
73
71
  }
74
72
  }
75
73
 
76
- template<class C>
74
+ template <class C>
77
75
  void count_lt_and_eq(
78
- const typename C::T * vals, size_t n, typename C::T thresh,
79
- size_t & n_lt, size_t & n_eq
80
- ) {
76
+ const typename C::T* vals,
77
+ size_t n,
78
+ typename C::T thresh,
79
+ size_t& n_lt,
80
+ size_t& n_eq) {
81
81
  n_lt = n_eq = 0;
82
82
 
83
- for(size_t i = 0; i < n; i++) {
83
+ for (size_t i = 0; i < n; i++) {
84
84
  typename C::T v = *vals++;
85
- if(C::cmp(thresh, v)) {
85
+ if (C::cmp(thresh, v)) {
86
86
  n_lt++;
87
- } else if(v == thresh) {
87
+ } else if (v == thresh) {
88
88
  n_eq++;
89
89
  }
90
90
  }
91
91
  }
92
92
 
93
-
94
- template<class C>
93
+ template <class C>
95
94
  size_t compress_array(
96
- typename C::T *vals, typename C::TI * ids,
97
- size_t n, typename C::T thresh, size_t n_eq
98
- ) {
95
+ typename C::T* vals,
96
+ typename C::TI* ids,
97
+ size_t n,
98
+ typename C::T thresh,
99
+ size_t n_eq) {
99
100
  size_t wp = 0;
100
- for(size_t i = 0; i < n; i++) {
101
+ for (size_t i = 0; i < n; i++) {
101
102
  if (C::cmp(thresh, vals[i])) {
102
103
  vals[wp] = vals[i];
103
104
  ids[wp] = ids[i];
@@ -113,15 +114,16 @@ size_t compress_array(
113
114
  return wp;
114
115
  }
115
116
 
117
+ #define IFV if (false)
116
118
 
117
- #define IFV if(false)
118
-
119
- template<class C>
119
+ template <class C>
120
120
  typename C::T partition_fuzzy_median3(
121
- typename C::T *vals, typename C::TI * ids, size_t n,
122
- size_t q_min, size_t q_max, size_t * q_out)
123
- {
124
-
121
+ typename C::T* vals,
122
+ typename C::TI* ids,
123
+ size_t n,
124
+ size_t q_min,
125
+ size_t q_max,
126
+ size_t* q_out) {
125
127
  if (q_min == 0) {
126
128
  if (q_out) {
127
129
  *q_out = C::Crev::neutral();
@@ -150,12 +152,19 @@ typename C::T partition_fuzzy_median3(
150
152
  size_t n_eq = 0, n_lt = 0;
151
153
  size_t q = 0;
152
154
 
153
- for(int it = 0; it < 200; it++) {
155
+ for (int it = 0; it < 200; it++) {
154
156
  count_lt_and_eq<C>(vals, n, thresh, n_lt, n_eq);
155
157
 
156
- IFV printf(" thresh=%g [%g %g] n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
157
- float(thresh), float(thresh_inf), float(thresh_sup),
158
- long(n_lt), long(n_eq), long(q_min), long(q_max), long(n));
158
+ IFV printf(
159
+ " thresh=%g [%g %g] n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
160
+ float(thresh),
161
+ float(thresh_inf),
162
+ float(thresh_sup),
163
+ long(n_lt),
164
+ long(n_eq),
165
+ long(q_min),
166
+ long(q_max),
167
+ long(n));
159
168
 
160
169
  if (n_lt <= q_min) {
161
170
  if (n_lt + n_eq >= q_min) {
@@ -172,8 +181,12 @@ typename C::T partition_fuzzy_median3(
172
181
  }
173
182
 
174
183
  // FIXME avoid a second pass over the array to sample the threshold
175
- IFV printf(" sample thresh in [%g %g]\n", float(thresh_inf), float(thresh_sup));
176
- T new_thresh = sample_threshold_median3<C>(vals, n, thresh_inf, thresh_sup);
184
+ IFV printf(
185
+ " sample thresh in [%g %g]\n",
186
+ float(thresh_inf),
187
+ float(thresh_sup));
188
+ T new_thresh =
189
+ sample_threshold_median3<C>(vals, n, thresh_inf, thresh_sup);
177
190
  if (new_thresh == thresh_inf) {
178
191
  // then there is nothing between thresh_inf and thresh_sup
179
192
  break;
@@ -203,25 +216,19 @@ typename C::T partition_fuzzy_median3(
203
216
  return thresh;
204
217
  }
205
218
 
206
-
207
219
  } // namespace partitioning
208
220
 
209
-
210
-
211
221
  /******************************************************************
212
222
  * SIMD routines when vals is an aligned array of uint16_t
213
223
  ******************************************************************/
214
224
 
215
-
216
225
  namespace simd_partitioning {
217
226
 
218
-
219
-
220
227
  void find_minimax(
221
- const uint16_t * vals, size_t n,
222
- uint16_t & smin, uint16_t & smax
223
- ) {
224
-
228
+ const uint16_t* vals,
229
+ size_t n,
230
+ uint16_t& smin,
231
+ uint16_t& smax) {
225
232
  simd16uint16 vmin(0xffff), vmax(0);
226
233
  for (size_t i = 0; i + 15 < n; i += 16) {
227
234
  simd16uint16 v(vals + i);
@@ -235,22 +242,20 @@ void find_minimax(
235
242
 
236
243
  smin = tab32[0], smax = tab32[16];
237
244
 
238
- for(int i = 1; i < 16; i++) {
245
+ for (int i = 1; i < 16; i++) {
239
246
  smin = std::min(smin, tab32[i]);
240
247
  smax = std::max(smax, tab32[i + 16]);
241
248
  }
242
249
 
243
250
  // missing values
244
- for(size_t i = (n & ~15); i < n; i++) {
251
+ for (size_t i = (n & ~15); i < n; i++) {
245
252
  smin = std::min(smin, vals[i]);
246
253
  smax = std::max(smax, vals[i]);
247
254
  }
248
-
249
255
  }
250
256
 
251
-
252
257
  // max func differentiates between CMin and CMax (keep lowest or largest)
253
- template<class C>
258
+ template <class C>
254
259
  simd16uint16 max_func(simd16uint16 v, simd16uint16 thr16) {
255
260
  constexpr bool is_max = C::is_max;
256
261
  if (is_max) {
@@ -260,11 +265,13 @@ simd16uint16 max_func(simd16uint16 v, simd16uint16 thr16) {
260
265
  }
261
266
  }
262
267
 
263
- template<class C>
268
+ template <class C>
264
269
  void count_lt_and_eq(
265
- const uint16_t * vals, int n, uint16_t thresh,
266
- size_t & n_lt, size_t & n_eq
267
- ) {
270
+ const uint16_t* vals,
271
+ int n,
272
+ uint16_t thresh,
273
+ size_t& n_lt,
274
+ size_t& n_eq) {
268
275
  n_lt = n_eq = 0;
269
276
  simd16uint16 thr16(thresh);
270
277
 
@@ -283,24 +290,25 @@ void count_lt_and_eq(
283
290
  n_lt += 16 - i_ge;
284
291
  }
285
292
 
286
- for(size_t i = n1 * 16; i < n; i++) {
293
+ for (size_t i = n1 * 16; i < n; i++) {
287
294
  uint16_t v = *vals++;
288
- if(C::cmp(thresh, v)) {
295
+ if (C::cmp(thresh, v)) {
289
296
  n_lt++;
290
- } else if(v == thresh) {
297
+ } else if (v == thresh) {
291
298
  n_eq++;
292
299
  }
293
300
  }
294
301
  }
295
302
 
296
-
297
-
298
303
  /* compress separated values and ids table, keeping all values < thresh and at
299
304
  * most n_eq equal values */
300
- template<class C>
305
+ template <class C>
301
306
  int simd_compress_array(
302
- uint16_t *vals, typename C::TI * ids, size_t n, uint16_t thresh, int n_eq
303
- ) {
307
+ uint16_t* vals,
308
+ typename C::TI* ids,
309
+ size_t n,
310
+ uint16_t thresh,
311
+ int n_eq) {
304
312
  simd16uint16 thr16(thresh);
305
313
  simd16uint16 mixmask(0xff00);
306
314
 
@@ -313,13 +321,15 @@ int simd_compress_array(
313
321
  simd16uint16 max2 = max_func<C>(v, thr16);
314
322
  simd16uint16 gemask = (v == max2);
315
323
  simd16uint16 eqmask = (v == thr16);
316
- uint32_t bits = get_MSBs(blendv(
317
- simd32uint8(eqmask), simd32uint8(gemask), simd32uint8(mixmask)));
324
+ uint32_t bits = get_MSBs(
325
+ blendv(simd32uint8(eqmask),
326
+ simd32uint8(gemask),
327
+ simd32uint8(mixmask)));
318
328
  bits ^= 0xAAAAAAAA;
319
329
  // bit 2*i : eq
320
330
  // bit 2*i + 1 : lt
321
331
 
322
- while(bits) {
332
+ while (bits) {
323
333
  int j = __builtin_ctz(bits) & (~1);
324
334
  bool is_eq = (bits >> j) & 1;
325
335
  bool is_lt = (bits >> j) & 2;
@@ -330,7 +340,7 @@ int simd_compress_array(
330
340
  vals[wp] = vals[i0 + j];
331
341
  ids[wp] = ids[i0 + j];
332
342
  wp++;
333
- } else if(is_eq && n_eq > 0) {
343
+ } else if (is_eq && n_eq > 0) {
334
344
  vals[wp] = vals[i0 + j];
335
345
  ids[wp] = ids[i0 + j];
336
346
  wp++;
@@ -346,7 +356,7 @@ int simd_compress_array(
346
356
  simd16uint16 gemask = (v == max2);
347
357
  uint32_t bits = ~get_MSBs(simd32uint8(gemask));
348
358
 
349
- while(bits) {
359
+ while (bits) {
350
360
  int j = __builtin_ctz(bits);
351
361
  bits &= ~(3 << j);
352
362
  j >>= 1;
@@ -358,7 +368,7 @@ int simd_compress_array(
358
368
  }
359
369
 
360
370
  // end with scalar
361
- for(int i = (n & ~15); i < n; i++) {
371
+ for (int i = (n & ~15); i < n; i++) {
362
372
  if (C::cmp(thresh, vals[i])) {
363
373
  vals[wp] = vals[i];
364
374
  ids[wp] = ids[i];
@@ -376,29 +386,28 @@ int simd_compress_array(
376
386
 
377
387
  // #define MICRO_BENCHMARK
378
388
 
379
- static uint64_t get_cy () {
380
- #ifdef MICRO_BENCHMARK
389
+ static uint64_t get_cy() {
390
+ #ifdef MICRO_BENCHMARK
381
391
  uint32_t high, low;
382
- asm volatile("rdtsc \n\t"
383
- : "=a" (low),
384
- "=d" (high));
392
+ asm volatile("rdtsc \n\t" : "=a"(low), "=d"(high));
385
393
  return ((uint64_t)high << 32) | (low);
386
394
  #else
387
395
  return 0;
388
396
  #endif
389
397
  }
390
398
 
399
+ #define IFV if (false)
391
400
 
392
-
393
- #define IFV if(false)
394
-
395
- template<class C>
401
+ template <class C>
396
402
  uint16_t simd_partition_fuzzy_with_bounds(
397
- uint16_t *vals, typename C::TI * ids, size_t n,
398
- size_t q_min, size_t q_max, size_t * q_out,
399
- uint16_t s0i, uint16_t s1i)
400
- {
401
-
403
+ uint16_t* vals,
404
+ typename C::TI* ids,
405
+ size_t n,
406
+ size_t q_min,
407
+ size_t q_max,
408
+ size_t* q_out,
409
+ uint16_t s0i,
410
+ uint16_t s1i) {
402
411
  if (q_min == 0) {
403
412
  if (q_out) {
404
413
  *q_out = 0;
@@ -428,13 +437,21 @@ uint16_t simd_partition_fuzzy_with_bounds(
428
437
  size_t n_eq = 0, n_lt = 0;
429
438
  size_t q = 0;
430
439
 
431
- for(int it = 0; it < 200; it++) {
440
+ for (int it = 0; it < 200; it++) {
432
441
  // while(s0 + 1 < s1) {
433
442
  thresh = (s0 + s1) / 2;
434
443
  count_lt_and_eq<C>(vals, n, thresh, n_lt, n_eq);
435
444
 
436
- IFV printf(" [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
437
- s0, s1, thresh, n_lt, n_eq, q_min, q_max, n);
445
+ IFV printf(
446
+ " [%ld %ld] thresh=%d n_lt=%ld n_eq=%ld, q=%ld:%ld/%ld\n",
447
+ s0,
448
+ s1,
449
+ thresh,
450
+ n_lt,
451
+ n_eq,
452
+ q_min,
453
+ q_max,
454
+ n);
438
455
  if (n_lt <= q_min) {
439
456
  if (n_lt + n_eq >= q_min) {
440
457
  q = q_min;
@@ -456,7 +473,6 @@ uint16_t simd_partition_fuzzy_with_bounds(
456
473
  s0 = thresh;
457
474
  }
458
475
  }
459
-
460
476
  }
461
477
 
462
478
  uint64_t t1 = get_cy();
@@ -495,14 +511,16 @@ uint16_t simd_partition_fuzzy_with_bounds(
495
511
  return thresh;
496
512
  }
497
513
 
498
-
499
- template<class C>
514
+ template <class C>
500
515
  uint16_t simd_partition_fuzzy_with_bounds_histogram(
501
- uint16_t *vals, typename C::TI * ids, size_t n,
502
- size_t q_min, size_t q_max, size_t * q_out,
503
- uint16_t s0i, uint16_t s1i)
504
- {
505
-
516
+ uint16_t* vals,
517
+ typename C::TI* ids,
518
+ size_t n,
519
+ size_t q_min,
520
+ size_t q_max,
521
+ size_t* q_out,
522
+ uint16_t s0i,
523
+ uint16_t s1i) {
506
524
  if (q_min == 0) {
507
525
  if (q_out) {
508
526
  *q_out = 0;
@@ -522,11 +540,17 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
522
540
  return s0i;
523
541
  }
524
542
 
525
- IFV printf("partition fuzzy, q=%ld:%ld / %ld, bounds=%d %d\n",
526
- q_min, q_max, n, s0i, s1i);
543
+ IFV printf(
544
+ "partition fuzzy, q=%ld:%ld / %ld, bounds=%d %d\n",
545
+ q_min,
546
+ q_max,
547
+ n,
548
+ s0i,
549
+ s1i);
527
550
 
528
551
  if (!C::is_max) {
529
- IFV printf("revert due to CMin, q_min:q_max -> %ld:%ld\n", q_min, q_max);
552
+ IFV printf(
553
+ "revert due to CMin, q_min:q_max -> %ld:%ld\n", q_min, q_max);
530
554
  q_min = n - q_min;
531
555
  q_max = n - q_max;
532
556
  }
@@ -537,31 +561,39 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
537
561
  size_t n_lt = 0, n_gt = 0;
538
562
 
539
563
  // output of loop:
540
- int thresh; // final threshold
541
- uint64_t tot_eq = 0; // total nb of equal values
542
- uint64_t n_eq = 0; // nb of equal values to keep
543
- size_t q; // final quantile
564
+ int thresh; // final threshold
565
+ uint64_t tot_eq = 0; // total nb of equal values
566
+ uint64_t n_eq = 0; // nb of equal values to keep
567
+ size_t q; // final quantile
544
568
 
545
569
  // buffer for the histograms
546
570
  int hist[16];
547
571
 
548
- for(int it = 0; it < 20; it++) {
572
+ for (int it = 0; it < 20; it++) {
549
573
  // otherwise we would be done already
550
574
 
551
575
  int shift = 0;
552
576
 
553
- IFV printf(" it %d bounds: %d %d n_lt=%ld n_gt=%ld\n",
554
- it, s0, s1, n_lt, n_gt);
577
+ IFV printf(
578
+ " it %d bounds: %d %d n_lt=%ld n_gt=%ld\n",
579
+ it,
580
+ s0,
581
+ s1,
582
+ n_lt,
583
+ n_gt);
555
584
 
556
585
  int maxval = s1 - s0;
557
586
 
558
- while(maxval > 15) {
587
+ while (maxval > 15) {
559
588
  shift++;
560
589
  maxval >>= 1;
561
590
  }
562
591
 
563
- IFV printf(" histogram shift %d maxval %d ?= %d\n",
564
- shift, maxval, int((s1 - s0) >> shift));
592
+ IFV printf(
593
+ " histogram shift %d maxval %d ?= %d\n",
594
+ shift,
595
+ maxval,
596
+ int((s1 - s0) >> shift));
565
597
 
566
598
  if (maxval > 7) {
567
599
  simd_histogram_16(vals, n, s0, shift, hist);
@@ -571,7 +603,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
571
603
  IFV {
572
604
  int sum = n_lt + n_gt;
573
605
  printf(" n_lt=%ld hist=[", n_lt);
574
- for(int i = 0; i <= maxval; i++) {
606
+ for (int i = 0; i <= maxval; i++) {
575
607
  printf("%d ", hist[i]);
576
608
  sum += hist[i];
577
609
  }
@@ -597,7 +629,12 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
597
629
  assert(!"not implemented");
598
630
  }
599
631
 
600
- IFV printf(" new bin: s0=%d s1=%d n_lt=%ld n_gt=%ld\n", s0, s1, n_lt, n_gt);
632
+ IFV printf(
633
+ " new bin: s0=%d s1=%d n_lt=%ld n_gt=%ld\n",
634
+ s0,
635
+ s1,
636
+ n_lt,
637
+ n_gt);
601
638
 
602
639
  if (s1 > s0) {
603
640
  if (n_lt >= q_min && q_max >= n_lt) {
@@ -628,7 +665,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
628
665
 
629
666
  if (!C::is_max) {
630
667
  if (n_eq == 0) {
631
- thresh --;
668
+ thresh--;
632
669
  } else {
633
670
  // thresh unchanged
634
671
  n_eq = tot_eq - n_eq;
@@ -647,14 +684,14 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
647
684
  return thresh;
648
685
  }
649
686
 
650
-
651
-
652
- template<class C>
687
+ template <class C>
653
688
  uint16_t simd_partition_fuzzy(
654
- uint16_t *vals, typename C::TI * ids, size_t n,
655
- size_t q_min, size_t q_max, size_t * q_out
656
- ) {
657
-
689
+ uint16_t* vals,
690
+ typename C::TI* ids,
691
+ size_t n,
692
+ size_t q_min,
693
+ size_t q_max,
694
+ size_t* q_out) {
658
695
  assert(is_aligned_pointer(vals));
659
696
 
660
697
  uint16_t s0i, s1i;
@@ -662,14 +699,15 @@ uint16_t simd_partition_fuzzy(
662
699
  // QSelect_stats.t0 += get_cy() - t0;
663
700
 
664
701
  return simd_partition_fuzzy_with_bounds<C>(
665
- vals, ids, n, q_min, q_max, q_out, s0i, s1i);
702
+ vals, ids, n, q_min, q_max, q_out, s0i, s1i);
666
703
  }
667
704
 
668
-
669
-
670
- template<class C>
671
- uint16_t simd_partition(uint16_t *vals, typename C::TI * ids, size_t n, size_t q) {
672
-
705
+ template <class C>
706
+ uint16_t simd_partition(
707
+ uint16_t* vals,
708
+ typename C::TI* ids,
709
+ size_t n,
710
+ size_t q) {
673
711
  assert(is_aligned_pointer(vals));
674
712
 
675
713
  if (q == 0) {
@@ -683,72 +721,97 @@ uint16_t simd_partition(uint16_t *vals, typename C::TI * ids, size_t n, size_t q
683
721
  find_minimax(vals, n, s0i, s1i);
684
722
 
685
723
  return simd_partition_fuzzy_with_bounds<C>(
686
- vals, ids, n, q, q, nullptr, s0i, s1i);
724
+ vals, ids, n, q, q, nullptr, s0i, s1i);
687
725
  }
688
726
 
689
- template<class C>
727
+ template <class C>
690
728
  uint16_t simd_partition_with_bounds(
691
- uint16_t *vals, typename C::TI * ids, size_t n, size_t q,
692
- uint16_t s0i, uint16_t s1i)
693
- {
729
+ uint16_t* vals,
730
+ typename C::TI* ids,
731
+ size_t n,
732
+ size_t q,
733
+ uint16_t s0i,
734
+ uint16_t s1i) {
694
735
  return simd_partition_fuzzy_with_bounds<C>(
695
- vals, ids, n, q, q, nullptr, s0i, s1i);
736
+ vals, ids, n, q, q, nullptr, s0i, s1i);
696
737
  }
697
738
 
698
739
  } // namespace simd_partitioning
699
740
 
700
-
701
741
  /******************************************************************
702
742
  * Driver routine
703
743
  ******************************************************************/
704
744
 
705
-
706
- template<class C>
745
+ template <class C>
707
746
  typename C::T partition_fuzzy(
708
- typename C::T *vals, typename C::TI * ids, size_t n,
709
- size_t q_min, size_t q_max, size_t * q_out)
710
- {
747
+ typename C::T* vals,
748
+ typename C::TI* ids,
749
+ size_t n,
750
+ size_t q_min,
751
+ size_t q_max,
752
+ size_t* q_out) {
711
753
  // the code below compiles and runs without AVX2 but it's slower than
712
754
  // the scalar implementation
713
755
  #ifdef __AVX2__
714
756
  constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
715
757
  if (is_uint16 && is_aligned_pointer(vals)) {
716
758
  return simd_partitioning::simd_partition_fuzzy<C>(
717
- (uint16_t*)vals, ids, n, q_min, q_max, q_out);
759
+ (uint16_t*)vals, ids, n, q_min, q_max, q_out);
718
760
  }
719
761
  #endif
720
762
  return partitioning::partition_fuzzy_median3<C>(
721
- vals, ids, n, q_min, q_max, q_out);
763
+ vals, ids, n, q_min, q_max, q_out);
722
764
  }
723
765
 
724
-
725
766
  // explicit template instantiations
726
767
 
727
- template float partition_fuzzy<CMin<float, int64_t>> (
728
- float *vals, int64_t * ids, size_t n,
729
- size_t q_min, size_t q_max, size_t * q_out);
730
-
731
- template float partition_fuzzy<CMax<float, int64_t>> (
732
- float *vals, int64_t * ids, size_t n,
733
- size_t q_min, size_t q_max, size_t * q_out);
734
-
735
- template uint16_t partition_fuzzy<CMin<uint16_t, int64_t>> (
736
- uint16_t *vals, int64_t * ids, size_t n,
737
- size_t q_min, size_t q_max, size_t * q_out);
738
-
739
- template uint16_t partition_fuzzy<CMax<uint16_t, int64_t>> (
740
- uint16_t *vals, int64_t * ids, size_t n,
741
- size_t q_min, size_t q_max, size_t * q_out);
742
-
743
- template uint16_t partition_fuzzy<CMin<uint16_t, int>> (
744
- uint16_t *vals, int * ids, size_t n,
745
- size_t q_min, size_t q_max, size_t * q_out);
746
-
747
- template uint16_t partition_fuzzy<CMax<uint16_t, int>> (
748
- uint16_t *vals, int * ids, size_t n,
749
- size_t q_min, size_t q_max, size_t * q_out);
750
-
751
-
768
+ template float partition_fuzzy<CMin<float, int64_t>>(
769
+ float* vals,
770
+ int64_t* ids,
771
+ size_t n,
772
+ size_t q_min,
773
+ size_t q_max,
774
+ size_t* q_out);
775
+
776
+ template float partition_fuzzy<CMax<float, int64_t>>(
777
+ float* vals,
778
+ int64_t* ids,
779
+ size_t n,
780
+ size_t q_min,
781
+ size_t q_max,
782
+ size_t* q_out);
783
+
784
+ template uint16_t partition_fuzzy<CMin<uint16_t, int64_t>>(
785
+ uint16_t* vals,
786
+ int64_t* ids,
787
+ size_t n,
788
+ size_t q_min,
789
+ size_t q_max,
790
+ size_t* q_out);
791
+
792
+ template uint16_t partition_fuzzy<CMax<uint16_t, int64_t>>(
793
+ uint16_t* vals,
794
+ int64_t* ids,
795
+ size_t n,
796
+ size_t q_min,
797
+ size_t q_max,
798
+ size_t* q_out);
799
+
800
+ template uint16_t partition_fuzzy<CMin<uint16_t, int>>(
801
+ uint16_t* vals,
802
+ int* ids,
803
+ size_t n,
804
+ size_t q_min,
805
+ size_t q_max,
806
+ size_t* q_out);
807
+
808
+ template uint16_t partition_fuzzy<CMax<uint16_t, int>>(
809
+ uint16_t* vals,
810
+ int* ids,
811
+ size_t n,
812
+ size_t q_min,
813
+ size_t q_max,
814
+ size_t* q_out);
752
815
 
753
816
  /******************************************************************
754
817
  * Histogram subroutines
@@ -758,7 +821,7 @@ template uint16_t partition_fuzzy<CMax<uint16_t, int>> (
758
821
  /// FIXME when MSB of uint16 is set
759
822
  // this code does not compile properly with GCC 7.4.0
760
823
 
761
- namespace {
824
+ namespace {
762
825
 
763
826
  /************************************************************
764
827
  * 8 bins
@@ -773,7 +836,6 @@ simd32uint8 accu4to8(simd16uint16 a4) {
773
836
  return simd32uint8(_mm256_hadd_epi16(a8_0.i, a8_1.i));
774
837
  }
775
838
 
776
-
777
839
  simd16uint16 accu8to16(simd32uint8 a8) {
778
840
  simd16uint16 mask8(0x00ff);
779
841
 
@@ -783,27 +845,53 @@ simd16uint16 accu8to16(simd32uint8 a8) {
783
845
  return simd16uint16(_mm256_hadd_epi16(a8_0.i, a8_1.i));
784
846
  }
785
847
 
786
-
787
848
  static const simd32uint8 shifts(_mm256_setr_epi8(
788
- 1, 16, 0, 0, 4, 64, 0, 0,
789
- 0, 0, 1, 16, 0, 0, 4, 64,
790
- 1, 16, 0, 0, 4, 64, 0, 0,
791
- 0, 0, 1, 16, 0, 0, 4, 64
792
- ));
849
+ 1,
850
+ 16,
851
+ 0,
852
+ 0,
853
+ 4,
854
+ 64,
855
+ 0,
856
+ 0,
857
+ 0,
858
+ 0,
859
+ 1,
860
+ 16,
861
+ 0,
862
+ 0,
863
+ 4,
864
+ 64,
865
+ 1,
866
+ 16,
867
+ 0,
868
+ 0,
869
+ 4,
870
+ 64,
871
+ 0,
872
+ 0,
873
+ 0,
874
+ 0,
875
+ 1,
876
+ 16,
877
+ 0,
878
+ 0,
879
+ 4,
880
+ 64));
793
881
 
794
882
  // 2-bit accumulator: we can add only up to 3 elements
795
883
  // on output we return 2*4-bit results
796
884
  // preproc returns either an index in 0..7 or 0xffff
797
885
  // that yields a 0 when used in the table look-up
798
- template<int N, class Preproc>
886
+ template <int N, class Preproc>
799
887
  void compute_accu2(
800
- const uint16_t * & data,
801
- Preproc & pp,
802
- simd16uint16 & a4lo, simd16uint16 & a4hi
803
- ) {
888
+ const uint16_t*& data,
889
+ Preproc& pp,
890
+ simd16uint16& a4lo,
891
+ simd16uint16& a4hi) {
804
892
  simd16uint16 mask2(0x3333);
805
893
  simd16uint16 a2((uint16_t)0); // 2-bit accu
806
- for (int j = 0; j < N; j ++) {
894
+ for (int j = 0; j < N; j++) {
807
895
  simd16uint16 v(data);
808
896
  data += 16;
809
897
  v = pp(v);
@@ -815,34 +903,30 @@ void compute_accu2(
815
903
  a4hi += (a2 >> 2) & mask2;
816
904
  }
817
905
 
818
-
819
- template<class Preproc>
820
- simd16uint16 histogram_8(
821
- const uint16_t * data, Preproc pp,
822
- size_t n_in) {
823
-
824
- assert (n_in % 16 == 0);
906
+ template <class Preproc>
907
+ simd16uint16 histogram_8(const uint16_t* data, Preproc pp, size_t n_in) {
908
+ assert(n_in % 16 == 0);
825
909
  int n = n_in / 16;
826
910
 
827
911
  simd32uint8 a8lo(0);
828
912
  simd32uint8 a8hi(0);
829
913
 
830
- for(int i0 = 0; i0 < n; i0 += 15) {
831
- simd16uint16 a4lo(0); // 4-bit accus
914
+ for (int i0 = 0; i0 < n; i0 += 15) {
915
+ simd16uint16 a4lo(0); // 4-bit accus
832
916
  simd16uint16 a4hi(0);
833
917
 
834
918
  int i1 = std::min(i0 + 15, n);
835
919
  int i;
836
- for(i = i0; i + 2 < i1; i += 3) {
920
+ for (i = i0; i + 2 < i1; i += 3) {
837
921
  compute_accu2<3>(data, pp, a4lo, a4hi); // adds 3 max
838
922
  }
839
923
  switch (i1 - i) {
840
- case 2:
841
- compute_accu2<2>(data, pp, a4lo, a4hi);
842
- break;
843
- case 1:
844
- compute_accu2<1>(data, pp, a4lo, a4hi);
845
- break;
924
+ case 2:
925
+ compute_accu2<2>(data, pp, a4lo, a4hi);
926
+ break;
927
+ case 1:
928
+ compute_accu2<1>(data, pp, a4lo, a4hi);
929
+ break;
846
930
  }
847
931
 
848
932
  a8lo += accu4to8(a4lo);
@@ -859,50 +943,72 @@ simd16uint16 histogram_8(
859
943
  return a16;
860
944
  }
861
945
 
862
-
863
946
  /************************************************************
864
947
  * 16 bins
865
948
  ************************************************************/
866
949
 
867
-
868
-
869
950
  static const simd32uint8 shifts2(_mm256_setr_epi8(
870
- 1, 2, 4, 8, 16, 32, 64, (char)128,
871
- 1, 2, 4, 8, 16, 32, 64, (char)128,
872
- 1, 2, 4, 8, 16, 32, 64, (char)128,
873
- 1, 2, 4, 8, 16, 32, 64, (char)128
874
- ));
875
-
876
-
877
- simd32uint8 shiftr_16(simd32uint8 x, int n)
878
- {
951
+ 1,
952
+ 2,
953
+ 4,
954
+ 8,
955
+ 16,
956
+ 32,
957
+ 64,
958
+ (char)128,
959
+ 1,
960
+ 2,
961
+ 4,
962
+ 8,
963
+ 16,
964
+ 32,
965
+ 64,
966
+ (char)128,
967
+ 1,
968
+ 2,
969
+ 4,
970
+ 8,
971
+ 16,
972
+ 32,
973
+ 64,
974
+ (char)128,
975
+ 1,
976
+ 2,
977
+ 4,
978
+ 8,
979
+ 16,
980
+ 32,
981
+ 64,
982
+ (char)128));
983
+
984
+ simd32uint8 shiftr_16(simd32uint8 x, int n) {
879
985
  return simd32uint8(simd16uint16(x) >> n);
880
986
  }
881
987
 
882
-
883
988
  inline simd32uint8 combine_2x2(simd32uint8 a, simd32uint8 b) {
884
-
885
989
  __m256i a1b0 = _mm256_permute2f128_si256(a.i, b.i, 0x21);
886
990
  __m256i a0b1 = _mm256_blend_epi32(a.i, b.i, 0xF0);
887
991
 
888
992
  return simd32uint8(a1b0) + simd32uint8(a0b1);
889
993
  }
890
994
 
891
-
892
995
  // 2-bit accumulator: we can add only up to 3 elements
893
996
  // on output we return 2*4-bit results
894
- template<int N, class Preproc>
997
+ template <int N, class Preproc>
895
998
  void compute_accu2_16(
896
- const uint16_t * & data, Preproc pp,
897
- simd32uint8 & a4_0, simd32uint8 & a4_1,
898
- simd32uint8 & a4_2, simd32uint8 & a4_3
899
- ) {
999
+ const uint16_t*& data,
1000
+ Preproc pp,
1001
+ simd32uint8& a4_0,
1002
+ simd32uint8& a4_1,
1003
+ simd32uint8& a4_2,
1004
+ simd32uint8& a4_3) {
900
1005
  simd32uint8 mask1(0x55);
901
1006
  simd32uint8 a2_0; // 2-bit accu
902
1007
  simd32uint8 a2_1; // 2-bit accu
903
- a2_0.clear(); a2_1.clear();
1008
+ a2_0.clear();
1009
+ a2_1.clear();
904
1010
 
905
- for (int j = 0; j < N; j ++) {
1011
+ for (int j = 0; j < N; j++) {
906
1012
  simd16uint16 v(data);
907
1013
  data += 16;
908
1014
  v = pp(v);
@@ -925,38 +1031,27 @@ void compute_accu2_16(
925
1031
  a4_1 += a2_1 & mask2;
926
1032
  a4_2 += shiftr_16(a2_0, 2) & mask2;
927
1033
  a4_3 += shiftr_16(a2_1, 2) & mask2;
928
-
929
1034
  }
930
1035
 
931
-
932
1036
  simd32uint8 accu4to8_2(simd32uint8 a4_0, simd32uint8 a4_1) {
933
1037
  simd32uint8 mask4(0x0f);
934
1038
 
935
- simd32uint8 a8_0 = combine_2x2(
936
- a4_0 & mask4,
937
- shiftr_16(a4_0, 4) & mask4
938
- );
1039
+ simd32uint8 a8_0 = combine_2x2(a4_0 & mask4, shiftr_16(a4_0, 4) & mask4);
939
1040
 
940
- simd32uint8 a8_1 = combine_2x2(
941
- a4_1 & mask4,
942
- shiftr_16(a4_1, 4) & mask4
943
- );
1041
+ simd32uint8 a8_1 = combine_2x2(a4_1 & mask4, shiftr_16(a4_1, 4) & mask4);
944
1042
 
945
1043
  return simd32uint8(_mm256_hadd_epi16(a8_0.i, a8_1.i));
946
1044
  }
947
1045
 
948
-
949
-
950
- template<class Preproc>
951
- simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
952
-
953
- assert (n_in % 16 == 0);
1046
+ template <class Preproc>
1047
+ simd16uint16 histogram_16(const uint16_t* data, Preproc pp, size_t n_in) {
1048
+ assert(n_in % 16 == 0);
954
1049
  int n = n_in / 16;
955
1050
 
956
1051
  simd32uint8 a8lo((uint8_t)0);
957
1052
  simd32uint8 a8hi((uint8_t)0);
958
1053
 
959
- for(int i0 = 0; i0 < n; i0 += 7) {
1054
+ for (int i0 = 0; i0 < n; i0 += 7) {
960
1055
  simd32uint8 a4_0(0); // 0, 4, 8, 12
961
1056
  simd32uint8 a4_1(0); // 1, 5, 9, 13
962
1057
  simd32uint8 a4_2(0); // 2, 6, 10, 14
@@ -964,16 +1059,16 @@ simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
964
1059
 
965
1060
  int i1 = std::min(i0 + 7, n);
966
1061
  int i;
967
- for(i = i0; i + 2 < i1; i += 3) {
1062
+ for (i = i0; i + 2 < i1; i += 3) {
968
1063
  compute_accu2_16<3>(data, pp, a4_0, a4_1, a4_2, a4_3);
969
1064
  }
970
1065
  switch (i1 - i) {
971
- case 2:
972
- compute_accu2_16<2>(data, pp, a4_0, a4_1, a4_2, a4_3);
973
- break;
974
- case 1:
975
- compute_accu2_16<1>(data, pp, a4_0, a4_1, a4_2, a4_3);
976
- break;
1066
+ case 2:
1067
+ compute_accu2_16<2>(data, pp, a4_0, a4_1, a4_2, a4_3);
1068
+ break;
1069
+ case 1:
1070
+ compute_accu2_16<1>(data, pp, a4_0, a4_1, a4_2, a4_3);
1071
+ break;
977
1072
  }
978
1073
 
979
1074
  a8lo += accu4to8_2(a4_0, a4_1);
@@ -986,23 +1081,19 @@ simd16uint16 histogram_16(const uint16_t * data, Preproc pp, size_t n_in) {
986
1081
 
987
1082
  simd16uint16 a16 = simd16uint16(_mm256_hadd_epi16(a16lo.i, a16hi.i));
988
1083
 
989
- __m256i perm32 = _mm256_setr_epi32(
990
- 0, 2, 4, 6, 1, 3, 5, 7
991
- );
1084
+ __m256i perm32 = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
992
1085
  a16.i = _mm256_permutevar8x32_epi32(a16.i, perm32);
993
1086
 
994
1087
  return a16;
995
1088
  }
996
1089
 
997
1090
  struct PreprocNOP {
998
- simd16uint16 operator () (simd16uint16 x) {
1091
+ simd16uint16 operator()(simd16uint16 x) {
999
1092
  return x;
1000
1093
  }
1001
-
1002
1094
  };
1003
1095
 
1004
-
1005
- template<int shift, int nbin>
1096
+ template <int shift, int nbin>
1006
1097
  struct PreprocMinShift {
1007
1098
  simd16uint16 min16;
1008
1099
  simd16uint16 max16;
@@ -1014,59 +1105,46 @@ struct PreprocMinShift {
1014
1105
  max16.set1(vmax); // vmax inclusive
1015
1106
  }
1016
1107
 
1017
- simd16uint16 operator () (simd16uint16 x) {
1108
+ simd16uint16 operator()(simd16uint16 x) {
1018
1109
  x = x - min16;
1019
1110
  simd16uint16 mask = (x == max(x, max16)) - (x == max16);
1020
1111
  return (x >> shift) | mask;
1021
1112
  }
1022
-
1023
1113
  };
1024
1114
 
1025
1115
  /* unbounded versions of the functions */
1026
1116
 
1027
- void simd_histogram_8_unbounded(
1028
- const uint16_t *data, int n,
1029
- int *hist)
1030
- {
1117
+ void simd_histogram_8_unbounded(const uint16_t* data, int n, int* hist) {
1031
1118
  PreprocNOP pp;
1032
1119
  simd16uint16 a16 = histogram_8(data, pp, (n & ~15));
1033
1120
 
1034
1121
  ALIGNED(32) uint16_t a16_tab[16];
1035
1122
  a16.store(a16_tab);
1036
1123
 
1037
- for(int i = 0; i < 8; i++) {
1124
+ for (int i = 0; i < 8; i++) {
1038
1125
  hist[i] = a16_tab[i] + a16_tab[i + 8];
1039
1126
  }
1040
1127
 
1041
- for(int i = (n & ~15); i < n; i++) {
1128
+ for (int i = (n & ~15); i < n; i++) {
1042
1129
  hist[data[i]]++;
1043
1130
  }
1044
-
1045
1131
  }
1046
1132
 
1047
-
1048
- void simd_histogram_16_unbounded(
1049
- const uint16_t *data, int n,
1050
- int *hist)
1051
- {
1052
-
1133
+ void simd_histogram_16_unbounded(const uint16_t* data, int n, int* hist) {
1053
1134
  simd16uint16 a16 = histogram_16(data, PreprocNOP(), (n & ~15));
1054
1135
 
1055
1136
  ALIGNED(32) uint16_t a16_tab[16];
1056
1137
  a16.store(a16_tab);
1057
1138
 
1058
- for(int i = 0; i < 16; i++) {
1139
+ for (int i = 0; i < 16; i++) {
1059
1140
  hist[i] = a16_tab[i];
1060
1141
  }
1061
1142
 
1062
- for(int i = (n & ~15); i < n; i++) {
1143
+ for (int i = (n & ~15); i < n; i++) {
1063
1144
  hist[data[i]]++;
1064
1145
  }
1065
-
1066
1146
  }
1067
1147
 
1068
-
1069
-
1070
1148
  } // anonymous namespace
1071
1149
 
1072
1150
  /************************************************************
@@ -1074,10 +1152,11 @@ void simd_histogram_16_unbounded(
1074
1152
  ************************************************************/
1075
1153
 
1076
1154
  void simd_histogram_8(
1077
- const uint16_t *data, int n,
1078
- uint16_t min, int shift,
1079
- int *hist)
1080
- {
1155
+ const uint16_t* data,
1156
+ int n,
1157
+ uint16_t min,
1158
+ int shift,
1159
+ int* hist) {
1081
1160
  if (shift < 0) {
1082
1161
  simd_histogram_8_unbounded(data, n, hist);
1083
1162
  return;
@@ -1085,12 +1164,12 @@ void simd_histogram_8(
1085
1164
 
1086
1165
  simd16uint16 a16;
1087
1166
 
1088
- #define DISPATCH(s) \
1089
- case s: \
1167
+ #define DISPATCH(s) \
1168
+ case s: \
1090
1169
  a16 = histogram_8(data, PreprocMinShift<s, 8>(min), (n & ~15)); \
1091
1170
  break
1092
1171
 
1093
- switch(shift) {
1172
+ switch (shift) {
1094
1173
  DISPATCH(0);
1095
1174
  DISPATCH(1);
1096
1175
  DISPATCH(2);
@@ -1105,35 +1184,35 @@ void simd_histogram_8(
1105
1184
  DISPATCH(11);
1106
1185
  DISPATCH(12);
1107
1186
  DISPATCH(13);
1108
- default:
1109
- FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
1187
+ default:
1188
+ FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
1110
1189
  }
1111
1190
  #undef DISPATCH
1112
1191
 
1113
1192
  ALIGNED(32) uint16_t a16_tab[16];
1114
1193
  a16.store(a16_tab);
1115
1194
 
1116
- for(int i = 0; i < 8; i++) {
1195
+ for (int i = 0; i < 8; i++) {
1117
1196
  hist[i] = a16_tab[i] + a16_tab[i + 8];
1118
1197
  }
1119
1198
 
1120
1199
  // complete with remaining bins
1121
- for(int i = (n & ~15); i < n; i++) {
1122
- if (data[i] < min) continue;
1200
+ for (int i = (n & ~15); i < n; i++) {
1201
+ if (data[i] < min)
1202
+ continue;
1123
1203
  uint16_t v = data[i] - min;
1124
1204
  v >>= shift;
1125
- if (v < 8) hist[v]++;
1205
+ if (v < 8)
1206
+ hist[v]++;
1126
1207
  }
1127
-
1128
1208
  }
1129
1209
 
1130
-
1131
-
1132
1210
  void simd_histogram_16(
1133
- const uint16_t *data, int n,
1134
- uint16_t min, int shift,
1135
- int *hist)
1136
- {
1211
+ const uint16_t* data,
1212
+ int n,
1213
+ uint16_t min,
1214
+ int shift,
1215
+ int* hist) {
1137
1216
  if (shift < 0) {
1138
1217
  simd_histogram_16_unbounded(data, n, hist);
1139
1218
  return;
@@ -1141,12 +1220,12 @@ void simd_histogram_16(
1141
1220
 
1142
1221
  simd16uint16 a16;
1143
1222
 
1144
- #define DISPATCH(s) \
1145
- case s: \
1223
+ #define DISPATCH(s) \
1224
+ case s: \
1146
1225
  a16 = histogram_16(data, PreprocMinShift<s, 16>(min), (n & ~15)); \
1147
1226
  break
1148
1227
 
1149
- switch(shift) {
1228
+ switch (shift) {
1150
1229
  DISPATCH(0);
1151
1230
  DISPATCH(1);
1152
1231
  DISPATCH(2);
@@ -1160,48 +1239,47 @@ void simd_histogram_16(
1160
1239
  DISPATCH(10);
1161
1240
  DISPATCH(11);
1162
1241
  DISPATCH(12);
1163
- default:
1164
- FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
1242
+ default:
1243
+ FAISS_THROW_FMT("dispatch for shift=%d not instantiated", shift);
1165
1244
  }
1166
1245
  #undef DISPATCH
1167
1246
 
1168
1247
  ALIGNED(32) uint16_t a16_tab[16];
1169
1248
  a16.store(a16_tab);
1170
1249
 
1171
- for(int i = 0; i < 16; i++) {
1250
+ for (int i = 0; i < 16; i++) {
1172
1251
  hist[i] = a16_tab[i];
1173
1252
  }
1174
1253
 
1175
- for(int i = (n & ~15); i < n; i++) {
1176
- if (data[i] < min) continue;
1254
+ for (int i = (n & ~15); i < n; i++) {
1255
+ if (data[i] < min)
1256
+ continue;
1177
1257
  uint16_t v = data[i] - min;
1178
1258
  v >>= shift;
1179
- if (v < 16) hist[v]++;
1259
+ if (v < 16)
1260
+ hist[v]++;
1180
1261
  }
1181
-
1182
1262
  }
1183
1263
 
1184
-
1185
1264
  // no AVX2
1186
1265
  #else
1187
1266
 
1188
-
1189
-
1190
1267
  void simd_histogram_16(
1191
- const uint16_t *data, int n,
1192
- uint16_t min, int shift,
1193
- int *hist)
1194
- {
1268
+ const uint16_t* data,
1269
+ int n,
1270
+ uint16_t min,
1271
+ int shift,
1272
+ int* hist) {
1195
1273
  memset(hist, 0, sizeof(*hist) * 16);
1196
1274
  if (shift < 0) {
1197
- for(size_t i = 0; i < n; i++) {
1275
+ for (size_t i = 0; i < n; i++) {
1198
1276
  hist[data[i]]++;
1199
1277
  }
1200
1278
  } else {
1201
1279
  int vmax0 = std::min((16 << shift) + min, 65536);
1202
1280
  uint16_t vmax = uint16_t(vmax0 - 1 - min);
1203
1281
 
1204
- for(size_t i = 0; i < n; i++) {
1282
+ for (size_t i = 0; i < n; i++) {
1205
1283
  uint16_t v = data[i];
1206
1284
  v -= min;
1207
1285
  if (!(v <= vmax))
@@ -1217,40 +1295,37 @@ void simd_histogram_16(
1217
1295
  */
1218
1296
  }
1219
1297
  }
1220
-
1221
1298
  }
1222
1299
 
1223
1300
  void simd_histogram_8(
1224
- const uint16_t *data, int n,
1225
- uint16_t min, int shift,
1226
- int *hist)
1227
- {
1301
+ const uint16_t* data,
1302
+ int n,
1303
+ uint16_t min,
1304
+ int shift,
1305
+ int* hist) {
1228
1306
  memset(hist, 0, sizeof(*hist) * 8);
1229
1307
  if (shift < 0) {
1230
- for(size_t i = 0; i < n; i++) {
1308
+ for (size_t i = 0; i < n; i++) {
1231
1309
  hist[data[i]]++;
1232
1310
  }
1233
1311
  } else {
1234
- for(size_t i = 0; i < n; i++) {
1235
- if (data[i] < min) continue;
1312
+ for (size_t i = 0; i < n; i++) {
1313
+ if (data[i] < min)
1314
+ continue;
1236
1315
  uint16_t v = data[i] - min;
1237
1316
  v >>= shift;
1238
- if (v < 8) hist[v]++;
1317
+ if (v < 8)
1318
+ hist[v]++;
1239
1319
  }
1240
1320
  }
1241
-
1242
1321
  }
1243
1322
 
1244
-
1245
1323
  #endif
1246
1324
 
1247
-
1248
1325
  void PartitionStats::reset() {
1249
1326
  memset(this, 0, sizeof(*this));
1250
1327
  }
1251
1328
 
1252
1329
  PartitionStats partition_stats;
1253
1330
 
1254
-
1255
-
1256
1331
  } // namespace faiss