faiss 0.3.0 → 0.3.2

Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h:

@@ -166,9 +166,12 @@ struct HammingComputer20 {
     void set(const uint8_t* a8, int code_size) {
         assert(code_size == 20);
         const uint64_t* a = (uint64_t*)a8;
+        const uint32_t* b = (uint32_t*)a8;
         a0 = a[0];
         a1 = a[1];
-        a2 = a[2];
+        // can't read a[2] since it is uint64_t, not uint32_t
+        // results in AddressSanitizer failure reading past end of array
+        a2 = b[4];
     }

     inline int hamming(const uint8_t* b8) const {
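The fix above is worth unpacking: a 20-byte code is laid out as two uint64_t words plus one trailing uint32_t, so reading a third uint64_t overruns the buffer by 4 bytes. A minimal standalone sketch of the bounds arithmetic (the buffer and names are illustrative, not from the diff):

#include <cstdint>

int main() {
    uint8_t code[20] = {}; // exactly 20 bytes, as set() asserts

    const uint64_t* a = (const uint64_t*)code;
    const uint32_t* b = (const uint32_t*)code;

    uint64_t a0 = a[0]; // bytes 0..7
    uint64_t a1 = a[1]; // bytes 8..15

    // a[2] would cover bytes 16..23, reading 4 bytes past the end;
    // b[4] covers exactly bytes 16..19 and stays in bounds
    uint32_t a2 = b[4];

    (void)a0;
    (void)a1;
    (void)a2;
    return 0;
}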
@@ -275,24 +278,31 @@ struct HammingComputerDefault {
                 len -= 8;
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 7:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 6:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 5:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 4:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 3:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 2:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 1:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
@@ -302,20 +312,28 @@ struct HammingComputerDefault {
         const uint8_t* a = a8 + 8 * quotient8;
         const uint8_t* b = b8 + 8 * quotient8;
         switch (remainder8) {
+            [[fallthrough]];
             case 7:
                 accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
+                [[fallthrough]];
             case 6:
                 accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
+                [[fallthrough]];
             case 5:
                 accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
+                [[fallthrough]];
             case 4:
                 accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
+                [[fallthrough]];
             case 3:
                 accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
+                [[fallthrough]];
             case 2:
                 accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
+                [[fallthrough]];
             case 1:
                 accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
+                [[fallthrough]];
             default:
                 break;
         }
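These hunks annotate an intentional Duff's-device cascade: each case is meant to fall into the next. For reference, a standalone sketch (not faiss code) of what the annotation does: [[fallthrough]]; placed where a break would normally go documents the missing break as deliberate and silences -Wimplicit-fallthrough without changing the generated code.

#include <cstdio>

// counts how many of the first n entries of v are nonzero,
// cascading through the cases instead of looping
int count_nonzero(int n, const int* v) {
    int accu = 0;
    switch (n) {
        case 3:
            accu += (v[2] != 0);
            [[fallthrough]]; // intentional: also count the first two
        case 2:
            accu += (v[1] != 0);
            [[fallthrough]];
        case 1:
            accu += (v[0] != 0);
            [[fallthrough]];
        default:
            break;
    }
    return accu;
}

int main() {
    int v[3] = {1, 0, 7};
    printf("%d\n", count_nonzero(3, v)); // prints 2
    return 0;
}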
@@ -329,93 +347,6 @@ struct HammingComputerDefault {
     }
 };

-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM8 {
-    const uint64_t* a;
-    int n;
-
-    HammingComputerM8() {}
-
-    HammingComputerM8(const uint8_t* a8, int code_size) {
-        set(a8, code_size);
-    }
-
-    void set(const uint8_t* a8, int code_size) {
-        assert(code_size % 8 == 0);
-        a = (uint64_t*)a8;
-        n = code_size / 8;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
-        int accu = 0;
-        for (int i = 0; i < n; i++)
-            accu += popcount64(a[i] ^ b[i]);
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 8;
-    }
-};
-
-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM4 {
-    const uint32_t* a;
-    int n;
-
-    HammingComputerM4() {}
-
-    HammingComputerM4(const uint8_t* a4, int code_size) {
-        set(a4, code_size);
-    }
-
-    void set(const uint8_t* a4, int code_size) {
-        assert(code_size % 4 == 0);
-        a = (uint32_t*)a4;
-        n = code_size / 4;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint32_t* b = (uint32_t*)b8;
-        int accu = 0;
-        for (int i = 0; i < n; i++)
-            accu += popcount64(a[i] ^ b[i]);
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 4;
-    }
-};
-
-/***************************************************************************
- * Equivalence with a template class when code size is known at compile time
- **************************************************************************/
-
-// default template
-template <int CODE_SIZE>
-struct HammingComputer : HammingComputerDefault {
-    HammingComputer(const uint8_t* a, int code_size)
-            : HammingComputerDefault(a, code_size) {}
-};
-
-#define SPECIALIZED_HC(CODE_SIZE)                                    \
-    template <>                                                      \
-    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
-        HammingComputer(const uint8_t* a)                            \
-                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
-    }
-
-SPECIALIZED_HC(4);
-SPECIALIZED_HC(8);
-SPECIALIZED_HC(16);
-SPECIALIZED_HC(20);
-SPECIALIZED_HC(32);
-SPECIALIZED_HC(64);
-
-#undef SPECIALIZED_HC
-
 /***************************************************************************
  * generalized Hamming = number of bytes that are different between
  * two codes.
data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h:

@@ -23,4 +23,62 @@
 #include <faiss/utils/hamming_distance/generic-inl.h>
 #endif

+namespace faiss {
+
+/***************************************************************************
+ * Equivalence with a template class when code size is known at compile time
+ **************************************************************************/
+
+// default template
+template <int CODE_SIZE>
+struct HammingComputer : HammingComputerDefault {
+    HammingComputer(const uint8_t* a, int code_size)
+            : HammingComputerDefault(a, code_size) {}
+};
+
+#define SPECIALIZED_HC(CODE_SIZE)                                    \
+    template <>                                                      \
+    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
+        HammingComputer(const uint8_t* a)                            \
+                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
+    }
+
+SPECIALIZED_HC(4);
+SPECIALIZED_HC(8);
+SPECIALIZED_HC(16);
+SPECIALIZED_HC(20);
+SPECIALIZED_HC(32);
+SPECIALIZED_HC(64);
+
+#undef SPECIALIZED_HC
+
+/***************************************************************************
+ * Dispatching function that takes a code size and a consumer object;
+ * the consumer object should contain a return type T and an operation
+ * template function f() that must be called to perform the operation.
+ **************************************************************************/
+
+template <class Consumer, class... Types>
+typename Consumer::T dispatch_HammingComputer(
+        int code_size,
+        Consumer& consumer,
+        Types... args) {
+    switch (code_size) {
+#define DISPATCH_HC(CODE_SIZE) \
+    case CODE_SIZE:            \
+        return consumer.template f<HammingComputer##CODE_SIZE>(args...);
+        DISPATCH_HC(4);
+        DISPATCH_HC(8);
+        DISPATCH_HC(16);
+        DISPATCH_HC(20);
+        DISPATCH_HC(32);
+        DISPATCH_HC(64);
+        default:
+            return consumer.template f<HammingComputerDefault>(args...);
+    }
+#undef DISPATCH_HC
+}
+
+} // namespace faiss
+
 #endif
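The dispatcher above pairs a runtime code_size with a compile-time HammingComputer specialization. To illustrate the consumer protocol it expects, here is a hypothetical consumer — CountEqual and all its members are illustrative; only the T/f<HC>() contract comes from the header:

#include <cstddef>
#include <cstdint>

// Hypothetical consumer: counts codes identical to a query.
// Contract: expose a member type T and a member template f<HC>()
// returning T; dispatch_HammingComputer picks HC from code_size.
struct CountEqual {
    using T = int;

    const uint8_t* query;
    int code_size;

    template <class HC>
    T f(const uint8_t* codes, size_t n) {
        HC hc(query, code_size); // concrete Hamming computer chosen at dispatch
        T n_equal = 0;
        for (size_t i = 0; i < n; i++) {
            n_equal += (hc.hamming(codes + i * code_size) == 0);
        }
        return n_equal;
    }
};

// usage sketch:
//   CountEqual consumer{query, code_size};
//   int hits = faiss::dispatch_HammingComputer(code_size, consumer, codes, n);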
data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h:

@@ -260,7 +260,6 @@ struct HammingComputer32 {
     }

     inline int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
         uint8x16_t b0 = vld1q_u8(b8);
         uint8x16_t b1 = vld1q_u8(b8 + 16);

@@ -338,24 +337,31 @@ struct HammingComputerDefault {
                 len -= 8;
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 7:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 6:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 5:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 4:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 3:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 2:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
+                [[fallthrough]];
             case 1:
                 accu += popcount64(a64[i] ^ b64[i]);
                 i++;
@@ -367,18 +373,25 @@ struct HammingComputerDefault {
         switch (remainder8) {
             case 7:
                 accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
+                [[fallthrough]];
             case 6:
                 accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
+                [[fallthrough]];
             case 5:
                 accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
+                [[fallthrough]];
             case 4:
                 accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
+                [[fallthrough]];
             case 3:
                 accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
+                [[fallthrough]];
             case 2:
                 accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
+                [[fallthrough]];
             case 1:
                 accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
+                [[fallthrough]];
             default:
                 break;
         }
@@ -392,109 +405,6 @@ struct HammingComputerDefault {
     }
 };

-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM8 {
-    const uint64_t* a;
-    int n;
-
-    HammingComputerM8() {}
-
-    HammingComputerM8(const uint8_t* a8, int code_size) {
-        set(a8, code_size);
-    }
-
-    void set(const uint8_t* a8, int code_size) {
-        assert(code_size % 8 == 0);
-        a = (uint64_t*)a8;
-        n = code_size / 8;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
-        int n4 = (n / 4) * 4;
-        int accu = 0;
-
-        int i = 0;
-        for (; i < n4; i += 4) {
-            accu += ::faiss::hamming<256>(a + i, b + i);
-        }
-        for (; i < n; i++) {
-            accu += popcount64(a[i] ^ b[i]);
-        }
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 8;
-    }
-};
-
-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM4 {
-    const uint32_t* a;
-    int n;
-
-    HammingComputerM4() {}
-
-    HammingComputerM4(const uint8_t* a4, int code_size) {
-        set(a4, code_size);
-    }
-
-    void set(const uint8_t* a4, int code_size) {
-        assert(code_size % 4 == 0);
-        a = (uint32_t*)a4;
-        n = code_size / 4;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint32_t* b = (uint32_t*)b8;
-
-        int n8 = (n / 8) * 8;
-        int accu = 0;
-
-        int i = 0;
-        for (; i < n8; i += 8) {
-            accu += ::faiss::hamming<256>(
-                    (const uint64_t*)(a + i), (const uint64_t*)(b + i));
-        }
-        for (; i < n; i++) {
-            accu += popcount64(a[i] ^ b[i]);
-        }
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 4;
-    }
-};
-
-/***************************************************************************
- * Equivalence with a template class when code size is known at compile time
- **************************************************************************/
-
-// default template
-template <int CODE_SIZE>
-struct HammingComputer : HammingComputerDefault {
-    HammingComputer(const uint8_t* a, int code_size)
-            : HammingComputerDefault(a, code_size) {}
-};
-
-#define SPECIALIZED_HC(CODE_SIZE)                                    \
-    template <>                                                      \
-    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
-        HammingComputer(const uint8_t* a)                            \
-                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
-    }
-
-SPECIALIZED_HC(4);
-SPECIALIZED_HC(8);
-SPECIALIZED_HC(16);
-SPECIALIZED_HC(20);
-SPECIALIZED_HC(32);
-SPECIALIZED_HC(64);
-
-#undef SPECIALIZED_HC
-
 /***************************************************************************
  * generalized Hamming = number of bytes that are different between
  * two codes.
data/vendor/faiss/faiss/utils/partitioning.cpp:

@@ -206,7 +206,8 @@ typename C::T partition_fuzzy_median3(
         assert(n_eq_1 <= n_eq);
     }

-    int wp = compress_array<C>(vals, ids, n, thresh, n_eq_1);
+    [[maybe_unused]] const int wp =
+            compress_array<C>(vals, ids, n, thresh, n_eq_1);

     assert(wp == q);
     if (q_out) {
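The attribute matters because wp is only consumed by assert, which expands to nothing under NDEBUG, so release builds would otherwise warn about an unused variable. A standalone sketch of the pattern (names illustrative):

#include <cassert>

int compute() {
    return 42;
}

void check() {
    // Without [[maybe_unused]], a release build (-DNDEBUG) warns about an
    // unused variable here, since assert() compiles away entirely.
    [[maybe_unused]] const int wp = compute();
    assert(wp == 42);
}

int main() {
    check();
    return 0;
}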
@@ -750,8 +751,6 @@ typename C::T partition_fuzzy(
         size_t q_min,
         size_t q_max,
         size_t* q_out) {
-    // the code below compiles and runs without AVX2 but it's slower than
-    // the scalar implementation
 #ifdef __AVX2__
     constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
     if (is_uint16 && is_aligned_pointer(vals)) {
data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp:

@@ -882,7 +881,7 @@ static const simd32uint8 shifts = simd32uint8::create<
 // 2-bit accumulator: we can add only up to 3 elements
 // on output we return 2*4-bit results
 // preproc returns either an index in 0..7 or 0xffff
-// that yeilds a 0 when used in the table look-up
+// that yields a 0 when used in the table look-up
 template <int N, class Preproc>
 void compute_accu2(
         const uint16_t*& data,
data/vendor/faiss/faiss/utils/prefetch.h (new file):

@@ -0,0 +1,77 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// prefetches
+
+#ifdef __AVX__
+
+// AVX
+
+#include <xmmintrin.h>
+
+inline void prefetch_L1(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T0);
+}
+inline void prefetch_L2(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T1);
+}
+inline void prefetch_L3(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T2);
+}
+
+#elif defined(__aarch64__)
+
+// ARM64
+
+#ifdef _MSC_VER
+
+// todo: arm on MSVC
+inline void prefetch_L1(const void* address) {}
+inline void prefetch_L2(const void* address) {}
+inline void prefetch_L3(const void* address) {}
+
+#else
+// arm on non-MSVC
+
+inline void prefetch_L1(const void* address) {
+    __builtin_prefetch(address, 0, 3);
+}
+inline void prefetch_L2(const void* address) {
+    __builtin_prefetch(address, 0, 2);
+}
+inline void prefetch_L3(const void* address) {
+    __builtin_prefetch(address, 0, 1);
+}
+#endif
+
+#else
+
+// a generic platform
+
+#ifdef _MSC_VER
+
+inline void prefetch_L1(const void* address) {}
+inline void prefetch_L2(const void* address) {}
+inline void prefetch_L3(const void* address) {}
+
+#else
+
+inline void prefetch_L1(const void* address) {
+    __builtin_prefetch(address, 0, 3);
+}
+inline void prefetch_L2(const void* address) {
+    __builtin_prefetch(address, 0, 2);
+}
+inline void prefetch_L3(const void* address) {
+    __builtin_prefetch(address, 0, 1);
+}
+
+#endif
+
+#endif
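A sketch of how these helpers are typically used: issue a hint for the next iteration's data while the current one is being processed. The loop and names are illustrative — only prefetch_L1 comes from the new header — and the look-ahead distance of one element is an arbitrary choice:

#include <cstddef>
#include <cstdint>

#include <faiss/utils/prefetch.h> // the new header above

uint64_t sum_codes(const uint8_t* codes, size_t n, size_t code_size) {
    uint64_t accu = 0;
    for (size_t i = 0; i < n; i++) {
        if (i + 1 < n) {
            // hint the next code into L1 while we work on the current one
            prefetch_L1(codes + (i + 1) * code_size);
        }
        for (size_t j = 0; j < code_size; j++) {
            accu += codes[i * code_size + j];
        }
    }
    return accu;
}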
data/vendor/faiss/faiss/utils/quantize_lut.cpp:

@@ -24,20 +24,6 @@ namespace quantize_lut {

 namespace {

-float round_uint8_and_mul(float* tab, size_t n) {
-    float max = 0;
-    for (int i = 0; i < n; i++) {
-        if (fabs(tab[i]) > max) {
-            max = fabs(tab[i]);
-        }
-    }
-    float multiplier = 127 / max;
-    for (int i = 0; i < n; i++) {
-        tab[i] = floorf(tab[i] * multiplier + 128);
-    }
-    return multiplier;
-}
-
 // there can be NaNs in tables, they should be ignored
 float tab_min(const float* tab, size_t n) {
     float min = HUGE_VAL;
data/vendor/faiss/faiss/utils/random.cpp:

@@ -54,6 +54,37 @@ double RandomGenerator::rand_double() {
     return mt() / double(mt.max());
 }

+SplitMix64RandomGenerator::SplitMix64RandomGenerator(int64_t seed)
+        : state{static_cast<uint64_t>(seed)} {}
+
+int SplitMix64RandomGenerator::rand_int() {
+    return next() & 0x7fffffff;
+}
+
+int64_t SplitMix64RandomGenerator::rand_int64() {
+    uint64_t value = next();
+    return static_cast<int64_t>(value & 0x7fffffffffffffffULL);
+}
+
+int SplitMix64RandomGenerator::rand_int(int max) {
+    return next() % max;
+}
+
+float SplitMix64RandomGenerator::rand_float() {
+    return next() / float(std::numeric_limits<uint64_t>::max());
+}
+
+double SplitMix64RandomGenerator::rand_double() {
+    return next() / double(std::numeric_limits<uint64_t>::max());
+}
+
+uint64_t SplitMix64RandomGenerator::next() {
+    uint64_t z = (state += 0x9e3779b97f4a7c15ULL);
+    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
+    z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
+    return z ^ (z >> 31);
+}
+
 /***********************************************************************
  * Random functions in this C file only exist because Torch
  * counterparts are slow and not multi-threaded. Typical use is for
@@ -162,6 +193,18 @@ void rand_perm(int* perm, size_t n, int64_t seed) {
     }
 }

+void rand_perm_splitmix64(int* perm, size_t n, int64_t seed) {
+    for (size_t i = 0; i < n; i++)
+        perm[i] = i;
+
+    SplitMix64RandomGenerator rng(seed);
+
+    for (size_t i = 0; i + 1 < n; i++) {
+        int i2 = i + rng.rand_int(n - i);
+        std::swap(perm[i], perm[i2]);
+    }
+}
+
 void byte_rand(uint8_t* x, size_t n, int64_t seed) {
     // only try to parallelize on large enough arrays
     const size_t nblock = n < 1024 ? 1 : 1024;
data/vendor/faiss/faiss/utils/random.h:

@@ -43,6 +43,30 @@ struct RandomGenerator {
     explicit RandomGenerator(int64_t seed = 1234);
 };

+/// fast random generator that cannot be used in multithreaded contexts.
+/// based on https://prng.di.unimi.it/
+struct SplitMix64RandomGenerator {
+    uint64_t state;
+
+    /// random positive integer
+    int rand_int();
+
+    /// random int64_t
+    int64_t rand_int64();
+
+    /// generate random integer between 0 and max-1
+    int rand_int(int max);
+
+    /// between 0 and 1
+    float rand_float();
+
+    double rand_double();
+
+    explicit SplitMix64RandomGenerator(int64_t seed = 1234);
+
+    uint64_t next();
+};
+
 /* Generate an array of uniform random floats / multi-threaded implementation */
 void float_rand(float* x, size_t n, int64_t seed);
 void float_randn(float* x, size_t n, int64_t seed);
@@ -53,6 +77,7 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);

 /* random permutation */
 void rand_perm(int* perm, size_t n, int64_t seed);
+void rand_perm_splitmix64(int* perm, size_t n, int64_t seed);

 /* Random set of vectors with intrinsic dimensionality 10 that is harder to
  * index than a subspace of dim 10 but easier than uniform data in dimension d
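A usage sketch for the new generator (an illustrative program, not from the diff): the stream is deterministic for a given seed, and per the header comment the generator must not be shared across threads. rand_perm_splitmix64 is the SplitMix64-backed Fisher-Yates shuffle added above.

#include <cstdio>
#include <vector>

#include <faiss/utils/random.h> // SplitMix64RandomGenerator, rand_perm_splitmix64

int main() {
    // same seed -> same sequence; keep one generator per thread
    faiss::SplitMix64RandomGenerator rng(1234);
    printf("%d %f\n", rng.rand_int(100), rng.rand_double());

    // shuffle 0..9 deterministically
    std::vector<int> perm(10);
    faiss::rand_perm_splitmix64(perm.data(), perm.size(), 1234);
    for (int p : perm) {
        printf("%d ", p);
    }
    printf("\n");
    return 0;
}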
data/vendor/faiss/faiss/utils/simdlib.h:

@@ -14,7 +14,12 @@
  * functions.
  */

-#ifdef __AVX2__
+#if defined(__AVX512F__)
+
+#include <faiss/utils/simdlib_avx2.h>
+#include <faiss/utils/simdlib_avx512.h>
+
+#elif defined(__AVX2__)

 #include <faiss/utils/simdlib_avx2.h>

@@ -22,6 +27,10 @@

 #include <faiss/utils/simdlib_neon.h>

+#elif defined(__PPC64__)
+
+#include <faiss/utils/simdlib_ppc64.h>
+
 #else

 // emulated = all operations are implemented as scalars
data/vendor/faiss/faiss/utils/simdlib_avx2.h:

@@ -202,12 +202,6 @@ struct simd16uint16 : simd256bit {
         return simd16uint16(_mm256_cmpeq_epi16(lhs.i, rhs.i));
     }

-    bool is_same(simd16uint16 other) const {
-        const __m256i pcmp = _mm256_cmpeq_epi16(i, other.i);
-        unsigned bitmask = _mm256_movemask_epi8(pcmp);
-        return (bitmask == 0xffffffffU);
-    }
-
     simd16uint16 operator~() const {
         return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
     }