faiss 0.2.7 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/lib/faiss.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  12. data/vendor/faiss/faiss/AutoTune.h +0 -1
  13. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  14. data/vendor/faiss/faiss/Clustering.h +31 -21
  15. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  16. data/vendor/faiss/faiss/Index.cpp +1 -1
  17. data/vendor/faiss/faiss/Index.h +20 -5
  18. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  21. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  34. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  38. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  59. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  60. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  61. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  62. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  63. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  64. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  65. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  66. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  67. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  69. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  70. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  71. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  72. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  73. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  74. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  75. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  76. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  77. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  78. data/vendor/faiss/faiss/clone_index.h +3 -0
  79. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  80. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  81. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  82. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  90. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  92. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  93. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  97. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  98. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  99. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  101. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  104. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  105. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  106. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  107. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  108. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  109. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  110. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  111. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  113. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  119. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  125. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  126. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  127. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  128. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  129. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  133. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  135. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  136. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  137. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  138. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  139. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  140. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  141. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  142. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  143. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  144. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  145. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  146. data/vendor/faiss/faiss/utils/distances.h +81 -4
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  148. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  150. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  152. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  153. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  154. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  155. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  156. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  157. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  158. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  159. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  160. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  161. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  162. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  163. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  164. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  165. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  166. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  167. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  168. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  169. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  170. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  171. data/vendor/faiss/faiss/utils/utils.h +57 -20
  172. metadata +11 -4
@@ -275,24 +275,31 @@ struct HammingComputerDefault {
275
275
  len -= 8;
276
276
  accu += popcount64(a64[i] ^ b64[i]);
277
277
  i++;
278
+ [[fallthrough]];
278
279
  case 7:
279
280
  accu += popcount64(a64[i] ^ b64[i]);
280
281
  i++;
282
+ [[fallthrough]];
281
283
  case 6:
282
284
  accu += popcount64(a64[i] ^ b64[i]);
283
285
  i++;
286
+ [[fallthrough]];
284
287
  case 5:
285
288
  accu += popcount64(a64[i] ^ b64[i]);
286
289
  i++;
290
+ [[fallthrough]];
287
291
  case 4:
288
292
  accu += popcount64(a64[i] ^ b64[i]);
289
293
  i++;
294
+ [[fallthrough]];
290
295
  case 3:
291
296
  accu += popcount64(a64[i] ^ b64[i]);
292
297
  i++;
298
+ [[fallthrough]];
293
299
  case 2:
294
300
  accu += popcount64(a64[i] ^ b64[i]);
295
301
  i++;
302
+ [[fallthrough]];
296
303
  case 1:
297
304
  accu += popcount64(a64[i] ^ b64[i]);
298
305
  i++;
@@ -302,20 +309,28 @@ struct HammingComputerDefault {
302
309
  const uint8_t* a = a8 + 8 * quotient8;
303
310
  const uint8_t* b = b8 + 8 * quotient8;
304
311
  switch (remainder8) {
312
+ [[fallthrough]];
305
313
  case 7:
306
314
  accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
315
+ [[fallthrough]];
307
316
  case 6:
308
317
  accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
318
+ [[fallthrough]];
309
319
  case 5:
310
320
  accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
321
+ [[fallthrough]];
311
322
  case 4:
312
323
  accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
324
+ [[fallthrough]];
313
325
  case 3:
314
326
  accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
327
+ [[fallthrough]];
315
328
  case 2:
316
329
  accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
330
+ [[fallthrough]];
317
331
  case 1:
318
332
  accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
333
+ [[fallthrough]];
319
334
  default:
320
335
  break;
321
336
  }
@@ -329,93 +344,6 @@ struct HammingComputerDefault {
329
344
  }
330
345
  };
331
346
 
332
- // more inefficient than HammingComputerDefault (obsolete)
333
- struct HammingComputerM8 {
334
- const uint64_t* a;
335
- int n;
336
-
337
- HammingComputerM8() {}
338
-
339
- HammingComputerM8(const uint8_t* a8, int code_size) {
340
- set(a8, code_size);
341
- }
342
-
343
- void set(const uint8_t* a8, int code_size) {
344
- assert(code_size % 8 == 0);
345
- a = (uint64_t*)a8;
346
- n = code_size / 8;
347
- }
348
-
349
- int hamming(const uint8_t* b8) const {
350
- const uint64_t* b = (uint64_t*)b8;
351
- int accu = 0;
352
- for (int i = 0; i < n; i++)
353
- accu += popcount64(a[i] ^ b[i]);
354
- return accu;
355
- }
356
-
357
- inline int get_code_size() const {
358
- return n * 8;
359
- }
360
- };
361
-
362
- // more inefficient than HammingComputerDefault (obsolete)
363
- struct HammingComputerM4 {
364
- const uint32_t* a;
365
- int n;
366
-
367
- HammingComputerM4() {}
368
-
369
- HammingComputerM4(const uint8_t* a4, int code_size) {
370
- set(a4, code_size);
371
- }
372
-
373
- void set(const uint8_t* a4, int code_size) {
374
- assert(code_size % 4 == 0);
375
- a = (uint32_t*)a4;
376
- n = code_size / 4;
377
- }
378
-
379
- int hamming(const uint8_t* b8) const {
380
- const uint32_t* b = (uint32_t*)b8;
381
- int accu = 0;
382
- for (int i = 0; i < n; i++)
383
- accu += popcount64(a[i] ^ b[i]);
384
- return accu;
385
- }
386
-
387
- inline int get_code_size() const {
388
- return n * 4;
389
- }
390
- };
391
-
392
- /***************************************************************************
393
- * Equivalence with a template class when code size is known at compile time
394
- **************************************************************************/
395
-
396
- // default template
397
- template <int CODE_SIZE>
398
- struct HammingComputer : HammingComputerDefault {
399
- HammingComputer(const uint8_t* a, int code_size)
400
- : HammingComputerDefault(a, code_size) {}
401
- };
402
-
403
- #define SPECIALIZED_HC(CODE_SIZE) \
404
- template <> \
405
- struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
406
- HammingComputer(const uint8_t* a) \
407
- : HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
408
- }
409
-
410
- SPECIALIZED_HC(4);
411
- SPECIALIZED_HC(8);
412
- SPECIALIZED_HC(16);
413
- SPECIALIZED_HC(20);
414
- SPECIALIZED_HC(32);
415
- SPECIALIZED_HC(64);
416
-
417
- #undef SPECIALIZED_HC
418
-
419
347
  /***************************************************************************
420
348
  * generalized Hamming = number of bytes that are different between
421
349
  * two codes.
@@ -23,4 +23,61 @@
23
23
  #include <faiss/utils/hamming_distance/generic-inl.h>
24
24
  #endif
25
25
 
26
+ namespace faiss {
27
+
28
+ /***************************************************************************
29
+ * Equivalence with a template class when code size is known at compile time
30
+ **************************************************************************/
31
+
32
+ // default template
33
+ template <int CODE_SIZE>
34
+ struct HammingComputer : HammingComputerDefault {
35
+ HammingComputer(const uint8_t* a, int code_size)
36
+ : HammingComputerDefault(a, code_size) {}
37
+ };
38
+
39
+ #define SPECIALIZED_HC(CODE_SIZE) \
40
+ template <> \
41
+ struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
42
+ HammingComputer(const uint8_t* a) \
43
+ : HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
44
+ }
45
+
46
+ SPECIALIZED_HC(4);
47
+ SPECIALIZED_HC(8);
48
+ SPECIALIZED_HC(16);
49
+ SPECIALIZED_HC(20);
50
+ SPECIALIZED_HC(32);
51
+ SPECIALIZED_HC(64);
52
+
53
+ #undef SPECIALIZED_HC
54
+
55
+ /***************************************************************************
56
+ * Dispatching function that takes a code size and a consumer object
57
+ * the consumer object should contain a return type T and an operation template
58
+ * function f() that is called to perform the operation.
59
+ **************************************************************************/
60
+
61
+ template <class Consumer, class... Types>
62
+ typename Consumer::T dispatch_HammingComputer(
63
+ int code_size,
64
+ Consumer& consumer,
65
+ Types... args) {
66
+ switch (code_size) {
67
+ #define DISPATCH_HC(CODE_SIZE) \
68
+ case CODE_SIZE: \
69
+ return consumer.template f<HammingComputer##CODE_SIZE>(args...);
70
+ DISPATCH_HC(4);
71
+ DISPATCH_HC(8);
72
+ DISPATCH_HC(16);
73
+ DISPATCH_HC(20);
74
+ DISPATCH_HC(32);
75
+ DISPATCH_HC(64);
76
+ default:
77
+ return consumer.template f<HammingComputerDefault>(args...);
78
+ }
79
+ }
80
+
81
+ } // namespace faiss
82
+
26
83
  #endif
@@ -260,7 +260,6 @@ struct HammingComputer32 {
260
260
  }
261
261
 
262
262
  inline int hamming(const uint8_t* b8) const {
263
- const uint64_t* b = (uint64_t*)b8;
264
263
  uint8x16_t b0 = vld1q_u8(b8);
265
264
  uint8x16_t b1 = vld1q_u8(b8 + 16);
266
265
 
@@ -338,24 +337,31 @@ struct HammingComputerDefault {
338
337
  len -= 8;
339
338
  accu += popcount64(a64[i] ^ b64[i]);
340
339
  i++;
340
+ [[fallthrough]];
341
341
  case 7:
342
342
  accu += popcount64(a64[i] ^ b64[i]);
343
343
  i++;
344
+ [[fallthrough]];
344
345
  case 6:
345
346
  accu += popcount64(a64[i] ^ b64[i]);
346
347
  i++;
348
+ [[fallthrough]];
347
349
  case 5:
348
350
  accu += popcount64(a64[i] ^ b64[i]);
349
351
  i++;
352
+ [[fallthrough]];
350
353
  case 4:
351
354
  accu += popcount64(a64[i] ^ b64[i]);
352
355
  i++;
356
+ [[fallthrough]];
353
357
  case 3:
354
358
  accu += popcount64(a64[i] ^ b64[i]);
355
359
  i++;
360
+ [[fallthrough]];
356
361
  case 2:
357
362
  accu += popcount64(a64[i] ^ b64[i]);
358
363
  i++;
364
+ [[fallthrough]];
359
365
  case 1:
360
366
  accu += popcount64(a64[i] ^ b64[i]);
361
367
  i++;
@@ -367,18 +373,25 @@ struct HammingComputerDefault {
367
373
  switch (remainder8) {
368
374
  case 7:
369
375
  accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
376
+ [[fallthrough]];
370
377
  case 6:
371
378
  accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
379
+ [[fallthrough]];
372
380
  case 5:
373
381
  accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
382
+ [[fallthrough]];
374
383
  case 4:
375
384
  accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
385
+ [[fallthrough]];
376
386
  case 3:
377
387
  accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
388
+ [[fallthrough]];
378
389
  case 2:
379
390
  accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
391
+ [[fallthrough]];
380
392
  case 1:
381
393
  accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
394
+ [[fallthrough]];
382
395
  default:
383
396
  break;
384
397
  }
@@ -392,109 +405,6 @@ struct HammingComputerDefault {
392
405
  }
393
406
  };
394
407
 
395
- // more inefficient than HammingComputerDefault (obsolete)
396
- struct HammingComputerM8 {
397
- const uint64_t* a;
398
- int n;
399
-
400
- HammingComputerM8() {}
401
-
402
- HammingComputerM8(const uint8_t* a8, int code_size) {
403
- set(a8, code_size);
404
- }
405
-
406
- void set(const uint8_t* a8, int code_size) {
407
- assert(code_size % 8 == 0);
408
- a = (uint64_t*)a8;
409
- n = code_size / 8;
410
- }
411
-
412
- int hamming(const uint8_t* b8) const {
413
- const uint64_t* b = (uint64_t*)b8;
414
- int n4 = (n / 4) * 4;
415
- int accu = 0;
416
-
417
- int i = 0;
418
- for (; i < n4; i += 4) {
419
- accu += ::faiss::hamming<256>(a + i, b + i);
420
- }
421
- for (; i < n; i++) {
422
- accu += popcount64(a[i] ^ b[i]);
423
- }
424
- return accu;
425
- }
426
-
427
- inline int get_code_size() const {
428
- return n * 8;
429
- }
430
- };
431
-
432
- // more inefficient than HammingComputerDefault (obsolete)
433
- struct HammingComputerM4 {
434
- const uint32_t* a;
435
- int n;
436
-
437
- HammingComputerM4() {}
438
-
439
- HammingComputerM4(const uint8_t* a4, int code_size) {
440
- set(a4, code_size);
441
- }
442
-
443
- void set(const uint8_t* a4, int code_size) {
444
- assert(code_size % 4 == 0);
445
- a = (uint32_t*)a4;
446
- n = code_size / 4;
447
- }
448
-
449
- int hamming(const uint8_t* b8) const {
450
- const uint32_t* b = (uint32_t*)b8;
451
-
452
- int n8 = (n / 8) * 8;
453
- int accu = 0;
454
-
455
- int i = 0;
456
- for (; i < n8; i += 8) {
457
- accu += ::faiss::hamming<256>(
458
- (const uint64_t*)(a + i), (const uint64_t*)(b + i));
459
- }
460
- for (; i < n; i++) {
461
- accu += popcount64(a[i] ^ b[i]);
462
- }
463
- return accu;
464
- }
465
-
466
- inline int get_code_size() const {
467
- return n * 4;
468
- }
469
- };
470
-
471
- /***************************************************************************
472
- * Equivalence with a template class when code size is known at compile time
473
- **************************************************************************/
474
-
475
- // default template
476
- template <int CODE_SIZE>
477
- struct HammingComputer : HammingComputerDefault {
478
- HammingComputer(const uint8_t* a, int code_size)
479
- : HammingComputerDefault(a, code_size) {}
480
- };
481
-
482
- #define SPECIALIZED_HC(CODE_SIZE) \
483
- template <> \
484
- struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
485
- HammingComputer(const uint8_t* a) \
486
- : HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
487
- }
488
-
489
- SPECIALIZED_HC(4);
490
- SPECIALIZED_HC(8);
491
- SPECIALIZED_HC(16);
492
- SPECIALIZED_HC(20);
493
- SPECIALIZED_HC(32);
494
- SPECIALIZED_HC(64);
495
-
496
- #undef SPECIALIZED_HC
497
-
498
408
  /***************************************************************************
499
409
  * generalized Hamming = number of bytes that are different between
500
410
  * two codes.
@@ -206,7 +206,8 @@ typename C::T partition_fuzzy_median3(
206
206
  assert(n_eq_1 <= n_eq);
207
207
  }
208
208
 
209
- int wp = compress_array<C>(vals, ids, n, thresh, n_eq_1);
209
+ [[maybe_unused]] const int wp =
210
+ compress_array<C>(vals, ids, n, thresh, n_eq_1);
210
211
 
211
212
  assert(wp == q);
212
213
  if (q_out) {
@@ -750,8 +751,6 @@ typename C::T partition_fuzzy(
750
751
  size_t q_min,
751
752
  size_t q_max,
752
753
  size_t* q_out) {
753
- // the code below compiles and runs without AVX2 but it's slower than
754
- // the scalar implementation
755
754
  #ifdef __AVX2__
756
755
  constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
757
756
  if (is_uint16 && is_aligned_pointer(vals)) {
@@ -882,7 +881,7 @@ static const simd32uint8 shifts = simd32uint8::create<
882
881
  // 2-bit accumulator: we can add only up to 3 elements
883
882
  // on output we return 2*4-bit results
884
883
  // preproc returns either an index in 0..7 or 0xffff
885
- // that yeilds a 0 when used in the table look-up
884
+ // that yields a 0 when used in the table look-up
886
885
  template <int N, class Preproc>
887
886
  void compute_accu2(
888
887
  const uint16_t*& data,
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ // prefetches
11
+
12
+ #ifdef __AVX__
13
+
14
+ // AVX
15
+
16
+ #include <xmmintrin.h>
17
+
18
+ inline void prefetch_L1(const void* address) {
19
+ _mm_prefetch((const char*)address, _MM_HINT_T0);
20
+ }
21
+ inline void prefetch_L2(const void* address) {
22
+ _mm_prefetch((const char*)address, _MM_HINT_T1);
23
+ }
24
+ inline void prefetch_L3(const void* address) {
25
+ _mm_prefetch((const char*)address, _MM_HINT_T2);
26
+ }
27
+
28
+ #elif defined(__aarch64__)
29
+
30
+ // ARM64
31
+
32
+ #ifdef _MSC_VER
33
+
34
+ // todo: arm on MSVC
35
+ inline void prefetch_L1(const void* address) {}
36
+ inline void prefetch_L2(const void* address) {}
37
+ inline void prefetch_L3(const void* address) {}
38
+
39
+ #else
40
+ // arm on non-MSVC
41
+
42
+ inline void prefetch_L1(const void* address) {
43
+ __builtin_prefetch(address, 0, 3);
44
+ }
45
+ inline void prefetch_L2(const void* address) {
46
+ __builtin_prefetch(address, 0, 2);
47
+ }
48
+ inline void prefetch_L3(const void* address) {
49
+ __builtin_prefetch(address, 0, 1);
50
+ }
51
+ #endif
52
+
53
+ #else
54
+
55
+ // a generic platform
56
+
57
+ #ifdef _MSC_VER
58
+
59
+ inline void prefetch_L1(const void* address) {}
60
+ inline void prefetch_L2(const void* address) {}
61
+ inline void prefetch_L3(const void* address) {}
62
+
63
+ #else
64
+
65
+ inline void prefetch_L1(const void* address) {
66
+ __builtin_prefetch(address, 0, 3);
67
+ }
68
+ inline void prefetch_L2(const void* address) {
69
+ __builtin_prefetch(address, 0, 2);
70
+ }
71
+ inline void prefetch_L3(const void* address) {
72
+ __builtin_prefetch(address, 0, 1);
73
+ }
74
+
75
+ #endif
76
+
77
+ #endif
@@ -24,20 +24,6 @@ namespace quantize_lut {
24
24
 
25
25
  namespace {
26
26
 
27
- float round_uint8_and_mul(float* tab, size_t n) {
28
- float max = 0;
29
- for (int i = 0; i < n; i++) {
30
- if (fabs(tab[i]) > max) {
31
- max = fabs(tab[i]);
32
- }
33
- }
34
- float multiplier = 127 / max;
35
- for (int i = 0; i < n; i++) {
36
- tab[i] = floorf(tab[i] * multiplier + 128);
37
- }
38
- return multiplier;
39
- }
40
-
41
27
  // there can be NaNs in tables, they should be ignored
42
28
  float tab_min(const float* tab, size_t n) {
43
29
  float min = HUGE_VAL;
@@ -202,12 +202,6 @@ struct simd16uint16 : simd256bit {
202
202
  return simd16uint16(_mm256_cmpeq_epi16(lhs.i, rhs.i));
203
203
  }
204
204
 
205
- bool is_same(simd16uint16 other) const {
206
- const __m256i pcmp = _mm256_cmpeq_epi16(i, other.i);
207
- unsigned bitmask = _mm256_movemask_epi8(pcmp);
208
- return (bitmask == 0xffffffffU);
209
- }
210
-
211
205
  simd16uint16 operator~() const {
212
206
  return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
213
207
  }