faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -166,9 +166,12 @@ struct HammingComputer20 {
     void set(const uint8_t* a8, int code_size) {
         assert(code_size == 20);
         const uint64_t* a = (uint64_t*)a8;
+        const uint32_t* b = (uint32_t*)a8;
         a0 = a[0];
         a1 = a[1];
-        a2 = a[2];
+        // can't read a[2] since it is uint64_t, not uint32_t
+        // results in AddressSanitizer failure reading past end of array
+        a2 = b[4];
     }

     inline int hamming(const uint8_t* b8) const {
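The a2 = b[4] change is pure bounds arithmetic: a 20-byte code holds two full uint64_t words (bytes 0–15) plus one trailing uint32_t (bytes 16–19), so reading a[2] as a uint64_t touches bytes 16–23 and runs four bytes past the buffer. A minimal standalone sketch of the same layout (names here are illustrative, not from the diff):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        uint8_t code[20] = {};
        uint64_t w0, w1;
        uint32_t w2;
        std::memcpy(&w0, code, 8);      // bytes 0..7
        std::memcpy(&w1, code + 8, 8);  // bytes 8..15
        std::memcpy(&w2, code + 16, 4); // bytes 16..19; an 8-byte read here
                                        // would overrun the buffer by 4 bytes
        assert(w0 == 0 && w1 == 0 && w2 == 0);
        return 0;
    }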
@@ -275,24 +278,31 @@ struct HammingComputerDefault {
             len -= 8;
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 7:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 6:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 5:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 4:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 3:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 2:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 1:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
@@ -302,20 +312,28 @@ struct HammingComputerDefault {
         const uint8_t* a = a8 + 8 * quotient8;
         const uint8_t* b = b8 + 8 * quotient8;
         switch (remainder8) {
+            [[fallthrough]];
             case 7:
                 accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
+                [[fallthrough]];
             case 6:
                 accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
+                [[fallthrough]];
             case 5:
                 accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
+                [[fallthrough]];
             case 4:
                 accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
+                [[fallthrough]];
             case 3:
                 accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
+                [[fallthrough]];
             case 2:
                 accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
+                [[fallthrough]];
             case 1:
                 accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
+                [[fallthrough]];
             default:
                 break;
         }
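Both switches above are unrolled remainder loops in which every case is meant to fall into the next; the C++17 [[fallthrough]] attribute documents that intent and silences -Wimplicit-fallthrough without changing behavior. A minimal sketch of the pattern (illustrative names, not faiss code):

    #include <cstddef>

    // Sum the last `remainder` elements (remainder < 4) without a loop.
    int sum_tail(const int* v, size_t remainder) {
        int accu = 0;
        switch (remainder) {
            case 3:
                accu += v[2];
                [[fallthrough]]; // intentional: keep accumulating
            case 2:
                accu += v[1];
                [[fallthrough]];
            case 1:
                accu += v[0];
                break;
            default:
                break;
        }
        return accu;
    }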
@@ -329,93 +347,6 @@ struct HammingComputerDefault {
     }
 };

-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM8 {
-    const uint64_t* a;
-    int n;
-
-    HammingComputerM8() {}
-
-    HammingComputerM8(const uint8_t* a8, int code_size) {
-        set(a8, code_size);
-    }
-
-    void set(const uint8_t* a8, int code_size) {
-        assert(code_size % 8 == 0);
-        a = (uint64_t*)a8;
-        n = code_size / 8;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
-        int accu = 0;
-        for (int i = 0; i < n; i++)
-            accu += popcount64(a[i] ^ b[i]);
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 8;
-    }
-};
-
-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM4 {
-    const uint32_t* a;
-    int n;
-
-    HammingComputerM4() {}
-
-    HammingComputerM4(const uint8_t* a4, int code_size) {
-        set(a4, code_size);
-    }
-
-    void set(const uint8_t* a4, int code_size) {
-        assert(code_size % 4 == 0);
-        a = (uint32_t*)a4;
-        n = code_size / 4;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint32_t* b = (uint32_t*)b8;
-        int accu = 0;
-        for (int i = 0; i < n; i++)
-            accu += popcount64(a[i] ^ b[i]);
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 4;
-    }
-};
-
-/***************************************************************************
- * Equivalence with a template class when code size is known at compile time
- **************************************************************************/
-
-// default template
-template <int CODE_SIZE>
-struct HammingComputer : HammingComputerDefault {
-    HammingComputer(const uint8_t* a, int code_size)
-            : HammingComputerDefault(a, code_size) {}
-};
-
-#define SPECIALIZED_HC(CODE_SIZE)                                    \
-    template <>                                                      \
-    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
-        HammingComputer(const uint8_t* a)                            \
-                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
-    }
-
-SPECIALIZED_HC(4);
-SPECIALIZED_HC(8);
-SPECIALIZED_HC(16);
-SPECIALIZED_HC(20);
-SPECIALIZED_HC(32);
-SPECIALIZED_HC(64);
-
-#undef SPECIALIZED_HC
-
 /***************************************************************************
  * generalized Hamming = number of bytes that are different between
  * two codes.
@@ -23,4 +23,62 @@
 #include <faiss/utils/hamming_distance/generic-inl.h>
 #endif

+namespace faiss {
+
+/***************************************************************************
+ * Equivalence with a template class when code size is known at compile time
+ **************************************************************************/
+
+// default template
+template <int CODE_SIZE>
+struct HammingComputer : HammingComputerDefault {
+    HammingComputer(const uint8_t* a, int code_size)
+            : HammingComputerDefault(a, code_size) {}
+};
+
+#define SPECIALIZED_HC(CODE_SIZE)                                    \
+    template <>                                                      \
+    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
+        HammingComputer(const uint8_t* a)                            \
+                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
+    }
+
+SPECIALIZED_HC(4);
+SPECIALIZED_HC(8);
+SPECIALIZED_HC(16);
+SPECIALIZED_HC(20);
+SPECIALIZED_HC(32);
+SPECIALIZED_HC(64);
+
+#undef SPECIALIZED_HC
+
+/***************************************************************************
+ * Dispatching function that takes a code size and a consumer object
+ * the consumer object should contain a return type T and an operation
+ * template function f() that must be called to perform the operation.
+ **************************************************************************/
+
+template <class Consumer, class... Types>
+typename Consumer::T dispatch_HammingComputer(
+        int code_size,
+        Consumer& consumer,
+        Types... args) {
+    switch (code_size) {
+#define DISPATCH_HC(CODE_SIZE) \
+    case CODE_SIZE:            \
+        return consumer.template f<HammingComputer##CODE_SIZE>(args...);
+        DISPATCH_HC(4);
+        DISPATCH_HC(8);
+        DISPATCH_HC(16);
+        DISPATCH_HC(20);
+        DISPATCH_HC(32);
+        DISPATCH_HC(64);
+        default:
+            return consumer.template f<HammingComputerDefault>(args...);
+    }
+#undef DISPATCH_HC
+}
+
+} // namespace faiss
+
 #endif
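The new dispatch_HammingComputer maps a runtime code_size to the matching compile-time specialization. A consumer could be wired up as in this hedged sketch (the HamDis struct and its fields are illustrative, not part of the diff):

    // Consumer contract per the comment above: a member type T (the return
    // type) and a template member f<HC>() that does the work with the
    // selected HammingComputer type.
    struct HamDis {
        using T = int;
        const uint8_t* a;
        const uint8_t* b;
        int code_size;

        template <class HC>
        T f() {
            HC hc(a, code_size); // HC is HammingComputer4..64 or Default
            return hc.hamming(b);
        }
    };

    // usage: HamDis consumer{a, b, 16};
    //        int d = faiss::dispatch_HammingComputer(16, consumer);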
@@ -260,7 +260,6 @@ struct HammingComputer32 {
     }

     inline int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
         uint8x16_t b0 = vld1q_u8(b8);
         uint8x16_t b1 = vld1q_u8(b8 + 16);

@@ -338,24 +337,31 @@ struct HammingComputerDefault {
             len -= 8;
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 7:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 6:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 5:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 4:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 3:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 2:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
+            [[fallthrough]];
         case 1:
             accu += popcount64(a64[i] ^ b64[i]);
             i++;
@@ -367,18 +373,25 @@ struct HammingComputerDefault {
         switch (remainder8) {
             case 7:
                 accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
+                [[fallthrough]];
             case 6:
                 accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
+                [[fallthrough]];
             case 5:
                 accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
+                [[fallthrough]];
             case 4:
                 accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
+                [[fallthrough]];
             case 3:
                 accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
+                [[fallthrough]];
             case 2:
                 accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
+                [[fallthrough]];
             case 1:
                 accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
+                [[fallthrough]];
             default:
                 break;
         }
@@ -392,109 +405,6 @@ struct HammingComputerDefault {
     }
 };

-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM8 {
-    const uint64_t* a;
-    int n;
-
-    HammingComputerM8() {}
-
-    HammingComputerM8(const uint8_t* a8, int code_size) {
-        set(a8, code_size);
-    }
-
-    void set(const uint8_t* a8, int code_size) {
-        assert(code_size % 8 == 0);
-        a = (uint64_t*)a8;
-        n = code_size / 8;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint64_t* b = (uint64_t*)b8;
-        int n4 = (n / 4) * 4;
-        int accu = 0;
-
-        int i = 0;
-        for (; i < n4; i += 4) {
-            accu += ::faiss::hamming<256>(a + i, b + i);
-        }
-        for (; i < n; i++) {
-            accu += popcount64(a[i] ^ b[i]);
-        }
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 8;
-    }
-};
-
-// more inefficient than HammingComputerDefault (obsolete)
-struct HammingComputerM4 {
-    const uint32_t* a;
-    int n;
-
-    HammingComputerM4() {}
-
-    HammingComputerM4(const uint8_t* a4, int code_size) {
-        set(a4, code_size);
-    }
-
-    void set(const uint8_t* a4, int code_size) {
-        assert(code_size % 4 == 0);
-        a = (uint32_t*)a4;
-        n = code_size / 4;
-    }
-
-    int hamming(const uint8_t* b8) const {
-        const uint32_t* b = (uint32_t*)b8;
-
-        int n8 = (n / 8) * 8;
-        int accu = 0;
-
-        int i = 0;
-        for (; i < n8; i += 8) {
-            accu += ::faiss::hamming<256>(
-                    (const uint64_t*)(a + i), (const uint64_t*)(b + i));
-        }
-        for (; i < n; i++) {
-            accu += popcount64(a[i] ^ b[i]);
-        }
-        return accu;
-    }
-
-    inline int get_code_size() const {
-        return n * 4;
-    }
-};
-
-/***************************************************************************
- * Equivalence with a template class when code size is known at compile time
- **************************************************************************/
-
-// default template
-template <int CODE_SIZE>
-struct HammingComputer : HammingComputerDefault {
-    HammingComputer(const uint8_t* a, int code_size)
-            : HammingComputerDefault(a, code_size) {}
-};
-
-#define SPECIALIZED_HC(CODE_SIZE)                                    \
-    template <>                                                      \
-    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
-        HammingComputer(const uint8_t* a)                            \
-                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
-    }
-
-SPECIALIZED_HC(4);
-SPECIALIZED_HC(8);
-SPECIALIZED_HC(16);
-SPECIALIZED_HC(20);
-SPECIALIZED_HC(32);
-SPECIALIZED_HC(64);
-
-#undef SPECIALIZED_HC
-
 /***************************************************************************
  * generalized Hamming = number of bytes that are different between
  * two codes.
@@ -206,7 +206,8 @@ typename C::T partition_fuzzy_median3(
         assert(n_eq_1 <= n_eq);
     }

-    int wp = compress_array<C>(vals, ids, n, thresh, n_eq_1);
+    [[maybe_unused]] const int wp =
+            compress_array<C>(vals, ids, n, thresh, n_eq_1);

     assert(wp == q);
     if (q_out) {
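Here wp is consumed only by the assert, which expands to nothing under NDEBUG, so release builds would otherwise warn about a dead variable; [[maybe_unused]] suppresses exactly that. The same idiom in isolation (an illustrative sketch, not faiss code):

    #include <cassert>

    int checked_double(int q) {
        [[maybe_unused]] const int wp = q * 2;
        assert(wp == q + q); // only use of wp; vanishes when NDEBUG is set
        return q;
    }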
@@ -750,8 +751,6 @@ typename C::T partition_fuzzy(
         size_t q_min,
         size_t q_max,
         size_t* q_out) {
-    // the code below compiles and runs without AVX2 but it's slower than
-    // the scalar implementation
 #ifdef __AVX2__
     constexpr bool is_uint16 = std::is_same<typename C::T, uint16_t>::value;
     if (is_uint16 && is_aligned_pointer(vals)) {
@@ -882,7 +881,7 @@ static const simd32uint8 shifts = simd32uint8::create<
 // 2-bit accumulator: we can add only up to 3 elements
 // on output we return 2*4-bit results
 // preproc returns either an index in 0..7 or 0xffff
-// that yeilds a 0 when used in the table look-up
+// that yields a 0 when used in the table look-up
 template <int N, class Preproc>
 void compute_accu2(
         const uint16_t*& data,
@@ -0,0 +1,77 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// prefetches
+
+#ifdef __AVX__
+
+// AVX
+
+#include <xmmintrin.h>
+
+inline void prefetch_L1(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T0);
+}
+inline void prefetch_L2(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T1);
+}
+inline void prefetch_L3(const void* address) {
+    _mm_prefetch((const char*)address, _MM_HINT_T2);
+}
+
+#elif defined(__aarch64__)
+
+// ARM64
+
+#ifdef _MSC_VER
+
+// todo: arm on MSVC
+inline void prefetch_L1(const void* address) {}
+inline void prefetch_L2(const void* address) {}
+inline void prefetch_L3(const void* address) {}
+
+#else
+// arm on non-MSVC
+
+inline void prefetch_L1(const void* address) {
+    __builtin_prefetch(address, 0, 3);
+}
+inline void prefetch_L2(const void* address) {
+    __builtin_prefetch(address, 0, 2);
+}
+inline void prefetch_L3(const void* address) {
+    __builtin_prefetch(address, 0, 1);
+}
+#endif
+
+#else
+
+// a generic platform
+
+#ifdef _MSC_VER
+
+inline void prefetch_L1(const void* address) {}
+inline void prefetch_L2(const void* address) {}
+inline void prefetch_L3(const void* address) {}
+
+#else
+
+inline void prefetch_L1(const void* address) {
+    __builtin_prefetch(address, 0, 3);
+}
+inline void prefetch_L2(const void* address) {
+    __builtin_prefetch(address, 0, 2);
+}
+inline void prefetch_L3(const void* address) {
+    __builtin_prefetch(address, 0, 1);
+}
+
+#endif
+
+#endif
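A hedged sketch of how prefetch helpers like these are typically used: issue a prefetch for the next code block while computing on the current one, hiding memory latency behind the arithmetic. The loop and names are illustrative and assume the header above is on the include path:

    #include <cstddef>
    #include <cstdint>
    // #include <faiss/utils/prefetch.h>

    uint32_t scan_codes(const uint8_t* codes, size_t n, size_t code_size) {
        uint32_t accu = 0;
        for (size_t i = 0; i < n; i++) {
            if (i + 1 < n) {
                prefetch_L1(codes + (i + 1) * code_size); // warm next code
            }
            accu += codes[i * code_size]; // stand-in for real distance work
        }
        return accu;
    }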
@@ -24,20 +24,6 @@ namespace quantize_lut {

 namespace {

-float round_uint8_and_mul(float* tab, size_t n) {
-    float max = 0;
-    for (int i = 0; i < n; i++) {
-        if (fabs(tab[i]) > max) {
-            max = fabs(tab[i]);
-        }
-    }
-    float multiplier = 127 / max;
-    for (int i = 0; i < n; i++) {
-        tab[i] = floorf(tab[i] * multiplier + 128);
-    }
-    return multiplier;
-}
-
 // there can be NaNs in tables, they should be ignored
 float tab_min(const float* tab, size_t n) {
     float min = HUGE_VAL;
@@ -54,6 +54,37 @@ double RandomGenerator::rand_double() {
     return mt() / double(mt.max());
 }

+SplitMix64RandomGenerator::SplitMix64RandomGenerator(int64_t seed)
+        : state{static_cast<uint64_t>(seed)} {}
+
+int SplitMix64RandomGenerator::rand_int() {
+    return next() & 0x7fffffff;
+}
+
+int64_t SplitMix64RandomGenerator::rand_int64() {
+    uint64_t value = next();
+    return static_cast<int64_t>(value & 0x7fffffffffffffffULL);
+}
+
+int SplitMix64RandomGenerator::rand_int(int max) {
+    return next() % max;
+}
+
+float SplitMix64RandomGenerator::rand_float() {
+    return next() / float(std::numeric_limits<uint64_t>::max());
+}
+
+double SplitMix64RandomGenerator::rand_double() {
+    return next() / double(std::numeric_limits<uint64_t>::max());
+}
+
+uint64_t SplitMix64RandomGenerator::next() {
+    uint64_t z = (state += 0x9e3779b97f4a7c15ULL);
+    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
+    z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
+    return z ^ (z >> 31);
+}
+
 /***********************************************************************
  * Random functions in this C file only exist because Torch
  * counterparts are slow and not multi-threaded. Typical use is for
@@ -162,6 +193,18 @@ void rand_perm(int* perm, size_t n, int64_t seed) {
     }
 }

+void rand_perm_splitmix64(int* perm, size_t n, int64_t seed) {
+    for (size_t i = 0; i < n; i++)
+        perm[i] = i;
+
+    SplitMix64RandomGenerator rng(seed);
+
+    for (size_t i = 0; i + 1 < n; i++) {
+        int i2 = i + rng.rand_int(n - i);
+        std::swap(perm[i], perm[i2]);
+    }
+}
+
 void byte_rand(uint8_t* x, size_t n, int64_t seed) {
     // only try to parallelize on large enough arrays
     const size_t nblock = n < 1024 ? 1 : 1024;
@@ -43,6 +43,30 @@ struct RandomGenerator {
     explicit RandomGenerator(int64_t seed = 1234);
 };

+/// fast random generator that cannot be used in multithreaded contexts.
+/// based on https://prng.di.unimi.it/
+struct SplitMix64RandomGenerator {
+    uint64_t state;
+
+    /// random positive integer
+    int rand_int();
+
+    /// random int64_t
+    int64_t rand_int64();
+
+    /// generate random integer between 0 and max-1
+    int rand_int(int max);
+
+    /// between 0 and 1
+    float rand_float();
+
+    double rand_double();
+
+    explicit SplitMix64RandomGenerator(int64_t seed = 1234);
+
+    uint64_t next();
+};
+
 /* Generate an array of uniform random floats / multi-threaded implementation */
 void float_rand(float* x, size_t n, int64_t seed);
 void float_randn(float* x, size_t n, int64_t seed);
@@ -53,6 +77,7 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);

 /* random permutation */
 void rand_perm(int* perm, size_t n, int64_t seed);
+void rand_perm_splitmix64(int* perm, size_t n, int64_t seed);

 /* Random set of vectors with intrinsic dimensionality 10 that is harder to
  * index than a subspace of dim 10 but easier than uniform data in dimension d
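Putting the new SplitMix64 pieces together, a minimal usage sketch (assumes only the declarations above; the seed values and function name are arbitrary):

    #include <vector>
    // #include <faiss/utils/random.h>

    void demo() {
        faiss::SplitMix64RandomGenerator rng(42); // deterministic, single-threaded
        float f = rng.rand_float(); // in [0, 1]
        int k = rng.rand_int(10);   // in [0, 10)

        std::vector<int> perm(8);
        faiss::rand_perm_splitmix64(perm.data(), perm.size(), 123);
        (void)f;
        (void)k;
    }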
@@ -14,7 +14,12 @@
  * functions.
  */

-#ifdef __AVX2__
+#if defined(__AVX512F__)
+
+#include <faiss/utils/simdlib_avx2.h>
+#include <faiss/utils/simdlib_avx512.h>
+
+#elif defined(__AVX2__)

 #include <faiss/utils/simdlib_avx2.h>

@@ -22,6 +27,10 @@

 #include <faiss/utils/simdlib_neon.h>

+#elif defined(__PPC64__)
+
+#include <faiss/utils/simdlib_ppc64.h>
+
 #else

 // emulated = all operations are implemented as scalars
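With these two hunks the backend selection order becomes AVX-512, then AVX2, then NEON, then ppc64, then scalar emulation, decided entirely by predefined compiler macros. A tiny probe (illustrative only, mirroring the header's own macro chain) shows which branch a given build takes:

    #include <cstdio>
    #include <faiss/utils/simdlib.h>

    int main() {
    #if defined(__AVX512F__)
        std::puts("simdlib backend: AVX-512");
    #elif defined(__AVX2__)
        std::puts("simdlib backend: AVX2");
    #elif defined(__aarch64__)
        std::puts("simdlib backend: NEON");
    #elif defined(__PPC64__)
        std::puts("simdlib backend: ppc64");
    #else
        std::puts("simdlib backend: emulated scalars");
    #endif
        return 0;
    }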
@@ -202,12 +202,6 @@ struct simd16uint16 : simd256bit {
         return simd16uint16(_mm256_cmpeq_epi16(lhs.i, rhs.i));
     }

-    bool is_same(simd16uint16 other) const {
-        const __m256i pcmp = _mm256_cmpeq_epi16(i, other.i);
-        unsigned bitmask = _mm256_movemask_epi8(pcmp);
-        return (bitmask == 0xffffffffU);
-    }
-
     simd16uint16 operator~() const {
         return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
     }