faiss 0.2.7 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/lib/faiss.rb +1 -1
  11. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  12. data/vendor/faiss/faiss/AutoTune.h +0 -1
  13. data/vendor/faiss/faiss/Clustering.cpp +4 -18
  14. data/vendor/faiss/faiss/Clustering.h +31 -21
  15. data/vendor/faiss/faiss/IVFlib.cpp +22 -11
  16. data/vendor/faiss/faiss/Index.cpp +1 -1
  17. data/vendor/faiss/faiss/Index.h +20 -5
  18. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  21. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  22. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  23. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
  34. data/vendor/faiss/faiss/IndexHNSW.h +12 -48
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
  38. data/vendor/faiss/faiss/IndexIVF.h +37 -5
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
  59. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  60. data/vendor/faiss/faiss/IndexNSG.h +10 -10
  61. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  62. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  63. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  64. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  65. data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
  66. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  67. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  68. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
  69. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  70. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  71. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  72. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  73. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  74. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  75. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  76. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  77. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  78. data/vendor/faiss/faiss/clone_index.h +3 -0
  79. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
  80. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  81. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
  82. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  84. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  85. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  89. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  90. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  91. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
  92. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  93. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  95. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  96. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
  97. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  98. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  99. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  100. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  101. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  102. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
  104. data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
  105. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  106. data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
  107. data/vendor/faiss/faiss/impl/HNSW.h +9 -8
  108. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  109. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
  110. data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
  111. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  113. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  114. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  116. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  117. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  119. data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
  122. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
  123. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  125. data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
  126. data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
  127. data/vendor/faiss/faiss/impl/io.cpp +10 -10
  128. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  129. data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
  130. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
  131. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  132. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  133. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
  134. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  135. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  136. data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
  137. data/vendor/faiss/faiss/index_factory.cpp +10 -7
  138. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  139. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
  140. data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
  141. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  142. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  143. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  144. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  145. data/vendor/faiss/faiss/utils/distances.cpp +128 -74
  146. data/vendor/faiss/faiss/utils/distances.h +81 -4
  147. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  148. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  149. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  150. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  151. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  152. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  153. data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
  154. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  155. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  156. data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
  157. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  158. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  159. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  160. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
  161. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
  162. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  163. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  164. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  165. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  166. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  167. data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
  168. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  169. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  170. data/vendor/faiss/faiss/utils/utils.cpp +112 -6
  171. data/vendor/faiss/faiss/utils/utils.h +57 -20
  172. metadata +11 -4
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <arm_neon.h>
11
+ #include <cstdint>
12
+
13
+ namespace faiss {
14
+
15
+ inline uint16_t encode_fp16(float x) {
16
+ float32x4_t fx4 = vdupq_n_f32(x);
17
+ float16x4_t f16x4 = vcvt_f16_f32(fx4);
18
+ uint16x4_t ui16x4 = vreinterpret_u16_f16(f16x4);
19
+ return vduph_lane_u16(ui16x4, 3);
20
+ }
21
+
22
+ inline float decode_fp16(uint16_t x) {
23
+ uint16x4_t ui16x4 = vdup_n_u16(x);
24
+ float16x4_t f16x4 = vreinterpret_f16_u16(ui16x4);
25
+ float32x4_t fx4 = vcvt_f32_f16(f16x4);
26
+ return vdups_laneq_f32(fx4, 3);
27
+ }
28
+
29
+ } // namespace faiss
@@ -13,6 +13,8 @@
13
13
 
14
14
  #if defined(__F16C__)
15
15
  #include <faiss/utils/fp16-fp16c.h>
16
+ #elif defined(__aarch64__)
17
+ #include <faiss/utils/fp16-arm.h>
16
18
  #else
17
19
  #include <faiss/utils/fp16-inl.h>
18
20
  #endif
@@ -5,14 +5,13 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- // -*- c++ -*-
9
-
10
8
  /*
11
9
  * Implementation of Hamming related functions (distances, smallest distance
12
10
  * selection with regular heap|radix and probabilistic heap|radix).
13
11
  *
14
12
  * IMPLEMENTATION NOTES
15
- * Bitvectors are generally assumed to be multiples of 64 bits.
13
+ * Optimal speed is typically obtained for vector sizes of multiples of 64
14
+ * bits.
16
15
  *
17
16
  * hamdis_t is used for distances because at this time
18
17
  * it is not clear how we will need to balance
@@ -20,15 +19,13 @@
20
19
  * - memory usage
21
20
  * - cache-misses when dealing with large volumes of data (lower bits is better)
22
21
  *
23
- * The hamdis_t should optimally be compatibe with one of the Torch Storage
24
- * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
25
22
  */
26
23
 
27
24
  #include <faiss/utils/hamming.h>
28
25
 
29
- #include <math.h>
30
- #include <stdio.h>
31
26
  #include <algorithm>
27
+ #include <cmath>
28
+ #include <cstdio>
32
29
  #include <memory>
33
30
  #include <vector>
34
31
 
@@ -38,8 +35,6 @@
38
35
  #include <faiss/utils/approx_topk_hamming/approx_topk_hamming.h>
39
36
  #include <faiss/utils/utils.h>
40
37
 
41
- static const size_t BLOCKSIZE_QUERY = 8192;
42
-
43
38
  namespace faiss {
44
39
 
45
40
  size_t hamming_batch_size = 65536;
@@ -165,9 +160,11 @@ size_t match_hamming_thres(
165
160
  return posm;
166
161
  }
167
162
 
163
+ namespace {
164
+
168
165
  /* Return closest neighbors w.r.t Hamming distance, using a heap. */
169
166
  template <class HammingComputer>
170
- static void hammings_knn_hc(
167
+ void hammings_knn_hc(
171
168
  int bytes_per_code,
172
169
  int_maxheap_array_t* __restrict ha,
173
170
  const uint8_t* __restrict bs1,
@@ -234,7 +231,7 @@ static void hammings_knn_hc(
234
231
 
235
232
  /* Return closest neighbors w.r.t Hamming distance, using max count. */
236
233
  template <class HammingComputer>
237
- static void hammings_knn_mc(
234
+ void hammings_knn_mc(
238
235
  int bytes_per_code,
239
236
  const uint8_t* __restrict a,
240
237
  const uint8_t* __restrict b,
@@ -272,10 +269,10 @@ static void hammings_knn_mc(
272
269
  HCounterState<HammingComputer>& csi = cs[i];
273
270
 
274
271
  int nres = 0;
275
- for (int b = 0; b < nBuckets && nres < k; b++) {
276
- for (int l = 0; l < csi.counters[b] && nres < k; l++) {
277
- labels[i * k + nres] = csi.ids_per_dis[b * k + l];
278
- distances[i * k + nres] = b;
272
+ for (int b_2 = 0; b_2 < nBuckets && nres < k; b_2++) {
273
+ for (int l = 0; l < csi.counters[b_2] && nres < k; l++) {
274
+ labels[i * k + nres] = csi.ids_per_dis[b_2 * k + l];
275
+ distances[i * k + nres] = b_2;
279
276
  nres++;
280
277
  }
281
278
  }
@@ -287,6 +284,63 @@ static void hammings_knn_mc(
287
284
  }
288
285
  }
289
286
 
287
+ template <class HammingComputer>
288
+ void hamming_range_search(
289
+ const uint8_t* a,
290
+ const uint8_t* b,
291
+ size_t na,
292
+ size_t nb,
293
+ int radius,
294
+ size_t code_size,
295
+ RangeSearchResult* res) {
296
+ #pragma omp parallel
297
+ {
298
+ RangeSearchPartialResult pres(res);
299
+
300
+ #pragma omp for
301
+ for (int64_t i = 0; i < na; i++) {
302
+ HammingComputer hc(a + i * code_size, code_size);
303
+ const uint8_t* yi = b;
304
+ RangeQueryResult& qres = pres.new_result(i);
305
+
306
+ for (size_t j = 0; j < nb; j++) {
307
+ int dis = hc.hamming(yi);
308
+ if (dis < radius) {
309
+ qres.add(dis, j);
310
+ }
311
+ yi += code_size;
312
+ }
313
+ }
314
+ pres.finalize();
315
+ }
316
+ }
317
+
318
+ struct Run_hammings_knn_hc {
319
+ using T = void;
320
+ template <class HammingComputer, class... Types>
321
+ void f(Types... args) {
322
+ hammings_knn_hc<HammingComputer>(args...);
323
+ }
324
+ };
325
+
326
+ struct Run_hammings_knn_mc {
327
+ using T = void;
328
+ template <class HammingComputer, class... Types>
329
+ void f(Types... args) {
330
+ hammings_knn_mc<HammingComputer>(args...);
331
+ }
332
+ };
333
+
334
+ struct Run_hamming_range_search {
335
+ using T = void;
336
+ template <class HammingComputer, class... Types>
337
+ void f(Types... args) {
338
+ hamming_range_search<HammingComputer>(args...);
339
+ }
340
+ };
341
+
342
+ } // namespace
343
+
290
344
  /* Functions to map vectors to bits. Assume proper allocation done beforehand,
291
345
  meaning that b should be able to receive as many bits as x may produce. */
292
346
 
@@ -437,28 +491,9 @@ void hammings_knn_hc(
437
491
  size_t ncodes,
438
492
  int order,
439
493
  ApproxTopK_mode_t approx_topk_mode) {
440
- switch (ncodes) {
441
- case 4:
442
- hammings_knn_hc<faiss::HammingComputer4>(
443
- 4, ha, a, b, nb, order, true, approx_topk_mode);
444
- break;
445
- case 8:
446
- hammings_knn_hc<faiss::HammingComputer8>(
447
- 8, ha, a, b, nb, order, true, approx_topk_mode);
448
- break;
449
- case 16:
450
- hammings_knn_hc<faiss::HammingComputer16>(
451
- 16, ha, a, b, nb, order, true, approx_topk_mode);
452
- break;
453
- case 32:
454
- hammings_knn_hc<faiss::HammingComputer32>(
455
- 32, ha, a, b, nb, order, true, approx_topk_mode);
456
- break;
457
- default:
458
- hammings_knn_hc<faiss::HammingComputerDefault>(
459
- ncodes, ha, a, b, nb, order, true, approx_topk_mode);
460
- break;
461
- }
494
+ Run_hammings_knn_hc r;
495
+ dispatch_HammingComputer(
496
+ ncodes, r, ncodes, ha, a, b, nb, order, true, approx_topk_mode);
462
497
  }
463
498
 
464
499
  void hammings_knn_mc(
@@ -470,58 +505,9 @@ void hammings_knn_mc(
470
505
  size_t ncodes,
471
506
  int32_t* __restrict distances,
472
507
  int64_t* __restrict labels) {
473
- switch (ncodes) {
474
- case 4:
475
- hammings_knn_mc<faiss::HammingComputer4>(
476
- 4, a, b, na, nb, k, distances, labels);
477
- break;
478
- case 8:
479
- hammings_knn_mc<faiss::HammingComputer8>(
480
- 8, a, b, na, nb, k, distances, labels);
481
- break;
482
- case 16:
483
- hammings_knn_mc<faiss::HammingComputer16>(
484
- 16, a, b, na, nb, k, distances, labels);
485
- break;
486
- case 32:
487
- hammings_knn_mc<faiss::HammingComputer32>(
488
- 32, a, b, na, nb, k, distances, labels);
489
- break;
490
- default:
491
- hammings_knn_mc<faiss::HammingComputerDefault>(
492
- ncodes, a, b, na, nb, k, distances, labels);
493
- break;
494
- }
495
- }
496
- template <class HammingComputer>
497
- static void hamming_range_search_template(
498
- const uint8_t* a,
499
- const uint8_t* b,
500
- size_t na,
501
- size_t nb,
502
- int radius,
503
- size_t code_size,
504
- RangeSearchResult* res) {
505
- #pragma omp parallel
506
- {
507
- RangeSearchPartialResult pres(res);
508
-
509
- #pragma omp for
510
- for (int64_t i = 0; i < na; i++) {
511
- HammingComputer hc(a + i * code_size, code_size);
512
- const uint8_t* yi = b;
513
- RangeQueryResult& qres = pres.new_result(i);
514
-
515
- for (size_t j = 0; j < nb; j++) {
516
- int dis = hc.hamming(yi);
517
- if (dis < radius) {
518
- qres.add(dis, j);
519
- }
520
- yi += code_size;
521
- }
522
- }
523
- pres.finalize();
524
- }
508
+ Run_hammings_knn_mc r;
509
+ dispatch_HammingComputer(
510
+ ncodes, r, ncodes, a, b, na, nb, k, distances, labels);
525
511
  }
526
512
 
527
513
  void hamming_range_search(
@@ -532,27 +518,9 @@ void hamming_range_search(
532
518
  int radius,
533
519
  size_t code_size,
534
520
  RangeSearchResult* result) {
535
- #define HC(name) \
536
- hamming_range_search_template<name>(a, b, na, nb, radius, code_size, result)
537
-
538
- switch (code_size) {
539
- case 4:
540
- HC(HammingComputer4);
541
- break;
542
- case 8:
543
- HC(HammingComputer8);
544
- break;
545
- case 16:
546
- HC(HammingComputer16);
547
- break;
548
- case 32:
549
- HC(HammingComputer32);
550
- break;
551
- default:
552
- HC(HammingComputerDefault);
553
- break;
554
- }
555
- #undef HC
521
+ Run_hamming_range_search r;
522
+ dispatch_HammingComputer(
523
+ code_size, r, a, b, na, nb, radius, code_size, result);
556
524
  }
557
525
 
558
526
  /* Count number of matches given a max threshold */
@@ -711,4 +679,88 @@ void generalized_hammings_knn_hc(
711
679
  ha->reorder();
712
680
  }
713
681
 
682
+ void pack_bitstrings(
683
+ size_t n,
684
+ size_t M,
685
+ int nbit,
686
+ const int32_t* unpacked,
687
+ uint8_t* packed,
688
+ size_t code_size) {
689
+ FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
690
+ #pragma omp parallel for if (n > 1000)
691
+ for (int64_t i = 0; i < n; i++) {
692
+ const int32_t* in = unpacked + i * M;
693
+ uint8_t* out = packed + i * code_size;
694
+ BitstringWriter wr(out, code_size);
695
+ for (int j = 0; j < M; j++) {
696
+ wr.write(in[j], nbit);
697
+ }
698
+ }
699
+ }
700
+
701
+ void pack_bitstrings(
702
+ size_t n,
703
+ size_t M,
704
+ const int32_t* nbit,
705
+ const int32_t* unpacked,
706
+ uint8_t* packed,
707
+ size_t code_size) {
708
+ int totbit = 0;
709
+ for (int j = 0; j < M; j++) {
710
+ totbit += nbit[j];
711
+ }
712
+ FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
713
+ #pragma omp parallel for if (n > 1000)
714
+ for (int64_t i = 0; i < n; i++) {
715
+ const int32_t* in = unpacked + i * M;
716
+ uint8_t* out = packed + i * code_size;
717
+ BitstringWriter wr(out, code_size);
718
+ for (int j = 0; j < M; j++) {
719
+ wr.write(in[j], nbit[j]);
720
+ }
721
+ }
722
+ }
723
+
724
+ void unpack_bitstrings(
725
+ size_t n,
726
+ size_t M,
727
+ int nbit,
728
+ const uint8_t* packed,
729
+ size_t code_size,
730
+ int32_t* unpacked) {
731
+ FAISS_THROW_IF_NOT(code_size >= (M * nbit + 7) / 8);
732
+ #pragma omp parallel for if (n > 1000)
733
+ for (int64_t i = 0; i < n; i++) {
734
+ const uint8_t* in = packed + i * code_size;
735
+ int32_t* out = unpacked + i * M;
736
+ BitstringReader rd(in, code_size);
737
+ for (int j = 0; j < M; j++) {
738
+ out[j] = rd.read(nbit);
739
+ }
740
+ }
741
+ }
742
+
743
+ void unpack_bitstrings(
744
+ size_t n,
745
+ size_t M,
746
+ const int32_t* nbit,
747
+ const uint8_t* packed,
748
+ size_t code_size,
749
+ int32_t* unpacked) {
750
+ int totbit = 0;
751
+ for (int j = 0; j < M; j++) {
752
+ totbit += nbit[j];
753
+ }
754
+ FAISS_THROW_IF_NOT(code_size >= (totbit + 7) / 8);
755
+ #pragma omp parallel for if (n > 1000)
756
+ for (int64_t i = 0; i < n; i++) {
757
+ const uint8_t* in = packed + i * code_size;
758
+ int32_t* out = unpacked + i * M;
759
+ BitstringReader rd(in, code_size);
760
+ for (int j = 0; j < M; j++) {
761
+ out[j] = rd.read(nbit[j]);
762
+ }
763
+ }
764
+ }
765
+
714
766
  } // namespace faiss
@@ -222,6 +222,64 @@ void generalized_hammings_knn_hc(
222
222
  size_t code_size,
223
223
  int ordered = true);
224
224
 
225
+ /** Pack a set of n codes of size M * nbit
226
+ *
227
+ * @param n number of codes to pack
228
+ * @param M number of elementary codes per code
229
+ * @param nbit number of bits per elementary code
230
+ * @param unpacked input unpacked codes, size (n, M)
231
+ * @param packed output packed codes, size (n, code_size)
232
+ * @param code_size should be >= ceil(M * nbit / 8)
233
+ */
234
+ void pack_bitstrings(
235
+ size_t n,
236
+ size_t M,
237
+ int nbit,
238
+ const int32_t* unpacked,
239
+ uint8_t* packed,
240
+ size_t code_size);
241
+
242
+ /** Pack a set of n codes of variable sizes
243
+ *
244
+ * @param nbits number of bits per entry (array of size M)
245
+ */
246
+ void pack_bitstrings(
247
+ size_t n,
248
+ size_t M,
249
+ const int32_t* nbits,
250
+ const int32_t* unpacked,
251
+ uint8_t* packed,
252
+ size_t code_size);
253
+
254
+ /** Unpack a set of n codes of size M * nbit
255
+ *
256
+ * @param n number of codes to unpack
257
+ * @param M number of elementary codes per code
258
+ * @param nbit number of bits per elementary code
259
+ * @param unpacked output unpacked codes, size (n, M)
260
+ * @param packed input packed codes, size (n, code_size)
261
+ * @param code_size should be >= ceil(M * nbit / 8)
262
+ */
263
+ void unpack_bitstrings(
264
+ size_t n,
265
+ size_t M,
266
+ int nbit,
267
+ const uint8_t* packed,
268
+ size_t code_size,
269
+ int32_t* unpacked);
270
+
271
+ /** Unpack a set of n codes of variable sizes
272
+ *
273
+ * @param nbits number of bits per entry (array of size M)
274
+ */
275
+ void unpack_bitstrings(
276
+ size_t n,
277
+ size_t M,
278
+ const int32_t* nbits,
279
+ const uint8_t* packed,
280
+ size_t code_size,
281
+ int32_t* unpacked);
282
+
225
283
  } // namespace faiss
226
284
 
227
285
  #include <faiss/utils/hamming-inl.h>
@@ -259,8 +259,8 @@ struct HammingComputerDefault {
259
259
  set(a8, code_size);
260
260
  }
261
261
 
262
- void set(const uint8_t* a8, int code_size) {
263
- this->a8 = a8;
262
+ void set(const uint8_t* a8_2, int code_size) {
263
+ this->a8 = a8_2;
264
264
  quotient8 = code_size / 8;
265
265
  remainder8 = code_size % 8;
266
266
  }
@@ -277,24 +277,31 @@ struct HammingComputerDefault {
277
277
  len -= 8;
278
278
  accu += popcount64(a64[i] ^ b64[i]);
279
279
  i++;
280
+ [[fallthrough]];
280
281
  case 7:
281
282
  accu += popcount64(a64[i] ^ b64[i]);
282
283
  i++;
284
+ [[fallthrough]];
283
285
  case 6:
284
286
  accu += popcount64(a64[i] ^ b64[i]);
285
287
  i++;
288
+ [[fallthrough]];
286
289
  case 5:
287
290
  accu += popcount64(a64[i] ^ b64[i]);
288
291
  i++;
292
+ [[fallthrough]];
289
293
  case 4:
290
294
  accu += popcount64(a64[i] ^ b64[i]);
291
295
  i++;
296
+ [[fallthrough]];
292
297
  case 3:
293
298
  accu += popcount64(a64[i] ^ b64[i]);
294
299
  i++;
300
+ [[fallthrough]];
295
301
  case 2:
296
302
  accu += popcount64(a64[i] ^ b64[i]);
297
303
  i++;
304
+ [[fallthrough]];
298
305
  case 1:
299
306
  accu += popcount64(a64[i] ^ b64[i]);
300
307
  i++;
@@ -306,18 +313,25 @@ struct HammingComputerDefault {
306
313
  switch (remainder8) {
307
314
  case 7:
308
315
  accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
316
+ [[fallthrough]];
309
317
  case 6:
310
318
  accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
319
+ [[fallthrough]];
311
320
  case 5:
312
321
  accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
322
+ [[fallthrough]];
313
323
  case 4:
314
324
  accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
325
+ [[fallthrough]];
315
326
  case 3:
316
327
  accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
328
+ [[fallthrough]];
317
329
  case 2:
318
330
  accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
331
+ [[fallthrough]];
319
332
  case 1:
320
333
  accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
334
+ [[fallthrough]];
321
335
  default:
322
336
  break;
323
337
  }
@@ -331,93 +345,6 @@ struct HammingComputerDefault {
331
345
  }
332
346
  };
333
347
 
334
- // more inefficient than HammingComputerDefault (obsolete)
335
- struct HammingComputerM8 {
336
- const uint64_t* a;
337
- int n;
338
-
339
- HammingComputerM8() {}
340
-
341
- HammingComputerM8(const uint8_t* a8, int code_size) {
342
- set(a8, code_size);
343
- }
344
-
345
- void set(const uint8_t* a8, int code_size) {
346
- assert(code_size % 8 == 0);
347
- a = (uint64_t*)a8;
348
- n = code_size / 8;
349
- }
350
-
351
- int hamming(const uint8_t* b8) const {
352
- const uint64_t* b = (uint64_t*)b8;
353
- int accu = 0;
354
- for (int i = 0; i < n; i++)
355
- accu += popcount64(a[i] ^ b[i]);
356
- return accu;
357
- }
358
-
359
- inline int get_code_size() const {
360
- return n * 8;
361
- }
362
- };
363
-
364
- // more inefficient than HammingComputerDefault (obsolete)
365
- struct HammingComputerM4 {
366
- const uint32_t* a;
367
- int n;
368
-
369
- HammingComputerM4() {}
370
-
371
- HammingComputerM4(const uint8_t* a4, int code_size) {
372
- set(a4, code_size);
373
- }
374
-
375
- void set(const uint8_t* a4, int code_size) {
376
- assert(code_size % 4 == 0);
377
- a = (uint32_t*)a4;
378
- n = code_size / 4;
379
- }
380
-
381
- int hamming(const uint8_t* b8) const {
382
- const uint32_t* b = (uint32_t*)b8;
383
- int accu = 0;
384
- for (int i = 0; i < n; i++)
385
- accu += popcount64(a[i] ^ b[i]);
386
- return accu;
387
- }
388
-
389
- inline int get_code_size() const {
390
- return n * 4;
391
- }
392
- };
393
-
394
- /***************************************************************************
395
- * Equivalence with a template class when code size is known at compile time
396
- **************************************************************************/
397
-
398
- // default template
399
- template <int CODE_SIZE>
400
- struct HammingComputer : HammingComputerDefault {
401
- HammingComputer(const uint8_t* a, int code_size)
402
- : HammingComputerDefault(a, code_size) {}
403
- };
404
-
405
- #define SPECIALIZED_HC(CODE_SIZE) \
406
- template <> \
407
- struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
408
- HammingComputer(const uint8_t* a) \
409
- : HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
410
- }
411
-
412
- SPECIALIZED_HC(4);
413
- SPECIALIZED_HC(8);
414
- SPECIALIZED_HC(16);
415
- SPECIALIZED_HC(20);
416
- SPECIALIZED_HC(32);
417
- SPECIALIZED_HC(64);
418
-
419
- #undef SPECIALIZED_HC
420
-
421
348
  /***************************************************************************
422
349
  * generalized Hamming = number of bytes that are different between
423
350
  * two codes.
@@ -17,6 +17,7 @@ using hamdis_t = int32_t;
17
17
 
18
18
  namespace faiss {
19
19
 
20
+ // trust the compiler to provide efficient popcount implementations
20
21
  inline int popcount32(uint32_t x) {
21
22
  return __builtin_popcount(x);
22
23
  }