faiss 0.2.6 → 0.2.7

Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h ADDED
@@ -0,0 +1,535 @@
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
+ #ifndef HAMMING_AVX2_INL_H
+ #define HAMMING_AVX2_INL_H
+
+ // AVX2 version
+
+ #include <cassert>
+ #include <cstddef>
+ #include <cstdint>
+
+ #include <faiss/impl/platform_macros.h>
+
+ #include <immintrin.h>
+
+ namespace faiss {
+
+ /* Elementary Hamming distance computation: unoptimized */
+ template <size_t nbits, typename T>
+ inline T hamming(const uint8_t* bs1, const uint8_t* bs2) {
+     const size_t nbytes = nbits / 8;
+     size_t i;
+     T h = 0;
+     for (i = 0; i < nbytes; i++) {
+         h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
+     }
+     return h;
+ }
+
+ /* Hamming distances for multiples of 64 bits */
+ template <size_t nbits>
+ inline hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
+     const size_t nwords = nbits / 64;
+     size_t i;
+     hamdis_t h = 0;
+     for (i = 0; i < nwords; i++) {
+         h += popcount64(bs1[i] ^ bs2[i]);
+     }
+     return h;
+ }
+
+ /* specialized (optimized) functions */
+ template <>
+ inline hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
+     return popcount64(pa[0] ^ pb[0]);
+ }
+
+ template <>
+ inline hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
+     return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
+ }
+
+ template <>
+ inline hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
+     return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
+             popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
+ }
+
+ /* Hamming distances for multiples of 64 bits */
+ inline hamdis_t hamming(
+         const uint64_t* bs1,
+         const uint64_t* bs2,
+         size_t nwords) {
+     hamdis_t h = 0;
+     for (size_t i = 0; i < nwords; i++) {
+         h += popcount64(bs1[i] ^ bs2[i]);
+     }
+     return h;
+ }
+
+ /******************************************************************
+  * The HammingComputer series of classes compares a single code of
+  * size 4 to 64 to incoming codes. They are intended for use as a
+  * template class where it would be inefficient to switch on the code
+  * size in the inner loop. Hopefully the compiler will inline the
+  * hamming() functions and put the a0, a1, ... in registers.
+  ******************************************************************/
+
+ struct HammingComputer4 {
+     uint32_t a0;
+
+     HammingComputer4() {}
+
+     HammingComputer4(const uint8_t* a, int code_size) {
+         set(a, code_size);
+     }
+
+     void set(const uint8_t* a, int code_size) {
+         assert(code_size == 4);
+         a0 = *(uint32_t*)a;
+     }
+
+     inline int hamming(const uint8_t* b) const {
+         return popcount64(*(uint32_t*)b ^ a0);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 4;
+     }
+ };
+
+ struct HammingComputer8 {
+     uint64_t a0;
+
+     HammingComputer8() {}
+
+     HammingComputer8(const uint8_t* a, int code_size) {
+         set(a, code_size);
+     }
+
+     void set(const uint8_t* a, int code_size) {
+         assert(code_size == 8);
+         a0 = *(uint64_t*)a;
+     }
+
+     inline int hamming(const uint8_t* b) const {
+         return popcount64(*(uint64_t*)b ^ a0);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 8;
+     }
+ };
+
+ struct HammingComputer16 {
+     uint64_t a0, a1;
+
+     HammingComputer16() {}
+
+     HammingComputer16(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         assert(code_size == 16);
+         const uint64_t* a = (uint64_t*)a8;
+         a0 = a[0];
+         a1 = a[1];
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 16;
+     }
+ };
+
+ // When applied to an array, 1/2 of the 64-bit accesses are unaligned.
+ // This incurs a penalty of ~10% wrt. fully aligned accesses.
+ struct HammingComputer20 {
+     uint64_t a0, a1;
+     uint32_t a2;
+
+     HammingComputer20() {}
+
+     HammingComputer20(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         assert(code_size == 20);
+         const uint64_t* a = (uint64_t*)a8;
+         a0 = a[0];
+         a1 = a[1];
+         a2 = a[2];
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
+                 popcount64(*(uint32_t*)(b + 2) ^ a2);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 20;
+     }
+ };
+
+ struct HammingComputer32 {
+     uint64_t a0, a1, a2, a3;
+
+     HammingComputer32() {}
+
+     HammingComputer32(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         assert(code_size == 32);
+         const uint64_t* a = (uint64_t*)a8;
+         a0 = a[0];
+         a1 = a[1];
+         a2 = a[2];
+         a3 = a[3];
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
+                 popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 32;
+     }
+ };
+
+ struct HammingComputer64 {
+     uint64_t a0, a1, a2, a3, a4, a5, a6, a7;
+
+     HammingComputer64() {}
+
+     HammingComputer64(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         assert(code_size == 64);
+         const uint64_t* a = (uint64_t*)a8;
+         a0 = a[0];
+         a1 = a[1];
+         a2 = a[2];
+         a3 = a[3];
+         a4 = a[4];
+         a5 = a[5];
+         a6 = a[6];
+         a7 = a[7];
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
+                 popcount64(b[2] ^ a2) + popcount64(b[3] ^ a3) +
+                 popcount64(b[4] ^ a4) + popcount64(b[5] ^ a5) +
+                 popcount64(b[6] ^ a6) + popcount64(b[7] ^ a7);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 64;
+     }
+ };
+
+ struct HammingComputerDefault {
+     const uint8_t* a8;
+     int quotient8;
+     int remainder8;
+
+     HammingComputerDefault() {}
+
+     HammingComputerDefault(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         this->a8 = a8;
+         quotient8 = code_size / 8;
+         remainder8 = code_size % 8;
+     }
+
+     int hamming(const uint8_t* b8) const {
+         int accu = 0;
+
+         const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a8);
+         const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b8);
+         int i = 0, len = quotient8;
+         switch (len & 7) {
+             default:
+                 while (len > 7) {
+                     len -= 8;
+                     accu += popcount64(a64[i] ^ b64[i]);
+                     i++;
+                     case 7:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 6:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 5:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 4:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 3:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 2:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                     case 1:
+                         accu += popcount64(a64[i] ^ b64[i]);
+                         i++;
+                 }
+         }
+         if (remainder8) {
+             const uint8_t* a = a8 + 8 * quotient8;
+             const uint8_t* b = b8 + 8 * quotient8;
+             switch (remainder8) {
+                 case 7:
+                     accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
+                 case 6:
+                     accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
+                 case 5:
+                     accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
+                 case 4:
+                     accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
+                 case 3:
+                     accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
+                 case 2:
+                     accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
+                 case 1:
+                     accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
+                 default:
+                     break;
+             }
+         }
+
+         return accu;
+     }
+
+     inline int get_code_size() const {
+         return quotient8 * 8 + remainder8;
+     }
+ };
+
+ // less efficient than HammingComputerDefault (obsolete)
+ struct HammingComputerM8 {
+     const uint64_t* a;
+     int n;
+
+     HammingComputerM8() {}
+
+     HammingComputerM8(const uint8_t* a8, int code_size) {
+         set(a8, code_size);
+     }
+
+     void set(const uint8_t* a8, int code_size) {
+         assert(code_size % 8 == 0);
+         a = (uint64_t*)a8;
+         n = code_size / 8;
+     }
+
+     int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         int accu = 0;
+         for (int i = 0; i < n; i++)
+             accu += popcount64(a[i] ^ b[i]);
+         return accu;
+     }
+
+     inline int get_code_size() const {
+         return n * 8;
+     }
+ };
+
+ // less efficient than HammingComputerDefault (obsolete)
+ struct HammingComputerM4 {
+     const uint32_t* a;
+     int n;
+
+     HammingComputerM4() {}
+
+     HammingComputerM4(const uint8_t* a4, int code_size) {
+         set(a4, code_size);
+     }
+
+     void set(const uint8_t* a4, int code_size) {
+         assert(code_size % 4 == 0);
+         a = (uint32_t*)a4;
+         n = code_size / 4;
+     }
+
+     int hamming(const uint8_t* b8) const {
+         const uint32_t* b = (uint32_t*)b8;
+         int accu = 0;
+         for (int i = 0; i < n; i++)
+             accu += popcount64(a[i] ^ b[i]);
+         return accu;
+     }
+
+     inline int get_code_size() const {
+         return n * 4;
+     }
+ };
+
+ /***************************************************************************
+  * Equivalence with a template class when code size is known at compile time
+  **************************************************************************/
+
+ // default template
+ template <int CODE_SIZE>
+ struct HammingComputer : HammingComputerDefault {
+     HammingComputer(const uint8_t* a, int code_size)
+             : HammingComputerDefault(a, code_size) {}
+ };
+
+ #define SPECIALIZED_HC(CODE_SIZE)                                    \
+     template <>                                                      \
+     struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
+         HammingComputer(const uint8_t* a)                            \
+                 : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
+     }
+
+ SPECIALIZED_HC(4);
+ SPECIALIZED_HC(8);
+ SPECIALIZED_HC(16);
+ SPECIALIZED_HC(20);
+ SPECIALIZED_HC(32);
+ SPECIALIZED_HC(64);
+
+ #undef SPECIALIZED_HC
+
+ /***************************************************************************
+  * generalized Hamming = number of bytes that are different between
+  * two codes.
+  ***************************************************************************/
+
+ inline int generalized_hamming_64(uint64_t a) {
+     a |= a >> 1;
+     a |= a >> 2;
+     a |= a >> 4;
+     a &= 0x0101010101010101UL;
+     return popcount64(a);
+ }
+
+ struct GenHammingComputer8 {
+     uint64_t a0;
+
+     GenHammingComputer8(const uint8_t* a, int code_size) {
+         assert(code_size == 8);
+         a0 = *(uint64_t*)a;
+     }
+
+     inline int hamming(const uint8_t* b) const {
+         return generalized_hamming_64(*(uint64_t*)b ^ a0);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 8;
+     }
+ };
+
+ // I'm not sure whether this version is faster or slower, tbh
+ // todo: test on different CPUs
+ struct GenHammingComputer16 {
+     __m128i a;
+
+     GenHammingComputer16(const uint8_t* a8, int code_size) {
+         assert(code_size == 16);
+         a = _mm_loadu_si128((const __m128i_u*)a8);
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const __m128i b = _mm_loadu_si128((const __m128i_u*)b8);
+         const __m128i cmp = _mm_cmpeq_epi8(a, b);
+         const auto movemask = _mm_movemask_epi8(cmp);
+         return 16 - popcount32(movemask);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 16;
+     }
+ };
+
+ struct GenHammingComputer32 {
+     __m256i a;
+
+     GenHammingComputer32(const uint8_t* a8, int code_size) {
+         assert(code_size == 32);
+         a = _mm256_loadu_si256((const __m256i_u*)a8);
+     }
+
+     inline int hamming(const uint8_t* b8) const {
+         const __m256i b = _mm256_loadu_si256((const __m256i_u*)b8);
+         const __m256i cmp = _mm256_cmpeq_epi8(a, b);
+         const uint32_t movemask = _mm256_movemask_epi8(cmp);
+         return 32 - popcount32(movemask);
+     }
+
+     inline static constexpr int get_code_size() {
+         return 32;
+     }
+ };
+
+ // A specialized version might be needed for the very long
+ // GenHamming code_size. In such a case, one may accumulate
+ // counts using _mm256_sub_epi8 and then compute a horizontal
+ // sum (using _mm256_sad_epu8, maybe, in blocks of no larger
+ // than 256 * 32 bytes).
+
+ struct GenHammingComputerM8 {
+     const uint64_t* a;
+     int n;
+
+     GenHammingComputerM8(const uint8_t* a8, int code_size) {
+         assert(code_size % 8 == 0);
+         a = (uint64_t*)a8;
+         n = code_size / 8;
+     }
+
+     int hamming(const uint8_t* b8) const {
+         const uint64_t* b = (uint64_t*)b8;
+         int accu = 0;
+
+         int i = 0;
+         int n4 = (n / 4) * 4;
+         for (; i < n4; i += 4) {
+             const __m256i av = _mm256_loadu_si256((const __m256i_u*)(a + i));
+             const __m256i bv = _mm256_loadu_si256((const __m256i_u*)(b + i));
+             const __m256i cmp = _mm256_cmpeq_epi8(av, bv);
+             const uint32_t movemask = _mm256_movemask_epi8(cmp);
+             accu += 32 - popcount32(movemask);
+         }
+
+         for (; i < n; i++)
+             accu += generalized_hamming_64(a[i] ^ b[i]);
+         return accu;
+     }
+
+     inline int get_code_size() const {
+         return n * 8;
+     }
+ };
+
+ } // namespace faiss
+
+ #endif
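
For orientation, here is a minimal usage sketch of the HammingComputer family defined in the header above. The sketch is not part of the diff; it assumes the vendored headers are on the compiler's include path, that these definitions are reachable through <faiss/utils/hamming.h> as in the upstream FAISS tree, and a build with AVX2 enabled (or the generic fallback).

#include <faiss/utils/hamming.h>

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    constexpr int code_size = 32; // bytes per binary code
    std::vector<uint8_t> query(code_size, 0xFF);
    std::vector<uint8_t> db(4 * code_size, 0x0F); // four database codes

    // Code size known at compile time: HammingComputer<32> resolves to
    // the specialized HammingComputer32, i.e. four fixed XOR + popcount64.
    faiss::HammingComputer<code_size> hc(query.data());
    for (int i = 0; i < 4; i++) {
        int d = hc.hamming(db.data() + i * code_size);
        std::printf("code %d: distance %d\n", i, d); // 0xFF ^ 0x0F sets 4 bits per byte -> 128
    }

    // Code size known only at runtime: HammingComputerDefault handles any
    // length with its unrolled 64-bit loop plus the per-byte table.
    faiss::HammingComputerDefault hcd(query.data(), code_size);
    std::printf("default: %d\n", hcd.hamming(db.data()));
    return 0;
}

Fixing the query code once and streaming database codes through hamming() is exactly the access pattern the comment block in the header describes; the template specializations exist so the inner loop compiles down to a handful of register-resident XOR and popcount instructions.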
data/vendor/faiss/faiss/utils/hamming_distance/common.h ADDED
@@ -0,0 +1,48 @@
+ /**
+  * Copyright (c) Facebook, Inc. and its affiliates.
+  *
+  * This source code is licensed under the MIT license found in the
+  * LICENSE file in the root directory of this source tree.
+  */
+
+ #ifndef FAISS_hamming_common_h
+ #define FAISS_hamming_common_h
+
+ #include <cstdint>
+
+ #include <faiss/impl/platform_macros.h>
+
+ /* The Hamming distance type */
+ using hamdis_t = int32_t;
+
+ namespace faiss {
+
+ inline int popcount32(uint32_t x) {
+     return __builtin_popcount(x);
+ }
+
+ // popcount
+ inline int popcount64(uint64_t x) {
+     return __builtin_popcountl(x);
+ }
+
+ // This table was moved from the .cpp to the .h file because it was
+ // otherwise causing compilation errors when building swig modules
+ // on Windows.
+ // todo for C++17: switch to 'inline constexpr'
+ static constexpr uint8_t hamdis_tab_ham_bytes[256] = {
+         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+         2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+         2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+         4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+         3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+         2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+         4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
+
+ } // namespace faiss
+
+ #endif
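
And a small self-contained sanity check for common.h (again not part of the diff; the vendored include path is an assumption). It verifies the invariant that the table-driven byte counts and the builtin popcounts agree, which is what lets hamming<nbits>() in the header above mix the two freely:

#include <faiss/utils/hamming_distance/common.h>

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // Every entry of the 256-entry table must equal the byte's popcount.
    for (uint32_t b = 0; b < 256; b++) {
        assert(faiss::hamdis_tab_ham_bytes[b] == faiss::popcount32(b));
    }

    // hamdis_t is int32_t: two complementary 64-bit words differ in all bits.
    uint64_t x = 0xF0F0F0F0F0F0F0F0ULL;
    hamdis_t d = faiss::popcount64(x ^ ~x); // ~x flips every bit of x
    std::printf("distance = %d\n", (int)d); // prints 64
    return 0;
}

Keeping the table static constexpr in the header, instead of defining it in a .cpp, gives every translation unit (including the SWIG-generated ones on Windows) a complete definition, which is the motivation the comment in the header records.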