faiss 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,792 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ /*
11
+ * Implementation of Hamming related functions (distances, smallest distance
12
+ * selection with regular heap|radix and probabilistic heap|radix).
13
+ *
14
+ * IMPLEMENTATION NOTES
15
+ * Bitvectors are generally assumed to be multiples of 64 bits.
16
+ *
17
+ * hamdis_t is used for distances because at this time
18
+ * it is not clear how we will need to balance
19
+ * - flexibility in vector size (unclear more than 2^16 or even 2^8 bitvectors)
20
+ * - memory usage
21
+ * - cache-misses when dealing with large volumes of data (lower bits is better)
22
+ *
23
+ * The hamdis_t should optimally be compatible with one of the Torch Storage
24
+ * (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
25
+ */
26
+
27
+ #include <faiss/utils/hamming.h>
28
+
29
+ #include <vector>
30
+ #include <memory>
31
+ #include <stdlib.h>
32
+ #include <stdio.h>
33
+ #include <math.h>
34
+ #include <assert.h>
35
+ #include <limits.h>
36
+
37
+ #include <faiss/utils/Heap.h>
38
+ #include <faiss/impl/FaissAssert.h>
39
+ #include <faiss/utils/utils.h>
40
+
41
+ static const size_t BLOCKSIZE_QUERY = 8192;
42
+
43
+
44
+ namespace faiss {
45
+
46
+ size_t hamming_batch_size = 65536;
47
+
48
+ static const uint8_t hamdis_tab_ham_bytes[256] = {
49
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
50
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
51
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
52
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
53
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
54
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
56
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
57
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
59
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
61
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
62
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
63
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
64
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
65
+ };
66
+
67
+
68
+ /* Elementary Hamming distance computation: unoptimized */
69
+ template <size_t nbits, typename T>
70
+ T hamming (const uint8_t *bs1,
71
+ const uint8_t *bs2)
72
+ {
73
+ const size_t nbytes = nbits / 8;
74
+ size_t i;
75
+ T h = 0;
76
+ for (i = 0; i < nbytes; i++)
77
+ h += (T) hamdis_tab_ham_bytes[bs1[i]^bs2[i]];
78
+ return h;
79
+ }
80
+
81
+
82
+ /* Hamming distances for multiples of 64 bits */
83
+ template <size_t nbits>
84
+ hamdis_t hamming (const uint64_t * bs1, const uint64_t * bs2)
85
+ {
86
+ const size_t nwords = nbits / 64;
87
+ size_t i;
88
+ hamdis_t h = 0;
89
+ for (i = 0; i < nwords; i++)
90
+ h += popcount64 (bs1[i] ^ bs2[i]);
91
+ return h;
92
+ }
93
+
94
+
95
+
96
+ /* specialized (optimized) functions */
97
+ template <>
98
+ hamdis_t hamming<64> (const uint64_t * pa, const uint64_t * pb)
99
+ {
100
+ return popcount64 (pa[0] ^ pb[0]);
101
+ }
102
+
103
+
104
+ template <>
105
+ hamdis_t hamming<128> (const uint64_t *pa, const uint64_t *pb)
106
+ {
107
+ return popcount64 (pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
108
+ }
109
+
110
+
111
+ template <>
112
+ hamdis_t hamming<256> (const uint64_t * pa, const uint64_t * pb)
113
+ {
114
+ return popcount64 (pa[0] ^ pb[0])
115
+ + popcount64 (pa[1] ^ pb[1])
116
+ + popcount64 (pa[2] ^ pb[2])
117
+ + popcount64 (pa[3] ^ pb[3]);
118
+ }
119
+
120
+
121
+ /* Hamming distances for multiple of 64 bits */
122
+ hamdis_t hamming (
123
+ const uint64_t * bs1,
124
+ const uint64_t * bs2,
125
+ size_t nwords)
126
+ {
127
+ size_t i;
128
+ hamdis_t h = 0;
129
+ for (i = 0; i < nwords; i++)
130
+ h += popcount64 (bs1[i] ^ bs2[i]);
131
+ return h;
132
+ }
133
+
134
+
135
+
136
+ template <size_t nbits>
137
+ void hammings (
138
+ const uint64_t * bs1,
139
+ const uint64_t * bs2,
140
+ size_t n1, size_t n2,
141
+ hamdis_t * dis)
142
+
143
+ {
144
+ size_t i, j;
145
+ const size_t nwords = nbits / 64;
146
+ for (i = 0; i < n1; i++) {
147
+ const uint64_t * __restrict bs1_ = bs1 + i * nwords;
148
+ hamdis_t * __restrict dis_ = dis + i * n2;
149
+ for (j = 0; j < n2; j++)
150
+ dis_[j] = hamming<nbits>(bs1_, bs2 + j * nwords);
151
+ }
152
+ }
153
+
154
+
155
+
156
+ void hammings (
157
+ const uint64_t * bs1,
158
+ const uint64_t * bs2,
159
+ size_t n1,
160
+ size_t n2,
161
+ size_t nwords,
162
+ hamdis_t * __restrict dis)
163
+ {
164
+ size_t i, j;
165
+ n1 *= nwords;
166
+ n2 *= nwords;
167
+ for (i = 0; i < n1; i+=nwords) {
168
+ const uint64_t * bs1_ = bs1+i;
169
+ for (j = 0; j < n2; j+=nwords)
170
+ dis[j] = hamming (bs1_, bs2+j, nwords);
171
+ }
172
+ }
173
+
174
+
175
+
176
+
177
+ /* Count number of matches given a max threshold */
178
+ template <size_t nbits>
179
+ void hamming_count_thres (
180
+ const uint64_t * bs1,
181
+ const uint64_t * bs2,
182
+ size_t n1,
183
+ size_t n2,
184
+ hamdis_t ht,
185
+ size_t * nptr)
186
+ {
187
+ const size_t nwords = nbits / 64;
188
+ size_t i, j, posm = 0;
189
+ const uint64_t * bs2_ = bs2;
190
+
191
+ for (i = 0; i < n1; i++) {
192
+ bs2 = bs2_;
193
+ for (j = 0; j < n2; j++) {
194
+ /* collect the match only if this satisfies the threshold */
195
+ if (hamming <nbits> (bs1, bs2) <= ht)
196
+ posm++;
197
+ bs2 += nwords;
198
+ }
199
+ bs1 += nwords; /* next signature */
200
+ }
201
+ *nptr = posm;
202
+ }
203
+
204
+
205
+ template <size_t nbits>
206
+ void crosshamming_count_thres (
207
+ const uint64_t * dbs,
208
+ size_t n,
209
+ int ht,
210
+ size_t * nptr)
211
+ {
212
+ const size_t nwords = nbits / 64;
213
+ size_t i, j, posm = 0;
214
+ const uint64_t * bs1 = dbs;
215
+ for (i = 0; i < n; i++) {
216
+ const uint64_t * bs2 = bs1 + 2;
217
+ for (j = i + 1; j < n; j++) {
218
+ /* collect the match only if this satisfies the threshold */
219
+ if (hamming <nbits> (bs1, bs2) <= ht)
220
+ posm++;
221
+ bs2 += nwords;
222
+ }
223
+ bs1 += nwords;
224
+ }
225
+ *nptr = posm;
226
+ }
227
+
228
+
229
+ template <size_t nbits>
230
+ size_t match_hamming_thres (
231
+ const uint64_t * bs1,
232
+ const uint64_t * bs2,
233
+ size_t n1,
234
+ size_t n2,
235
+ int ht,
236
+ int64_t * idx,
237
+ hamdis_t * hams)
238
+ {
239
+ const size_t nwords = nbits / 64;
240
+ size_t i, j, posm = 0;
241
+ hamdis_t h;
242
+ const uint64_t * bs2_ = bs2;
243
+ for (i = 0; i < n1; i++) {
244
+ bs2 = bs2_;
245
+ for (j = 0; j < n2; j++) {
246
+ /* Here perform the real work of computing the distance */
247
+ h = hamming <nbits> (bs1, bs2);
248
+
249
+ /* collect the match only if this satisfies the threshold */
250
+ if (h <= ht) {
251
+ /* Enough space to store another match ? */
252
+ *idx = i; idx++;
253
+ *idx = j; idx++;
254
+ *hams = h;
255
+ hams++;
256
+ posm++;
257
+ }
258
+ bs2+=nwords; /* next signature */
259
+ }
260
+ bs1+=nwords;
261
+ }
262
+ return posm;
263
+ }
264
+
265
+
266
+ /* Return closest neighbors w.r.t Hamming distance, using a heap. */
267
+ template <class HammingComputer>
268
+ static
269
+ void hammings_knn_hc (
270
+ int bytes_per_code,
271
+ int_maxheap_array_t * ha,
272
+ const uint8_t * bs1,
273
+ const uint8_t * bs2,
274
+ size_t n2,
275
+ bool order = true,
276
+ bool init_heap = true)
277
+ {
278
+ size_t k = ha->k;
279
+ if (init_heap) ha->heapify ();
280
+
281
+ const size_t block_size = hamming_batch_size;
282
+ for (size_t j0 = 0; j0 < n2; j0 += block_size) {
283
+ const size_t j1 = std::min(j0 + block_size, n2);
284
+ #pragma omp parallel for
285
+ for (size_t i = 0; i < ha->nh; i++) {
286
+ HammingComputer hc (bs1 + i * bytes_per_code, bytes_per_code);
287
+
288
+ const uint8_t * bs2_ = bs2 + j0 * bytes_per_code;
289
+ hamdis_t dis;
290
+ hamdis_t * __restrict bh_val_ = ha->val + i * k;
291
+ int64_t * __restrict bh_ids_ = ha->ids + i * k;
292
+ size_t j;
293
+ for (j = j0; j < j1; j++, bs2_+= bytes_per_code) {
294
+ dis = hc.hamming (bs2_);
295
+ if (dis < bh_val_[0]) {
296
+ faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
297
+ faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
298
+ }
299
+ }
300
+ }
301
+ }
302
+ if (order) ha->reorder ();
303
+ }
304
+
305
+ /* Return closest neighbors w.r.t Hamming distance, using max count. */
306
+ template <class HammingComputer>
307
+ static
308
+ void hammings_knn_mc (
309
+ int bytes_per_code,
310
+ const uint8_t *a,
311
+ const uint8_t *b,
312
+ size_t na,
313
+ size_t nb,
314
+ size_t k,
315
+ int32_t *distances,
316
+ int64_t *labels)
317
+ {
318
+ const int nBuckets = bytes_per_code * 8 + 1;
319
+ std::vector<int> all_counters(na * nBuckets, 0);
320
+ std::unique_ptr<int64_t[]> all_ids_per_dis(new int64_t[na * nBuckets * k]);
321
+
322
+ std::vector<HCounterState<HammingComputer>> cs;
323
+ for (size_t i = 0; i < na; ++i) {
324
+ cs.push_back(HCounterState<HammingComputer>(
325
+ all_counters.data() + i * nBuckets,
326
+ all_ids_per_dis.get() + i * nBuckets * k,
327
+ a + i * bytes_per_code,
328
+ 8 * bytes_per_code,
329
+ k
330
+ ));
331
+ }
332
+
333
+ const size_t block_size = hamming_batch_size;
334
+ for (size_t j0 = 0; j0 < nb; j0 += block_size) {
335
+ const size_t j1 = std::min(j0 + block_size, nb);
336
+ #pragma omp parallel for
337
+ for (size_t i = 0; i < na; ++i) {
338
+ for (size_t j = j0; j < j1; ++j) {
339
+ cs[i].update_counter(b + j * bytes_per_code, j);
340
+ }
341
+ }
342
+ }
343
+
344
+ for (size_t i = 0; i < na; ++i) {
345
+ HCounterState<HammingComputer>& csi = cs[i];
346
+
347
+ int nres = 0;
348
+ for (int b = 0; b < nBuckets && nres < k; b++) {
349
+ for (int l = 0; l < csi.counters[b] && nres < k; l++) {
350
+ labels[i * k + nres] = csi.ids_per_dis[b * k + l];
351
+ distances[i * k + nres] = b;
352
+ nres++;
353
+ }
354
+ }
355
+ while (nres < k) {
356
+ labels[i * k + nres] = -1;
357
+ distances[i * k + nres] = std::numeric_limits<int32_t>::max();
358
+ ++nres;
359
+ }
360
+ }
361
+ }
362
+
363
+
364
+
365
+ // works faster than the template version
366
+ static
367
+ void hammings_knn_hc_1 (
368
+ int_maxheap_array_t * ha,
369
+ const uint64_t * bs1,
370
+ const uint64_t * bs2,
371
+ size_t n2,
372
+ bool order = true,
373
+ bool init_heap = true)
374
+ {
375
+ const size_t nwords = 1;
376
+ size_t k = ha->k;
377
+
378
+
379
+ if (init_heap) {
380
+ ha->heapify ();
381
+ }
382
+
383
+ #pragma omp parallel for
384
+ for (size_t i = 0; i < ha->nh; i++) {
385
+ const uint64_t bs1_ = bs1 [i];
386
+ const uint64_t * bs2_ = bs2;
387
+ hamdis_t dis;
388
+ hamdis_t * bh_val_ = ha->val + i * k;
389
+ hamdis_t bh_val_0 = bh_val_[0];
390
+ int64_t * bh_ids_ = ha->ids + i * k;
391
+ size_t j;
392
+ for (j = 0; j < n2; j++, bs2_+= nwords) {
393
+ dis = popcount64 (bs1_ ^ *bs2_);
394
+ if (dis < bh_val_0) {
395
+ faiss::maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
396
+ faiss::maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, dis, j);
397
+ bh_val_0 = bh_val_[0];
398
+ }
399
+ }
400
+ }
401
+ if (order) {
402
+ ha->reorder ();
403
+ }
404
+ }
405
+
406
+
407
+
408
+
409
+ /* Functions to map vectors to bits. Assume proper allocation done beforehand,
410
+ meaning that b should be able to receive as many bits as x may produce. */
411
+
412
/**
 * Binarize a float vector by sign: bit = 1 when the component is >= 0.
 *
 * Dimension 0 corresponds to the least significant bit of b[0], or
 * equivalently to the lsb of the first byte that is stored.  Dimensions are
 * packed 8 per byte; when d is not a multiple of 8 the unused high bits of
 * the last byte are written as 0.
 *
 * @param x  input vector, d components
 * @param b  output, (d + 7) / 8 bytes are written
 * @param d  number of dimensions (nothing is written when d == 0)
 */
void fvec2bitvec (const float * x, uint8_t * b, size_t d)
{
    /* indices are size_t so they compare cleanly against d and cannot
       overflow for dimensions that do not fit in an int */
    for (size_t i = 0; i < d; i += 8) {
        const size_t nj = d - i < 8 ? d - i : 8;
        uint8_t w = 0;
        for (size_t j = 0; j < nj; j++) {
            if (x[i + j] >= 0)
                w |= (uint8_t) (1u << j);
        }
        *b++ = w;
    }
}
431
+
432
+
433
+
434
+ /* Same but for n vectors.
435
+ Ensure that the ouptut b is byte-aligned (pad with 0s). */
436
+ void fvecs2bitvecs (const float * x, uint8_t * b, size_t d, size_t n)
437
+ {
438
+ const int64_t ncodes = ((d + 7) / 8);
439
+ #pragma omp parallel for if(n > 100000)
440
+ for (size_t i = 0; i < n; i++)
441
+ fvec2bitvec (x + i * d, b + i * ncodes, d);
442
+ }
443
+
444
+
445
+
446
+ void bitvecs2fvecs (
447
+ const uint8_t * b,
448
+ float * x,
449
+ size_t d,
450
+ size_t n) {
451
+
452
+ const int64_t ncodes = ((d + 7) / 8);
453
+ #pragma omp parallel for if(n > 100000)
454
+ for (size_t i = 0; i < n; i++) {
455
+ binary_to_real (d, b + i * ncodes, x + i * d);
456
+ }
457
+ }
458
+
459
+
460
/**
 * Reverse the bit order of a 64-bit word (bit 0 becomes bit 63, and so on).
 * Not an optimized function; only used for printing.
 */
static uint64_t uint64_reverse_bits (uint64_t b)
{
    uint64_t out = 0;
    for (int pos = 63; pos >= 0; --pos) {
        /* the current lowest bit of b lands at the mirrored position */
        out |= (b & 1) << pos;
        b >>= 1;
    }
    return out;
}
472
+
473
+
474
+ /* print the bit vector */
475
+ void bitvec_print (const uint8_t * b, size_t d)
476
+ {
477
+ size_t i, j;
478
+ for (i = 0; i < d; ) {
479
+ uint64_t brev = uint64_reverse_bits (* (uint64_t *) b);
480
+ for (j = 0; j < 64 && i < d; j++, i++) {
481
+ printf ("%d", (int) (brev & 1));
482
+ brev >>= 1;
483
+ }
484
+ b += 8;
485
+ printf (" ");
486
+ }
487
+ }
488
+
489
+
490
+
491
+
492
+
493
+ /*----------------------------------------*/
494
+ /* Hamming distance computation and k-nn */
495
+
496
+
497
/* Reinterpret a byte pointer as a pointer to 64-bit words.
 * The argument is parenthesized so that expressions such as C64(p + 8) cast
 * the whole expression, and the result stays const because every use in
 * this file passes read-only input codes. */
#define C64(x) ((const uint64_t *)(x))
498
+
499
+
500
+ /* Compute a set of Hamming distances */
501
+ void hammings (
502
+ const uint8_t * a,
503
+ const uint8_t * b,
504
+ size_t na, size_t nb,
505
+ size_t ncodes,
506
+ hamdis_t * __restrict dis)
507
+ {
508
+ FAISS_THROW_IF_NOT (ncodes % 8 == 0);
509
+ switch (ncodes) {
510
+ case 8:
511
+ faiss::hammings <64> (C64(a), C64(b), na, nb, dis); return;
512
+ case 16:
513
+ faiss::hammings <128> (C64(a), C64(b), na, nb, dis); return;
514
+ case 32:
515
+ faiss::hammings <256> (C64(a), C64(b), na, nb, dis); return;
516
+ case 64:
517
+ faiss::hammings <512> (C64(a), C64(b), na, nb, dis); return;
518
+ default:
519
+ faiss::hammings (C64(a), C64(b), na, nb, ncodes * 8, dis); return;
520
+ }
521
+ }
522
+
523
+ void hammings_knn(
524
+ int_maxheap_array_t *ha,
525
+ const uint8_t *a,
526
+ const uint8_t *b,
527
+ size_t nb,
528
+ size_t ncodes,
529
+ int order)
530
+ {
531
+ hammings_knn_hc(ha, a, b, nb, ncodes, order);
532
+ }
533
+ void hammings_knn_hc (
534
+ int_maxheap_array_t * ha,
535
+ const uint8_t * a,
536
+ const uint8_t * b,
537
+ size_t nb,
538
+ size_t ncodes,
539
+ int order)
540
+ {
541
+ switch (ncodes) {
542
+ case 4:
543
+ hammings_knn_hc<faiss::HammingComputer4>
544
+ (4, ha, a, b, nb, order, true);
545
+ break;
546
+ case 8:
547
+ hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true);
548
+ // hammings_knn_hc<faiss::HammingComputer8>
549
+ // (8, ha, a, b, nb, order, true);
550
+ break;
551
+ case 16:
552
+ hammings_knn_hc<faiss::HammingComputer16>
553
+ (16, ha, a, b, nb, order, true);
554
+ break;
555
+ case 32:
556
+ hammings_knn_hc<faiss::HammingComputer32>
557
+ (32, ha, a, b, nb, order, true);
558
+ break;
559
+ default:
560
+ if(ncodes % 8 == 0) {
561
+ hammings_knn_hc<faiss::HammingComputerM8>
562
+ (ncodes, ha, a, b, nb, order, true);
563
+ } else {
564
+ hammings_knn_hc<faiss::HammingComputerDefault>
565
+ (ncodes, ha, a, b, nb, order, true);
566
+
567
+ }
568
+ }
569
+ }
570
+
571
+ void hammings_knn_mc(
572
+ const uint8_t * a,
573
+ const uint8_t * b,
574
+ size_t na,
575
+ size_t nb,
576
+ size_t k,
577
+ size_t ncodes,
578
+ int32_t *distances,
579
+ int64_t *labels)
580
+ {
581
+ switch (ncodes) {
582
+ case 4:
583
+ hammings_knn_mc<faiss::HammingComputer4>(
584
+ 4, a, b, na, nb, k, distances, labels
585
+ );
586
+ break;
587
+ case 8:
588
+ // TODO(hoss): Write analog to hammings_knn_hc_1
589
+ // hammings_knn_hc_1 (ha, C64(a), C64(b), nb, order, true);
590
+ hammings_knn_mc<faiss::HammingComputer8>(
591
+ 8, a, b, na, nb, k, distances, labels
592
+ );
593
+ break;
594
+ case 16:
595
+ hammings_knn_mc<faiss::HammingComputer16>(
596
+ 16, a, b, na, nb, k, distances, labels
597
+ );
598
+ break;
599
+ case 32:
600
+ hammings_knn_mc<faiss::HammingComputer32>(
601
+ 32, a, b, na, nb, k, distances, labels
602
+ );
603
+ break;
604
+ default:
605
+ if(ncodes % 8 == 0) {
606
+ hammings_knn_mc<faiss::HammingComputerM8>(
607
+ ncodes, a, b, na, nb, k, distances, labels
608
+ );
609
+ } else {
610
+ hammings_knn_mc<faiss::HammingComputerDefault>(
611
+ ncodes, a, b, na, nb, k, distances, labels
612
+ );
613
+ }
614
+ }
615
+ }
616
+
617
+
618
+
619
+
620
+ /* Count number of matches given a max threshold */
621
+ void hamming_count_thres (
622
+ const uint8_t * bs1,
623
+ const uint8_t * bs2,
624
+ size_t n1,
625
+ size_t n2,
626
+ hamdis_t ht,
627
+ size_t ncodes,
628
+ size_t * nptr)
629
+ {
630
+ switch (ncodes) {
631
+ case 8:
632
+ faiss::hamming_count_thres <64> (C64(bs1), C64(bs2),
633
+ n1, n2, ht, nptr);
634
+ return;
635
+ case 16:
636
+ faiss::hamming_count_thres <128> (C64(bs1), C64(bs2),
637
+ n1, n2, ht, nptr);
638
+ return;
639
+ case 32:
640
+ faiss::hamming_count_thres <256> (C64(bs1), C64(bs2),
641
+ n1, n2, ht, nptr);
642
+ return;
643
+ case 64:
644
+ faiss::hamming_count_thres <512> (C64(bs1), C64(bs2),
645
+ n1, n2, ht, nptr);
646
+ return;
647
+ default:
648
+ FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
649
+ }
650
+ }
651
+
652
+
653
+ /* Count number of cross-matches given a threshold */
654
+ void crosshamming_count_thres (
655
+ const uint8_t * dbs,
656
+ size_t n,
657
+ hamdis_t ht,
658
+ size_t ncodes,
659
+ size_t * nptr)
660
+ {
661
+ switch (ncodes) {
662
+ case 8:
663
+ faiss::crosshamming_count_thres <64> (C64(dbs), n, ht, nptr);
664
+ return;
665
+ case 16:
666
+ faiss::crosshamming_count_thres <128> (C64(dbs), n, ht, nptr);
667
+ return;
668
+ case 32:
669
+ faiss::crosshamming_count_thres <256> (C64(dbs), n, ht, nptr);
670
+ return;
671
+ case 64:
672
+ faiss::crosshamming_count_thres <512> (C64(dbs), n, ht, nptr);
673
+ return;
674
+ default:
675
+ FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
676
+ }
677
+ }
678
+
679
+
680
+ /* Returns all matches given a threshold */
681
+ size_t match_hamming_thres (
682
+ const uint8_t * bs1,
683
+ const uint8_t * bs2,
684
+ size_t n1,
685
+ size_t n2,
686
+ hamdis_t ht,
687
+ size_t ncodes,
688
+ int64_t * idx,
689
+ hamdis_t * dis)
690
+ {
691
+ switch (ncodes) {
692
+ case 8:
693
+ return faiss::match_hamming_thres <64> (C64(bs1), C64(bs2),
694
+ n1, n2, ht, idx, dis);
695
+ case 16:
696
+ return faiss::match_hamming_thres <128> (C64(bs1), C64(bs2),
697
+ n1, n2, ht, idx, dis);
698
+ case 32:
699
+ return faiss::match_hamming_thres <256> (C64(bs1), C64(bs2),
700
+ n1, n2, ht, idx, dis);
701
+ case 64:
702
+ return faiss::match_hamming_thres <512> (C64(bs1), C64(bs2),
703
+ n1, n2, ht, idx, dis);
704
+ default:
705
+ FAISS_THROW_FMT ("not implemented for %zu bits", ncodes);
706
+ return 0;
707
+ }
708
+ }
709
+
710
+
711
+ #undef C64
712
+
713
+
714
+
715
+ /*************************************
716
+ * generalized Hamming distances
717
+ ************************************/
718
+
719
+
720
+
721
+ template <class HammingComputer>
722
+ static void hamming_dis_inner_loop (
723
+ const uint8_t *ca,
724
+ const uint8_t *cb,
725
+ size_t nb,
726
+ size_t code_size,
727
+ int k,
728
+ hamdis_t * bh_val_,
729
+ int64_t * bh_ids_)
730
+ {
731
+
732
+ HammingComputer hc (ca, code_size);
733
+
734
+ for (size_t j = 0; j < nb; j++) {
735
+ int ndiff = hc.hamming (cb);
736
+ cb += code_size;
737
+ if (ndiff < bh_val_[0]) {
738
+ maxheap_pop<hamdis_t> (k, bh_val_, bh_ids_);
739
+ maxheap_push<hamdis_t> (k, bh_val_, bh_ids_, ndiff, j);
740
+ }
741
+ }
742
+ }
743
+
744
+ void generalized_hammings_knn_hc (
745
+ int_maxheap_array_t * ha,
746
+ const uint8_t * a,
747
+ const uint8_t * b,
748
+ size_t nb,
749
+ size_t code_size,
750
+ int ordered)
751
+ {
752
+ int na = ha->nh;
753
+ int k = ha->k;
754
+
755
+ if (ordered)
756
+ ha->heapify ();
757
+
758
+ #pragma omp parallel for
759
+ for (int i = 0; i < na; i++) {
760
+ const uint8_t *ca = a + i * code_size;
761
+ const uint8_t *cb = b;
762
+
763
+ hamdis_t * bh_val_ = ha->val + i * k;
764
+ int64_t * bh_ids_ = ha->ids + i * k;
765
+
766
+ switch (code_size) {
767
+ case 8:
768
+ hamming_dis_inner_loop<GenHammingComputer8>
769
+ (ca, cb, nb, 8, k, bh_val_, bh_ids_);
770
+ break;
771
+ case 16:
772
+ hamming_dis_inner_loop<GenHammingComputer16>
773
+ (ca, cb, nb, 16, k, bh_val_, bh_ids_);
774
+ break;
775
+ case 32:
776
+ hamming_dis_inner_loop<GenHammingComputer32>
777
+ (ca, cb, nb, 32, k, bh_val_, bh_ids_);
778
+ break;
779
+ default:
780
+ hamming_dis_inner_loop<GenHammingComputerM8>
781
+ (ca, cb, nb, code_size, k, bh_val_, bh_ids_);
782
+ break;
783
+ }
784
+ }
785
+
786
+ if (ordered)
787
+ ha->reorder ();
788
+
789
+ }
790
+
791
+
792
+ } // namespace faiss