faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ /*
11
+ * Hamming distances. The binary vector dimensionality should be a
12
+ * multiple of 8, as the elementary operations operate on bytes. If
13
+ * you need other sizes, just pad with 0s (this is done by function
14
+ * fvecs2bitvecs).
15
+ *
16
+ * User-defined type hamdis_t is used for distances because at this time
17
+ * it is still unclear how we will need to balance
18
+ * - flexibility in vector size (may need 16- or even 8-bit vectors)
19
+ * - memory usage
20
+ * - cache-misses when dealing with large volumes of data (fewer bits is better)
21
+ *
22
+ */
23
+
24
+ #ifndef FAISS_hamming_h
25
+ #define FAISS_hamming_h
26
+
27
+
28
+ #include <stdint.h>
29
+
30
+ #include <faiss/utils/Heap.h>
31
+
32
+
33
/// Integer type used to express Hamming distances throughout this header.
using hamdis_t = int32_t;
35
+
36
+ namespace faiss {
37
+
38
+ /**************************************************
39
+ * General bit vector functions
40
+ **************************************************/
41
+
42
+
43
/// Prints a bit vector.
/// NOTE(review): presumably `b` holds the packed bits and `d` is the number
/// of bits to print -- the definition is not visible here, confirm.
void bitvec_print(const uint8_t* b, size_t d);
44
+
45
+
46
+ /* Functions for casting vectors of regular types to compact bits.
47
+ They assume proper allocation done beforehand, meaning that b
48
+ should be able to receive as many bits as x may produce. */
49
+
50
/** Builds an array of bits from the signs of a float array.
 *
 * The length of the output array is rounded up to a whole number of bytes,
 * so the caller must allocate `b` accordingly.
 *
 * @param x  input floats
 * @param b  output bits
 * @param d  NOTE(review): presumably the dimension of each vector -- confirm
 * @param n  NOTE(review): presumably the number of vectors -- confirm
 */
void fvecs2bitvecs(const float* x, uint8_t* b, size_t d, size_t n);
58
+
59
/** Converts bit vectors `b` into float vectors `x`.
 *
 * NOTE(review): by its signature this is the counterpart of fvecs2bitvecs;
 * the float value produced for each bit, and the exact meaning of `d` and
 * `n` (presumably dimension and vector count), are not visible here.
 */
void bitvecs2fvecs(const uint8_t* b, float* x, size_t d, size_t n);
64
+
65
+
66
/// Converts the float array `x` into bits written to `b`.
/// NOTE(review): presumably the single-vector form of fvecs2bitvecs with `d`
/// the vector dimension -- confirm against the definition.
void fvec2bitvec(const float* x, uint8_t* b, size_t d);
67
+
68
+ /***********************************************
69
+ * Generic reader/writer for bit strings
70
+ ***********************************************/
71
+
72
+
73
/// Writes values of arbitrary bit width, one after another, into a byte
/// buffer supplied at construction.
struct BitstringWriter {
    uint8_t* code;     ///< buffer being written to
    size_t code_size;  ///< size of the buffer, in bytes
    size_t i;          ///< current bit offset

    /// @param code       buffer to write into
    /// @param code_size  size of the buffer, in bytes
    BitstringWriter(uint8_t* code, int code_size);

    /// Writes the `nbit` low bits of `x`.
    void write(uint64_t x, int nbit);
};
84
+
85
/// Reads values of arbitrary bit width, one after another, from a byte
/// buffer supplied at construction.
struct BitstringReader {
    const uint8_t* code;  ///< buffer being read
    size_t code_size;     ///< size of the buffer, in bytes
    size_t i;             ///< NOTE(review): presumably the current bit offset, as in BitstringWriter

    /// @param code       buffer to read from
    /// @param code_size  size of the buffer, in bytes
    BitstringReader(const uint8_t* code, int code_size);

    /// Reads `nbit` bits from the code and returns them.
    uint64_t read(int nbit);
};
96
+
97
+ /**************************************************
98
+ * Hamming distance computation functions
99
+ **************************************************/
100
+
101
+
102
+
103
/// NOTE(review): presumably a batch size used by the Hamming routines; its
/// definition and use are outside this header.
extern size_t hamming_batch_size;

/** Number of set bits in a 64-bit word.
 *
 * Uses the `unsigned long long` builtin: `__builtin_popcountl` takes an
 * `unsigned long`, which is only 32 bits wide on LLP64 / ILP32 targets and
 * would silently drop the upper half of `x` there.
 *
 * @param x  word to count bits in
 * @return   population count, in [0, 64]
 */
inline int popcount64(uint64_t x) {
    return __builtin_popcountll(x);
}
108
+
109
+
110
+ /** Compute a set of Hamming distances between na and nb binary vectors
111
+ *
112
+ * @param a size na * nbytespercode
113
+ * @param b size nb * nbytespercode
114
+ * @param nbytespercode should be multiple of 8
115
+ * @param dis output distances, size na * nb
116
+ */
117
+ void hammings (
118
+ const uint8_t * a,
119
+ const uint8_t * b,
120
+ size_t na, size_t nb,
121
+ size_t nbytespercode,
122
+ hamdis_t * dis);
123
+
124
+
125
+
126
+
127
+ /** Return the k smallest Hamming distances for a set of binary query vectors,
128
+ * using a max heap.
129
+ * @param a queries, size ha->nh * ncodes
130
+ * @param b database, size nb * ncodes
131
+ * @param nb number of database vectors
132
+ * @param ncodes size of the binary codes (bytes)
133
+ * @param ordered if != 0: order the results by decreasing distance
134
+ * (may be bottleneck for k/n > 0.01) */
135
+ void hammings_knn_hc (
136
+ int_maxheap_array_t * ha,
137
+ const uint8_t * a,
138
+ const uint8_t * b,
139
+ size_t nb,
140
+ size_t ncodes,
141
+ int ordered);
142
+
143
+ /* Legacy alias to hammings_knn_hc. */
144
+ void hammings_knn (
145
+ int_maxheap_array_t * ha,
146
+ const uint8_t * a,
147
+ const uint8_t * b,
148
+ size_t nb,
149
+ size_t ncodes,
150
+ int ordered);
151
+
152
/** k-nearest-neighbour search in Hamming space for a set of binary query
 *  vectors, using counting max.
 *
 * @param a          queries, size na * ncodes
 * @param b          database, size nb * ncodes
 * @param na         number of query vectors
 * @param nb         number of database vectors
 * @param k          number of vectors/distances to return per query
 * @param ncodes     size of the binary codes (bytes)
 * @param distances  output: distances from each query vector to its k
 *                   nearest neighbours
 * @param labels     output: ids of the k nearest neighbours of each query
 */
void hammings_knn_mc(const uint8_t* a, const uint8_t* b,
                     size_t na, size_t nb,
                     size_t k, size_t ncodes,
                     int32_t* distances, int64_t* labels);
173
+
174
+ /* Counting the number of matches or of cross-matches (without returning them)
175
+ For use with function that assume pre-allocated memory */
176
+ void hamming_count_thres (
177
+ const uint8_t * bs1,
178
+ const uint8_t * bs2,
179
+ size_t n1,
180
+ size_t n2,
181
+ hamdis_t ht,
182
+ size_t ncodes,
183
+ size_t * nptr);
184
+
185
+ /* Return all Hamming distances/index passing a thres. Pre-allocation of output
186
+ is required. Use hamming_count_thres to determine the proper size. */
187
+ size_t match_hamming_thres (
188
+ const uint8_t * bs1,
189
+ const uint8_t * bs2,
190
+ size_t n1,
191
+ size_t n2,
192
+ hamdis_t ht,
193
+ size_t ncodes,
194
+ int64_t * idx,
195
+ hamdis_t * dis);
196
+
197
+ /* Cross-matching in a set of vectors */
198
+ void crosshamming_count_thres (
199
+ const uint8_t * dbs,
200
+ size_t n,
201
+ hamdis_t ht,
202
+ size_t ncodes,
203
+ size_t * nptr);
204
+
205
+
206
+ /* compute the Hamming distances between two codewords of nwords*64 bits */
207
+ hamdis_t hamming (
208
+ const uint64_t * bs1,
209
+ const uint64_t * bs2,
210
+ size_t nwords);
211
+
212
+
213
+
214
+ } // namespace faiss
215
+
216
+ // inlined definitions of HammingComputerXX and GenHammingComputerXX
217
+
218
+ #include <faiss/utils/hamming-inl.h>
219
+
220
+ #endif /* FAISS_hamming_h */
@@ -0,0 +1,192 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/utils/random.h>
11
+
12
+ namespace faiss {
13
+
14
+ /**************************************************
15
+ * Random data generation functions
16
+ **************************************************/
17
+
18
+ RandomGenerator::RandomGenerator (int64_t seed)
19
+ : mt((unsigned int)seed) {}
20
+
21
+ int RandomGenerator::rand_int ()
22
+ {
23
+ return mt() & 0x7fffffff;
24
+ }
25
+
26
+ int64_t RandomGenerator::rand_int64 ()
27
+ {
28
+ return int64_t(rand_int()) | int64_t(rand_int()) << 31;
29
+ }
30
+
31
+ int RandomGenerator::rand_int (int max)
32
+ {
33
+ return mt() % max;
34
+ }
35
+
36
+ float RandomGenerator::rand_float ()
37
+ {
38
+ return mt() / float(mt.max());
39
+ }
40
+
41
+ double RandomGenerator::rand_double ()
42
+ {
43
+ return mt() / double(mt.max());
44
+ }
45
+
46
+
47
+ /***********************************************************************
48
+ * Random functions in this C file only exist because Torch
49
+ * counterparts are slow and not multi-threaded. Typical use is for
50
+ * more than 1-100 billion values. */
51
+
52
+
53
+ /* Generate a set of random floating point values such that x[i] in [0,1]
54
+ multi-threading. For this reason, we rely on re-entreant functions. */
55
+ void float_rand (float * x, size_t n, int64_t seed)
56
+ {
57
+ // only try to parallelize on large enough arrays
58
+ const size_t nblock = n < 1024 ? 1 : 1024;
59
+
60
+ RandomGenerator rng0 (seed);
61
+ int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
62
+
63
+ #pragma omp parallel for
64
+ for (size_t j = 0; j < nblock; j++) {
65
+
66
+ RandomGenerator rng (a0 + j * b0);
67
+
68
+ const size_t istart = j * n / nblock;
69
+ const size_t iend = (j + 1) * n / nblock;
70
+
71
+ for (size_t i = istart; i < iend; i++)
72
+ x[i] = rng.rand_float ();
73
+ }
74
+ }
75
+
76
+
77
+ void float_randn (float * x, size_t n, int64_t seed)
78
+ {
79
+ // only try to parallelize on large enough arrays
80
+ const size_t nblock = n < 1024 ? 1 : 1024;
81
+
82
+ RandomGenerator rng0 (seed);
83
+ int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
84
+
85
+ #pragma omp parallel for
86
+ for (size_t j = 0; j < nblock; j++) {
87
+ RandomGenerator rng (a0 + j * b0);
88
+
89
+ double a = 0, b = 0, s = 0;
90
+ int state = 0; /* generate two number per "do-while" loop */
91
+
92
+ const size_t istart = j * n / nblock;
93
+ const size_t iend = (j + 1) * n / nblock;
94
+
95
+ for (size_t i = istart; i < iend; i++) {
96
+ /* Marsaglia's method (see Knuth) */
97
+ if (state == 0) {
98
+ do {
99
+ a = 2.0 * rng.rand_double () - 1;
100
+ b = 2.0 * rng.rand_double () - 1;
101
+ s = a * a + b * b;
102
+ } while (s >= 1.0);
103
+ x[i] = a * sqrt(-2.0 * log(s) / s);
104
+ }
105
+ else
106
+ x[i] = b * sqrt(-2.0 * log(s) / s);
107
+ state = 1 - state;
108
+ }
109
+ }
110
+ }
111
+
112
+
113
+ /* Integer versions */
114
+ void int64_rand (int64_t * x, size_t n, int64_t seed)
115
+ {
116
+ // only try to parallelize on large enough arrays
117
+ const size_t nblock = n < 1024 ? 1 : 1024;
118
+
119
+ RandomGenerator rng0 (seed);
120
+ int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
121
+
122
+ #pragma omp parallel for
123
+ for (size_t j = 0; j < nblock; j++) {
124
+
125
+ RandomGenerator rng (a0 + j * b0);
126
+
127
+ const size_t istart = j * n / nblock;
128
+ const size_t iend = (j + 1) * n / nblock;
129
+ for (size_t i = istart; i < iend; i++)
130
+ x[i] = rng.rand_int64 ();
131
+ }
132
+ }
133
+
134
+ void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed)
135
+ {
136
+ // only try to parallelize on large enough arrays
137
+ const size_t nblock = n < 1024 ? 1 : 1024;
138
+
139
+ RandomGenerator rng0 (seed);
140
+ int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
141
+
142
+ #pragma omp parallel for
143
+ for (size_t j = 0; j < nblock; j++) {
144
+
145
+ RandomGenerator rng (a0 + j * b0);
146
+
147
+ const size_t istart = j * n / nblock;
148
+ const size_t iend = (j + 1) * n / nblock;
149
+ for (size_t i = istart; i < iend; i++)
150
+ x[i] = rng.rand_int64 () % max;
151
+ }
152
+ }
153
+
154
+
155
+ void rand_perm (int *perm, size_t n, int64_t seed)
156
+ {
157
+ for (size_t i = 0; i < n; i++) perm[i] = i;
158
+
159
+ RandomGenerator rng (seed);
160
+
161
+ for (size_t i = 0; i + 1 < n; i++) {
162
+ int i2 = i + rng.rand_int (n - i);
163
+ std::swap(perm[i], perm[i2]);
164
+ }
165
+ }
166
+
167
+
168
+
169
+
170
+ void byte_rand (uint8_t * x, size_t n, int64_t seed)
171
+ {
172
+ // only try to parallelize on large enough arrays
173
+ const size_t nblock = n < 1024 ? 1 : 1024;
174
+
175
+ RandomGenerator rng0 (seed);
176
+ int a0 = rng0.rand_int (), b0 = rng0.rand_int ();
177
+
178
+ #pragma omp parallel for
179
+ for (size_t j = 0; j < nblock; j++) {
180
+
181
+ RandomGenerator rng (a0 + j * b0);
182
+
183
+ const size_t istart = j * n / nblock;
184
+ const size_t iend = (j + 1) * n / nblock;
185
+
186
+ size_t i;
187
+ for (i = istart; i < iend; i++)
188
+ x[i] = rng.rand_int64 ();
189
+ }
190
+ }
191
+
192
+ } // namespace faiss
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ /* Random generators. Implemented here for speed and to make
11
+ * sequences reproducible.
12
+ */
13
+
14
+ #pragma once
15
+
16
+ #include <random>
17
+ #include <stdint.h>
18
+
19
+
20
+ namespace faiss {
21
+
22
+ /**************************************************
23
+ * Random data generation functions
24
+ **************************************************/
25
+
26
/// Random generator that can be used in multithreaded contexts: every
/// instance carries its own Mersenne Twister engine.
struct RandomGenerator {

    std::mt19937 mt;  ///< underlying engine

    /// Seeds the engine; the default seed is 1234.
    explicit RandomGenerator (int64_t seed = 1234);

    /// random non-negative integer
    int rand_int ();

    /// random integer between 0 and max-1
    int rand_int (int max);

    /// random int64_t
    int64_t rand_int64 ();

    /// random float between 0 and 1
    float rand_float ();

    /// random double, same construction as rand_float
    double rand_double ();
};
47
+
48
/* Array-filling helpers (multi-threaded implementations). Each one writes n
 * values into x, reproducibly for a given seed. */

/// uniform random floats
void float_rand (float * x, size_t n, int64_t seed);

/// normally distributed random floats
void float_randn (float * x, size_t n, int64_t seed);

/// random int64_t values
void int64_rand (int64_t * x, size_t n, int64_t seed);

/// random int64_t values below max (max is actually the maximum value + 1)
void int64_rand_max (int64_t * x, size_t n, uint64_t max, int64_t seed);

/// random bytes
void byte_rand (uint8_t * x, size_t n, int64_t seed);

/// random permutation, written into perm[0..n)
void rand_perm (int * perm, size_t n, int64_t seed);
58
+
59
+
60
+ } // namespace faiss