faiss 0.1.0 → 0.1.1

Files changed (226)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
data/vendor/faiss/utils/utils.cpp
@@ -0,0 +1,783 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// -*- c++ -*-
+
+#include <faiss/utils/utils.h>
+
+#include <cstdio>
+#include <cassert>
+#include <cstring>
+#include <cmath>
+
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <omp.h>
+
+#include <algorithm>
+#include <vector>
+
+#include <faiss/impl/AuxIndexStructures.h>
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/random.h>
+
+
+#ifndef FINTEGER
+#define FINTEGER long
+#endif
+
+
+extern "C" {
+
+/* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
+
+int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
+            n, FINTEGER *k, const float *alpha, const float *a,
+            FINTEGER *lda, const float *b, FINTEGER *
+            ldb, float *beta, float *c, FINTEGER *ldc);
+
+/* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
+
+int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
+             float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
+
+int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a,
+            FINTEGER *lda, float *tau, float *work,
+            FINTEGER *lwork, FINTEGER *info);
+
+int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha,
+           const float *a, FINTEGER *lda, const float *x, FINTEGER *incx,
+           float *beta, float *y, FINTEGER *incy);
+
+}
+
+
+/**************************************************
+ * Get some stats about the system
+ **************************************************/
+
+namespace faiss {
+
+double getmillisecs () {
+    struct timeval tv;
+    gettimeofday (&tv, nullptr);
+    return tv.tv_sec * 1e3 + tv.tv_usec * 1e-3;
+}
+
+uint64_t get_cycles () {
+#ifdef __x86_64__
+    uint32_t high, low;
+    asm volatile("rdtsc \n\t"
+                 : "=a" (low),
+                   "=d" (high));
+    return ((uint64_t)high << 32) | (low);
+#else
+    return 0;
+#endif
+}
+
+
+#ifdef __linux__
+
+size_t get_mem_usage_kb ()
+{
+    int pid = getpid ();
+    char fname[256];
+    snprintf (fname, 256, "/proc/%d/status", pid);
+    FILE * f = fopen (fname, "r");
+    FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file");
+    size_t sz = 0;
+    for (;;) {
+        char buf [256];
+        if (!fgets (buf, 256, f)) break;
+        if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break;
+    }
+    fclose (f);
+    return sz;
+}
+
+#elif __APPLE__
+
+size_t get_mem_usage_kb ()
+{
+    fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n");
+    return 0;
+}
+
+#endif
+
+
+void reflection (const float * __restrict u,
+                 float * __restrict x,
+                 size_t n, size_t d, size_t nu)
+{
+    size_t i, j, l;
+    for (i = 0; i < n; i++) {
+        const float * up = u;
+        for (l = 0; l < nu; l++) {
+            float ip1 = 0, ip2 = 0;
+
+            for (j = 0; j < d; j+=2) {
+                ip1 += up[j] * x[j];
+                ip2 += up[j+1] * x[j+1];
+            }
+            float ip = 2 * (ip1 + ip2);
+
+            for (j = 0; j < d; j++)
+                x[j] -= ip * up[j];
+            up += d;
+        }
+        x += d;
+    }
+}
+
+
+/* Reference implementation (slower) */
+void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
+{
+    size_t i, j, l;
+    for (i = 0; i < n; i++) {
+        const float * up = u;
+        for (l = 0; l < nu; l++) {
+            double ip = 0;
+
+            for (j = 0; j < d; j++)
+                ip += up[j] * x[j];
+            ip *= 2;
+
+            for (j = 0; j < d; j++)
+                x[j] -= ip * up[j];
+
+            up += d;
+        }
+        x += d;
+    }
+}
+
+
+/***************************************************************************
+ * Some matrix manipulation functions
+ ***************************************************************************/
+
+
+/* This function exists because the Torch counterpart is extremely slow
+   (not multi-threaded + unexpected overhead even in single thread).
+   It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2<x|y> */
+void inner_product_to_L2sqr (float * __restrict dis,
+                             const float * nr1,
+                             const float * nr2,
+                             size_t n1, size_t n2)
+{
+
+#pragma omp parallel for
+    for (size_t j = 0 ; j < n1 ; j++) {
+        float * disj = dis + j * n2;
+        for (size_t i = 0 ; i < n2 ; i++)
+            disj[i] = nr1[j] + nr2[i] - 2 * disj[i];
+    }
+}
+
+
+void matrix_qr (int m, int n, float *a)
+{
+    FAISS_THROW_IF_NOT (m >= n);
+    FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
+    std::vector<float> tau (ki);
+    FINTEGER lwork = -1, info;
+    float work_size;
+
+    sgeqrf_ (&mi, &ni, a, &mi, tau.data(),
+             &work_size, &lwork, &info);
+    lwork = size_t(work_size);
+    std::vector<float> work (lwork);
+
+    sgeqrf_ (&mi, &ni, a, &mi,
+             tau.data(), work.data(), &lwork, &info);
+
+    sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(),
+             work.data(), &lwork, &info);
+
+}
+
+
+/***************************************************************************
+ * Kmeans subroutine
+ ***************************************************************************/
+
+// a bit above machine epsilon for float16
+
+#define EPS (1 / 1024.)
+
+/* For k-means, compute centroids given assignment of vectors to centroids */
+int km_update_centroids (const float * x,
+                         float * centroids,
+                         int64_t * assign,
+                         size_t d, size_t k, size_t n,
+                         size_t k_frozen)
+{
+    k -= k_frozen;
+    centroids += k_frozen * d;
+
+    std::vector<size_t> hassign(k);
+    memset (centroids, 0, sizeof(*centroids) * d * k);
+
+#pragma omp parallel
+    {
+        int nt = omp_get_num_threads();
+        int rank = omp_get_thread_num();
+        // this thread is taking care of centroids c0:c1
+        size_t c0 = (k * rank) / nt;
+        size_t c1 = (k * (rank + 1)) / nt;
+        const float *xi = x;
+        size_t nacc = 0;
+
+        for (size_t i = 0; i < n; i++) {
+            int64_t ci = assign[i];
+            assert (ci >= 0 && ci < k + k_frozen);
+            ci -= k_frozen;
+            if (ci >= c0 && ci < c1) {
+                float * c = centroids + ci * d;
+                hassign[ci]++;
+                for (size_t j = 0; j < d; j++)
+                    c[j] += xi[j];
+                nacc++;
+            }
+            xi += d;
+        }
+
+    }
+
+#pragma omp parallel for
+    for (size_t ci = 0; ci < k; ci++) {
+        float * c = centroids + ci * d;
+        float ni = (float) hassign[ci];
+        if (ni != 0) {
+            for (size_t j = 0; j < d; j++)
+                c[j] /= ni;
+        }
+    }
+
+    /* Take care of void clusters */
+    size_t nsplit = 0;
+    RandomGenerator rng (1234);
+    for (size_t ci = 0; ci < k; ci++) {
+        if (hassign[ci] == 0) { /* need to redefine a centroid */
+            size_t cj;
+            for (cj = 0; 1; cj = (cj + 1) % k) {
+                /* probability to pick this cluster for split */
+                float p = (hassign[cj] - 1.0) / (float) (n - k);
+                float r = rng.rand_float ();
+                if (r < p) {
+                    break; /* found our cluster to be split */
+                }
+            }
+            memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);
+
+            /* small symmetric perturbation. Much better than */
+            for (size_t j = 0; j < d; j++) {
+                if (j % 2 == 0) {
+                    centroids[ci * d + j] *= 1 + EPS;
+                    centroids[cj * d + j] *= 1 - EPS;
+                } else {
+                    centroids[ci * d + j] *= 1 - EPS;
+                    centroids[cj * d + j] *= 1 + EPS;
+                }
+            }
+
+            /* assume even split of the cluster */
+            hassign[ci] = hassign[cj] / 2;
+            hassign[cj] -= hassign[ci];
+            nsplit++;
+        }
+    }
+
+    return nsplit;
+}
+
+#undef EPS
+
+
+/***************************************************************************
+ * Result list routines
+ ***************************************************************************/
+
+
+void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
+{
+    float prev_dis = -1e38;
+    int prev_i = -1;
+    for (int i = 0; i < k; i++) {
+        if (dis[i] != prev_dis) {
+            if (i > prev_i + 1) {
+                // sort between prev_i and i - 1
+                std::sort (idx + prev_i, idx + i);
+            }
+            prev_i = i;
+            prev_dis = dis[i];
+        }
+    }
+}
+
+size_t merge_result_table_with (size_t n, size_t k,
+                                int64_t *I0, float *D0,
+                                const int64_t *I1, const float *D1,
+                                bool keep_min,
+                                int64_t translation)
+{
+    size_t n1 = 0;
+
+#pragma omp parallel reduction(+:n1)
+    {
+        std::vector<int64_t> tmpI (k);
+        std::vector<float> tmpD (k);
+
+#pragma omp for
+        for (size_t i = 0; i < n; i++) {
+            int64_t *lI0 = I0 + i * k;
+            float *lD0 = D0 + i * k;
+            const int64_t *lI1 = I1 + i * k;
+            const float *lD1 = D1 + i * k;
+            size_t r0 = 0;
+            size_t r1 = 0;
+
+            if (keep_min) {
+                for (size_t j = 0; j < k; j++) {
+
+                    if (lI0[r0] >= 0 && lD0[r0] < lD1[r1]) {
+                        tmpD[j] = lD0[r0];
+                        tmpI[j] = lI0[r0];
+                        r0++;
+                    } else if (lD1[r1] >= 0) {
+                        tmpD[j] = lD1[r1];
+                        tmpI[j] = lI1[r1] + translation;
+                        r1++;
+                    } else { // both are NaNs
+                        tmpD[j] = NAN;
+                        tmpI[j] = -1;
+                    }
+                }
+            } else {
+                for (size_t j = 0; j < k; j++) {
+                    if (lI0[r0] >= 0 && lD0[r0] > lD1[r1]) {
+                        tmpD[j] = lD0[r0];
+                        tmpI[j] = lI0[r0];
+                        r0++;
+                    } else if (lD1[r1] >= 0) {
+                        tmpD[j] = lD1[r1];
+                        tmpI[j] = lI1[r1] + translation;
+                        r1++;
+                    } else { // both are NaNs
+                        tmpD[j] = NAN;
+                        tmpI[j] = -1;
+                    }
+                }
+            }
+            n1 += r1;
+            memcpy (lD0, tmpD.data(), sizeof (lD0[0]) * k);
+            memcpy (lI0, tmpI.data(), sizeof (lI0[0]) * k);
+        }
+    }
+
+    return n1;
+}
+
+
+size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
+                                   size_t k2, const int64_t *v2_in)
+{
+    if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1);
+    int64_t *v2 = new int64_t [k2];
+    memcpy (v2, v2_in, sizeof (int64_t) * k2);
+    std::sort (v2, v2 + k2);
+    { // de-dup v2
+        int64_t prev = -1;
+        size_t wp = 0;
+        for (size_t i = 0; i < k2; i++) {
+            if (v2 [i] != prev) {
+                v2[wp++] = prev = v2 [i];
+            }
+        }
+        k2 = wp;
+    }
+    const int64_t seen_flag = 1L << 60;
+    size_t count = 0;
+    for (size_t i = 0; i < k1; i++) {
+        int64_t q = v1 [i];
+        size_t i0 = 0, i1 = k2;
+        while (i0 + 1 < i1) {
+            size_t imed = (i1 + i0) / 2;
+            int64_t piv = v2 [imed] & ~seen_flag;
+            if (piv <= q) i0 = imed;
+            else          i1 = imed;
+        }
+        if (v2 [i0] == q) {
+            count++;
+            v2 [i0] |= seen_flag;
+        }
+    }
+    delete [] v2;
+
+    return count;
+}
+
+double imbalance_factor (int k, const int *hist) {
+    double tot = 0, uf = 0;
+
+    for (int i = 0 ; i < k ; i++) {
+        tot += hist[i];
+        uf += hist[i] * (double) hist[i];
+    }
+    uf = uf * k / (tot * tot);
+
+    return uf;
+}
+
+
+double imbalance_factor (int n, int k, const int64_t *assign) {
+    std::vector<int> hist(k, 0);
+    for (int i = 0; i < n; i++) {
+        hist[assign[i]]++;
+    }
+
+    return imbalance_factor (k, hist.data());
+}
+
+
+int ivec_hist (size_t n, const int * v, int vmax, int *hist) {
+    memset (hist, 0, sizeof(hist[0]) * vmax);
+    int nout = 0;
+    while (n--) {
+        if (v[n] < 0 || v[n] >= vmax) nout++;
+        else hist[v[n]]++;
+    }
+    return nout;
+}
+
+
+void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist)
+{
+    FAISS_THROW_IF_NOT (nbits % 8 == 0);
+    size_t d = nbits / 8;
+    std::vector<int> accu(d * 256);
+    const uint8_t *c = codes;
+    for (size_t i = 0; i < n; i++)
+        for(int j = 0; j < d; j++)
+            accu[j * 256 + *c++]++;
+    memset (hist, 0, sizeof(*hist) * nbits);
+    for (int i = 0; i < d; i++) {
+        const int *ai = accu.data() + i * 256;
+        int * hi = hist + i * 8;
+        for (int j = 0; j < 256; j++)
+            for (int k = 0; k < 8; k++)
+                if ((j >> k) & 1)
+                    hi[k] += ai[j];
+    }
+
+}
+
+
+size_t ivec_checksum (size_t n, const int *a)
+{
+    size_t cs = 112909;
+    while (n--) cs = cs * 65713 + a[n] * 1686049;
+    return cs;
+}
+
+
+namespace {
+    struct ArgsortComparator {
+        const float *vals;
+        bool operator() (const size_t a, const size_t b) const {
+            return vals[a] < vals[b];
+        }
+    };
+
+    struct SegmentS {
+        size_t i0; // begin pointer in the permutation array
+        size_t i1; // end
+        size_t len() const {
+            return i1 - i0;
+        }
+    };
+
+    // see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
+    // extended to > 1 merge thread
+
+    // merges 2 ranges that should be consecutive on the source into
+    // the union of the two on the destination
+    template<typename T>
+    void parallel_merge (const T *src, T *dst,
+                         SegmentS &s1, SegmentS & s2, int nt,
+                         const ArgsortComparator & comp) {
+        if (s2.len() > s1.len()) { // make sure that s1 larger than s2
+            std::swap(s1, s2);
+        }
+
+        // compute sub-ranges for each thread
+        SegmentS s1s[nt], s2s[nt], sws[nt];
+        s2s[0].i0 = s2.i0;
+        s2s[nt - 1].i1 = s2.i1;
+
+        // not sure parallel actually helps here
+#pragma omp parallel for num_threads(nt)
+        for (int t = 0; t < nt; t++) {
+            s1s[t].i0 = s1.i0 + s1.len() * t / nt;
+            s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;
+
+            if (t + 1 < nt) {
+                T pivot = src[s1s[t].i1];
+                size_t i0 = s2.i0, i1 = s2.i1;
+                while (i0 + 1 < i1) {
+                    size_t imed = (i1 + i0) / 2;
+                    if (comp (pivot, src[imed])) {i1 = imed; }
+                    else                         {i0 = imed; }
+                }
+                s2s[t].i1 = s2s[t + 1].i0 = i1;
+            }
+        }
+        s1.i0 = std::min(s1.i0, s2.i0);
+        s1.i1 = std::max(s1.i1, s2.i1);
+        s2 = s1;
+        sws[0].i0 = s1.i0;
+        for (int t = 0; t < nt; t++) {
+            sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
+            if (t + 1 < nt) {
+                sws[t + 1].i0 = sws[t].i1;
+            }
+        }
+        assert(sws[nt - 1].i1 == s1.i1);
+
+        // do the actual merging
+#pragma omp parallel for num_threads(nt)
+        for (int t = 0; t < nt; t++) {
+            SegmentS sw = sws[t];
+            SegmentS s1t = s1s[t];
+            SegmentS s2t = s2s[t];
+            if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
+                for (;;) {
+                    // assert (sw.len() == s1t.len() + s2t.len());
+                    if (comp(src[s1t.i0], src[s2t.i0])) {
+                        dst[sw.i0++] = src[s1t.i0++];
+                        if (s1t.i0 == s1t.i1) break;
+                    } else {
+                        dst[sw.i0++] = src[s2t.i0++];
+                        if (s2t.i0 == s2t.i1) break;
+                    }
+                }
+            }
+            if (s1t.len() > 0) {
+                assert(s1t.len() == sw.len());
+                memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
+            } else if (s2t.len() > 0) {
+                assert(s2t.len() == sw.len());
+                memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
+            }
+        }
+    }
+
+};
+
+void fvec_argsort (size_t n, const float *vals,
+                   size_t *perm)
+{
+    for (size_t i = 0; i < n; i++) perm[i] = i;
+    ArgsortComparator comp = {vals};
+    std::sort (perm, perm + n, comp);
+}
+
+void fvec_argsort_parallel (size_t n, const float *vals,
+                            size_t *perm)
+{
+    size_t * perm2 = new size_t[n];
+    // 2 result tables, during merging, flip between them
+    size_t *permB = perm2, *permA = perm;
+
+    int nt = omp_get_max_threads();
+    { // prepare correct permutation so that the result ends in perm
+      // at final iteration
+        int nseg = nt;
+        while (nseg > 1) {
+            nseg = (nseg + 1) / 2;
+            std::swap (permA, permB);
+        }
+    }
+
+#pragma omp parallel
+    for (size_t i = 0; i < n; i++) permA[i] = i;
+
+    ArgsortComparator comp = {vals};
+
+    SegmentS segs[nt];
+
+    // independent sorts
+#pragma omp parallel for
+    for (int t = 0; t < nt; t++) {
+        size_t i0 = t * n / nt;
+        size_t i1 = (t + 1) * n / nt;
+        SegmentS seg = {i0, i1};
+        std::sort (permA + seg.i0, permA + seg.i1, comp);
+        segs[t] = seg;
+    }
+    int prev_nested = omp_get_nested();
+    omp_set_nested(1);
+
+    int nseg = nt;
+    while (nseg > 1) {
+        int nseg1 = (nseg + 1) / 2;
+        int sub_nt = nseg % 2 == 0 ? nt : nt - 1;
+        int sub_nseg1 = nseg / 2;
+
+#pragma omp parallel for num_threads(nseg1)
+        for (int s = 0; s < nseg; s += 2) {
+            if (s + 1 == nseg) { // otherwise isolated segment
+                memcpy(permB + segs[s].i0, permA + segs[s].i0,
+                       segs[s].len() * sizeof(size_t));
+            } else {
+                int t0 = s * sub_nt / sub_nseg1;
+                int t1 = (s + 1) * sub_nt / sub_nseg1;
+                printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
+                parallel_merge(permA, permB, segs[s], segs[s + 1],
+                               t1 - t0, comp);
+            }
+        }
+        for (int s = 0; s < nseg; s += 2)
+            segs[s / 2] = segs[s];
+        nseg = nseg1;
+        std::swap (permA, permB);
+    }
+    assert (permA == perm);
+    omp_set_nested(prev_nested);
+    delete [] perm2;
+}
+
+
+const float *fvecs_maybe_subsample (
+          size_t d, size_t *n, size_t nmax, const float *x,
+          bool verbose, int64_t seed)
+{
+
+    if (*n <= nmax) return x; // nothing to do
+
+    size_t n2 = nmax;
+    if (verbose) {
+        printf (" Input training set too big (max size is %ld), sampling "
+                "%ld / %ld vectors\n", nmax, n2, *n);
+    }
+    std::vector<int> subset (*n);
+    rand_perm (subset.data (), *n, seed);
+    float *x_subset = new float[n2 * d];
+    for (int64_t i = 0; i < n2; i++)
+        memcpy (&x_subset[i * d],
+                &x[subset[i] * size_t(d)],
+                sizeof (x[0]) * d);
+    *n = n2;
+    return x_subset;
+}
+
+
+void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) {
+    for (size_t i = 0; i < d; ++i) {
+        x_out[i] = 2 * ((x_in[i >> 3] >> (i & 7)) & 1) - 1;
+    }
+}
+
+void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) {
+    for (size_t i = 0; i < d / 8; ++i) {
+        uint8_t b = 0;
+        for (int j = 0; j < 8; ++j) {
+            if (x_in[8 * i + j] > 0) {
+                b |= (1 << j);
+            }
+        }
+        x_out[i] = b;
+    }
+}
+
+
+// from Python's stringobject.c
+uint64_t hash_bytes (const uint8_t *bytes, int64_t n) {
+    const uint8_t *p = bytes;
+    uint64_t x = (uint64_t)(*p) << 7;
+    int64_t len = n;
+    while (--len >= 0) {
+        x = (1000003*x) ^ *p++;
+    }
+    x ^= n;
+    return x;
+}
+
+
+bool check_openmp() {
+    omp_set_num_threads(10);
+
+    if (omp_get_max_threads() != 10) {
+        return false;
+    }
+
+    std::vector<int> nt_per_thread(10);
+    size_t sum = 0;
+    bool in_parallel = true;
+#pragma omp parallel reduction(+: sum)
+    {
+        if (!omp_in_parallel()) {
+            in_parallel = false;
+        }
+
+        int nt = omp_get_num_threads();
+        int rank = omp_get_thread_num();
+
+        nt_per_thread[rank] = nt;
+#pragma omp for
+        for(int i = 0; i < 1000 * 1000 * 10; i++) {
+            sum += i;
+        }
+    }
+
+    if (!in_parallel) {
+        return false;
+    }
+    if (nt_per_thread[0] != 10) {
+        return false;
+    }
+    if (sum == 0) {
+        return false;
+    }
+
+    return true;
+}
+
+} // namespace faiss
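
For orientation, here is a minimal standalone sketch (not part of the gem's diff) of calling one of the vendored utilities added above, faiss::fvec_argsort, whose definition appears in this file. It assumes the function is declared in faiss/utils/utils.h (the header this .cpp includes) and that you link against the bundled faiss library; the sample values and build details are illustrative only.

// Hypothetical usage of faiss::fvec_argsort from the vendored sources.
// Include path and link flags depend on how the gem builds vendor/faiss.
#include <faiss/utils/utils.h>

#include <cstdio>
#include <vector>

int main() {
    std::vector<float> vals = {0.7f, 0.1f, 0.4f};
    std::vector<size_t> perm(vals.size());

    // Fills perm with indices that order vals in increasing value.
    faiss::fvec_argsort(vals.size(), vals.data(), perm.data());

    for (size_t i : perm) {
        std::printf("%zu ", i);   // expected output: 1 2 0
    }
    std::printf("\n");
    return 0;
}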