faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,783 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/utils/utils.h>
11
+
12
+ #include <cstdio>
13
+ #include <cassert>
14
+ #include <cstring>
15
+ #include <cmath>
16
+
17
+ #include <sys/time.h>
18
+ #include <sys/types.h>
19
+ #include <unistd.h>
20
+
21
+ #include <omp.h>
22
+
23
+ #include <algorithm>
24
+ #include <vector>
25
+
26
+ #include <faiss/impl/AuxIndexStructures.h>
27
+ #include <faiss/impl/FaissAssert.h>
28
+ #include <faiss/utils/random.h>
29
+
30
+
31
+
32
+ #ifndef FINTEGER
33
+ #define FINTEGER long
34
+ #endif
35
+
36
+
37
+ extern "C" {
38
+
39
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
40
+
41
+ int sgemm_ (const char *transa, const char *transb, FINTEGER *m, FINTEGER *
42
+ n, FINTEGER *k, const float *alpha, const float *a,
43
+ FINTEGER *lda, const float *b, FINTEGER *
44
+ ldb, float *beta, float *c, FINTEGER *ldc);
45
+
46
+ /* Lapack functions, see http://www.netlib.org/clapack/old/single/sgeqrf.c */
47
+
48
+ int sgeqrf_ (FINTEGER *m, FINTEGER *n, float *a, FINTEGER *lda,
49
+ float *tau, float *work, FINTEGER *lwork, FINTEGER *info);
50
+
51
+ int sorgqr_(FINTEGER *m, FINTEGER *n, FINTEGER *k, float *a,
52
+ FINTEGER *lda, float *tau, float *work,
53
+ FINTEGER *lwork, FINTEGER *info);
54
+
55
+ int sgemv_(const char *trans, FINTEGER *m, FINTEGER *n, float *alpha,
56
+ const float *a, FINTEGER *lda, const float *x, FINTEGER *incx,
57
+ float *beta, float *y, FINTEGER *incy);
58
+
59
+ }
60
+
61
+
62
+ /**************************************************
63
+ * Get some stats about the system
64
+ **************************************************/
65
+
66
+ namespace faiss {
67
+
68
/// Current wall-clock time from gettimeofday(), expressed in milliseconds.
double getmillisecs () {
    struct timeval now;
    gettimeofday (&now, nullptr);
    const double ms_from_seconds = now.tv_sec * 1e3;
    const double ms_from_micros = now.tv_usec * 1e-3;
    return ms_from_seconds + ms_from_micros;
}
73
+
74
/// Value of the x86-64 time-stamp counter (rdtsc); 0 on other architectures.
uint64_t get_cycles () {
#ifndef __x86_64__
    return 0;
#else
    uint32_t lo, hi;
    // rdtsc returns the counter split across EAX (low) and EDX (high)
    asm volatile("rdtsc \n\t"
                 : "=a" (lo),
                   "=d" (hi));
    const uint64_t upper = (uint64_t)hi << 32;
    return upper | (lo);
#endif
}
85
+
86
+
87
+ #ifdef __linux__
88
+
89
+ size_t get_mem_usage_kb ()
90
+ {
91
+ int pid = getpid ();
92
+ char fname[256];
93
+ snprintf (fname, 256, "/proc/%d/status", pid);
94
+ FILE * f = fopen (fname, "r");
95
+ FAISS_THROW_IF_NOT_MSG (f, "cannot open proc status file");
96
+ size_t sz = 0;
97
+ for (;;) {
98
+ char buf [256];
99
+ if (!fgets (buf, 256, f)) break;
100
+ if (sscanf (buf, "VmRSS: %ld kB", &sz) == 1) break;
101
+ }
102
+ fclose (f);
103
+ return sz;
104
+ }
105
+
106
+ #elif __APPLE__
107
+
108
+ size_t get_mem_usage_kb ()
109
+ {
110
+ fprintf(stderr, "WARN: get_mem_usage_kb not implemented on the mac\n");
111
+ return 0;
112
+ }
113
+
114
+ #endif
115
+
116
+
117
+
118
+
119
+
120
/** Apply a sequence of reflections in place to a set of vectors.
 *
 * For each of the n vectors x_i (size d, stored contiguously in x) and for
 * each of the nu vectors u_l (size d, stored contiguously in u), in order:
 *
 *     x_i <- x_i - 2 * <u_l, x_i> * u_l
 *
 * The inner product is accumulated in float with two partial sums (even
 * and odd coordinates).
 *
 * @param u   nu * d floats, the reflection directions
 * @param x   n * d floats, modified in place
 * @param n   number of vectors in x
 * @param d   dimension of the vectors (any value, odd d is supported)
 * @param nu  number of reflections applied to each vector
 */
void reflection (const float * __restrict u,
                 float * __restrict x,
                 size_t n, size_t d, size_t nu)
{
    // largest even number <= d: the unrolled loop must not touch index d
    const size_t d_even = d & ~size_t(1);

    for (size_t i = 0; i < n; i++) {
        const float * up = u;
        for (size_t l = 0; l < nu; l++) {
            float ip1 = 0, ip2 = 0;

            size_t j;
            for (j = 0; j < d_even; j += 2) {
                ip1 += up[j] * x[j];
                ip2 += up[j+1] * x[j+1];
            }
            // leftover coordinate when d is odd (previously read out of
            // bounds as up[d] / x[d])
            if (j < d) {
                ip1 += up[j] * x[j];
            }
            float ip = 2 * (ip1 + ip2);

            for (j = 0; j < d; j++)
                x[j] -= ip * up[j];
            up += d;
        }
        x += d;
    }
}
143
+
144
+
145
+ /* Reference implementation (slower) */
146
/** Straightforward version of reflection(): for every vector of x and
 * every direction of u, in order, x <- x - 2 * <u, x> * u.
 * The inner product is accumulated in double precision.
 *
 * @param u   nu * d floats
 * @param x   n * d floats, updated in place
 */
void reflection_ref (const float * u, float * x, size_t n, size_t d, size_t nu)
{
    float * vec = x;
    for (size_t v = 0; v < n; v++, vec += d) {
        const float * dir = u;
        for (size_t r = 0; r < nu; r++, dir += d) {
            double dot = 0;
            for (size_t c = 0; c < d; c++) {
                dot += dir[c] * vec[c];
            }
            dot *= 2;

            for (size_t c = 0; c < d; c++) {
                vec[c] -= dot * dir[c];
            }
        }
    }
}
166
+
167
+
168
+
169
+
170
+
171
+
172
+ /***************************************************************************
173
+ * Some matrix manipulation functions
174
+ ***************************************************************************/
175
+
176
+
177
+ /* This function exists because the Torch counterpart is extremely slow
178
+ (not multi-threaded + unexpected overhead even in single thread).
179
+ It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2<x|y> */
180
/** Turn a table of inner products into squared L2 distances, in place.
 *
 * On input dis[j * n2 + i] holds <x_j, y_i>; on output it holds
 * nr1[j] + nr2[i] - 2 * <x_j, y_i>.
 *
 * @param dis  n1 * n2 table, row-major, overwritten
 * @param nr1  n1 values added per row
 * @param nr2  n2 values added per column
 */
void inner_product_to_L2sqr (float * __restrict dis,
                             const float * nr1,
                             const float * nr2,
                             size_t n1, size_t n2)
{

#pragma omp parallel for
    for (size_t row = 0; row < n1; row++) {
        float * cell = dis + row * n2;
        float * const row_end = cell + n2;
        const float * col_norm = nr2;
        while (cell != row_end) {
            *cell = nr1[row] + *col_norm - 2 * *cell;
            ++cell;
            ++col_norm;
        }
    }
}
193
+
194
+
195
+ void matrix_qr (int m, int n, float *a)
196
+ {
197
+ FAISS_THROW_IF_NOT (m >= n);
198
+ FINTEGER mi = m, ni = n, ki = mi < ni ? mi : ni;
199
+ std::vector<float> tau (ki);
200
+ FINTEGER lwork = -1, info;
201
+ float work_size;
202
+
203
+ sgeqrf_ (&mi, &ni, a, &mi, tau.data(),
204
+ &work_size, &lwork, &info);
205
+ lwork = size_t(work_size);
206
+ std::vector<float> work (lwork);
207
+
208
+ sgeqrf_ (&mi, &ni, a, &mi,
209
+ tau.data(), work.data(), &lwork, &info);
210
+
211
+ sorgqr_ (&mi, &ni, &ki, a, &mi, tau.data(),
212
+ work.data(), &lwork, &info);
213
+
214
+ }
215
+
216
+
217
+ /***************************************************************************
218
+ * Kmeans subroutine
219
+ ***************************************************************************/
220
+
221
+ // a bit above machine epsilon for float16
222
+
223
+ #define EPS (1 / 1024.)
224
+
225
+ /* For k-means, compute centroids given assignment of vectors to centroids */
226
+ int km_update_centroids (const float * x,
227
+ float * centroids,
228
+ int64_t * assign,
229
+ size_t d, size_t k, size_t n,
230
+ size_t k_frozen)
231
+ {
232
+ k -= k_frozen;
233
+ centroids += k_frozen * d;
234
+
235
+ std::vector<size_t> hassign(k);
236
+ memset (centroids, 0, sizeof(*centroids) * d * k);
237
+
238
+ #pragma omp parallel
239
+ {
240
+ int nt = omp_get_num_threads();
241
+ int rank = omp_get_thread_num();
242
+ // this thread is taking care of centroids c0:c1
243
+ size_t c0 = (k * rank) / nt;
244
+ size_t c1 = (k * (rank + 1)) / nt;
245
+ const float *xi = x;
246
+ size_t nacc = 0;
247
+
248
+ for (size_t i = 0; i < n; i++) {
249
+ int64_t ci = assign[i];
250
+ assert (ci >= 0 && ci < k + k_frozen);
251
+ ci -= k_frozen;
252
+ if (ci >= c0 && ci < c1) {
253
+ float * c = centroids + ci * d;
254
+ hassign[ci]++;
255
+ for (size_t j = 0; j < d; j++)
256
+ c[j] += xi[j];
257
+ nacc++;
258
+ }
259
+ xi += d;
260
+ }
261
+
262
+ }
263
+
264
+ #pragma omp parallel for
265
+ for (size_t ci = 0; ci < k; ci++) {
266
+ float * c = centroids + ci * d;
267
+ float ni = (float) hassign[ci];
268
+ if (ni != 0) {
269
+ for (size_t j = 0; j < d; j++)
270
+ c[j] /= ni;
271
+ }
272
+ }
273
+
274
+ /* Take care of void clusters */
275
+ size_t nsplit = 0;
276
+ RandomGenerator rng (1234);
277
+ for (size_t ci = 0; ci < k; ci++) {
278
+ if (hassign[ci] == 0) { /* need to redefine a centroid */
279
+ size_t cj;
280
+ for (cj = 0; 1; cj = (cj + 1) % k) {
281
+ /* probability to pick this cluster for split */
282
+ float p = (hassign[cj] - 1.0) / (float) (n - k);
283
+ float r = rng.rand_float ();
284
+ if (r < p) {
285
+ break; /* found our cluster to be split */
286
+ }
287
+ }
288
+ memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);
289
+
290
+ /* small symmetric pertubation. Much better than */
291
+ for (size_t j = 0; j < d; j++) {
292
+ if (j % 2 == 0) {
293
+ centroids[ci * d + j] *= 1 + EPS;
294
+ centroids[cj * d + j] *= 1 - EPS;
295
+ } else {
296
+ centroids[ci * d + j] *= 1 - EPS;
297
+ centroids[cj * d + j] *= 1 + EPS;
298
+ }
299
+ }
300
+
301
+ /* assume even split of the cluster */
302
+ hassign[ci] = hassign[cj] / 2;
303
+ hassign[cj] -= hassign[ci];
304
+ nsplit++;
305
+ }
306
+ }
307
+
308
+ return nsplit;
309
+ }
310
+
311
+ #undef EPS
312
+
313
+
314
+
315
+ /***************************************************************************
316
+ * Result list routines
317
+ ***************************************************************************/
318
+
319
+
320
/** Make the order of tied results deterministic.
 *
 * Within every maximal run of consecutive entries of dis that compare
 * equal, the corresponding entries of idx are sorted in increasing order.
 * This includes the last run of the list. NaN distances never compare
 * equal, so they form runs of length 1.
 *
 * @param k    number of results
 * @param idx  k result ids, reordered in place
 * @param dis  k distances, assumed to be already sorted
 */
void ranklist_handle_ties (int k, int64_t *idx, const float *dis)
{
    int run_begin = 0;
    // i == k closes the final run so that ties at the end of the list are
    // sorted too
    for (int i = 1; i <= k; i++) {
        if (i == k || dis[i] != dis[run_begin]) {
            if (i - run_begin > 1) {
                std::sort (idx + run_begin, idx + i);
            }
            run_begin = i;
        }
    }
}
335
+
336
/** Merge a second result table into a first one, query by query.
 *
 * Both tables hold k sorted results for each of the n queries. For every
 * query the two lists are merged and the best k entries are written back
 * to (I0, D0). An entry of table 0 is usable when its id is >= 0, an
 * entry of table 1 when its distance is >= 0; when neither can be taken
 * the slot is filled with (NAN, -1).
 *
 * @param I0, D0       n * k ids / distances, overwritten with the merge
 * @param I1, D1       n * k ids / distances of the second table
 * @param keep_min     true: smaller distances are better, false: larger
 * @param translation  offset added to the ids taken from I1
 * @return total number of entries taken from the second table
 */
size_t merge_result_table_with (size_t n, size_t k,
                                int64_t *I0, float *D0,
                                const int64_t *I1, const float *D1,
                                bool keep_min,
                                int64_t translation)
{
    size_t n1 = 0;

#pragma omp parallel reduction(+:n1)
    {
        // per-thread scratch lists
        std::vector<int64_t> mergedI (k);
        std::vector<float> mergedD (k);

#pragma omp for
        for (size_t q = 0; q < n; q++) {
            int64_t *ids0 = I0 + q * k;
            float *dis0 = D0 + q * k;
            const int64_t *ids1 = I1 + q * k;
            const float *dis1 = D1 + q * k;
            size_t taken0 = 0;
            size_t taken1 = 0;

            for (size_t slot = 0; slot < k; slot++) {
                const float cand0 = dis0[taken0];
                const float cand1 = dis1[taken1];
                const bool first_is_better =
                    keep_min ? cand0 < cand1 : cand0 > cand1;

                if (ids0[taken0] >= 0 && first_is_better) {
                    mergedD[slot] = cand0;
                    mergedI[slot] = ids0[taken0];
                    taken0++;
                } else if (cand1 >= 0) {
                    mergedD[slot] = cand1;
                    mergedI[slot] = ids1[taken1] + translation;
                    taken1++;
                } else { // both are NaNs
                    mergedD[slot] = NAN;
                    mergedI[slot] = -1;
                }
            }
            n1 += taken1;
            memcpy (dis0, mergedD.data(), sizeof (float) * k);
            memcpy (ids0, mergedI.data(), sizeof (int64_t) * k);
        }
    }

    return n1;
}
398
+
399
+
400
+
401
/** Number of ids that two result lists have in common.
 *
 * The shorter list is copied, sorted and de-duplicated; every entry of
 * the longer list is then looked up in it by binary search. An id of the
 * shorter list is counted at most once, even if it occurs several times
 * in the longer one. Bit 60 of the copied ids is used as a "seen" marker,
 * so ids are expected to be below 2^60.
 *
 * A leading -1 in the sorted shorter list is dropped by the
 * de-duplication, i.e. -1 is never counted as a common id.
 *
 * @param k1, v1     size and content of the first list
 * @param k2, v2_in  size and content of the second list (not modified)
 * @return size of the intersection; 0 if either list is empty
 */
size_t ranklist_intersection_size (size_t k1, const int64_t *v1,
                                   size_t k2, const int64_t *v2_in)
{
    // make sure the second list is the shorter one
    if (k2 > k1) return ranklist_intersection_size (k2, v2_in, k1, v1);
    // nothing to intersect with (the lookup below would read v2[0])
    if (k2 == 0) return 0;

    std::vector<int64_t> v2 (v2_in, v2_in + k2);
    std::sort (v2.begin(), v2.end());
    { // de-dup v2
        int64_t prev = -1;
        size_t wp = 0;
        for (size_t i = 0; i < k2; i++) {
            if (v2 [i] != prev) {
                v2[wp++] = prev = v2 [i];
            }
        }
        k2 = wp;
    }
    if (k2 == 0) return 0; // the list held nothing but -1

    // int64_t(1) rather than 1L: long may be only 32 bits wide
    const int64_t seen_flag = int64_t(1) << 60;
    size_t count = 0;
    for (size_t i = 0; i < k1; i++) {
        int64_t q = v1 [i];
        // find the last position whose (unflagged) id is <= q
        size_t i0 = 0, i1 = k2;
        while (i0 + 1 < i1) {
            size_t imed = (i1 + i0) / 2;
            int64_t piv = v2 [imed] & ~seen_flag;
            if (piv <= q) i0 = imed;
            else i1 = imed;
        }
        if (v2 [i0] == q) {
            count++;
            v2 [i0] |= seen_flag; // do not count this id again
        }
    }

    return count;
}
438
+
439
/** Imbalance factor of a histogram: k * sum(h_i^2) / (sum(h_i))^2.
 * Equal to 1 when all k bins have the same count.
 *
 * @param k     number of bins
 * @param hist  k bin counts
 */
double imbalance_factor (int k, const int *hist) {
    double total = 0;
    double sum_of_squares = 0;

    for (int bin = 0; bin < k; ++bin) {
        const double count = hist[bin];
        total += count;
        sum_of_squares += count * count;
    }

    return sum_of_squares * k / (total * total);
}
450
+
451
+
452
+ double imbalance_factor (int n, int k, const int64_t *assign) {
453
+ std::vector<int> hist(k, 0);
454
+ for (int i = 0; i < n; i++) {
455
+ hist[assign[i]]++;
456
+ }
457
+
458
+ return imbalance_factor (k, hist.data());
459
+ }
460
+
461
+
462
+
463
/** Histogram of integer values.
 *
 * @param n     number of values
 * @param v     n values
 * @param vmax  number of bins; values outside [0, vmax) are not binned
 * @param hist  vmax counters, zeroed then filled
 * @return number of values that fell outside [0, vmax)
 */
int ivec_hist (size_t n, const int * v, int vmax, int *hist) {
    memset (hist, 0, sizeof(hist[0]) * vmax);
    int outside = 0;
    // walk the values from last to first
    for (size_t remaining = n; remaining > 0; ) {
        const int value = v[--remaining];
        const bool in_range = !(value < 0 || value >= vmax);
        if (in_range) {
            hist[value]++;
        } else {
            outside++;
        }
    }
    return outside;
}
472
+
473
+
474
+ void bincode_hist(size_t n, size_t nbits, const uint8_t *codes, int *hist)
475
+ {
476
+ FAISS_THROW_IF_NOT (nbits % 8 == 0);
477
+ size_t d = nbits / 8;
478
+ std::vector<int> accu(d * 256);
479
+ const uint8_t *c = codes;
480
+ for (size_t i = 0; i < n; i++)
481
+ for(int j = 0; j < d; j++)
482
+ accu[j * 256 + *c++]++;
483
+ memset (hist, 0, sizeof(*hist) * nbits);
484
+ for (int i = 0; i < d; i++) {
485
+ const int *ai = accu.data() + i * 256;
486
+ int * hi = hist + i * 8;
487
+ for (int j = 0; j < 256; j++)
488
+ for (int k = 0; k < 8; k++)
489
+ if ((j >> k) & 1)
490
+ hi[k] += ai[j];
491
+ }
492
+
493
+ }
494
+
495
+
496
+
497
/** Simple multiplicative checksum of an int array.
 *
 * Starting from 112909, the entries are folded in from last to first:
 * cs = cs * 65713 + a[i] * 1686049, where the product a[i] * 1686049 is
 * taken with 32-bit wraparound and then sign-extended. This is the value
 * the plain int multiplication yields on the usual two's-complement
 * targets, computed without signed overflow (assumes a 32-bit int).
 *
 * @param n  number of entries
 * @param a  n values
 * @return the checksum (112909 for an empty array)
 */
size_t ivec_checksum (size_t n, const int *a)
{
    size_t cs = 112909;
    while (n--) {
        // multiply as unsigned so that overflow wraps instead of being
        // undefined, then reinterpret as a signed 32-bit value
        const uint32_t wrapped = static_cast<uint32_t>(a[n]) * 1686049u;
        const int32_t term = static_cast<int32_t>(wrapped);
        cs = cs * 65713 + static_cast<size_t>(term);
    }
    return cs;
}
503
+
504
+
505
namespace {

/// Orders permutation entries by the float value they point to.
struct ArgsortComparator {
    const float *vals;
    bool operator() (const size_t a, const size_t b) const {
        return vals[a] < vals[b];
    }
};

/// Half-open range [i0, i1) of positions in a permutation array.
struct SegmentS {
    size_t i0; // begin pointer in the permutation array
    size_t i1; // end
    size_t len() const {
        return i1 - i0;
    }
};

// see https://en.wikipedia.org/wiki/Merge_algorithm#Parallel_merge
// extended to > 1 merge thread

/** Merge two sorted ranges of src into dst with nt threads.
 *
 * The two ranges should be consecutive in src; the merged result is
 * written to the union of the two ranges in dst. The longer range is cut
 * into nt equal parts; the other range is cut at the matching positions
 * so that the nt sub-merges are independent and write to consecutive
 * parts of dst.
 *
 * @param src   source array, holding both sorted ranges
 * @param dst   destination array
 * @param s1    first range; on return, the union of both ranges
 * @param s2    second range; on return, the union of both ranges
 * @param nt    number of threads / sub-merges, must be > 0
 * @param comp  ordering of the entries
 */
template<typename T>
void parallel_merge (const T *src, T *dst,
                     SegmentS &s1, SegmentS & s2, int nt,
                     const ArgsortComparator & comp) {
    assert (nt > 0);
    if (s2.len() > s1.len()) { // make sure that s1 larger than s2
        std::swap(s1, s2);
    }

    // compute sub-ranges for each thread
    std::vector<SegmentS> s1s (nt), s2s (nt), sws (nt);
    s2s[0].i0 = s2.i0;
    s2s[nt - 1].i1 = s2.i1;

    // not sure parallel actually helps here
#pragma omp parallel for num_threads(nt)
    for (int t = 0; t < nt; t++) {
        s1s[t].i0 = s1.i0 + s1.len() * t / nt;
        s1s[t].i1 = s1.i0 + s1.len() * (t + 1) / nt;

        if (t + 1 < nt) {
            // the first entry of the next part of s1 is the pivot: all
            // entries of s2 that are not greater than it belong to the
            // parts up to t. upper_bound examines the whole range,
            // including its first entry.
            T pivot = src[s1s[t].i1];
            const T *cut = std::upper_bound (
                  src + s2.i0, src + s2.i1, pivot, comp);
            s2s[t].i1 = s2s[t + 1].i0 = cut - src;
        }
    }
    s1.i0 = std::min(s1.i0, s2.i0);
    s1.i1 = std::max(s1.i1, s2.i1);
    s2 = s1;

    // destination range of each sub-merge
    sws[0].i0 = s1.i0;
    for (int t = 0; t < nt; t++) {
        sws[t].i1 = sws[t].i0 + s1s[t].len() + s2s[t].len();
        if (t + 1 < nt) {
            sws[t + 1].i0 = sws[t].i1;
        }
    }
    assert(sws[nt - 1].i1 == s1.i1);

    // do the actual merging
#pragma omp parallel for num_threads(nt)
    for (int t = 0; t < nt; t++) {
        SegmentS sw = sws[t];
        SegmentS s1t = s1s[t];
        SegmentS s2t = s2s[t];
        if (s1t.i0 < s1t.i1 && s2t.i0 < s2t.i1) {
            for (;;) {
                // assert (sw.len() == s1t.len() + s2t.len());
                if (comp(src[s1t.i0], src[s2t.i0])) {
                    dst[sw.i0++] = src[s1t.i0++];
                    if (s1t.i0 == s1t.i1) break;
                } else {
                    dst[sw.i0++] = src[s2t.i0++];
                    if (s2t.i0 == s2t.i1) break;
                }
            }
        }
        // one of the two inputs is exhausted: copy the rest of the other
        if (s1t.len() > 0) {
            assert(s1t.len() == sw.len());
            memcpy(dst + sw.i0, src + s1t.i0, s1t.len() * sizeof(dst[0]));
        } else if (s2t.len() > 0) {
            assert(s2t.len() == sw.len());
            memcpy(dst + sw.i0, src + s2t.i0, s2t.len() * sizeof(dst[0]));
        }
    }
}

}
597
+
598
+ void fvec_argsort (size_t n, const float *vals,
599
+ size_t *perm)
600
+ {
601
+ for (size_t i = 0; i < n; i++) perm[i] = i;
602
+ ArgsortComparator comp = {vals};
603
+ std::sort (perm, perm + n, comp);
604
+ }
605
+
606
+ void fvec_argsort_parallel (size_t n, const float *vals,
607
+ size_t *perm)
608
+ {
609
+ size_t * perm2 = new size_t[n];
610
+ // 2 result tables, during merging, flip between them
611
+ size_t *permB = perm2, *permA = perm;
612
+
613
+ int nt = omp_get_max_threads();
614
+ { // prepare correct permutation so that the result ends in perm
615
+ // at final iteration
616
+ int nseg = nt;
617
+ while (nseg > 1) {
618
+ nseg = (nseg + 1) / 2;
619
+ std::swap (permA, permB);
620
+ }
621
+ }
622
+
623
+ #pragma omp parallel
624
+ for (size_t i = 0; i < n; i++) permA[i] = i;
625
+
626
+ ArgsortComparator comp = {vals};
627
+
628
+ SegmentS segs[nt];
629
+
630
+ // independent sorts
631
+ #pragma omp parallel for
632
+ for (int t = 0; t < nt; t++) {
633
+ size_t i0 = t * n / nt;
634
+ size_t i1 = (t + 1) * n / nt;
635
+ SegmentS seg = {i0, i1};
636
+ std::sort (permA + seg.i0, permA + seg.i1, comp);
637
+ segs[t] = seg;
638
+ }
639
+ int prev_nested = omp_get_nested();
640
+ omp_set_nested(1);
641
+
642
+ int nseg = nt;
643
+ while (nseg > 1) {
644
+ int nseg1 = (nseg + 1) / 2;
645
+ int sub_nt = nseg % 2 == 0 ? nt : nt - 1;
646
+ int sub_nseg1 = nseg / 2;
647
+
648
+ #pragma omp parallel for num_threads(nseg1)
649
+ for (int s = 0; s < nseg; s += 2) {
650
+ if (s + 1 == nseg) { // otherwise isolated segment
651
+ memcpy(permB + segs[s].i0, permA + segs[s].i0,
652
+ segs[s].len() * sizeof(size_t));
653
+ } else {
654
+ int t0 = s * sub_nt / sub_nseg1;
655
+ int t1 = (s + 1) * sub_nt / sub_nseg1;
656
+ printf("merge %d %d, %d threads\n", s, s + 1, t1 - t0);
657
+ parallel_merge(permA, permB, segs[s], segs[s + 1],
658
+ t1 - t0, comp);
659
+ }
660
+ }
661
+ for (int s = 0; s < nseg; s += 2)
662
+ segs[s / 2] = segs[s];
663
+ nseg = nseg1;
664
+ std::swap (permA, permB);
665
+ }
666
+ assert (permA == perm);
667
+ omp_set_nested(prev_nested);
668
+ delete [] perm2;
669
+ }
670
+
671
+
672
+
673
+
674
+
675
+
676
+
677
+
678
+
679
+
680
+
681
+
682
+
683
+
684
+
685
+
686
+
687
+
688
+ const float *fvecs_maybe_subsample (
689
+ size_t d, size_t *n, size_t nmax, const float *x,
690
+ bool verbose, int64_t seed)
691
+ {
692
+
693
+ if (*n <= nmax) return x; // nothing to do
694
+
695
+ size_t n2 = nmax;
696
+ if (verbose) {
697
+ printf (" Input training set too big (max size is %ld), sampling "
698
+ "%ld / %ld vectors\n", nmax, n2, *n);
699
+ }
700
+ std::vector<int> subset (*n);
701
+ rand_perm (subset.data (), *n, seed);
702
+ float *x_subset = new float[n2 * d];
703
+ for (int64_t i = 0; i < n2; i++)
704
+ memcpy (&x_subset[i * d],
705
+ &x[subset[i] * size_t(d)],
706
+ sizeof (x[0]) * d);
707
+ *n = n2;
708
+ return x_subset;
709
+ }
710
+
711
+
712
/** Expand a bit vector into floats: bit set -> +1, bit clear -> -1.
 * Bit i is bit (i % 8), counted from the least significant one, of byte
 * (i / 8).
 *
 * @param d      number of bits
 * @param x_in   at least (d + 7) / 8 bytes
 * @param x_out  d floats, overwritten
 */
void binary_to_real(size_t d, const uint8_t *x_in, float *x_out) {
    for (size_t bit = 0; bit < d; ++bit) {
        const uint8_t byte = x_in[bit / 8];
        const int is_set = (byte >> (bit % 8)) & 1;
        x_out[bit] = 2 * is_set - 1;
    }
}
717
+
718
/** Pack the signs of a float vector into bits: a component > 0 gives a
 * set bit. Component 8 * i + j goes to bit j (from the least significant
 * one) of byte i. Only d / 8 whole bytes are written; trailing components
 * that do not fill a byte are ignored.
 *
 * @param d      number of floats
 * @param x_in   d floats
 * @param x_out  d / 8 bytes, overwritten
 */
void real_to_binary(size_t d, const float *x_in, uint8_t *x_out) {
    const size_t nbytes = d / 8;
    const float *group = x_in;
    for (size_t byte = 0; byte < nbytes; ++byte, group += 8) {
        uint8_t packed = 0;
        int mask = 1;
        for (int j = 0; j < 8; ++j, mask <<= 1) {
            if (group[j] > 0) {
                packed |= mask;
            }
        }
        x_out[byte] = packed;
    }
}
729
+
730
+
731
+ // from Python's stringobject.c
732
/** Hash of a byte string (algorithm from Python's stringobject.c).
 *
 * x starts as the first byte shifted left by 7, every byte is then folded
 * in with x = (1000003 * x) ^ byte, and the length is xor-ed in at the
 * end. For an empty input (n <= 0) no byte is read: the start value is 0,
 * so that an empty string hashes to 0.
 *
 * @param bytes  n bytes; may be null when n is 0
 * @param n      number of bytes
 */
uint64_t hash_bytes (const uint8_t *bytes, int64_t n) {
    // only look at bytes[0] if it exists
    uint64_t x = n > 0 ? (uint64_t)bytes[0] << 7 : 0;
    for (int64_t i = 0; i < n; i++) {
        x = (1000003*x) ^ bytes[i];
    }
    x ^= n;
    return x;
}
742
+
743
+
744
+ bool check_openmp() {
745
+ omp_set_num_threads(10);
746
+
747
+ if (omp_get_max_threads() != 10) {
748
+ return false;
749
+ }
750
+
751
+ std::vector<int> nt_per_thread(10);
752
+ size_t sum = 0;
753
+ bool in_parallel = true;
754
+ #pragma omp parallel reduction(+: sum)
755
+ {
756
+ if (!omp_in_parallel()) {
757
+ in_parallel = false;
758
+ }
759
+
760
+ int nt = omp_get_num_threads();
761
+ int rank = omp_get_thread_num();
762
+
763
+ nt_per_thread[rank] = nt;
764
+ #pragma omp for
765
+ for(int i = 0; i < 1000 * 1000 * 10; i++) {
766
+ sum += i;
767
+ }
768
+ }
769
+
770
+ if (!in_parallel) {
771
+ return false;
772
+ }
773
+ if (nt_per_thread[0] != 10) {
774
+ return false;
775
+ }
776
+ if (sum == 0) {
777
+ return false;
778
+ }
779
+
780
+ return true;
781
+ }
782
+
783
+ } // namespace faiss