faiss 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -30,6 +30,7 @@
30
30
  #include <cstdio>
31
31
 
32
32
  #include <limits>
33
+ #include <utility>
33
34
 
34
35
  #include <faiss/utils/ordered_key_value.h>
35
36
 
@@ -200,6 +201,110 @@ inline void maxheap_replace_top(
200
201
  heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
201
202
  }
202
203
 
204
+ /*******************************************************************
205
+ * Basic heap<std::pair<>> ops: push and pop
206
+ *******************************************************************/
207
+
208
+ // This section contains a heap implementation that works with
209
+ // std::pair<Priority, Value> elements.
210
+
211
+ /** Pops the top element from the heap defined by bh_val[0..k-1] and
212
+ * bh_ids[0..k-1]. on output the element at k-1 is undefined.
213
+ */
214
+ template <class C>
215
+ inline void heap_pop(size_t k, std::pair<typename C::T, typename C::TI>* bh) {
216
+ bh--; /* Use 1-based indexing for easier node->child translation */
217
+ typename C::T val = bh[k].first;
218
+ typename C::TI id = bh[k].second;
219
+ size_t i = 1, i1, i2;
220
+ while (1) {
221
+ i1 = i << 1;
222
+ i2 = i1 + 1;
223
+ if (i1 > k)
224
+ break;
225
+ if ((i2 == k + 1) ||
226
+ C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
227
+ if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
228
+ break;
229
+ }
230
+ bh[i] = bh[i1];
231
+ i = i1;
232
+ } else {
233
+ if (C::cmp2(val, bh[i2].first, id, bh[i2].second)) {
234
+ break;
235
+ }
236
+ bh[i] = bh[i2];
237
+ i = i2;
238
+ }
239
+ }
240
+ bh[i] = bh[k];
241
+ }
242
+
243
+ /** Pushes the element (val, ids) into the heap bh_val[0..k-2] and
244
+ * bh_ids[0..k-2]. on output the element at k-1 is defined.
245
+ */
246
+ template <class C>
247
+ inline void heap_push(
248
+ size_t k,
249
+ std::pair<typename C::T, typename C::TI>* bh,
250
+ typename C::T val,
251
+ typename C::TI id) {
252
+ bh--; /* Use 1-based indexing for easier node->child translation */
253
+ size_t i = k, i_father;
254
+ while (i > 1) {
255
+ i_father = i >> 1;
256
+ auto bh_v = bh[i_father];
257
+ if (!C::cmp2(val, bh_v.first, id, bh_v.second)) {
258
+ /* the heap structure is ok */
259
+ break;
260
+ }
261
+ bh[i] = bh_v;
262
+ i = i_father;
263
+ }
264
+ bh[i] = std::make_pair(val, id);
265
+ }
266
+
267
+ /**
268
+ * Replaces the top element from the heap defined by bh_val[0..k-1] and
269
+ * bh_ids[0..k-1], and for identical bh_val[] values also sorts by bh_ids[]
270
+ * values.
271
+ */
272
+ template <class C>
273
+ inline void heap_replace_top(
274
+ size_t k,
275
+ std::pair<typename C::T, typename C::TI>* bh,
276
+ typename C::T val,
277
+ typename C::TI id) {
278
+ bh--; /* Use 1-based indexing for easier node->child translation */
279
+ size_t i = 1, i1, i2;
280
+ while (1) {
281
+ i1 = i << 1;
282
+ i2 = i1 + 1;
283
+ if (i1 > k) {
284
+ break;
285
+ }
286
+
287
+ // Note that C::cmp2() is a bool function answering
288
+ // `(a1 > b1) || ((a1 == b1) && (a2 > b2))` for max
289
+ // heap and same with the `<` sign for min heap.
290
+ if ((i2 == k + 1) ||
291
+ C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
292
+ if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
293
+ break;
294
+ }
295
+ bh[i] = bh[i1];
296
+ i = i1;
297
+ } else {
298
+ if (C::cmp2(val, bh[i2].first, id, bh[i2].second)) {
299
+ break;
300
+ }
301
+ bh[i] = bh[i2];
302
+ i = i2;
303
+ }
304
+ }
305
+ bh[i] = std::make_pair(val, id);
306
+ }
307
+
203
308
  /*******************************************************************
204
309
  * Heap initialization
205
310
  *******************************************************************/
@@ -0,0 +1,342 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/utils/NeuralNet.h>
9
+
10
+ #include <algorithm>
11
+ #include <cstddef>
12
+ #include <cstring>
13
+
14
+ #include <faiss/impl/FaissAssert.h>
15
+ #include <faiss/utils/distances.h>
16
+
17
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
18
+
19
+ extern "C" {
20
+
21
+ int sgemm_(
22
+ const char* transa,
23
+ const char* transb,
24
+ FINTEGER* m,
25
+ FINTEGER* n,
26
+ FINTEGER* k,
27
+ const float* alpha,
28
+ const float* a,
29
+ FINTEGER* lda,
30
+ const float* b,
31
+ FINTEGER* ldb,
32
+ float* beta,
33
+ float* c,
34
+ FINTEGER* ldc);
35
+ }
36
+
37
+ namespace faiss {
38
+
39
+ namespace nn {
40
+
41
+ /*************************************************************
42
+ * Tensor2D implementation
43
+ *************************************************************/
44
+
45
+ template <typename T>
46
+ Tensor2DTemplate<T>::Tensor2DTemplate(size_t n0, size_t n1, const T* data_in)
47
+ : shape{n0, n1}, v(n0 * n1) {
48
+ if (data_in) {
49
+ memcpy(data(), data_in, n0 * n1 * sizeof(T));
50
+ }
51
+ }
52
+
53
+ template <typename T>
54
+ Tensor2DTemplate<T>& Tensor2DTemplate<T>::operator+=(
55
+ const Tensor2DTemplate<T>& other) {
56
+ FAISS_THROW_IF_NOT(shape[0] == other.shape[0]);
57
+ FAISS_THROW_IF_NOT(shape[1] == other.shape[1]);
58
+ for (size_t i = 0; i < numel(); i++) {
59
+ v[i] += other.v[i];
60
+ }
61
+ return *this;
62
+ }
63
+
64
+ template <typename T>
65
+ Tensor2DTemplate<T> Tensor2DTemplate<T>::column(size_t j) const {
66
+ size_t n = shape[0], d = shape[1];
67
+ Tensor2DTemplate<T> out(n, 1);
68
+ for (size_t i = 0; i < n; i++) {
69
+ out.v[i] = v[i * d + j];
70
+ }
71
+ return out;
72
+ }
73
+
74
+ // explicit template instantiation
75
+ template struct Tensor2DTemplate<float>;
76
+ template struct Tensor2DTemplate<int32_t>;
77
+
78
+ /*************************************************************
79
+ * Layers implementation
80
+ *************************************************************/
81
+
82
+ Linear::Linear(size_t in_features, size_t out_features, bool bias)
83
+ : in_features(in_features),
84
+ out_features(out_features),
85
+ weight(in_features * out_features) {
86
+ if (bias) {
87
+ this->bias.resize(out_features);
88
+ }
89
+ }
90
+
91
+ Tensor2D Linear::operator()(const Tensor2D& x) const {
92
+ FAISS_THROW_IF_NOT(x.shape[1] == in_features);
93
+ size_t n = x.shape[0];
94
+ Tensor2D output(n, out_features);
95
+
96
+ float one = 1, zero = 0;
97
+ FINTEGER nbiti = out_features, ni = n, di = in_features;
98
+
99
+ sgemm_("Transposed",
100
+ "Not transposed",
101
+ &nbiti,
102
+ &ni,
103
+ &di,
104
+ &one,
105
+ weight.data(),
106
+ &di,
107
+ x.data(),
108
+ &di,
109
+ &zero,
110
+ output.data(),
111
+ &nbiti);
112
+
113
+ if (bias.size() > 0) {
114
+ FAISS_THROW_IF_NOT(bias.size() == out_features);
115
+ for (size_t i = 0; i < n; i++) {
116
+ for (size_t j = 0; j < out_features; j++) {
117
+ output.v[i * out_features + j] += bias[j];
118
+ }
119
+ }
120
+ }
121
+
122
+ return output;
123
+ }
124
+
125
+ Embedding::Embedding(size_t num_embeddings, size_t embedding_dim)
126
+ : num_embeddings(num_embeddings), embedding_dim(embedding_dim) {
127
+ weight.resize(num_embeddings * embedding_dim);
128
+ }
129
+
130
+ Tensor2D Embedding::operator()(const Int32Tensor2D& code) const {
131
+ FAISS_THROW_IF_NOT(code.shape[1] == 1);
132
+ size_t n = code.shape[0];
133
+ Tensor2D output(n, embedding_dim);
134
+ for (size_t i = 0; i < n; ++i) {
135
+ size_t ci = code.v[i];
136
+ FAISS_THROW_IF_NOT(ci < num_embeddings);
137
+ memcpy(output.data() + i * embedding_dim,
138
+ weight.data() + ci * embedding_dim,
139
+ sizeof(float) * embedding_dim);
140
+ }
141
+ return output; // TODO figure out how std::move works
142
+ }
143
+
144
+ namespace {
145
+
146
+ void inplace_relu(Tensor2D& x) {
147
+ for (size_t i = 0; i < x.numel(); i++) {
148
+ x.v[i] = std::max(0.0f, x.v[i]);
149
+ }
150
+ }
151
+
152
+ Tensor2D concatenate_rows(const Tensor2D& x, const Tensor2D& y) {
153
+ size_t n = x.shape[0], d1 = x.shape[1], d2 = y.shape[1];
154
+ FAISS_THROW_IF_NOT(n == y.shape[0]);
155
+ Tensor2D out(n, d1 + d2);
156
+ for (size_t i = 0; i < n; i++) {
157
+ memcpy(out.data() + i * (d1 + d2),
158
+ x.data() + i * d1,
159
+ sizeof(float) * d1);
160
+ memcpy(out.data() + i * (d1 + d2) + d1,
161
+ y.data() + i * d2,
162
+ sizeof(float) * d2);
163
+ }
164
+ return out;
165
+ }
166
+
167
+ } // anonymous namespace
168
+
169
+ Tensor2D FFN::operator()(const Tensor2D& x_in) const {
170
+ Tensor2D x = linear1(x_in);
171
+ inplace_relu(x);
172
+ return linear2(x);
173
+ }
174
+
175
+ } // namespace nn
176
+
177
+ /*************************************************************
178
+ * QINCoStep implementation
179
+ *************************************************************/
180
+
181
+ using namespace nn;
182
+
183
+ QINCoStep::QINCoStep(int d, int K, int L, int h)
184
+ : d(d), K(K), L(L), h(h), codebook(K, d), MLPconcat(2 * d, d) {
185
+ for (int i = 0; i < L; i++) {
186
+ residual_blocks.emplace_back(d, h);
187
+ }
188
+ }
189
+
190
+ nn::Tensor2D QINCoStep::decode(
191
+ const nn::Tensor2D& xhat,
192
+ const nn::Int32Tensor2D& codes) const {
193
+ size_t n = xhat.shape[0];
194
+ FAISS_THROW_IF_NOT(n == codes.shape[0]);
195
+ Tensor2D zqs = codebook(codes);
196
+ Tensor2D cc = concatenate_rows(zqs, xhat);
197
+ zqs += MLPconcat(cc);
198
+ for (int i = 0; i < L; i++) {
199
+ zqs += residual_blocks[i](zqs);
200
+ }
201
+ return zqs;
202
+ }
203
+
204
+ nn::Int32Tensor2D QINCoStep::encode(
205
+ const nn::Tensor2D& xhat,
206
+ const nn::Tensor2D& x,
207
+ nn::Tensor2D* residuals) const {
208
+ size_t n = xhat.shape[0];
209
+ FAISS_THROW_IF_NOT(
210
+ n == x.shape[0] && xhat.shape[1] == d && x.shape[1] == d);
211
+
212
+ // repeated codebook
213
+ Tensor2D zqs_r(n * K, d); // size n, K, d
214
+ Tensor2D cc(n * K, d * 2); // size n, K, d * 2
215
+ size_t d = this->d;
216
+
217
+ auto copy_row = [d](Tensor2D& t, size_t i, size_t j, const float* data) {
218
+ assert(i <= t.shape[0] && j <= t.shape[1]);
219
+ memcpy(t.data() + i * t.shape[1] + j, data, sizeof(float) * d);
220
+ };
221
+
222
+ // manual broadcasting
223
+ for (size_t i = 0; i < n; i++) {
224
+ for (size_t j = 0; j < K; j++) {
225
+ copy_row(zqs_r, i * K + j, 0, codebook.data() + j * d);
226
+ copy_row(cc, i * K + j, 0, codebook.data() + j * d);
227
+ copy_row(cc, i * K + j, d, xhat.data() + i * d);
228
+ }
229
+ }
230
+
231
+ zqs_r += MLPconcat(cc);
232
+
233
+ // residual blocks
234
+ for (int i = 0; i < L; i++) {
235
+ zqs_r += residual_blocks[i](zqs_r);
236
+ }
237
+
238
+ // add the xhat
239
+ for (size_t i = 0; i < n; i++) {
240
+ float* zqs_r_row = zqs_r.data() + i * K * d;
241
+ const float* xhat_row = xhat.data() + i * d;
242
+ for (size_t l = 0; l < K; l++) {
243
+ for (size_t j = 0; j < d; j++) {
244
+ zqs_r_row[j] += xhat_row[j];
245
+ }
246
+ zqs_r_row += d;
247
+ }
248
+ }
249
+
250
+ // perform assignment, finding the nearest
251
+ nn::Int32Tensor2D codes(n, 1);
252
+ float* res = nullptr;
253
+ if (residuals) {
254
+ FAISS_THROW_IF_NOT(
255
+ residuals->shape[0] == n && residuals->shape[1] == d);
256
+ res = residuals->data();
257
+ }
258
+
259
+ for (size_t i = 0; i < n; i++) {
260
+ const float* q = x.data() + i * d;
261
+ const float* db = zqs_r.data() + i * K * d;
262
+ float dis_min = HUGE_VALF;
263
+ int64_t idx = -1;
264
+ for (size_t j = 0; j < K; j++) {
265
+ float dis = fvec_L2sqr(q, db, d);
266
+ if (dis < dis_min) {
267
+ dis_min = dis;
268
+ idx = j;
269
+ }
270
+ db += d;
271
+ }
272
+ codes.v[i] = idx;
273
+ if (res) {
274
+ const float* xhat_row = xhat.data() + i * d;
275
+ const float* xhat_next_row = zqs_r.data() + (i * K + idx) * d;
276
+ for (size_t j = 0; j < d; j++) {
277
+ res[j] = xhat_next_row[j] - xhat_row[j];
278
+ }
279
+ res += d;
280
+ }
281
+ }
282
+ return codes;
283
+ }
284
+
285
+ /*************************************************************
286
+ * QINCo implementation
287
+ *************************************************************/
288
+
289
+ QINCo::QINCo(int d, int K, int L, int M, int h)
290
+ : NeuralNetCodec(d, M), K(K), L(L), h(h), codebook0(K, d) {
291
+ for (int i = 1; i < M; i++) {
292
+ steps.emplace_back(d, K, L, h);
293
+ }
294
+ }
295
+
296
+ nn::Tensor2D QINCo::decode(const nn::Int32Tensor2D& codes) const {
297
+ FAISS_THROW_IF_NOT(codes.shape[1] == M);
298
+ Tensor2D xhat = codebook0(codes.column(0));
299
+ for (int i = 1; i < M; i++) {
300
+ xhat += steps[i - 1].decode(xhat, codes.column(i));
301
+ }
302
+ return xhat;
303
+ }
304
+
305
+ nn::Int32Tensor2D QINCo::encode(const nn::Tensor2D& x) const {
306
+ FAISS_THROW_IF_NOT(x.shape[1] == d);
307
+ size_t n = x.shape[0];
308
+ Int32Tensor2D codes(n, M);
309
+ Tensor2D xhat(n, d);
310
+ {
311
+ // assign to first codebook as a batch
312
+ std::vector<float> dis(n);
313
+ std::vector<int64_t> codes64(n);
314
+ knn_L2sqr(
315
+ x.data(),
316
+ codebook0.data(),
317
+ d,
318
+ n,
319
+ K,
320
+ 1,
321
+ dis.data(),
322
+ codes64.data());
323
+ for (size_t i = 0; i < n; i++) {
324
+ codes.v[i * M] = codes64[i];
325
+ memcpy(xhat.data() + i * d,
326
+ codebook0.data() + codes64[i] * d,
327
+ sizeof(float) * d);
328
+ }
329
+ }
330
+
331
+ Tensor2D toadd(n, d);
332
+ for (int i = 1; i < M; i++) {
333
+ Int32Tensor2D ci = steps[i - 1].encode(xhat, x, &toadd);
334
+ for (size_t j = 0; j < n; j++) {
335
+ codes.v[j * M + i] = ci.v[j];
336
+ }
337
+ xhat += toadd;
338
+ }
339
+ return codes;
340
+ }
341
+
342
+ } // namespace faiss
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ /** Implements a few neural net layers, mainly to support QINCo */
9
+
10
+ #pragma once
11
+
12
+ #include <cstdint>
13
+ #include <cstdio>
14
+ #include <vector>
15
+
16
+ namespace faiss {
17
+
18
+ // the names are based on the Pytorch names (more or less)
19
+ namespace nn {
20
+
21
+ // container for intermediate steps of the neural net
22
+ template <typename T>
23
+ struct Tensor2DTemplate {
24
+ size_t shape[2];
25
+ std::vector<T> v;
26
+
27
+ Tensor2DTemplate(size_t n0, size_t n1, const T* data = nullptr);
28
+
29
+ Tensor2DTemplate& operator+=(const Tensor2DTemplate&);
30
+
31
+ /// get column #j as a 1-column Tensor2D
32
+ Tensor2DTemplate column(size_t j) const;
33
+
34
+ size_t numel() const {
35
+ return shape[0] * shape[1];
36
+ }
37
+ T* data() {
38
+ return v.data();
39
+ }
40
+ const T* data() const {
41
+ return v.data();
42
+ }
43
+ };
44
+
45
+ using Tensor2D = Tensor2DTemplate<float>;
46
+ using Int32Tensor2D = Tensor2DTemplate<int32_t>;
47
+
48
+ /// minimal translation of nn.Linear
49
+ struct Linear {
50
+ size_t in_features, out_features;
51
+ std::vector<float> weight;
52
+ std::vector<float> bias;
53
+
54
+ Linear(size_t in_features, size_t out_features, bool bias = true);
55
+
56
+ Tensor2D operator()(const Tensor2D& x) const;
57
+ };
58
+
59
+ /// minimal translation of nn.Embedding
60
+ struct Embedding {
61
+ size_t num_embeddings, embedding_dim;
62
+ std::vector<float> weight;
63
+
64
+ Embedding(size_t num_embeddings, size_t embedding_dim);
65
+
66
+ Tensor2D operator()(const Int32Tensor2D&) const;
67
+
68
+ float* data() {
69
+ return weight.data();
70
+ }
71
+
72
+ const float* data() const {
73
+ return weight.data();
74
+ }
75
+ };
76
+
77
+ /// Feed forward layer that expands to a hidden dimension, applies a ReLU non
78
+ /// linearity and maps back to the original dimension
79
+ struct FFN {
80
+ Linear linear1, linear2;
81
+
82
+ FFN(int d, int h) : linear1(d, h, false), linear2(h, d, false) {}
83
+
84
+ Tensor2D operator()(const Tensor2D& x) const;
85
+ };
86
+
87
+ } // namespace nn
88
+
89
+ // Translation of the QINCo implementation from
90
+ // https://github.com/facebookresearch/Qinco/blob/main/model_qinco.py
91
+
92
+ struct QINCoStep {
93
+ /// d: input dim, K: codebook size, L: # of residual blocks, h: hidden dim
94
+ int d, K, L, h;
95
+
96
+ QINCoStep(int d, int K, int L, int h);
97
+
98
+ nn::Embedding codebook;
99
+ nn::Linear MLPconcat;
100
+ std::vector<nn::FFN> residual_blocks;
101
+
102
+ nn::FFN& get_residual_block(int i) {
103
+ return residual_blocks[i];
104
+ }
105
+
106
+ /** encode a set of vectors x with initial estimate xhat. Optionally return
107
+ * the delta to be added to xhat to form the new xhat */
108
+ nn::Int32Tensor2D encode(
109
+ const nn::Tensor2D& xhat,
110
+ const nn::Tensor2D& x,
111
+ nn::Tensor2D* residuals = nullptr) const;
112
+
113
+ nn::Tensor2D decode(
114
+ const nn::Tensor2D& xhat,
115
+ const nn::Int32Tensor2D& codes) const;
116
+ };
117
+
118
+ struct NeuralNetCodec {
119
+ int d, M;
120
+
121
+ NeuralNetCodec(int d, int M) : d(d), M(M) {}
122
+
123
+ virtual nn::Tensor2D decode(const nn::Int32Tensor2D& codes) const = 0;
124
+ virtual nn::Int32Tensor2D encode(const nn::Tensor2D& x) const = 0;
125
+
126
+ virtual ~NeuralNetCodec() {}
127
+ };
128
+
129
+ struct QINCo : NeuralNetCodec {
130
+ int K, L, h;
131
+ nn::Embedding codebook0;
132
+ std::vector<QINCoStep> steps;
133
+
134
+ QINCo(int d, int K, int L, int M, int h);
135
+
136
+ QINCoStep& get_step(int i) {
137
+ return steps[i];
138
+ }
139
+
140
+ nn::Tensor2D decode(const nn::Int32Tensor2D& codes) const override;
141
+
142
+ nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;
143
+
144
+ virtual ~QINCo() {}
145
+ };
146
+
147
+ } // namespace faiss
@@ -9,6 +9,7 @@
9
9
 
10
10
  #include <condition_variable>
11
11
  #include <deque>
12
+ #include <functional>
12
13
  #include <future>
13
14
  #include <thread>
14
15
 
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstdint>
11
+
12
+ namespace faiss {
13
+
14
+ namespace {
15
+
16
+ union fp32_bits {
17
+ uint32_t as_u32;
18
+ float as_f32;
19
+ };
20
+
21
+ } // namespace
22
+
23
+ inline uint16_t encode_bf16(const float f) {
24
+ // Round off
25
+ fp32_bits fp;
26
+ fp.as_f32 = f;
27
+ return static_cast<uint16_t>((fp.as_u32 + 0x8000) >> 16);
28
+ }
29
+
30
+ inline float decode_bf16(const uint16_t v) {
31
+ fp32_bits fp;
32
+ fp.as_u32 = (uint32_t(v) << 16);
33
+ return fp.as_f32;
34
+ }
35
+
36
+ } // namespace faiss