faiss 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +9 -2
  6. data/ext/faiss/index.cpp +1 -1
  7. data/ext/faiss/index_binary.cpp +2 -2
  8. data/ext/faiss/product_quantizer.cpp +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +7 -7
  11. data/vendor/faiss/faiss/AutoTune.h +1 -2
  12. data/vendor/faiss/faiss/Clustering.cpp +39 -22
  13. data/vendor/faiss/faiss/Clustering.h +40 -21
  14. data/vendor/faiss/faiss/IVFlib.cpp +26 -12
  15. data/vendor/faiss/faiss/Index.cpp +1 -1
  16. data/vendor/faiss/faiss/Index.h +40 -10
  17. data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
  20. data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +8 -19
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
  28. data/vendor/faiss/faiss/IndexFastScan.h +9 -8
  29. data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
  30. data/vendor/faiss/faiss/IndexFlat.h +20 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
  34. data/vendor/faiss/faiss/IndexHNSW.h +62 -49
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
  36. data/vendor/faiss/faiss/IndexIDMap.h +24 -2
  37. data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
  38. data/vendor/faiss/faiss/IndexIVF.h +46 -6
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
  41. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
  43. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
  44. data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
  45. data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
  46. data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
  47. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
  48. data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
  49. data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
  50. data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
  51. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
  52. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
  53. data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
  54. data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
  56. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
  57. data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
  58. data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
  59. data/vendor/faiss/faiss/IndexLattice.h +3 -22
  60. data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
  61. data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
  62. data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
  63. data/vendor/faiss/faiss/IndexNSG.h +11 -11
  64. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
  65. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
  66. data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
  67. data/vendor/faiss/faiss/IndexPQ.h +1 -4
  68. data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
  69. data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
  70. data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
  71. data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
  72. data/vendor/faiss/faiss/IndexRefine.h +7 -0
  73. data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
  76. data/vendor/faiss/faiss/IndexShards.cpp +21 -29
  77. data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
  78. data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
  79. data/vendor/faiss/faiss/MatrixStats.h +21 -9
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
  81. data/vendor/faiss/faiss/MetricType.h +7 -2
  82. data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
  83. data/vendor/faiss/faiss/VectorTransform.h +7 -7
  84. data/vendor/faiss/faiss/clone_index.cpp +15 -10
  85. data/vendor/faiss/faiss/clone_index.h +3 -0
  86. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
  87. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
  88. data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
  89. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
  90. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
  91. data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
  96. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
  107. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
  108. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
  110. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
  111. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
  112. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
  113. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
  114. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  115. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  116. data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
  117. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
  118. data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
  119. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
  121. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
  123. data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
  125. data/vendor/faiss/faiss/impl/FaissException.h +13 -34
  126. data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
  127. data/vendor/faiss/faiss/impl/HNSW.h +52 -30
  128. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
  130. data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
  132. data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
  133. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  134. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
  138. data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
  144. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
  145. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
  146. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
  147. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
  148. data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
  149. data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
  150. data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
  151. data/vendor/faiss/faiss/impl/io.cpp +23 -15
  152. data/vendor/faiss/faiss/impl/io.h +4 -4
  153. data/vendor/faiss/faiss/impl/io_macros.h +6 -0
  154. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  155. data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
  159. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
  160. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
  161. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
  162. data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
  163. data/vendor/faiss/faiss/index_factory.cpp +41 -20
  164. data/vendor/faiss/faiss/index_io.h +12 -5
  165. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
  166. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
  167. data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
  168. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
  169. data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
  170. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
  172. data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
  173. data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
  174. data/vendor/faiss/faiss/utils/Heap.h +105 -0
  175. data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
  176. data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
  177. data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
  178. data/vendor/faiss/faiss/utils/bf16.h +36 -0
  179. data/vendor/faiss/faiss/utils/distances.cpp +147 -123
  180. data/vendor/faiss/faiss/utils/distances.h +86 -9
  181. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
  182. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
  183. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  184. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
  185. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
  186. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
  187. data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
  188. data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
  189. data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
  190. data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
  191. data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
  192. data/vendor/faiss/faiss/utils/fp16.h +2 -0
  193. data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
  194. data/vendor/faiss/faiss/utils/hamming.h +58 -0
  195. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
  196. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
  197. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
  198. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
  199. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
  200. data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
  201. data/vendor/faiss/faiss/utils/prefetch.h +77 -0
  202. data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
  203. data/vendor/faiss/faiss/utils/random.cpp +43 -0
  204. data/vendor/faiss/faiss/utils/random.h +25 -0
  205. data/vendor/faiss/faiss/utils/simdlib.h +10 -1
  206. data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
  207. data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
  208. data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
  209. data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
  210. data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
  211. data/vendor/faiss/faiss/utils/sorting.h +27 -0
  212. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +120 -7
  214. data/vendor/faiss/faiss/utils/utils.h +60 -20
  215. metadata +23 -4
  216. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
@@ -30,6 +30,7 @@
30
30
  #include <cstdio>
31
31
 
32
32
  #include <limits>
33
+ #include <utility>
33
34
 
34
35
  #include <faiss/utils/ordered_key_value.h>
35
36
 
@@ -200,6 +201,110 @@ inline void maxheap_replace_top(
200
201
  heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
201
202
  }
202
203
 
204
+ /*******************************************************************
205
+ * Basic heap<std::pair<>> ops: push, pop and replace top
206
+ *******************************************************************/
207
+
208
+ // This section contains a heap implementation that works with
209
+ // std::pair<Priority, Value> elements.
210
+
211
/** Pops the top element from the heap stored in bh[0..k-1]. Each entry is a
 * (value, id) pair and entries are compared with C::cmp2.
 *
 * On output the heap occupies bh[0..k-2] and the element at k-1 is
 * undefined. Calling this with k == 0 is a no-op.
 *
 * @param k   number of entries in the heap before the pop
 * @param bh  heap storage: first = value, second = id
 */
template <class C>
inline void heap_pop(size_t k, std::pair<typename C::T, typename C::TI>* bh) {
    if (k == 0) {
        // nothing to pop: going on would read before bh[0] and overwrite bh[0]
        return;
    }
    bh--; /* Use 1-based indexing for easier node->child translation */
    // the last entry is the one that gets sifted down from the root
    typename C::T val = bh[k].first;
    typename C::TI id = bh[k].second;
    size_t i = 1, i1, i2;
    while (true) {
        i1 = i << 1;
        i2 = i1 + 1;
        if (i1 > k) {
            break;
        }
        // follow the child that C::cmp2 ranks first; when i2 == k + 1 only
        // the left child exists
        if ((i2 == k + 1) ||
            C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
            if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
                break;
            }
            bh[i] = bh[i1];
            i = i1;
        } else {
            if (C::cmp2(val, bh[i2].first, id, bh[i2].second)) {
                break;
            }
            bh[i] = bh[i2];
            i = i2;
        }
    }
    bh[i] = bh[k];
}
242
+
243
/** Pushes the element (val, id) into the heap stored in bh[0..k-2]. Each
 * entry is a (value, id) pair and entries are compared with C::cmp2.
 *
 * On output the heap occupies bh[0..k-1]. Calling this with k == 0 is a
 * no-op, since there is no slot to write the new element to.
 *
 * @param k    number of entries in the heap after the push
 * @param bh   heap storage: first = value, second = id
 * @param val  value of the new element
 * @param id   id of the new element
 */
template <class C>
inline void heap_push(
        size_t k,
        std::pair<typename C::T, typename C::TI>* bh,
        typename C::T val,
        typename C::TI id) {
    if (k == 0) {
        // going on would store the element before bh[0]
        return;
    }
    bh--; /* Use 1-based indexing for easier node->child translation */
    size_t i = k, i_father;
    // move the free slot up while the new element ranks before its parent
    while (i > 1) {
        i_father = i >> 1;
        const auto& parent = bh[i_father];
        if (!C::cmp2(val, parent.first, id, parent.second)) {
            /* the heap structure is ok */
            break;
        }
        bh[i] = parent;
        i = i_father;
    }
    bh[i] = std::make_pair(val, id);
}
266
+
267
/**
 * Replaces the top element of the heap stored in bh[0..k-1] with (val, id)
 * and restores the heap order. Each entry is a (value, id) pair; entries
 * with identical values are additionally ordered by their ids.
 *
 * Calling this with k == 0 is a no-op, since there is no top to replace.
 *
 * @param k    number of entries in the heap (unchanged by the call)
 * @param bh   heap storage: first = value, second = id
 * @param val  value of the replacement element
 * @param id   id of the replacement element
 */
template <class C>
inline void heap_replace_top(
        size_t k,
        std::pair<typename C::T, typename C::TI>* bh,
        typename C::T val,
        typename C::TI id) {
    if (k == 0) {
        // going on would write bh[0] of an empty heap
        return;
    }
    bh--; /* Use 1-based indexing for easier node->child translation */
    size_t i = 1, i1, i2;
    while (true) {
        i1 = i << 1;
        i2 = i1 + 1;
        if (i1 > k) {
            break;
        }

        // Note that C::cmp2() is a bool function answering
        // `(a1 > b1) || ((a1 == b1) && (a2 > b2))` for max
        // heap and same with the `<` sign for min heap.
        if ((i2 == k + 1) ||
            C::cmp2(bh[i1].first, bh[i2].first, bh[i1].second, bh[i2].second)) {
            if (C::cmp2(val, bh[i1].first, id, bh[i1].second)) {
                break;
            }
            bh[i] = bh[i1];
            i = i1;
        } else {
            if (C::cmp2(val, bh[i2].first, id, bh[i2].second)) {
                break;
            }
            bh[i] = bh[i2];
            i = i2;
        }
    }
    bh[i] = std::make_pair(val, id);
}
307
+
203
308
  /*******************************************************************
204
309
  * Heap initialization
205
310
  *******************************************************************/
@@ -0,0 +1,342 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/utils/NeuralNet.h>
9
+
10
+ #include <algorithm>
11
+ #include <cstddef>
12
+ #include <cstring>
13
+
14
+ #include <faiss/impl/FaissAssert.h>
15
+ #include <faiss/utils/distances.h>
16
+
17
+ /* declare BLAS functions, see http://www.netlib.org/clapack/cblas/ */
18
+
19
+ extern "C" {
20
+
21
+ int sgemm_(
22
+ const char* transa,
23
+ const char* transb,
24
+ FINTEGER* m,
25
+ FINTEGER* n,
26
+ FINTEGER* k,
27
+ const float* alpha,
28
+ const float* a,
29
+ FINTEGER* lda,
30
+ const float* b,
31
+ FINTEGER* ldb,
32
+ float* beta,
33
+ float* c,
34
+ FINTEGER* ldc);
35
+ }
36
+
37
+ namespace faiss {
38
+
39
+ namespace nn {
40
+
41
+ /*************************************************************
42
+ * Tensor2D implementation
43
+ *************************************************************/
44
+
45
+ template <typename T>
46
+ Tensor2DTemplate<T>::Tensor2DTemplate(size_t n0, size_t n1, const T* data_in)
47
+ : shape{n0, n1}, v(n0 * n1) {
48
+ if (data_in) {
49
+ memcpy(data(), data_in, n0 * n1 * sizeof(T));
50
+ }
51
+ }
52
+
53
+ template <typename T>
54
+ Tensor2DTemplate<T>& Tensor2DTemplate<T>::operator+=(
55
+ const Tensor2DTemplate<T>& other) {
56
+ FAISS_THROW_IF_NOT(shape[0] == other.shape[0]);
57
+ FAISS_THROW_IF_NOT(shape[1] == other.shape[1]);
58
+ for (size_t i = 0; i < numel(); i++) {
59
+ v[i] += other.v[i];
60
+ }
61
+ return *this;
62
+ }
63
+
64
+ template <typename T>
65
+ Tensor2DTemplate<T> Tensor2DTemplate<T>::column(size_t j) const {
66
+ size_t n = shape[0], d = shape[1];
67
+ Tensor2DTemplate<T> out(n, 1);
68
+ for (size_t i = 0; i < n; i++) {
69
+ out.v[i] = v[i * d + j];
70
+ }
71
+ return out;
72
+ }
73
+
74
+ // explicit template instantiation
75
+ template struct Tensor2DTemplate<float>;
76
+ template struct Tensor2DTemplate<int32_t>;
77
+
78
+ /*************************************************************
79
+ * Layers implementation
80
+ *************************************************************/
81
+
82
+ Linear::Linear(size_t in_features, size_t out_features, bool bias)
83
+ : in_features(in_features),
84
+ out_features(out_features),
85
+ weight(in_features * out_features) {
86
+ if (bias) {
87
+ this->bias.resize(out_features);
88
+ }
89
+ }
90
+
91
+ Tensor2D Linear::operator()(const Tensor2D& x) const {
92
+ FAISS_THROW_IF_NOT(x.shape[1] == in_features);
93
+ size_t n = x.shape[0];
94
+ Tensor2D output(n, out_features);
95
+
96
+ float one = 1, zero = 0;
97
+ FINTEGER nbiti = out_features, ni = n, di = in_features;
98
+
99
+ sgemm_("Transposed",
100
+ "Not transposed",
101
+ &nbiti,
102
+ &ni,
103
+ &di,
104
+ &one,
105
+ weight.data(),
106
+ &di,
107
+ x.data(),
108
+ &di,
109
+ &zero,
110
+ output.data(),
111
+ &nbiti);
112
+
113
+ if (bias.size() > 0) {
114
+ FAISS_THROW_IF_NOT(bias.size() == out_features);
115
+ for (size_t i = 0; i < n; i++) {
116
+ for (size_t j = 0; j < out_features; j++) {
117
+ output.v[i * out_features + j] += bias[j];
118
+ }
119
+ }
120
+ }
121
+
122
+ return output;
123
+ }
124
+
125
+ Embedding::Embedding(size_t num_embeddings, size_t embedding_dim)
126
+ : num_embeddings(num_embeddings), embedding_dim(embedding_dim) {
127
+ weight.resize(num_embeddings * embedding_dim);
128
+ }
129
+
130
+ Tensor2D Embedding::operator()(const Int32Tensor2D& code) const {
131
+ FAISS_THROW_IF_NOT(code.shape[1] == 1);
132
+ size_t n = code.shape[0];
133
+ Tensor2D output(n, embedding_dim);
134
+ for (size_t i = 0; i < n; ++i) {
135
+ size_t ci = code.v[i];
136
+ FAISS_THROW_IF_NOT(ci < num_embeddings);
137
+ memcpy(output.data() + i * embedding_dim,
138
+ weight.data() + ci * embedding_dim,
139
+ sizeof(float) * embedding_dim);
140
+ }
141
+ return output; // TODO figure out how std::move works
142
+ }
143
+
144
+ namespace {
145
+
146
+ void inplace_relu(Tensor2D& x) {
147
+ for (size_t i = 0; i < x.numel(); i++) {
148
+ x.v[i] = std::max(0.0f, x.v[i]);
149
+ }
150
+ }
151
+
152
+ Tensor2D concatenate_rows(const Tensor2D& x, const Tensor2D& y) {
153
+ size_t n = x.shape[0], d1 = x.shape[1], d2 = y.shape[1];
154
+ FAISS_THROW_IF_NOT(n == y.shape[0]);
155
+ Tensor2D out(n, d1 + d2);
156
+ for (size_t i = 0; i < n; i++) {
157
+ memcpy(out.data() + i * (d1 + d2),
158
+ x.data() + i * d1,
159
+ sizeof(float) * d1);
160
+ memcpy(out.data() + i * (d1 + d2) + d1,
161
+ y.data() + i * d2,
162
+ sizeof(float) * d2);
163
+ }
164
+ return out;
165
+ }
166
+
167
+ } // anonymous namespace
168
+
169
+ Tensor2D FFN::operator()(const Tensor2D& x_in) const {
170
+ Tensor2D x = linear1(x_in);
171
+ inplace_relu(x);
172
+ return linear2(x);
173
+ }
174
+
175
+ } // namespace nn
176
+
177
+ /*************************************************************
178
+ * QINCoStep implementation
179
+ *************************************************************/
180
+
181
+ using namespace nn;
182
+
183
+ QINCoStep::QINCoStep(int d, int K, int L, int h)
184
+ : d(d), K(K), L(L), h(h), codebook(K, d), MLPconcat(2 * d, d) {
185
+ for (int i = 0; i < L; i++) {
186
+ residual_blocks.emplace_back(d, h);
187
+ }
188
+ }
189
+
190
+ nn::Tensor2D QINCoStep::decode(
191
+ const nn::Tensor2D& xhat,
192
+ const nn::Int32Tensor2D& codes) const {
193
+ size_t n = xhat.shape[0];
194
+ FAISS_THROW_IF_NOT(n == codes.shape[0]);
195
+ Tensor2D zqs = codebook(codes);
196
+ Tensor2D cc = concatenate_rows(zqs, xhat);
197
+ zqs += MLPconcat(cc);
198
+ for (int i = 0; i < L; i++) {
199
+ zqs += residual_blocks[i](zqs);
200
+ }
201
+ return zqs;
202
+ }
203
+
204
+ nn::Int32Tensor2D QINCoStep::encode(
205
+ const nn::Tensor2D& xhat,
206
+ const nn::Tensor2D& x,
207
+ nn::Tensor2D* residuals) const {
208
+ size_t n = xhat.shape[0];
209
+ FAISS_THROW_IF_NOT(
210
+ n == x.shape[0] && xhat.shape[1] == d && x.shape[1] == d);
211
+
212
+ // repeated codebook
213
+ Tensor2D zqs_r(n * K, d); // size n, K, d
214
+ Tensor2D cc(n * K, d * 2); // size n, K, d * 2
215
+ size_t d = this->d;
216
+
217
+ auto copy_row = [d](Tensor2D& t, size_t i, size_t j, const float* data) {
218
+ assert(i <= t.shape[0] && j <= t.shape[1]);
219
+ memcpy(t.data() + i * t.shape[1] + j, data, sizeof(float) * d);
220
+ };
221
+
222
+ // manual broadcasting
223
+ for (size_t i = 0; i < n; i++) {
224
+ for (size_t j = 0; j < K; j++) {
225
+ copy_row(zqs_r, i * K + j, 0, codebook.data() + j * d);
226
+ copy_row(cc, i * K + j, 0, codebook.data() + j * d);
227
+ copy_row(cc, i * K + j, d, xhat.data() + i * d);
228
+ }
229
+ }
230
+
231
+ zqs_r += MLPconcat(cc);
232
+
233
+ // residual blocks
234
+ for (int i = 0; i < L; i++) {
235
+ zqs_r += residual_blocks[i](zqs_r);
236
+ }
237
+
238
+ // add the xhat
239
+ for (size_t i = 0; i < n; i++) {
240
+ float* zqs_r_row = zqs_r.data() + i * K * d;
241
+ const float* xhat_row = xhat.data() + i * d;
242
+ for (size_t l = 0; l < K; l++) {
243
+ for (size_t j = 0; j < d; j++) {
244
+ zqs_r_row[j] += xhat_row[j];
245
+ }
246
+ zqs_r_row += d;
247
+ }
248
+ }
249
+
250
+ // perform assignment, finding the nearest
251
+ nn::Int32Tensor2D codes(n, 1);
252
+ float* res = nullptr;
253
+ if (residuals) {
254
+ FAISS_THROW_IF_NOT(
255
+ residuals->shape[0] == n && residuals->shape[1] == d);
256
+ res = residuals->data();
257
+ }
258
+
259
+ for (size_t i = 0; i < n; i++) {
260
+ const float* q = x.data() + i * d;
261
+ const float* db = zqs_r.data() + i * K * d;
262
+ float dis_min = HUGE_VALF;
263
+ int64_t idx = -1;
264
+ for (size_t j = 0; j < K; j++) {
265
+ float dis = fvec_L2sqr(q, db, d);
266
+ if (dis < dis_min) {
267
+ dis_min = dis;
268
+ idx = j;
269
+ }
270
+ db += d;
271
+ }
272
+ codes.v[i] = idx;
273
+ if (res) {
274
+ const float* xhat_row = xhat.data() + i * d;
275
+ const float* xhat_next_row = zqs_r.data() + (i * K + idx) * d;
276
+ for (size_t j = 0; j < d; j++) {
277
+ res[j] = xhat_next_row[j] - xhat_row[j];
278
+ }
279
+ res += d;
280
+ }
281
+ }
282
+ return codes;
283
+ }
284
+
285
+ /*************************************************************
286
+ * QINCo implementation
287
+ *************************************************************/
288
+
289
+ QINCo::QINCo(int d, int K, int L, int M, int h)
290
+ : NeuralNetCodec(d, M), K(K), L(L), h(h), codebook0(K, d) {
291
+ for (int i = 1; i < M; i++) {
292
+ steps.emplace_back(d, K, L, h);
293
+ }
294
+ }
295
+
296
+ nn::Tensor2D QINCo::decode(const nn::Int32Tensor2D& codes) const {
297
+ FAISS_THROW_IF_NOT(codes.shape[1] == M);
298
+ Tensor2D xhat = codebook0(codes.column(0));
299
+ for (int i = 1; i < M; i++) {
300
+ xhat += steps[i - 1].decode(xhat, codes.column(i));
301
+ }
302
+ return xhat;
303
+ }
304
+
305
+ nn::Int32Tensor2D QINCo::encode(const nn::Tensor2D& x) const {
306
+ FAISS_THROW_IF_NOT(x.shape[1] == d);
307
+ size_t n = x.shape[0];
308
+ Int32Tensor2D codes(n, M);
309
+ Tensor2D xhat(n, d);
310
+ {
311
+ // assign to first codebook as a batch
312
+ std::vector<float> dis(n);
313
+ std::vector<int64_t> codes64(n);
314
+ knn_L2sqr(
315
+ x.data(),
316
+ codebook0.data(),
317
+ d,
318
+ n,
319
+ K,
320
+ 1,
321
+ dis.data(),
322
+ codes64.data());
323
+ for (size_t i = 0; i < n; i++) {
324
+ codes.v[i * M] = codes64[i];
325
+ memcpy(xhat.data() + i * d,
326
+ codebook0.data() + codes64[i] * d,
327
+ sizeof(float) * d);
328
+ }
329
+ }
330
+
331
+ Tensor2D toadd(n, d);
332
+ for (int i = 1; i < M; i++) {
333
+ Int32Tensor2D ci = steps[i - 1].encode(xhat, x, &toadd);
334
+ for (size_t j = 0; j < n; j++) {
335
+ codes.v[j * M + i] = ci.v[j];
336
+ }
337
+ xhat += toadd;
338
+ }
339
+ return codes;
340
+ }
341
+
342
+ } // namespace faiss
@@ -0,0 +1,147 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ /** Implements a few neural net layers, mainly to support QINCo */
9
+
10
+ #pragma once
11
+
12
+ #include <cstdint>
13
+ #include <cstdio>
14
+ #include <vector>
15
+
16
+ namespace faiss {
17
+
18
+ // the names are based on the PyTorch names (more or less)
19
+ namespace nn {
20
+
21
/// Container for the intermediate results of the neural net: a dense 2D
/// array stored row by row in a std::vector.
template <typename T>
struct Tensor2DTemplate {
    size_t shape[2];  ///< {number of rows, number of columns}
    std::vector<T> v; ///< the shape[0] * shape[1] elements

    /// Allocates an n0 x n1 tensor, optionally copying its contents from
    /// `data`.
    Tensor2DTemplate(size_t n0, size_t n1, const T* data = nullptr);

    /// In-place element-wise addition of a tensor with the same shape.
    Tensor2DTemplate& operator+=(const Tensor2DTemplate&);

    /// get column #j as a 1-column Tensor2D
    Tensor2DTemplate column(size_t j) const;

    /// Total number of elements.
    size_t numel() const {
        return shape[0] * shape[1];
    }

    /// Pointer to the first element.
    T* data() {
        return v.data();
    }

    /// Pointer to the first element (read-only).
    const T* data() const {
        return v.data();
    }
};
44
+
45
+ using Tensor2D = Tensor2DTemplate<float>;
46
+ using Int32Tensor2D = Tensor2DTemplate<int32_t>;
47
+
48
+ /// minimal translation of nn.Linear
49
+ struct Linear {
50
+ size_t in_features, out_features;
51
+ std::vector<float> weight;
52
+ std::vector<float> bias;
53
+
54
+ Linear(size_t in_features, size_t out_features, bool bias = true);
55
+
56
+ Tensor2D operator()(const Tensor2D& x) const;
57
+ };
58
+
59
+ /// minimal translation of nn.Embedding
60
+ struct Embedding {
61
+ size_t num_embeddings, embedding_dim;
62
+ std::vector<float> weight;
63
+
64
+ Embedding(size_t num_embeddings, size_t embedding_dim);
65
+
66
+ Tensor2D operator()(const Int32Tensor2D&) const;
67
+
68
+ float* data() {
69
+ return weight.data();
70
+ }
71
+
72
+ const float* data() const {
73
+ return weight.data();
74
+ }
75
+ };
76
+
77
+ /// Feed forward layer that expands to a hidden dimension, applies a ReLU non
78
+ /// linearity and maps back to the orignal dimension
79
+ struct FFN {
80
+ Linear linear1, linear2;
81
+
82
+ FFN(int d, int h) : linear1(d, h, false), linear2(h, d, false) {}
83
+
84
+ Tensor2D operator()(const Tensor2D& x) const;
85
+ };
86
+
87
+ } // namespace nn
88
+
89
+ // Translation of the QINCo implementation from
90
+ // https://github.com/facebookresearch/Qinco/blob/main/model_qinco.py
91
+
92
+ struct QINCoStep {
93
+ /// d: input dim, K: codebook size, L: # of residual blocks, h: hidden dim
94
+ int d, K, L, h;
95
+
96
+ QINCoStep(int d, int K, int L, int h);
97
+
98
+ nn::Embedding codebook;
99
+ nn::Linear MLPconcat;
100
+ std::vector<nn::FFN> residual_blocks;
101
+
102
+ nn::FFN& get_residual_block(int i) {
103
+ return residual_blocks[i];
104
+ }
105
+
106
+ /** encode a set of vectors x with intial estimate xhat. Optionally return
107
+ * the delta to be added to xhat to form the new xhat */
108
+ nn::Int32Tensor2D encode(
109
+ const nn::Tensor2D& xhat,
110
+ const nn::Tensor2D& x,
111
+ nn::Tensor2D* residuals = nullptr) const;
112
+
113
+ nn::Tensor2D decode(
114
+ const nn::Tensor2D& xhat,
115
+ const nn::Int32Tensor2D& codes) const;
116
+ };
117
+
118
+ struct NeuralNetCodec {
119
+ int d, M;
120
+
121
+ NeuralNetCodec(int d, int M) : d(d), M(M) {}
122
+
123
+ virtual nn::Tensor2D decode(const nn::Int32Tensor2D& codes) const = 0;
124
+ virtual nn::Int32Tensor2D encode(const nn::Tensor2D& x) const = 0;
125
+
126
+ virtual ~NeuralNetCodec() {}
127
+ };
128
+
129
+ struct QINCo : NeuralNetCodec {
130
+ int K, L, h;
131
+ nn::Embedding codebook0;
132
+ std::vector<QINCoStep> steps;
133
+
134
+ QINCo(int d, int K, int L, int M, int h);
135
+
136
+ QINCoStep& get_step(int i) {
137
+ return steps[i];
138
+ }
139
+
140
+ nn::Tensor2D decode(const nn::Int32Tensor2D& codes) const override;
141
+
142
+ nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;
143
+
144
+ virtual ~QINCo() {}
145
+ };
146
+
147
+ } // namespace faiss
@@ -9,6 +9,7 @@
9
9
 
10
10
  #include <condition_variable>
11
11
  #include <deque>
12
+ #include <functional>
12
13
  #include <future>
13
14
  #include <thread>
14
15
 
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <cstdint>
11
+
12
namespace faiss {

// NOTE(review): an unnamed namespace in a header gives every translation
// unit its own fp32_bits type; kept as is so the name stays where existing
// code may expect it.
namespace {

/// The same 32 bits seen either as an unsigned integer or as a float.
union fp32_bits {
    uint32_t as_u32;
    float as_f32;
};

} // namespace

/** Converts a float to bfloat16, i.e. the upper 16 bits of its 32-bit
 * representation, rounding to nearest (ties are rounded up in magnitude).
 * NaN inputs always produce a NaN encoding.
 *
 * @param f  value to convert
 * @return   the 16-bit encoding
 */
inline uint16_t encode_bf16(const float f) {
    fp32_bits fp;
    fp.as_f32 = f;
    // NaN = all exponent bits set and a non-zero mantissa. Adding the
    // rounding bias to it can carry into the exponent or sign and yield a
    // zero or an infinity, so truncate and force one mantissa bit instead.
    if ((fp.as_u32 & 0x7FFFFFFFu) > 0x7F800000u) {
        return static_cast<uint16_t>((fp.as_u32 >> 16) | 0x0040u);
    }
    // Round off: add half of the weight of the last kept bit, then truncate
    return static_cast<uint16_t>((fp.as_u32 + 0x8000) >> 16);
}

/** Converts a bfloat16 encoding back to float by placing it in the upper
 * 16 bits and zero-filling the lower 16 bits.
 *
 * @param v  16-bit encoding
 * @return   the decoded value
 */
inline float decode_bf16(const uint16_t v) {
    fp32_bits fp;
    fp.as_u32 = (uint32_t(v) << 16);
    return fp.as_f32;
}

} // namespace faiss