faiss 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+ #include <stdint.h>
12
+ #include <stdio.h>
13
+
14
+ #include <faiss/impl/platform_macros.h>
15
+
16
+ namespace faiss {
17
+
18
+
19
+ /** partitions the table into 0:q and q:n where all elements above q are >= all
20
+ * elements below q (for C = CMax, for CMin comparisons are reversed)
21
+ *
22
+ * Returns the partition threshold. The elements q:n are destroyed on output.
23
+ */
24
+ template<class C>
25
+ typename C::T partition_fuzzy(
26
+ typename C::T *vals, typename C::TI * ids, size_t n,
27
+ size_t q_min, size_t q_max, size_t * q_out);
28
+
29
+ /** simplified interface for when the partition is not fuzzy */
30
+ template<class C>
31
+ inline typename C::T partition(
32
+ typename C::T *vals, typename C::TI * ids, size_t n,
33
+ size_t q)
34
+ {
35
+ return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
36
+ }
37
+
38
+ /** low level SIMD histogramming functions */
39
+
40
+ /** 8-bin histogram of (x - min) >> shift
41
+ * values outside the range are ignored.
42
+ * the data table should be aligned on 32 bytes */
43
+ void simd_histogram_8(
44
+ const uint16_t *data, int n,
45
+ uint16_t min, int shift,
46
+ int *hist);
47
+
48
+ /** same for 16-bin histogram */
49
+ void simd_histogram_16(
50
+ const uint16_t *data, int n,
51
+ uint16_t min, int shift,
52
+ int *hist);
53
+
54
+
55
+ struct PartitionStats {
56
+ uint64_t bissect_cycles;
57
+ uint64_t compress_cycles;
58
+
59
+ PartitionStats () {reset (); }
60
+ void reset ();
61
+ };
62
+
63
+ // global var that collects them all
64
+ FAISS_API extern PartitionStats partition_stats;
65
+
66
+
67
+
68
+ } // namespace faiss
69
+
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/utils/quantize_lut.h>
10
+
11
+ #include <cmath>
12
+ #include <cstring>
13
+ #include <vector>
14
+ #include <algorithm>
15
+
16
+ #include <faiss/impl/FaissAssert.h>
17
+
18
+
19
+ namespace faiss {
20
+
21
+
22
+ namespace quantize_lut {
23
+
24
+
25
+ /******************************************************
26
+ * Quantize look-up tables
27
+ ******************************************************/
28
+
29
+ namespace {
30
+
31
+ float round_uint8_and_mul(float *tab, size_t n) {
32
+ float max = 0;
33
+ for(int i = 0; i < n; i++) {
34
+ if(fabs(tab[i]) > max) {
35
+ max = fabs(tab[i]);
36
+ }
37
+ }
38
+ float multiplier = 127 / max;
39
+ for(int i = 0; i < n; i++) {
40
+ tab[i] = floorf(tab[i] * multiplier + 128);
41
+ }
42
+ return multiplier;
43
+ }
44
+
45
+ // there can be NaNs in tables, they should be ignored
46
+ float tab_min(const float *tab, size_t n) {
47
+ float min = HUGE_VAL;
48
+ for(int i = 0; i < n; i++) {
49
+ if (tab[i] < min) min = tab[i];
50
+ }
51
+ return min;
52
+ }
53
+
54
+ float tab_max(const float *tab, size_t n) {
55
+ float max = -HUGE_VAL;
56
+ for(int i = 0; i < n; i++) {
57
+ if (tab[i] > max) max = tab[i];
58
+ }
59
+ return max;
60
+ }
61
+
62
+ void round_tab(float *tab, size_t n, float a, float bi) {
63
+ for(int i = 0; i < n; i++) {
64
+ tab[i] = floorf((tab[i] - bi) * a + 0.5);
65
+ }
66
+ }
67
+
68
+ template<typename T>
69
+ void round_tab(const float *tab, size_t n, float a, float bi, T *tab_out) {
70
+ for(int i = 0; i < n; i++) {
71
+ tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
72
+ }
73
+ }
74
+
75
+
76
+
77
+ } // anonymous namespace
78
+
79
+ void round_uint8_per_column(
80
+ float *tab, size_t n, size_t d,
81
+ float *a_out, float *b_out)
82
+ {
83
+ float max_span = 0;
84
+ std::vector<float> mins(n);
85
+ for(int i = 0; i < n; i++) {
86
+ mins[i] = tab_min(tab + i * d, d);
87
+ float span = tab_max(tab + i * d, d) - mins[i];
88
+ if(span > max_span) {
89
+ max_span = span;
90
+ }
91
+ }
92
+ float a = 255 / max_span;
93
+ float b = 0;
94
+ for(int i = 0; i < n; i++) {
95
+ b += mins[i];
96
+ round_tab(tab + i * d, d, a, mins[i]);
97
+ }
98
+ if (a_out) *a_out = a;
99
+ if (b_out) *b_out = b;
100
+ }
101
+
102
+ void round_uint8_per_column_multi(
103
+ float *tab, size_t m, size_t n, size_t d,
104
+ float *a_out, float *b_out)
105
+ {
106
+ float max_span = 0;
107
+ std::vector<float> mins(n);
108
+ for(int i = 0; i < n; i++) {
109
+ float min_i = HUGE_VAL;
110
+ float max_i = -HUGE_VAL;
111
+ for(int j = 0; j < m; j++) {
112
+ min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
113
+ max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
114
+ }
115
+ mins[i] = min_i;
116
+ float span = max_i - min_i;
117
+ if(span > max_span) {
118
+ max_span = span;
119
+ }
120
+ }
121
+ float a = 255 / max_span;
122
+ float b = 0;
123
+ for(int i = 0; i < n; i++) {
124
+ b += mins[i];
125
+ for(int j = 0; j < m; j++) {
126
+ round_tab(tab + (j * n + i) * d, d, a, mins[i]);
127
+ }
128
+ }
129
+ if (a_out) *a_out = a;
130
+ if (b_out) *b_out = b;
131
+ }
132
+
133
+
134
+ // translation of
135
+ // https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
136
+ void quantize_LUT_and_bias(
137
+ size_t nprobe, size_t M, size_t ksub,
138
+ bool lut_is_3d,
139
+ const float *LUT,
140
+ const float *bias,
141
+ uint8_t *LUTq, size_t M2,
142
+ uint16_t *biasq,
143
+ float *a_out, float *b_out)
144
+ {
145
+ float a, b;
146
+ if (!bias) {
147
+ FAISS_THROW_IF_NOT(!lut_is_3d);
148
+ std::vector<float> mins(M);
149
+ float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
150
+ b = 0;
151
+ for(int i = 0; i < M; i++) {
152
+ mins[i] = tab_min(LUT + i * ksub, ksub);
153
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
154
+ max_span_LUT = std::max(max_span_LUT, span);
155
+ max_span_dis += span;
156
+ b += mins[i];
157
+ }
158
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
159
+
160
+ for(int i = 0; i < M; i++) {
161
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
162
+ }
163
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
164
+ } else if (!lut_is_3d) {
165
+ std::vector<float> mins(M);
166
+ float max_span_LUT = -HUGE_VAL, max_span_dis;
167
+ float bias_min = tab_min(bias, nprobe);
168
+ float bias_max = tab_max(bias, nprobe);
169
+ max_span_dis = bias_max - bias_min;
170
+ b = 0;
171
+ for(int i = 0; i < M; i++) {
172
+ mins[i] = tab_min(LUT + i * ksub, ksub);
173
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
174
+ max_span_LUT = std::max(max_span_LUT, span);
175
+ max_span_dis += span;
176
+ b += mins[i];
177
+ }
178
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
179
+ b += bias_min;
180
+
181
+ for(int i = 0; i < M; i++) {
182
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
183
+ }
184
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
185
+ round_tab(bias, nprobe, a, bias_min, biasq);
186
+
187
+ } else if (biasq) {
188
+ // LUT is 3D
189
+ std::vector<float> mins(nprobe * M);
190
+ std::vector<float> bias2(nprobe);
191
+ float bias_min = tab_min(bias, nprobe);
192
+ float max_span_LUT = -HUGE_VAL, max_span_dis = -HUGE_VAL;
193
+
194
+ b = HUGE_VAL;
195
+ size_t ij = 0;
196
+ for (int j = 0; j < nprobe; j++) {
197
+ float max_span_dis_j = bias[j] - bias_min;
198
+ float b2j = bias[j];
199
+ for(int i = 0; i < M; i++) {
200
+ mins[ij] = tab_min(LUT + ij * ksub, ksub);
201
+ float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
202
+ max_span_LUT = std::max(max_span_LUT, span);
203
+ max_span_dis_j += span;
204
+ b2j += mins[ij];
205
+ ij++;
206
+ }
207
+ max_span_dis = std::max(max_span_dis, max_span_dis_j);
208
+ bias2[j] = b2j;
209
+ b = std::min(b, b2j);
210
+ }
211
+
212
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
213
+
214
+ ij = 0;
215
+ size_t ij_2 = 0;
216
+ for (int j = 0; j < nprobe; j++) {
217
+ for(int i = 0; i < M; i++) {
218
+ round_tab(LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
219
+ ij++; ij_2++;
220
+ }
221
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
222
+ ij_2 += M2 - M;
223
+ }
224
+
225
+ round_tab(bias2.data(), nprobe, a, b, biasq);
226
+
227
+ } else { // !biasq
228
+ // then we integrate the bias into the LUTs
229
+ std::vector<float> LUT2_storage(nprobe * M * ksub);
230
+ float *LUT2 = LUT2_storage.data();
231
+ size_t ijc = 0;
232
+ for (int j = 0; j < nprobe; j++) {
233
+ float bias_j = bias[j] / M;
234
+ for(int i = 0; i < M; i++) {
235
+ for (int c = 0; c < ksub; c++) {
236
+ LUT2[ijc] = LUT[ijc] + bias_j;
237
+ ijc++;
238
+ }
239
+ }
240
+ }
241
+ std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
242
+ size_t ij = 0;
243
+ for (int j = 0; j < nprobe; j++) {
244
+ for(int i = 0; i < M; i++) {
245
+ mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
246
+ maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
247
+ ij++;
248
+ }
249
+ }
250
+
251
+ float max_span = -HUGE_VAL;
252
+ b = 0;
253
+ for(int i = 0; i < M; i++) {
254
+ float span = maxs[i] - mins[i];
255
+ max_span = std::max(max_span, span);
256
+ b += mins[i];
257
+ }
258
+ a = 255 / max_span;
259
+ ij = 0;
260
+ size_t ij_2 = 0;
261
+ for (int j = 0; j < nprobe; j++) {
262
+ for(int i = 0; i < M; i++) {
263
+ round_tab(LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
264
+ ij++; ij_2++;
265
+ }
266
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
267
+ ij_2 += M2 - M;
268
+ }
269
+ }
270
+ if (a_out) *a_out = a;
271
+ if (b_out) *b_out = b;
272
+ }
273
+
274
+
275
+ } // namespace quantize_lut
276
+
277
+ } // namespace faiss
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+
12
+ #include <cstdio>
13
+ #include <cstdint>
14
+
15
+ namespace faiss {
16
+
17
+ /** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
18
+ * biases to uint16. The accumulation is supposed to take place in uint16.
19
+ * The quantization coefficients are float (a, b) such that
20
+ *
21
+ * original_value ~= quantized_value / a + b
22
+ *
23
+ * The hardest part of the quantization is with multiple LUTs that need to be
24
+ * added up together. In that case, coefficient a has to be chosen so that
25
+ * the sum fits in a uint16 accumulator.
26
+ */
27
+
28
+ namespace quantize_lut {
29
+
30
+ /* affine quantizer, a and b are the affine coefficients, marginalize over d
31
+ *
32
+ * @param tab input/output, size (n, d)
33
+ */
34
+ void round_uint8_per_column(
35
+ float *tab, size_t n, size_t d,
36
+ float *a_out = nullptr,
37
+ float *b_out = nullptr
38
+ );
39
+
40
+
41
+ /* affine quantizer, a and b are the affine coefficients
42
+ *
43
+ * @param tab input/output, size (m, n, d)
44
+ */
45
+ void round_uint8_per_column_multi(
46
+ float *tab, size_t m, size_t n, size_t d,
47
+ float *a_out = nullptr, float *b_out = nullptr);
48
+
49
+ /** LUT quantization to uint8 and bias to uint16.
50
+ *
51
+ * (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
52
+ *
53
+ * LUT input:
54
+ * - 2D size (M, ksub): single matrix shared by all probes (lut_is_3d=false)
55
+ * - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
56
+ * bias input:
57
+ * - nullptr: bias is 0
58
+ * - size (nprobe): one bias per probe
59
+ * Output:
60
+ * - LUTq uint8 version of the LUT (M size is rounded up to M2)
61
+ * - biasq (or nullptr): uint16 version of the bias
62
+ * - a, b: scalars to approximate the true distance
63
+ */
64
+
65
+ void quantize_LUT_and_bias(
66
+ size_t nprobe, size_t M, size_t ksub,
67
+ bool lut_is_3d,
68
+ const float *LUT,
69
+ const float *bias,
70
+ uint8_t *LUTq, size_t M2,
71
+ uint16_t *biasq,
72
+ float *a_out = nullptr, float *b_out = nullptr
73
+ );
74
+
75
+
76
+ } // namespace quantize_lut
77
+
78
+ } // namespace faiss
79
+
80
+
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+
12
+ /** Abstractions for 256-bit registers
13
+ *
14
+ * The objective is to separate the different interpretations of the same
15
+ * registers (as a vector of uint8, uint16 or uint32), and to provide printing
16
+ * functions.
17
+ */
18
+
19
+ #ifdef __AVX2__
20
+
21
+ #include <faiss/utils/simdlib_avx2.h>
22
+
23
+ #else
24
+
25
+ // emulated = all operations are implemented as scalars
26
+ #include <faiss/utils/simdlib_emulated.h>
27
+
28
+ // FIXME: make an SSE version
29
+ // is this ever going to happen? We will probably rather implement AVX512
30
+
31
+ #endif