faiss 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+ #include <stdint.h>
12
+ #include <stdio.h>
13
+
14
+ #include <faiss/impl/platform_macros.h>
15
+
16
+ namespace faiss {
17
+
18
+
19
+ /** partitions the table into 0:q and q:n where all elements above q are >= all
20
+ * elements below q (for C = CMax, for CMin comparisons are reversed)
21
+ *
22
+ * Returns the partition threshold. The elements q:n are destroyed on output.
23
+ */
24
+ template<class C>
25
+ typename C::T partition_fuzzy(
26
+ typename C::T *vals, typename C::TI * ids, size_t n,
27
+ size_t q_min, size_t q_max, size_t * q_out);
28
+
29
+ /** simplified interface for when the partition is not fuzzy */
30
+ template<class C>
31
+ inline typename C::T partition(
32
+ typename C::T *vals, typename C::TI * ids, size_t n,
33
+ size_t q)
34
+ {
35
+ return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
36
+ }
37
+
38
+ /** low level SIMD histogramming functions */
39
+
40
+ /** 8-bin histogram of (x - min) >> shift
41
+ * values outside the range are ignored.
42
+ * the data table should be aligned on 32 bytes */
43
+ void simd_histogram_8(
44
+ const uint16_t *data, int n,
45
+ uint16_t min, int shift,
46
+ int *hist);
47
+
48
+ /** same for 16-bin histogram */
49
+ void simd_histogram_16(
50
+ const uint16_t *data, int n,
51
+ uint16_t min, int shift,
52
+ int *hist);
53
+
54
+
55
+ struct PartitionStats {
56
+ uint64_t bissect_cycles;
57
+ uint64_t compress_cycles;
58
+
59
+ PartitionStats () {reset (); }
60
+ void reset ();
61
+ };
62
+
63
+ // global var that collects them all
64
+ FAISS_API extern PartitionStats partition_stats;
65
+
66
+
67
+
68
+ } // namespace faiss
69
+
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/utils/quantize_lut.h>
10
+
11
+ #include <cmath>
12
+ #include <cstring>
13
+ #include <vector>
14
+ #include <algorithm>
15
+
16
+ #include <faiss/impl/FaissAssert.h>
17
+
18
+
19
+ namespace faiss {
20
+
21
+
22
+ namespace quantize_lut {
23
+
24
+
25
+ /******************************************************
26
+ * Quantize look-up tables
27
+ ******************************************************/
28
+
29
+ namespace {
30
+
31
+ float round_uint8_and_mul(float *tab, size_t n) {
32
+ float max = 0;
33
+ for(int i = 0; i < n; i++) {
34
+ if(fabs(tab[i]) > max) {
35
+ max = fabs(tab[i]);
36
+ }
37
+ }
38
+ float multiplier = 127 / max;
39
+ for(int i = 0; i < n; i++) {
40
+ tab[i] = floorf(tab[i] * multiplier + 128);
41
+ }
42
+ return multiplier;
43
+ }
44
+
45
+ // there can be NaNs in tables, they should be ignored
46
+ float tab_min(const float *tab, size_t n) {
47
+ float min = HUGE_VAL;
48
+ for(int i = 0; i < n; i++) {
49
+ if (tab[i] < min) min = tab[i];
50
+ }
51
+ return min;
52
+ }
53
+
54
+ float tab_max(const float *tab, size_t n) {
55
+ float max = -HUGE_VAL;
56
+ for(int i = 0; i < n; i++) {
57
+ if (tab[i] > max) max = tab[i];
58
+ }
59
+ return max;
60
+ }
61
+
62
+ void round_tab(float *tab, size_t n, float a, float bi) {
63
+ for(int i = 0; i < n; i++) {
64
+ tab[i] = floorf((tab[i] - bi) * a + 0.5);
65
+ }
66
+ }
67
+
68
+ template<typename T>
69
+ void round_tab(const float *tab, size_t n, float a, float bi, T *tab_out) {
70
+ for(int i = 0; i < n; i++) {
71
+ tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
72
+ }
73
+ }
74
+
75
+
76
+
77
+ } // anonymous namespace
78
+
79
+ void round_uint8_per_column(
80
+ float *tab, size_t n, size_t d,
81
+ float *a_out, float *b_out)
82
+ {
83
+ float max_span = 0;
84
+ std::vector<float> mins(n);
85
+ for(int i = 0; i < n; i++) {
86
+ mins[i] = tab_min(tab + i * d, d);
87
+ float span = tab_max(tab + i * d, d) - mins[i];
88
+ if(span > max_span) {
89
+ max_span = span;
90
+ }
91
+ }
92
+ float a = 255 / max_span;
93
+ float b = 0;
94
+ for(int i = 0; i < n; i++) {
95
+ b += mins[i];
96
+ round_tab(tab + i * d, d, a, mins[i]);
97
+ }
98
+ if (a_out) *a_out = a;
99
+ if (b_out) *b_out = b;
100
+ }
101
+
102
+ void round_uint8_per_column_multi(
103
+ float *tab, size_t m, size_t n, size_t d,
104
+ float *a_out, float *b_out)
105
+ {
106
+ float max_span = 0;
107
+ std::vector<float> mins(n);
108
+ for(int i = 0; i < n; i++) {
109
+ float min_i = HUGE_VAL;
110
+ float max_i = -HUGE_VAL;
111
+ for(int j = 0; j < m; j++) {
112
+ min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
113
+ max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
114
+ }
115
+ mins[i] = min_i;
116
+ float span = max_i - min_i;
117
+ if(span > max_span) {
118
+ max_span = span;
119
+ }
120
+ }
121
+ float a = 255 / max_span;
122
+ float b = 0;
123
+ for(int i = 0; i < n; i++) {
124
+ b += mins[i];
125
+ for(int j = 0; j < m; j++) {
126
+ round_tab(tab + (j * n + i) * d, d, a, mins[i]);
127
+ }
128
+ }
129
+ if (a_out) *a_out = a;
130
+ if (b_out) *b_out = b;
131
+ }
132
+
133
+
134
+ // translation of
135
+ // https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
136
+ void quantize_LUT_and_bias(
137
+ size_t nprobe, size_t M, size_t ksub,
138
+ bool lut_is_3d,
139
+ const float *LUT,
140
+ const float *bias,
141
+ uint8_t *LUTq, size_t M2,
142
+ uint16_t *biasq,
143
+ float *a_out, float *b_out)
144
+ {
145
+ float a, b;
146
+ if (!bias) {
147
+ FAISS_THROW_IF_NOT(!lut_is_3d);
148
+ std::vector<float> mins(M);
149
+ float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
150
+ b = 0;
151
+ for(int i = 0; i < M; i++) {
152
+ mins[i] = tab_min(LUT + i * ksub, ksub);
153
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
154
+ max_span_LUT = std::max(max_span_LUT, span);
155
+ max_span_dis += span;
156
+ b += mins[i];
157
+ }
158
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
159
+
160
+ for(int i = 0; i < M; i++) {
161
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
162
+ }
163
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
164
+ } else if (!lut_is_3d) {
165
+ std::vector<float> mins(M);
166
+ float max_span_LUT = -HUGE_VAL, max_span_dis;
167
+ float bias_min = tab_min(bias, nprobe);
168
+ float bias_max = tab_max(bias, nprobe);
169
+ max_span_dis = bias_max - bias_min;
170
+ b = 0;
171
+ for(int i = 0; i < M; i++) {
172
+ mins[i] = tab_min(LUT + i * ksub, ksub);
173
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
174
+ max_span_LUT = std::max(max_span_LUT, span);
175
+ max_span_dis += span;
176
+ b += mins[i];
177
+ }
178
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
179
+ b += bias_min;
180
+
181
+ for(int i = 0; i < M; i++) {
182
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
183
+ }
184
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
185
+ round_tab(bias, nprobe, a, bias_min, biasq);
186
+
187
+ } else if (biasq) {
188
+ // LUT is 3D
189
+ std::vector<float> mins(nprobe * M);
190
+ std::vector<float> bias2(nprobe);
191
+ float bias_min = tab_min(bias, nprobe);
192
+ float max_span_LUT = -HUGE_VAL, max_span_dis = -HUGE_VAL;
193
+
194
+ b = HUGE_VAL;
195
+ size_t ij = 0;
196
+ for (int j = 0; j < nprobe; j++) {
197
+ float max_span_dis_j = bias[j] - bias_min;
198
+ float b2j = bias[j];
199
+ for(int i = 0; i < M; i++) {
200
+ mins[ij] = tab_min(LUT + ij * ksub, ksub);
201
+ float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
202
+ max_span_LUT = std::max(max_span_LUT, span);
203
+ max_span_dis_j += span;
204
+ b2j += mins[ij];
205
+ ij++;
206
+ }
207
+ max_span_dis = std::max(max_span_dis, max_span_dis_j);
208
+ bias2[j] = b2j;
209
+ b = std::min(b, b2j);
210
+ }
211
+
212
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
213
+
214
+ ij = 0;
215
+ size_t ij_2 = 0;
216
+ for (int j = 0; j < nprobe; j++) {
217
+ for(int i = 0; i < M; i++) {
218
+ round_tab(LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
219
+ ij++; ij_2++;
220
+ }
221
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
222
+ ij_2 += M2 - M;
223
+ }
224
+
225
+ round_tab(bias2.data(), nprobe, a, b, biasq);
226
+
227
+ } else { // !biasq
228
+ // then we integrate the bias into the LUTs
229
+ std::vector<float> LUT2_storage(nprobe * M * ksub);
230
+ float *LUT2 = LUT2_storage.data();
231
+ size_t ijc = 0;
232
+ for (int j = 0; j < nprobe; j++) {
233
+ float bias_j = bias[j] / M;
234
+ for(int i = 0; i < M; i++) {
235
+ for (int c = 0; c < ksub; c++) {
236
+ LUT2[ijc] = LUT[ijc] + bias_j;
237
+ ijc++;
238
+ }
239
+ }
240
+ }
241
+ std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
242
+ size_t ij = 0;
243
+ for (int j = 0; j < nprobe; j++) {
244
+ for(int i = 0; i < M; i++) {
245
+ mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
246
+ maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
247
+ ij++;
248
+ }
249
+ }
250
+
251
+ float max_span = -HUGE_VAL;
252
+ b = 0;
253
+ for(int i = 0; i < M; i++) {
254
+ float span = maxs[i] - mins[i];
255
+ max_span = std::max(max_span, span);
256
+ b += mins[i];
257
+ }
258
+ a = 255 / max_span;
259
+ ij = 0;
260
+ size_t ij_2 = 0;
261
+ for (int j = 0; j < nprobe; j++) {
262
+ for(int i = 0; i < M; i++) {
263
+ round_tab(LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
264
+ ij++; ij_2++;
265
+ }
266
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
267
+ ij_2 += M2 - M;
268
+ }
269
+ }
270
+ if (a_out) *a_out = a;
271
+ if (b_out) *b_out = b;
272
+ }
273
+
274
+
275
+ } // namespace quantize_lut
276
+
277
+ } // namespace faiss
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+
12
+ #include <cstdio>
13
+ #include <cstdint>
14
+
15
+ namespace faiss {
16
+
17
+ /** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
18
+ * biases to uint16. The accumulation is supposed to take place in uint16.
19
+ * The quantization coefficients are float (a, b) such that
20
+ *
21
+ * original_value = quantized_value / a + b
22
+ *
23
+ * The hardest part of the quantization is with multiple LUTs that need to be
24
+ * added up together. In that case, coefficient a has to be chosen so that
25
+ * the sum fits in a uint16 accumulator.
26
+ */
27
+
28
+ namespace quantize_lut {
29
+
30
+ /* affine quantizer, a and b are the affine coefficients, marginalize over d
31
+ *
32
+ * @param tab input/output, size (n, d)
33
+ */
34
+ void round_uint8_per_column(
35
+ float *tab, size_t n, size_t d,
36
+ float *a_out = nullptr,
37
+ float *b_out = nullptr
38
+ );
39
+
40
+
41
+ /* affine quantizer, a and b are the affine coefficients
42
+ *
43
+ * @param tab input/output, size (m, n, d)
44
+ */
45
+ void round_uint8_per_column_multi(
46
+ float *tab, size_t m, size_t n, size_t d,
47
+ float *a_out = nullptr, float *b_out = nullptr);
48
+
49
+ /** LUT quantization to uint8 and bias to uint16.
50
+ *
51
+ * (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
52
+ *
53
+ * LUT input:
54
+ * - 2D size (M, ksub): single matrix per probe (lut_is_3d=false)
55
+ * - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
56
+ * bias input:
57
+ * - nullptr: bias is 0
58
+ * - size (nprobe): one bias per probe
59
+ * Output:
60
+ * - LUTq uint8 version of the LUT (M size is rounded up to M2)
61
+ * - biasq (or nullptr): uint16 version of the bias
62
+ * - a, b: scalars to approximate the true distance
63
+ */
64
+
65
+ void quantize_LUT_and_bias(
66
+ size_t nprobe, size_t M, size_t ksub,
67
+ bool lut_is_3d,
68
+ const float *LUT,
69
+ const float *bias,
70
+ uint8_t *LUTq, size_t M2,
71
+ uint16_t *biasq,
72
+ float *a_out = nullptr, float *b_out = nullptr
73
+ );
74
+
75
+
76
+ } // namespace quantize_lut
77
+
78
+ } // namespace faiss
79
+
80
+
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+
12
+ /** Abstractions for 256-bit registers
13
+ *
14
+ * The objective is to separate the different interpretations of the same
15
+ * registers (as a vector of uint8, uint16 or uint32), and to provide printing
16
+ * functions.
17
+ */
18
+
19
+ #ifdef __AVX2__
20
+
21
+ #include <faiss/utils/simdlib_avx2.h>
22
+
23
+ #else
24
+
25
+ // emulated = all operations are implemented as scalars
26
+ #include <faiss/utils/simdlib_emulated.h>
27
+
28
+ // FIXME: make an SSE version
29
+ // is this ever going to happen? We will probably rather implement AVX512
30
+
31
+ #endif