faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+ #include <stdint.h>
12
+ #include <stdio.h>
13
+
14
+ #include <faiss/impl/platform_macros.h>
15
+
16
+ namespace faiss {
17
+
18
+
19
+ /** partitions the table into 0:q and q:n where all elements above q are >= all
20
+ * elements below q (for C = CMax, for CMin comparisons are reversed)
21
+ *
22
+ * Returns the partition threshold. The elements q:n are destroyed on output.
23
+ */
24
+ template<class C>
25
+ typename C::T partition_fuzzy(
26
+ typename C::T *vals, typename C::TI * ids, size_t n,
27
+ size_t q_min, size_t q_max, size_t * q_out);
28
+
29
+ /** simplified interface for when the partition is not fuzzy */
30
+ template<class C>
31
+ inline typename C::T partition(
32
+ typename C::T *vals, typename C::TI * ids, size_t n,
33
+ size_t q)
34
+ {
35
+ return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
36
+ }
37
+
38
+ /** low level SIMD histogramming functions */
39
+
40
+ /** 8-bin histogram of (x - min) >> shift
41
+ * values outside the range are ignored.
42
+ * the data table should be aligned on 32 bytes */
43
+ void simd_histogram_8(
44
+ const uint16_t *data, int n,
45
+ uint16_t min, int shift,
46
+ int *hist);
47
+
48
+ /** same for 16-bin histogram */
49
+ void simd_histogram_16(
50
+ const uint16_t *data, int n,
51
+ uint16_t min, int shift,
52
+ int *hist);
53
+
54
+
55
+ struct PartitionStats {
56
+ uint64_t bissect_cycles;
57
+ uint64_t compress_cycles;
58
+
59
+ PartitionStats () {reset (); }
60
+ void reset ();
61
+ };
62
+
63
+ // global var that collects them all
64
+ FAISS_API extern PartitionStats partition_stats;
65
+
66
+
67
+
68
+ } // namespace faiss
69
+
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #include <faiss/utils/quantize_lut.h>
10
+
11
+ #include <cmath>
12
+ #include <cstring>
13
+ #include <vector>
14
+ #include <algorithm>
15
+
16
+ #include <faiss/impl/FaissAssert.h>
17
+
18
+
19
+ namespace faiss {
20
+
21
+
22
+ namespace quantize_lut {
23
+
24
+
25
+ /******************************************************
26
+ * Quantize look-up tables
27
+ ******************************************************/
28
+
29
+ namespace {
30
+
31
+ float round_uint8_and_mul(float *tab, size_t n) {
32
+ float max = 0;
33
+ for(int i = 0; i < n; i++) {
34
+ if(fabs(tab[i]) > max) {
35
+ max = fabs(tab[i]);
36
+ }
37
+ }
38
+ float multiplier = 127 / max;
39
+ for(int i = 0; i < n; i++) {
40
+ tab[i] = floorf(tab[i] * multiplier + 128);
41
+ }
42
+ return multiplier;
43
+ }
44
+
45
+ // there can be NaNs in tables, they should be ignored
46
+ float tab_min(const float *tab, size_t n) {
47
+ float min = HUGE_VAL;
48
+ for(int i = 0; i < n; i++) {
49
+ if (tab[i] < min) min = tab[i];
50
+ }
51
+ return min;
52
+ }
53
+
54
+ float tab_max(const float *tab, size_t n) {
55
+ float max = -HUGE_VAL;
56
+ for(int i = 0; i < n; i++) {
57
+ if (tab[i] > max) max = tab[i];
58
+ }
59
+ return max;
60
+ }
61
+
62
+ void round_tab(float *tab, size_t n, float a, float bi) {
63
+ for(int i = 0; i < n; i++) {
64
+ tab[i] = floorf((tab[i] - bi) * a + 0.5);
65
+ }
66
+ }
67
+
68
+ template<typename T>
69
+ void round_tab(const float *tab, size_t n, float a, float bi, T *tab_out) {
70
+ for(int i = 0; i < n; i++) {
71
+ tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
72
+ }
73
+ }
74
+
75
+
76
+
77
+ } // anonymous namespace
78
+
79
+ void round_uint8_per_column(
80
+ float *tab, size_t n, size_t d,
81
+ float *a_out, float *b_out)
82
+ {
83
+ float max_span = 0;
84
+ std::vector<float> mins(n);
85
+ for(int i = 0; i < n; i++) {
86
+ mins[i] = tab_min(tab + i * d, d);
87
+ float span = tab_max(tab + i * d, d) - mins[i];
88
+ if(span > max_span) {
89
+ max_span = span;
90
+ }
91
+ }
92
+ float a = 255 / max_span;
93
+ float b = 0;
94
+ for(int i = 0; i < n; i++) {
95
+ b += mins[i];
96
+ round_tab(tab + i * d, d, a, mins[i]);
97
+ }
98
+ if (a_out) *a_out = a;
99
+ if (b_out) *b_out = b;
100
+ }
101
+
102
+ void round_uint8_per_column_multi(
103
+ float *tab, size_t m, size_t n, size_t d,
104
+ float *a_out, float *b_out)
105
+ {
106
+ float max_span = 0;
107
+ std::vector<float> mins(n);
108
+ for(int i = 0; i < n; i++) {
109
+ float min_i = HUGE_VAL;
110
+ float max_i = -HUGE_VAL;
111
+ for(int j = 0; j < m; j++) {
112
+ min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
113
+ max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
114
+ }
115
+ mins[i] = min_i;
116
+ float span = max_i - min_i;
117
+ if(span > max_span) {
118
+ max_span = span;
119
+ }
120
+ }
121
+ float a = 255 / max_span;
122
+ float b = 0;
123
+ for(int i = 0; i < n; i++) {
124
+ b += mins[i];
125
+ for(int j = 0; j < m; j++) {
126
+ round_tab(tab + (j * n + i) * d, d, a, mins[i]);
127
+ }
128
+ }
129
+ if (a_out) *a_out = a;
130
+ if (b_out) *b_out = b;
131
+ }
132
+
133
+
134
+ // translation of
135
+ // https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
136
+ void quantize_LUT_and_bias(
137
+ size_t nprobe, size_t M, size_t ksub,
138
+ bool lut_is_3d,
139
+ const float *LUT,
140
+ const float *bias,
141
+ uint8_t *LUTq, size_t M2,
142
+ uint16_t *biasq,
143
+ float *a_out, float *b_out)
144
+ {
145
+ float a, b;
146
+ if (!bias) {
147
+ FAISS_THROW_IF_NOT(!lut_is_3d);
148
+ std::vector<float> mins(M);
149
+ float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
150
+ b = 0;
151
+ for(int i = 0; i < M; i++) {
152
+ mins[i] = tab_min(LUT + i * ksub, ksub);
153
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
154
+ max_span_LUT = std::max(max_span_LUT, span);
155
+ max_span_dis += span;
156
+ b += mins[i];
157
+ }
158
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
159
+
160
+ for(int i = 0; i < M; i++) {
161
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
162
+ }
163
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
164
+ } else if (!lut_is_3d) {
165
+ std::vector<float> mins(M);
166
+ float max_span_LUT = -HUGE_VAL, max_span_dis;
167
+ float bias_min = tab_min(bias, nprobe);
168
+ float bias_max = tab_max(bias, nprobe);
169
+ max_span_dis = bias_max - bias_min;
170
+ b = 0;
171
+ for(int i = 0; i < M; i++) {
172
+ mins[i] = tab_min(LUT + i * ksub, ksub);
173
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
174
+ max_span_LUT = std::max(max_span_LUT, span);
175
+ max_span_dis += span;
176
+ b += mins[i];
177
+ }
178
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
179
+ b += bias_min;
180
+
181
+ for(int i = 0; i < M; i++) {
182
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
183
+ }
184
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
185
+ round_tab(bias, nprobe, a, bias_min, biasq);
186
+
187
+ } else if (biasq) {
188
+ // LUT is 3D
189
+ std::vector<float> mins(nprobe * M);
190
+ std::vector<float> bias2(nprobe);
191
+ float bias_min = tab_min(bias, nprobe);
192
+ float max_span_LUT = -HUGE_VAL, max_span_dis = -HUGE_VAL;
193
+
194
+ b = HUGE_VAL;
195
+ size_t ij = 0;
196
+ for (int j = 0; j < nprobe; j++) {
197
+ float max_span_dis_j = bias[j] - bias_min;
198
+ float b2j = bias[j];
199
+ for(int i = 0; i < M; i++) {
200
+ mins[ij] = tab_min(LUT + ij * ksub, ksub);
201
+ float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
202
+ max_span_LUT = std::max(max_span_LUT, span);
203
+ max_span_dis_j += span;
204
+ b2j += mins[ij];
205
+ ij++;
206
+ }
207
+ max_span_dis = std::max(max_span_dis, max_span_dis_j);
208
+ bias2[j] = b2j;
209
+ b = std::min(b, b2j);
210
+ }
211
+
212
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
213
+
214
+ ij = 0;
215
+ size_t ij_2 = 0;
216
+ for (int j = 0; j < nprobe; j++) {
217
+ for(int i = 0; i < M; i++) {
218
+ round_tab(LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
219
+ ij++; ij_2++;
220
+ }
221
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
222
+ ij_2 += M2 - M;
223
+ }
224
+
225
+ round_tab(bias2.data(), nprobe, a, b, biasq);
226
+
227
+ } else { // !biasq
228
+ // then we integrate the bias into the LUTs
229
+ std::vector<float> LUT2_storage(nprobe * M * ksub);
230
+ float *LUT2 = LUT2_storage.data();
231
+ size_t ijc = 0;
232
+ for (int j = 0; j < nprobe; j++) {
233
+ float bias_j = bias[j] / M;
234
+ for(int i = 0; i < M; i++) {
235
+ for (int c = 0; c < ksub; c++) {
236
+ LUT2[ijc] = LUT[ijc] + bias_j;
237
+ ijc++;
238
+ }
239
+ }
240
+ }
241
+ std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
242
+ size_t ij = 0;
243
+ for (int j = 0; j < nprobe; j++) {
244
+ for(int i = 0; i < M; i++) {
245
+ mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
246
+ maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
247
+ ij++;
248
+ }
249
+ }
250
+
251
+ float max_span = -HUGE_VAL;
252
+ b = 0;
253
+ for(int i = 0; i < M; i++) {
254
+ float span = maxs[i] - mins[i];
255
+ max_span = std::max(max_span, span);
256
+ b += mins[i];
257
+ }
258
+ a = 255 / max_span;
259
+ ij = 0;
260
+ size_t ij_2 = 0;
261
+ for (int j = 0; j < nprobe; j++) {
262
+ for(int i = 0; i < M; i++) {
263
+ round_tab(LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
264
+ ij++; ij_2++;
265
+ }
266
+ memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
267
+ ij_2 += M2 - M;
268
+ }
269
+ }
270
+ if (a_out) *a_out = a;
271
+ if (b_out) *b_out = b;
272
+ }
273
+
274
+
275
+ } // namespace quantize_lut
276
+
277
+ } // namespace faiss
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+
12
+ #include <cstdio>
13
+ #include <cstdint>
14
+
15
+ namespace faiss {
16
+
17
+ /** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
18
+ * biases to uint16. The accumulation is supposed to take place in uint16.
19
+ * The quantization coefficients are float (a, b) such that
20
+ *
21
+ * original_value ~= quantized_value / a + b
22
+ *
23
+ * The hardest part of the quantization is with multiple LUTs that need to be
24
+ * added up together. In that case, coefficient a has to be chosen so that
25
+ * the sum fits in a uint16 accumulator.
26
+ */
27
+
28
+ namespace quantize_lut {
29
+
30
+ /* affine quantizer, a and b are the affine coefficients, marginalize over d
31
+ *
32
+ * @param tab input/output, size (n, d)
33
+ */
34
+ void round_uint8_per_column(
35
+ float *tab, size_t n, size_t d,
36
+ float *a_out = nullptr,
37
+ float *b_out = nullptr
38
+ );
39
+
40
+
41
+ /* affine quantizer, a and b are the affine coefficients
42
+ *
43
+ * @param tab input/output, size (m, n, d)
44
+ */
45
+ void round_uint8_per_column_multi(
46
+ float *tab, size_t m, size_t n, size_t d,
47
+ float *a_out = nullptr, float *b_out = nullptr);
48
+
49
+ /** LUT quantization to uint8 and bias to uint16.
50
+ *
51
+ * (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
52
+ *
53
+ * LUT input:
54
+ * - 2D size (M, ksub): single matrix shared by all probes (lut_is_3d=false)
55
+ * - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
56
+ * bias input:
57
+ * - nullptr: bias is 0
58
+ * - size (nprobe): one bias per probe
59
+ * Output:
60
+ * - LUTq uint8 version of the LUT (M size is rounded up to M2)
61
+ * - biasq (or nullptr): uint16 version of the bias
62
+ * - a, b: scalars to approximate the true distance
63
+ */
64
+
65
+ void quantize_LUT_and_bias(
66
+ size_t nprobe, size_t M, size_t ksub,
67
+ bool lut_is_3d,
68
+ const float *LUT,
69
+ const float *bias,
70
+ uint8_t *LUTq, size_t M2,
71
+ uint16_t *biasq,
72
+ float *a_out = nullptr, float *b_out = nullptr
73
+ );
74
+
75
+
76
+ } // namespace quantize_lut
77
+
78
+ } // namespace faiss
79
+
80
+
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+
11
+
12
+ /** Abstractions for 256-bit registers
13
+ *
14
+ * The objective is to separate the different interpretations of the same
15
+ * registers (as a vector of uint8, uint16 or uint32), to provide printing
16
+ * functions.
17
+ */
18
+
19
+ #ifdef __AVX2__
20
+
21
+ #include <faiss/utils/simdlib_avx2.h>
22
+
23
+ #else
24
+
25
+ // emulated = all operations are implemented as scalars
26
+ #include <faiss/utils/simdlib_emulated.h>
27
+
28
+ // FIXME: make a SSE version
29
+ // is this ever going to happen? We will probably rather implement AVX512
30
+
31
+ #endif