faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -0,0 +1,69 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
|
11
|
+
#include <stdint.h>
|
12
|
+
#include <stdio.h>
|
13
|
+
|
14
|
+
#include <faiss/impl/platform_macros.h>
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
|
19
|
+
/** partitions the table into 0:q and q:n where all elements above q are >= all
|
20
|
+
* elements below q (for C = CMax, for CMin comparisons are reversed)
|
21
|
+
*
|
22
|
+
* Returns the partition threshold. The elements q:n are destroyed on output.
|
23
|
+
*/
|
24
|
+
template<class C>
|
25
|
+
typename C::T partition_fuzzy(
|
26
|
+
typename C::T *vals, typename C::TI * ids, size_t n,
|
27
|
+
size_t q_min, size_t q_max, size_t * q_out);
|
28
|
+
|
29
|
+
/** simplified interface for when the parition is not fuzzy */
|
30
|
+
template<class C>
|
31
|
+
inline typename C::T partition(
|
32
|
+
typename C::T *vals, typename C::TI * ids, size_t n,
|
33
|
+
size_t q)
|
34
|
+
{
|
35
|
+
return partition_fuzzy<C>(vals, ids, n, q, q, nullptr);
|
36
|
+
}
|
37
|
+
|
38
|
+
/** low level SIMD histogramming functions */
|
39
|
+
|
40
|
+
/** 8-bin histogram of (x - min) >> shift
|
41
|
+
* values outside the range are ignored.
|
42
|
+
* the data table should be aligned on 32 bytes */
|
43
|
+
void simd_histogram_8(
|
44
|
+
const uint16_t *data, int n,
|
45
|
+
uint16_t min, int shift,
|
46
|
+
int *hist);
|
47
|
+
|
48
|
+
/** same for 16-bin histogram */
|
49
|
+
void simd_histogram_16(
|
50
|
+
const uint16_t *data, int n,
|
51
|
+
uint16_t min, int shift,
|
52
|
+
int *hist);
|
53
|
+
|
54
|
+
|
55
|
+
/** Counters collected by the partitioning code.
 *
 * NOTE(review): judging by the field names these are cycle counts for the
 * bisection and compression phases -- confirm against the implementation.
 */
struct PartitionStats {
    uint64_t bissect_cycles;
    uint64_t compress_cycles;

    /// construction delegates to reset(), which is defined out of line
    PartitionStats() {
        reset();
    }

    void reset();
};
|
62
|
+
|
63
|
+
// global var that collects them all
|
64
|
+
FAISS_API extern PartitionStats partition_stats;
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
} // namespace faiss
|
69
|
+
|
@@ -0,0 +1,277 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#include <faiss/utils/quantize_lut.h>
|
10
|
+
|
11
|
+
#include <cmath>
|
12
|
+
#include <cstring>
|
13
|
+
#include <vector>
|
14
|
+
#include <algorithm>
|
15
|
+
|
16
|
+
#include <faiss/impl/FaissAssert.h>
|
17
|
+
|
18
|
+
|
19
|
+
namespace faiss {
|
20
|
+
|
21
|
+
|
22
|
+
namespace quantize_lut {
|
23
|
+
|
24
|
+
|
25
|
+
/******************************************************
|
26
|
+
* Quantize look-up tables
|
27
|
+
******************************************************/
|
28
|
+
|
29
|
+
namespace {
|
30
|
+
|
31
|
+
/** Quantize a table in place to the uint8 range, centered on 128.
 *
 * Every entry x is replaced by floorf(x * multiplier + 128), where
 * multiplier = 127 / max|x| over the table.
 *
 * If the largest magnitude is 0 (empty or all-zero table) there is no scale
 * to preserve, so a multiplier of 1 is used; dividing by zero here would
 * turn every zero entry into NaN.
 *
 * @param tab  table of n floats, overwritten with the rounded values
 * @param n    number of entries in tab
 * @return     the multiplier that was applied
 */
float round_uint8_and_mul(float *tab, size_t n) {
    float max = 0;
    // size_t counter: an int would be converted to unsigned for the
    // comparison with n and overflow for tables larger than INT_MAX
    for (size_t i = 0; i < n; i++) {
        const float magnitude = std::fabs(tab[i]);
        // NaN compares false, so NaN entries do not affect the maximum
        if (magnitude > max) {
            max = magnitude;
        }
    }
    const float multiplier = max > 0 ? 127 / max : 1.0f;
    for (size_t i = 0; i < n; i++) {
        tab[i] = floorf(tab[i] * multiplier + 128);
    }
    return multiplier;
}
|
44
|
+
|
45
|
+
/** Smallest entry of a table.
 *
 * There can be NaNs in tables; they are ignored because `NaN < min` is
 * always false.
 *
 * @param tab  table of n floats
 * @param n    number of entries
 * @return     the minimum, or +infinity if no entry compared smaller
 *             (empty table or all NaN)
 */
float tab_min(const float *tab, size_t n) {
    float min = HUGE_VAL;
    // size_t counter matches the type of the bound n
    for (size_t i = 0; i < n; i++) {
        if (tab[i] < min) {
            min = tab[i];
        }
    }
    return min;
}
|
53
|
+
|
54
|
+
/** Largest entry of a table.
 *
 * NaN entries are ignored because `NaN > max` is always false.
 *
 * @param tab  table of n floats
 * @param n    number of entries
 * @return     the maximum, or -infinity if no entry compared larger
 *             (empty table or all NaN)
 */
float tab_max(const float *tab, size_t n) {
    float max = -HUGE_VAL;
    // size_t counter matches the type of the bound n
    for (size_t i = 0; i < n; i++) {
        if (tab[i] > max) {
            max = tab[i];
        }
    }
    return max;
}
|
61
|
+
|
62
|
+
/** Affine quantization of a table, in place.
 *
 * Every entry x becomes floorf((x - bi) * a + 0.5), i.e. (x - bi) * a
 * rounded to the nearest integer, still stored as a float.
 *
 * @param tab  table of n floats, overwritten
 * @param n    number of entries
 * @param a    scale factor
 * @param bi   offset subtracted before scaling
 */
void round_tab(float *tab, size_t n, float a, float bi) {
    // size_t counter matches the type of the bound n
    for (size_t i = 0; i < n; i++) {
        tab[i] = floorf((tab[i] - bi) * a + 0.5);
    }
}
|
67
|
+
|
68
|
+
/** Affine quantization of a table into a separate output array of type T.
 *
 * tab_out[i] = (T) floorf((tab[i] - bi) * a + 0.5)
 *
 * The caller must pick a and bi so that the rounded value is representable
 * in T: converting an out-of-range or NaN float to an integer type is
 * undefined behavior.
 *
 * @param tab      input table of n floats (not modified)
 * @param n        number of entries
 * @param a        scale factor
 * @param bi       offset subtracted before scaling
 * @param tab_out  output array of n elements of type T
 */
template<typename T>
void round_tab(const float *tab, size_t n, float a, float bi, T *tab_out) {
    // size_t counter matches the type of the bound n
    for (size_t i = 0; i < n; i++) {
        tab_out[i] = (T)floorf((tab[i] - bi) * a + 0.5);
    }
}
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
} // anonymous namespace
|
78
|
+
|
79
|
+
void round_uint8_per_column(
|
80
|
+
float *tab, size_t n, size_t d,
|
81
|
+
float *a_out, float *b_out)
|
82
|
+
{
|
83
|
+
float max_span = 0;
|
84
|
+
std::vector<float> mins(n);
|
85
|
+
for(int i = 0; i < n; i++) {
|
86
|
+
mins[i] = tab_min(tab + i * d, d);
|
87
|
+
float span = tab_max(tab + i * d, d) - mins[i];
|
88
|
+
if(span > max_span) {
|
89
|
+
max_span = span;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
float a = 255 / max_span;
|
93
|
+
float b = 0;
|
94
|
+
for(int i = 0; i < n; i++) {
|
95
|
+
b += mins[i];
|
96
|
+
round_tab(tab + i * d, d, a, mins[i]);
|
97
|
+
}
|
98
|
+
if (a_out) *a_out = a;
|
99
|
+
if (b_out) *b_out = b;
|
100
|
+
}
|
101
|
+
|
102
|
+
void round_uint8_per_column_multi(
|
103
|
+
float *tab, size_t m, size_t n, size_t d,
|
104
|
+
float *a_out, float *b_out)
|
105
|
+
{
|
106
|
+
float max_span = 0;
|
107
|
+
std::vector<float> mins(n);
|
108
|
+
for(int i = 0; i < n; i++) {
|
109
|
+
float min_i = HUGE_VAL;
|
110
|
+
float max_i = -HUGE_VAL;
|
111
|
+
for(int j = 0; j < m; j++) {
|
112
|
+
min_i = std::min(min_i, tab_min(tab + (j * n + i) * d, d));
|
113
|
+
max_i = std::max(max_i, tab_max(tab + (j * n + i) * d, d));
|
114
|
+
}
|
115
|
+
mins[i] = min_i;
|
116
|
+
float span = max_i - min_i;
|
117
|
+
if(span > max_span) {
|
118
|
+
max_span = span;
|
119
|
+
}
|
120
|
+
}
|
121
|
+
float a = 255 / max_span;
|
122
|
+
float b = 0;
|
123
|
+
for(int i = 0; i < n; i++) {
|
124
|
+
b += mins[i];
|
125
|
+
for(int j = 0; j < m; j++) {
|
126
|
+
round_tab(tab + (j * n + i) * d, d, a, mins[i]);
|
127
|
+
}
|
128
|
+
}
|
129
|
+
if (a_out) *a_out = a;
|
130
|
+
if (b_out) *b_out = b;
|
131
|
+
}
|
132
|
+
|
133
|
+
|
134
|
+
// translation of
|
135
|
+
// https://github.com/fairinternal/faiss_improvements/blob/7122c3cc6ddb0a371d8aa6f1309cd8bcf2335e61/LUT_quantization.ipynb
|
136
|
+
void quantize_LUT_and_bias(
|
137
|
+
size_t nprobe, size_t M, size_t ksub,
|
138
|
+
bool lut_is_3d,
|
139
|
+
const float *LUT,
|
140
|
+
const float *bias,
|
141
|
+
uint8_t *LUTq, size_t M2,
|
142
|
+
uint16_t *biasq,
|
143
|
+
float *a_out, float *b_out)
|
144
|
+
{
|
145
|
+
float a, b;
|
146
|
+
if (!bias) {
|
147
|
+
FAISS_THROW_IF_NOT(!lut_is_3d);
|
148
|
+
std::vector<float> mins(M);
|
149
|
+
float max_span_LUT = -HUGE_VAL, max_span_dis = 0;
|
150
|
+
b = 0;
|
151
|
+
for(int i = 0; i < M; i++) {
|
152
|
+
mins[i] = tab_min(LUT + i * ksub, ksub);
|
153
|
+
float span = tab_max(LUT + i * ksub, ksub) - mins[i];
|
154
|
+
max_span_LUT = std::max(max_span_LUT, span);
|
155
|
+
max_span_dis += span;
|
156
|
+
b += mins[i];
|
157
|
+
}
|
158
|
+
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
159
|
+
|
160
|
+
for(int i = 0; i < M; i++) {
|
161
|
+
round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
|
162
|
+
}
|
163
|
+
memset(LUTq + M * ksub, 0, ksub * (M2 - M));
|
164
|
+
} else if (!lut_is_3d) {
|
165
|
+
std::vector<float> mins(M);
|
166
|
+
float max_span_LUT = -HUGE_VAL, max_span_dis;
|
167
|
+
float bias_min = tab_min(bias, nprobe);
|
168
|
+
float bias_max = tab_max(bias, nprobe);
|
169
|
+
max_span_dis = bias_max - bias_min;
|
170
|
+
b = 0;
|
171
|
+
for(int i = 0; i < M; i++) {
|
172
|
+
mins[i] = tab_min(LUT + i * ksub, ksub);
|
173
|
+
float span = tab_max(LUT + i * ksub, ksub) - mins[i];
|
174
|
+
max_span_LUT = std::max(max_span_LUT, span);
|
175
|
+
max_span_dis += span;
|
176
|
+
b += mins[i];
|
177
|
+
}
|
178
|
+
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
179
|
+
b += bias_min;
|
180
|
+
|
181
|
+
for(int i = 0; i < M; i++) {
|
182
|
+
round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
|
183
|
+
}
|
184
|
+
memset(LUTq + M * ksub, 0, ksub * (M2 - M));
|
185
|
+
round_tab(bias, nprobe, a, bias_min, biasq);
|
186
|
+
|
187
|
+
} else if (biasq) {
|
188
|
+
// LUT is 3D
|
189
|
+
std::vector<float> mins(nprobe * M);
|
190
|
+
std::vector<float> bias2(nprobe);
|
191
|
+
float bias_min = tab_min(bias, nprobe);
|
192
|
+
float max_span_LUT = -HUGE_VAL, max_span_dis = -HUGE_VAL;
|
193
|
+
|
194
|
+
b = HUGE_VAL;
|
195
|
+
size_t ij = 0;
|
196
|
+
for (int j = 0; j < nprobe; j++) {
|
197
|
+
float max_span_dis_j = bias[j] - bias_min;
|
198
|
+
float b2j = bias[j];
|
199
|
+
for(int i = 0; i < M; i++) {
|
200
|
+
mins[ij] = tab_min(LUT + ij * ksub, ksub);
|
201
|
+
float span = tab_max(LUT + ij * ksub, ksub) - mins[ij];
|
202
|
+
max_span_LUT = std::max(max_span_LUT, span);
|
203
|
+
max_span_dis_j += span;
|
204
|
+
b2j += mins[ij];
|
205
|
+
ij++;
|
206
|
+
}
|
207
|
+
max_span_dis = std::max(max_span_dis, max_span_dis_j);
|
208
|
+
bias2[j] = b2j;
|
209
|
+
b = std::min(b, b2j);
|
210
|
+
}
|
211
|
+
|
212
|
+
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
213
|
+
|
214
|
+
ij = 0;
|
215
|
+
size_t ij_2 = 0;
|
216
|
+
for (int j = 0; j < nprobe; j++) {
|
217
|
+
for(int i = 0; i < M; i++) {
|
218
|
+
round_tab(LUT + ij * ksub, ksub, a, mins[ij], LUTq + ij_2 * ksub);
|
219
|
+
ij++; ij_2++;
|
220
|
+
}
|
221
|
+
memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
|
222
|
+
ij_2 += M2 - M;
|
223
|
+
}
|
224
|
+
|
225
|
+
round_tab(bias2.data(), nprobe, a, b, biasq);
|
226
|
+
|
227
|
+
} else { // !biasq
|
228
|
+
// then we integrate the bias into the LUTs
|
229
|
+
std::vector<float> LUT2_storage(nprobe * M * ksub);
|
230
|
+
float *LUT2 = LUT2_storage.data();
|
231
|
+
size_t ijc = 0;
|
232
|
+
for (int j = 0; j < nprobe; j++) {
|
233
|
+
float bias_j = bias[j] / M;
|
234
|
+
for(int i = 0; i < M; i++) {
|
235
|
+
for (int c = 0; c < ksub; c++) {
|
236
|
+
LUT2[ijc] = LUT[ijc] + bias_j;
|
237
|
+
ijc++;
|
238
|
+
}
|
239
|
+
}
|
240
|
+
}
|
241
|
+
std::vector<float> mins(M, HUGE_VAL), maxs(M, -HUGE_VAL);
|
242
|
+
size_t ij = 0;
|
243
|
+
for (int j = 0; j < nprobe; j++) {
|
244
|
+
for(int i = 0; i < M; i++) {
|
245
|
+
mins[i] = std::min(mins[i], tab_min(LUT2 + ij * ksub, ksub));
|
246
|
+
maxs[i] = std::max(maxs[i], tab_max(LUT2 + ij * ksub, ksub));
|
247
|
+
ij++;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
float max_span = -HUGE_VAL;
|
252
|
+
b = 0;
|
253
|
+
for(int i = 0; i < M; i++) {
|
254
|
+
float span = maxs[i] - mins[i];
|
255
|
+
max_span = std::max(max_span, span);
|
256
|
+
b += mins[i];
|
257
|
+
}
|
258
|
+
a = 255 / max_span;
|
259
|
+
ij = 0;
|
260
|
+
size_t ij_2 = 0;
|
261
|
+
for (int j = 0; j < nprobe; j++) {
|
262
|
+
for(int i = 0; i < M; i++) {
|
263
|
+
round_tab(LUT2 + ij * ksub, ksub, a, mins[i], LUTq + ij_2 * ksub);
|
264
|
+
ij++; ij_2++;
|
265
|
+
}
|
266
|
+
memset(LUTq + ij_2 * ksub, 0, ksub * (M2 - M));
|
267
|
+
ij_2 += M2 - M;
|
268
|
+
}
|
269
|
+
}
|
270
|
+
if (a_out) *a_out = a;
|
271
|
+
if (b_out) *b_out = b;
|
272
|
+
}
|
273
|
+
|
274
|
+
|
275
|
+
} // namespace quantize_lut
|
276
|
+
|
277
|
+
} // namespace faiss
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
|
12
|
+
#include <cstdio>
|
13
|
+
#include <cstdint>
|
14
|
+
|
15
|
+
namespace faiss {
|
16
|
+
|
17
|
+
/** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
|
18
|
+
* biases to uint16. The accumulation is supposed to take place in uint16.
|
19
|
+
* The quantization coefficients are float (a, b) such that
|
20
|
+
*
|
21
|
+
* original_value ~= quantized_value / a + b
|
22
|
+
*
|
23
|
+
* The hardest part of the quantization is with multiple LUTs that need to be
|
24
|
+
* added up together. In that case, coefficient a has to be chosen so that
|
25
|
+
* the sum fits in a uint16 accumulator.
|
26
|
+
*/
|
27
|
+
|
28
|
+
namespace quantize_lut {
|
29
|
+
|
30
|
+
/* affine quantizer, a and b are the affine coefficients, marginalize over d
|
31
|
+
*
|
32
|
+
* @param tab input/output, size (n, d)
|
33
|
+
*/
|
34
|
+
void round_uint8_per_column(
|
35
|
+
float *tab, size_t n, size_t d,
|
36
|
+
float *a_out = nullptr,
|
37
|
+
float *b_out = nullptr
|
38
|
+
);
|
39
|
+
|
40
|
+
|
41
|
+
/* affine quantizer, a and b are the affine coefficients
|
42
|
+
*
|
43
|
+
* @param tab input/output, size (m, n, d)
|
44
|
+
*/
|
45
|
+
void round_uint8_per_column_multi(
|
46
|
+
float *tab, size_t m, size_t n, size_t d,
|
47
|
+
float *a_out = nullptr, float *b_out = nullptr);
|
48
|
+
|
49
|
+
/** LUT quantization to uint8 and bias to uint16.
|
50
|
+
*
|
51
|
+
* (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
|
52
|
+
*
|
53
|
+
* LUT input:
|
54
|
+
* - 2D size (M, ksub): single matrix per probe (lut_is_3d=false)
|
55
|
+
* - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
|
56
|
+
* bias input:
|
57
|
+
* - nullptr: bias is 0
|
58
|
+
* - size (nprobe): one bias per probe
|
59
|
+
* Output:
|
60
|
+
* - LUTq uint8 version of the LUT (M size is rounded up to M2)
|
61
|
+
* - biasq (or nullptr): uint16 version of the bias
|
62
|
+
* - a, b: scalars to approximate the true distance
|
63
|
+
*/
|
64
|
+
|
65
|
+
void quantize_LUT_and_bias(
|
66
|
+
size_t nprobe, size_t M, size_t ksub,
|
67
|
+
bool lut_is_3d,
|
68
|
+
const float *LUT,
|
69
|
+
const float *bias,
|
70
|
+
uint8_t *LUTq, size_t M2,
|
71
|
+
uint16_t *biasq,
|
72
|
+
float *a_out = nullptr, float *b_out = nullptr
|
73
|
+
);
|
74
|
+
|
75
|
+
|
76
|
+
} // namespace quantize_lut
|
77
|
+
|
78
|
+
} // namespace faiss
|
79
|
+
|
80
|
+
|
@@ -0,0 +1,31 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
/** Abstractions for 256-bit registers
|
13
|
+
*
|
14
|
+
* The objective is to separate the different interpretations of the same
|
15
|
+
* registers (as a vector of uint8, uint16 or uint32), to provide printing
|
16
|
+
* functions.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#ifdef __AVX2__
|
20
|
+
|
21
|
+
#include <faiss/utils/simdlib_avx2.h>
|
22
|
+
|
23
|
+
#else
|
24
|
+
|
25
|
+
// emulated = all operations are implemented as scalars
|
26
|
+
#include <faiss/utils/simdlib_emulated.h>
|
27
|
+
|
28
|
+
// FIXME: make a SSE version
|
29
|
+
// is this ever going to happen? We will probably rather implement AVX512
|
30
|
+
|
31
|
+
#endif
|