faiss 0.2.6 → 0.2.7
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26

data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8

@@ -13,6 +13,8 @@
 #include <faiss/Clustering.h>
 #include <faiss/impl/AdditiveQuantizer.h>
 
+#include <faiss/utils/approx_topk/mode.h>
+
 namespace faiss {
 
 /** Residual quantizer with variable number of bits per sub-quantizer
@@ -29,7 +31,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
     using train_type_t = int;
 
     /// Binary or of the Train_* flags below
-    train_type_t train_type;
+    train_type_t train_type = Train_progressive_dim;
 
     /// regular k-means (minimal amount of computation)
     static const int Train_default = 0;
@@ -41,7 +43,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
    static const int Train_refine_codebook = 2;
 
     /// number of iterations for codebook refinement.
-    int niter_codebook_refine;
+    int niter_codebook_refine = 5;
 
     /** set this bit on train_type if beam is to be trained only on the
      * first element of the beam (faster but less accurate) */
@@ -52,16 +54,20 @@ struct ResidualQuantizer : AdditiveQuantizer {
     static const int Skip_codebook_tables = 2048;
 
     /// beam size used for training and for encoding
-    int max_beam_size;
+    int max_beam_size = 5;
 
     /// use LUT for beam search
-    int use_beam_LUT;
+    int use_beam_LUT = 0;
+
+    /// Currently used mode of approximate min-k computations.
+    /// Default value is EXACT_TOPK.
+    ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK;
 
     /// clustering parameters
     ProgressiveDimClusteringParameters cp;
 
     /// if non-NULL, use this index for assignment
-    ProgressiveDimIndexFactory* assign_index_factory;
+    ProgressiveDimIndexFactory* assign_index_factory = nullptr;
 
     ResidualQuantizer(
             size_t d,
@@ -183,7 +189,8 @@ void beam_search_encode_step(
         int32_t* new_codes,
         float* new_residuals,
         float* new_distances,
-        Index* assign_index = nullptr
+        Index* assign_index = nullptr,
+        ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
 
 /** Encode a set of vectors using their dot products with the codebooks
  *
@@ -202,7 +209,8 @@ void beam_search_encode_step_tab(
         const int32_t* codes, // n * beam_size * m
         const float* distances, // n * beam_size
         size_t new_beam_size,
-        int32_t* new_codes,
-        float* new_distances
+        int32_t* new_codes, // n * new_beam_size * (m + 1)
+        float* new_distances, // n * new_beam_size
+        ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
 
 }; // namespace faiss
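
For reference, a minimal sketch of how the new defaults and the approx_topk_mode knob look from calling code. It assumes the existing ResidualQuantizer(d, M, nbits) constructor from earlier releases is unchanged; EXACT_TOPK is the only mode named in this diff, so it is the only one used here.

#include <faiss/impl/ResidualQuantizer.h>
#include <faiss/utils/approx_topk/mode.h>

int main() {
    using namespace faiss;

    // d = 128, M = 8 sub-quantizers, 8 bits each
    ResidualQuantizer rq(128, 8, 8);

    // fields that previously had to be set explicitly now carry defaults
    // (max_beam_size = 5, use_beam_LUT = 0, niter_codebook_refine = 5, ...)
    rq.max_beam_size = 16; // widen the beam for more accurate encoding
    rq.use_beam_LUT = 1;   // switch to the LUT-based beam search

    // the new field added in this release; EXACT_TOPK keeps the previous
    // exact top-k behaviour during beam search
    rq.approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK;
    return 0;
}
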
data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5

@@ -54,7 +54,6 @@ namespace faiss {
 
 namespace {
 
-typedef Index::idx_t idx_t;
 typedef ScalarQuantizer::QuantizerType QuantizerType;
 typedef ScalarQuantizer::RangeStat RangeStat;
 using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer;
@@ -1048,12 +1047,11 @@ SQDistanceComputer* select_distance_computer(
 ********************************************************************/
 
 ScalarQuantizer::ScalarQuantizer(size_t d, QuantizerType qtype)
-        : Quantizer(d), qtype(qtype)
+        : Quantizer(d), qtype(qtype) {
     set_derived_sizes();
 }
 
-ScalarQuantizer::ScalarQuantizer()
-        : qtype(QT_8bit), rangestat(RS_minmax), rangestat_arg(0), bits(0) {}
+ScalarQuantizer::ScalarQuantizer() {}
 
 void ScalarQuantizer::set_derived_sizes() {
     switch (qtype) {
@@ -1131,7 +1129,7 @@ void ScalarQuantizer::train_residual(
     ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
 
     if (by_residual) {
-        std::vector<
+        std::vector<idx_t> idx(n);
         quantizer->assign(n, x, idx.data());
 
         std::vector<float> residuals(n * d);
data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4

@@ -34,7 +34,7 @@ struct ScalarQuantizer : Quantizer {
         QT_6bit, ///< 6 bits per component
     };
 
-    QuantizerType qtype;
+    QuantizerType qtype = QT_8bit;
 
     /** The uniform encoder can estimate the range of representable
      * values of the unform encoder using different statistics. Here
@@ -48,11 +48,11 @@ struct ScalarQuantizer : Quantizer {
         RS_optim, ///< alternate optimization of reconstruction error
     };
 
-    RangeStat rangestat;
-    float rangestat_arg;
+    RangeStat rangestat = RS_minmax;
+    float rangestat_arg = 0;
 
     /// bits per scalar code
-    size_t bits;
+    size_t bits = 0;
 
     /// trained values (including the range)
     std::vector<float> trained;
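
In effect, the constructor logic removed from ScalarQuantizer.cpp above is now expressed as in-class initializers. A small sketch of what these defaults imply for a default-constructed quantizer (the members and enum values are the ones shown in the hunks above):

#include <faiss/impl/ScalarQuantizer.h>
#include <cassert>

int main() {
    faiss::ScalarQuantizer sq; // the default constructor is now trivial

    // the values come from the member initializers shown above
    assert(sq.qtype == faiss::ScalarQuantizer::QT_8bit);
    assert(sq.rangestat == faiss::ScalarQuantizer::RS_minmax);
    assert(sq.rangestat_arg == 0);
    assert(sq.bits == 0);
    return 0;
}
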
data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3

@@ -18,7 +18,7 @@ ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
 
 template <typename IndexT>
 ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
-        : IndexT(d),
+        : IndexT(d), isThreaded_(threaded) {}
 
 template <typename IndexT>
 ThreadedIndex<IndexT>::~ThreadedIndex() {
@@ -35,7 +35,7 @@ ThreadedIndex<IndexT>::~ThreadedIndex() {
             FAISS_ASSERT(!(bool)p.second);
         }
 
-        if (
+        if (own_indices) {
             delete p.first;
         }
     }
@@ -102,7 +102,7 @@ void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
     indices_.erase(it);
     onAfterRemoveIndex(index);
 
-    if (
+    if (own_indices) {
        delete index;
    }
 
data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4

@@ -29,7 +29,7 @@ class ThreadedIndex : public IndexT {
     /// WARNING: once an index is added, it becomes unsafe to touch it from any
     /// other thread than that on which is managing it, until we are shut
     /// down. Use runOnIndex to perform work on it instead.
-    void addIndex(IndexT* index);
+    virtual void addIndex(IndexT* index);
 
     /// Remove an index that is managed by ourselves.
     /// This will flush all pending work on that index, and then shut
@@ -52,17 +52,17 @@ class ThreadedIndex : public IndexT {
     }
 
     /// Returns the i-th sub-index
-    IndexT* at(
+    IndexT* at(size_t i) {
         return indices_[i].first;
     }
 
     /// Returns the i-th sub-index (const version)
-    const IndexT* at(
+    const IndexT* at(size_t i) const {
         return indices_[i].first;
     }
 
     /// Whether or not we are responsible for deleting our contained indices
-    bool
+    bool own_indices = false;
 
    protected:
     /// Called just after an index is added
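
The own_indices flag (default false) is what the destructor and removeIndex() shown above consult before deleting sub-indexes. A rough sketch of that ownership contract through IndexReplicas, which derives from ThreadedIndex<Index>; the IndexReplicas(d) constructor is assumed unchanged from earlier releases and is not part of this diff:

#include <faiss/IndexFlat.h>
#include <faiss/IndexReplicas.h>

int main() {
    faiss::IndexReplicas replicas(64); // d = 64

    // sub-indexes are heap-allocated by the caller
    replicas.addIndex(new faiss::IndexFlatL2(64));
    replicas.addIndex(new faiss::IndexFlatL2(64));

    // with the default own_indices = false the caller keeps ownership;
    // opt in to have the ThreadedIndex destructor delete the sub-indexes
    replicas.own_indices = true;
    return 0;
}
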
data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0

@@ -0,0 +1,291 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#ifdef __AVX2__
+
+#include <immintrin.h>
+
+#include <type_traits>
+
+#include <faiss/impl/code_distance/code_distance-generic.h>
+
+namespace {
+
+// Computes a horizontal sum over an __m256 register
+inline float horizontal_sum(const __m256 reg) {
+    const __m256 h0 = _mm256_hadd_ps(reg, reg);
+    const __m256 h1 = _mm256_hadd_ps(h0, h0);
+
+    // extract high and low __m128 regs from __m256
+    const __m128 h2 = _mm256_extractf128_ps(h1, 1);
+    const __m128 h3 = _mm256_castps256_ps128(h1);
+
+    // get a final hsum into all 4 regs
+    const __m128 h4 = _mm_add_ss(h2, h3);
+
+    // extract f[0] from __m128
+    const float hsum = _mm_cvtss_f32(h4);
+    return hsum;
+}
+
+} // namespace
+
+namespace faiss {
+
+template <typename PQDecoderT>
+typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, float>::
+        type inline distance_single_code_avx2(
+                // the product quantizer
+                const ProductQuantizer& pq,
+                // precomputed distances, layout (M, ksub)
+                const float* sim_table,
+                const uint8_t* code) {
+    // default implementation
+    return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
+}
+
+template <typename PQDecoderT>
+typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
+        type inline distance_single_code_avx2(
+                // the product quantizer
+                const ProductQuantizer& pq,
+                // precomputed distances, layout (M, ksub)
+                const float* sim_table,
+                const uint8_t* code) {
+    float result = 0;
+
+    size_t m = 0;
+    const size_t pqM16 = pq.M / 16;
+
+    const float* tab = sim_table;
+
+    if (pqM16 > 0) {
+        // process 16 values per loop
+
+        const __m256i ksub = _mm256_set1_epi32(pq.ksub);
+        __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+        offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
+
+        // accumulators of partial sums
+        __m256 partialSum = _mm256_setzero_ps();
+
+        // loop
+        for (m = 0; m < pqM16 * 16; m += 16) {
+            // load 16 uint8 values
+            const __m128i mm1 = _mm_loadu_si128((const __m128i_u*)(code + m));
+            {
+                // convert uint8 values (low part of __m128i) to int32
+                // values
+                const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
+
+                // add offsets
+                const __m256i indices_to_read_from =
+                        _mm256_add_epi32(idx1, offsets_0);
+
+                // gather 8 values, similar to 8 operations of tab[idx]
+                __m256 collected = _mm256_i32gather_ps(
+                        tab, indices_to_read_from, sizeof(float));
+                tab += pq.ksub * 8;
+
+                // collect partial sums
+                partialSum = _mm256_add_ps(partialSum, collected);
+            }
+
+            // move high 8 uint8 to low ones
+            const __m128i mm2 = _mm_unpackhi_epi64(mm1, _mm_setzero_si128());
+            {
+                // convert uint8 values (low part of __m128i) to int32
+                // values
+                const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
+
+                // add offsets
+                const __m256i indices_to_read_from =
+                        _mm256_add_epi32(idx1, offsets_0);
+
+                // gather 8 values, similar to 8 operations of tab[idx]
+                __m256 collected = _mm256_i32gather_ps(
+                        tab, indices_to_read_from, sizeof(float));
+                tab += pq.ksub * 8;
+
+                // collect partial sums
+                partialSum = _mm256_add_ps(partialSum, collected);
+            }
+        }
+
+        // horizontal sum for partialSum
+        result += horizontal_sum(partialSum);
+    }
+
+    //
+    if (m < pq.M) {
+        // process leftovers
+        PQDecoder8 decoder(code + m, pq.nbits);
+
+        for (; m < pq.M; m++) {
+            result += tab[decoder.decode()];
+            tab += pq.ksub;
+        }
+    }
+
+    return result;
+}
+
+template <typename PQDecoderT>
+typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, void>::
+        type
+        distance_four_codes_avx2(
+                // the product quantizer
+                const ProductQuantizer& pq,
+                // precomputed distances, layout (M, ksub)
+                const float* sim_table,
+                // codes
+                const uint8_t* __restrict code0,
+                const uint8_t* __restrict code1,
+                const uint8_t* __restrict code2,
+                const uint8_t* __restrict code3,
+                // computed distances
+                float& result0,
+                float& result1,
+                float& result2,
+                float& result3) {
+    distance_four_codes_generic<PQDecoderT>(
+            pq,
+            sim_table,
+            code0,
+            code1,
+            code2,
+            code3,
+            result0,
+            result1,
+            result2,
+            result3);
+}
+
+// Combines 4 operations of distance_single_code()
+template <typename PQDecoderT>
+typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
+distance_four_codes_avx2(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // codes
+        const uint8_t* __restrict code0,
+        const uint8_t* __restrict code1,
+        const uint8_t* __restrict code2,
+        const uint8_t* __restrict code3,
+        // computed distances
+        float& result0,
+        float& result1,
+        float& result2,
+        float& result3) {
+    result0 = 0;
+    result1 = 0;
+    result2 = 0;
+    result3 = 0;
+
+    size_t m = 0;
+    const size_t pqM16 = pq.M / 16;
+
+    constexpr intptr_t N = 4;
+
+    const float* tab = sim_table;
+
+    if (pqM16 > 0) {
+        // process 16 values per loop
+        const __m256i ksub = _mm256_set1_epi32(pq.ksub);
+        __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+        offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
+
+        // accumulators of partial sums
+        __m256 partialSums[N];
+        for (intptr_t j = 0; j < N; j++) {
+            partialSums[j] = _mm256_setzero_ps();
+        }
+
+        // loop
+        for (m = 0; m < pqM16 * 16; m += 16) {
+            // load 16 uint8 values
+            __m128i mm1[N];
+            mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
+            mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
+            mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
+            mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
+
+            // process first 8 codes
+            for (intptr_t j = 0; j < N; j++) {
+                // convert uint8 values (low part of __m128i) to int32
+                // values
+                const __m256i idx1 = _mm256_cvtepu8_epi32(mm1[j]);
+
+                // add offsets
+                const __m256i indices_to_read_from =
+                        _mm256_add_epi32(idx1, offsets_0);
+
+                // gather 8 values, similar to 8 operations of tab[idx]
+                __m256 collected = _mm256_i32gather_ps(
+                        tab, indices_to_read_from, sizeof(float));
+
+                // collect partial sums
+                partialSums[j] = _mm256_add_ps(partialSums[j], collected);
+            }
+            tab += pq.ksub * 8;
+
+            // process next 8 codes
+            for (intptr_t j = 0; j < N; j++) {
+                // move high 8 uint8 to low ones
+                const __m128i mm2 =
+                        _mm_unpackhi_epi64(mm1[j], _mm_setzero_si128());
+
+                // convert uint8 values (low part of __m128i) to int32
+                // values
+                const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
+
+                // add offsets
+                const __m256i indices_to_read_from =
+                        _mm256_add_epi32(idx1, offsets_0);
+
+                // gather 8 values, similar to 8 operations of tab[idx]
+                __m256 collected = _mm256_i32gather_ps(
+                        tab, indices_to_read_from, sizeof(float));
+
+                // collect partial sums
+                partialSums[j] = _mm256_add_ps(partialSums[j], collected);
+            }
+
+            tab += pq.ksub * 8;
+        }
+
+        // horizontal sum for partialSum
+        result0 += horizontal_sum(partialSums[0]);
+        result1 += horizontal_sum(partialSums[1]);
+        result2 += horizontal_sum(partialSums[2]);
+        result3 += horizontal_sum(partialSums[3]);
+    }
+
+    //
+    if (m < pq.M) {
+        // process leftovers
+        PQDecoder8 decoder0(code0 + m, pq.nbits);
+        PQDecoder8 decoder1(code1 + m, pq.nbits);
+        PQDecoder8 decoder2(code2 + m, pq.nbits);
+        PQDecoder8 decoder3(code3 + m, pq.nbits);
+        for (; m < pq.M; m++) {
+            result0 += tab[decoder0.decode()];
+            result1 += tab[decoder1.decode()];
+            result2 += tab[decoder2.decode()];
+            result3 += tab[decoder3.decode()];
+            tab += pq.ksub;
+        }
+    }
+}
+
+} // namespace faiss
+
+#endif
data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0

@@ -0,0 +1,74 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <faiss/impl/ProductQuantizer.h>
+
+namespace faiss {
+
+/// Returns the distance to a single code.
+template <typename PQDecoderT>
+inline float distance_single_code_generic(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // the code
+        const uint8_t* code) {
+    PQDecoderT decoder(code, pq.nbits);
+
+    const float* tab = sim_table;
+    float result = 0;
+
+    for (size_t m = 0; m < pq.M; m++) {
+        result += tab[decoder.decode()];
+        tab += pq.ksub;
+    }
+
+    return result;
+}
+
+/// Combines 4 operations of distance_single_code()
+/// General-purpose version.
+template <typename PQDecoderT>
+inline void distance_four_codes_generic(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // codes
+        const uint8_t* __restrict code0,
+        const uint8_t* __restrict code1,
+        const uint8_t* __restrict code2,
+        const uint8_t* __restrict code3,
+        // computed distances
+        float& result0,
+        float& result1,
+        float& result2,
+        float& result3) {
+    PQDecoderT decoder0(code0, pq.nbits);
+    PQDecoderT decoder1(code1, pq.nbits);
+    PQDecoderT decoder2(code2, pq.nbits);
+    PQDecoderT decoder3(code3, pq.nbits);
+
+    const float* tab = sim_table;
+    result0 = 0;
+    result1 = 0;
+    result2 = 0;
+    result3 = 0;
+
+    for (size_t m = 0; m < pq.M; m++) {
+        result0 += tab[decoder0.decode()];
+        result1 += tab[decoder1.decode()];
+        result2 += tab[decoder2.decode()];
+        result3 += tab[decoder3.decode()];
+        tab += pq.ksub;
+    }
+}
+
+} // namespace faiss
data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0

@@ -0,0 +1,123 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <faiss/impl/platform_macros.h>
+
+// This directory contains functions to compute a distance
+// from a given PQ code to a query vector, given that the
+// distances to a query vector for pq.M codebooks are precomputed.
+//
+// The code was originally the part of IndexIVFPQ.cpp.
+// The baseline implementation can be found in
+// code_distance-generic.h, distance_single_code_generic().
+
+// The reason for this somewhat unusual structure is that
+// custom implementations may need to fall off to generic
+// implementation in certain cases. So, say, avx2 header file
+// needs to reference the generic header file. This is
+// why the names of the functions for custom implementations
+// have this _generic or _avx2 suffix.
+
+#ifdef __AVX2__
+
+#include <faiss/impl/code_distance/code_distance-avx2.h>
+
+namespace faiss {
+
+template <typename PQDecoderT>
+inline float distance_single_code(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // the code
+        const uint8_t* code) {
+    return distance_single_code_avx2<PQDecoderT>(pq, sim_table, code);
+}
+
+template <typename PQDecoderT>
+inline void distance_four_codes(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // codes
+        const uint8_t* __restrict code0,
+        const uint8_t* __restrict code1,
+        const uint8_t* __restrict code2,
+        const uint8_t* __restrict code3,
+        // computed distances
+        float& result0,
+        float& result1,
+        float& result2,
+        float& result3) {
+    distance_four_codes_avx2<PQDecoderT>(
+            pq,
+            sim_table,
+            code0,
+            code1,
+            code2,
+            code3,
+            result0,
+            result1,
+            result2,
+            result3);
+}
+
+} // namespace faiss
+
+#else
+
+#include <faiss/impl/code_distance/code_distance-generic.h>
+
+namespace faiss {
+
+template <typename PQDecoderT>
+inline float distance_single_code(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // the code
+        const uint8_t* code) {
+    return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
+}
+
+template <typename PQDecoderT>
+inline void distance_four_codes(
+        // the product quantizer
+        const ProductQuantizer& pq,
+        // precomputed distances, layout (M, ksub)
+        const float* sim_table,
+        // codes
+        const uint8_t* __restrict code0,
+        const uint8_t* __restrict code1,
+        const uint8_t* __restrict code2,
+        const uint8_t* __restrict code3,
+        // computed distances
+        float& result0,
+        float& result1,
+        float& result2,
+        float& result3) {
+    distance_four_codes_generic<PQDecoderT>(
+            pq,
+            sim_table,
+            code0,
+            code1,
+            code2,
+            code3,
+            result0,
+            result1,
+            result2,
+            result3);
+}
+
+} // namespace faiss
+
+#endif
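
Taken together, the new code_distance headers give the PQ scanners a single dispatch point for code-to-query distances: distance_single_code() picks the AVX2 or generic path at compile time. A minimal usage sketch follows; score_code is a hypothetical helper written for illustration, it assumes an 8-bit ProductQuantizer (so PQDecoder8 applies) and uses ProductQuantizer::compute_distance_table() to fill the (M, ksub) table the dispatch function expects.

#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/code_distance/code_distance.h>
#include <vector>

// Hypothetical helper: score one encoded vector against a query whose
// per-codebook distance table has not been computed yet.
float score_code(
        const faiss::ProductQuantizer& pq,
        const float* query,
        const uint8_t* code) {
    // layout (M, ksub), exactly what distance_single_code() expects
    std::vector<float> sim_table(pq.M * pq.ksub);
    pq.compute_distance_table(query, sim_table.data());

    // PQDecoder8 is the common case of 8-bit PQ codes
    return faiss::distance_single_code<faiss::PQDecoder8>(
            pq, sim_table.data(), code);
}
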